From 70cdd8b1ef77a5eca4bb41b8b7c42a77b0923ba8 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Tue, 26 Sep 2023 12:52:39 +0200 Subject: [PATCH] ntp: add client support for network correction If the network correction is known for both the request and response, and their sum is not larger than the measured peer delay, allowing the transparent clocks to be running up to 100 ppm faster than the client's clock, apply the corrections to the NTP offset and peer delay. Don't correct the root delay to not change the estimated maximum error. --- candm.h | 1 + client.c | 5 +- cmdmon.c | 6 +- cmdparse.c | 3 + doc/chrony.conf.adoc | 24 ++++++-- ntp_core.c | 74 ++++++++++++++++++++++- test/simulation/110-chronyc | 2 +- test/simulation/142-ntpoverptp | 106 +++++++++++++++++++++++++++++++++ test/simulation/142-ptpport | 41 ------------- test/simulation/test.common | 5 ++ 10 files changed, 216 insertions(+), 51 deletions(-) create mode 100755 test/simulation/142-ntpoverptp delete mode 100755 test/simulation/142-ptpport diff --git a/candm.h b/candm.h index 909252f..033cdb9 100644 --- a/candm.h +++ b/candm.h @@ -278,6 +278,7 @@ typedef struct { #define REQ_ADDSRC_NTS 0x200 #define REQ_ADDSRC_COPY 0x400 #define REQ_ADDSRC_EF_EXP_MONO_ROOT 0x800 +#define REQ_ADDSRC_EF_EXP_NET_CORRECTION 0x1000 typedef struct { uint32_t type; diff --git a/client.c b/client.c index ee606ab..7cfefba 100644 --- a/client.c +++ b/client.c @@ -958,7 +958,10 @@ process_cmd_add_source(CMD_Request *msg, char *line) (data.params.burst ? REQ_ADDSRC_BURST : 0) | (data.params.nts ? REQ_ADDSRC_NTS : 0) | (data.params.copy ? REQ_ADDSRC_COPY : 0) | - (data.params.ext_fields & NTP_EF_FLAG_EXP_MONO_ROOT ? REQ_ADDSRC_EF_EXP_MONO_ROOT : 0) | + (data.params.ext_fields & NTP_EF_FLAG_EXP_MONO_ROOT ? + REQ_ADDSRC_EF_EXP_MONO_ROOT : 0) | + (data.params.ext_fields & NTP_EF_FLAG_EXP_NET_CORRECTION ? 
+ REQ_ADDSRC_EF_EXP_NET_CORRECTION : 0) | convert_addsrc_sel_options(data.params.sel_options)); msg->data.ntp_source.filter_length = htonl(data.params.filter_length); msg->data.ntp_source.cert_set = htonl(data.params.cert_set); diff --git a/cmdmon.c b/cmdmon.c index 988bb42..9adc9d6 100644 --- a/cmdmon.c +++ b/cmdmon.c @@ -783,8 +783,10 @@ handle_add_source(CMD_Request *rx_message, CMD_Reply *tx_message) params.burst = ntohl(rx_message->data.ntp_source.flags) & REQ_ADDSRC_BURST ? 1 : 0; params.nts = ntohl(rx_message->data.ntp_source.flags) & REQ_ADDSRC_NTS ? 1 : 0; params.copy = ntohl(rx_message->data.ntp_source.flags) & REQ_ADDSRC_COPY ? 1 : 0; - params.ext_fields = ntohl(rx_message->data.ntp_source.flags) & REQ_ADDSRC_EF_EXP_MONO_ROOT ? - NTP_EF_FLAG_EXP_MONO_ROOT : 0; + params.ext_fields = (ntohl(rx_message->data.ntp_source.flags) & REQ_ADDSRC_EF_EXP_MONO_ROOT ? + NTP_EF_FLAG_EXP_MONO_ROOT : 0) | + (ntohl(rx_message->data.ntp_source.flags) & REQ_ADDSRC_EF_EXP_NET_CORRECTION ? + NTP_EF_FLAG_EXP_NET_CORRECTION : 0); params.sel_options = convert_addsrc_select_options(ntohl(rx_message->data.ntp_source.flags)); status = NSR_AddSourceByName(name, port, pool, type, &params, NULL); diff --git a/cmdparse.c b/cmdparse.c index 57fea1c..ac5ace2 100644 --- a/cmdparse.c +++ b/cmdparse.c @@ -118,6 +118,9 @@ CPS_ParseNTPSourceAdd(char *line, CPS_NTP_Source *src) case NTP_EF_EXP_MONO_ROOT: src->params.ext_fields |= NTP_EF_FLAG_EXP_MONO_ROOT; break; + case NTP_EF_EXP_NET_CORRECTION: + src->params.ext_fields |= NTP_EF_FLAG_EXP_NET_CORRECTION; + break; default: return 0; } diff --git a/doc/chrony.conf.adoc b/doc/chrony.conf.adoc index 0628832..1fdd5d7 100644 --- a/doc/chrony.conf.adoc +++ b/doc/chrony.conf.adoc @@ -322,7 +322,9 @@ server implementations do not respond to requests containing an unknown extension field (*chronyd* as a server responded to such requests since version 2.0). 
+ -The following extension field can be enabled by this option: +This option can be used multiple times to enable multiple extension fields. ++ +The following extension fields are supported: + _F323_:::: An experimental extension field to enable several improvements that were @@ -331,6 +333,14 @@ root delay and dispersion in higher resolution and a monotonic receive timestamp, which enables a frequency transfer between the server and client to significantly improve stability of the synchronisation. This field should be enabled only for servers known to be running *chronyd* version 4.2 or later. +_F324_:::: +An experimental extension field to enable the use of the Precision Time +Protocol (PTP) correction field in NTP-over-PTP messages updated by one-step +end-to-end transparent clocks in network switches and routers to significantly +improve accuracy and stability of the synchronisation. NTP-over-PTP can be +enabled by the <<ptpport,*ptpport*>> directive and setting the *port* option to +the PTP port. This field should be enabled only for servers known to be running +*chronyd* version 4.5 or later. {blank}::: [[pool]]*pool* _name_ [_option_]...:: @@ -2727,8 +2737,10 @@ pidfile /run/chronyd.pid The *ptpport* directive enables *chronyd* to send and receive NTP messages contained in PTP event messages (NTP-over-PTP) to enable hardware timestamping on NICs which cannot timestamp NTP packets, but can timestamp unicast PTP -packets. The port recognized by the NICs is 319 (PTP event port). The default -value is 0 (disabled). +packets, and also use corrections provided by PTP one-step end-to-end +transparent clocks in network switches and routers. The port recognized by the +NICs and PTP transparent clocks is 319 (PTP event port). The default value is 0 +(disabled). + The NTP-over-PTP support is experimental. The protocol and configuration can change in future. It should be used only in local networks. @@ -2738,12 +2750,14 @@ server or client. 
The directive does not change the default protocol of specified NTP sources. Each NTP source that should use NTP-over-PTP needs to be specified with the *port* option set to the PTP port. To actually enable hardware timestamping on NICs which can timestamp PTP packets only, the -*rxfilter* option of the *hwtimestamp* directive needs to be set to _ptp_. +*rxfilter* option of the *hwtimestamp* directive needs to be set to _ptp_. The +extension field _F324_ needs to be enabled to use the corrections provided by +the PTP transparent clocks. + An example of client configuration is: + ---- -server foo.example.net minpoll 0 maxpoll 0 xleave port 319 +server foo.example.net minpoll 0 maxpoll 0 xleave port 319 extfield F324 hwtimestamp * rxfilter ptp ptpport 319 ---- diff --git a/ntp_core.c b/ntp_core.c index 6cd7879..2b2b78e 100644 --- a/ntp_core.c +++ b/ntp_core.c @@ -314,6 +314,9 @@ static ARR_Instance broadcasts; /* Maximum acceptable change in server mono<->real offset */ #define MAX_MONO_DOFFSET 16.0 +/* Maximum assumed frequency error in network corrections */ +#define MAX_NET_CORRECTION_FREQ 100.0e-6 + /* Invalid socket, different from the one in ntp_io.c */ #define INVALID_SOCK_FD -2 @@ -1661,6 +1664,53 @@ parse_packet(NTP_Packet *packet, int length, NTP_PacketInfo *info) /* ================================================== */ +static void +apply_net_correction(NTP_Sample *sample, NTP_Local_Timestamp *rx, NTP_Local_Timestamp *tx, + double precision) +{ + double rx_correction, tx_correction, low_delay_correction; + + /* Require some correction from transparent clocks to be present + in both directions (not just the local RX timestamp correction) */ + if (rx->net_correction <= rx->rx_duration || tx->net_correction <= 0.0) + return; + + /* With perfect corrections from PTP transparent clocks and short cables + the peer delay would be close to zero, or even negative if the server or + transparent clocks were running faster than client, which would invert the + sample 
weighting. Adjust the correction to get a delay corresponding to + a direct connection to the server. For simplicity, assume the TX and RX + link speeds are equal. If not, the reported delay will be wrong, but it + will not cause an error in the offset. */ + rx_correction = rx->net_correction - rx->rx_duration; + tx_correction = tx->net_correction - rx->rx_duration; + + /* Use a slightly smaller value in the correction of delay to not overcorrect + if the transparent clocks run up to 100 ppm fast and keep a part of the + uncorrected delay for the sample weighting */ + low_delay_correction = (rx_correction + tx_correction) * + (1.0 - MAX_NET_CORRECTION_FREQ); + + /* Make sure the correction is sane. The values are not authenticated! */ + if (low_delay_correction < 0.0 || low_delay_correction > sample->peer_delay) { + DEBUG_LOG("Invalid correction %.9f peer_delay=%.9f", + low_delay_correction, sample->peer_delay); + return; + } + + /* Correct the offset and peer delay, but not the root delay to not + change the estimated maximum error */ + sample->offset += (rx_correction - tx_correction) / 2.0; + sample->peer_delay -= low_delay_correction; + if (sample->peer_delay < precision) + sample->peer_delay = precision; + + DEBUG_LOG("Applied correction rx=%.9f tx=%.9f dur=%.9f", + rx->net_correction, tx->net_correction, rx->rx_duration); +} + +/* ================================================== */ + static int check_delay_ratio(NCR_Instance inst, SST_Stats stats, struct timespec *sample_time, double delay) @@ -1923,10 +1973,11 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, int parsed, ef_length, ef_type, ef_body_length; void *ef_body; NTP_EFExpMonoRoot *ef_mono_root; + NTP_EFExpNetCorrection *ef_net_correction; NTP_Local_Timestamp local_receive, local_transmit; double remote_interval, local_interval, response_time; - double delay_time, precision, mono_doffset; + double delay_time, precision, mono_doffset, net_correction; int 
updated_timestamps; /* ==================== */ @@ -1934,6 +1985,7 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, stats = SRC_GetSourcestats(inst->source); ef_mono_root = NULL; + ef_net_correction = NULL; /* Find requested non-authentication extension fields */ if (inst->ext_field_flags & info->ext_field_flags) { @@ -1949,6 +2001,12 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, NTP_EF_EXP_MONO_ROOT_MAGIC)) ef_mono_root = ef_body; break; + case NTP_EF_EXP_NET_CORRECTION: + if (inst->ext_field_flags & NTP_EF_FLAG_EXP_NET_CORRECTION && + is_exp_ef(ef_body, ef_body_length, sizeof (*ef_net_correction), + NTP_EF_EXP_NET_CORRECTION_MAGIC)) + ef_net_correction = ef_body; + break; } } } @@ -2055,6 +2113,12 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, mono_doffset = 0.0; } + if (ef_net_correction) { + net_correction = UTI_Ntp64ToDouble(&ef_net_correction->correction); + } else { + net_correction = 0.0; + } + /* Select remote and local timestamps for the new sample */ if (interleaved_packet) { /* Prefer previous local TX and remote RX timestamps if it will make @@ -2074,6 +2138,7 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, UTI_Ntp64ToTimespec(&message->receive_ts, &remote_receive); UTI_Ntp64ToTimespec(&inst->remote_ntp_rx, &remote_request_receive); local_transmit = inst->local_tx; + local_transmit.net_correction = net_correction; root_delay = MAX(pkt_root_delay, inst->remote_root_delay); root_dispersion = MAX(pkt_root_dispersion, inst->remote_root_dispersion); } @@ -2088,6 +2153,7 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, remote_request_receive = remote_receive; local_receive = *rx_ts; local_transmit = inst->local_tx; + local_transmit.net_correction = net_correction; root_delay = pkt_root_delay; root_dispersion = pkt_root_dispersion; } @@ -2131,6 +2197,9 @@ process_response(NCR_Instance inst, int 
saved, NTP_Local_Address *local_addr, skew * fabs(local_interval); sample.root_delay = root_delay + sample.peer_delay; sample.root_dispersion = root_dispersion + sample.peer_dispersion; + + /* Apply corrections from PTP transparent clocks if available and sane */ + apply_net_correction(&sample, &local_receive, &local_transmit, precision); /* If the source is an active peer, this is the minimum assumed interval between previous two transmissions (if not constrained by minpoll) */ @@ -2186,6 +2255,7 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, sample.root_delay = sample.root_dispersion = 0.0; sample.time = rx_ts->ts; mono_doffset = 0.0; + net_correction = 0.0; local_receive = *rx_ts; local_transmit = inst->local_tx; testA = testB = testC = testD = 0; @@ -2229,6 +2299,8 @@ process_response(NCR_Instance inst, int saved, NTP_Local_Address *local_addr, inst->mono_doffset = 0.0; } + inst->local_tx.net_correction = net_correction; + /* Don't use the same set of timestamps for the next sample */ if (interleaved_packet) inst->prev_local_tx = inst->local_tx; diff --git a/test/simulation/110-chronyc b/test/simulation/110-chronyc index 97abc21..46b0a3f 100755 --- a/test/simulation/110-chronyc +++ b/test/simulation/110-chronyc @@ -114,7 +114,7 @@ limit=1 for chronyc_conf in \ "accheck 1.2.3.4" \ "add peer 10.0.0.0 minpoll 2 maxpoll 6" \ - "add server 10.0.0.0 minpoll 6 maxpoll 10 iburst burst key 1 certset 2 maxdelay 1e-3 maxdelayratio 10.0 maxdelaydevratio 10.0 maxdelayquant 0.5 mindelay 1e-4 asymmetry 0.5 offset 1e-5 minsamples 6 maxsamples 6 filter 3 offline auto_offline prefer noselect trust require xleave polltarget 20 port 123 presend 7 minstratum 3 version 4 nts ntsport 4460 copy extfield F323" \ + "add server 10.0.0.0 minpoll 6 maxpoll 10 iburst burst key 1 certset 2 maxdelay 1e-3 maxdelayratio 10.0 maxdelaydevratio 10.0 maxdelayquant 0.5 mindelay 1e-4 asymmetry 0.5 offset 1e-5 minsamples 6 maxsamples 6 filter 3 offline auto_offline 
prefer noselect trust require xleave polltarget 20 port 123 presend 7 minstratum 3 version 4 nts ntsport 4460 copy extfield F323 extfield F324" \ "add server node1.net1.clk" \ "allow 1.2.3.4" \ "allow 1.2" \ diff --git a/test/simulation/142-ntpoverptp b/test/simulation/142-ntpoverptp new file mode 100755 index 0000000..2996dc0 --- /dev/null +++ b/test/simulation/142-ntpoverptp @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +. ./test.common + +test_start "NTP over PTP" + +# Block communication between 3 and 1 +base_delay="(+ 1e-4 (* -1 (equal 0.1 from 3) (equal 0.1 to 1)))" + +cat > tmp/peer.keys <<-EOF +1 MD5 1234567890 +EOF + +clients=2 +peers=2 +max_sync_time=420 + +server_conf=" +ptpport 319" +client_conf=" +ptpport 319 +authselectmode ignore +keyfile tmp/peer.keys" +client_server_options="minpoll 6 maxpoll 6 port 319" +client_peer_options="minpoll 6 maxpoll 6 port 319 key 1" + +run_test || test_fail +check_chronyd_exit || test_fail +check_source_selection || test_fail +check_sync || test_fail + +check_file_messages " 2 1 .* 319 319 1 96 " 150 160 \ + log.packets || test_fail +check_file_messages " 1 2 .* 319 319 1 96 " 150 160 \ + log.packets || test_fail +check_file_messages " 2 3 .* 319 319 1 116 " 150 160 \ + log.packets || test_fail +check_file_messages " 3 2 .* 319 319 1 116 " 150 160 \ + log.packets || test_fail + +check_config_h 'HAVE_LINUX_TIMESTAMPING 1' || test_skip + +export CLKNETSIM_TIMESTAMPING=2 +export CLKNETSIM_LINK_SPEED=100 + +client_server_options+=" extfield F324 minpoll 0 maxpoll 0" +client_peer_options+=" extfield F324 minpoll 0 maxpoll 0 maxdelaydevratio 1e6" +server_conf+=" +clockprecision 1e-9 +hwtimestamp eth0" +client_conf+=" +clockprecision 1e-9 +hwtimestamp eth0" +delay_correction="(+ delay (* -8e-8 (+ length 46)))" +wander=1e-9 +limit=1000 +freq_offset=-1e-4 +min_sync_time=5 +max_sync_time=20 +time_max_limit=1e-7 +time_rms_limit=2e-8 +freq_max_limit=1e-7 +freq_rms_limit=5e-8 +client_chronyd_options="-d" + +run_test || test_fail 
+check_chronyd_exit || test_fail +check_source_selection || test_fail +check_sync || test_fail + +if check_config_h 'FEAT_DEBUG 1'; then + check_log_messages "apply_net_correction.*Applied" 900 2100 || test_fail + check_log_messages "apply_net_correction.*Invalid" 0 4 || test_fail +fi + +client_server_options+=" xleave" +client_peer_options+=" xleave" + +run_test || test_fail +check_chronyd_exit || test_fail +check_source_selection || test_fail +check_sync || test_fail + +if check_config_h 'FEAT_DEBUG 1'; then + check_log_messages "apply_net_correction.*Applied" 900 2100 || test_fail + check_log_messages "apply_net_correction.*Invalid" 0 4 || test_fail + + freq_offset=0.0 + delay_correction="(+ -1.0e-9 (* 1.0001 delay))" + + run_test || test_fail + check_chronyd_exit || test_fail + + check_log_messages "apply_net_correction.*Applied" 350 1400 || test_fail + check_log_messages "apply_net_correction.*Invalid" 350 1400 || test_fail + + server_conf="ptpport 319" + client_conf="ptpport 319" + + run_test || test_fail + check_chronyd_exit || test_fail + + check_log_messages "apply_net_correction.*Applied" 0 0 || test_fail +fi + +test_pass diff --git a/test/simulation/142-ptpport b/test/simulation/142-ptpport deleted file mode 100755 index 060932c..0000000 --- a/test/simulation/142-ptpport +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash - -. 
./test.common - -test_start "PTP port" - -# Block communication between 3 and 1 -base_delay="(+ 1e-4 (* -1 (equal 0.1 from 3) (equal 0.1 to 1)))" - -cat > tmp/peer.keys <<-EOF -1 MD5 1234567890 -EOF - -clients=2 -peers=2 -max_sync_time=420 - -server_conf=" -ptpport 319" -client_conf=" -ptpport 319 -authselectmode ignore -keyfile tmp/peer.keys" -client_server_options="minpoll 6 maxpoll 6 port 319" -client_peer_options="minpoll 6 maxpoll 6 port 319 key 1" - -run_test || test_fail -check_chronyd_exit || test_fail -check_source_selection || test_fail -check_sync || test_fail - -check_file_messages " 2 1 .* 319 319 1 96 " 150 160 \ - log.packets || test_fail -check_file_messages " 1 2 .* 319 319 1 96 " 150 160 \ - log.packets || test_fail -check_file_messages " 2 3 .* 319 319 1 116 " 150 160 \ - log.packets || test_fail -check_file_messages " 3 2 .* 319 319 1 116 " 150 160 \ - log.packets || test_fail - -test_pass diff --git a/test/simulation/test.common b/test/simulation/test.common index 3f6e80b..42a2917 100644 --- a/test/simulation/test.common +++ b/test/simulation/test.common @@ -31,6 +31,7 @@ default_primary_time_offset=0.0 default_time_offset=1e-1 default_freq_offset=1e-4 default_base_delay=1e-4 +default_delay_correction="" default_jitter=1e-4 default_jitter_asymmetry=0.0 default_wander=1e-9 @@ -460,6 +461,10 @@ run_test() { for j in $(seq 1 $nodes); do echo "node${i}_delay${j} = $(get_delay_expr up)" echo "node${j}_delay${i} = $(get_delay_expr down)" + if [ -n "$delay_correction" ]; then + echo "node${i}_delay_correction${j} = $delay_correction" + echo "node${j}_delay_correction${i} = $delay_correction" + fi done done > tmp/conf