From 78b9c13a11b63348a2060b8f3b55662b3ec2c814 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Wed, 28 Aug 2024 10:49:52 +0200 Subject: [PATCH] sources: replace unreachable sources before selection The commit c43efccf0273 ("sources: update source selection with unreachable sources") caused a high rate of failures in the 148-replacement test (1 falseticker vs 2 unreachable sources). This was due to a larger fraction of the replacement attempts being made for the source incorrectly marked as a falseticker instead of the second unreachable source and the random process needed more time to get to the expected state with both unreachable sources replaced. When updating reachability of an unreachable source, try to request the replacement of the source before calling the source selection, where other sources may be replaced, to better balance the different replacement attempts. --- sources.c | 9 ++++----- test/simulation/148-replacement | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/sources.c b/sources.c index 2614448..ff7a792 100644 --- a/sources.c +++ b/sources.c @@ -526,11 +526,6 @@ SRC_UpdateReachability(SRC_Instance inst, int reachable) if (inst->reachability_size < SOURCE_REACH_BITS) inst->reachability_size++; - /* Source selection can change with unreachable sources */ - if (inst->reachability == 0) { - SRC_SelectSource(NULL); - } - /* Check if special reference update mode failed */ if (REF_GetMode() != REF_ModeNormal && special_mode_end()) { REF_SetUnsynchronised(); @@ -539,6 +534,10 @@ SRC_UpdateReachability(SRC_Instance inst, int reachable) /* Try to replace unreachable NTP sources */ if (inst->reachability == 0 && inst->reachability_size == SOURCE_REACH_BITS) handle_bad_source(inst); + + /* Source selection can change with unreachable sources */ + if (inst->reachability == 0) + SRC_SelectSource(NULL); } /* ================================================== */ diff --git a/test/simulation/148-replacement b/test/simulation/148-replacement index f15fc4d..d09fba6 100755 --- a/test/simulation/148-replacement +++ b/test/simulation/148-replacement @@ -53,4 +53,28 @@ check_log_messages "2010-01-01T0[5-9]:.*Source 192.168.123.. replaced with" 0 15 check_file_messages "20.*192.168.123.* 11.1 6 6 " 20 500 measurements.log || test_fail rm -f tmp/measurements.log +# 2 replaceable falsetickers and 1 replaceable unreachable server +servers=6 +falsetickers=2 +base_delay="(+ 1e-4 (* -1 (equal 0.1 to 3)))" +client_server_conf=" +server nodes-4-1.net1.clk +server nodes-5-2.net1.clk +server nodes-6-3.net1.clk" + +run_test || test_fail +check_chronyd_exit || test_fail +check_source_selection && test_fail +check_packet_interval || test_fail +check_sync || test_fail + +check_log_messages "Can't synchronise: no majority" 1 1 || test_fail +check_log_messages "Detected falseticker" 0 2 || test_fail +check_log_messages "Source 192.168.123.. replaced with" 3 60 || test_fail +check_log_messages "Source 192.168.123.1 replaced with" 1 25 || test_fail +check_log_messages "Source 192.168.123.2 replaced with" 1 25 || test_fail +check_log_messages "Source 192.168.123.3 replaced with" 1 25 || test_fail +check_file_messages "20.*192.168.123.* 11.1 6 6 " 50 800 measurements.log || test_fail +rm -f tmp/measurements.log + test_pass