sources: replace unreachable sources before selection

The commit c43efccf02 ("sources: update source selection with
unreachable sources") caused a high rate of failures in the
148-replacement test (1 falseticker vs 2 unreachable sources). This was
due to a larger fraction of the replacement attempts being made for the
source incorrectly marked as a falseticker instead of the second
unreachable source and the random process needed more time to get to the
expected state with both unreachable sources replaced.

When updating reachability of an unreachable source, try to request the
replacement of the source before calling the source selection, where
other sources may be replaced, to better balance the different
replacement attempts.
This commit is contained in:
Miroslav Lichvar 2024-08-28 10:49:52 +02:00
parent 1ab5b88939
commit 78b9c13a11
2 changed files with 28 additions and 5 deletions

View file

@ -526,11 +526,6 @@ SRC_UpdateReachability(SRC_Instance inst, int reachable)
if (inst->reachability_size < SOURCE_REACH_BITS)
inst->reachability_size++;
/* Source selection can change with unreachable sources */
if (inst->reachability == 0) {
SRC_SelectSource(NULL);
}
/* Check if special reference update mode failed */
if (REF_GetMode() != REF_ModeNormal && special_mode_end()) {
REF_SetUnsynchronised();
@ -539,6 +534,10 @@ SRC_UpdateReachability(SRC_Instance inst, int reachable)
/* Try to replace unreachable NTP sources */
if (inst->reachability == 0 && inst->reachability_size == SOURCE_REACH_BITS)
handle_bad_source(inst);
/* Source selection can change with unreachable sources */
if (inst->reachability == 0)
SRC_SelectSource(NULL);
}
/* ================================================== */

View file

@ -53,4 +53,28 @@ check_log_messages "2010-01-01T0[5-9]:.*Source 192.168.123.. replaced with" 0 15
check_file_messages "20.*192.168.123.* 11.1 6 6 " 20 500 measurements.log || test_fail
rm -f tmp/measurements.log
# 2 replaceable falsetickers and 1 replaceable unreachable server
servers=6
falsetickers=2
base_delay="(+ 1e-4 (* -1 (equal 0.1 to 3)))"
client_server_conf="
server nodes-4-1.net1.clk
server nodes-5-2.net1.clk
server nodes-6-3.net1.clk"
run_test || test_fail
check_chronyd_exit || test_fail
check_source_selection && test_fail
check_packet_interval || test_fail
check_sync || test_fail
check_log_messages "Can't synchronise: no majority" 1 1 || test_fail
check_log_messages "Detected falseticker" 0 2 || test_fail
check_log_messages "Source 192.168.123.. replaced with" 3 60 || test_fail
check_log_messages "Source 192.168.123.1 replaced with" 1 25 || test_fail
check_log_messages "Source 192.168.123.2 replaced with" 1 25 || test_fail
check_log_messages "Source 192.168.123.3 replaced with" 1 25 || test_fail
check_file_messages "20.*192.168.123.* 11.1 6 6 " 50 800 measurements.log || test_fail
rm -f tmp/measurements.log
test_pass