Fix discovery re-announce: gate on new peer or restarted peer, not every packet
The original unconditional cond_signal (fired on every received packet) caused a multicast storm: each node instantly reflected every announcement back as its own, creating a tight loop at wire speed.

The previous fix (gate on is_new only) broke the restart case: a peer that restarts with the same addr+port is already in the table, so is_new stays 0, meaning we'd wait up to interval_ms before that peer learned about us.

Correct fix: also signal when site_id changes for a known addr+port entry, which reliably indicates a restart. Steady-state keepalive packets (same site_id) no longer trigger re-announcement.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -204,13 +204,16 @@ static void *receive_thread_fn(void *arg) {
|
|||||||
|
|
||||||
uint32_t addr = src.sin_addr.s_addr;
|
uint32_t addr = src.sin_addr.s_addr;
|
||||||
uint64_t ts = now_ms();
|
uint64_t ts = now_ms();
|
||||||
int is_new = 0;
|
int is_new = 0;
|
||||||
|
int reannounce = 0;
|
||||||
struct Discovery_Peer peer_copy;
|
struct Discovery_Peer peer_copy;
|
||||||
|
|
||||||
pthread_mutex_lock(&d->peers_mutex);
|
pthread_mutex_lock(&d->peers_mutex);
|
||||||
|
|
||||||
int idx = find_peer(d, addr, tcp_port);
|
int idx = find_peer(d, addr, tcp_port);
|
||||||
if (idx >= 0) {
|
if (idx >= 0) {
|
||||||
|
/* detect restart: same addr+port but site_id changed */
|
||||||
|
if (d->peers[idx].info.site_id != site_id) { reannounce = 1; }
|
||||||
d->peers[idx].last_seen_ms = ts;
|
d->peers[idx].last_seen_ms = ts;
|
||||||
d->peers[idx].info.site_id = site_id;
|
d->peers[idx].info.site_id = site_id;
|
||||||
d->peers[idx].info.tcp_port = tcp_port;
|
d->peers[idx].info.tcp_port = tcp_port;
|
||||||
@@ -233,9 +236,9 @@ static void *receive_thread_fn(void *arg) {
|
|||||||
|
|
||||||
pthread_mutex_unlock(&d->peers_mutex);
|
pthread_mutex_unlock(&d->peers_mutex);
|
||||||
|
|
||||||
if (is_new) {
|
if (is_new || reannounce) {
|
||||||
/* announce ourselves immediately so the new peer learns about us
|
/* new peer, or peer restarted (site_id changed) — announce ourselves
|
||||||
* without waiting up to interval_ms */
|
* immediately so it learns about us without waiting up to interval_ms */
|
||||||
pthread_mutex_lock(&d->announce_mutex);
|
pthread_mutex_lock(&d->announce_mutex);
|
||||||
pthread_cond_signal(&d->announce_cond);
|
pthread_cond_signal(&d->announce_cond);
|
||||||
pthread_mutex_unlock(&d->announce_mutex);
|
pthread_mutex_unlock(&d->announce_mutex);
|
||||||
|
|||||||
Reference in New Issue
Block a user