Fix operational problems with persistently-deferring peers

author Ian Jackson <ijackson@chiark.greenend.org.uk>

Fri, 18 Oct 2024 16:21:08 +0000 (17:21 +0100)

committer Ian Jackson <ijackson@chiark.greenend.org.uk>

Fri, 18 Oct 2024 16:36:51 +0000 (17:36 +0100)
author Ian Jackson <ijackson@chiark.greenend.org.uk>
Fri, 18 Oct 2024 16:21:08 +0000 (17:21 +0100)
committer Ian Jackson <ijackson@chiark.greenend.org.uk>
Fri, 18 Oct 2024 16:36:51 +0000 (17:36 +0100)
diff --git a/duct.c b/duct.c

index bf8c8087842c986400cfd1598bf3191876f1b046..b76cdf8b3dbd49e7b10c2fc4aea1fb1738bdd973 100644 (file)
--- a/duct.c
+++ b/duct.c
@@ -47,6 +47,8 @@ int target_max_feedfile_size=100000;
  int period_seconds=30;
  int filepoll_seconds=5;
  int max_queue_per_ipf=-1;
+int defer_all_max_per_sec=10;
+int defer_all_burst_pause=10;
  
  int connection_setup_timeout=200;
  int inndcomm_flush_timeout=100;
@@ -359,6 +361,8 @@ static const Option innduct_options[]= {
  
  {0,"no-check-proportion",   "PERCENT",   &nocheck_thresh,       op_double   },
  {0,"no-check-response-time","ARTICLES",  &nocheck_decay,        op_double   },
+{0,"defer-all-max-per-sec", "ARTICLES",  &defer_all_max_per_sec, op_integer  },
+{0,"defer-all-burst-pause", "TIME",      &defer_all_burst_pause, op_seconds  },
  
  {0,"reconnect-interval",     "PERIOD", &reconnect_delay_periods,  op_seconds },
  {0,"flush-retry-interval",   "PERIOD", &flushfail_retry_periods,  op_seconds },
diff --git a/innduct.h b/innduct.h

index c85d057386bc3c5e413ed32be0939754195f928c..c24f8ecab76c3c6deeb4083539be74a4c56bf426 100644 (file)
--- a/innduct.h
+++ b/innduct.h
@@ -151,6 +151,7 @@ extern const char *inndconffile;
  extern int max_connections, max_queue_per_conn, target_max_feedfile_size;
  extern int period_seconds, filepoll_seconds, max_queue_per_ipf;
  extern int connection_setup_timeout, inndcomm_flush_timeout;
+extern int defer_all_max_per_sec, defer_all_burst_pause;
  
  extern double nocheck_thresh;
  extern double nocheck_decay;
diff --git a/recv.c b/recv.c

index 44587bf1fb954dca661bf9e1a0569b1f5b26b26a..1fb746a15399255f4e91fbbcd67211cbdc05b8bc 100644 (file)
--- a/recv.c
+++ b/recv.c
@@ -12,6 +12,8 @@
  
  /*========== handling responses from peer ==========*/
  
+static int defer_consecutive_count;
+
  const oop_rd_style peer_rd_style= {
    OOP_RD_DELIM_STRIP, '\n',
    OOP_RD_NUL_FORBID,
@@ -193,6 +195,7 @@ void *peer_rd_ok(oop_source *lp, oop_read *oread, oop_rd_event ev,
      code_streaming= (streaming);                               \
      GET_ARTICLE(musthavesent);                                 \
      article_done(art, RC_##how);                               \
+    defer_consecutive_count = 0;                                \
      goto dealtwith;                                            \
    }while(0)
  
@@ -241,6 +244,24 @@ void *peer_rd_ok(oop_source *lp, oop_read *oread, oop_rd_event ev,
    case 436: /* IHAVE says try later */
      GET_ARTICLE(0);
      article_defer(art, RC_deferred);
+
+    // Some implementations don't reject connections while they're expiring
+    // etc., but accept them and defer every article.  If such a situation
+    // persists, we would otherwise run through the entire backlog each retry
+    // interval, getting the same answer for every article.  We try to detect
+    // this: if we seem to be getting only deferrals, we pause the whole feed.
+    defer_consecutive_count += 1;
+    if (defer_consecutive_count
+       > defer_all_max_per_sec * defer_all_burst_pause) {
+      warn("%d consecutive deferrals, pausing %ds; last response: %s",
+          defer_consecutive_count, defer_all_burst_pause, data);
+      defer_consecutive_count = 0;
+      // This causes the *whole program* including the entire event loop
+      // to freeze.  That is, surprisingly, what we want.  When we wake up
+      // hopefully things are better.  If not we will keep sleeping here,
+      // and making no progress - which is as expected.
+      sleep(defer_all_burst_pause);
+    }
      break;
  
    }
author	Ian Jackson <ijackson@chiark.greenend.org.uk>
	Fri, 18 Oct 2024 16:21:08 +0000 (17:21 +0100)
committer	Ian Jackson <ijackson@chiark.greenend.org.uk>
	Fri, 18 Oct 2024 16:36:51 +0000 (17:36 +0100)
duct.c		patch \| blob \| history
innduct.h		patch \| blob \| history
recv.c		patch \| blob \| history