chiark / gitweb /
hostside: multiplex: spot when restart is looping and abandon it
author Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 9 Jan 2011 16:54:39 +0000 (16:54 +0000)
committer Ian Jackson <ian@liberator.relativity.greenend.org.uk>
Sun, 9 Jan 2011 16:54:39 +0000 (16:54 +0000)
hostside/TODO
hostside/multiplex

index dec646e31210cceddbdd55eeb8caef79edfdec8f..89640cc91380ba58c8af9a923a71cd5552e69ea2 100644 (file)
@@ -1,5 +1,4 @@
 
-multiplex should have restart loop limiting
 why did removing +persist.* not fix it
 should expire dumps, eventually
 want realtime kernel for bessar
index 7a014dd1d67885875e8407f3d65a3d418d4738ef..4988c4445e31b4c0545dd171503cc9cdeddb466c 100755 (executable)
@@ -13,6 +13,7 @@
 #    $permissions         [list allow|super|deny $ipaddrhex $maskhex ...]
 #    $realtime            pipes
 #    $realtime_retry      [list $timeoutid $awaitedpongmsg|{} $buf]
+#    $realtime_last_retries        [list $now $now $now ... $now]
 #    $replay("$pri $key") $rhs     $pri is \d\d; causes replay of  "$key $rhs"
 #    $detect0($seg)       unset -> 1 or irrelevant; [after ...]
 #    $conns($conn)        1
 #    $realtime         any          unset      set       unset
 #    $realtime_retry   unset        {}         {}        [list ...]
 #
+# realtime_last_retries is unset at startup, and becomes unset when we
+# enter auto mode.  unset means in auto mode we have to set it.
+# It contains the last few startup times.
+#
 # replay priorities and messages:
 #    10 stastate
 #    40 warning realtime-failed
@@ -234,7 +239,7 @@ proc local/select-replay {conn args} {
 #---------- automatic realtime restart ----------
 
 proc global/!realtime {conn args} {
-    global realtime realtime_retry
+    global realtime realtime_retry realtime_last_retries
     nargs $args 1
     set how [lindex $args 0]
 
@@ -258,6 +263,7 @@ proc global/!realtime {conn args} {
        auto {
            realtime-retry-reset
            set realtime_retry {}
+           catch { unset realtime_last_retries }
        }
        stop - start - start-manual {
            realtime-retry-reset
@@ -291,9 +297,26 @@ proc realtime-retry-reset {} {
 
 proc realtime-retry-check {} {
     global realtime_retry realtime
+    global realtime_last_retries restart_min_mean_interval
     if {![info exists realtime_retry]} return
     if {[llength $realtime_retry]} return
     if {[info exists realtime]} return
+
+    if {![info exists realtime_last_retries]} {
+       set realtime_last_retries {0 0 0 0 0}
+    }
+    set oldest [lindex $realtime_last_retries 0]
+    set now [clock seconds]
+    if {$now - $oldest <
+       $restart_min_mean_interval * [llength $realtime_last_retries]} {
+       xmit-relevantly-savereplay 40 \
+           "warning realtime-failed" looping-disabled
+       unset realtime_retry
+       return
+    }
+    set realtime_last_retries [lrange $realtime_last_retries 1 end]
+    lappend realtime_last_retries $now
+
     realtime-start {}
 }
 
@@ -831,6 +854,7 @@ proc startup {} {
     setting testmode 0 {[01]}
     setting lputs 0 {[01]}
     setting dev_railway {} {/.*}
+    setting restart_min_mean_interval 5 {^\d+}
     parse-argv {}
     
     uplevel #0 source $libdir/multiplex-config