chiark / gitweb /
Improved error handling: autorestart realtime, logging, etc.
authorian <ian>
Sun, 8 Jun 2008 17:57:07 +0000 (17:57 +0000)
committerian <ian>
Sun, 8 Jun 2008 17:57:07 +0000 (17:57 +0000)
hostside/Makefile
hostside/README.commands
hostside/gui
hostside/multiplex
hostside/multiplex-config
hostside/startup.c
hostside/stastate.h.gen

index f3dc3f1b929caf85ee62d3612026c2a3f29ea55d..9e60d67292219f9a7a397ccb680d40fcd77b5974 100644 (file)
@@ -93,7 +93,7 @@ input-codes.h:        extract-input-codes $(INPUT_H)
                ./$^ $o
 
 tidy:
-               rm -f ./+dump.*
+               rm -rf ./+dump.*
 
 clean:         tidy
                rm -f *.o *.d $(TARGETS) selectors.h
index 052e823470dc123a06290c4b9e45ef5ecc5cd5dd..80a7f66ec1d87852a685ae3ef834c99305f10e56 100644 (file)
@@ -168,7 +168,12 @@ MULTIPLEXER-IMPLEMENTED FUNCTIONALITY AFFECTING WHOLE SYSTEM
  C> [!]<command> <args>...
  O< ?nak|executing...ack...  as above
 
- C> !realtime kill|start|start-manual
+ C> !realtime auto              set automatic restarting; starts if applicable
+ C> !realtime kill              kill current instance, if auto awaits off
+ C> !realtime restart           kill and restart
+ C> !realtime stop              } set manual mode, stop
+ C> !realtime start             } set manual mode, start as specified,
+ C> !realtime start-manual      }   restarting if already running
  C> !save-dump
 
 
index 3ba7c88315a0781bdc048108b8bdaf7424d5567f..663da155e252f1eaaab2e515718f213b6e74c81c 100755 (executable)
@@ -9,10 +9,7 @@ tk_setPalette background black foreground white
 source lib.tcl
 
 proc pagew {page} { return ".picture-$page" }
-
 proc debug {m} { puts $m }
-proc warning {m} { puts stderr $m }
-proc fixme {args} { puts stderr "####FIXME##### $args #####FIXME#####" }
 
 proc sconn {m} {
     global sconn
@@ -703,6 +700,10 @@ register-event ?resolution_inexplicable {message} \
     report-problem "resolution: $message"
 }
 
+register-event ?warning {message} {^.warning (\S+ .*)$} {
+    report-problem "warning: $message"
+}
+
 register-event {} {} {^=connected } {
     global pages gui_pipe server port event_selections
 
index bc8beb412090bceb91d72e50c54ee94fc6681aa1..814341639f4cbfc5d83ea72f3cdb202a51799562 100755 (executable)
@@ -12,6 +12,7 @@
 #    $master              socket
 #    $permissions         [list allow|super|deny $ipaddrhex $maskhex ...]
 #    $realtime            pipes
+#    $realtime_retry      [list $serialchan $timeoutid $awaitedpongmsg|{} $buf]
 #    $replay("$pri $key") $rhs     $pri is \d\d; causes replay of  "$key $rhs"
 #    $detect0($seg)       unset -> 1 or irrelevant; [after ...]
 #    $conns($conn)        1
 #    $currentconn    unset    $conn or {}    $conn or {}  $conn         unset
 #    $executing      0        0              1            1             0
 #
+# globals relating to realtime start/stop:
+#                     manual      ready to   running   awaiting off
+#    $realtime         any          unset      set       unset
+#    $realtime_retry   unset        {}         {}        [list ...]
+#
 # replay priorities and messages:
 #    10 stastate
 #    40 warning realtime-failed
@@ -101,7 +107,7 @@ proc xmit-relevantly {m} {
 proc xmit-relevantly-savereplay {pri key rhs} {
     set pk "$pri $key"
     upvar #0 replay($pk) rep
-    append rep $rhs
+    set rep $rhs
     xmit-relevantly "$key $rhs"
 }
 
@@ -210,14 +216,124 @@ proc local/select-replay {conn args} {
     do-replay $conn
 }
 
+#---------- automatic realtime restart ----------
+
 proc global/!realtime {conn args} {
+    global realtime realtime_retry
     nargs $args 1
-    switch -exact [lindex $args 0] {
-       kill { realtime-failed killed "termination requested by command" }
-       start { realtime-start {} }
-       start-manual { realtime-start -m }
-       default { cmderr BadCmd "unknown !realtime subcommand" }
+    set how [lindex $args 0]
+
+    # perhaps kill the running instance
+    # this switch also checks the argument
+    switch -exact -- $how {
+       kill - stop - restart - start - start-manual {
+           if {[info exists realtime]} {
+               realtime-failed killed "termination requested by command"
+           }
+       }
+       auto {
+       }
+       default {
+           cmderr BadCmd "unknown !realtime subcommand"
+       }
     }
+
+    # set the operating mode
+    switch -exact -- $how {
+       auto {
+           realtime-retry-reset
+           set realtime_retry {}
+       }
+       stop - start - start-manual {
+           realtime-retry-reset
+           catch { unset realtime_retry }
+       }
+    }
+
+    # (re)start if applicable
+    switch -exact -- $how {
+       start - restart {
+           realtime-start {}
+       }
+       start-manual {
+           realtime-start -m
+       }
+       default {
+           after idle realtime-retry-check
+       }
+    }
+}
+
+proc realtime-retry-reset {} {
+    global realtime_retry
+    if {![info exists realtime_retry]} return
+    if {![llength $realtime_retry]} return
+    manyset $realtime_retry serchan timeoutid
+    catch { close $serchan }
+    after cancel $timeoutid
+    set realtime_retry {}
+}
+
+proc realtime-retry-check {} {
+    global realtime_retry
+    if {![info exists realtime_retry]} return
+    if {[llength $realtime_retry]} return
+    if {[info exists realtime]} return
+    realtime-start {}
+}
+
+proc realtime-retry-realtime-failed {} {
+    global realtime_retry dev_railway
+    if {![info exists realtime_retry]} return
+    if {[llength $realtime_retry]} { error "huh? $realtime_retry" }
+    set serchan [open $dev_railway r+]
+    fconfigure $serchan -translation binary -buffering none -blocking 0
+    fileevent $serchan readable realtime-retry-serchan-readable
+    set realtime_retry [list $serchan x {} {}]
+    realtime-retry-send-ping
+}
+
+proc realtime-retry-send-ping {} {
+    global realtime_retry urandom
+    manyset $realtime_retry serchan after pong buf
+    set x [read $urandom 2]
+    binary scan $x H* x
+    if {[string length $x] != 4} { error "urandom short read `$x'" }
+    set x [expr "0x$x & 0x077f"]
+    set ping [format %04x [expr {0x8800 ^ $x}]]
+    set pong [format %04x [expr {0x885a ^ $x}]]
+    puts "<! picioh out await-off $pong  (await $pong)"
+    puts -nonewline $serchan [binary format H* $ping]
+    set after [after 700 realtime-retry-now-off]
+    set realtime_retry [list $serchan $after $pong {}]
+}
+
+proc realtime-retry-serchan-readable {} {
+    global realtime_retry
+    manyset $realtime_retry serchan after pong buf
+    set x [read $serchan]
+    binary scan $x H* x
+    puts "<! picioh in await-off $x"
+    if {![string length $x] && [eof $serchan]} { error "eof on serial port" }
+    append buf $x
+    while {[regexp {^((?:[89a-f].)*[0-7].)(.*)$} $buf dummy msg buf]} {
+       if {![string compare 09 $msg]} {
+           realtime-retry-now-off
+           return
+       }
+       if {![string compare $pong $msg]} {
+           after cancel $after
+           set after [after 200 realtime-retry-send-ping]
+           set realtime_retry [list $serchan $after {} {}]
+           return
+       }
+    }
+    set realtime_retry [list $serchan $after $pong $buf]
+}
+
+proc realtime-retry-now-off {} {
+    realtime-retry-reset
+    realtime-retry-check
 }
 
 #---------- connection and commands ----------
@@ -464,14 +580,14 @@ proc realtime-failed {k m} {
        set sef [open +realtime.stderr r]
        while {[gets $sef l] >= 0} {
            xmit-relevantly-savereplay 40 \
-                   "warning realtime-failed" "stderr : $l"
+                   "warning realtime-failed stderr" ": $l"
        }
        close $sef
        unset sef
     } emsg]} {
        if {![string match {POSIX ENOENT *} $errorCode]} {
            xmit-relevantly-savereplay 40 \
-                   "warning realtime-failed" "stderr-unreadable : $emsg"
+                   "warning realtime-failed stderr" "unreadable : $emsg"
        }
        catch { close $sef }
     }
@@ -486,22 +602,23 @@ proc realtime-failed {k m} {
        unset currentcmd
        unset currentconn
     }
-    realtime-notrunning-init
+    realtime-notrunning
+    realtime-retry-realtime-failed
 }
 
-proc realtime-notrunning-init {} {
-    xmit-relevantly-savereplay 10 stastate -
+proc realtime-notrunning {} {
+    xmit-relevantly-savereplay 10 stastate Crashed
 }
 
 proc realtime-start {xopts} {
-    global realtime records
-    if {[info exists realtime]} {
-       cmderr InvalidState "realtime already running"
-    }
+    global realtime records realtime_xopts dev_railway
+    if {[info exists realtime]} { error "realtime already running" }
+    realtime-retry-reset
     set cl [concat \
-           [list 2> +realtime.stderr ./realtime -v2] \
-           $xopts $records]
-    savereplay-clear "40 warning realtime-failed"
+           [list 2> +realtime.stderr ./realtime -v2 -s$dev_railway] \
+           $realtime_xopts $xopts $records]
+    puts "<> $cl"
+    savereplay-clear-re "40 warning realtime-failed"
     if {[catch {
        set realtime [open |$cl r+]
        fconfigure $realtime -translation binary -buffering line -blocking 0
@@ -687,7 +804,7 @@ proc binding {addr blist} {
 }
 
 proc startup {} {
-    global queueing executing testmode realtime port realtime_xopts
+    global queueing executing testmode realtime port urandom
     catch { close $master }; catch { unset master }
 
     setting testmode 0 {[01]}
@@ -696,10 +813,12 @@ proc startup {} {
     uplevel #0 source multiplex-config
     set queueing {}
     set executing 0
-    realtime-notrunning-init
+    set urandom [open /dev/urandom r]
+    fconfigure $urandom -buffering none -translation binary
+    realtime-notrunning
     if {!$testmode} {
        start_commandloop
-       realtime-start $realtime_xopts
+       after idle realtime-retry-check
     } else {
        fconfig-trainproto stdin
        fconfig-trainproto stdout
index 5a5ec922d17a203c72ccd9e87f3ad43a58955033..7a45a8267a47f7c156d87a4b87f0ff63eb35ff60 100644 (file)
@@ -16,3 +16,5 @@ set records {
 }
 set detectlag 100
 set realtime_xopts {}
+set dev_railway /dev/railway
+set realtime_retry {} ;# unset would mean don't
index 2d38116af3b18ec7bb4c15a6599f7af2cef55138..853700c7ac1dee375d824e201ae6067725a5f95f 100644 (file)
@@ -114,6 +114,7 @@ static void sta_goto(StartupState new_state) {
   case Sta_Resolving:  sta_toev.duration=   500;   break;
   case Sta_Finalising:                             break;
   case Sta_Run:                                    break;
+  case Sta_Crashed: abort();
   }
 
   if (new_state < Sta_Run)
@@ -146,6 +147,7 @@ static void sta_goto(StartupState new_state) {
       persist_install();
     retransmit_start();
     break;
+  case Sta_Crashed: abort();
   }
   if (piob.l) serial_transmit(&piob);
 
@@ -324,6 +326,7 @@ void on_pic_detect1(const PicInsnInfo *pii, const PicInsn *pi, int objnum) {
   case Sta_Run:
     safety_notify_detection(seg);
     break;
+  case Sta_Crashed: abort();
   }
 }
 
index d05ce1c95aebb6739abf833ae2fa5dc98cdf772b..373269bbadde3d6408acbffc9e915b73ece1c497 100755 (executable)
@@ -3,6 +3,7 @@
 set -e
 Sta () { l="$l $1"; }
 
+  Sta Crashed
   Sta Flush
   Sta Off
   Sta Manual