From: ian Date: Sun, 8 Jun 2008 17:57:07 +0000 (+0000) Subject: Improved error handling: autorestart realtime, logging, etc. X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ijackson/git?a=commitdiff_plain;h=12551eb9ccd1621902caa423bd9c0ffbfe704a01;p=trains.git Improved error handling: autorestart realtime, logging, etc. --- diff --git a/hostside/Makefile b/hostside/Makefile index f3dc3f1..9e60d67 100644 --- a/hostside/Makefile +++ b/hostside/Makefile @@ -93,7 +93,7 @@ input-codes.h: extract-input-codes $(INPUT_H) ./$^ $o tidy: - rm -f ./+dump.* + rm -rf ./+dump.* clean: tidy rm -f *.o *.d $(TARGETS) selectors.h diff --git a/hostside/README.commands b/hostside/README.commands index 052e823..80a7f66 100644 --- a/hostside/README.commands +++ b/hostside/README.commands @@ -168,7 +168,12 @@ MULTIPLEXER-IMPLEMENTED FUNCTIONALITY AFFECTING WHOLE SYSTEM C> [!] ... O< ?nak|executing...ack... as above - C> !realtime kill|start|start-manual + C> !realtime auto set automatic restarting; starts if applicable + C> !realtime kill kill current instance, if auto awaits off + C> !realtime restart kill and restart + C> !realtime stop } set manual mode, stop + C> !realtime start } set manual mode, start as specified, + C> !realtime start-manual } restarting if already running C> !save-dump diff --git a/hostside/gui b/hostside/gui index 3ba7c88..663da15 100755 --- a/hostside/gui +++ b/hostside/gui @@ -9,10 +9,7 @@ tk_setPalette background black foreground white source lib.tcl proc pagew {page} { return ".picture-$page" } - proc debug {m} { puts $m } -proc warning {m} { puts stderr $m } -proc fixme {args} { puts stderr "####FIXME##### $args #####FIXME#####" } proc sconn {m} { global sconn @@ -703,6 +700,10 @@ register-event ?resolution_inexplicable {message} \ report-problem "resolution: $message" } +register-event ?warning {message} {^.warning (\S+ .*)$} { + report-problem "warning: $message" +} + register-event {} {} {^=connected } { global pages gui_pipe server port event_selections diff --git a/hostside/multiplex b/hostside/multiplex index bc8beb4..8143416 100755 --- a/hostside/multiplex +++ b/hostside/multiplex @@ -12,6 +12,7 @@ # $master socket # $permissions [list allow|super|deny $ipaddrhex $maskhex ...] # $realtime pipes +# $realtime_retry [list $serialchan $timeoutid $awaitedpongmsg|{} $buf] # $replay("$pri $key") $rhs $pri is \d\d; causes replay of "$key $rhs" # $detect0($seg) unset -> 1 or irrelevant; [after ...] # $conns($conn) 1 @@ -31,6 +32,11 @@ # $currentconn unset $conn or {} $conn or {} $conn unset # $executing 0 0 1 1 0 # +# globals relating to realtime start/stop: +# manual ready to running awaiting off +# $realtime any unset set unset +# $realtime_retry unset {} {} [list ...] +# # replay priorities and messages: # 10 stastate # 40 warning realtime-failed @@ -101,7 +107,7 @@ proc xmit-relevantly {m} { proc xmit-relevantly-savereplay {pri key rhs} { set pk "$pri $key" upvar #0 replay($pk) rep - append rep $rhs + set rep $rhs xmit-relevantly "$key $rhs" } @@ -210,14 +216,124 @@ proc local/select-replay {conn args} { do-replay $conn } +#---------- automatic realtime restart ---------- + proc global/!realtime {conn args} { + global realtime realtime_retry nargs $args 1 - switch -exact [lindex $args 0] { - kill { realtime-failed killed "termination requested by command" } - start { realtime-start {} } - start-manual { realtime-start -m } - default { cmderr BadCmd "unknown !realtime subcommand" } + set how [lindex $args 0] + + # perhaps kill the running instance + # this switch also checks the argument + switch -exact -- $how { + kill - stop - restart - start - start-manual { + if {[info exists realtime]} { + realtime-failed killed "termination requested by command" + } + } + auto { + } + default { + cmderr BadCmd "unknown !realtime subcommand" + } } + + # set the operating mode + switch -exact -- $how { + auto { + realtime-retry-reset + set realtime_retry {} + } + stop - start - start-manual { + realtime-retry-reset + catch { unset realtime_retry } + } + } + + # (re)start if applicable + switch -exact -- $how { + start - restart { + realtime-start {} + } + start-manual { + realtime-start -m + } + default { + after idle realtime-retry-check + } + } +} + +proc realtime-retry-reset {} { + global realtime_retry + if {![info exists realtime_retry]} return + if {![llength $realtime_retry]} return + manyset $realtime_retry serchan timeoutid + catch { close $serchan } + after cancel $timeoutid + set realtime_retry {} +} + +proc realtime-retry-check {} { + global realtime_retry + if {![info exists realtime_retry]} return + if {[llength $realtime_retry]} return + if {[info exists realtime]} return + realtime-start {} +} + +proc realtime-retry-realtime-failed {} { + global realtime_retry dev_railway + if {![info exists realtime_retry]} return + if {[llength $realtime_retry]} { error "huh? $realtime_retry" } + set serchan [open $dev_railway r+] + fconfigure $serchan -translation binary -buffering none -blocking 0 + fileevent $serchan readable realtime-retry-serchan-readable + set realtime_retry [list $serchan x {} {}] + realtime-retry-send-ping +} + +proc realtime-retry-send-ping {} { + global realtime_retry urandom + manyset $realtime_retry serchan after pong buf + set x [read $urandom 2] + binary scan $x H* x + if {[string length $x] != 4} { error "urandom short read `$x'" } + set x [expr "0x$x & 0x077f"] + set ping [format %04x [expr {0x8800 ^ $x}]] + set pong [format %04x [expr {0x885a ^ $x}]] + puts "= 0} { xmit-relevantly-savereplay 40 \ - "warning realtime-failed" "stderr : $l" + "warning realtime-failed stderr" ": $l" } close $sef unset sef } emsg]} { if {![string match {POSIX ENOENT *} $errorCode]} { xmit-relevantly-savereplay 40 \ - "warning realtime-failed" "stderr-unreadable : $emsg" + "warning realtime-failed stderr" "unreadable : $emsg" } catch { close $sef } } @@ -486,22 +602,23 @@ proc realtime-failed {k m} { unset currentcmd unset currentconn } - realtime-notrunning-init + realtime-notrunning + realtime-retry-realtime-failed } -proc realtime-notrunning-init {} { - xmit-relevantly-savereplay 10 stastate - +proc realtime-notrunning {} { + xmit-relevantly-savereplay 10 stastate Crashed } proc realtime-start {xopts} { - global realtime records - if {[info exists realtime]} { - cmderr InvalidState "realtime already running" - } + global realtime records realtime_xopts dev_railway + if {[info exists realtime]} { error "realtime already running" } + realtime-retry-reset set cl [concat \ - [list 2> +realtime.stderr ./realtime -v2] \ - $xopts $records] - savereplay-clear "40 warning realtime-failed" + [list 2> +realtime.stderr ./realtime -v2 -s$dev_railway] \ + $realtime_xopts $xopts $records] + puts "<> $cl" + savereplay-clear-re "40 warning realtime-failed" if {[catch { set realtime [open |$cl r+] fconfigure $realtime -translation binary -buffering line -blocking 0 @@ -687,7 +804,7 @@ proc binding {addr blist} { } proc startup {} { - global queueing executing testmode realtime port realtime_xopts + global queueing executing testmode realtime port urandom catch { close $master }; catch { unset master } setting testmode 0 {[01]} @@ -696,10 +813,12 @@ proc startup {} { uplevel #0 source multiplex-config set queueing {} set executing 0 - realtime-notrunning-init + set urandom [open /dev/urandom r] + fconfigure $urandom -buffering none -translation binary + realtime-notrunning if {!$testmode} { start_commandloop - realtime-start $realtime_xopts + after idle realtime-retry-check } else { fconfig-trainproto stdin fconfig-trainproto stdout diff --git a/hostside/multiplex-config b/hostside/multiplex-config index 5a5ec92..7a45a82 100644 --- a/hostside/multiplex-config +++ b/hostside/multiplex-config @@ -16,3 +16,5 @@ set records { } set detectlag 100 set realtime_xopts {} +set dev_railway /dev/railway +set realtime_retry {} ;# unset would mean don't diff --git a/hostside/startup.c b/hostside/startup.c index 2d38116..853700c 100644 --- a/hostside/startup.c +++ b/hostside/startup.c @@ -114,6 +114,7 @@ static void sta_goto(StartupState new_state) { case Sta_Resolving: sta_toev.duration= 500; break; case Sta_Finalising: break; case Sta_Run: break; + case Sta_Crashed: abort(); } if (new_state < Sta_Run) @@ -146,6 +147,7 @@ static void sta_goto(StartupState new_state) { persist_install(); retransmit_start(); break; + case Sta_Crashed: abort(); } if (piob.l) serial_transmit(&piob); @@ -324,6 +326,7 @@ void on_pic_detect1(const PicInsnInfo *pii, const PicInsn *pi, int objnum) { case Sta_Run: safety_notify_detection(seg); break; + case Sta_Crashed: abort(); } } diff --git a/hostside/stastate.h.gen b/hostside/stastate.h.gen index d05ce1c..373269b 100755 --- a/hostside/stastate.h.gen +++ b/hostside/stastate.h.gen @@ -3,6 +3,7 @@ set -e Sta () { l="$l $1"; } + Sta Crashed Sta Flush Sta Off Sta Manual