chiark / gitweb /
probes: debugging, new report feature
author Ian Jackson <ijackson@chiark.greenend.org.uk>
Tue, 10 Oct 2017 14:51:40 +0000 (15:51 +0100)
committer Ian Jackson <ijackson@chiark.greenend.org.uk>
Tue, 10 Oct 2017 14:51:40 +0000 (15:51 +0100)
probes/modrelays-probe

index 6c78c62..27c5545 100755 (executable)
@@ -1,10 +1,17 @@
 #!/bin/bash
 
-set -e
+set -e$MODRELAYS_PROBE_SET_X
 
-. settings
+MODRELAYS=moderators.isc.org  # domain list that mode_all loops over
+PROBE_TIMEOUT=$(( 20 * 60 ))  # seconds (20 min): unreported attempts older than this get a "timeout" outcome
+PROBE_EXPIRE=$(( 32 * 86400 ))  # seconds (32 days): reported attempts older than this are removed
+
+. ../global-settings
+. ./settings
 
 id=$(date +%s)_$$  # epoch seconds, "_", then this shell's PID
+statedir=probes/probes  # parent of the per-probe directories ($statedir/$probeid)
+lockfile=$statedir/.lock  # path compared and passed to with-lock-ex by acquire_lock
 
 fail () {
        printf >&2 "%s\n" "modrelays-probe: error: $1"
@@ -21,7 +28,7 @@ record-probing () {
        .*|*/*) fail "yikes, sanitisation bug!" ;;
        esac
 
-       td="probes/probes/$probeid"
+       td="$statedir/$probeid"
        mkdir -p $td
 }
 
@@ -46,7 +53,7 @@ probe-addr () {
        record-probing "mx=$mx,addr=$addr"
 
        set +e
-       swaks   --to "${GROUP//./-/}@$domain" \
+       swaks   --to "${GROUP//./-}@$domain" \
                --server $addr \
                --tls-optional-strict \
                --header 'Subject: test modrelays probe test' \
@@ -64,8 +71,8 @@ probe-addr () {
        local rhs
        local prefix
        local expect_no_5xx='initial connection'
-       exec <$td/swaks.log
-       while read prefix rhs; do
+       exec 4<$td/swaks.log
+       while read <&4 prefix rhs; do
                case "$prefix" in
                '<'*)
                        case "$rhs" in
@@ -107,12 +114,12 @@ probe-domain () {
        case $rc in
        0)
                # have a list of MX's
-               exec <$td/dns
+               exec 3<$td/dns
                local pref
                local mx
                local statustype
                local rhs
-               while read pref mx statustype statustypenum rhs; do
+               while read <&3 pref mx statustype statustypenum rhs; do
                        case $statustypenum in
                        0)
                                # have a list of relays
@@ -161,9 +168,9 @@ probe-domain () {
        case $rc in
        0)
                # have a list of A's (dealt with MXs above)
-               exec <$td/dns
+               exec 3<$td/dns
                local addr
-               while read addr; do
+               while read <&3 addr; do
                        probe-addr 'NONE' $addr
                done
                record-success dns
@@ -189,6 +196,129 @@ no_args () {
        esac
 }
 
+# acquire_lock LOCK_MODE [ARGS...]
+#
+# Make sure the rest of the script runs under with-lock-ex on $lockfile.
+#
+# If the environment variable WEBSTUMP_PROBE_LOCK already equals
+# $lockfile, the lock is taken to be held (that is the marker set by
+# the re-exec below) and the function simply returns.
+#
+# Otherwise it sets that marker and replaces this process with
+#   with-lock-ex LOCK_MODE $lockfile $0 $mode ARGS...
+# so it does not return in that case.  $mode and $lockfile are globals
+# set elsewhere in the script.
+acquire_lock () {
+       local lock_mode="$1"
+       # Drop LOCK_MODE from "$@"; without this it would be handed to
+       # the re-executed script as a spurious extra argument.  Guarded
+       # because a failing shift would abort the script under set -e.
+       if [ $# -gt 0 ]; then shift; fi
+       if [ x"$WEBSTUMP_PROBE_LOCK" = x"$lockfile" ]; then return; fi
+       # $lock_mode is deliberately unquoted so an empty mode vanishes.
+       WEBSTUMP_PROBE_LOCK=$lockfile \
+       exec with-lock-ex $lock_mode "$lockfile" "$0" "$mode" "$@"
+}
+
+# maybe-report OUTCOME
+#
+# Consider reporting OUTCOME for the probe attempt directory $attempt.
+# $attempt and $found_to_report are variables of the caller
+# (mode_report); $ADMIN is the report recipient and is set elsewhere.
+#
+# Does nothing if an earlier call for this attempt already found an
+# outcome file ($found_to_report is true), or if "$attempt/OUTCOME"
+# does not exist.  Otherwise:
+#   - marks found_to_report=true;
+#   - reads the first line of the outcome file into $message;
+#   - returns if "$attempt/reported" already records this same outcome;
+#   - if the outcome is "ok" and nothing was reported before, records
+#     "ok" in "$attempt/reported" without sending mail;
+#   - otherwise builds a multipart/mixed email in
+#     "$attempt/.report.OUTCOME" (summary part plus one part per file
+#     in $attempt), sends it with sendmail, and records OUTCOME in
+#     "$attempt/reported".
+maybe-report () {
+       local outcome=$1
+
+       if $found_to_report; then return; fi
+       if ! [ -e "$attempt/$outcome" ]; then return; fi
+       found_to_report=true
+
+       # read returns non-zero on an empty file or a last line without
+       # a newline; tolerate that so set -e does not abort the run.
+       # $message is still assigned (possibly empty) in that case.
+       read <"$attempt/$outcome" message || :
+
+       local reported
+       if [ -e "$attempt/reported" ]; then
+               read <"$attempt/reported" reported || :
+       fi
+       if [ "x$outcome" = "x$reported" ]; then return; fi
+
+       # A first-time success is recorded silently; only failures and
+       # changes of outcome generate mail.
+       if [ x"$outcome" = x"ok" ] && [ x"$reported" = x ]; then
+               echo ok >"$attempt/reported"
+               return
+       fi
+
+       # The attempt directory name is a comma-separated description
+       # (e.g. mx=...,addr=...); show it with spaces instead.
+       local info=${attempt##*/}
+       info=${info//,/ }
+
+       # Random MIME boundary: 30 bytes -> 60 hex digits.  RFC 2046
+       # limits a boundary to 70 characters, so do not draw more.
+       delim=`od -N 30 -An -x -w30 </dev/urandom`
+       delim=${delim// /}
+
+       local email="$attempt/.report.$outcome"
+       cat >"$email" <<END
+To: $ADMIN
+Subject: mod relay probe $outcome $info
+Content-Type: multipart/mixed; boundary="$delim"
+MIME-Version: 1.0
+
+--$delim
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: 7bit
+
+The moderation relay probe
+  $info
+resulted in the outcome
+  $outcome
+END
+       if [ "x$message" != x ]; then
+               cat >>"$email" <<END
+with the message
+  $message
+END
+       fi
+
+       if [ "x$reported" != x ]; then
+               cat >>"$email" <<END
+This is even though previously the outcome seemed to be
+  $reported
+and this was reported previously.
+END
+       fi
+
+       cat >>"$email" <<END
+
+Logs are in
+  $attempt
+and concatenated to this email.
+
+END
+
+       # One inline part per file in the attempt directory.  The glob
+       # does not match dotfiles, so the email being built is skipped.
+       local log
+       for log in "$attempt"/*; do
+               cat >>"$email" <<END
+--$delim
+Content-Type: text/plain; charset="utf-8"
+Content-Disposition: inline; filename="${log##*/}"
+Content-Transfer-Encoding: 8bit
+
+END
+               cat >>"$email" <"$log"
+               echo >>"$email"
+       done
+
+       cat >>"$email" <<END
+--$delim--
+END
+
+       /usr/sbin/sendmail -odb -oem -oee -t <"$email"
+       echo "$outcome" >"$attempt"/reported
+}
+
+# mode_report [ARGS...]
+#
+# "report" mode.  After obtaining the lock via acquire_lock, walks every
+# attempt directory under $statedir and, for each one:
+#   - reports the first outcome found among ok, permfail, tempfail
+#     (see maybe-report);
+#   - if nothing has been reported and the directory's mtime is more
+#     than PROBE_TIMEOUT seconds old, writes a "timeout" outcome file
+#     and reports that;
+#   - if something has been reported and the directory's mtime is more
+#     than PROBE_EXPIRE seconds old, deletes the directory.
+mode_report () {
+       acquire_lock -w "$@"
+
+       local attempt
+       for attempt in $statedir/*; do
+               # Skip anything that is not a directory.  This covers
+               # the unexpanded glob when $statedir is empty (stat
+               # would fail and the age arithmetic would be a syntax
+               # error) and stray plain files (the timeout write
+               # below would fail).
+               [ -d "$attempt" ] || continue
+
+               # Age in seconds since the directory was last modified.
+               local now=$(date +%s)
+               local age=$(stat -c %Y "$attempt")
+               age=$(( $now - $age ))
+
+               # found_to_report is shared with maybe-report so that
+               # only the first outcome file found is acted on.
+               local found_to_report=false
+               maybe-report ok
+               maybe-report permfail
+               maybe-report tempfail
+
+               if ! [ -e "$attempt/reported" ] && \
+                    [ $age -gt $PROBE_TIMEOUT ]; then
+                       echo >"$attempt"/timeout \
+       "Message did not arrive after ${PROBE_TIMEOUT}s"
+               fi
+
+               maybe-report timeout
+
+               if [ -e "$attempt/reported" ] && \
+                  [ $age -gt $PROBE_EXPIRE ]; then
+                       rm -rf "$attempt"
+               fi
+       done
+}
+
 mode_all () {
        no_args $#
        for domain in $MODRELAYS; do