From: Ian Jackson Date: Tue, 10 Oct 2017 14:51:40 +0000 (+0100) Subject: probes: debugging, new report feature X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~webstump/git?p=modbot-mtm.git;a=commitdiff_plain;h=593412d7007f2eb7886b058b3aceee502d696836 probes: debugging, new report feature --- diff --git a/probes/modrelays-probe b/probes/modrelays-probe index 6c78c62..27c5545 100755 --- a/probes/modrelays-probe +++ b/probes/modrelays-probe @@ -1,10 +1,17 @@ #!/bin/bash -set -e +set -e$MODRELAYS_PROBE_SET_X -. settings +MODRELAYS=moderators.isc.org +PROBE_TIMEOUT=$(( 20 * 60 )) +PROBE_EXPIRE=$(( 32 * 86400 )) + +. ../global-settings +. ./settings id=$(date +%s)_$$ +statedir=probes/probes +lockfile=$statedir/.lock fail () { printf >&2 "%s\n" "modrelays-probe: error: $1" @@ -21,7 +28,7 @@ record-probing () { .*|*/*) fail "yikes, sanitisation bug!" ;; esac - td="probes/probes/$probeid" + td="$statedir/$probeid" mkdir -p $td } @@ -46,7 +53,7 @@ probe-addr () { record-probing "mx=$mx,addr=$addr" set +e - swaks --to "${GROUP//./-/}@$domain" \ + swaks --to "${GROUP//./-}@$domain" \ --server $addr \ --tls-optional-strict \ --header 'Subject: test modrelays probe test' \ @@ -64,8 +71,8 @@ probe-addr () { local rhs local prefix local expect_no_5xx='initial connection' - exec <$td/swaks.log - while read prefix rhs; do + exec 4<$td/swaks.log + while read <&4 prefix rhs; do case "$prefix" in '<'*) case "$rhs" in @@ -107,12 +114,12 @@ probe-domain () { case $rc in 0) # have a list of MX's - exec <$td/dns + exec 3<$td/dns local pref local mx local statustype local rhs - while read pref mx statustype statustypenum rhs; do + while read <&3 pref mx statustype statustypenum rhs; do case $statustypenum in 0) # have a list of relays @@ -161,9 +168,9 @@ probe-domain () { case $rc in 0) # have a list of A's (dealt with MXs above) - exec <$td/dns + exec 3<$td/dns local addr - while read addr; do + while read <&3 addr; do probe-addr 'NONE' $addr done record-success dns @@ -189,6 +196,129 @@ no_args () { esac } +acquire_lock () { + local lock_mode="$1" + if [ x"$WEBSTUMP_PROBE_LOCK" = x"$lockfile" ]; then return; fi + WEBSTUMP_PROBE_LOCK=$lockfile \ + exec with-lock-ex $lock_mode "$lockfile" "$0" "$mode" "$@" +} + +maybe-report () { + local outcome=$1 + + if $found_to_report; then return; fi + if ! [ -e "$attempt/$outcome" ]; then return; fi + found_to_report=true + + read <"$attempt/$outcome" message + + local reported + if [ -e "$attempt/reported" ]; then + read <"$attempt/reported" reported + fi + if [ "x$outcome" = "x$reported" ]; then return; fi + + if [ x"$outcome" = x"ok" ] && [ x"$reported" = x ]; then + echo ok >"$attempt/reported" + return + fi + + local info=${attempt##*/} + info=${info//,/ } + + delim=`od -N 50 -An -x -w50 "$email" <>"$email" <>"$email" <>"$email" <>"$email" <>"$email" <"$log" + echo >>"$email" + done + + cat >>"$email" <"$attempt"/reported +} + +mode_report () { + acquire_lock -w "$@" + + local attempt + for attempt in $statedir/*; do + + local now=$(date +%s) + local age=$(stat -c %Y "$attempt") + age=$(( $now - $age )) + + local found_to_report=false + maybe-report ok + maybe-report permfail + maybe-report tempfail + + if ! [ -e $attempt/reported ] && \ + [ $age -gt $PROBE_TIMEOUT ]; then + echo >"$attempt"/timeout \ + "Message did not arrive after ${PROBE_TIMEOUT}s" + fi + + maybe-report timeout + + if [ -e $attempt/reported ] && \ + [ $age -gt $PROBE_EXPIRE ]; then + rm -rf "$attempt" + fi + done +} + mode_all () { no_args $# for domain in $MODRELAYS; do