From b9dedfa6e0494bbeba9f954ed91fdd5c9fb2570e Mon Sep 17 00:00:00 2001 Message-Id: From: Mark Wooding Date: Fri, 8 Sep 2017 09:51:31 +0100 Subject: [PATCH] svc/connect.in: Report statistics about adopted peers. Organization: Straylight/Edgeware From: Mark Wooding These are thrown into the `info' output along with the configuration. --- svc/connect.8.in | 63 +++++++++++++++++++++++++++++++++++++++++++++++- svc/connect.in | 46 +++++++++++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 3 deletions(-) diff --git a/svc/connect.8.in b/svc/connect.8.in index 7112fb0c..0d951bf7 100644 --- a/svc/connect.8.in +++ b/svc/connect.8.in @@ -523,7 +523,7 @@ provided about each peer, in the form of subsequent tokens. Clients should be prepared to ignore such tokens.) .SP .BI "info " peer -Lists the database record for the named +Lists the database record and additional information about the named .IR peer . For each key/value pair, a line .RS @@ -532,6 +532,67 @@ For each key/value pair, a line .IB key = value .PP is output. The key/value pairs are output in an arbitrary order. +.PP +In addition to the fields of the peer's database record, the following +additional keys are defined. +.TP +.B failures +The number of failed pings in the current or most recent batch, in +decimal. +.TP +.B last-ping +The round-trip time of the most recent ping in milliseconds, in the form +.IB nn.n ms\fR, +or +.B timeout +if the most recent ping timed out, +or +.B \- +if no pings have yet completed. +.TP +.B max-ping +The maximum successful ping time so far in milliseconds, in the form +.IB nn.n ms\fR, +or +.B \- +if no pings have yet succeeded. +.TP +.B mean-ping +The average successful ping time so far in milliseconds, in the form +.IB nn.n ms\fR, +or +.B \- +if no pings have yet succeeded. +.TP +.B min-ping +The minimum successful ping time so far in milliseconds, in the form +.IB nn.n ms\fR, +or +.B \- +if no pings have yet succeeded. 
def info(me):
    """
    Return a dictionary of ping statistics for this peer.

    The keys are `last-ping', `mean-ping', `sd-ping', `n-ping', `n-lost',
    `percent-lost', `min-ping', `max-ping', `state' and `failures'.  Times
    are formatted as `NN.Nms'; a statistic which has no value yet (because
    no ping has succeeded, or no ping has completed at all) is reported as
    `-'.  The `percent-lost' value is the rounded percentage of completed
    pings which timed out.  All values are strings except `failures', which
    is the raw integer counter.
    """

    def fmt_ms(t):
        ## Unset statistics are held as the placeholder string `-'; pushing
        ## that through `%.1f' raises TypeError, so pass it through as is.
        if t == '-': return '-'
        return '%.1fms' % t

    if not me._nping:
        mean = sd = '-'
    else:
        mean = me._sigma_t/me._nping
        ## Rounding error can leave the computed variance a hair below zero
        ## when the samples are (nearly) identical; clamp it so that `sqrt'
        ## doesn't fail with a domain error.
        var = me._sigma_t2/me._nping - mean*mean
        sd = sqrt(max(var, 0.0))

    ## Percentage of completed pings which were lost, rounded to nearest.
    n = me._nping + me._nlost
    if not n: pclost = '-'
    else: pclost = '%d' % ((100*me._nlost + n//2)//n)

    return { 'last-ping': me._last,
             'mean-ping': fmt_ms(mean),
             'sd-ping': fmt_ms(sd),
             'n-ping': '%d' % me._nping,
             'n-lost': '%d' % me._nlost,
             'percent-lost': pclost,
             'min-ping': fmt_ms(me._min),
             'max-ping': fmt_ms(me._max),
             ## A pending timer means we're waiting out the `every' delay;
             ## otherwise a ping is currently in flight.
             'state': 'idle' if me._timer else 'check',
             'failures': me._failures }