From: Matthew Vernon
Date: Fri, 4 Jun 2010 15:35:46 +0000 (+0100)
Subject: revised url-tracking now ready to go (still commented out, though)
X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~matthewv/git?a=commitdiff_plain;h=c0b84406e088cfa837f29e7ea44e3ee87c7a158e;p=irc.git

revised url-tracking now ready to go (still commented out, though)
---

Review notes (amendments relative to the patch as originally posted):

 * Servus-chiark.py, command(): added "global lastexp".  The function
   assigns lastexp, which makes the name local to command(); the earlier
   read in "t - lastexp" would then raise UnboundLocalError.
 * commands.py, UrlLog.urltype(): the index was computed with max() and
   the list was called like a function.  It now clamps count-1 to the
   last valid index with min() and subscripts the list.
 * commands.py, dourl(): UrlLog.count was never incremented, so the
   complaint could never progress past the first entry.  It is now
   bumped each time an already-known URL is seen in channel (after the
   message is built, matching the old len(T)-based wording).
 * commands.py, urlexpire(): iterate over a list() snapshot of the keys,
   because entries are deleted from urldb inside the loop.
 * Hunk new-side line counts were adjusted for the two added lines.  The
   blob ids on the "index" lines are those of the original posting.
 * Line breaks and indentation were reconstructed from a copy whose
   whitespace had been collapsed; check context-line whitespace against
   the tree before applying.

diff --git a/Servus-chiark.py b/Servus-chiark.py
index 3fcdc01..37cccf9 100644
--- a/Servus-chiark.py
+++ b/Servus-chiark.py
@@ -141,7 +141,12 @@ class game:
     losetime=time.time()+300000
 
 # initialise the urldb on startup
-c.urldb={}
+urldb={}
+lastexp=time.time()
+#expire urls if not asked about or seen for >71 hours
+expirelen=71*60*60
+#do an expiry run every hour
+expirevery=60*60
 
 # Command processing: whenever something is said that the bot can hear,
 # "command" is invoked and must decide what to do. This configuration
@@ -163,7 +168,7 @@ commands = {"karma": (c.karmaq,karmadb),
             "die": quit,
             "define": c.defineq,
             "google": c.googleq,
-#            "url": urlq,
+#            "url": (c.urlq,urldb),
             "units": c.unitq,
             "help": c.helpq,
 #            "game": (c.gameq,game),
@@ -189,9 +194,15 @@ def command(bot, cmd, nick, conn, public):
     except IndexError:
         command=""
 
+    global lastexp   # rebound below; without this the read of lastexp fails
+    t=time.time()
+    if t - lastexp > expirevery:
+        c.urlexpire(urldb,expirelen)
+        lastexp=t
+
 #    if public:
 #        if c.urlre.search(cmd):
-#            c.dourl(bot,conn,nick,cmd)
+#            c.dourl(bot,conn,nick,cmd,urldb)
 
     # karma: up
     if command.endswith("++"):
diff --git a/commands.py b/commands.py
index eac7bb7..8dd8e59 100644
--- a/commands.py
+++ b/commands.py
@@ -307,10 +307,29 @@ def nicetime(tempus):
         tm="%d hours ago"%int(tempus/3600)
     return tm
 
+### class to store URL data
+class UrlLog:
+    "contains meta-data about a URL seen on-channel"
+    def __init__(self,url,nick):
+        self.nick=nick
+        self.url=url
+        self.first=time.time()
+        self.count=1
+        self.lastseen=time.time()
+        self.lastasked=time.time()
+    def recenttime(self):
+        return max(self.lastseen,self.lastasked)
+    def firstmen(self):
+        return nicetime(time.time()-self.first)
+    def urltype(self):
+        z=min(len(urlcomplaints),self.count)-1   # clamp to last complaint
+        return urlcomplaints[z]
+
 urlre = re.compile("(https?://[^ ]+)( |$)")
-urlcomplaints = [" contemporary","n interesting"," fascinating","n overused"," vastly overused"]
+urlcomplaints = ["a contemporary","an interesting","a fascinating","an overused","a vastly overused"]
 
-def urlq(bot, cmd, nick, conn, public):
+### Deal with /msg bot url or ~url in channel
+def urlq(bot, cmd, nick, conn, public,urldb):
     if (not urlre.search(cmd)):
         bot.automsg(False,nick,"Please use 'url' only with http URLs")
         return
@@ -319,45 +338,43 @@ def urlq(bot, cmd, nick, conn, public):
     url=canonical_url(url)
 
     if (url in urldb):
-        users = urldb[url]
-        complaint="The url %s was mentioned %s by %s"%(url,nicetime(time.time()-users[-1][1]),users[-1][0])
+        T = urldb[url]
+        complaint="That's %s URL that was first mentioned %s by %s" % \
+                   (T.urltype(),T.firstmen(),T.nick)
         if (public):
             complaint=complaint+". Furthermore it defeats the point of this command to use it other than via /msg."
         bot.automsg(False,nick,complaint)
+        T.lastasked=time.time()
     else:
         if (public):
             bot.automsg(False,nick,"That URL was unique. There is little point in using !url out loud; please use it via /msg")
         else:
             conn.privmsg(bot.channel,"%s would like to draw your attention to %s"%(nick,url))
-        urldb[url]=[[nick,time.time()]]
+        urldb[url]=UrlLog(url,nick)
 
-def dourl(bot,conn,nick,command):
+### Deal with URLs spotted in channel
+def dourl(bot,conn,nick,command,urldb):
     urlstring=urlre.search(command).group(1)
     urlstring=canonical_url(urlstring)
 
     if urlstring in urldb:
         T=urldb[urlstring]
-        uci = len(T)
-        if uci >= len(urlcomplaints):
-            uci = len(urlcomplaints)
-        message="observes a"+urlcomplaints[uci-1]+" URL: mentioned by "
-        if (len(T)>5):
-            cutoff=len(T)-5
-        else:
-            cutoff=-1
-        for t in range(len(T)-1,cutoff,-1):
-            tempus = time.time()-T[t][1]
-            message += T[t][0]+" ("+nicetime(tempus)+")"
-            if (t!=cutoff+1):
-                message += ", "
-        if (cutoff != -1):
-            message += ", amongst others"
+        message="observes %s URL, first mentioned %s by %s" % \
+                 (T.urltype(),T.firstmen(),T.nick)
         conn.action(bot.channel, message)
-        urldb[urlstring]=[[nick,time.time()]]+urldb[urlstring]
+        T.lastseen=time.time()
+        T.count+=1   # one more sighting; drives urltype() next time
     else:
-        urldb[urlstring]=[[nick,time.time()]]
+        urldb[urlstring]=UrlLog(urlstring,nick)
+
+### Expire old urls
+def urlexpire(urldb,expire):
+    urls=list(urldb.keys())   # snapshot: entries are deleted in the loop
+    for u in urls:
+        if time.time() - urldb[u].recenttime() > expire:
+            del urldb[u]
 
-# canonicalise BBC URLs
+# canonicalise BBC URLs (internal use only)
 def canonical_url(urlstring):
     if (urlstring.find("news.bbc.co.uk") != -1):
         for middle in ("/low/","/mobile/"):