X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~matthewv/git?p=irc.git;a=blobdiff_plain;f=commands.py;h=5a7cc94cdac1165910175f136cb1959afb9bd09b;hp=6a1dd73918aa73f05e2d1ae959b76ad910e1ef7d;hb=refs%2Fheads%2Fmaster;hpb=f51b635da737e39a03b9195c0ec9e58357322516 diff --git a/commands.py b/commands.py old mode 100644 new mode 100755 index 6a1dd73..eec69e7 --- a/commands.py +++ b/commands.py @@ -1,6 +1,13 @@ # Part of Acrobat. -import string, cPickle, random, urllib, sys, time, re, os +import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime, urlparse, hashlib +from collections import defaultdict from irclib import irc_lower, nm_to_n +import json + +try: + from blame_filter import bfd +except ImportError: + bfd = None # query karma def karmaq(bot, cmd, nick, conn, public, karma): @@ -36,7 +43,7 @@ def karmadelq(bot, cmd, nick, conn, public, karma): # help - provides the URL of the help file def helpq(bot, cmd, nick, conn, public): bot.automsg(public,nick, - "For help see http://www.pick.ucam.org/~matthew/irc/servus.html") + "For help see http://www.chiark.greenend.org.uk/~matthewv/irc/servus.html") # query bot status @@ -47,6 +54,16 @@ def infoq(bot, cmd, nick, conn, public, karma): (bot.revision.split()[1], bot.channel, conn.get_nickname(), bot.owner, len(karma.keys()))) +class FishPond: + def __init__(fishpond): + fishpond.last=[] + fishpond.DoS=0 + fishpond.quotatime=0 + + def note_last(fishpond, msg, cfg): + fishpond.last.insert(0,(msg,cfg)) + fishpond.last = fishpond.last[0:10] + # Check on fish stocks def fish_quota(pond): if pond.DoS: @@ -61,6 +78,9 @@ def fish_quota(pond): pond.cur_fish=pond.max_fish pond.quotatime=time.time() +# List of things the bot might be called to work round the self-trouting code +synonyms=["itself","the bot","themself"] + # trout someone, or flirt with them def troutq(bot, cmd, nick, conn, public, cfg): fishlist=cfg[0] @@ -84,8 +104,9 @@ def troutq(bot, cmd, nick, conn, public, cfg): return me = bot.connection.get_nickname() trout_msg = random.choice(fishlist) + fishpond.note_last(trout_msg,cfg) # The bot won't trout or flirt with itself; - if irc_lower(me) == irc_lower(target): + if irc_lower(me) == irc_lower(target) or irc_lower(target) in synonyms: target = nick # There's a chance the game may be given away if the request was not # public... @@ -124,13 +145,14 @@ def slashq(bot, cmd, nick, conn, public, cfg): return me = bot.connection.get_nickname() slash_msg = random.choice(fishlist) + fishpond.note_last(slash_msg,cfg) # The bot won't slash people with themselves if irc_lower(who[0]) == irc_lower(who[1]): conn.notice(nick, "oooooh no missus!") return # The bot won't slash with itself, instead slashing the requester for n in [0,1]: - if irc_lower(me) == irc_lower(who[n]): + if irc_lower(me) == irc_lower(who[n]) or irc_lower(who[n]) in synonyms: who[n] = nick # Perhaps someone asked to slash themselves with the bot then we get if irc_lower(who[0]) == irc_lower(who[1]): @@ -154,9 +176,9 @@ def unitq(bot, cmd, nick, conn, public): conn.notice(nick, "syntax: units arg1 as arg2") return if args[1]=='?': - sin,sout=os.popen2(["units","--verbose",args[0]],"r") + sin,sout=os.popen4(["units","--verbose","--",args[0]],"r") else: - sin,sout=os.popen2(["units","--verbose",args[0],args[1]],"r") + sin,sout=os.popen4(["units","--verbose","--",args[0],args[1]],"r") sin.close() res=sout.readlines() #popen2 doesn't clean up the child properly. Do this by hand @@ -187,24 +209,6 @@ def reloadq(bot, cmd, nick, conn, public): bot.automsg(public,nick, "Configuration can only be reloaded by my owner, by /msg.") -# lose the game and/or install a new trigger word -def gameq(bot, cmd, nick, conn, public, game): - #only install a new trigger if it's not too short. - if len(' '.join(cmd.split()[1:]))>2: - game.trigger=' '.join(cmd.split()[1:]) - if (time.time()> game.grace): - if not public: - if irc_lower(nick) == irc_lower(bot.owner): - conn.action(bot.channel,"loses the game!") - else: - conn.privmsg(bot.channel,nick+" just lost the game!") - else: - if not public: - conn.notice(nick, "It's a grace period!") - game.grace=time.time()+60*20 #20 minutes' grace - game.losetime=time.time()+random.randrange(game.minlose,game.maxlose) - conn.notice(bot.owner, str(game.losetime-time.time())+" "+game.trigger) - # quit irc def quitq(bot, cmd, nick, conn, public): if irc_lower(nick) == irc_lower(bot.owner): @@ -232,6 +236,9 @@ def googleq(bot, cmd, nick, conn, public): # Look up the definition of something using google def defineq(bot, cmd, nick, conn, public): + #this doesn't work any more + bot.automsg(public,nick,"'define' is broken because google are bastards :(") + return cmdrest = string.join(cmd.split()[1:]) targ = ("http://www.google.co.uk/search?q=define%%3A%s&ie=utf-8&oe=utf-8" % urllib.quote_plus(cmdrest)) @@ -257,6 +264,114 @@ def defineq(bot, cmd, nick, conn, public): except IOError: # if the connection times out. This blocks. :( bot.automsg(public,nick,"The web's broken. Waah!") +# Look up a currency conversion via xe.com +def currencyq(bot, cmd, nick, conn, public): + args = ' '.join(cmd.split()[1:]).split(' as ') + if len(args) != 2 or len(args[0]) != 3 or len(args[1]) != 3: + conn.notice(nick, "syntax: currency arg1 as arg2") + return + targ = ("http://www.xe.com/ucc/convert.cgi?From=%s&To=%s" % (args[0], args[1])) + try: + currencypage = urllib.urlopen(targ).read() + match = re.search(r"(1 %s = [\d\.]+ %s)" % (args[0].upper(),args[1].upper()),currencypage,re.MULTILINE) + if match == None: + bot.automsg(public,nick,"Dear Chief Secretary, there is no money.") + else: + conversion = match.group(1); + conversion = conversion.replace(' ',' '); + bot.automsg(public,nick,conversion + " (from xe.com)") + except IOError: # if the connection times out. This blocks. :( + bot.automsg(public,nick,"The web's broken. Waah!") + + +### extract the commit message and timestamp for commit +def __getcommitinfo(commit): + cmd=["git","log","-n","1","--pretty=format:%ct|%s",commit] + x=subprocess.Popen(cmd, + stdout=subprocess.PIPE,stderr=subprocess.PIPE) + out,err=x.communicate() + + if len(err): + return(err) + + ts,mes=out.split('|') + mes=mes.strip() + md5mes=hashlib.md5(mes).hexdigest() + if bfd and md5mes in bfd: + mes=bfd[md5mes] + when=datetime.date.fromtimestamp(float(ts)) + return mes, when + +###Return an array of commit messages and timestamps for lines in db that match what +def __getcommits(db,keys,what): + ans=[] + for k in keys: + if what in k: + ret=__getcommitinfo(db[k]) + if len(ret)==1: #error message + return ["Error message from git blame: %s" % ret] + else: + ans.append( (k,ret[0],ret[1]) ) + return ans + +###search all three databases for what +def __getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,what): + if what.strip()=="": + return [] + tans=__getcommits(tdb,tdbk,what) + fans=__getcommits(fdb,fdbk,what) + sans=__getcommits(sdb,sdbk,what) + return tans+fans+sans + +def blameq(bot,cmd,nick,conn,public,fishpond,cfgs): + tdb,tdbk,x = cfgs[0][7] # urgh, magic, to support magic knowledge below + fdb,fdbk,x = cfgs[1][7] + sdb,sdbk,x = cfgs[2][7] + clist=cmd.split() + if len(clist) < 2: + bot.automsg(public,nick,"Who or what do you want to blame?") + return + cwhat=' '.join(clist[2:]) + kindsfile = "fish?" + if clist[1]=="#last": + try: + n = abs(int(clist[2]))-1 + if n < 0: raise ValueError + except IndexError: n = 0 + except ValueError: + bot.automsg(public,nick,"Huh?") + return + try: lmsg, lcfg = fishpond.last[n] + except IndexError: + bot.automsg(public,nick,"Nothing") + return + xdb,xdbk,kindsfile = lcfg[7] + ans=__getcommits(xdb,xdbk,lmsg) + elif clist[1]=="#trouts" or clist[1]=="#trout": + ans=__getcommits(tdb,tdbk,cwhat) + elif clist[1]=="#flirts" or clist[1]=="#flirt": + ans=__getcommits(fdb,fdbk,cwhat) + elif clist[1]=="#slashes" or clist[1]=="#slash": + ans=__getcommits(sdb,sdbk,cwhat) + else: + cwhat=' '.join(clist[1:]) + ans=__getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,cwhat) + if len(ans)==0: + bot.automsg(public,nick,"No match found") + elif len(ans)==1: + if len(ans[0])==1: + bot.automsg(public,nick,ans[0]) + else: + bot.automsg(public,nick,"Modified %s %s: %s" % (kindsfile, ans[0][2].isoformat(),ans[0][1])) + elif len(ans)>4: + bot.automsg(public,nick,"I found %d matches, which is too many. Please be more specific!" % (len(ans)) ) + else: + for a in ans: + if len(a)==1: + bot.automsg(public,nick,a) + else: + bot.automsg(public,nick,"%s '%s' modified on %s: %s" % (kindsfile, a[0],a[2].isoformat(),a[1])) + ### say to msg/channel def sayq(bot, cmd, nick, conn, public): if irc_lower(nick) == irc_lower(bot.owner): @@ -314,46 +429,64 @@ class UrlLog: self.nick=nick self.url=url self.first=time.time() + self.localfirst=time.localtime(self.first) self.count=1 self.lastseen=time.time() self.lastasked=time.time() def recenttime(self): return max(self.lastseen,self.lastasked) def firstmen(self): - return nicetime(time.time()-self.first) + n=time.localtime(time.time()) + s="%02d:%02d" % (self.localfirst.tm_hour,self.localfirst.tm_min) + if n.tm_yday != self.localfirst.tm_yday: + s+=time.strftime(" on %d %B", self.localfirst) + return s def urltype(self): - z=min(len(urlcomplaints)-1, self.count-1) - return urlcomplaints[z] - -urlre = re.compile("(https?://[^ ]+)( |$)") -urlcomplaints = ["a contemporary","an interesting","a fascinating","an overused","a vastly overused"] + z=min(len(urlinfos)-1, self.count-1) + return urlinfos[z] + +#(?:) is a regexp that doesn't group +urlre = re.compile(r"((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)") +hturlre= re.compile(r"(http)(s?://[^ ]+)( |$)") +#matches \bre\:?\s+ before a regexp; (?i)==case insensitive match +shibboleth = re.compile(r"(?i)\bre\:?\s+((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)") +#How long (in s) to wait since the most recent mention before commenting +url_repeat_time = 300 +urlinfos = ["a new", + "a fascinating", + "an interesting", + "a popular"] ### Deal with /msg bot url or ~url in channel def urlq(bot, cmd, nick, conn, public,urldb): if (not urlre.search(cmd)): - bot.automsg(False,nick,"Please use 'url' only with http URLs") + bot.automsg(False,nick,"Please use 'url' only with http, https, nsfw, or nsfws URLs") return urlstring=urlre.search(cmd).group(1) url=canonical_url(urlstring) if (url in urldb): T = urldb[url] - complaint="That's %s URL that was first mentioned %s by %s" % \ - (T.urltype(),T.firstmen(),T.nick) + comment="I saw that URL in scrool, first mentioned by %s at %s" % \ + (T.nick,T.firstmen()) if (public): - complaint=complaint+". Furthermore it defeats the point of this command to use it other than via /msg." + comment=comment+". Furthermore it defeats the point of this command to use it other than via /msg." T.count+=1 - bot.automsg(False,nick,complaint) + bot.automsg(False,nick,comment) T.lastasked=time.time() + #URL suppressed, so mention in #urls + if urlstring != cmd.split()[1]: #first argument to URL was not the url + conn.privmsg("#urls","%s remarks: %s" % (nick," ".join(cmd.split()[1:]))) + else: + conn.privmsg("#urls","(via %s) %s"%(nick," ".join(cmd.split()[1:]))) else: if (public): bot.automsg(False,nick,"That URL was unique. There is little point in using !url out loud; please use it via /msg") else: if urlstring != cmd.split()[1]: #first argument to URL was not the url - conn.privmsg(bot.channel,"%s remarks %s" % (nick," ".join(cmd.split()[1:]))) + conn.privmsg(bot.channel,"%s remarks: %s" % (nick," ".join(cmd.split()[1:]))) else: - conn.privmsg(bot.channel,"%s would like to draw your attention to %s"%(nick," ".join(cmd.split()[1:]))) - bot.automsg(False,nick,"That URL was unique; I have posted it into IRC") + conn.privmsg(bot.channel,"(via %s) %s"%(nick," ".join(cmd.split()[1:]))) urldb[url]=UrlLog(url,nick) ### Deal with URLs spotted in channel @@ -363,9 +496,11 @@ def dourl(bot,conn,nick,command,urldb): if urlstring in urldb: T=urldb[urlstring] - message="observes %s URL, first mentioned %s by %s" % \ - (T.urltype(),T.firstmen(),T.nick) - conn.action(bot.channel, message) + message="saw that URL in scrool, first mentioned by %s at %s" % \ + (T.nick,T.firstmen()) + if shibboleth.search(command)==None and \ + time.time() - T.lastseen > url_repeat_time: + conn.action(bot.channel, message) T.lastseen=time.time() T.count+=1 else: @@ -380,6 +515,8 @@ def urlexpire(urldb,expire): # canonicalise BBC URLs (internal use only) def canonical_url(urlstring): + if "nsfw://" in urlstring or "nsfws://" in urlstring: + urlstring=urlstring.replace("nsfw","http",1) if (urlstring.find("news.bbc.co.uk") != -1): for middle in ("/low/","/mobile/"): x = urlstring.find(middle) @@ -387,3 +524,157 @@ def canonical_url(urlstring): urlstring.replace(middle,"/hi/") return urlstring +# automatically make nsfw urls for you and pass them on to url +def nsfwq(bot,cmd,nick,conn,public,urldb): + if (not hturlre.search(cmd)): + bot.automsg(False,nick,"Please use 'nsfw' only with http or https URLs") + return + newcmd=hturlre.sub(nsfwify,cmd) + urlq(bot,newcmd,nick,conn,public,urldb) + +def nsfwify(match): + a,b,c=match.groups() + return 'nsfw'+b+c + +#get tweet text +def twitterq(bot,cmd,nick,conn,public,twitapi): + + if (not urlre.search(cmd)): + bot.automsg(False,nick,"Please use 'twit' only with http or https URLs") + return + + urlstring = urlre.search(cmd).group(1) + if (urlstring.find("twitter.com") !=-1): + stringsout = getTweet(urlstring,twitapi) + for stringout in stringsout: + bot.automsg(public, nick, stringout) + +def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0): + unobfuscate_urls=True + expand_included_tweets=True + stringsout=[] + + path = urlparse.urlparse(urlstring).path + tweetID = path.split('/')[-1] + try: + status = twitapi.GetStatus(tweetID) + if status == {}: + return "twitapi.GetStatus returned nothing :-(" + if status.user == None and status.text == None: + return "Empty status object returned :(" + if status.retweeted_status and status.retweeted_status.text: + status = status.retweeted_status + if status.user is not None: + tweeter_screen = status.user.screen_name #.encode('UTF-8', 'replace') + tweeter_name = status.user.name #.encode('UTF-8', 'replace') + else: + tweeter_screen = "[not returned]" ; tweeter_name = "[not returned]" + tweeter_name = tweeter_name + " RTing " + status.user.name #.encode('UTF-8', 'replace') + tweetText = status.full_text + if status.media: + replacements = defaultdict(list) + + for medium in status.media: + replacements[medium.url].append(medium.media_url_https) + + # The twitter-api 'conveniently' parses this for you and + # throws away the actual video URLs, so we have to take the + # JSON and reparse it :sadpanda: + # This is particularly annoying because we don't know + # for sure that status.media and the JSON 'media' entry + # have the same elements in the same order. Probably they + # do but maybe twitter-api randomly reorganised things or + # filtered the list or something. So instead we go through + # the JSON and handle the media urls, discarding whatever + # unfortunate thing we have put in replacements already. + parsed_tweet = json.loads(status.AsJsonString()) + for medium in parsed_tweet.get('media', []): + if medium['type'] == 'video': + best = { 'bitrate': -1 } + for vt in medium['video_info']['variants']: + if (vt.get('content_type') == 'video/mp4' and + vt.get('bitrate', -1) > best['bitrate']): + best = vt + if 'url' in best: + video_url = best['url'].split('?',1)[0] + duration = medium['video_info']['duration_millis'] + # ^ duration_millis is a string + duration = "%.1f" % (float(duration)/1000.) + video_desc = "%s (%ss)" % (video_url, duration) + replacements[medium['url']] = [video_desc] + + for k,v in replacements.items(): + if len(v) > 1: + replacementstring = "[" + " ; ".join(v) +"]" + else: + replacementstring = v[0] + tweetText = tweetText.replace(k, replacementstring) + + for url in status.urls: + toReplace = url.expanded_url + + if unobfuscate_urls: + import urllib + rv = urlparse.urlparse(toReplace) + if rv.hostname in { + # sourced from http://bit.do/list-of-url-shorteners.php + "bit.do", "t.co", "lnkd.in", "db.tt", "qr.ae", "adf.ly", + "goo.gl", "bitly.com", "cur.lv", "tinyurl.com", "ow.ly", + "bit.ly", "adcrun.ch", "ity.im", "q.gs", "viralurl.com", + "is.gd", "po.st", "vur.me", "bc.vc", "twitthis.com", "u.to", + "j.mp", "buzurl.com", "cutt.us", "u.bb", "yourls.org", + "crisco.com", "x.co", "prettylinkpro.com", "viralurl.biz", + "adcraft.co", "virl.ws", "scrnch.me", "filoops.info", "vurl.bz", + "vzturl.com", "lemde.fr", "qr.net", "1url.com", "tweez.me", + "7vd.cn", "v.gd", "dft.ba", "aka.gr", "tr.im", + # added by ASB: + "trib.al", "dlvr.it" + }: + #expand list as needed. + response = urllib.urlopen('http://urlex.org/txt/' + toReplace) + resptext = response.read() + if resptext.startswith('http'): # ie it looks urlish (http or https) + if resptext != toReplace: + toReplace = resptext + # maybe make a note of the domain of the original URL to compile list of shortenable domains? + + # remove tracking utm_ query parameters, for privacy and brevity + # code snippet from https://gist.github.com/lepture/5997883 + rv = urlparse.urlparse(toReplace) + if rv.query: + query = re.sub(r'utm_\w+=[^&]+&?', '', rv.query) + if query: + toReplace = '%s://%s%s?%s' % (rv.scheme, rv.hostname, rv.path, query) + else: + toReplace = '%s://%s%s' % (rv.scheme, rv.hostname, rv.path) # leave off the final '?' + + if expand_included_tweets: + if rv.hostname == 'twitter.com' and re.search(r'status/\d+',rv.path): + if recurlvl > 2: + stringsout = [ "{{ Recursion level too high }}" ] + stringsout + else: + quotedtweet = getTweet(toReplace, twitapi, inclusion=True, recurlvl=recurlvl+1) # inclusion parameter limits recursion. + if not quotedtweet: + quotedtweet = [""] + quotedtweet[0] = "Q{ " + quotedtweet[0] + quotedtweet[-1] += " }" + stringsout = quotedtweet + stringsout + + tweetText = tweetText.replace(url.url, toReplace) + + tweetText = tweetText.replace(">",">") + tweetText = tweetText.replace("<","<") + tweetText = tweetText.replace("&","&") + tweetText = tweetText.replace("\n"," ") + stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText) + except twitter.TwitterError: + terror = sys.exc_info() + stringout = "Twitter error: %s" % terror[1].__str__() + except Exception: + terror = sys.exc_info() + stringout = "Error: %s" % terror[1].__str__() + stringsout = [stringout] + stringsout + if inclusion: + return stringsout # don't want to double-encode it, so just pass it on for now and encode later + + return map(lambda x: x.encode('UTF-8', 'replace'), stringsout)