chiark / gitweb /
new trout from rejs ( https://twitter.com/kingsbookstore/status/1620432807196778496 )
[irc.git] / commands.py
old mode 100644 (file)
new mode 100755 (executable)
index 0f958e2..eec69e7
@@ -1,6 +1,13 @@
 # Part of Acrobat.
-import string, cPickle, random, urllib, sys, time, re, os, twitter
+import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime, urlparse, hashlib
+from collections import defaultdict
 from irclib import irc_lower, nm_to_n
+import json
+
+try:
+    from blame_filter import bfd
+except ImportError:
+    bfd = None
 
 # query karma
 def karmaq(bot, cmd, nick, conn, public, karma):
@@ -47,6 +54,16 @@ def infoq(bot, cmd, nick, conn, public, karma):
        (bot.revision.split()[1], bot.channel, conn.get_nickname(),
         bot.owner, len(karma.keys())))
 
class FishPond:
    """Shared state for the fish-throwing commands.

    Keeps a short most-recent-first history of produced messages (used by
    the blame command's #last lookup) plus flood-control bookkeeping.
    """

    def __init__(self):
        self.last = []       # most-recent-first list of (message, cfg) pairs
        self.DoS = 0         # nonzero while fish-throwing is disabled
        self.quotatime = 0   # timestamp of the last quota replenishment

    def note_last(self, msg, cfg):
        """Record msg (and the config that produced it) as the newest
        entry, keeping at most the ten most recent."""
        self.last = [(msg, cfg)] + self.last[:9]
+
 # Check on fish stocks
 def fish_quota(pond):
     if pond.DoS:
@@ -61,6 +78,9 @@ def fish_quota(pond):
             pond.cur_fish=pond.max_fish
         pond.quotatime=time.time()
 
# List of things the bot might be called to work round the self-trouting code:
# if a trout/flirt/slash target matches one of these, the request is treated
# as aimed at the bot itself and redirected back at the requester.
synonyms=["itself","the bot","themself"]
+
 # trout someone, or flirt with them
 def troutq(bot, cmd, nick, conn, public, cfg):
     fishlist=cfg[0]
@@ -84,8 +104,9 @@ def troutq(bot, cmd, nick, conn, public, cfg):
        return
     me = bot.connection.get_nickname()
     trout_msg = random.choice(fishlist)
+    fishpond.note_last(trout_msg,cfg)
     # The bot won't trout or flirt with itself;
-    if irc_lower(me) == irc_lower(target):
+    if irc_lower(me) == irc_lower(target) or irc_lower(target) in synonyms:
         target = nick
     # There's a chance the game may be given away if the request was not
     # public...
@@ -124,13 +145,14 @@ def slashq(bot, cmd, nick, conn, public, cfg):
        return
     me = bot.connection.get_nickname()
     slash_msg = random.choice(fishlist)
+    fishpond.note_last(slash_msg,cfg)
     # The bot won't slash people with themselves
     if irc_lower(who[0]) == irc_lower(who[1]):
        conn.notice(nick, "oooooh no missus!")
        return
     # The bot won't slash with itself, instead slashing the requester
     for n in [0,1]:
-       if irc_lower(me) == irc_lower(who[n]):
+       if irc_lower(me) == irc_lower(who[n]) or irc_lower(who[n]) in synonyms:
            who[n] = nick
     # Perhaps someone asked to slash themselves with the bot then we get
     if irc_lower(who[0]) == irc_lower(who[1]):
@@ -154,9 +176,9 @@ def unitq(bot, cmd, nick, conn, public):
             conn.notice(nick, "syntax: units arg1 as arg2")
             return
     if args[1]=='?':
-        sin,sout=os.popen2(["units","--verbose",args[0]],"r")
+        sin,sout=os.popen4(["units","--verbose","--",args[0]],"r")
     else:
-        sin,sout=os.popen2(["units","--verbose",args[0],args[1]],"r")
+        sin,sout=os.popen4(["units","--verbose","--",args[0],args[1]],"r")
     sin.close()
     res=sout.readlines()
     #popen2 doesn't clean up the child properly. Do this by hand
@@ -187,24 +209,6 @@ def reloadq(bot, cmd, nick, conn, public):
         bot.automsg(public,nick,
                "Configuration can only be reloaded by my owner, by /msg.")
 
-# lose the game and/or install a new trigger word
-def gameq(bot, cmd, nick, conn, public, game):
-    #only install a new trigger if it's not too short.
-    if len(' '.join(cmd.split()[1:]))>2:
-        game.trigger=' '.join(cmd.split()[1:])
-    if (time.time()> game.grace):
-        if not public:
-            if irc_lower(nick) == irc_lower(bot.owner):
-                conn.action(bot.channel,"loses the game!")
-            else:
-                conn.privmsg(bot.channel,nick+" just lost the game!")
-    else:
-        if not public:
-            conn.notice(nick, "It's a grace period!")
-    game.grace=time.time()+60*20 #20 minutes' grace
-    game.losetime=time.time()+random.randrange(game.minlose,game.maxlose)
-    conn.notice(bot.owner, str(game.losetime-time.time())+" "+game.trigger)
-
 # quit irc
 def quitq(bot, cmd, nick, conn, public):
     if irc_lower(nick) == irc_lower(bot.owner):
@@ -232,6 +236,9 @@ def googleq(bot, cmd, nick, conn, public):
 
 # Look up the definition of something using google
 def defineq(bot, cmd, nick, conn, public):
+    #this doesn't work any more
+    bot.automsg(public,nick,"'define' is broken because google are bastards :(")
+    return
     cmdrest = string.join(cmd.split()[1:])
     targ = ("http://www.google.co.uk/search?q=define%%3A%s&ie=utf-8&oe=utf-8"
             % urllib.quote_plus(cmdrest))
@@ -257,6 +264,114 @@ def defineq(bot, cmd, nick, conn, public):
     except IOError: # if the connection times out. This blocks. :(
          bot.automsg(public,nick,"The web's broken. Waah!")
 
# Look up a currency conversion via xe.com
def currencyq(bot, cmd, nick, conn, public):
    """Convert between two three-letter currency codes using xe.com.

    Usage: currency XXX as YYY.  Replies via automsg; syntax errors are
    sent as a notice to the requester.
    """
    args = ' '.join(cmd.split()[1:]).split(' as ')
    if len(args) != 2 or len(args[0]) != 3 or len(args[1]) != 3:
        conn.notice(nick, "syntax: currency arg1 as arg2")
        return
    targ = ("http://www.xe.com/ucc/convert.cgi?From=%s&To=%s" % (args[0], args[1]))
    try:
        currencypage = urllib.urlopen(targ).read()
        match = re.search(r"(1 %s = [\d\.]+ %s)" % (args[0].upper(), args[1].upper()),
                          currencypage, re.MULTILINE)
        if match is None:
            bot.automsg(public, nick, "Dear Chief Secretary, there is no money.")
        else:
            conversion = match.group(1)
            # xe.com pads its result with non-breaking spaces; normalise them.
            # (The previous code replaced ' ' with ' ' -- a no-op left behind
            # when the non-breaking-space literal was mangled.)
            conversion = conversion.replace('&nbsp;', ' ').replace('\xa0', ' ')
            bot.automsg(public, nick, conversion + " (from xe.com)")
    except IOError:  # if the connection times out. This blocks. :(
        bot.automsg(public, nick, "The web's broken. Waah!")
### extract the commit message and timestamp for commit
def __getcommitinfo(commit):
    """Return (message, date) for a git commit, or a 1-tuple (errmsg,) on error.

    Callers distinguish the error case by len(result) == 1.
    """
    cmd=["git","log","-n","1","--pretty=format:%ct|%s",commit]
    x=subprocess.Popen(cmd,
                       stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    out,err=x.communicate()

    if len(err):
        # Must be a 1-tuple: the old "return(err)" returned the bare string,
        # so the caller's len(ret)==1 error test compared the string length
        # (and "%s" % ret would also misbehave on strings with '%').
        return (err,)

    # Split on the first '|' only, so commit subjects containing '|' don't
    # break the two-value unpack.
    ts,mes=out.split('|',1)
    mes=mes.strip()
    md5mes=hashlib.md5(mes).hexdigest()
    # blame_filter may override the displayed message for known commits
    if bfd and md5mes in bfd:
        mes=bfd[md5mes]
    when=datetime.date.fromtimestamp(float(ts))
    return mes, when
+
+###Return an array of commit messages and timestamps for lines in db that match what
+def __getcommits(db,keys,what):
+    ans=[]
+    for k in keys:
+        if what in k:
+           ret=__getcommitinfo(db[k])
+           if len(ret)==1: #error message
+               return ["Error message from git blame: %s" % ret]
+           else:
+               ans.append( (k,ret[0],ret[1]) )
+    return ans
+
+###search all three databases for what
+def __getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,what):
+    if what.strip()=="":
+        return []
+    tans=__getcommits(tdb,tdbk,what)
+    fans=__getcommits(fdb,fdbk,what)
+    sans=__getcommits(sdb,sdbk,what)
+    return tans+fans+sans
+
def blameq(bot,cmd,nick,conn,public,fishpond,cfgs):
    """Report which git commit last touched a trout/flirt/slash message.

    Forms: blame #last [n]            -- n-th most recent fish message
           blame #trout|#flirt|#slash <text>  -- search one database
           blame <text>               -- search all three databases
    """
    # Element 7 of each command config is (database, key list, kinds file).
    tdb,tdbk,x = cfgs[0][7] # urgh, magic, to support magic knowledge below
    fdb,fdbk,x = cfgs[1][7]
    sdb,sdbk,x = cfgs[2][7]
    clist=cmd.split()
    if len(clist) < 2:
        bot.automsg(public,nick,"Who or what do you want to blame?")
        return
    cwhat=' '.join(clist[2:])
    kindsfile = "fish?"
    if clist[1]=="#last":
        # Optional 1-based index into fishpond.last; defaults to the newest.
        try:
            n = abs(int(clist[2]))-1
            if n < 0: raise ValueError
        except IndexError: n = 0
        except ValueError:
            bot.automsg(public,nick,"Huh?")
            return
        try: lmsg, lcfg = fishpond.last[n]
        except IndexError:
            bot.automsg(public,nick,"Nothing")
            return
        xdb,xdbk,kindsfile = lcfg[7]
        ans=__getcommits(xdb,xdbk,lmsg)
    elif clist[1]=="#trouts" or clist[1]=="#trout":
        ans=__getcommits(tdb,tdbk,cwhat)
    elif clist[1]=="#flirts" or clist[1]=="#flirt":
        ans=__getcommits(fdb,fdbk,cwhat)
    elif clist[1]=="#slashes" or clist[1]=="#slash":
        ans=__getcommits(sdb,sdbk,cwhat)
    else:
        cwhat=' '.join(clist[1:])
        ans=__getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,cwhat)
    if len(ans)==0:
        bot.automsg(public,nick,"No match found")
    elif len(ans)==1:
        if len(ans[0])==1:
            # A single string means __getcommits returned an error message.
            bot.automsg(public,nick,ans[0])
        else:
            bot.automsg(public,nick,"Modified %s %s: %s" % (kindsfile, ans[0][2].isoformat(),ans[0][1]))
    elif len(ans)>4:
        bot.automsg(public,nick,"I found %d matches, which is too many. Please be more specific!" % (len(ans)) )
    else:
        # 2-4 matches: list each with its file, date and message.
        for a in ans:
            if len(a)==1:
                bot.automsg(public,nick,a)
            else:
                bot.automsg(public,nick,"%s '%s' modified on %s: %s" % (kindsfile, a[0],a[2].isoformat(),a[1]))
+
 ### say to msg/channel            
 def sayq(bot, cmd, nick, conn, public):
     if irc_lower(nick) == irc_lower(bot.owner):
@@ -314,37 +429,56 @@ class UrlLog:
         self.nick=nick
         self.url=url
         self.first=time.time()
+        self.localfirst=time.localtime(self.first)
         self.count=1
         self.lastseen=time.time()
         self.lastasked=time.time()
    def recenttime(self):
        # Timestamp of the most recent interaction with this URL,
        # whether it was last seen in channel or last asked about.
        return max(self.lastseen,self.lastasked)
     def firstmen(self):
-        return nicetime(time.time()-self.first)
+        n=time.localtime(time.time())
+        s="%02d:%02d" % (self.localfirst.tm_hour,self.localfirst.tm_min)
+        if n.tm_yday != self.localfirst.tm_yday:
+            s+=time.strftime(" on %d %B", self.localfirst)
+        return s
    def urltype(self):
        # Pick a description from urlinfos based on how many times the URL
        # has been seen, saturating at the final ("a popular") entry.
        z=min(len(urlinfos)-1, self.count-1)
        return urlinfos[z]
+
+#(?:) is a regexp that doesn't group        
+urlre = re.compile(r"((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
+hturlre= re.compile(r"(http)(s?://[^ ]+)( |$)")
+#matches \bre\:?\s+ before a regexp; (?i)==case insensitive match
+shibboleth = re.compile(r"(?i)\bre\:?\s+((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
+#How long (in s) to wait since the most recent mention before commenting
+url_repeat_time = 300
+urlinfos = ["a new",
+            "a fascinating",
+            "an interesting",
+            "a popular"]
 
 ### Deal with /msg bot url or ~url in channel
 def urlq(bot, cmd, nick, conn, public,urldb):
   if (not urlre.search(cmd)):
-    bot.automsg(False,nick,"Please use 'url' only with http URLs")
+    bot.automsg(False,nick,"Please use 'url' only with http, https, nsfw, or nsfws URLs")
     return
 
   urlstring=urlre.search(cmd).group(1)
   url=canonical_url(urlstring)
   if (url in urldb):
     T = urldb[url]
-    complaint="That's %s URL that was first mentioned %s by %s" % \
-               (T.urltype(),T.firstmen(),T.nick)
+    comment="I saw that URL in scrool, first mentioned by %s at %s" % \
+               (T.nick,T.firstmen())
     if (public):
-      complaint=complaint+". Furthermore it defeats the point of this command to use it other than via /msg."
+      comment=comment+". Furthermore it defeats the point of this command to use it other than via /msg."
       T.count+=1
-    bot.automsg(False,nick,complaint)
+    bot.automsg(False,nick,comment)
     T.lastasked=time.time()
+    #URL suppressed, so mention in #urls
+    if urlstring != cmd.split()[1]: #first argument to URL was not the url
+      conn.privmsg("#urls","%s remarks: %s" % (nick," ".join(cmd.split()[1:])))
+    else:
+      conn.privmsg("#urls","(via %s) %s"%(nick," ".join(cmd.split()[1:])))
   else:
     if (public):
       bot.automsg(False,nick,"That URL was unique. There is little point in using !url out loud; please use it via /msg")
@@ -353,7 +487,6 @@ def urlq(bot, cmd, nick, conn, public,urldb):
         conn.privmsg(bot.channel,"%s remarks: %s" % (nick," ".join(cmd.split()[1:])))
       else:
         conn.privmsg(bot.channel,"(via %s) %s"%(nick," ".join(cmd.split()[1:])))
-      bot.automsg(False,nick,"That URL was unique; I have posted it into IRC")
     urldb[url]=UrlLog(url,nick)
 
 ### Deal with URLs spotted in channel
@@ -363,9 +496,11 @@ def dourl(bot,conn,nick,command,urldb):
 
   if urlstring in urldb:
     T=urldb[urlstring]
-    message="observes %s URL, first mentioned %s by %s" % \
-             (T.urltype(),T.firstmen(),T.nick)
-    conn.action(bot.channel, message)
+    message="saw that URL in scrool, first mentioned by %s at %s" % \
+             (T.nick,T.firstmen())
+    if shibboleth.search(command)==None and \
+       time.time() - T.lastseen > url_repeat_time:
+        conn.action(bot.channel, message)
     T.lastseen=time.time()
     T.count+=1
   else:
@@ -380,6 +515,8 @@ def urlexpire(urldb,expire):
 
# canonicalise BBC URLs (internal use only)
def canonical_url(urlstring):
  """Normalise a URL for duplicate detection: map nsfw(s):// back to
  http(s)://, and rewrite BBC News /low/ and /mobile/ paths to /hi/."""
  if "nsfw://" in urlstring or "nsfws://" in urlstring:
      urlstring=urlstring.replace("nsfw","http",1)
  if (urlstring.find("news.bbc.co.uk") != -1):
    for middle in ("/low/","/mobile/"):
      # str.replace returns a new string; the old code discarded the
      # result, so the /hi/ canonicalisation never took effect.
      if middle in urlstring:
        urlstring = urlstring.replace(middle,"/hi/")
  return urlstring
 
# automatically make nsfw urls for you and pass them on to url
def nsfwq(bot,cmd,nick,conn,public,urldb):
  """Rewrite any http(s) URL in cmd to the nsfw(s) scheme, then hand the
  whole command off to urlq for the usual duplicate tracking."""
  if not hturlre.search(cmd):
    bot.automsg(False,nick,"Please use 'nsfw' only with http or https URLs")
    return
  rewritten = hturlre.sub(nsfwify,cmd)
  urlq(bot,rewritten,nick,conn,public,urldb)
+
def nsfwify(match):
    """re.sub callback for hturlre: swap the leading 'http' for 'nsfw',
    keeping the rest of the URL and the trailing separator intact."""
    scheme, rest, trail = match.groups()
    return 'nsfw' + rest + trail
 
#get tweet text
def twitterq(bot,cmd,nick,conn,public,twitapi):
  """Fetch the text of a twitter.com status URL found in cmd and emit
  each rendered line (quoted tweets expand to several) via automsg."""
  if not urlre.search(cmd):
    bot.automsg(False,nick,"Please use 'twit' only with http or https URLs")
    return
  urlstring = urlre.search(cmd).group(1)
  if urlstring.find("twitter.com") != -1:
    for line in getTweet(urlstring,twitapi):
      bot.automsg(public, nick, line)
   
-def getTweet(urlstring,twitapi):
-  parts = string.split(urlstring,'/')
-  tweetID = parts[-1]
-  status = twitapi.GetStatus(tweetID)
-  tweeter_screen = status.user.screen_name
-  tweeter_name = status.user.name
-  tweetText = status.text
-  stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText)
-  return stringout
def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0):
  """Fetch the tweet behind a twitter.com status URL and render it as a
  list of display strings (quoted tweets contribute extra leading lines).

  inclusion: True when called recursively for a quoted tweet; the strings
      are then returned un-encoded for the outer call to wrap and encode.
  recurlvl: current quote-expansion depth, capped at 2.
  """
  unobfuscate_urls=True
  expand_included_tweets=True
  stringsout=[]

  # The status ID is the last component of the URL path.
  path = urlparse.urlparse(urlstring).path
  tweetID = path.split('/')[-1]
  try:
    status = twitapi.GetStatus(tweetID)
    if status == {}:
        return "twitapi.GetStatus returned nothing :-("
    if status.user == None and status.text == None:
        return "Empty status object returned :("
    if status.retweeted_status and status.retweeted_status.text:
        # Show the original tweet rather than the retweet wrapper.
        status = status.retweeted_status
    if status.user is not None:
        tweeter_screen = status.user.screen_name #.encode('UTF-8', 'replace')
        tweeter_name = status.user.name #.encode('UTF-8', 'replace')
    else:
        tweeter_screen = "[not returned]" ; tweeter_name = "[not returned]"
        # NOTE(review): status.user is None on this branch, so the next line
        # raises AttributeError, which is swallowed by the broad 'except
        # Exception' below -- confirm intent.
        tweeter_name = tweeter_name + " RTing " + status.user.name #.encode('UTF-8', 'replace')
    tweetText = status.full_text
    if status.media:
        # Map each media URL appearing in the text to its direct https URL(s).
        replacements = defaultdict(list)

        for medium in status.media:
            replacements[medium.url].append(medium.media_url_https)

        # The twitter-api 'conveniently' parses this for you and
        # throws away the actual video URLs, so we have to take the
        # JSON and reparse it :sadpanda:
        # This is particularly annoying because we don't know
        # for sure that status.media and the JSON 'media' entry
        # have the same elements in the same order.  Probably they
        # do but maybe twitter-api randomly reorganised things or
        # filtered the list or something.  So instead we go through
        # the JSON and handle the media urls, discarding whatever
        # unfortunate thing we have put in replacements already.
        parsed_tweet = json.loads(status.AsJsonString())
        for medium in parsed_tweet.get('media', []):
            if medium['type'] == 'video':
                # Pick the highest-bitrate mp4 variant.
                best = { 'bitrate': -1 }
                for vt in medium['video_info']['variants']:
                    if (vt.get('content_type') == 'video/mp4' and
                        vt.get('bitrate', -1) > best['bitrate']):
                        best = vt
                if 'url' in best:
                    video_url = best['url'].split('?',1)[0]
                    duration = medium['video_info']['duration_millis']
                    # ^ duration_millis is a string
                    duration = "%.1f" % (float(duration)/1000.)
                    video_desc = "%s (%ss)" % (video_url, duration)
                    replacements[medium['url']] = [video_desc]

        for k,v in replacements.items():
            if len(v) > 1:
                replacementstring = "[" +  " ; ".join(v) +"]"
            else:
                replacementstring = v[0]
            tweetText = tweetText.replace(k, replacementstring)

    for url in status.urls:
        toReplace = url.expanded_url

        if unobfuscate_urls:
            import urllib
            rv = urlparse.urlparse(toReplace)
            if rv.hostname in {
                # sourced from http://bit.do/list-of-url-shorteners.php
                "bit.do", "t.co", "lnkd.in", "db.tt", "qr.ae", "adf.ly",
                "goo.gl", "bitly.com", "cur.lv", "tinyurl.com", "ow.ly",
                "bit.ly", "adcrun.ch", "ity.im", "q.gs", "viralurl.com",
                "is.gd", "po.st", "vur.me", "bc.vc", "twitthis.com", "u.to",
                "j.mp", "buzurl.com", "cutt.us", "u.bb", "yourls.org",
                "crisco.com", "x.co", "prettylinkpro.com", "viralurl.biz",
                "adcraft.co", "virl.ws", "scrnch.me", "filoops.info", "vurl.bz",
                "vzturl.com", "lemde.fr", "qr.net", "1url.com", "tweez.me",
                "7vd.cn", "v.gd", "dft.ba", "aka.gr", "tr.im",
                 # added by ASB:
                 "trib.al", "dlvr.it"
                               }:
                #expand list as needed.
                # Resolve the shortened URL via the urlex.org text API.
                response = urllib.urlopen('http://urlex.org/txt/' + toReplace)
                resptext = response.read()
                if resptext.startswith('http'): # ie it looks urlish (http or https)
                    if resptext != toReplace:
                        toReplace = resptext
                    # maybe make a note of the domain of the original URL to compile list of shortenable domains?

        # remove tracking utm_ query parameters, for privacy and brevity
        # code snippet from https://gist.github.com/lepture/5997883
        rv = urlparse.urlparse(toReplace)
        if rv.query:
            query = re.sub(r'utm_\w+=[^&]+&?', '', rv.query)
            if query:
                toReplace = '%s://%s%s?%s' % (rv.scheme, rv.hostname, rv.path, query)
            else:
                toReplace = '%s://%s%s' % (rv.scheme, rv.hostname, rv.path) # leave off the final '?'

        if expand_included_tweets:
            # A link to another tweet: expand it inline, depth-limited.
            if rv.hostname == 'twitter.com' and re.search(r'status/\d+',rv.path):
                if recurlvl > 2:
                  stringsout = [ "{{ Recursion level too high }}" ] + stringsout
                else:
                  quotedtweet = getTweet(toReplace, twitapi, inclusion=True, recurlvl=recurlvl+1) # inclusion parameter limits recursion.
                  if not quotedtweet:
                      quotedtweet = [""]
                  quotedtweet[0] = "Q{ " + quotedtweet[0]
                  quotedtweet[-1] += " }"
                  stringsout = quotedtweet + stringsout

        tweetText = tweetText.replace(url.url, toReplace)

    # Undo twitter's HTML entity-encoding and flatten newlines for IRC.
    tweetText = tweetText.replace("&gt;",">")
    tweetText = tweetText.replace("&lt;","<")
    tweetText = tweetText.replace("&amp;","&")
    tweetText = tweetText.replace("\n"," ")
    stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText)
  except twitter.TwitterError:
    terror = sys.exc_info()
    stringout = "Twitter error: %s" % terror[1].__str__()
  except Exception:
    terror = sys.exc_info()
    stringout = "Error: %s" % terror[1].__str__()
  stringsout = [stringout] + stringsout
  if inclusion:
      return stringsout # don't want to double-encode it, so just pass it on for now and encode later

  # Top-level call: encode everything for the IRC connection.
  return map(lambda x: x.encode('UTF-8', 'replace'), stringsout)