X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~matthewv/git?a=blobdiff_plain;f=commands.py;h=488711a254f67b03ff84f2410ce3d1574d8290ba;hb=81cc28847cc46cc2971c35ec5c54b2022e5929ca;hp=bbcaeb898dd83def7686c40f29a6fb5c566689d1;hpb=80d9c9c42e877de154e6a0c7ce4c725a1448c51f;p=irc.git

diff --git a/commands.py b/commands.py
old mode 100644
new mode 100755
index bbcaeb8..488711a
--- a/commands.py
+++ b/commands.py
@@ -1,5 +1,6 @@
 # Part of Acrobat.
-import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime
+import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime, urlparse
+from collections import defaultdict
 from irclib import irc_lower, nm_to_n
 
 # query karma
@@ -61,6 +62,9 @@ def fish_quota(pond):
         pond.cur_fish=pond.max_fish
         pond.quotatime=time.time()
 
+# List of things the bot might be called to work round the self-trouting code
+synonyms=["itself","the bot","themself"]
+
 # trout someone, or flirt with them
 def troutq(bot, cmd, nick, conn, public, cfg):
     fishlist=cfg[0]
@@ -86,7 +90,7 @@ def troutq(bot, cmd, nick, conn, public, cfg):
     trout_msg = random.choice(fishlist)
     fishpond.last=trout_msg
     # The bot won't trout or flirt with itself;
-    if irc_lower(me) == irc_lower(target):
+    if irc_lower(me) == irc_lower(target) or irc_lower(target) in synonyms:
         target = nick
     # There's a chance the game may be given away if the request was not
     # public...
@@ -125,13 +129,14 @@ def slashq(bot, cmd, nick, conn, public, cfg):
         return
     me = bot.connection.get_nickname()
     slash_msg = random.choice(fishlist)
+    fishpond.last=slash_msg
     # The bot won't slash people with themselves
     if irc_lower(who[0]) == irc_lower(who[1]):
         conn.notice(nick, "oooooh no missus!")
         return
     # The bot won't slash with itself, instead slashing the requester
     for n in [0,1]:
-        if irc_lower(me) == irc_lower(who[n]):
+        if irc_lower(me) == irc_lower(who[n]) or irc_lower(who[n]) in synonyms:
             who[n] = nick
     # Perhaps someone asked to slash themselves with the bot then we get
     if irc_lower(who[0]) == irc_lower(who[1]):
@@ -252,7 +257,7 @@ def currencyq(bot, cmd, nick, conn, public):
     targ = ("http://www.xe.com/ucc/convert.cgi?From=%s&To=%s" % (args[0], args[1]))
     try:
         currencypage = urllib.urlopen(targ).read()
-        match = re.search(r"(1 %s = [\d\.]+ %s)" % (args[0],args[1]),currencypage,re.MULTILINE)
+        match = re.search(r"(1 %s = [\d\.]+ %s)" % (args[0].upper(),args[1].upper()),currencypage,re.MULTILINE)
         if match == None:
             bot.automsg(public,nick,"Dear Chief Secretary, there is no money.")
         else:
@@ -300,6 +305,9 @@ def __getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,what):
 
 def blameq(bot,cmd,nick,conn,public,fish,tdb,tdbk,fdb,fdbk,sdb,sdbk):
     clist=cmd.split()
+    if len(clist) < 2:
+        bot.automsg(public,nick,"Who or what do you want to blame?")
+        return
     cwhat=' '.join(clist[2:])
     if clist[1]=="#last":
         ans=__getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,fish.last)
@@ -318,7 +326,7 @@ def blameq(bot,cmd,nick,conn,public,fish,tdb,tdbk,fdb,fdbk,sdb,sdbk):
         if len(ans[0])==1:
             bot.automsg(public,nick,ans[0])
         else:
-            bot.automsg(public,nick,"Added %s: %s" % (ans[0][2].isoformat(),ans[0][1]))
+            bot.automsg(public,nick,"Modified %s: %s" % (ans[0][2].isoformat(),ans[0][1]))
     elif len(ans)>4:
         bot.automsg(public,nick,"I found %d matches, which is too many. Please be more specific!" % (len(ans)) )
     else:
@@ -326,7 +334,7 @@ def blameq(bot,cmd,nick,conn,public,fish,tdb,tdbk,fdb,fdbk,sdb,sdbk):
         if len(a)==1:
             bot.automsg(public,nick,a)
         else:
-            bot.automsg(public,nick,"'%s' added on %s: %s" % (a[0],a[2].isoformat(),a[1]))
+            bot.automsg(public,nick,"'%s' modified on %s: %s" % (a[0],a[2].isoformat(),a[1]))
 
 ### say to msg/channel
 def sayq(bot, cmd, nick, conn, public):
@@ -393,15 +401,20 @@ class UrlLog:
     def firstmen(self):
         return nicetime(time.time()-self.first)
     def urltype(self):
-        z=min(len(urlcomplaints)-1, self.count-1)
-        return urlcomplaints[z]
+        z=min(len(urlinfos)-1, self.count-1)
+        return urlinfos[z]
 
 #(?:) is a regexp that doesn't group
 urlre = re.compile(r"((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
 hturlre= re.compile(r"(http)(s?://[^ ]+)( |$)")
 #matches \bre\:?\s+ before a regexp; (?i)==case insensitive match
 shibboleth = re.compile(r"(?i)\bre\:?\s+((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
-urlcomplaints = ["a contemporary","an interesting","a fascinating","an overused","a vastly overused"]
+#How long (in s) to wait since the most recent mention before commenting
+url_repeat_time = 300
+urlinfos = ["a new",
+            "a fascinating",
+            "an interesting",
+            "a popular"]
 
 ### Deal with /msg bot url or ~url in channel
 def urlq(bot, cmd, nick, conn, public,urldb):
@@ -433,7 +446,6 @@ def urlq(bot, cmd, nick, conn, public,urldb):
             conn.privmsg(bot.channel,"%s remarks: %s" % (nick," ".join(cmd.split()[1:])))
         else:
             conn.privmsg(bot.channel,"(via %s) %s"%(nick," ".join(cmd.split()[1:])))
-        bot.automsg(False,nick,"That URL was unique; I have posted it into IRC")
         urldb[url]=UrlLog(url,nick)
 
 ### Deal with URLs spotted in channel
@@ -445,7 +457,8 @@ def dourl(bot,conn,nick,command,urldb):
         T=urldb[urlstring]
         message="observes %s URL, first mentioned %s by %s" % \
                  (T.urltype(),T.firstmen(),T.nick)
-        if shibboleth.search(command)==None:
+        if shibboleth.search(command)==None and \
+           time.time() - T.lastseen > url_repeat_time:
             conn.action(bot.channel, message)
         T.lastseen=time.time()
         T.count+=1
@@ -486,25 +499,116 @@ def nsfwify(match):
 
 def twitterq(bot,cmd,nick,conn,public,twitapi):
     if (not urlre.search(cmd)):
-        bot.automsg(False,nick,"Please use 'twit' only with http URLs")
+        bot.automsg(False,nick,"Please use 'twit' only with http or https URLs")
         return
 
     urlstring = urlre.search(cmd).group(1)
     if (urlstring.find("twitter.com") !=-1):
-        stringout = getTweet(urlstring,twitapi)
-        bot.automsg(public, nick, stringout)
+        stringsout = getTweet(urlstring,twitapi)
+        for stringout in stringsout:
+            bot.automsg(public, nick, stringout)
+
+def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0):
+    unobfuscate_urls=True
+    expand_included_tweets=True
+    stringsout=[]
 
-def getTweet(urlstring,twitapi):
     parts = string.split(urlstring,'/')
     tweetID = parts[-1]
    try:
         status = twitapi.GetStatus(tweetID)
-        tweeter_screen = status.user.screen_name.encode('UTF-8', 'replace')
-        tweeter_name = status.user.name.encode('UTF-8', 'replace')
-        tweetText = status.text.encode('UTF-8', 'replace')
-        tweetText = tweetText.replace('\n',' ')
+        if status == {}:
+            return "twitapi.GetStatus returned nothing :-("
+        if status.user == None and status.text == None:
+            return "Empty status object returned :("
+        if status.retweeted_status and status.retweeted_status.text:
+            status = status.retweeted_status
+        if status.user is not None:
+            tweeter_screen = status.user.screen_name #.encode('UTF-8', 'replace')
+            tweeter_name = status.user.name #.encode('UTF-8', 'replace')
+        else:
+            tweeter_screen = "[not returned]" ; tweeter_name = "[not returned]"
returned]" + tweeter_name = tweeter_name + " RTing " + status.user.name #.encode('UTF-8', 'replace') + tweetText = status.full_text + if status.media: + replacements = defaultdict( list ) + for medium in status.media: + replacements[medium.url].append(medium.media_url_https) + + for k,v in replacements.items(): + + v = [re.sub(r"/tweet_video_thumb/([\w\-]+).jpg", r"/tweet_video/\1.mp4", link) for link in v] + if len(v) > 1: + replacementstring = "[" + " ; ".join(v) +"]" + else: + replacementstring = v[0] + tweetText = tweetText.replace(k, replacementstring) + + for url in status.urls: + toReplace = url.expanded_url + + if unobfuscate_urls: + import urllib + rv = urlparse.urlparse(toReplace) + if rv.hostname in { + # sourced from http://bit.do/list-of-url-shorteners.php + "bit.do", "t.co", "lnkd.in", "db.tt", "qr.ae", "adf.ly", + "goo.gl", "bitly.com", "cur.lv", "tinyurl.com", "ow.ly", + "bit.ly", "adcrun.ch", "ity.im", "q.gs", "viralurl.com", + "is.gd", "po.st", "vur.me", "bc.vc", "twitthis.com", "u.to", + "j.mp", "buzurl.com", "cutt.us", "u.bb", "yourls.org", + "crisco.com", "x.co", "prettylinkpro.com", "viralurl.biz", + "adcraft.co", "virl.ws", "scrnch.me", "filoops.info", "vurl.bz", + "vzturl.com", "lemde.fr", "qr.net", "1url.com", "tweez.me", + "7vd.cn", "v.gd", "dft.ba", "aka.gr", "tr.im", + # added by ASB: + "trib.al", "dlvr.it" + }: + #expand list as needed. + response = urllib.urlopen('http://urlex.org/txt/' + toReplace) + resptext = response.read() + if resptext.startswith('http'): # ie it looks urlish (http or https) + if resptext != toReplace: + toReplace = resptext + # maybe make a note of the domain of the original URL to compile list of shortenable domains? + + # remove tracking utm_ query parameters, for privacy and brevity + # code snippet from https://gist.github.com/lepture/5997883 + rv = urlparse.urlparse(toReplace) + if rv.query: + query = re.sub(r'utm_\w+=[^&]+&?', '', rv.query) + if query: + toReplace = '%s://%s%s?%s' % (rv.scheme, rv.hostname, rv.path, query) + else: + toReplace = '%s://%s%s' % (rv.scheme, rv.hostname, rv.path) # leave off the final '?' + + if expand_included_tweets: + if rv.hostname == 'twitter.com' and re.search(r'status/\d+',rv.path): + if recurlvl > 2: + stringsout = [ "{{ Recursion level too high }}" ] + stringsout + else: + quotedtweet = getTweet(toReplace, twitapi, inclusion=True, recurlvl=recurlvl+1) # inclusion parameter limits recursion. + if not quotedtweet: + quotedtweet = [""] + quotedtweet[0] = "Q{ " + quotedtweet[0] + quotedtweet[-1] += " }" + stringsout = quotedtweet + stringsout + + tweetText = tweetText.replace(url.url, toReplace) + + tweetText = tweetText.replace(">",">") + tweetText = tweetText.replace("<","<") + tweetText = tweetText.replace("&","&") + tweetText = tweetText.replace("\n"," ") stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText) except twitter.TwitterError: terror = sys.exc_info() stringout = "Twitter error: %s" % terror[1].__str__() - return stringout + except Exception: + terror = sys.exc_info() + stringout = "Error: %s" % terror[1].__str__() + stringsout = [stringout] + stringsout + if inclusion: + return stringsout # don't want to double-encode it, so just pass it on for now and encode later + + return map(lambda x: x.encode('UTF-8', 'replace'), stringsout)