chiark / gitweb /
Don't remark on URLs that are repeated after a short period
[irc.git] / commands.py
index 8e3bfe6f336272374a82b0f67df0b81924dfebf2..488711a254f67b03ff84f2410ce3d1574d8290ba 100755 (executable)
@@ -401,15 +401,20 @@ class UrlLog:
     def firstmen(self):
         return nicetime(time.time()-self.first)
     def urltype(self):
-        z=min(len(urlcomplaints)-1, self.count-1)
-        return urlcomplaints[z]
+        z=min(len(urlinfos)-1, self.count-1)
+        return urlinfos[z]
 
 #(?:...) is a non-capturing group: it groups a sub-pattern without creating a numbered match group
 urlre = re.compile(r"((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
 hturlre= re.compile(r"(http)(s?://[^ ]+)( |$)")
 #matches "re" or "re:" followed by whitespace (\bre\:?\s+) before a URL; (?i) makes the match case-insensitive
 shibboleth = re.compile(r"(?i)\bre\:?\s+((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
-urlcomplaints = ["a contemporary","an interesting","a fascinating","an overused","a vastly overused"]
+#Minimum time (in seconds) that must have passed since a URL's most recent mention before commenting on it again
+url_repeat_time = 300
+urlinfos = ["a new",
+            "a fascinating",
+            "an interesting",
+            "a popular"]
 
 ### Deal with /msg bot url or ~url in channel
 def urlq(bot, cmd, nick, conn, public,urldb):
@@ -452,7 +457,8 @@ def dourl(bot,conn,nick,command,urldb):
     T=urldb[urlstring]
     message="observes %s URL, first mentioned %s by %s" % \
              (T.urltype(),T.firstmen(),T.nick)
-    if shibboleth.search(command)==None:
+    if shibboleth.search(command)==None and \
+       time.time() - T.lastseen > url_repeat_time:
         conn.action(bot.channel, message)
     T.lastseen=time.time()
     T.count+=1
@@ -498,11 +504,15 @@ def twitterq(bot,cmd,nick,conn,public,twitapi):
 
   urlstring = urlre.search(cmd).group(1)
   if (urlstring.find("twitter.com") !=-1):
-    stringout = getTweet(urlstring,twitapi)
-    bot.automsg(public, nick, stringout)
+    stringsout = getTweet(urlstring,twitapi)
+    for stringout in stringsout:
+        bot.automsg(public, nick, stringout)
   
-def getTweet(urlstring,twitapi):
+def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0):
   unobfuscate_urls=True
+  expand_included_tweets=True
+  stringsout=[]
+  
   parts = string.split(urlstring,'/')
   tweetID = parts[-1]
   try:
@@ -519,7 +529,7 @@ def getTweet(urlstring,twitapi):
     else:
         tweeter_screen = "[not returned]" ; tweeter_name = "[not returned]"
         tweeter_name = tweeter_name + " RTing " + status.user.name #.encode('UTF-8', 'replace')
-    tweetText = status.text
+    tweetText = status.full_text
     if status.media:
         replacements = defaultdict( list )
         for medium in status.media:
@@ -527,7 +537,7 @@ def getTweet(urlstring,twitapi):
 
         for k,v in replacements.items():
 
-            v = [re.sub(r"/tweet_video_thumb/(\w+).jpg", r"/tweet_video/\1.mp4", link) for link in v]
+            v = [re.sub(r"/tweet_video_thumb/([\w\-]+).jpg", r"/tweet_video/\1.mp4", link) for link in v]
             if len(v) > 1:
                 replacementstring = "[" +  " ; ".join(v) +"]"
             else:
@@ -572,11 +582,24 @@ def getTweet(urlstring,twitapi):
             else:
                 toReplace = '%s://%s%s' % (rv.scheme, rv.hostname, rv.path) # leave off the final '?'
 
+        if expand_included_tweets:
+            if rv.hostname == 'twitter.com' and re.search(r'status/\d+',rv.path):
+                if recurlvl > 2:
+                  stringsout = [ "{{ Recursion level too high }}" ] + stringsout
+                else:
+                  quotedtweet = getTweet(toReplace, twitapi, inclusion=True, recurlvl=recurlvl+1) # inclusion=True returns the strings un-encoded (the outermost call encodes them); recurlvl bounds the recursion depth.
+                  if not quotedtweet:
+                      quotedtweet = [""]
+                  quotedtweet[0] = "Q{ " + quotedtweet[0]
+                  quotedtweet[-1] += " }"
+                  stringsout = quotedtweet + stringsout
+
         tweetText = tweetText.replace(url.url, toReplace)
 
     tweetText = tweetText.replace("&gt;",">")
     tweetText = tweetText.replace("&lt;","<")
     tweetText = tweetText.replace("&amp;","&")
+    tweetText = tweetText.replace("\n"," ")
     stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText)
   except twitter.TwitterError:
     terror = sys.exc_info()
@@ -584,4 +607,8 @@ def getTweet(urlstring,twitapi):
   except Exception:
     terror = sys.exc_info()
     stringout = "Error: %s" % terror[1].__str__()
-  return stringout.encode('UTF-8', 'replace')
+  stringsout = [stringout] + stringsout
+  if inclusion:
+      return stringsout # don't want to double-encode it, so just pass it on for now and encode later
+
+  return map(lambda x: x.encode('UTF-8', 'replace'), stringsout)