Don't remark on URLs that are repeated within a short period

[irc.git] / commands.py
diff --git a/commands.py b/commands.py

index 8e3bfe6f336272374a82b0f67df0b81924dfebf2..488711a254f67b03ff84f2410ce3d1574d8290ba 100755 (executable)
--- a/commands.py
+++ b/commands.py
@@ -401,15 +401,20 @@ class UrlLog:
      def firstmen(self):
          return nicetime(time.time()-self.first)
      def urltype(self):
-        z=min(len(urlcomplaints)-1, self.count-1)
-        return urlcomplaints[z]
+        z=min(len(urlinfos)-1, self.count-1)
+        return urlinfos[z]
  
  #(?:) is a regexp that doesn't group        
  urlre = re.compile(r"((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
  hturlre= re.compile(r"(http)(s?://[^ ]+)( |$)")
  #matches \bre\:?\s+ before a regexp; (?i)==case insensitive match
  shibboleth = re.compile(r"(?i)\bre\:?\s+((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
-urlcomplaints = ["a contemporary","an interesting","a fascinating","an overused","a vastly overused"]
+#Minimum time (in s) that must have passed since a URL's most recent mention before the bot remarks on it
+url_repeat_time = 300
+urlinfos = ["a new",
+            "a fascinating",
+            "an interesting",
+            "a popular"]
  
  ### Deal with /msg bot url or ~url in channel
  def urlq(bot, cmd, nick, conn, public,urldb):
@@ -452,7 +457,8 @@ def dourl(bot,conn,nick,command,urldb):
      T=urldb[urlstring]
      message="observes %s URL, first mentioned %s by %s" % \
               (T.urltype(),T.firstmen(),T.nick)
-    if shibboleth.search(command)==None:
+    if shibboleth.search(command)==None and \
+       time.time() - T.lastseen > url_repeat_time:
          conn.action(bot.channel, message)
      T.lastseen=time.time()
      T.count+=1
@@ -498,11 +504,15 @@ def twitterq(bot,cmd,nick,conn,public,twitapi):
  
    urlstring = urlre.search(cmd).group(1)
    if (urlstring.find("twitter.com") !=-1):
-    stringout = getTweet(urlstring,twitapi)
-    bot.automsg(public, nick, stringout)
+    stringsout = getTweet(urlstring,twitapi)
+    for stringout in stringsout:
+        bot.automsg(public, nick, stringout)
    
-def getTweet(urlstring,twitapi):
+def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0):
    unobfuscate_urls=True
+  expand_included_tweets=True
+  stringsout=[]
+  
    parts = string.split(urlstring,'/')
    tweetID = parts[-1]
    try:
@@ -519,7 +529,7 @@ def getTweet(urlstring,twitapi):
      else:
          tweeter_screen = "[not returned]" ; tweeter_name = "[not returned]"
          tweeter_name = tweeter_name + " RTing " + status.user.name #.encode('UTF-8', 'replace')
-    tweetText = status.text
+    tweetText = status.full_text
      if status.media:
          replacements = defaultdict( list )
          for medium in status.media:
@@ -527,7 +537,7 @@ def getTweet(urlstring,twitapi):
  
          for k,v in replacements.items():
  
-            v = [re.sub(r"/tweet_video_thumb/(\w+).jpg", r"/tweet_video/\1.mp4", link) for link in v]
+            v = [re.sub(r"/tweet_video_thumb/([\w\-]+).jpg", r"/tweet_video/\1.mp4", link) for link in v]
              if len(v) > 1:
                  replacementstring = "[" +  " ; ".join(v) +"]"
              else:
@@ -572,11 +582,24 @@ def getTweet(urlstring,twitapi):
              else:
                  toReplace = '%s://%s%s' % (rv.scheme, rv.hostname, rv.path) # leave off the final '?'
  
+        if expand_included_tweets:
+            if rv.hostname == 'twitter.com' and re.search(r'status/\d+',rv.path):
+                if recurlvl > 2:
+                  stringsout = [ "{{ Recursion level too high }}" ] + stringsout
+                else:
+                  quotedtweet = getTweet(toReplace, twitapi, inclusion=True, recurlvl=recurlvl+1) # recurlvl bounds the recursion depth; inclusion=True makes the nested call return its strings unencoded.
+                  if not quotedtweet:
+                      quotedtweet = [""]
+                  quotedtweet[0] = "Q{ " + quotedtweet[0]
+                  quotedtweet[-1] += " }"
+                  stringsout = quotedtweet + stringsout
+
          tweetText = tweetText.replace(url.url, toReplace)
  
      tweetText = tweetText.replace("&gt;",">")
      tweetText = tweetText.replace("&lt;","<")
      tweetText = tweetText.replace("&amp;","&")
+    tweetText = tweetText.replace("\n"," ")
      stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText)
    except twitter.TwitterError:
      terror = sys.exc_info()
@@ -584,4 +607,8 @@ def getTweet(urlstring,twitapi):
    except Exception:
      terror = sys.exc_info()
      stringout = "Error: %s" % terror[1].__str__()
-  return stringout.encode('UTF-8', 'replace')
+  stringsout = [stringout] + stringsout
+  if inclusion:
+      return stringsout # don't want to double-encode it, so just pass it on for now and encode later
+
+  return map(lambda x: x.encode('UTF-8', 'replace'), stringsout)