X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~matthewv/git?a=blobdiff_plain;f=commands.py;h=488711a254f67b03ff84f2410ce3d1574d8290ba;hb=81cc28847cc46cc2971c35ec5c54b2022e5929ca;hp=bbcaeb898dd83def7686c40f29a6fb5c566689d1;hpb=80d9c9c42e877de154e6a0c7ce4c725a1448c51f;p=irc.git

diff --git a/commands.py b/commands.py
old mode 100644
new mode 100755
index bbcaeb8..488711a
--- a/commands.py
+++ b/commands.py
@@ -1,5 +1,6 @@
 # Part of Acrobat.
-import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime
+import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime, urlparse
+from collections import defaultdict
 from irclib import irc_lower, nm_to_n
 
 # query karma
@@ -61,6 +62,9 @@ def fish_quota(pond):
         pond.cur_fish=pond.max_fish
         pond.quotatime=time.time()
 
+# List of things the bot might be called to work round the self-trouting code
+synonyms=["itself","the bot","themself"]
+
 # trout someone, or flirt with them
 def troutq(bot, cmd, nick, conn, public, cfg):
     fishlist=cfg[0]
@@ -86,7 +90,7 @@ def troutq(bot, cmd, nick, conn, public, cfg):
     trout_msg = random.choice(fishlist)
     fishpond.last=trout_msg
     # The bot won't trout or flirt with itself;
-    if irc_lower(me) == irc_lower(target):
+    if irc_lower(me) == irc_lower(target) or irc_lower(target) in synonyms:
         target = nick
     # There's a chance the game may be given away if the request was not
     # public...
@@ -125,13 +129,14 @@ def slashq(bot, cmd, nick, conn, public, cfg):
         return
     me = bot.connection.get_nickname()
     slash_msg = random.choice(fishlist)
+    fishpond.last=slash_msg
     # The bot won't slash people with themselves
     if irc_lower(who[0]) == irc_lower(who[1]):
         conn.notice(nick, "oooooh no missus!")
         return
     # The bot won't slash with itself, instead slashing the requester
     for n in [0,1]:
-        if irc_lower(me) == irc_lower(who[n]):
+        if irc_lower(me) == irc_lower(who[n]) or irc_lower(who[n]) in synonyms:
             who[n] = nick
     # Perhaps someone asked to slash themselves with the bot then we get
     if irc_lower(who[0]) == irc_lower(who[1]):
@@ -252,7 +257,7 @@ def currencyq(bot, cmd, nick, conn, public):
     targ = ("http://www.xe.com/ucc/convert.cgi?From=%s&To=%s" % (args[0], args[1]))
     try:
         currencypage = urllib.urlopen(targ).read()
-        match = re.search(r"(1 %s = [\d\.]+ %s)" % (args[0],args[1]),currencypage,re.MULTILINE)
+        match = re.search(r"(1 %s = [\d\.]+ %s)" % (args[0].upper(),args[1].upper()),currencypage,re.MULTILINE)
         if match == None:
             bot.automsg(public,nick,"Dear Chief Secretary, there is no money.")
         else:
@@ -300,6 +305,9 @@ def __getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,what):
 
 def blameq(bot,cmd,nick,conn,public,fish,tdb,tdbk,fdb,fdbk,sdb,sdbk):
     clist=cmd.split()
+    if len(clist) < 2:
+        bot.automsg(public,nick,"Who or what do you want to blame?")
+        return
     cwhat=' '.join(clist[2:])
     if clist[1]=="#last":
         ans=__getall(tdb,tdbk,fdb,fdbk,sdb,sdbk,fish.last)
@@ -318,7 +326,7 @@ def blameq(bot,cmd,nick,conn,public,fish,tdb,tdbk,fdb,fdbk,sdb,sdbk):
         if len(ans[0])==1:
             bot.automsg(public,nick,ans[0])
         else:
-            bot.automsg(public,nick,"Added %s: %s" % (ans[0][2].isoformat(),ans[0][1]))
+            bot.automsg(public,nick,"Modified %s: %s" % (ans[0][2].isoformat(),ans[0][1]))
     elif len(ans)>4:
         bot.automsg(public,nick,"I found %d matches, which is too many. Please be more specific!" % (len(ans)) )
     else:
@@ -326,7 +334,7 @@ def blameq(bot,cmd,nick,conn,public,fish,tdb,tdbk,fdb,fdbk,sdb,sdbk):
         if len(a)==1:
             bot.automsg(public,nick,a)
         else:
-            bot.automsg(public,nick,"'%s' added on %s: %s" % (a[0],a[2].isoformat(),a[1]))
+            bot.automsg(public,nick,"'%s' modified on %s: %s" % (a[0],a[2].isoformat(),a[1]))
 
 ### say to msg/channel
 def sayq(bot, cmd, nick, conn, public):
@@ -393,15 +401,20 @@ class UrlLog:
     def firstmen(self):
         return nicetime(time.time()-self.first)
     def urltype(self):
-        z=min(len(urlcomplaints)-1, self.count-1)
-        return urlcomplaints[z]
+        z=min(len(urlinfos)-1, self.count-1)
+        return urlinfos[z]
 
 #(?:) is a regexp that doesn't group
 urlre = re.compile(r"((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
 hturlre= re.compile(r"(http)(s?://[^ ]+)( |$)")
 #matches \bre\:?\s+ before a regexp; (?i)==case insensitive match
 shibboleth = re.compile(r"(?i)\bre\:?\s+((?:(?:http)|(?:nsfw))s?://[^ ]+)( |$)")
-urlcomplaints = ["a contemporary","an interesting","a fascinating","an overused","a vastly overused"]
+#How long (in s) to wait since the most recent mention before commenting
+url_repeat_time = 300
+urlinfos = ["a new",
+            "a fascinating",
+            "an interesting",
+            "a popular"]
 
 ### Deal with /msg bot url or ~url in channel
 def urlq(bot, cmd, nick, conn, public,urldb):
@@ -433,7 +446,6 @@ def urlq(bot, cmd, nick, conn, public,urldb):
             conn.privmsg(bot.channel,"%s remarks: %s" % (nick," ".join(cmd.split()[1:])))
         else:
             conn.privmsg(bot.channel,"(via %s) %s"%(nick," ".join(cmd.split()[1:])))
-        bot.automsg(False,nick,"That URL was unique; I have posted it into IRC")
         urldb[url]=UrlLog(url,nick)
 
 ### Deal with URLs spotted in channel
@@ -445,7 +457,8 @@ def dourl(bot,conn,nick,command,urldb):
         T=urldb[urlstring]
         message="observes %s URL, first mentioned %s by %s" % \
                  (T.urltype(),T.firstmen(),T.nick)
-        if shibboleth.search(command)==None:
+        if shibboleth.search(command)==None and \
+           time.time() - T.lastseen > url_repeat_time:
             conn.action(bot.channel, message)
         T.lastseen=time.time()
         T.count+=1
@@ -486,25 +499,116 @@ def nsfwify(match):
 
 def twitterq(bot,cmd,nick,conn,public,twitapi):
     if (not urlre.search(cmd)):
-        bot.automsg(False,nick,"Please use 'twit' only with http URLs")
+        bot.automsg(False,nick,"Please use 'twit' only with http or https URLs")
         return
 
     urlstring = urlre.search(cmd).group(1)
     if (urlstring.find("twitter.com") !=-1):
-        stringout = getTweet(urlstring,twitapi)
-        bot.automsg(public, nick, stringout)
+        stringsout = getTweet(urlstring,twitapi)
+        for stringout in stringsout:
+            bot.automsg(public, nick, stringout)
+
+def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0):
+    unobfuscate_urls=True
+    expand_included_tweets=True
+    stringsout=[]
 
-def getTweet(urlstring,twitapi):
     parts = string.split(urlstring,'/')
     tweetID = parts[-1]
    try:
         status = twitapi.GetStatus(tweetID)
-        tweeter_screen = status.user.screen_name.encode('UTF-8', 'replace')
-        tweeter_name = status.user.name.encode('UTF-8', 'replace')
-        tweetText = status.text.encode('UTF-8', 'replace')
-        tweetText = tweetText.replace('\n',' ')
+        if status == {}:
+            return "twitapi.GetStatus returned nothing :-("
+        if status.user == None and status.text == None:
+            return "Empty status object returned :("
+        if status.retweeted_status and status.retweeted_status.text:
+            status = status.retweeted_status
+        if status.user is not None:
+            tweeter_screen = status.user.screen_name #.encode('UTF-8', 'replace')
+            tweeter_name = status.user.name #.encode('UTF-8', 'replace')
+        else:
+            tweeter_screen = "[not returned]" ; tweeter_name = "[not returned]"
returned]" + tweeter_name = tweeter_name + " RTing " + status.user.name #.encode('UTF-8', 'replace') + tweetText = status.full_text + if status.media: + replacements = defaultdict( list ) + for medium in status.media: + replacements[medium.url].append(medium.media_url_https) + + for k,v in replacements.items(): + + v = [re.sub(r"/tweet_video_thumb/([\w\-]+).jpg", r"/tweet_video/\1.mp4", link) for link in v] + if len(v) > 1: + replacementstring = "[" + " ; ".join(v) +"]" + else: + replacementstring = v[0] + tweetText = tweetText.replace(k, replacementstring) + + for url in status.urls: + toReplace = url.expanded_url + + if unobfuscate_urls: + import urllib + rv = urlparse.urlparse(toReplace) + if rv.hostname in { + # sourced from http://bit.do/list-of-url-shorteners.php + "bit.do", "t.co", "lnkd.in", "db.tt", "qr.ae", "adf.ly", + "goo.gl", "bitly.com", "cur.lv", "tinyurl.com", "ow.ly", + "bit.ly", "adcrun.ch", "ity.im", "q.gs", "viralurl.com", + "is.gd", "po.st", "vur.me", "bc.vc", "twitthis.com", "u.to", + "j.mp", "buzurl.com", "cutt.us", "u.bb", "yourls.org", + "crisco.com", "x.co", "prettylinkpro.com", "viralurl.biz", + "adcraft.co", "virl.ws", "scrnch.me", "filoops.info", "vurl.bz", + "vzturl.com", "lemde.fr", "qr.net", "1url.com", "tweez.me", + "7vd.cn", "v.gd", "dft.ba", "aka.gr", "tr.im", + # added by ASB: + "trib.al", "dlvr.it" + }: + #expand list as needed. + response = urllib.urlopen('http://urlex.org/txt/' + toReplace) + resptext = response.read() + if resptext.startswith('http'): # ie it looks urlish (http or https) + if resptext != toReplace: + toReplace = resptext + # maybe make a note of the domain of the original URL to compile list of shortenable domains? + + # remove tracking utm_ query parameters, for privacy and brevity + # code snippet from https://gist.github.com/lepture/5997883 + rv = urlparse.urlparse(toReplace) + if rv.query: + query = re.sub(r'utm_\w+=[^&]+&?', '', rv.query) + if query: + toReplace = '%s://%s%s?%s' % (rv.scheme, rv.hostname, rv.path, query) + else: + toReplace = '%s://%s%s' % (rv.scheme, rv.hostname, rv.path) # leave off the final '?' + + if expand_included_tweets: + if rv.hostname == 'twitter.com' and re.search(r'status/\d+',rv.path): + if recurlvl > 2: + stringsout = [ "{{ Recursion level too high }}" ] + stringsout + else: + quotedtweet = getTweet(toReplace, twitapi, inclusion=True, recurlvl=recurlvl+1) # inclusion parameter limits recursion. + if not quotedtweet: + quotedtweet = [""] + quotedtweet[0] = "Q{ " + quotedtweet[0] + quotedtweet[-1] += " }" + stringsout = quotedtweet + stringsout + + tweetText = tweetText.replace(url.url, toReplace) + + tweetText = tweetText.replace(">",">") + tweetText = tweetText.replace("<","<") + tweetText = tweetText.replace("&","&") + tweetText = tweetText.replace("\n"," ") stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText) except twitter.TwitterError: terror = sys.exc_info() stringout = "Twitter error: %s" % terror[1].__str__() - return stringout + except Exception: + terror = sys.exc_info() + stringout = "Error: %s" % terror[1].__str__() + stringsout = [stringout] + stringsout + if inclusion: + return stringsout # don't want to double-encode it, so just pass it on for now and encode later + + return map(lambda x: x.encode('UTF-8', 'replace'), stringsout)