X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~matthewv/git?p=irc.git;a=blobdiff_plain;f=commands.py;h=491ab3c7254840f476270abd60f8c6f33e478004;hp=ce6b199589c628117aa137811de023606036d929;hb=75d083908bb82c71b58b77fda06dc4f9af8be4fe;hpb=eaf2614d23a5ae0a326b1afeeeb1ef7cbf6e1720 diff --git a/commands.py b/commands.py index ce6b199..491ab3c 100755 --- a/commands.py +++ b/commands.py @@ -1,5 +1,6 @@ # Part of Acrobat. import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime, urlparse +from collections import defaultdict from irclib import irc_lower, nm_to_n # query karma @@ -500,7 +501,10 @@ def twitterq(bot,cmd,nick,conn,public,twitapi): stringout = getTweet(urlstring,twitapi) bot.automsg(public, nick, stringout) -def getTweet(urlstring,twitapi): +def getTweet(urlstring,twitapi,inclusion=False): + unobfuscate_urls=True + expand_included_tweets=True + parts = string.split(urlstring,'/') tweetID = parts[-1] try: @@ -509,38 +513,85 @@ def getTweet(urlstring,twitapi): return "twitapi.GetStatus returned nothing :-(" if status.user == None and status.text == None: return "Empty status object returned :(" + if status.retweeted_status and status.retweeted_status.text: + status = status.retweeted_status if status.user is not None: - tweeter_screen = status.user.screen_name.encode('UTF-8', 'replace') - tweeter_name = status.user.name.encode('UTF-8', 'replace') + tweeter_screen = status.user.screen_name #.encode('UTF-8', 'replace') + tweeter_name = status.user.name #.encode('UTF-8', 'replace') else: tweeter_screen = "[not returned]" ; tweeter_name = "[not returned]" - tweetText = status.text.encode('UTF-8', 'replace') - tweetText = tweetText.replace('\n',' ') - - for medium in status.media: - if "media_url_https" in medium: - link = medium["media_url_https"] - link = re.sub(r"/tweet_video_thumb/(\w+).jpg", r"/tweet_video/\1.mp4", link) - tweetText = tweetText.replace(medium["url"], link) + tweeter_name = tweeter_name + " RTing " + status.user.name #.encode('UTF-8', 'replace') + tweetText = status.text + if status.media: + replacements = defaultdict( list ) + for medium in status.media: + replacements[medium.url].append(medium.media_url_https) + + for k,v in replacements.items(): + + v = [re.sub(r"/tweet_video_thumb/([\w\-]+).jpg", r"/tweet_video/\1.mp4", link) for link in v] + if len(v) > 1: + replacementstring = "[" + " ; ".join(v) +"]" + else: + replacementstring = v[0] + tweetText = tweetText.replace(k, replacementstring) + for url in status.urls: toReplace = url.expanded_url + if unobfuscate_urls: + import urllib + rv = urlparse.urlparse(toReplace) + if rv.hostname in { + # sourced from http://bit.do/list-of-url-shorteners.php + "bit.do", "t.co", "lnkd.in", "db.tt", "qr.ae", "adf.ly", + "goo.gl", "bitly.com", "cur.lv", "tinyurl.com", "ow.ly", + "bit.ly", "adcrun.ch", "ity.im", "q.gs", "viralurl.com", + "is.gd", "po.st", "vur.me", "bc.vc", "twitthis.com", "u.to", + "j.mp", "buzurl.com", "cutt.us", "u.bb", "yourls.org", + "crisco.com", "x.co", "prettylinkpro.com", "viralurl.biz", + "adcraft.co", "virl.ws", "scrnch.me", "filoops.info", "vurl.bz", + "vzturl.com", "lemde.fr", "qr.net", "1url.com", "tweez.me", + "7vd.cn", "v.gd", "dft.ba", "aka.gr", "tr.im", + # added by ASB: + "trib.al", "dlvr.it" + }: + #expand list as needed. + response = urllib.urlopen('http://urlex.org/txt/' + toReplace) + resptext = response.read() + if resptext.startswith('http'): # ie it looks urlish (http or https) + if resptext != toReplace: + toReplace = resptext + # maybe make a note of the domain of the original URL to compile list of shortenable domains? + # remove tracking utm_ query parameters, for privacy and brevity # code snippet from https://gist.github.com/lepture/5997883 rv = urlparse.urlparse(toReplace) if rv.query: query = re.sub(r'utm_\w+=[^&]+&?', '', rv.query) - toReplace = '%s://%s%s?%s' % (rv.scheme, rv.hostname, rv.path, query) - + if query: + toReplace = '%s://%s%s?%s' % (rv.scheme, rv.hostname, rv.path, query) + else: + toReplace = '%s://%s%s' % (rv.scheme, rv.hostname, rv.path) # leave off the final '?' + + if expand_included_tweets and not inclusion: + if rv.hostname == 'twitter.com' and re.search(r'status/\d+',rv.path): + quotedtweet = getTweet(toReplace, twitapi, inclusion=True) # inclusion parameter limits recursion. + tweetText += " Q{" + quotedtweet + "}" tweetText = tweetText.replace(url.url, toReplace) tweetText = tweetText.replace(">",">") tweetText = tweetText.replace("<","<") tweetText = tweetText.replace("&","&") - tweetText = tweetText.encode('UTF-8', 'replace') - + tweetText = tweetText.replace("\n"," ") stringout = "tweet by %s (%s): %s" %(tweeter_screen,tweeter_name,tweetText) except twitter.TwitterError: terror = sys.exc_info() stringout = "Twitter error: %s" % terror[1].__str__() - return stringout + except Exception: + terror = sys.exc_info() + stringout = "Error: %s" % terror[1].__str__() + if inclusion: + return stringout # don't want to double-encode it, so just pass it on for now and encode later + print stringout.encode('UTF-8', 'replace') + return stringout.encode('UTF-8', 'replace')