From f024eaca7f9d7ddf1acaae2d0849544d5e857800 Mon Sep 17 00:00:00 2001
From: Tom Womack
Date: Wed, 26 Jun 2019 21:34:53 +0100
Subject: [PATCH] Parse video objects less stoatily

The https://github.com/bear/python-twitter API throws away the
extended-information field in which the name of the video file is
stored; so parse the JSON again ourselves to get hold of it.

Pick the highest bit rate MP4 video.

Throw away the old ad-hoc mangling of video_thumb urls; that is no
longer needed.

Signed-off-by: Tom Womack
Signed-off-by: Ian Jackson
---
v2:

Fix logic error:
  When tweet contains multiple media, handle videos as videos and
  non-videos as non-videos.  Resulting patch has rather different
  organisation and does less violence to the original code.

Commit message changes:
  Mention deletion of tweet_video_thumb hack

Style fixes and changes:
  Do not add a comment about the new import
  Use .get(DEFVAL) rather than nesting in if KEY in VAL, throughout
  Reformat new comment about JSON parsing to 80 columns
  Use ' rather than " for protocol elements such as hash keys
  Use split rather than a regexp to trim ? part from url
  Comments say "we" do something, not "I".
When searching for the best, have a single `best' variable Divide float by float not int Adjust new replacement string construction to be more natural Be consistent about spaces around == --- commands.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/commands.py b/commands.py index 512b4f7..eec69e7 100755 --- a/commands.py +++ b/commands.py @@ -2,6 +2,7 @@ import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime, urlparse, hashlib from collections import defaultdict from irclib import irc_lower, nm_to_n +import json try: from blame_filter import bfd @@ -572,11 +573,37 @@ def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0): tweetText = status.full_text if status.media: replacements = defaultdict(list) + for medium in status.media: replacements[medium.url].append(medium.media_url_https) + # The twitter-api 'conveniently' parses this for you and + # throws away the actual video URLs, so we have to take the + # JSON and reparse it :sadpanda: + # This is particularly annoying because we don't know + # for sure that status.media and the JSON 'media' entry + # have the same elements in the same order. Probably they + # do but maybe twitter-api randomly reorganised things or + # filtered the list or something. So instead we go through + # the JSON and handle the media urls, discarding whatever + # unfortunate thing we have put in replacements already. + parsed_tweet = json.loads(status.AsJsonString()) + for medium in parsed_tweet.get('media', []): + if medium['type'] == 'video': + best = { 'bitrate': -1 } + for vt in medium['video_info']['variants']: + if (vt.get('content_type') == 'video/mp4' and + vt.get('bitrate', -1) > best['bitrate']): + best = vt + if 'url' in best: + video_url = best['url'].split('?',1)[0] + duration = medium['video_info']['duration_millis'] + # ^ duration_millis is a string + duration = "%.1f" % (float(duration)/1000.) 
+ video_desc = "%s (%ss)" % (video_url, duration) + replacements[medium['url']] = [video_desc] + for k,v in replacements.items(): - v = [re.sub(r"/tweet_video_thumb/([\w\-]+).jpg", r"/tweet_video/\1.mp4", link) for link in v] if len(v) > 1: replacementstring = "[" + " ; ".join(v) +"]" else: -- 2.30.2