Parse video objects less stoatily

author Tom Womack <tom@womack.net>

Wed, 26 Jun 2019 20:34:53 +0000 (21:34 +0100)

committer Ian Jackson <ijackson@chiark.greenend.org.uk>

Wed, 26 Jun 2019 22:08:14 +0000 (23:08 +0100)
author Tom Womack <tom@womack.net>
Wed, 26 Jun 2019 20:34:53 +0000 (21:34 +0100)
committer Ian Jackson <ijackson@chiark.greenend.org.uk>
Wed, 26 Jun 2019 22:08:14 +0000 (23:08 +0100)
diff --git a/commands.py b/commands.py

index 512b4f7e3a0a634741ffd21ec955cfb642e7d311..eec69e7f416dd0d085f60e8b08dda26e1ae3b57b 100755 (executable)
--- a/commands.py
+++ b/commands.py
@@ -2,6 +2,7 @@
  import string, cPickle, random, urllib, sys, time, re, os, twitter, subprocess, datetime, urlparse, hashlib
  from collections import defaultdict
  from irclib import irc_lower, nm_to_n
+import json
  
  try:
      from blame_filter import bfd
@@ -572,11 +573,37 @@ def getTweet(urlstring,twitapi,inclusion=False,recurlvl=0):
      tweetText = status.full_text
      if status.media:
          replacements = defaultdict(list)
+
          for medium in status.media:
              replacements[medium.url].append(medium.media_url_https)
  
+        # The twitter-api 'conveniently' parses this for you and
+        # throws away the actual video URLs, so we have to take the
+        # JSON and reparse it :sadpanda:
+        # This is particularly annoying because we don't know
+        # for sure that status.media and the JSON 'media' entry
+        # have the same elements in the same order.  Probably they
+        # do but maybe twitter-api randomly reorganised things or
+        # filtered the list or something.  So instead we go through
+        # the JSON and handle the media urls, discarding whatever
+        # unfortunate thing we have put in replacements already.
+        parsed_tweet = json.loads(status.AsJsonString())
+        for medium in parsed_tweet.get('media', []):
+            if medium['type'] == 'video':
+                best = { 'bitrate': -1 }
+                for vt in medium['video_info']['variants']:
+                    if (vt.get('content_type') == 'video/mp4' and
+                        vt.get('bitrate', -1) > best['bitrate']):
+                        best = vt
+                if 'url' in best:
+                    video_url = best['url'].split('?',1)[0]
+                    duration = medium['video_info']['duration_millis']
+                    # ^ duration_millis is a string
+                    duration = "%.1f" % (float(duration)/1000.)
+                    video_desc = "%s (%ss)" % (video_url, duration)
+                    replacements[medium['url']] = [video_desc]
+
          for k,v in replacements.items():
-            v = [re.sub(r"/tweet_video_thumb/([\w\-]+).jpg", r"/tweet_video/\1.mp4", link) for link in v]
              if len(v) > 1:
                  replacementstring = "[" +  " ; ".join(v) +"]"
              else:
author	Tom Womack <tom@womack.net>
	Wed, 26 Jun 2019 20:34:53 +0000 (21:34 +0100)
committer	Ian Jackson <ijackson@chiark.greenend.org.uk>
	Wed, 26 Jun 2019 22:08:14 +0000 (23:08 +0100)