[twitter] add experimental 'videos' option (#99)

Enabling this option will detect videos in tweets and output them as "unsupported" URLs, so that they can then be downloaded with youtube-dl. There are a lot of improvements to be made to the current implementation, but it works and does what it is supposed to, even if it is as inefficient as can be ...
mikf · Sep 30, 2018 · f8b3b00 · f8b3b00
1 parent 5507f5c
commit f8b3b00
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 8 deletions.
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -654,6 +654,15 @@ Description Extract images from retweets.
 =========== =====
 
 
+extractor.twitter.videos
+------------------------
+=========== =====
+Type        ``bool``
+Default     ``false``
+Description Output video tweets as unsupported URLs.
+=========== =====
+
+
 extractor.[booru].tags
 ----------------------
 =========== =====

diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
@@ -110,7 +110,8 @@
         },
         "twitter":
         {
-            "retweets": true
+            "retweets": true,
+            "videos": false
         },
         "booru":
         {

diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
@@ -9,7 +9,7 @@
 """Extract images from https://twitter.com/"""
 
 from .common import Extractor, Message
-from .. import text
+from .. import text, extractor
 
 
 class TwitterExtractor(Extractor):
@@ -24,32 +24,38 @@ def __init__(self, match):
         Extractor.__init__(self)
         self.user = match.group(1)
         self.retweets = self.config("retweets", True)
+        self.videos = self.config("videos", False)
+
+        if self.videos:
+            self._blacklist = extractor.blacklist(("twitter",))
 
     def items(self):
         yield Message.Version, 1
         yield Message.Directory, self.metadata()
 
         for tweet in self.tweets():
-            images = list(text.extract_iter(
-                tweet, 'data-image-url="', '"'))
-            if not images:
-                continue
-
             data = self._data_from_tweet(tweet)
             if not self.retweets and data["retweet_id"]:
                 continue
 
+            images = text.extract_iter(
+                tweet, 'data-image-url="', '"')
             for data["num"], url in enumerate(images, 1):
                 text.nameext_from_url(url, data)
                 yield Message.Url, url + ":orig", data
 
+            if self.videos and "-videoContainer" in tweet:
+                url = "{}/{}/status/{}".format(
+                    self.root, data["user"], data["tweet_id"])
+                with self._blacklist:
+                    yield Message.Queue, url, data
+
     def metadata(self):
         """Return general metadata"""
         return {"user": self.user}
 
     def tweets(self):
         """Yield HTML content of all relevant tweets"""
-        return ()
 
     @staticmethod
     def _data_from_tweet(tweet):