Skip to content

Commit

Permalink
[twitter] add experimental 'videos' option (#99)
Browse files Browse the repository at this point in the history
Enabling this option will detect videos in tweets and output them as
"unsupported" URLs, so that these can then be downloaded with youtube-dl.

There are a lot of improvements to be made to the current
implementation, but it works and does what it is supposed to, even if
it is as inefficient as can be ...
  • Loading branch information
mikf committed Sep 30, 2018
1 parent 5507f5c commit f8b3b00
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
9 changes: 9 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,15 @@ Description Extract images from retweets.
=========== =====


extractor.twitter.videos
------------------------
=========== =====
Type ``bool``
Default ``false``
Description Output video tweets as unsupported URLs.
=========== =====


extractor.[booru].tags
----------------------
=========== =====
Expand Down
3 changes: 2 additions & 1 deletion docs/gallery-dl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@
},
"twitter":
{
"retweets": true
"retweets": true,
"videos": false
},
"booru":
{
Expand Down
20 changes: 13 additions & 7 deletions gallery_dl/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""Extract images from https://twitter.com/"""

from .common import Extractor, Message
from .. import text
from .. import text, extractor


class TwitterExtractor(Extractor):
Expand All @@ -24,32 +24,38 @@ def __init__(self, match):
Extractor.__init__(self)
self.user = match.group(1)
self.retweets = self.config("retweets", True)
self.videos = self.config("videos", False)

if self.videos:
self._blacklist = extractor.blacklist(("twitter",))

def items(self):
# Message stream: the version first, then the directory metadata,
# then the per-tweet messages produced by the loop below.
yield Message.Version, 1
yield Message.Directory, self.metadata()

for tweet in self.tweets():
images = list(text.extract_iter(
tweet, 'data-image-url="', '"'))
if not images:
continue

data = self._data_from_tweet(tweet)
# Drop retweets when the "retweets" option is disabled.
if not self.retweets and data["retweet_id"]:
continue

images = text.extract_iter(
tweet, 'data-image-url="', '"')
# data["num"] is the 1-based position of the image within its tweet.
for data["num"], url in enumerate(images, 1):
text.nameext_from_url(url, data)
# NOTE(review): the ":orig" suffix presumably requests the
# original-size image -- confirm.
yield Message.Url, url + ":orig", data

# With the "videos" option enabled, a tweet whose HTML contains
# "-videoContainer" is passed on as its status URL (a Queue message)
# instead of being downloaded here.
if self.videos and "-videoContainer" in tweet:
url = "{}/{}/status/{}".format(
self.root, data["user"], data["tweet_id"])
# The yield sits inside the `with`, so the blacklist context stays
# entered while the consumer handles the queued URL.
# NOTE(review): presumably this keeps the twitter extractors from
# matching the URL so that it is reported as unsupported -- confirm
# against extractor.blacklist().
with self._blacklist:
yield Message.Queue, url, data

def metadata(self):
"""Return general metadata"""
# The dict holds only the user name that __init__ took from the URL
# match; items() sends it along with the Directory message.
return {"user": self.user}

def tweets(self):
"""Yield HTML content of all relevant tweets"""
# This implementation returns an empty tuple, so items() has no tweets
# to iterate over.
# NOTE(review): presumably subclasses override this method -- confirm.
return ()

@staticmethod
def _data_from_tweet(tweet):
Expand Down

0 comments on commit f8b3b00

Please sign in to comment.