From 40c0553523bb28790de0e6a07a978a42e2be88c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 7 Mar 2024 00:52:50 +0100 Subject: [PATCH] [twitter] add 'quotes' extractor (#5262) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/mikf/gallery-dl/issues/5262#issuecomment-1981571924 It's implemented as a search for 'quoted_tweet_id:…' on Twitter. --- docs/supportedsites.md | 2 +- gallery_dl/extractor/twitter.py | 16 ++++++++++++++-- test/results/twitter.py | 10 +++++++++- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1bbfa12eda..b004d7dcd7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -898,7 +898,7 @@ Consider all listed sites to potentially be NSFW. Twitter https://twitter.com/ - Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles + Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles Supported diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index ad5bfc62e4..e6bf9b0f05 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -731,9 +731,9 @@ def tweets(self): class TwitterTweetExtractor(TwitterExtractor): - """Extractor for images from individual tweets""" + """Extractor for individual tweets""" subcategory = "tweet" - pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)/?$" example = "https://twitter.com/USER/status/12345" def __init__(self, match): @@ -810,6 +810,18 @@ def _tweets_conversation(self, tweet_id): return itertools.chain(buffer, tweets) +class TwitterQuotesExtractor(TwitterExtractor): + """Extractor for quotes of a Tweet""" + subcategory = "quotes" + pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes" + example = "https://twitter.com/USER/status/12345/quotes" + + def items(self): + url = "{}/search?q=quoted_tweet_id:{}".format(self.root, self.user) + data = {"_extractor": TwitterSearchExtractor} + yield Message.Queue, url, data + + class TwitterAvatarExtractor(TwitterExtractor): subcategory = "avatar" filename_fmt = "avatar {date}.{extension}" diff --git a/test/results/twitter.py b/test/results/twitter.py index 5150a11a68..f7fd8dbae3 100644 --- a/test/results/twitter.py +++ b/test/results/twitter.py @@ -218,7 +218,7 @@ "#category": ("", "twitter", "hashtag"), "#class" : twitter.TwitterHashtagExtractor, "#pattern" : twitter.TwitterSearchExtractor.pattern, - "#sha1_url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9", + "#urls" : "https://twitter.com/search?q=%23nature", }, { @@ -537,6 +537,14 @@ "The analysis by Texas A&M University seems to contradict statements by state and federal regulators that air near the crash site is completely safe, despite residents complaining about rashes, breathing problems and other health effects." Your reaction.""", }, +{ + "#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes", + "#category": ("", "twitter", "quotes"), + "#class" : twitter.TwitterQuotesExtractor, + "#pattern" : twitter.TwitterSearchExtractor.pattern, + "#urls" : "https://twitter.com/search?q=quoted_tweet_id:1263832915173048321", +}, + { "#url" : "https://twitter.com/supernaturepics/photo", "#category": ("", "twitter", "avatar"),