Skip to content

Commit

Permalink
[tumblr] attempt to fetch high-quality inline images (#2877)
Browse files Browse the repository at this point in the history
* [tumblr] attempt to fetch high-quality images (again)

Fixes #1846, and fixes #1344

* slight refactor

* update configuration.rst entry
  • Loading branch information
the-blank-x committed Aug 31, 2022
1 parent daef91c commit 9745b48
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 12 deletions.
5 changes: 3 additions & 2 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2266,10 +2266,11 @@ Type
Default
``true``
Description
Download full-resolution ``photo`` images.
Download full-resolution ``photo`` and ``inline`` images.

For each photo with "maximum" resolution
(width equal to 2048 or height equal to 3072),
(width equal to 2048 or height equal to 3072)
or each inline image,
use an extra HTTP request to find the URL to its full-resolution version.


Expand Down
34 changes: 24 additions & 10 deletions gallery_dl/extractor/tumblr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,6 @@
import re


def _original_inline_image(url):
    """Rewrite a Tumblr inline-image URL to use the ``_1280`` size suffix.

    Matches ``http(s)://<digits>.media.tumblr.com[/<hex>]/tumblr[_inline]_<id>_<size>.<ext>``
    and replaces the numeric ``<size>`` with ``1280`` while forcing the
    ``https`` scheme.  URLs that do not match the pattern are returned
    unchanged.
    """
    return re.sub(
        (r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
         r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)"),
        # \1 = host + path up to the image id, \2 = file extension
        r"https://\1_1280.\2", url
    )


def _original_video(url):
return re.sub(
(r"https?://((?:vt|vtt|ve)(?:\.media)?\.tumblr\.com"
Expand Down Expand Up @@ -141,7 +133,7 @@ def items(self):
# API response, but they can't contain images/videos anyway
body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
for url in re.findall('<img src="([^"]+)"', body):
url = _original_inline_image(url)
url = self._original_inline_image(url)
posts.append(self._prepare_image(url, post.copy()))
for url in re.findall('<source src="([^"]+)"', body):
url = _original_video(url)
Expand Down Expand Up @@ -221,7 +213,21 @@ def _skip_reblog_same_blog(self, post):
return self.blog != post.get("reblogged_root_uuid")

def _original_image(self, url):
    """Return the URL of the full-resolution version of a photo *url*.

    The first ``/s2048x3072/`` path segment is swapped for
    ``/s99999x99999/`` and the resulting URL is handed to
    ``self._update_image_token()``, whose return value is passed through.
    URLs without that segment are forwarded unmodified.
    """
    # A single replace is sufficient; the earlier standalone
    # "url = url.replace(...)" statement did the same work twice.
    return self._update_image_token(
        url.replace("/s2048x3072/", "/s99999x99999/", 1))

def _original_inline_image(self, url):
    """Return the best available URL for an inline image *url*.

    When ``self.original`` is truthy and *url* contains a
    ``/s<width>x<height>/`` path segment, the first such segment is
    replaced with ``/s99999x99999/`` and the result of
    ``self._update_image_token()`` for that URL is returned.

    Otherwise the older ``tumblr[_inline]_<id>_<size>.<ext>`` URL form is
    rewritten to use the ``_1280`` size suffix over ``https``; URLs that
    match neither form are returned unchanged.
    """
    if self.original:
        # 'count' is passed by keyword: positional use is deprecated
        # since Python 3.13.
        url, n = re.subn(r"/s\d+x\d+/", "/s99999x99999/", url, count=1)
        if n:
            return self._update_image_token(url)
    # No size segment was replaced (or 'original' is disabled), so 'url'
    # is still the input value here.
    return re.sub(
        (r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
         r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)"),
        r"https://\1_1280.\2", url
    )

def _update_image_token(self, url):
    """Fetch *url* as an HTML page and return the image URL found in it.

    Sends a request through ``self.request()`` with an ``Accept`` header
    that prefers ``text/html``, then returns element ``[0]`` of
    ``text.extract()`` applied to the response body with the delimiters
    ``'" src="'`` and ``'"'``.
    """
    headers = {"Accept": "text/html,*/*;q=0.8"}
    response = self.request(url, headers=headers)
    # NOTE(review): presumably text.extract() yields None as its first
    # element when the delimiters are not found -- confirm, since callers
    # use the return value as a download URL.
    return text.extract(response.text, '" src="', '"')[0]
Expand Down Expand Up @@ -305,6 +311,14 @@ class TumblrPostExtractor(TumblrExtractor):
("https://mikf123.tumblr.com/post/181022380064/chat-post", {
"count": 0,
}),
("https://kichatundk.tumblr.com/post/654953419288821760", {
"count": 2, # high-quality images (#1846)
"content": "d6fcc7b6f750d835d55c7f31fa3b63be26c9f89b",
}),
("https://hameru-is-cool.tumblr.com/post/639261855227002880", {
"count": 2, # high-quality images (#1344)
"content": "6bc19a42787e46e1bba2ef4aeef5ca28fcd3cd34",
}),
("https://mikf123.tumblr.com/image/689860196535762944", {
"pattern": r"^https://\d+\.media\.tumblr\.com"
r"/134791621559a79793563b636b5fe2c6"
Expand Down

0 comments on commit 9745b48

Please sign in to comment.