[tumblr] attempt to fetch high-quality inline images (#2877)

* [tumblr] attempt to fetch high-quality images (again); fixes #1846 and #1344
* slight refactor
* update configuration.rst entry
mikf · Aug 31, 2022 · 9745b48
1 parent daef91c
commit 9745b48
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 12 deletions.
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -2266,10 +2266,11 @@ Type
 Default
     ``true``
 Description
-    Download full-resolution ``photo`` images.
+    Download full-resolution ``photo`` and ``inline`` images.
 
     For each photo with "maximum" resolution
-    (width equal to 2048 or height equal to 3072),
+    (width equal to 2048 or height equal to 3072)
+    or each inline image,
     use an extra HTTP request to find the URL to its full-resolution version.
 
 

diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
@@ -14,14 +14,6 @@
 import re
 
 
-def _original_inline_image(url):
-    return re.sub(
-        (r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
-         r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)"),
-        r"https://\1_1280.\2", url
-    )
-
-
 def _original_video(url):
     return re.sub(
         (r"https?://((?:vt|vtt|ve)(?:\.media)?\.tumblr\.com"
@@ -141,7 +133,7 @@ def items(self):
                 # API response, but they can't contain images/videos anyway
                 body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
                 for url in re.findall('<img src="([^"]+)"', body):
-                    url = _original_inline_image(url)
+                    url = self._original_inline_image(url)
                     posts.append(self._prepare_image(url, post.copy()))
                 for url in re.findall('<source src="([^"]+)"', body):
                     url = _original_video(url)
@@ -221,7 +213,21 @@ def _skip_reblog_same_blog(self, post):
         return self.blog != post.get("reblogged_root_uuid")
 
     def _original_image(self, url):
-        url = url.replace("/s2048x3072/", "/s99999x99999/", 1)
+        return self._update_image_token(
+            url.replace("/s2048x3072/", "/s99999x99999/", 1))
+
+    def _original_inline_image(self, url):
+        if self.original:
+            url, n = re.subn(r"/s\d+x\d+/", "/s99999x99999/", url, 1)
+            if n:
+                return self._update_image_token(url)
+        return re.sub(
+            (r"https?://(\d+\.media\.tumblr\.com(?:/[0-9a-f]+)?"
+             r"/tumblr(?:_inline)?_[^_]+)_\d+\.([0-9a-z]+)"),
+            r"https://\1_1280.\2", url
+        )
+
+    def _update_image_token(self, url):
         headers = {"Accept": "text/html,*/*;q=0.8"}
         response = self.request(url, headers=headers)
         return text.extract(response.text, '" src="', '"')[0]
@@ -305,6 +311,14 @@ class TumblrPostExtractor(TumblrExtractor):
         ("https://mikf123.tumblr.com/post/181022380064/chat-post", {
             "count": 0,
         }),
+        ("https://kichatundk.tumblr.com/post/654953419288821760", {
+            "count": 2,  # high-quality images (#1846)
+            "content": "d6fcc7b6f750d835d55c7f31fa3b63be26c9f89b",
+        }),
+        ("https://hameru-is-cool.tumblr.com/post/639261855227002880", {
+            "count": 2,  # high-quality images (#1344)
+            "content": "6bc19a42787e46e1bba2ef4aeef5ca28fcd3cd34",
+        }),
         ("https://mikf123.tumblr.com/image/689860196535762944", {
             "pattern": r"^https://\d+\.media\.tumblr\.com"
                        r"/134791621559a79793563b636b5fe2c6"