From 93265db9b383a3cc5ad953b3e95de02c2ab05efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 13 Oct 2024 20:42:41 +0200 Subject: [PATCH] [8chan] support '/last/' thread URLs (#6318) --- gallery_dl/extractor/8chan.py | 31 ++++++++++------------ test/results/8chan.py | 48 +++++++++++++++++------------------ 2 files changed, 36 insertions(+), 43 deletions(-) diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index f81d2a1ff8..b3b9f3f34f 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -64,16 +64,14 @@ class _8chanThreadExtractor(_8chanExtractor): "{threadId} {subject[:50]}") filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}" archive_fmt = "{boardUri}_{postId}_{num}" - pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)" example = "https://8chan.moe/a/res/12345.html" - def __init__(self, match): - _8chanExtractor.__init__(self, match) - _, self.board, self.thread = match.groups() - def items(self): + _, board, thread = self.groups + # fetch thread data - url = "{}/{}/res/{}.".format(self.root, self.board, self.thread) + url = "{}/{}/res/{}.".format(self.root, board, thread) self.session.headers["Referer"] = url + "html" thread = self.request(url + "json").json() thread["postId"] = thread["threadId"] @@ -106,25 +104,22 @@ class _8chanBoardExtractor(_8chanExtractor): pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$" example = "https://8chan.moe/a/" - def __init__(self, match): - _8chanExtractor.__init__(self, match) - _, self.board, self.page = match.groups() - def items(self): - page = text.parse_int(self.page, 1) - url = "{}/{}/{}.json".format(self.root, self.board, page) - board = self.request(url).json() - threads = board["threads"] + _, board, pnum = self.groups + pnum = text.parse_int(pnum, 1) + url = "{}/{}/{}.json".format(self.root, board, pnum) + data = self.request(url).json() + threads = data["threads"] while True: for thread in threads: thread["_extractor"] = _8chanThreadExtractor url = "{}/{}/res/{}.html".format( - self.root, self.board, thread["threadId"]) + self.root, board, thread["threadId"]) yield Message.Queue, url, thread - page += 1 - if page > board["pageCount"]: + pnum += 1 + if pnum > data["pageCount"]: return - url = "{}/{}/{}.json".format(self.root, self.board, page) + url = "{}/{}/{}.json".format(self.root, board, pnum) threads = self.request(url).json()["threads"] diff --git a/test/results/8chan.py b/test/results/8chan.py index e9dc0cf48b..1e26c71ac3 100644 --- a/test/results/8chan.py +++ b/test/results/8chan.py @@ -10,11 +10,10 @@ __tests__ = ( { - "#url" : "https://8chan.moe/vhs/res/4.html", - "#category": ("", "8chan", "thread"), - "#class" : _8chan._8chanThreadExtractor, - "#pattern" : r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$", - "#count" : 14, + "#url" : "https://8chan.moe/vhs/res/4.html", + "#class": _8chan._8chanThreadExtractor, + "#pattern": r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$", + "#count" : 14, "archived" : False, "autoSage" : False, @@ -51,41 +50,40 @@ }, { - "#url" : "https://8chan.se/vhs/res/4.html", - "#category": ("", "8chan", "thread"), - "#class" : _8chan._8chanThreadExtractor, + "#url" : "https://8chan.moe/vhs/last/4.html", + "#class": _8chan._8chanThreadExtractor, }, { - "#url" : "https://8chan.cc/vhs/res/4.html", - "#category": ("", "8chan", "thread"), - "#class" : _8chan._8chanThreadExtractor, + "#url" : "https://8chan.se/vhs/res/4.html", + "#class": _8chan._8chanThreadExtractor, }, { - "#url" : "https://8chan.moe/vhs/", - "#category": ("", "8chan", "board"), - "#class" : _8chan._8chanBoardExtractor, + "#url" : "https://8chan.cc/vhs/res/4.html", + "#class": _8chan._8chanThreadExtractor, }, { - "#url" : "https://8chan.moe/vhs/2.html", - "#category": ("", "8chan", "board"), - "#class" : _8chan._8chanBoardExtractor, - "#pattern" : _8chan._8chanThreadExtractor.pattern, - "#count" : range(24, 32), + "#url" : "https://8chan.moe/vhs/", + "#class": _8chan._8chanBoardExtractor, }, { - "#url" : "https://8chan.se/vhs/", - "#category": ("", "8chan", "board"), - "#class" : _8chan._8chanBoardExtractor, + "#url" : "https://8chan.moe/vhs/2.html", + "#class": _8chan._8chanBoardExtractor, + "#pattern": _8chan._8chanThreadExtractor.pattern, + "#count" : range(24, 32), }, { - "#url" : "https://8chan.cc/vhs/", - "#category": ("", "8chan", "board"), - "#class" : _8chan._8chanBoardExtractor, + "#url" : "https://8chan.se/vhs/", + "#class": _8chan._8chanBoardExtractor, +}, + +{ + "#url" : "https://8chan.cc/vhs/", + "#class": _8chan._8chanBoardExtractor, }, )