Skip to content

Commit

Permalink
[8chan] support '/last/' thread URLs (#6318)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikf committed Oct 14, 2024
1 parent c7f0d89 commit 93265db
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 43 deletions.
31 changes: 13 additions & 18 deletions gallery_dl/extractor/8chan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,16 +64,14 @@ class _8chanThreadExtractor(_8chanExtractor):
"{threadId} {subject[:50]}")
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)"
example = "https://8chan.moe/a/res/12345.html"

def __init__(self, match):
_8chanExtractor.__init__(self, match)
_, self.board, self.thread = match.groups()

def items(self):
_, board, thread = self.groups

# fetch thread data
url = "{}/{}/res/{}.".format(self.root, self.board, self.thread)
url = "{}/{}/res/{}.".format(self.root, board, thread)
self.session.headers["Referer"] = url + "html"
thread = self.request(url + "json").json()
thread["postId"] = thread["threadId"]
Expand Down Expand Up @@ -106,25 +104,22 @@ class _8chanBoardExtractor(_8chanExtractor):
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
example = "https://8chan.moe/a/"

def __init__(self, match):
_8chanExtractor.__init__(self, match)
_, self.board, self.page = match.groups()

def items(self):
page = text.parse_int(self.page, 1)
url = "{}/{}/{}.json".format(self.root, self.board, page)
board = self.request(url).json()
threads = board["threads"]
_, board, pnum = self.groups
pnum = text.parse_int(pnum, 1)
url = "{}/{}/{}.json".format(self.root, board, pnum)
data = self.request(url).json()
threads = data["threads"]

while True:
for thread in threads:
thread["_extractor"] = _8chanThreadExtractor
url = "{}/{}/res/{}.html".format(
self.root, self.board, thread["threadId"])
self.root, board, thread["threadId"])
yield Message.Queue, url, thread

page += 1
if page > board["pageCount"]:
pnum += 1
if pnum > data["pageCount"]:
return
url = "{}/{}/{}.json".format(self.root, self.board, page)
url = "{}/{}/{}.json".format(self.root, board, pnum)
threads = self.request(url).json()["threads"]
48 changes: 23 additions & 25 deletions test/results/8chan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,11 +10,10 @@

__tests__ = (
{
"#url" : "https://8chan.moe/vhs/res/4.html",
"#category": ("", "8chan", "thread"),
"#class" : _8chan._8chanThreadExtractor,
"#pattern" : r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$",
"#count" : 14,
"#url" : "https://8chan.moe/vhs/res/4.html",
"#class": _8chan._8chanThreadExtractor,
"#pattern": r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$",
"#count" : 14,

"archived" : False,
"autoSage" : False,
Expand Down Expand Up @@ -51,41 +50,40 @@
},

{
"#url" : "https://8chan.se/vhs/res/4.html",
"#category": ("", "8chan", "thread"),
"#class" : _8chan._8chanThreadExtractor,
"#url" : "https://8chan.moe/vhs/last/4.html",
"#class": _8chan._8chanThreadExtractor,
},

{
"#url" : "https://8chan.cc/vhs/res/4.html",
"#category": ("", "8chan", "thread"),
"#class" : _8chan._8chanThreadExtractor,
"#url" : "https://8chan.se/vhs/res/4.html",
"#class": _8chan._8chanThreadExtractor,
},

{
"#url" : "https://8chan.moe/vhs/",
"#category": ("", "8chan", "board"),
"#class" : _8chan._8chanBoardExtractor,
"#url" : "https://8chan.cc/vhs/res/4.html",
"#class": _8chan._8chanThreadExtractor,
},

{
"#url" : "https://8chan.moe/vhs/2.html",
"#category": ("", "8chan", "board"),
"#class" : _8chan._8chanBoardExtractor,
"#pattern" : _8chan._8chanThreadExtractor.pattern,
"#count" : range(24, 32),
"#url" : "https://8chan.moe/vhs/",
"#class": _8chan._8chanBoardExtractor,
},

{
"#url" : "https://8chan.se/vhs/",
"#category": ("", "8chan", "board"),
"#class" : _8chan._8chanBoardExtractor,
"#url" : "https://8chan.moe/vhs/2.html",
"#class": _8chan._8chanBoardExtractor,
"#pattern": _8chan._8chanThreadExtractor.pattern,
"#count" : range(24, 32),
},

{
"#url" : "https://8chan.cc/vhs/",
"#category": ("", "8chan", "board"),
"#class" : _8chan._8chanBoardExtractor,
"#url" : "https://8chan.se/vhs/",
"#class": _8chan._8chanBoardExtractor,
},

{
"#url" : "https://8chan.cc/vhs/",
"#class": _8chan._8chanBoardExtractor,
},

)

0 comments on commit 93265db

Please sign in to comment.