Skip to content

Commit

Permalink
[zerochan] use API by default (#3669)
Browse files Browse the repository at this point in the history
add 'pagination' option
  • Loading branch information
mikf committed Feb 24, 2024
1 parent efccd3d commit cc6b9e4
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 8 deletions.
15 changes: 15 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4131,6 +4131,21 @@ Description
Note: This requires 1-2 additional HTTP requests per post.


extractor.zerochan.pagination
-----------------------------
Type
``string``
Default
``"api"``
Description
Controls how to paginate over tag search results.

* ``"api"``: Use the `JSON API <https://www.zerochan.net/api>`__
(no ``extension`` metadata)
* ``"html"``: Parse HTML pages
(limited to 100 pages * 24 posts)


extractor.[booru].tags
----------------------
Type
Expand Down
58 changes: 52 additions & 6 deletions gallery_dl/extractor/zerochan.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .booru import BooruExtractor
from ..cache import cache
from .. import text, exception
from .. import text, util, exception

BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"

Expand All @@ -21,8 +21,11 @@ class ZerochanExtractor(BooruExtractor):
root = "https://www.zerochan.net"
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
page_start = 1
per_page = 250
cookies_domain = ".zerochan.net"
cookies_names = ("z_id", "z_hash")
request_interval = (0.5, 1.5)

def login(self):
self._logged_in = True
Expand Down Expand Up @@ -86,7 +89,7 @@ def _parse_entry_html(self, entry_id):

return data

def _parse_entry_json(self, entry_id):
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
item = self.request(url).json()

Expand Down Expand Up @@ -117,14 +120,22 @@ def __init__(self, match):
ZerochanExtractor.__init__(self, match)
self.search_tag, self.query = match.groups()

def _init(self):
    """Select the pagination backend from the 'pagination' option.

    "html" switches to HTML page parsing with 24 posts per page;
    any other value uses the JSON API and sets the session's
    User-Agent header to util.USERAGENT.
    """
    use_html = (self.config("pagination") == "html")

    if not use_html:
        # API pagination (default)
        self.posts = self.posts_api
        self.session.headers["User-Agent"] = util.USERAGENT
        return

    # HTML pagination
    self.posts = self.posts_html
    self.per_page = 24

def metadata(self):
    """Return a dict holding the searched tag under 'search_tags'.

    '+' characters in the tag are replaced by spaces before the
    result is passed through text.unquote().
    """
    spaced_tag = self.search_tag.replace("+", " ")
    return {"search_tags": text.unquote(spaced_tag)}

def posts(self):
def posts_html(self):
url = self.root + "/" + self.search_tag
params = text.parse_query(self.query)
params["p"] = text.parse_int(params.get("p"), 1)
params["p"] = text.parse_int(params.get("p"), self.page_start)
metadata = self.config("metadata")

while True:
Expand All @@ -140,7 +151,7 @@ def posts(self):
if metadata:
entry_id = extr('href="/', '"')
post = self._parse_entry_html(entry_id)
post.update(self._parse_entry_json(entry_id))
post.update(self._parse_entry_api(entry_id))
yield post
else:
yield {
Expand All @@ -157,6 +168,41 @@ def posts(self):
break
params["p"] += 1

def posts_api(self):
    """Yield post dicts for the search tag using the JSON API.

    Requests '<root>/<search_tag>' with 'json', 'l' (posts per page)
    and 'p' (page number) query parameters, starting at
    self.page_start, and keeps requesting the following page for as
    long as the response has a truthy "next" value.

    With the 'metadata' option enabled, every post is extended with
    the results of _parse_entry_html() and _parse_entry_api().
    Otherwise a '.jpg' 'file_url' is derived from the post ID, with
    the '.png' variant registered as '_fallback'.

    Iteration stops silently when a response has no "items" entry.
    """
    url = self.root + "/" + self.search_tag
    metadata = self.config("metadata")
    params = {
        "json": "1",
        "l"   : self.per_page,
        "p"   : self.page_start,
    }

    static = "https://static.zerochan.net/.full."

    while True:
        data = self.request(url, params=params).json()
        try:
            posts = data["items"]
        except (KeyError, TypeError):
            # KeyError: response object without "items"
            # TypeError: response is not a mapping (e.g. a list or null)
            # (a subscript lookup never raises ValueError)
            return

        if metadata:
            for post in posts:
                post_id = post["id"]
                post.update(self._parse_entry_html(post_id))
                post.update(self._parse_entry_api(post_id))
        else:
            for post in posts:
                # the file extension is not part of the API result:
                # try '.jpg' first and fall back to '.png'
                base = static + str(post["id"])
                post["file_url"] = base + ".jpg"
                post["_fallback"] = (base + ".png",)

        yield from posts

        if not data.get("next"):
            return
        params["p"] += 1


class ZerochanImageExtractor(ZerochanExtractor):
subcategory = "image"
Expand All @@ -170,5 +216,5 @@ def __init__(self, match):
def posts(self):
post = self._parse_entry_html(self.image_id)
if self.config("metadata"):
post.update(self._parse_entry_json(self.image_id))
post.update(self._parse_entry_api(self.image_id))
return (post,)
52 changes: 50 additions & 2 deletions test/results/zerochan.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,27 @@
"#url" : "https://www.zerochan.net/Perth+%28Kantai+Collection%29",
"#category": ("booru", "zerochan", "tag"),
"#class" : zerochan.ZerochanTagExtractor,
"#pattern" : r"https://static\.zerochan\.net/\.full\.\d+\.jpg",
"#count" : "> 50",

"extension" : r"jpg",
"file_url" : r"re:https://static\.zerochan\.net/\.full\.\d+\.jpg",
"filename" : r"re:\.full\.\d+",
"height" : int,
"id" : int,
"search_tags": "Perth (Kantai Collection)",
"tag" : r"re:(Perth \(Kantai Collection\)|Kantai Collection)",
"tags" : list,
"width" : int,
},

{
"#url" : "https://www.zerochan.net/Perth+%28Kantai+Collection%29",
"#category": ("booru", "zerochan", "tag"),
"#class" : zerochan.ZerochanTagExtractor,
"#options" : {"pagination": "html"},
"#pattern" : r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
"#count" : "> 24",
"#count" : "> 45",

"extension" : r"re:jpg|png",
"file_url" : r"re:https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
Expand Down Expand Up @@ -58,8 +77,37 @@
"Theme:Personification",
"Theme:Pins",
"Theme:Ribbon",
"Theme:Shirt",
"Theme:Short Hair",
"Theme:Top",
],
"uploader": "YukinoTokisaki",
"width" : 1920,
},

{
"#url" : "https://www.zerochan.net/2920445",
"#category": ("booru", "zerochan", "image"),
"#class" : zerochan.ZerochanImageExtractor,
"#pattern" : r"https://static\.zerochan\.net/Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
"#auth" : False,

"author" : "YeFan 葉凡",
"date" : "dt:2020-04-24 21:33:44",
"file_url": "https://static.zerochan.net/Perth.%28Kantai.Collection%29.full.2920445.jpg",
"filename": "Perth.(Kantai.Collection).full.2920445",
"height" : 1366,
"id" : 2920445,
"path" : [
"Kantai Collection",
"Perth (Kantai Collection)",
],
"size" : 1975296,
"tags" : [
"Mangaka:YeFan 葉凡",
"Game:Kantai Collection",
"Character:Perth (Kantai Collection)",
"Theme:Firefighter Outfit",
"Theme:Pins",
],
"uploader": "YukinoTokisaki",
"width" : 1920,
Expand Down

0 comments on commit cc6b9e4

Please sign in to comment.