Skip to content

Commit

Permalink
merge #4945: [shimmie2] support 'rule34hentai.net' (#861, #4789)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Dec 21, 2023
2 parents 627ed79 + 79e4606 commit 01bb75f
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,12 @@ Consider all listed sites to potentially be NSFW.
<td>Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Rule34Hentai</td>
<td>https://rule34hentai.net/</td>
<td>Posts, Tag Searches</td>
<td></td>
</tr>

<tr>
<td colspan="4"><strong>szurubooru Instances</strong></td>
Expand Down
78 changes: 78 additions & 0 deletions gallery_dl/extractor/shimmie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def _init(self):

if self.category == "giantessbooru":
self.posts = self._posts_giantessbooru
elif self.category == "rule34hentai":
self.posts = self._posts_rule34hentai

def items(self):
data = self.metadata()
Expand Down Expand Up @@ -85,6 +87,10 @@ def posts(self):
"pattern": r"booru\.cavemanon\.xyz",
"file_url": "{0}/index.php?q=image/{2}.{4}",
},
"rule34hentai": {
"root": "https://rule34hentai.net",
"pattern": r"rule34hentai\.net",
},
}

# Base URL regex: whatever Shimmie2Extractor.update(INSTANCES) returns,
# followed by "/" and an optional "index.php?q=" (which may itself be
# followed by one more "/").
# NOTE(review): update() presumably builds the regex matching the roots of
# all configured instances -- confirm against its definition.
BASE_PATTERN = Shimmie2Extractor.update(INSTANCES) + r"/(?:index\.php\?q=/?)?"
Expand Down Expand Up @@ -187,6 +193,56 @@ def _posts_giantessbooru(self):
if not extr('/{}">{}<'.format(pnum, pnum), ">"):
return

def _posts_rule34hentai(self):
    """Yield one metadata dict per post of a rule34hentai tag search.

    Result pages are requested from '<root>/post/list/<tags>/<page>',
    starting at the page number parsed from self.page (1 if that cannot
    be parsed).  Every yielded dict has the keys 'file_url', 'id', 'md5',
    'tags', 'width', 'height' and 'size'.

    The generator ends when the current page contains neither a
    '>Next<' link nor a link to the following page number.
    """
    page_number = text.parse_int(self.page, 1)
    build_file_url = self.file_url_fmt.format

    mime = ""
    # Which attributes the site's markup offers is detected only once,
    # on the first page that gets fetched.
    has_mime = has_pid = None

    while True:
        listing_url = "{}/post/list/{}/{}".format(
            self.root, self.tags, page_number)
        html = self.request(listing_url).text
        extr = text.extract_from(html)

        if has_mime is None:
            has_mime = 'data-mime="' in html
            has_pid = 'data-post-id="' in html

        # The order of the extr() calls below is kept exactly as-is.
        # NOTE(review): extr() presumably advances through the page with
        # each successful match -- confirm in text.extract_from.
        while True:
            if has_mime:
                mime = extr('data-mime="', '"')
            post_id = (
                extr('data-post-id="', '"') if has_pid else
                extr("href='/post/view/", "?"))

            if not post_id:
                # no further posts on this page
                break

            # title attribute: 'tags // WIDTHxHEIGHT // size // extension'
            title = extr('title="', '"')
            tags, dimensions, size, _ext = title.split(" // ")
            width, _, height = dimensions.partition("x")
            md5 = extr("/_thumbs/", "/")

            # use the subtype of the MIME type as file extension,
            # falling back to 'jpg' when no MIME type is available
            extension = mime.rpartition("/")[2] if mime else "jpg"

            yield {
                "file_url": build_file_url(
                    self.root, md5, post_id, text.quote(tags), extension),
                "id": post_id,
                "md5": md5,
                "tags": tags,
                "width": width,
                "height": height,
                # the last character of the size field is dropped
                # (presumably a unit suffix -- TODO confirm)
                "size": text.parse_bytes(size[:-1]),
            }

        page_number += 1
        next_page_link = "/{}'>{}<".format(page_number, page_number)
        if not (extr(">Next<", ">") or extr(next_page_link, ">")):
            return


class Shimmie2PostExtractor(Shimmie2Extractor):
"""Extractor for single shimmie2 posts"""
Expand Down Expand Up @@ -234,3 +290,25 @@ def _posts_giantessbooru(self):
"height" : 0,
"size" : 0,
},)

def _posts_rule34hentai(self):
    """Return a 1-tuple with the metadata dict of a single post.

    The post page '<root>/post/view/<post_id>' is requested and its
    tags, MD5 hash, file URL, width and height are extracted from the
    HTML.  'size' is always 0.
    """
    page_url = "{}/post/view/{}".format(self.root, self.post_id)
    extr = text.extract_from(self.request(page_url).text)

    # The order of the extr() calls below is kept exactly as-is.
    # NOTE(review): extr() presumably advances through the page with each
    # successful match -- confirm in text.extract_from.
    tags = extr(": ", "<").partition(" - ")[0].rstrip(")")
    md5 = extr("/_thumbs/", "/")

    # image posts provide 'main_image'; otherwise use a <source> element
    source = (extr('id="main_image" src="', '"') or
              extr('<source src="', '"'))
    file_url = self.root + source.lstrip(".")

    width = extr("data-width=", " ").strip("\"'")
    height = extr("data-height=", ">").partition(" ")[0].strip("\"'")

    if not md5:
        # no thumbnail path found: take the hash from the file URL
        md5 = text.extr(file_url, "/_images/", "/")

    post = {
        "id"      : self.post_id,
        "tags"    : tags,
        "md5"     : md5,
        "file_url": file_url,
        "width"   : width,
        "height"  : height,
        "size"    : 0,
    }
    return (post,)
1 change: 1 addition & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
"redgifs" : "RedGIFs",
"rozenarcana" : "Rozen Arcana",
"rule34" : "Rule 34",
"rule34hentai" : "Rule34Hentai",
"rule34us" : "Rule 34",
"sankaku" : "Sankaku Channel",
"sankakucomplex" : "Sankaku Complex",
Expand Down
51 changes: 51 additions & 0 deletions test/results/rule34hentai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import shimmie2


# Expected results for the 'rule34hentai' instance of the shimmie2
# extractors: one entry per test URL.  Keys starting with '#' describe the
# test itself (URL, category, extractor class, expected file URLs, ...);
# the remaining keys are expected metadata values.
__tests__ = (
# Tag search listing, handled by Shimmie2TagExtractor.
{
"#url" : "https://rule34hentai.net/post/list/mizuki_kotora/1",
"#category": ("shimmie2", "rule34hentai", "tag"),
"#class" : shimmie2.Shimmie2TagExtractor,
# the three file URLs expected from this listing
"#urls" : (
"https://rule34hentai.net/_images/7f3a411263d0f6de936e47ae8f9d35fb/332%20-%20Darkstalkers%20Felicia%20mizuki_kotora.jpeg",
"https://rule34hentai.net/_images/1a8eca7c04f8bf325bc993c5751a91c4/264%20-%20Darkstalkers%20Felicia%20mizuki_kotora.jpeg",
"https://rule34hentai.net/_images/09511511c4c9e9e1f9b795e059a60832/259%20-%20Darkstalkers%20Felicia%20mizuki_kotora.jpeg",
),

# NOTE(review): 're:' strings, range() objects and types are presumably
# interpreted by the test runner as regex / bounds / type checks -- confirm.
"extension" : "jpeg",
"file_url" : r"re:https://rule34hentai.net/_images/.+\.jpeg",
"filename" : r"re:\d+ - \w+",
"height" : range(496, 875),
"id" : range(259, 332),
"md5" : r"re:^[0-9a-f]{32}$",
"search_tags": "mizuki_kotora",
"size" : int,
"tags" : str,
"width" : range(500, 850),
},

# Single post page, handled by Shimmie2PostExtractor.
# Post 264 is expected with a '.jpg' URL here, while the tag listing
# above expects the same post with a '.jpeg' URL.
{
"#url" : "https://rule34hentai.net/post/view/264",
"#category": ("shimmie2", "rule34hentai", "post"),
"#class" : shimmie2.Shimmie2PostExtractor,
"#urls" : "https://rule34hentai.net/_images/1a8eca7c04f8bf325bc993c5751a91c4/264%20-%20Darkstalkers%20Felicia%20mizuki_kotora.jpg",
"#sha1_content": "6c23780bb78673cbff1bca9accb77ea11ec734f3",

"extension": "jpg",
"file_url" : "https://rule34hentai.net/_images/1a8eca7c04f8bf325bc993c5751a91c4/264%20-%20Darkstalkers%20Felicia%20mizuki_kotora.jpg",
"filename" : "264 - Darkstalkers Felicia mizuki_kotora",
"height" : 875,
"id" : 264,
"md5" : "1a8eca7c04f8bf325bc993c5751a91c4",
"size" : 0,
"tags" : "Darkstalkers Felicia mizuki_kotora",
"width" : 657,
},

)

0 comments on commit 01bb75f

Please sign in to comment.