Skip to content

Commit

Permalink
[sankakucomplex] add 'tag' extractor (#258)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed May 27, 2019
1 parent 74c2415 commit bcd1801
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docs/supportedsites.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ rule #34 https://rule34.paheal.net/ Posts, Tag-Searches
Rule 34 https://rule34.xxx/ Pools, Posts, Tag-Searches
Safebooru https://safebooru.org/ Pools, Posts, Tag-Searches
Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional
Sankaku Complex https://www.sankakucomplex.com/ Articles
Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag-Searches
Sen Manga https://raw.senmanga.com/ Chapters
Sense-Scans http://sensescans.com/reader/ Chapters, Manga
Sex.com https://www.sex.com/ Boards, Pins, Search Results
Expand Down
32 changes: 32 additions & 0 deletions gallery_dl/extractor/sankakucomplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,35 @@ def images(self, extr):
"num" : num,
}))
urls.add(url)


class SankakucomplexTagExtractor(SankakucomplexExtractor):
    """Extractor for sankakucomplex articles by tag, category, or author"""
    subcategory = "tag"
    pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
               r"/((?:tag|category|author)/[^/&?#]+)")
    test = (
        ("https://www.sankakucomplex.com/tag/cosplay/", {
            "range": "1-50",
            "pattern": SankakucomplexArticleExtractor.pattern,
        }),
        ("https://www.sankakucomplex.com/category/anime/"),
        ("https://www.sankakucomplex.com/author/rift/page/5/"),
    )

    def items(self):
        """Queue every article linked from the listing pages.

        Requests "<root>/<path>/page/<n>/" for n = 1, 2, 3, ... and yields
        one ``(Message.Queue, article_url, data)`` tuple per article link
        found, where ``data`` delegates the URL to
        SankakucomplexArticleExtractor. Iteration ends when a listing page
        responds with HTTP 404.

        NOTE(review): ``self.root`` and ``self.path`` are not set in this
        class; presumably the base class provides them (``path`` being the
        group captured by ``pattern``) -- confirm against the base class.
        """
        pnum = 1
        last = None
        data = {"_extractor": SankakucomplexArticleExtractor}

        while True:
            url = "{}/{}/page/{}/".format(self.root, self.path, pnum)
            # 404 is an expected response here: it marks the end of the
            # listing rather than an error.
            response = self.request(url, expect=(404,))
            if response.status_code == 404:
                return

            for article_url in text.extract_iter(
                    response.text, 'data-direct="', '"'):
                # Skip a URL identical to the one just emitted; only
                # directly consecutive repeats are filtered.
                if article_url != last:
                    last = article_url
                    yield Message.Queue, article_url, data

            # Advance to the next listing page. There must be no 'return'
            # before this point, or pagination stops after the first page.
            pnum += 1

0 comments on commit bcd1801

Please sign in to comment.