Skip to content

Commit

Permalink
[kemonoparty] implement filtering duplicate revisions (#5013)
Browse files Browse the repository at this point in the history
Set 'revisions' to "unique" to make the extractor ignore duplicate revisions.
  • Loading branch information
mikf committed Jan 26, 2024
1 parent c28475d commit afd20ef
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 24 deletions.
5 changes: 4 additions & 1 deletion docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2212,12 +2212,15 @@ Description
extractor.kemonoparty.revisions
-------------------------------
Type
``bool``
* ``bool``
* ``string``
Default
``false``
Description
Extract post revisions.

Set this to ``"unique"`` to filter out duplicate revisions, i.e. consecutive revisions that share the same ``revision_hash``.

Note: This requires 1 additional HTTP request per post.


Expand Down
59 changes: 36 additions & 23 deletions gallery_dl/extractor/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def __init__(self, match):

def _init(self):
self.revisions = self.config("revisions")
if self.revisions:
self.revisions_unique = (self.revisions == "unique")
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
Expand Down Expand Up @@ -222,8 +224,37 @@ def _discord_channels(self, server):
self.root, server)
return self.request(url).json()

@memcache(keyarg=1)
def _post_revisions(self, url):
def _revisions_post(self, post, url):
    """Return 'post' followed by the revisions fetched for it.

    'post' is tagged with "revision_id" 0 and placed in front of the
    entries returned by requesting 'url' + "/revisions".  Every entry
    receives a "revision_hash" and a "revision_index"; the index counts
    down from the number of returned entries to 1.

    If the revisions request fails with an HttpError, a 1-tuple holding
    only 'post' (with "revision_index" 1) is returned instead.

    When 'self.revisions_unique' is set, an entry is dropped if its
    "revision_hash" equals that of the entry directly before it.
    """
    post["revision_id"] = 0

    try:
        revisions = self.request(url + "/revisions").json()
    except exception.HttpError:
        # no revision data available: the post is its own only revision
        post["revision_hash"] = self._revision_hash(post)
        post["revision_index"] = 1
        return (post,)

    # treat the current post as the first entry of the revision list
    revisions.insert(0, post)
    for entry in revisions:
        entry["revision_hash"] = self._revision_hash(entry)

    if self.revisions_unique:
        # keep only entries whose hash differs from the preceding one
        deduped = []
        previous = None
        for entry in revisions:
            digest = entry["revision_hash"]
            if digest == previous:
                continue
            previous = digest
            deduped.append(entry)
        revisions = deduped

    # number the remaining entries in descending order, ending at 1
    total = len(revisions)
    for offset, entry in enumerate(revisions):
        entry["revision_index"] = total - offset

    return revisions

def _revisions_all(self, url):
revs = self.request(url + "/revisions").json()

idx = len(revs)
Expand Down Expand Up @@ -277,18 +308,8 @@ def posts(self):

if self.revisions:
for post in posts:
post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
post_url = "{}/post/{}".format(self.api_url, post["id"])
try:
revs = self._post_revisions(post_url)
except exception.HttpError:
post["revision_index"] = 1
yield post
else:
post["revision_index"] = len(revs) + 1
yield post
yield from revs
yield from self._revisions_post(post, post_url)
else:
yield from posts

Expand Down Expand Up @@ -316,18 +337,10 @@ def posts(self):
if not self.revision:
post = self.request(self.api_url).json()
if self.revisions:
post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
try:
revs = self._post_revisions(self.api_url)
except exception.HttpError:
post["revision_index"] = 1
else:
post["revision_index"] = len(revs) + 1
return itertools.chain((post,), revs)
return self._revisions_post(post, self.api_url)
return (post,)

revs = self._post_revisions(self.api_url)
revs = self._revisions_all(self.api_url)
if not self.revision_id:
return revs

Expand Down
15 changes: 15 additions & 0 deletions test/results/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,21 @@
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
},

{
"#url" : "https://kemono.su/patreon/user/3161935/post/68231671",
"#comment" : "unique revisions (#5013)",
"#category": ("", "kemonoparty", "patreon"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#options" : {"revisions": "unique"},
"#urls" : "https://kemono.su/data/88/52/88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86.jpg",

"filename" : "wip update",
"hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86",
"revision_id" : 0,
"revision_index": 1,
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
},

{
"#url" : "https://kemono.party/patreon/user/3161935/post/68231671/revisions",
"#comment" : "revisions (#4498)",
Expand Down

0 comments on commit afd20ef

Please sign in to comment.