Skip to content

Commit

Permalink
[kemonoparty] add 'revision_hash' metadata (mikf#4706, mikf#4727, mik…
Browse files Browse the repository at this point in the history
…f#5013)

A SHA1 hexdigest of other relevant metadata fields like
title, content, file and attachment URLs.

This value does NOT reflect which revisions are listed on the website.
Neither does 'edited', nor any other metadata field or combination of fields.
  • Loading branch information
mikf authored and bradenhilton committed Feb 5, 2024
1 parent 8ec3d5c commit 1418ccc
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
26 changes: 22 additions & 4 deletions gallery_dl/extractor/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
"""Extractors for https://kemono.party/"""

from .common import Extractor, Message
from .. import text, exception
from .. import text, util, exception
from ..cache import cache, memcache
import itertools
import json
import re

BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)"
Expand All @@ -37,10 +38,14 @@ def __init__(self, match):
Extractor.__init__(self, match)

def _init(self):
    """Set up per-instance options, cookies, and precompiled helpers."""
    self.revisions = self.config("revisions")
    self._prepare_ddosguard_cookies()

    # Captures the path of a 'src="..."' value, with or without a
    # kemono/coomer host prefix: either '/inline/...' or
    # '/xx/yy/<64 hex digits>.<ext>'.
    inline_pattern = (
        r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
        r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)'
    )
    self._find_inline = re.compile(inline_pattern).findall

    # Compact JSON encoder with sorted keys; _revision_hash() feeds its
    # output to the hash function.
    encoder = json.JSONEncoder(
        ensure_ascii=False,
        check_circular=False,
        sort_keys=True,
        separators=(",", ":"),
    )
    self._json_dumps = encoder.encode

def items(self):
find_hash = re.compile(HASH_PATTERN).match
Expand Down Expand Up @@ -223,11 +228,23 @@ def _post_revisions(self, url):

idx = len(revs)
for rev in revs:
rev["revision_hash"] = self._revision_hash(rev)
rev["revision_index"] = idx
idx -= 1

return revs

def _revision_hash(self, revision):
    """Return the hash of the content-relevant fields of 'revision'.

    The top-level keys 'revision_id', 'added', 'next' and 'prev', as well
    as the 'name' of the file and of every attachment, are excluded.
    What remains is serialized with self._json_dumps and passed to
    util.sha1.

    'revision' must provide a 'file' dict and an 'attachments' list of
    dicts; a missing key raises KeyError. The object passed in is left
    unmodified.
    """
    rev = revision.copy()
    for key in ("revision_id", "added", "next", "prev"):
        rev.pop(key, None)

    # dict.copy() is shallow: 'file' and the attachment dicts are still the
    # caller's objects. Build name-less copies instead of popping in place,
    # so the post metadata keeps its file and attachment names.
    rev["file"] = {
        key: value
        for key, value in rev["file"].items()
        if key != "name"
    }
    rev["attachments"] = [
        {key: value for key, value in att.items() if key != "name"}
        for att in rev["attachments"]
    ]

    return util.sha1(self._json_dumps(rev))


def _validate(response):
return (response.headers["content-length"] != "9" or
Expand All @@ -252,13 +269,13 @@ def posts(self):
url = self.api_url
params = text.parse_query(self.query)
params["o"] = text.parse_int(params.get("o"))
revisions = self.config("revisions")

while True:
posts = self.request(url, params=params).json()

if revisions:
if self.revisions:
for post in posts:
post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
post_url = "{}/post/{}".format(self.api_url, post["id"])
try:
Expand Down Expand Up @@ -296,7 +313,8 @@ def __init__(self, match):
def posts(self):
if not self.revision:
post = self.request(self.api_url).json()
if self.config("revisions"):
if self.revisions:
post["revision_hash"] = self._revision_hash(post)
post["revision_id"] = 0
try:
revs = self._post_revisions(self.api_url)
Expand Down
2 changes: 2 additions & 0 deletions test/results/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@

"revision_id": 142470,
"revision_index": 2,
"revision_hash": "e0e93281495e151b11636c156e52bfe9234c2a40",
},

{
Expand All @@ -190,6 +191,7 @@

"revision_id": range(134996, 3052965),
"revision_index": range(1, 9),
"revision_hash": r"re:^[0-9a-f]{40}$",
},


Expand Down

0 comments on commit 1418ccc

Please sign in to comment.