Skip to content

Commit

Permalink
[deviantart] fix & improve journal/literature extraction (#6254, #6207)
Browse files Browse the repository at this point in the history
Fetch journal/literature text from the `window.__INITIAL_STATE__` JSON embedded in the deviation's HTML page,
since the API does not work reliably for this and is unusable for sta.sh journals.
  • Loading branch information
mikf committed Oct 1, 2024
1 parent 8f09e4e commit ed859f0
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 21 deletions.
56 changes: 35 additions & 21 deletions gallery_dl/extractor/deviantart.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,24 +177,7 @@ def items(self):
yield self.commit(deviation, deviation["flash"])

if self.commit_journal:
if "excerpt" in deviation:
# journal = self.api.deviation_content(
# deviation["deviationid"])
if not self.eclipse_api:
self.eclipse_api = DeviantartEclipseAPI(self)
content = self.eclipse_api.deviation_extended_fetch(
deviation["index"],
deviation["author"]["username"],
"journal",
)["deviation"]["textContent"]
html = content["html"]["markup"]
if html.startswith("{"):
html = content["excerpt"].replace("\n", "<br />")
journal = {"html": html}
elif "body" in deviation:
journal = {"html": deviation.pop("body")}
else:
journal = None
journal = self._extract_journal(deviation)
if journal:
if self.extra:
deviation["_journal"] = journal["html"]
Expand Down Expand Up @@ -375,6 +358,33 @@ def _commit_journal_text(self, deviation, journal):
deviation["extension"] = "txt"
return Message.Url, txt, deviation

def _extract_journal(self, deviation):
if "excerpt" in deviation:
# # empty 'html'
# return self.api.deviation_content(deviation["deviationid"])

if "_page" in deviation:
page = deviation["_page"]
del deviation["_page"]
else:
page = self._limited_request(deviation["url"]).text

state = util.json_loads(text.extr(
page, 'window.__INITIAL_STATE__ = JSON.parse("', '");')
.replace("\\\\", "\\").replace("\\'", "'").replace('\\"', '"'))

deviation = state["@@entities"]["deviation"].popitem()[1]
content = deviation["textContent"]

html = content["html"]["markup"]
if html.startswith("{"):
html = content["excerpt"].replace("\n", "<br />")
return {"html": html}

if "body" in deviation:
return {"html": deviation.pop("body")}
return None

def _extract_content(self, deviation):
content = deviation["content"]

Expand Down Expand Up @@ -728,6 +738,7 @@ def deviations(self, stash_id=None):
uuid = text.extr(page, '//deviation/', '"')
if uuid:
deviation = self.api.deviation(uuid)
deviation["_page"] = page
deviation["index"] = text.parse_int(text.extr(
page, '\\"deviationId\\":', ','))
yield deviation
Expand Down Expand Up @@ -939,11 +950,14 @@ def deviations(self):
else:
url = "{}/view/{}/".format(self.root, self.deviation_id)

uuid = text.extr(self._limited_request(url).text,
'"deviationUuid\\":\\"', '\\')
page = self._limited_request(url, notfound="deviation").text
uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
if not uuid:
raise exception.NotFoundError("deviation")
return (self.api.deviation(uuid),)

deviation = self.api.deviation(uuid)
deviation["_page"] = page
return (deviation,)


class DeviantartScrapsExtractor(DeviantartExtractor):
Expand Down
25 changes: 25 additions & 0 deletions test/results/deviantart.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,16 @@
"extension": "swf",
},

{
"#url" : "https://www.deviantart.com/justatest235723/art/video-1103119114",
"#comment" : "video",
"#class" : deviantart.DeviantartDeviationExtractor,
"#pattern" : r"/f/940f2d05-c5eb-4917-8192-7eb6a2d508c6/di8ro5m-e2a5bdf0-daee-4e18-bede-fbfc394d6c65\.mp4\?token=ey",

"filename" : "video_63aebdd4bc0323da460796b9a2ac8522_by_justatest235723-di8ro5m",
"extension": "mp4",
},

{
"#url" : "https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498",
"#comment" : "sta.sh URLs from description (#302)",
Expand Down Expand Up @@ -810,6 +820,21 @@
"#sha1_url": "8ca1dc8df53d3707c778d08a604f9ad9ddba7469",
},

{
"#url" : "https://www.deviantart.com/stash/09z3557z648",
"#comment" : "sta.sh journal (#6207)",
"#class" : deviantart.DeviantartStashExtractor,
"#pattern" : """text:<!DOCTYPE html>\n""",
},

{
"#url" : "https://www.deviantart.com/starvinglunatic/art/Against-the-world-chapter-1-50968347",
"#comment" : "literature (#6254)",
"#class" : deviantart.DeviantartDeviationExtractor,
"#pattern" : """text:<!DOCTYPE html>\n""",
},


{
"#url" : "https://www.deviantart.com/neotypical/art/985226590",
"#comment" : "subscription locked (#4567)",
Expand Down

0 comments on commit ed859f0

Please sign in to comment.