Skip to content

Commit

Permalink
[patreon] improve 'campaign_id' handling (#4699, #4715)
Browse files Browse the repository at this point in the history
- add ways to directly specify a 'campaign_id'
  - 'campaign-id' config option
  - 'c' or 'campaign_id' URL query parameter
- more descriptive error messages
- show 'campaign_id' value in debug log
  • Loading branch information
mikf committed Oct 25, 2023
1 parent 31dbbff commit 12a800c
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 16 deletions.
14 changes: 14 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2440,6 +2440,20 @@ Description
Note: This requires 1 additional HTTP request per post.


extractor.patreon.campaign-id
-----------------------------
Type
``string``
Default
``"auto"``
Description
Alternative way of specifying the ``campaign_id`` value of a creator
in case the automatic extraction method no longer functions.

Another way of specifying this value is using a ``c`` or ``campaign_id``
URL query parameter, e.g. ``https://www.patreon.com/NAME?c=12345``.


extractor.patreon.files
-----------------------
Type
Expand Down
50 changes: 34 additions & 16 deletions gallery_dl/extractor/patreon.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,34 +267,52 @@ def __init__(self, match):

# Resolve the campaign ID and filter string from the URL query, build the
# "posts" URL from them, and return the result of paginating over that URL.
def posts(self):
query = text.parse_query(self.query)
campaign_id = self._get_campaign_id(query)
filters = self._get_filters(query)

# NOTE(review): this text is a rendered diff without +/- markers; the next
# three lines look like the removed (pre-change) side of the hunk. The same
# "u" lookup is performed inside _get_campaign_id() - confirm against the
# real file before treating them as live code.
creator_id = query.get("u")
if creator_id:
url = "{}/user/posts?u={}".format(self.root, creator_id)
# Log the resolved value so it is visible in debug output.
self.log.debug("campaign_id: %s", campaign_id)

# Query string: campaign filter, two fixed filters, any user-supplied
# filters, and the sort order (defaults to "-published_at").
url = self._build_url("posts", (
"&filter[campaign_id]=" + campaign_id +
"&filter[contains_exclusive_posts]=true"
"&filter[is_draft]=false" + filters +
"&sort=" + query.get("sort", "-published_at")
))
return self._pagination(url)

# Determine the campaign ID to use, trying in order:
#   1. the "campaign-id" config option (unless unset/empty or "auto"),
#   2. a "c" or "campaign_id" entry in the parsed URL query,
#   3. the bootstrap data extracted from the creator's posts page.
# Raises exception.StopExtraction when step 3 cannot find the ID.
def _get_campaign_id(self, query):
campaign_id = self.config("campaign-id")
if campaign_id and campaign_id != "auto":
# str() so a non-string config value is still returned as a string
return str(campaign_id)

campaign_id = query.get("c") or query.get("campaign_id")
if campaign_id:
return campaign_id

# Fall back to fetching a page: by user ID when "u" is present in the
# query, otherwise by creator name.
user_id = query.get("u")
if user_id:
url = "{}/user/posts?u={}".format(self.root, user_id)
else:
url = "{}/{}/posts".format(self.root, self.creator)
page = self.request(url, notfound="creator").text

try:
# Pre-bind 'data' so the debug log in the except handler below can
# reference it even if _extract_bootstrap() itself raises.
data = None
data = self._extract_bootstrap(page)
# NOTE(review): the following five lines (the 'campaign_id =' assignment, the
# bare 'except', the NotFoundError raise, the blank line, and the 'filters ='
# opener) look like the removed (pre-change) side of this rendered diff hunk;
# the added replacement lines follow them - confirm against the real file.
campaign_id = data["campaign"]["data"]["id"]
except (KeyError, ValueError):
raise exception.NotFoundError("creator")

filters = "".join(
return data["campaign"]["data"]["id"]
except (KeyError, ValueError) as exc:
# Dump whatever was extracted (None if extraction failed) for debugging,
# then stop with a message naming the exception class and its value.
self.log.debug(data)
raise exception.StopExtraction(
"Unable to extract campaign ID (%s: %s)",
exc.__class__.__name__, exc)

# Build the extra "&filter[...]=..." query-string fragment from every entry of
# 'query' whose key starts with "filters["; returns "" when there are none.
def _get_filters(self, query):
return "".join(
# key[8:] drops the 8-character "filters[" prefix and keeps "NAME]", so
# each piece reads "&filter[NAME]=VALUE" with VALUE passed through
# text.escape().
"&filter[{}={}".format(key[8:], text.escape(value))
for key, value in query.items()
if key.startswith("filters[")
)

# NOTE(review): the lines below duplicate the URL construction shown in
# posts() and look like the removed (pre-change) tail of the old posts() body
# in this rendered diff, not part of _get_filters() - confirm against the
# real file.
url = self._build_url("posts", (
"&filter[campaign_id]=" + campaign_id +
"&filter[contains_exclusive_posts]=true"
"&filter[is_draft]=false" + filters +
"&sort=" + query.get("sort", "-published_at")
))
return self._pagination(url)


class PatreonUserExtractor(PatreonExtractor):
"""Extractor for media from creators supported by you"""
Expand Down

0 comments on commit 12a800c

Please sign in to comment.