diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 676ba938ad..02b55230b4 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -694,7 +694,7 @@ Consider all sites to be NSFW unless otherwise known.
Pornhub |
https://www.pornhub.com/ |
- Galleries, User Profiles |
+ Galleries, Gifs, Photos, User Profiles |
|
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index d3619da695..6cb2063b48 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -19,6 +19,35 @@ class PornhubExtractor(Extractor):
category = "pornhub"
root = "https://www.pornhub.com"
+ def _init(self):  # pre-set the age-disclaimer cookie for pornhub.com
+ self.cookies.set(
+ "accessAgeDisclaimerPH", "1", domain=".pornhub.com")
+
+ def _pagination(self, user, path):  # yields each AJAX page's text
+ if "/" not in path:  # bare section name without a sub-path
+ path += "/public"  # fall back to the "public" listing
+
+ url = "{}/{}/{}/ajax".format(self.root, user, path)
+ params = {"page": 1}  # page counter, bumped after every yield
+ headers = {
+ "Referer": url[:-5],  # url minus the trailing "/ajax"
+ "X-Requested-With": "XMLHttpRequest",
+ }
+
+ while True:  # never ends by itself; the consumer stops iterating
+ response = self.request(
+ url, method="POST", headers=headers, params=params,
+ allow_redirects=False)  # 3xx responses are handled below
+
+ if 300 <= response.status_code < 400:
+ url = "{}{}/{}/ajax".format(  # follow redirect by hand
+ self.root, response.headers["location"], path)
+ continue  # same page again; NOTE(review): Referer not rebuilt
+
+ yield response.text
+
+ params["page"] += 1
+
class PornhubGalleryExtractor(PornhubExtractor):
"""Extractor for image galleries on pornhub.com"""
@@ -58,9 +87,6 @@ def __init__(self, match):
self._first = None
def items(self):
- self.cookies.set(
- "accessAgeDisclaimerPH", "1", domain=".pornhub.com")
-
data = self.metadata()
yield Message.Directory, data
for num, image in enumerate(self.images(), 1):
@@ -116,17 +142,83 @@ def images(self):
return
+class PornhubGifExtractor(PornhubExtractor):
+ """Extractor for pornhub.com gifs"""
+ subcategory = "gif"
+ directory_fmt = ("{category}", "{user}", "gifs")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/gif/(\d+)"  # group 1: numeric gif id
+ test = (
+ ("https://www.pornhub.com/gif/33643461", {
+ "pattern": r"https://\w+\.phncdn\.com/pics/gifs"
+ r"/033/643/461/33643461a\.webm",
+ "keyword": {
+ "date": "dt:2020-10-31 00:00:00",
+ "extension": "webm",
+ "filename": "33643461a",
+ "id": "33643461",
+ "tags": ["big boobs", "lana rhoades"],
+ "title": "Big boobs",
+ "url": str,
+ "user": "Lana Rhoades",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ PornhubExtractor.__init__(self, match)
+ self.gallery_id = match.group(1)  # the gif id from the URL
+
+ def items(self):
+ url = "{}/gif/{}".format(self.root, self.gallery_id)
+ extr = text.extract_from(self.request(url).text)
+
+ gif = {  # NOTE(review): presumably extr() scans forward - confirm
+ "id" : self.gallery_id,
+ "tags" : extr("data-context-tag='", "'").split(","),
+ "title": extr('"name": "', '"'),
+ "url" : extr('"contentUrl": "', '"'),
+ "date" : text.parse_datetime(
+ extr('"uploadDate": "', '"'), "%Y-%m-%d"),  # date only
+ "user" : extr('data-mxptext="', '"'),
+ }
+
+ yield Message.Directory, gif  # metadata first, then the file URL
+ yield Message.Url, gif["url"], text.nameext_from_url(gif["url"], gif)
+
+
class PornhubUserExtractor(PornhubExtractor):
- """Extractor for all galleries of a pornhub user"""
+ """Extractor for a pornhub user"""
subcategory = "user"
- pattern = (BASE_PATTERN + r"/(users|model|pornstar)/([^/?#]+)"
- "(?:/photos(?:/(public|private|favorites))?)?/?$")
+ pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
+ test = ("https://www.pornhub.com/pornstar/danika-mori",)
+
+ def __init__(self, match):
+ PornhubExtractor.__init__(self, match)
+ self.user = match.group(1)  # e.g. "pornstar/danika-mori"
+
+ def initialize(self):  # overridden with an empty body
+ pass  # NOTE(review): presumably skips base setup - confirm
+
+ def items(self):
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((  # (extractor class, URL) pairs
+ (PornhubPhotosExtractor, base + "photos"),
+ (PornhubGifsExtractor , base + "gifs"),
+ ), ("photos",))  # presumably the default set - TODO confirm
+
+
+class PornhubPhotosExtractor(PornhubExtractor):
+ """Extractor for all galleries of a pornhub user"""
+ subcategory = "photos"
+ pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ "/(photos(?:/[^/?#]+)?)")
test = (
("https://www.pornhub.com/pornstar/danika-mori/photos", {
"pattern": PornhubGalleryExtractor.pattern,
"count": ">= 6",
}),
- ("https://www.pornhub.com/users/flyings0l0/"),
("https://www.pornhub.com/users/flyings0l0/photos/public"),
("https://www.pornhub.com/users/flyings0l0/photos/private"),
("https://www.pornhub.com/users/flyings0l0/photos/favorites"),
@@ -135,33 +227,41 @@ class PornhubUserExtractor(PornhubExtractor):
def __init__(self, match):
PornhubExtractor.__init__(self, match)
- self.type, self.user, self.cat = match.groups()
+ self.user, self.path = match.groups()  # (user path, photos sub-path)
def items(self):
- url = "{}/{}/{}/photos/{}/ajax".format(
- self.root, self.type, self.user, self.cat or "public")
- params = {"page": 1}
- headers = {
- "Referer": url[:-5],
- "X-Requested-With": "XMLHttpRequest",
- }
-
data = {"_extractor": PornhubGalleryExtractor}
- while True:
- response = self.request(
- url, method="POST", headers=headers, params=params,
- allow_redirects=False)
-
- if 300 <= response.status_code < 400:
- url = "{}{}/photos/{}/ajax".format(
- self.root, response.headers["location"],
- self.cat or "public")
- continue
-
+ for page in self._pagination(self.user, self.path):  # until empty page
gid = None
- for gid in text.extract_iter(response.text, 'id="albumphoto', '"'):
+ for gid in text.extract_iter(page, 'id="albumphoto', '"'):
yield Message.Queue, self.root + "/album/" + gid, data
if gid is None:
return
- params["page"] += 1
+
+class PornhubGifsExtractor(PornhubExtractor):
+ """Extractor for a pornhub user's gifs"""
+ subcategory = "gifs"
+ pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
+ "/(gifs(?:/[^/?#]+)?)")  # group 2: "gifs" plus optional sub-path
+ test = (
+ ("https://www.pornhub.com/pornstar/danika-mori/gifs", {
+ "pattern": PornhubGifExtractor.pattern,
+ "count": ">= 42",
+ }),
+ ("https://www.pornhub.com/users/flyings0l0/gifs"),
+ ("https://www.pornhub.com/model/bossgirl/gifs/video"),
+ )
+
+ def __init__(self, match):
+ PornhubExtractor.__init__(self, match)
+ self.user, self.path = match.groups()  # (user path, gifs sub-path)
+
+ def items(self):
+ data = {"_extractor": PornhubGifExtractor}
+ for page in self._pagination(self.user, self.path):
+ gid = None  # stays None when the page has no gif ids
+ for gid in text.extract_iter(page, 'id="gif', '"'):
+ yield Message.Queue, self.root + "/gif/" + gid, data
+ if gid is None:  # empty page: stop consuming _pagination()
+ return
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 68925caaaa..9077a0366a 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -224,6 +224,9 @@
"sketch": "Sketch",
"work": "individual Images",
},
+ "pornhub": {
+ "gifs": "",
+ },
"reddit": {
"home": "Home Feed",
},