Skip to content

Commit

Permalink
[pornhub] add 'gif' support (#4463)
Browse files: browse the repository at this point in the history
  • Loading branch information
mikf committed Aug 29, 2023
1 parent ba84298 commit a783c4f
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 30 deletions.
2 changes: 1 addition & 1 deletion docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Pornhub</td>
<td>https://www.pornhub.com/</td>
<td>Galleries, User Profiles</td>
<td>Galleries, Gifs, Photos, User Profiles</td>
<td></td>
</tr>
<tr>
Expand Down
158 changes: 129 additions & 29 deletions gallery_dl/extractor/pornhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,35 @@ class PornhubExtractor(Extractor):
category = "pornhub"
root = "https://www.pornhub.com"

def _init(self):
    """Set the 'accessAgeDisclaimerPH' cookie for '.pornhub.com'."""
    cookie_name = "accessAgeDisclaimerPH"
    cookie_value = "1"
    self.cookies.set(cookie_name, cookie_value, domain=".pornhub.com")

def _pagination(self, user, path):
    """Yield the response text of successive AJAX listing pages.

    Builds '<root>/<user>/<path>/ajax' and POSTs to it with an increasing
    'page' query parameter, yielding the body of each response.  'path'
    gets '/public' appended when it contains no '/'.

    A 3xx response is not yielded; the same page is requested again from
    a URL rebuilt from the response's 'location' header.  That header may
    hold either an absolute URL or a path relative to the site root; both
    forms are accepted.

    The loop has no exit condition of its own, so the consumer has to
    stop iterating (e.g. once a page contains no more results).
    """
    if "/" not in path:
        path += "/public"

    url = "{}/{}/{}/ajax".format(self.root, user, path)
    params = {"page": 1}
    headers = {
        # the listing URL without its trailing "/ajax"
        "Referer": url[:-5],
        "X-Requested-With": "XMLHttpRequest",
    }

    while True:
        response = self.request(
            url, method="POST", headers=headers, params=params,
            allow_redirects=False)

        if 300 <= response.status_code < 400:
            location = response.headers["location"]
            if not location.startswith(("http://", "https://")):
                # relative redirect target: resolve against the site root
                location = self.root + location
            url = "{}/{}/ajax".format(location, path)
            # retry the same page number against the new URL
            continue

        yield response.text

        params["page"] += 1


class PornhubGalleryExtractor(PornhubExtractor):
"""Extractor for image galleries on pornhub.com"""
Expand Down Expand Up @@ -58,9 +87,6 @@ def __init__(self, match):
self._first = None

def items(self):
self.cookies.set(
"accessAgeDisclaimerPH", "1", domain=".pornhub.com")

data = self.metadata()
yield Message.Directory, data
for num, image in enumerate(self.images(), 1):
Expand Down Expand Up @@ -116,17 +142,83 @@ def images(self):
return


class PornhubGifExtractor(PornhubExtractor):
    """Extractor for individual gifs on pornhub.com"""
    subcategory = "gif"
    directory_fmt = ("{category}", "{user}", "gifs")
    filename_fmt = "{id} {title}.{extension}"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/gif/(\d+)"
    test = (
        ("https://www.pornhub.com/gif/33643461", {
            "pattern": r"https://\w+\.phncdn\.com/pics/gifs"
                       r"/033/643/461/33643461a\.webm",
            "keyword": {
                "date": "dt:2020-10-31 00:00:00",
                "extension": "webm",
                "filename": "33643461a",
                "id": "33643461",
                "tags": ["big boobs", "lana rhoades"],
                "title": "Big boobs",
                "url": str,
                "user": "Lana Rhoades",
            },
        }),
    )

    def __init__(self, match):
        PornhubExtractor.__init__(self, match)
        # digits captured by the '/gif/(\d+)' part of 'pattern'
        self.gallery_id = match.group(1)

    def items(self):
        """Request the gif page and yield its Directory and Url messages."""
        page = self.request(self.root + "/gif/" + self.gallery_id).text
        extr = text.extract_from(page)

        # NOTE(review): extr() presumably walks forward through the page,
        # so the lookups below stay in their original order - confirm
        # before reordering.
        tags = extr("data-context-tag='", "'").split(",")
        title = extr('"name": "', '"')
        url = extr('"contentUrl": "', '"')
        date = text.parse_datetime(extr('"uploadDate": "', '"'), "%Y-%m-%d")
        user = extr('data-mxptext="', '"')

        gif = {
            "id"   : self.gallery_id,
            "tags" : tags,
            "title": title,
            "url"  : url,
            "date" : date,
            "user" : user,
        }

        yield Message.Directory, gif
        yield Message.Url, url, text.nameext_from_url(url, gif)


class PornhubUserExtractor(PornhubExtractor):
    """Extractor for a pornhub user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)/?$"
    test = ("https://www.pornhub.com/pornstar/danika-mori",)

    def __init__(self, match):
        PornhubExtractor.__init__(self, match)
        # '<users|model|pornstar>/<name>' as captured by 'pattern'
        self.user = match.group(1)

    def initialize(self):
        """Do nothing."""
        # NOTE(review): presumably overrides a base-class hook so that no
        # setup runs for this dispatching extractor - confirm.
        pass

    def items(self):
        """Hand the user's 'photos' and 'gifs' URLs to their extractors."""
        user_url = self.root + "/" + self.user + "/"
        extractors = (
            (PornhubPhotosExtractor, user_url + "photos"),
            (PornhubGifsExtractor , user_url + "gifs"),
        )
        # NOTE(review): ("photos",) is presumably the default selection -
        # verify against _dispatch_extractors.
        return self._dispatch_extractors(extractors, ("photos",))


class PornhubPhotosExtractor(PornhubExtractor):
"""Extractor for all galleries of a pornhub user"""
subcategory = "photos"
pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
"/(photos(?:/[^/?#]+)?)")
test = (
("https://www.pornhub.com/pornstar/danika-mori/photos", {
"pattern": PornhubGalleryExtractor.pattern,
"count": ">= 6",
}),
("https://www.pornhub.com/users/flyings0l0/"),
("https://www.pornhub.com/users/flyings0l0/photos/public"),
("https://www.pornhub.com/users/flyings0l0/photos/private"),
("https://www.pornhub.com/users/flyings0l0/photos/favorites"),
Expand All @@ -135,33 +227,41 @@ class PornhubUserExtractor(PornhubExtractor):

def __init__(self, match):
PornhubExtractor.__init__(self, match)
self.type, self.user, self.cat = match.groups()
self.user, self.path = match.groups()

def items(self):
url = "{}/{}/{}/photos/{}/ajax".format(
self.root, self.type, self.user, self.cat or "public")
params = {"page": 1}
headers = {
"Referer": url[:-5],
"X-Requested-With": "XMLHttpRequest",
}

data = {"_extractor": PornhubGalleryExtractor}
while True:
response = self.request(
url, method="POST", headers=headers, params=params,
allow_redirects=False)

if 300 <= response.status_code < 400:
url = "{}{}/photos/{}/ajax".format(
self.root, response.headers["location"],
self.cat or "public")
continue

for page in self._pagination(self.user, self.path):
gid = None
for gid in text.extract_iter(response.text, 'id="albumphoto', '"'):
for gid in text.extract_iter(page, 'id="albumphoto', '"'):
yield Message.Queue, self.root + "/album/" + gid, data
if gid is None:
return

params["page"] += 1

class PornhubGifsExtractor(PornhubExtractor):
    """Extractor for the gifs listed on a pornhub user's pages"""
    subcategory = "gifs"
    pattern = (BASE_PATTERN + r"/((?:users|model|pornstar)/[^/?#]+)"
               "/(gifs(?:/[^/?#]+)?)")
    test = (
        ("https://www.pornhub.com/pornstar/danika-mori/gifs", {
            "pattern": PornhubGifExtractor.pattern,
            "count": ">= 42",
        }),
        ("https://www.pornhub.com/users/flyings0l0/gifs"),
        ("https://www.pornhub.com/model/bossgirl/gifs/video"),
    )

    def __init__(self, match):
        PornhubExtractor.__init__(self, match)
        # the two groups captured by 'pattern': user part and gifs path
        self.user, self.path = match.groups()

    def items(self):
        """Yield a Queue message for every gif ID found on each page."""
        data = {"_extractor": PornhubGifExtractor}
        prefix = self.root + "/gif/"

        for page in self._pagination(self.user, self.path):
            found = False
            for gid in text.extract_iter(page, 'id="gif', '"'):
                found = True
                yield Message.Queue, prefix + gid, data
            # a page without any gif IDs ends the pagination
            if not found:
                return
3 changes: 3 additions & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@
"sketch": "Sketch",
"work": "individual Images",
},
"pornhub": {
"gifs": "",
},
"reddit": {
"home": "Home Feed",
},
Expand Down

0 comments on commit a783c4f

Please sign in to comment.