Skip to content

Commit

Permalink
[cien] add 'files' option (#2885)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Aug 3, 2024
1 parent f68b0ef commit f87783f
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 8 deletions.
16 changes: 16 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1448,6 +1448,22 @@ Description
Process reposts.


extractor.cien.files
--------------------
Type
``list`` of ``strings``
Default
``["image", "video", "download", "gallery"]``
Description
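Determines the type and order of files to be downloaded.

The value may also be given as a single comma-separated string
without spaces, e.g. ``"image,video"``.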

Available types are
``image``,
``video``,
``download``,
``gallery``.


extractor.cyberdrop.domain
--------------------------
Type
Expand Down
26 changes: 20 additions & 6 deletions gallery_dl/extractor/cien.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,24 @@ def items(self):
def _extract_files(self, page):
files = []

self._extract_files_image(page, files)
self._extract_files_video(page, files)
self._extract_files_attachment(page, files)
self._extract_files_gallery(page, files)
filetypes = self.config("files")
if filetypes is None:
self._extract_files_image(page, files)
self._extract_files_video(page, files)
self._extract_files_download(page, files)
self._extract_files_gallery(page, files)
else:
generators = {
"image" : self._extract_files_image,
"video" : self._extract_files_video,
"download": self._extract_files_download,
"gallery" : self._extract_files_gallery,
"gallerie": self._extract_files_gallery,
}
if isinstance(filetypes, str):
filetypes = filetypes.split(",")
for ft in filetypes:
generators[ft.rstrip("s")](page, files)

return files

Expand Down Expand Up @@ -114,14 +128,14 @@ def _extract_files_video(self, page, files):
file["type"] = "video"
files.append(file)

def _extract_files_attachment(self, page, files):
def _extract_files_download(self, page, files):
for download in text.extract_iter(
page, 'class="downloadBlock', "</div>"):
name = text.extr(download, "<p>", "<")

file = text.nameext_from_url(name.rpartition(" ")[0])
file["url"] = text.extr(download, ' href="', '"')
file["type"] = "attachment"
file["type"] = "download"
files.append(file)

def _extract_files_gallery(self, page, files):
Expand Down
4 changes: 2 additions & 2 deletions test/results/cien.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@
"#url" : "https://ci-en.dlsite.com/creator/25509/article/1172460",
"#category": ("", "cien", "article"),
"#class" : cien.CienArticleExtractor,
"#range" : "3",
"#options" : {"files": "download"},
"#pattern" : r"https://media\.ci-en\.jp/private/attachment/creator/00025509/7fd3c039d2277ba9541e82592aca6f6751f6c268404038ccbf1112bcf2f93357/upload/.+\.zip\?px-time=.+",

"filename" : "VP 1.05.4 Tim-v9 ENG rec v3",
"extension": "zip",
"type" : "attachment",
"type" : "download",
},

{
Expand Down

0 comments on commit f87783f

Please sign in to comment.