Skip to content

Commit

Permalink
fix bug when processing input file comments (#2808)
Browse files Browse the repository at this point in the history
and move 'parse_inputfile()' to util.py
  • Loading branch information
mikf committed Oct 8, 2022
1 parent 14717f3 commit a6e2d96
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 78 deletions.
80 changes: 2 additions & 78 deletions gallery_dl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
# published by the Free Software Foundation.

import sys
import json
import logging
from . import version, config, option, output, extractor, job, util, exception

Expand All @@ -32,81 +31,6 @@ def progress(urls, pformat):
yield pinfo["url"]


def parse_inputfile(file, log):
    """Filter and process strings from an input file.

    Lines starting with '#' and empty lines will be ignored.

    Lines starting with '-' will be interpreted as a key-value pair
    separated by an '=', where 'key' is a dot-separated option name and
    'value' is a JSON-parsable value. These configuration options will be
    applied while processing the next URL.

    Lines starting with '-G' are the same as above, except these options
    will be applied for *all* following URLs, i.e. they are Global.

    Everything else will be used as a potential URL. A trailing comment,
    introduced by a space or tab followed by '#', is removed from it.

    Example input file:

    # settings global options
    -G base-directory = "/tmp/"
    -G skip = false

    # setting local options for the next URL
    -filename="spaces_are_optional.jpg"
    -skip = true

    https://example.org/

    # next URL uses default filename and 'skip' is false.
    https://example.com/index.htm # comment1
    https://example.com/404.htm   # comment2

    Args:
        file: Iterable of text lines (e.g. an open text file or stdin).
        log: Logger-like object; its 'warning' method is called for
            malformed option lines, which are then skipped.

    Yields:
        A plain URL string when no options are pending, otherwise a
        'util.ExtendedUrl' carrying the pending global and local options.
    """
    gconf = []
    lconf = []

    for line in file:
        line = line.strip()

        if not line or line[0] == "#":
            # empty line or comment
            continue

        if line[0] == "-":
            # config spec
            if len(line) >= 2 and line[1] == "G":
                conf = gconf
                line = line[2:]
            else:
                conf = lconf
                line = line[1:]

            key, sep, value = line.partition("=")
            if not sep:
                log.warning(
                    "input file: invalid <key>=<value> pair: %s", line)
                continue

            try:
                value = json.loads(value.strip())
            except ValueError as exc:
                log.warning(
                    "input file: unable to parse '%s': %s", value, exc)
                continue

            # split 'a.b.c' into the option path ['a', 'b'] and name 'c'
            key = key.strip().split(".")
            conf.append((key[:-1], key[-1], value))
            continue

        # url: cut at the FIRST comment marker, whichever kind it is.
        # Testing for " #" before "\t#" left part of the comment in the
        # URL when a tab-introduced comment itself contained " #".
        markers = [
            pos for pos in (line.find(" #"), line.find("\t#")) if pos >= 0
        ]
        if markers:
            line = line[:min(markers)].rstrip()

        if gconf or lconf:
            yield util.ExtendedUrl(line, gconf, lconf)
            # both lists are handed over with this URL and started afresh
            gconf = []
            lconf = []
        else:
            yield line


def main():
try:
if sys.stdout and sys.stdout.encoding.lower() != "utf-8":
Expand Down Expand Up @@ -275,12 +199,12 @@ def main():
try:
if inputfile == "-":
if sys.stdin:
urls += parse_inputfile(sys.stdin, log)
urls += util.parse_inputfile(sys.stdin, log)
else:
log.warning("input file: stdin is not readable")
else:
with open(inputfile, encoding="utf-8") as file:
urls += parse_inputfile(file, log)
urls += util.parse_inputfile(file, log)
except OSError as exc:
log.warning("input file: %s", exc)

Expand Down
76 changes: 76 additions & 0 deletions gallery_dl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,82 @@ def language_to_code(lang, default=None):
}


def parse_inputfile(file, log):
    """Yield the URLs listed in an input file, with any attached options.

    Each line is stripped of surrounding whitespace and then classified:

    * Blank lines and lines beginning with '#' are skipped.
    * Lines beginning with '-' are option specs of the form
      '<key>=<value>', where <key> is a dot-separated option name and
      <value> must be valid JSON. Such options are collected for the
      next URL.
    * Lines beginning with '-G' are option specs as well, but collected
      as Global options, meant for *all* following URLs.
    * Any other line is treated as a potential URL. A trailing comment,
      i.e. whitespace followed by '#', is removed from it.

    Example input file:

    # settings global options
    -G base-directory = "/tmp/"
    -G skip = false

    # setting local options for the next URL
    -filename="spaces_are_optional.jpg"
    -skip = true

    https://example.org/

    # next URL uses default filename and 'skip' is false.
    https://example.com/index.htm # comment1
    https://example.com/404.htm   # comment2

    Malformed option specs are reported through 'log.warning' and
    ignored. URLs without pending options are yielded as plain strings;
    otherwise an 'ExtendedUrl' holding the collected options is yielded.
    """
    global_opts = []
    local_opts = []
    remove_comment = None  # compiled on first use only

    for raw_line in file:
        entry = raw_line.strip()

        # nothing to do for blank lines and full-line comments
        if not entry or entry.startswith("#"):
            continue

        if entry.startswith("-"):
            # option spec: pick the target list and drop the prefix
            if entry.startswith("-G"):
                target, spec = global_opts, entry[2:]
            else:
                target, spec = local_opts, entry[1:]

            name, equals, raw_value = spec.partition("=")
            if not equals:
                log.warning(
                    "input file: invalid <key>=<value> pair: %s", spec)
                continue

            try:
                parsed = json.loads(raw_value.strip())
            except ValueError as exc:
                log.warning(
                    "input file: unable to parse '%s': %s", raw_value, exc)
                continue

            # 'a.b.c' -> path ['a', 'b'] and final option name 'c'
            *path, option = name.strip().split(".")
            target.append((path, option, parsed))
            continue

        # everything else is a URL, possibly with a trailing comment
        url = entry
        if " #" in url or "\t#" in url:
            if remove_comment is None:
                remove_comment = re.compile(r"\s+#.*").sub
            url = remove_comment("", url)

        if not (global_opts or local_opts):
            yield url
            continue

        yield ExtendedUrl(url, global_opts, local_opts)
        # the collected options travel with this URL; start over
        global_opts = []
        local_opts = []


class UniversalNone():
"""None-style object that supports more operations than None itself"""
__slots__ = ()
Expand Down

0 comments on commit a6e2d96

Please sign in to comment.