Skip to content
This repository has been archived by the owner on Mar 2, 2022. It is now read-only.

Commit

Permalink
[FIX]: Parsing tweaks.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason Yip committed Sep 23, 2021
1 parent aa5a526 commit 27bfb03
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 14 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.1.3
2.1.4
9 changes: 8 additions & 1 deletion musescore_scraper/MuseScraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import warnings
from operator import itemgetter

from .helper import _valid_url




Expand Down Expand Up @@ -118,7 +120,6 @@ async def get_score_tags() -> str:
"Keywords": await get_score_tags(),
}

# svgs = await page.evaluate(bytes(get_data("musescore_scraper", "script.js"), "utf-8"))
svgs: List[str] = await page.evaluate(str(get_data("musescore_scraper",
"script.js",
), "utf-8"))
Expand Down Expand Up @@ -261,6 +262,9 @@ async def to_pdf(
:rtype: Output destination as ``pathlib.Path`` object.
May or may not differ depending on the output argument.
"""
if not _valid_url(url):
raise TypeError("Invalid URL.")

return self._convert(output, await asyncio.wait_for(
self._pyppeteer_main(url), self.timeout
))
Expand Down Expand Up @@ -319,6 +323,9 @@ def to_pdf(
:rtype: Output destination as ``pathlib.Path`` object.
May or may not differ depending on the output argument.
"""
if not _valid_url(url):
raise TypeError("Invalid URL.")

return self._convert(output, asyncio.get_event_loop().run_until_complete(
asyncio.wait_for(self._pyppeteer_main(url), self.timeout)
))
24 changes: 15 additions & 9 deletions musescore_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
import argparse

from pathlib import Path
from urllib.parse import urlparse
from urllib.parse import urlparse, urljoin
from typing import Optional, Union, List
from .MuseScraper import MuseScraper, AsyncMuseScraper
from .helper import _valid_url
import asyncio
from functools import partial

def _url_parse(url: str) -> str:
    """argparse ``type=`` hook: validate *url* and return it unchanged.

    :param url: candidate MuseScore score URL from the command line.
    :raises argparse.ArgumentTypeError: if *url* is not a well-formed URL,
        so argparse reports a clean usage error instead of a traceback.
    """
    if not _valid_url(url):
        raise argparse.ArgumentTypeError("Invalid URL.")
    return url

def _debug_path(path: str) -> Union[Path, str]:
return Path(path) if path else path
Expand All @@ -20,12 +22,15 @@ def _main(args: Union[None, List[str], str] = None) -> None:

parser = argparse.ArgumentParser(description="A MuseScore PDF scraper."
+ " Input a URL to a MuseScore score"
+ ", then outputs a multi-page PDF.")
+ ", then outputs a multi-page PDF."
)
parser.add_argument("urls", nargs='+', type=_url_parse,
help="an amount of valid MuseScore score URLs")
parser.add_argument("-o", "--output", nargs='*', type=Path, help="file destination(s)")
help="an amount of valid MuseScore score URLs"
)
parser.add_argument("-o", "--output", nargs='+', type=Path, help="file destination(s)")
parser.add_argument("-t", "--timeout", type=int, help=
"how many milliseconds should be given before aborting.")
"how many milliseconds should be given before aborting."
)
parser.add_argument("-d", "--debug-log", type=_debug_path, nargs="?", const="",
help="receive debug messages, to a log file if destination provided."
)
Expand All @@ -35,7 +40,8 @@ def _main(args: Union[None, List[str], str] = None) -> None:

args = parser.parse_args(args)

assert not args.output or len(args.urls) == len(args.output)
if not (not args.output or len(args.urls) == len(args.output)):
parser.error("# of outputs must match # of urls or omit output flag.")

outputs: List[Optional[Path]] = [None] * len(args.urls)
def set_output(i: int, task: asyncio.Task) -> None:
Expand All @@ -57,7 +63,7 @@ async def run():
task.add_done_callback(partial(set_output, i))
tasks.append(task)

result = await asyncio.wait_for(asyncio.gather(*tasks), args.timeout)
result = await asyncio.gather(*tasks)

return result

Expand Down
6 changes: 6 additions & 0 deletions musescore_scraper/helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from urllib.parse import urlparse

def _valid_url(url: str) -> bool:
final_url = urlparse(url + '/' * int(not url.endswith('/')))
return (all([final_url.scheme, final_url.netloc, final_url.path])
and '.' in final_url.netloc)
17 changes: 14 additions & 3 deletions test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
import pytest
from typing import Any
from tempfile import NamedTemporaryFile
import argparse

sys.path.insert(0, str(Path(__file__).parents[1].resolve()))

from musescore_scraper import main
from musescore_scraper import _main


URLS = [
Expand All @@ -32,22 +33,32 @@ def test_main():
for i in range(len(URLS)):
with NamedTemporaryFile(suffix=".pdf", delete=False) as tf:
fname: Path = Path(tf.name)
main([URLS[i], "-o", str(fname)])
_main([URLS[i], "-o", str(fname)])

assert fname.read_bytes() == DATA_PDFS[i]

fname.unlink()


def test_main_multiple():
    """One ``_main`` invocation with several URLs writes each score's PDF
    to its matching output file."""
    handles = [NamedTemporaryFile(suffix=".pdf", delete=False) for _ in URLS]
    names = [handle.name for handle in handles]

    _main([*URLS, "-o", *names])

    for i, name in enumerate(names):
        out = Path(name)
        assert out.read_bytes() == DATA_PDFS[i]

        out.unlink()


def test_invalid_opts():
    """CLI rejects a url/output count mismatch and a malformed URL."""
    # Several URLs but a single output file -> argparse-level error.
    with NamedTemporaryFile(suffix=".pdf") as tf, \
            pytest.raises((argparse.ArgumentError, SystemExit)):
        _main([*URLS, "-o", tf.name])

    # "foo" fails URL validation in the type= hook.
    with pytest.raises((argparse.ArgumentTypeError, SystemExit)):
        _main(["foo"])

0 comments on commit 27bfb03

Please sign in to comment.