Skip to content

Commit

Permalink
Refactor test_collection in scraper.
Browse files (browse the repository at this point in the history)
  • Loading branch information
FledgeXu committed Jul 19, 2023
1 parent 7fc9557 commit 3f46c2b
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions nautiluszim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,9 @@ def test_collection(self):
nb_files = sum([len(i.get("files", [])) for i in self.json_collection])
logger.info(f"Collection loaded. {nb_items} items, {nb_files} files")

self.test_files()

def test_files(self):
with zipfile.ZipFile(self.archive_path, "r") as zh:
all_names = zh.namelist()

Expand All @@ -356,16 +359,20 @@ def test_collection(self):
except ValueError:
missing_files.append(entry["title"])

duplicate_file_names = set(
[
filename
for filename in all_file_names
if all_file_names.count(filename) > 1
]
)

if missing_files:
raise ValueError(
"File(s) referenced in collection but missing:\n - "
+ "\n - ".join(missing_files)
)
duplicate_file_names = set([
filename
for filename in all_file_names
if all_file_names.count(filename) > 1
])

if duplicate_file_names:
raise ValueError(
"Files in collection are duplicate:\n - "
Expand Down

0 comments on commit 3f46c2b

Please sign in to comment.