Skip to content
This repository has been archived by the owner on Oct 9, 2024. It is now read-only.

Commit

Permalink
feat: add extraction of archives
Browse files Browse the repository at this point in the history
  • Loading branch information
artem-burashnikov committed Oct 23, 2023
1 parent 8b5e0f0 commit 94c693a
Showing 1 changed file with 58 additions and 0 deletions.
58 changes: 58 additions & 0 deletions depinspect/load/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import lzma
import shutil
import sys
from pathlib import Path

from depinspect.load import fetch, file_management


def extract_xz_archive(archive_path: Path, output_path: Path) -> None:
    """
    Extract data from a .xz archive.

    The archive is decompressed as a stream, chunk by chunk, so the whole
    decompressed payload is never held in memory at once.

    Parameters:
    - archive_path (Path): The path to the .xz archive file.
    - output_path (Path): The path to the output file where the extracted data will be saved.

    Raises:
    - lzma.LZMAError: If there is an issue with the .xz archive.
    - EOFError: If the archive ends before its end-of-stream marker.
    - OSError: If either file cannot be opened, read or written.

    Returns:
    None: The function does not return a value.
    """
    # lzma.open accepts a path directly; no need to open the file separately.
    with lzma.open(archive_path, "rb") as xz_file:
        try:
            with open(output_path, "wb") as output_file:
                shutil.copyfileobj(xz_file, output_file)
        except (lzma.LZMAError, EOFError):
            # Decompression failed part-way through: do not leave a truncated
            # output file behind that could be mistaken for a valid result.
            Path(output_path).unlink(missing_ok=True)
            raise


def main() -> Path:
    """
    Process archives by extracting contents and removing original archive files.

    The archives directory is obtained from ``fetch.main()``. Every file listed
    in it is extracted into ``<index>_Packages.txt`` inside the same directory,
    where ``<index>`` is the file's position in the listing, and the archive is
    removed once it has been extracted successfully.

    If any extraction fails, an error message is written to stderr and the
    process exits with status 1; the failing archive is left in place.

    Returns:
    Path: The path to the directory containing processed archives.
    """
    archives_dir = fetch.main()
    archives = file_management.list_files_in_directory(archives_dir)

    for count, archive_path in enumerate(archives):
        # The listing index keeps output names unique within the directory.
        output_path = archives_dir / f"{count}_Packages.txt"

        try:
            extract_xz_archive(archive_path, output_path)
        except Exception as e:
            # Top-level boundary: report the failure and stop processing.
            print(f"Failed to extract {archive_path}: {e}", file=sys.stderr)
            sys.exit(1)

        # Only reached after a successful extraction.
        file_management.remove_file(archive_path)

    return archives_dir


# Run the extraction pipeline only when this module is executed as a script.
if __name__ == "__main__":
    main()

0 comments on commit 94c693a

Please sign in to comment.