Skip to content
This repository has been archived by the owner on Oct 9, 2024. It is now read-only.

Commit

Permalink
style: add docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
artem-burashnikov committed Dec 9, 2023
1 parent d82f0ce commit bbd4edc
Show file tree
Hide file tree
Showing 8 changed files with 270 additions and 47 deletions.
55 changes: 45 additions & 10 deletions depinspect/archives/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,39 @@


def extract_xz_archive(archive_path: Path, output_path: Path) -> None:
    """
    Extract the contents of an XZ archive to a specified output file.

    The archive is decompressed in chunks rather than being read into memory
    as a whole. Data is first written to a temporary sibling file
    (``<output name>.part``) which replaces ``output_path`` only after the
    whole archive has been decompressed, so a failed extraction never leaves
    a truncated output file behind.

    Parameters
    ----------
    archive_path : Path
        Path to the XZ-compressed archive.
    output_path : Path
        Path to the output file where the contents will be written.

    Raises
    ------
    OSError
        If the archive cannot be opened or the output cannot be written.
    lzma.LZMAError
        If the archive does not contain valid XZ data.
    EOFError
        If the archive ends before the end-of-stream marker is reached.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import shutil

    partial_path = output_path.with_name(f"{output_path.name}.part")
    try:
        with lzma.open(archive_path, "rb") as xz_archive, open(
            partial_path, "wb"
        ) as output_file:
            # Stream chunk by chunk so memory use stays bounded.
            shutil.copyfileobj(xz_archive, output_file)
        # Only publish the result once decompression has fully succeeded.
        partial_path.replace(output_path)
    finally:
        # No-op after a successful replace; removes the leftover on failure.
        partial_path.unlink(missing_ok=True)


def extract_bz2_archive(archive_path: Path, output_path: Path) -> None:
    """
    Extract the contents of a BZ2 archive to a specified output file.

    The archive is decompressed in chunks rather than being read into memory
    as a whole. Data is first written to a temporary sibling file
    (``<output name>.part``) which replaces ``output_path`` only after the
    whole archive has been decompressed, so a failed extraction never leaves
    a truncated output file behind.

    Parameters
    ----------
    archive_path : Path
        Path to the BZ2-compressed archive.
    output_path : Path
        Path to the output file where the contents will be written.

    Raises
    ------
    OSError
        If the archive cannot be opened, does not contain valid BZ2 data,
        or the output cannot be written.
    EOFError
        If the archive ends before the end-of-stream marker is reached.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import shutil

    partial_path = output_path.with_name(f"{output_path.name}.part")
    try:
        with bz2.open(archive_path, "rb") as bz_archive, open(
            partial_path, "wb"
        ) as output_file:
            # Stream chunk by chunk so memory use stays bounded.
            shutil.copyfileobj(bz_archive, output_file)
        # Only publish the result once decompression has fully succeeded.
        partial_path.replace(output_path)
    finally:
        # No-op after a successful replace; removes the leftover on failure.
        partial_path.unlink(missing_ok=True)


def process_archives(
Expand All @@ -32,6 +50,22 @@ def process_archives(
archive_extension: str,
extractor: Callable[[Path, Path], None],
) -> None:
"""
Process archives in the input directory.
Parameters
----------
input_dir : Path
Path to the directory containing input archives.
output_dir : Path
Path to the directory where extracted files will be saved.
file_extension : str
Desired file extension for the extracted files.
archive_extension : str
File extension of the archives to be processed.
extractor : Callable[[Path, Path], None]
Extractor function to be applied to each archive.
"""
archives_files = [
file
for file in list_files_in_directory(input_dir)
Expand All @@ -42,6 +76,7 @@ def process_archives(
file_name = archive_path.stem
out_file_path = output_dir / f"{file_name}{file_extension}"

# If an output file with the same name and extension exists, remove it
if (
out_file_path.exists()
and out_file_path.is_file()
Expand Down
31 changes: 31 additions & 0 deletions depinspect/archives/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@


def pull_target_from_url(target_url: str, local_target_path: Path) -> None:
"""
Pull a target from a given URL and save it to a local file.
Parameters
----------
target_url : str
The URL of the target to be pulled.
local_target_path : Path
The local path where the target will be saved.
Raises
------
ValueError
If the target URL does not start with "http".
"""
if target_url.lower().startswith("http"):
req = request.Request(target_url)
else:
Expand All @@ -18,6 +33,22 @@ def fetch_and_save_metadata(
distribution: str,
output_directory: Path,
) -> None:
"""
Fetch and save metadata for a specified distribution.
Parameters
----------
config : Dict[str, Dict[str, Dict[str, Dict[str, str]]]]
Configuration dictionary.
distribution : str
The distribution for which metadata should be fetched and saved.
output_directory : Path
The directory where the fetched metadata will be saved.
Returns
-------
None
"""
for release, branches in config[distribution].items():
for branch, archs in branches.items():
for arch, url in archs.items():
Expand Down
15 changes: 15 additions & 0 deletions depinspect/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@


def init(db_name: str, output_path: Path) -> Path:
"""
Initialize a SQLite database for package metadata.
Parameters
----------
db_name : str
Name of the SQLite database.
output_path : Path
Path to the directory where the database will be created.
Returns
-------
Path
Path to the initialized SQLite database.
"""
db_path = output_path / Path(db_name)

logging.info("Initializing a database.")
Expand Down
14 changes: 14 additions & 0 deletions depinspect/distributions/fedora.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,20 @@ def init(
db_suffix: str,
output_path: Path,
) -> None:
"""
Initialize and fetch metadata for Fedora releases.
Parameters
----------
tmp_dir : Path
Temporary directory for fetching and extracting archives.
config : dict[str, dict[str, dict[str, dict[str, str]]]]
Configuration dictionary.
db_suffix : str
Desired file extension for the extracted databases.
output_path : Path
The directory where the extracted databases will be saved.
"""
try:
for release in config["fedora"].keys():
logging.info("Fetching fedora rchives.")
Expand Down
119 changes: 117 additions & 2 deletions depinspect/distributions/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,23 @@


def validate_metadata_file_exists(file_path: Path) -> None:
"""
Validate the existence and format of a metadata file.
Parameters
----------
file_path : Path
Path to the metadata file to be validated.
Returns
-------
None
Raises
------
SystemExit
If the file does not exist or has an invalid suffix.
"""
if not file_path.is_file() or file_path.suffix != ".txt":
logging.exception(
"%s is not a valid metadata file or doesn't exist.", file_path.name
Expand All @@ -16,6 +33,23 @@ def validate_metadata_file_exists(file_path: Path) -> None:


def valid_database_file_exists(db_path: Path) -> None:
"""
Validate the existence and format of an SQLite database file.
Parameters
----------
db_path : Path
Path to the SQLite database file to be validated.
Returns
-------
None
Raises
------
SystemExit
If the file does not exist or has an invalid suffix.
"""
if not db_path.is_file() or db_path.suffix != ".sqlite":
logging.exception(
"%s is not a valid sqlite3 database or doesn't exist.", db_path.name
Expand All @@ -24,6 +58,21 @@ def valid_database_file_exists(db_path: Path) -> None:


def is_not_in_db(db_connection: sqlite3.Connection, pkg: Package) -> bool:
"""
Check if a package is not present in the SQLite database.
Parameters
----------
db_connection : sqlite3.Connection
SQLite database connection.
pkg : Package
The Package object representing the package to be checked.
Returns
-------
bool
True if the package is not present in the database, False otherwise.
"""
res = db_connection.execute(
"""SELECT name, arch, version, release FROM packages
WHERE name = ? AND arch = ? AND version = ? AND release = ?""",
Expand All @@ -34,6 +83,21 @@ def is_not_in_db(db_connection: sqlite3.Connection, pkg: Package) -> bool:


def insert_into_packages(db_connection: sqlite3.Connection, pkg: Package) -> int:
"""
Insert a package into the 'packages' table of an SQLite database.
Parameters
----------
db_connection : sqlite3.Connection
SQLite database connection.
pkg : Package
The Package object representing the package to be inserted.
Returns
-------
int
The row ID of the newly inserted package.
"""
res = db_connection.execute(
"""INSERT INTO packages (name, arch, version, release, description)
VALUES (?, ?, ?, ?, ?)""",
Expand All @@ -58,6 +122,23 @@ def insert_into_packages(db_connection: sqlite3.Connection, pkg: Package) -> int
def map_additional_info(
    input_list: list[str], release: str, key: int
) -> list[tuple[str, str, int]]:
    """
    Pair every entry of a list with the same release and key.

    Parameters
    ----------
    input_list : list[str]
        Entries to be annotated.
    release : str
        Release value attached to every entry.
    key : int
        Key value attached to every entry.

    Returns
    -------
    list[tuple[str, str, int]]
        One ``(entry, release, key)`` tuple per entry, in the order of
        ``input_list``.
    """
    annotated: list[tuple[str, str, int]] = []
    for item in input_list:
        annotated.append((item, release, key))
    return annotated


Expand Down Expand Up @@ -141,6 +222,24 @@ def insert_into_provides(
def process_metadata_into_db(
file_path: Path, db_path: Path, distribution: str, release: str
) -> None:
"""
Process metadata from a file and insert it into an SQLite database.
Parameters
----------
file_path : Path
Path to the metadata file to be processed.
db_path : Path
Path to the SQLite database where the metadata will be inserted.
distribution : str
The distribution name.
release : str
The release name.
Returns
-------
None
"""
from depinspect.distributions.mapping import distribution_class_mapping

validate_metadata_file_exists(file_path)
Expand All @@ -162,8 +261,6 @@ def process_metadata_into_db(
insert_into_breaks(db_connection, pkg, pkg_key)
insert_into_conflicts(db_connection, pkg, pkg_key)
insert_into_provides(db_connection, pkg, pkg_key)
else:
continue

logging.info("File %s has been processed succesfully.", file_path.name)

Expand All @@ -173,6 +270,24 @@ def process_metadata_into_db(
def deserialize_ubuntu_metadata(
tmp_dir: Path, db_path: Path, distribution: str, release: str
) -> None:
"""
Deserialize Ubuntu metadata files in a directory into an SQLite database.
Parameters
----------
tmp_dir : Path
Temporary directory containing Ubuntu metadata files.
db_path : Path
Path to the SQLite database where the metadata will be inserted.
distribution : str
The distribution name.
release : str
The release name.
Returns
-------
None
"""
txt_files = [
txt_file
for txt_file in list_files_in_directory(tmp_dir)
Expand Down
33 changes: 33 additions & 0 deletions depinspect/distributions/ubuntu.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ def __init__(self) -> None:

@staticmethod
def parse_metadata(file_path: Path, dist_release: str) -> list["Package"]:
"""
Parse Ubuntu metadata file and return a list of Package objects.
Parameters
----------
file_path : Path
Path to the Ubuntu metadata file to be parsed.
dist_release : str
The release name.
Returns
-------
List[Package]
A list of Package objects representing Ubuntu packages.
"""
with open(file_path) as file:
file_content = file.read()
ubuntu_packages: list[Package] = []
Expand Down Expand Up @@ -54,6 +69,24 @@ def init(
db_suffix: str,
output_path: Path,
) -> None:
"""
Initialize the Ubuntu database.
Parameters
----------
tmp_dir : Path
Temporary directory to store intermediate files.
config : Dict[str, Dict[str, Dict[str, Dict[str, str]]]]
Configuration dictionary containing information about sources.
db_suffix : str
Suffix to be added to the database name.
output_path : Path
Output path for the initialized database.
Returns
-------
None
"""
try:
for release in config["ubuntu"].keys():
logging.info("Fetching archives from pre-defined URL sources.")
Expand Down
Loading

0 comments on commit bbd4edc

Please sign in to comment.