Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pip metadata refactoring #680

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 56 additions & 58 deletions cachi2/core/package_managers/pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,72 +273,78 @@ def _generate_purl_dependency(package: dict[str, Any]) -> str:
return purl.to_string()


def _get_pip_metadata(package_dir: RootedPath) -> tuple[str, Optional[str]]:
"""
Attempt to get the name and version of a Pip package.
def _infer_package_name_from_origin_url(package_dir: RootedPath) -> str:
try:
repo_id = get_repo_id(package_dir.root)
except UnsupportedFeature:
raise PackageRejected(
reason="Unable to infer package name from origin URL",
solution=(
"Provide valid metadata in the package files or ensure"
"the git repository has an 'origin' remote with a valid URL."
),
docs=PIP_METADATA_DOC,
)

repo_name = Path(repo_id.parsed_origin_url.path).stem
resolved_name = Path(repo_name).joinpath(package_dir.subpath_from_root)
return canonicalize_name(str(resolved_name).replace("/", "-")).strip("-.")

First, try to parse the setup.py script (if present) and extract name and version
from keyword arguments to the setuptools.setup() call. If either name or version
could not be resolved and there is a setup.cfg file, try to fill in the missing
values from metadata.name and metadata.version in the .cfg file.

:param package_dir: Path to the root directory of a Pip package
:return: Tuple of strings (name, version)
def _extract_metadata_from_config_files(
package_dir: RootedPath,
) -> tuple[Optional[str], Optional[str]]:
"""
name = None
version = None
Extract package name and version in the following order.

1. pyproject.toml
2. setup.py
3. setup.cfg

Note: version is optional in the SBOM, but name is required
"""
pyproject_toml = PyProjectTOML(package_dir)
if pyproject_toml.exists():
log.debug("Checking pyproject.toml for metadata")
name = pyproject_toml.get_name()
version = pyproject_toml.get_version()

if name:
return name, version

setup_py = SetupPY(package_dir)
if setup_py.exists():
log.debug("Checking setup.py for metadata")
name = setup_py.get_name()
version = setup_py.get_version()

if name:
return name, version

setup_cfg = SetupCFG(package_dir)
if setup_cfg.exists():
log.debug("Checking setup.cfg for metadata")
name = setup_cfg.get_name()
version = setup_cfg.get_version()

if pyproject_toml.exists():
log.info("Extracting metadata from pyproject.toml")
if pyproject_toml.check_dynamic_version():
log.warning("Parsing dynamic metadata from pyproject.toml is not supported")
if name:
return name, version

name = pyproject_toml.get_name()
version = pyproject_toml.get_version()
return None, None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This refactor isn't a 1:1 replacement - previously if a name was found it was carried over to other source checks. Here, if you extract name but can't find version your code will resort to returning (None,None) which means we'll infer the name of the project from the URL instead, that doesn't sound right.


if None in (name, version) and setup_py.exists():
log.info("Filling in missing metadata from setup.py")
name = name or setup_py.get_name()
version = version or setup_py.get_version()

if None in (name, version) and setup_cfg.exists():
log.info("Filling in missing metadata from setup.cfg")
name = name or setup_cfg.get_name()
version = version or setup_cfg.get_version()
def _get_pip_metadata(package_dir: RootedPath) -> tuple[str, Optional[str]]:
"""Attempt to retrieve name and version of a pip package."""
name, version = _extract_metadata_from_config_files(package_dir)

if not name:
slimreaper35 marked this conversation as resolved.
Show resolved Hide resolved
log.info("Processing metadata from git repository")
try:
repo_path = get_repo_id(package_dir.root).parsed_origin_url.path.removesuffix(".git")
repo_name = Path(repo_path).name
package_subpath = package_dir.subpath_from_root

resolved_path = Path(repo_name).joinpath(package_subpath)
normalized_path = canonicalize_name(str(resolved_path).replace("/", "-"))
name = normalized_path.strip("-.")
except UnsupportedFeature:
raise PackageRejected(
reason="Could not take name from the repository origin url",
solution=(
"Please specify package metadata in a way that Cachi2 understands"
" (see the docs)\n"
"or make sure that the directory Cachi2 is processing is a git"
" repository with\n"
"an 'origin' remote in which case Cachi2 will infer the package name from"
" the remote url."
),
docs=PIP_METADATA_DOC,
)
name = _infer_package_name_from_origin_url(package_dir)

log.info("Resolved package name: %r", name)
log.info("Resolved name %s for package at %s", name, package_dir)
if version:
log.info("Resolved package version: %r", version)
log.info("Resolved version %s for package at %s", version, package_dir)
else:
log.warning("Could not resolve package version")
slimreaper35 marked this conversation as resolved.
Show resolved Hide resolved
log.warning("Could not resolve version for package at %s", package_dir)

return name, version

Expand Down Expand Up @@ -453,14 +459,6 @@ def get_version(self) -> Optional[str]:
log.warning("No project.version in pyproject.toml")
return None

def check_dynamic_version(self) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't explain anywhere why you're dropping this check.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I use similar reasoning?

People should be aware of setup.py soft deprecation by now, do we want to hold everyone's hand? I mean displaying a warning for users who genuinely need setup.py (because pyproject.toml simply doesn't cut it for them as they may have C deps) doesn't feel right. I wouldn't strictly argue against having a warning if you proposed it somewhere in the code, I'm just questioning the usefulness given the circumstances.

Even though it is not deprecated. But the version is optional so I don't see much value in the warning message.

"""Check if project version is set dynamically."""
try:
dynamic_properties = self._parsed_toml["project"]["dynamic"]
return "version" in dynamic_properties
except KeyError:
return False

@functools.cached_property
def _parsed_toml(self) -> dict[str, Any]:
try:
Expand Down
Loading
Loading