Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

get sdist requires from egg-info #88

Merged
82 changes: 66 additions & 16 deletions stub_uploader/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
import graphlib
import os
import re
from collections.abc import Iterator, Iterable
import tarfile
from collections.abc import Generator, Iterable
from glob import glob
from pathlib import Path
from typing import Any, Optional

import requests
Expand Down Expand Up @@ -174,6 +177,43 @@ def verify_typeshed_req(req: Requirement) -> None:
}


def validate_response(resp: requests.Response, req: Requirement) -> None:
if resp.status_code != 200:
raise InvalidRequires(
f"Expected dependency {req} to be accessible on PyPI, but got {resp.status_code}"
)


def get_sdist_requires(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: "get" makes this function sound simpler than what it actually does. Maybe "extract"?

Also I think it would be safer to extract the sdist into an empty temporary directory than to where ever we currently are.

sdist_data: dict[str, str], req: Requirement
) -> Generator[Requirement, None, None]:
filename = sdist_data["filename"]
resp = requests.get(sdist_data["url"], stream=True)
validate_response(resp, req)
with open(filename, "wb") as file:
file.write(resp.raw.read())

folder_name = ""
with tarfile.open(filename) as file_in:
file_in.extractall()
for info in file_in:
folder_name = info.name
# Assume a single folder
break
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add a check here, whether folder_name was actually set, or maybe even check that there really is exactly one folder to account for broken eggs. But for the former case, the following should be simpler and have that required error check:

Suggested change
for info in file_in:
folder_name = info.name
# Assume a single folder
break
# Assume a single folder
folder_name = next(iter(file_in)).name

You can then also drop the initialization of folder_name.

Or just use * instead of folder_name in the glob below.

Copy link
Contributor Author

@Avasam Avasam May 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or just use * instead of folder_name in the glob below.

New code looks much cleaner like this, and makes no assumption. Especially since it's extracting to a temp folder now.


requires_filepath = Path(folder_name, "*.egg-info", "requires.txt")
matches = glob(str(requires_filepath))
for match in matches:
with open(match) as requires_file:
lines = requires_file.readlines()
for line in lines:
# Skip empty lines and extras
if line[0] not in {"\n", "["}:
yield Requirement(line)
# Assume a single *.egg-info folder
Avasam marked this conversation as resolved.
Show resolved Hide resolved
break


def verify_external_req(
req: Requirement,
upstream_distribution: Optional[str],
Expand All @@ -196,37 +236,47 @@ def verify_external_req(
f"There is no upstream distribution on PyPI, so cannot verify {req}"
)

resp = requests.get(f"https://pypi.org/pypi/{upstream_distribution}/json")
if resp.status_code != 200:
if req.name not in EXTERNAL_REQ_ALLOWLIST and not _unsafe_ignore_allowlist:
raise InvalidRequires(
f"Expected dependency {req} to be accessible on PyPI, but got {resp.status_code}"
f"Expected dependency {req} to be present in the allowlist"
)

data = resp.json()
resp = requests.get(f"https://pypi.org/pypi/{upstream_distribution}/json")
validate_response(resp, req)
data: dict[str, Any] = resp.json()

# TODO: consider allowing external dependencies for stubs for packages that do not ship wheels.
# Note that we can't build packages from sdists, since that can execute arbitrary code.
# We could do some hacky setup.py parsing though...
# TODO: PyPI doesn't seem to have version specific requires_dist. This does mean we can be
# broken by new releases of upstream packages, even if they do not match the version spec we
# have for the upstream distribution.
if req_canonical_name not in [

if req_canonical_name in [
canonical_name(Requirement(r).name)
for r in (data["info"].get("requires_dist") or [])
]:
return # Ok!

sdist_data: dict[str, Any] | None = next(
(
url_data
for url_data in reversed(data["urls"])
if url_data["packagetype"] == "sdist"
),
None,
)
if not (
sdist_data
and req_canonical_name
in [canonical_name(r.name) for r in get_sdist_requires(sdist_data, req)]
):
raise InvalidRequires(
f"Expected dependency {req} to be listed in {upstream_distribution}'s requires_dist"
)

if req.name not in EXTERNAL_REQ_ALLOWLIST and not _unsafe_ignore_allowlist:
raise InvalidRequires(
f"Expected dependency {req} to be present in the allowlist"
f"Expected dependency {req} to be listed in {upstream_distribution}'s "
+ "requires_dist or the sdist's *.egg-info/requires.txt"
)


def sort_by_dependency(
typeshed_dir: str, distributions: Iterable[str]
) -> Iterator[str]:
) -> Generator[str, None, None]:
# Just a simple topological sort. Unlike previous versions of the code, we do not rely
# on this to perform validation, like requiring the graph to be complete.
# We only use this to help with edge cases like multiple packages being uploaded
Expand Down