From 6adf10d1553ef40ce526c239f8c33ddfbabd586d Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Sun, 17 Mar 2024 00:36:42 +0100 Subject: [PATCH 01/10] =?UTF-8?q?=F0=9F=90=9B=20reqs=20with=20same=20name?= =?UTF-8?q?=20does=20not=20raise=20if=20matching=20specifier?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pip_audit/_dependency_source/requirement.py | 28 +++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/pip_audit/_dependency_source/requirement.py b/pip_audit/_dependency_source/requirement.py index 902a530c..be1c8e7d 100644 --- a/pip_audit/_dependency_source/requirement.py +++ b/pip_audit/_dependency_source/requirement.py @@ -225,18 +225,23 @@ def _fix_file(self, filename: Path, fix_version: ResolvedFixVersion) -> None: # Check ahead of time for anything invalid in the requirements file since we don't want to # encounter this while writing out the file. Check for duplicate requirements and lines that # failed to parse. - req_names: set[str] = set() + req_specifiers: dict[str, str] = dict() + for req in reqs: if ( isinstance(req, InstallRequirement) and (req.marker is None or req.marker.evaluate()) and req.req is not None ): - if req.name in req_names: + duplicate_req_specifier = req_specifiers.get(req.name) + + if not duplicate_req_specifier: + req_specifiers[req.name] = req.specifier + + elif duplicate_req_specifier != req.specifier: raise RequirementFixError( f"package {req.name} has duplicate requirements: {str(req)}" ) - req_names.add(req.name) elif isinstance(req, InvalidRequirementLine): raise RequirementFixError( f"requirement file {filename} has invalid requirement: {str(req)}" @@ -294,7 +299,7 @@ def _collect_preresolved_deps( """ Collect pre-resolved (pinned) dependencies. """ - req_names: set[str] = set() + req_specifiers: dict[str, str] = dict() for req in reqs: if not req.hash_options and require_hashes: raise RequirementSourceError(f"requirement {req.dumps()} does not contain a hash") @@ -315,12 +320,21 @@ def _collect_preresolved_deps( if req.marker is not None and not req.marker.evaluate(): continue # pragma: no cover - # This means we have a duplicate requirement for the same package - if req.name in req_names: + duplicate_req_specifier = req_specifiers.get(req.name) + + if not duplicate_req_specifier: + req_specifiers[req.name] = req.specifier + + # We have a duplicate requirement for the same package + # but different specifiers, meaning a badly resolved requirements.txt + elif duplicate_req_specifier != req.specifier: raise RequirementSourceError( f"package {req.name} has duplicate requirements: {str(req)}" ) - req_names.add(req.name) + else: + # We have a duplicate requirement for the same package and the specifier matches + # As they would return the same result from the audit, there no need to yield it a second time. + continue # NOTE: URL dependencies cannot be pinned, so skipping them # makes sense (under the same principle of skipping dependencies From a986445398db6d756d8f5dc40f8ce8493ef9afd8 Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Sun, 17 Mar 2024 00:39:09 +0100 Subject: [PATCH 02/10] =?UTF-8?q?=F0=9F=90=9B=20ensure=20fixed=20file=20is?= =?UTF-8?q?=20closed=20before=20moving=20it?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pip_audit/_dependency_source/pyproject.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pip_audit/_dependency_source/pyproject.py b/pip_audit/_dependency_source/pyproject.py index 36d6924a..30d2767c 100644 --- a/pip_audit/_dependency_source/pyproject.py +++ b/pip_audit/_dependency_source/pyproject.py @@ -142,8 +142,8 @@ def fix(self, fix_version: ResolvedFixVersion) -> None: # Now dump the new edited TOML to the temporary file. toml.dump(pyproject_data, tmp) - # And replace the original `pyproject.toml` file. - os.replace(tmp.name, self.filename) + # And replace the original `pyproject.toml` file. + os.replace(tmp.name, self.filename) class PyProjectSourceError(DependencySourceError): From 96306fa4ee2ba38300158620d444ae8f765782aa Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Sun, 17 Mar 2024 00:53:28 +0100 Subject: [PATCH 03/10] =?UTF-8?q?=F0=9F=90=9B=20fix=20pipe=20reading=20han?= =?UTF-8?q?ging=20indefinitely?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pip_audit/_subprocess.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pip_audit/_subprocess.py b/pip_audit/_subprocess.py index 72bd20de..efe80f3c 100644 --- a/pip_audit/_subprocess.py +++ b/pip_audit/_subprocess.py @@ -36,7 +36,7 @@ def run(args: Sequence[str], *, log_stdout: bool = False, state: AuditState = Au # Run the process with unbuffered I/O, to make the poll-and-read loop below # more responsive. - process = Popen(args, bufsize=0, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + process = Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # NOTE(ww): We frequently run commands inside of ephemeral virtual environments, # which have long absolute paths on some platforms. These make for confusing @@ -52,9 +52,8 @@ def run(args: Sequence[str], *, log_stdout: bool = False, state: AuditState = Au # once `stdout` hits EOF, so we don't have to worry about that blocking. while not terminated: terminated = process.poll() is not None - # NOTE(ww): Buffer size chosen arbitrarily here and below. - stdout += process.stdout.read(4096) # type: ignore - stderr += process.stderr.read(4096) # type: ignore + stdout += process.stdout.read() # type: ignore + stderr += process.stderr.read() # type: ignore state.update_state( f"Running {pretty_args}", stdout.decode(errors="replace") if log_stdout else None, From 8f852d15940108debc52307bff7c89e528f73e91 Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Sun, 17 Mar 2024 01:08:20 +0100 Subject: [PATCH 04/10] =?UTF-8?q?=E2=9C=85=20update=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/dependency_source/test_requirement.py | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/dependency_source/test_requirement.py b/test/dependency_source/test_requirement.py index ad8d8ef5..82527ef4 100644 --- a/test/dependency_source/test_requirement.py +++ b/test/dependency_source/test_requirement.py @@ -523,6 +523,30 @@ def test_requirement_source_disable_pip_duplicate_dependencies(req_file): [(req_file(), "flask==1.0\nflask==1.0")], disable_pip=True, no_deps=True ) + specs = list(source.collect()) + + # If the dependency list has duplicates, then converting to a set will reduce the length of the + # collection + assert len(specs) == len(set(specs)) + + +def test_requirement_source_disable_pip_duplicate_dependencies_with_extras(req_file): + source = _init_requirement( + [(req_file(), "aiohttp==3.9\naiohttp[speedups]==3.9")], disable_pip=True, no_deps=True + ) + + specs = list(source.collect()) + + # If the dependency list has duplicates, then converting to a set will reduce the length of the + # collection + assert len(specs) == len(set(specs)) + + +def test_requirement_source_disable_pip_duplicate_dependencies_diff_specifier(req_file): + source = _init_requirement( + [(req_file(), "flask==1.0\nflask==2.0")], disable_pip=True, no_deps=True + ) + with pytest.raises(DependencySourceError): list(source.collect()) From b968c85e499221099fa241912a61090cb8e5e90e Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Sun, 17 Mar 2024 01:11:10 +0100 Subject: [PATCH 05/10] =?UTF-8?q?=E2=9C=8F=EF=B8=8F=20fix=20typehint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pip_audit/_dependency_source/requirement.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pip_audit/_dependency_source/requirement.py b/pip_audit/_dependency_source/requirement.py index be1c8e7d..7c0f3f50 100644 --- a/pip_audit/_dependency_source/requirement.py +++ b/pip_audit/_dependency_source/requirement.py @@ -225,7 +225,7 @@ def _fix_file(self, filename: Path, fix_version: ResolvedFixVersion) -> None: # Check ahead of time for anything invalid in the requirements file since we don't want to # encounter this while writing out the file. Check for duplicate requirements and lines that # failed to parse. - req_specifiers: dict[str, str] = dict() + req_specifiers: dict[str, SpecifierSet] = dict() for req in reqs: if ( @@ -299,7 +299,7 @@ def _collect_preresolved_deps( """ Collect pre-resolved (pinned) dependencies. """ - req_specifiers: dict[str, str] = dict() + req_specifiers: dict[str, SpecifierSet] = dict() for req in reqs: if not req.hash_options and require_hashes: raise RequirementSourceError(f"requirement {req.dumps()} does not contain a hash") From 0cf9887f02c40f0d755afad7b9e4c96b4a772fae Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Sun, 17 Mar 2024 21:17:11 +0100 Subject: [PATCH 06/10] =?UTF-8?q?=E2=8F=AA=20re-set=20bufsize=20to=200?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pip_audit/_subprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pip_audit/_subprocess.py b/pip_audit/_subprocess.py index efe80f3c..32b43885 100644 --- a/pip_audit/_subprocess.py +++ b/pip_audit/_subprocess.py @@ -36,7 +36,7 @@ def run(args: Sequence[str], *, log_stdout: bool = False, state: AuditState = Au # Run the process with unbuffered I/O, to make the poll-and-read loop below # more responsive. - process = Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + process = Popen(args, bufsize=0, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # NOTE(ww): We frequently run commands inside of ephemeral virtual environments, # which have long absolute paths on some platforms. These make for confusing From 8ac2246f26c4578bae6ea95c5bb5540fffee8f19 Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Mon, 1 Apr 2024 17:59:11 +0200 Subject: [PATCH 07/10] =?UTF-8?q?Revert=20"=F0=9F=90=9B=20fix=20pipe=20rea?= =?UTF-8?q?ding=20hanging=20indefinitely"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 96306fa4ee2ba38300158620d444ae8f765782aa. --- pip_audit/_subprocess.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pip_audit/_subprocess.py b/pip_audit/_subprocess.py index 32b43885..72bd20de 100644 --- a/pip_audit/_subprocess.py +++ b/pip_audit/_subprocess.py @@ -52,8 +52,9 @@ def run(args: Sequence[str], *, log_stdout: bool = False, state: AuditState = Au # once `stdout` hits EOF, so we don't have to worry about that blocking. while not terminated: terminated = process.poll() is not None - stdout += process.stdout.read() # type: ignore - stderr += process.stderr.read() # type: ignore + # NOTE(ww): Buffer size chosen arbitrarily here and below. + stdout += process.stdout.read(4096) # type: ignore + stderr += process.stderr.read(4096) # type: ignore state.update_state( f"Running {pretty_args}", stdout.decode(errors="replace") if log_stdout else None, From c747317ba1921ebe6a76cc22f5eeb8bddee1f2db Mon Sep 17 00:00:00 2001 From: Mathieu <923463-mathbou@users.noreply.gitlab.com> Date: Mon, 1 Apr 2024 18:14:45 +0200 Subject: [PATCH 08/10] =?UTF-8?q?Revert=20"=F0=9F=90=9B=20ensure=20fixed?= =?UTF-8?q?=20file=20is=20closed=20before=20moving=20it"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a986445398db6d756d8f5dc40f8ce8493ef9afd8. --- pip_audit/_dependency_source/pyproject.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pip_audit/_dependency_source/pyproject.py b/pip_audit/_dependency_source/pyproject.py index 30d2767c..36d6924a 100644 --- a/pip_audit/_dependency_source/pyproject.py +++ b/pip_audit/_dependency_source/pyproject.py @@ -142,8 +142,8 @@ def fix(self, fix_version: ResolvedFixVersion) -> None: # Now dump the new edited TOML to the temporary file. toml.dump(pyproject_data, tmp) - # And replace the original `pyproject.toml` file. - os.replace(tmp.name, self.filename) + # And replace the original `pyproject.toml` file. + os.replace(tmp.name, self.filename) class PyProjectSourceError(DependencySourceError): From 43832a01e382ee4d26b18c51b2d91c0b678b15a5 Mon Sep 17 00:00:00 2001 From: Mathieu Bouzard <13415583+mathbou@users.noreply.github.com> Date: Mon, 26 Aug 2024 11:47:10 +0200 Subject: [PATCH 09/10] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0bfc987..ab54bc19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ All versions prior to 0.0.9 are untracked. * `pip-audit` now allows some CLI flags to be configured via environment variables ([#755](https://github.com/pypa/pip-audit/pull/755)) +### Changed + +* `--disable-pip` allows duplicate requirements if specifiers matches ([#749](https://github.com/pypa/pip-audit/pull/749)) + ## [2.7.3] ### Fixed From 191bd596e1fd528b5cf906bcf3d7e8f63b0c7bd0 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 26 Aug 2024 11:21:17 -0400 Subject: [PATCH 10/10] Apply suggestions from code review --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab54bc19..0a97f1c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,9 +13,9 @@ All versions prior to 0.0.9 are untracked. * `pip-audit` now allows some CLI flags to be configured via environment variables ([#755](https://github.com/pypa/pip-audit/pull/755)) -### Changed +### Fixed -* `--disable-pip` allows duplicate requirements if specifiers matches ([#749](https://github.com/pypa/pip-audit/pull/749)) +* Auditing a fully-pinned requirements file with `--disable-pip` now allows for duplicates, so long as the duplicates don't have conflicting specifier sets ([#749](https://github.com/pypa/pip-audit/pull/749)) ## [2.7.3]