From 3a7ecb0d127a1f2b26d1d05d7793a469ebc7cda7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20D=C3=BCrr?= <102963075+cd-work@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:43:02 +0000 Subject: [PATCH] Fix requirements parsing with index URL option (#1251) When using `pip-compile` with a third party Python index, the `--index-url` and `--extra-index-url` options get included in the `requirements.txt` output file. This patch updates our parser to allow these options. The presence of `--index-url` in the file will make the parser assume that all packages are being pulled from the third party registry. This may or may not be true, but it doesn't really matter since we currently treat third party registries as if they were first party when submitting a job. Co-authored-by: Kyle Willmon --- CHANGELOG.md | 3 +++ lockfile/src/parsers/pypi.rs | 35 +++++++++++++++++++++----- lockfile/src/python.rs | 18 ++++++++++++- tests/fixtures/requirements-locked.txt | 7 ++++++ 4 files changed, 56 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbe4d77a1..ce1879f1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Fixed +- Pip requirements.txt parser failing with third-party registries + ## [5.7.2] - 2023-10-10 ### Fixed diff --git a/lockfile/src/parsers/pypi.rs b/lockfile/src/parsers/pypi.rs index 86ed643f8..4064e3b5a 100644 --- a/lockfile/src/parsers/pypi.rs +++ b/lockfile/src/parsers/pypi.rs @@ -11,14 +11,15 @@ use nom::Err as NomErr; use phylum_types::types::package::PackageType; use crate::parsers::{self, IResult}; -use crate::{Package, PackageVersion}; +use crate::{Package, PackageVersion, ThirdPartyVersion}; pub fn parse(mut input: &str) -> IResult<&str, Vec<Package>> { let mut pkgs = Vec::new(); + let mut registry = None; while !input.is_empty() { // Get the next line. 
- let (new_input, line) = line(input)?; + let (new_input, line) = line(input, &mut registry)?; input = new_input; // Ignore empty lines. @@ -30,7 +31,7 @@ pub fn parse(mut input: &str) -> IResult<&str, Vec<Package>> { let (_, line) = alt((take_until(" #"), rest))(line)?; // Parse dependency. - let (_, pkg) = package(line)?; + let (_, pkg) = package(line, registry)?; pkgs.push(pkg); } @@ -38,7 +39,7 @@ pub fn parse(mut input: &str) -> IResult<&str, Vec<Package>> { } /// Parse one line in the lockfile. -fn line(input: &str) -> IResult<&str, &str> { +fn line<'a>(input: &'a str, registry: &mut Option<&'a str>) -> IResult<&'a str, &'a str> { // Take everything until the next newline. // // This takes line continuation characters into account. @@ -52,10 +53,26 @@ fn line(input: &str) -> IResult<&str, &str> { line = ""; } + // Ignore index config options. + // + // Since `ThirdPartyVersion` only allows a single registry, we only record the + // primary one. + if let Some(index_url) = line + .strip_prefix("--index-url") + .and_then(|line| line.strip_prefix(['=', ' '])) + .or_else(|| line.strip_prefix("-i")) + { + *registry = Some(index_url.trim()); + line = ""; + } + if line.starts_with("--extra-index-url") { + line = ""; + } + Ok((input, line)) } -fn package(input: &str) -> IResult<&str, Package> { +fn package<'a>(input: &'a str, registry: Option<&str>) -> IResult<&'a str, Package> { // Ignore everything after `;`. let (_, input) = alt((take_until(";"), rest))(input)?; @@ -85,7 +102,13 @@ fn package(input: &str) -> IResult<&str, Package> { // Parse first-party dependencies. let (input, version) = package_version(input)?; - let version = PackageVersion::FirstParty(version.trim().into()); + let version = match registry { + Some(registry) => PackageVersion::ThirdParty(ThirdPartyVersion { + version: version.trim().into(), + registry: registry.into(), + }), + None => PackageVersion::FirstParty(version.trim().into()), + }; // Ensure line is empty after the dependency. 
line_done(input)?; diff --git a/lockfile/src/python.rs b/lockfile/src/python.rs index 7cee3eda4..09b84b4f0 100644 --- a/lockfile/src/python.rs +++ b/lockfile/src/python.rs @@ -238,7 +238,7 @@ mod tests { let pkgs = PyRequirements .parse(include_str!("../../tests/fixtures/requirements-locked.txt")) .unwrap(); - assert_eq!(pkgs.len(), 12); + assert_eq!(pkgs.len(), 14); let expected_pkgs = [ Package { @@ -301,6 +301,22 @@ mod tests { version: PackageVersion::Path(Some("/tmp/editable".into())), package_type: PackageType::PyPi, }, + Package { + name: "other-registry-a".into(), + version: PackageVersion::ThirdParty(ThirdPartyVersion { + registry: "https://mirror1.phylum.io/simple/".into(), + version: "3.2.1".into(), + }), + package_type: PackageType::PyPi, + }, + Package { + name: "other-registry".into(), + version: PackageVersion::ThirdParty(ThirdPartyVersion { + registry: "https://mirror2.phylum.io/simple/".into(), + version: "1.2.3".into(), + }), + package_type: PackageType::PyPi, + }, ]; for expected_pkg in expected_pkgs { diff --git a/tests/fixtures/requirements-locked.txt b/tests/fixtures/requirements-locked.txt index 29142950b..07297db1c 100644 --- a/tests/fixtures/requirements-locked.txt +++ b/tests/fixtures/requirements-locked.txt @@ -32,3 +32,10 @@ tomli @ https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a25 -e git+ssh://git@github.com/phylum-dev/phylum-ci.git@7d6d859ad368d1ab0a933f24679e3d3c08a40eac#egg=phylum -e /tmp/editable ; python_version >= "3.7" and python_version < "3.12" + +--index-url https://unused.phylum.io/simple/ +--index-url=https://mirror1.phylum.io/simple/ +other-registry-a==3.2.1 +-ihttps://mirror2.phylum.io/simple/ +--extra-index-url=https://mirror3.phylum.io/simple/ +other-registry==1.2.3