Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: conda transitive dependencies of pypi packages are properly extracted #967

Merged
merged 5 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ human_bytes = "0.4.3"
humantime = "2.1.0"
indexmap = { version = "2.2.5", features = ["serde"] }
indicatif = "0.17.8"
insta = { version = "1.36.1", features = ["yaml"] }

install-wheel-rs = { git = "https://github.com/astral-sh/uv", tag = "0.1.16" }
is_executable = "1.0.1"
Expand Down Expand Up @@ -145,6 +144,7 @@ libc = { version = "0.2.153", default-features = false }
signal-hook = "0.3.17"

[dev-dependencies]
insta = { version = "1.36.1", features = ["yaml", "glob"] }
rattler_digest = "0.19.1"
rstest = "0.18.2"
serde_json = "1.0.114"
Expand Down
49 changes: 0 additions & 49 deletions src/lock_file/package_identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use crate::project::manifest::python::PyPiPackageName;
use crate::pypi_name_mapping;
use pep508_rs::{Requirement, VersionOrUrl};
use rattler_conda_types::{PackageUrl, RepoDataRecord};
use rattler_lock::CondaPackage;
use std::{collections::HashSet, str::FromStr};
use thiserror::Error;
use uv_normalize::{ExtraName, InvalidNameError, PackageName};
Expand All @@ -24,54 +23,6 @@ impl PypiPackageIdentifier {
Ok(result)
}

/// Builds an identifier from a locked PyPI package.
///
/// The name and version are cloned from the locked package data, and the extras are collected
/// from `package.extras()`.
pub fn from_locked_pypi_dependency(
    package: &rattler_lock::PypiPackage,
) -> Result<Self, ConversionError> {
    let name = PyPiPackageName::from_normalized(package.data().package.name.clone());
    let version = package.data().package.version.clone();
    let extras = package.extras().iter().cloned().collect();
    Ok(Self {
        name,
        version,
        extras,
    })
}

/// Collects the python packages that become available once the given locked conda package is
/// installed.
///
/// Every purl of the package record is passed through `Self::try_from_purl`; a conversion error
/// aborts the whole operation. If no purl yields an identifier and the package url is a
/// conda-forge url, the conda name and version are reused as the PyPI name and version,
/// provided both of them parse.
pub fn from_locked_conda_dependency(
    package: &CondaPackage,
) -> Result<Vec<Self>, ConversionError> {
    let record = package.package_record();

    // Gather every identifier that can be derived from the purls of the record. `extend`
    // consumes the `Option`, so purls that do not map to a PyPI package add nothing.
    let mut identifiers = Vec::new();
    for purl in record.purls.iter() {
        identifiers.extend(Self::try_from_purl(purl, &record.version.as_str())?);
    }

    // If no purl produced an identifier, but the package is a conda-forge package, assume that
    // the name of the conda package equals the name of the python package.
    let found_via_purl = !identifiers.is_empty();
    if !found_via_purl && pypi_name_mapping::is_conda_forge_url(package.url()) {
        // Convert the conda name and version to their PyPI counterparts. A failed conversion
        // is taken to mean that this is not a valid python package.
        let name = PackageName::from_str(record.name.as_normalized()).ok();
        let version = pep440_rs::Version::from_str(&record.version.as_str()).ok();
        if let Some((name, version)) = name.zip(version) {
            identifiers.push(PypiPackageIdentifier {
                name: PyPiPackageName::from_normalized(name),
                version,
                // TODO: We can't really tell which python extras are enabled in a conda package.
                extras: Default::default(),
            });
        }
    }

    Ok(identifiers)
}

/// Helper function that extracts the python packages that will be installed and writes the
/// result into a pre-allocated vector.
fn from_record_into(
Expand Down
192 changes: 119 additions & 73 deletions src/lock_file/records_by_name.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::lock_file::{PypiPackageIdentifier, PypiRecord};
use crate::pypi_tags::is_python_record;
use rattler_conda_types::{PackageName, RepoDataRecord};
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::hash::Hash;

/// A struct that holds both a `Vec` of `RepoDataRecord` and a mapping from name to index.
Expand Down Expand Up @@ -33,11 +34,59 @@ impl RepoDataRecordsByName {
self.by_name.get(key).map(|idx| &self.records[*idx])
}

/// Looks up the position of the record with the given package name.
///
/// Returns `None` when no record with that name is stored.
pub fn index_by_name<Q: ?Sized>(&self, key: &Q) -> Option<usize>
where
    PackageName: Borrow<Q>,
    Q: Hash + Eq,
{
    let position = self.by_name.get(key)?;
    Some(*position)
}

/// Finds the first stored record for which `is_python_record` returns true, i.e. the record of
/// the python interpreter.
///
/// Returns `None` if no stored record qualifies.
pub fn python_interpreter_record(&self) -> Option<&RepoDataRecord> {
    for candidate in &self.records {
        if is_python_record(candidate) {
            return Some(candidate);
        }
    }
    None
}

/// Returns `true` if this instance holds no records.
///
/// The check is made on the `records` vector.
pub fn is_empty(&self) -> bool {
self.records.is_empty()
}

/// Returns the number of records stored in this instance.
///
/// The count is taken from the `records` vector, not from the name lookup map.
pub fn len(&self) -> usize {
self.records.len()
}

/// Consumes this instance and returns the internally stored records.
///
/// Only the `records` vector is returned; the name lookup map is dropped together with `self`.
pub fn into_inner(self) -> Vec<RepoDataRecord> {
self.records
}

/// Builds a new instance from an iterator of repodata records in which every package name may
/// occur only once.
///
/// Records are stored in iteration order. As soon as a record is seen whose package name is
/// already present, construction stops and that record is returned (boxed) as the error.
pub fn from_unique_iter<I: IntoIterator<Item = RepoDataRecord>>(
    iter: I,
) -> Result<Self, Box<RepoDataRecord>> {
    let source = iter.into_iter();
    // Pre-size both containers using the lower bound reported by the iterator.
    let (lower_bound, _) = source.size_hint();
    let mut records = Vec::with_capacity(lower_bound);
    let mut by_name = HashMap::with_capacity(lower_bound);

    for candidate in source {
        if let Entry::Vacant(slot) = by_name.entry(candidate.package_record.name.clone()) {
            // The record is appended next, so its index equals the current length.
            slot.insert(records.len());
            records.push(candidate);
        } else {
            return Err(Box::new(candidate));
        }
    }

    Ok(Self { records, by_name })
}

/// Constructs a new instance from an iterator of repodata records. The records are
/// deduplicated where the record with the highest version wins.
pub fn from_iter<I: IntoIterator<Item = RepoDataRecord>>(iter: I) -> Self {
Expand Down Expand Up @@ -65,43 +114,28 @@ impl RepoDataRecordsByName {
Self { records, by_name }
}

/// Constructs a subset of the records in this set that only contain the packages with the given
/// names and recursively their dependencies.
pub fn subset(
/// Convert the records into a map of pypi package identifiers mapped to the records they were
/// extracted from.
pub fn by_pypi_name(
&self,
package_names: impl IntoIterator<Item = PackageName>,
virtual_packages: &HashSet<PackageName>,
) -> Self {
let mut queue = package_names.into_iter().collect::<Vec<_>>();
let mut queued_names = queue.iter().cloned().collect::<HashSet<_>>();
let mut records = Vec::new();
let mut by_name = HashMap::new();
while let Some(package) = queue.pop() {
// Find the record in the superset of records
let found_package = if virtual_packages.contains(&package) {
continue;
} else if let Some(record) = self.by_name(&package) {
record
} else {
continue;
};

// Find all the dependencies of the package and add them to the queue
for dependency in found_package.package_record.depends.iter() {
let dependency_name = PackageName::new_unchecked(
dependency.split_once(' ').unwrap_or((&dependency, "")).0,
);
if queued_names.insert(dependency_name.clone()) {
queue.push(dependency_name);
}
}

let idx = records.len();
by_name.insert(package, idx);
records.push(found_package.clone());
}

Self { records, by_name }
) -> HashMap<uv_normalize::PackageName, (PypiPackageIdentifier, usize, &RepoDataRecord)> {
self.records
.iter()
.enumerate()
.filter_map(|(idx, record)| {
PypiPackageIdentifier::from_record(record)
.ok()
.map(move |identifiers| (idx, record, identifiers))
})
.flat_map(|(idx, record, identifiers)| {
identifiers.into_iter().map(move |identifier| {
(
identifier.name.as_normalized().clone(),
(identifier, idx, record),
)
})
})
.collect()
}
}

Expand All @@ -121,11 +155,59 @@ impl PypiRecordsByName {
self.by_name.get(key).map(|idx| &self.records[*idx])
}

/// Looks up the position of the record with the given package name.
///
/// Returns `None` when no record with that name is stored.
pub fn index_by_name<Q: ?Sized>(&self, key: &Q) -> Option<usize>
where
    uv_normalize::PackageName: Borrow<Q>,
    Q: Hash + Eq,
{
    let position = self.by_name.get(key)?;
    Some(*position)
}

/// Returns `true` if this instance holds no records.
///
/// The check is made on the `records` vector.
pub fn is_empty(&self) -> bool {
self.records.is_empty()
}

/// Returns the number of records stored in this instance.
///
/// The count is taken from the `records` vector, not from the name lookup map.
pub fn len(&self) -> usize {
self.records.len()
}

/// Returns an iterator over the names of the records stored in this instance.
///
/// The names are the keys of the name lookup map. That map is a `HashMap`, so the iteration
/// order is unspecified and does not follow the order of the stored records.
pub fn names(&self) -> impl Iterator<Item = &uv_normalize::PackageName> {
self.by_name.keys()
}

/// Consumes this instance and returns the internally stored records.
///
/// Only the `records` vector is returned; the name lookup map is dropped together with `self`.
pub fn into_inner(self) -> Vec<PypiRecord> {
self.records
}

/// Builds a new instance from an iterator of pypi records in which every package name may occur
/// only once.
///
/// Records are stored in iteration order. As soon as a record is seen whose package name is
/// already present, construction stops and that record is returned as the error.
pub fn from_unique_iter<I: IntoIterator<Item = PypiRecord>>(
    iter: I,
) -> Result<Self, PypiRecord> {
    let source = iter.into_iter();
    // Pre-size both containers using the lower bound reported by the iterator.
    let (lower_bound, _) = source.size_hint();
    let mut records = Vec::with_capacity(lower_bound);
    let mut by_name = HashMap::with_capacity(lower_bound);

    for candidate in source {
        if let Entry::Vacant(slot) = by_name.entry(candidate.0.name.clone()) {
            // The record is appended next, so its index equals the current length.
            slot.insert(records.len());
            records.push(candidate);
        } else {
            return Err(candidate);
        }
    }

    Ok(Self { records, by_name })
}

/// Constructs a new instance from an iterator of pypi records. The records are
/// deduplicated where the record with the highest version wins.
pub fn from_iter<I: IntoIterator<Item = PypiRecord>>(iter: I) -> Self {
Expand All @@ -152,40 +234,4 @@ impl PypiRecordsByName {

Self { records, by_name }
}

/// Builds the subset of these records that is reachable from the given package names by
/// following `requires_dist` entries recursively.
///
/// Names that appear in `conda_package_identifiers`, and names for which no record is stored,
/// are skipped and their dependencies are not followed.
pub fn subset(
    &self,
    package_names: impl IntoIterator<Item = uv_normalize::PackageName>,
    conda_package_identifiers: &HashMap<uv_normalize::PackageName, PypiPackageIdentifier>,
) -> Self {
    // Work list of names still to visit, plus the set of every name ever queued so that a
    // dependency is only scheduled once.
    let mut pending = package_names.into_iter().collect::<Vec<_>>();
    let mut seen = pending.iter().cloned().collect::<HashSet<_>>();
    let mut records = Vec::new();
    let mut by_name = HashMap::new();

    while let Some(name) = pending.pop() {
        // Packages that are provided by conda are not part of the subset.
        if conda_package_identifiers.contains_key(&name) {
            continue;
        }

        // Find the record in the superset of records; unknown names are ignored.
        let record = match self.by_name(&name) {
            Some(record) => record,
            None => continue,
        };

        // Schedule every dependency that has not been queued before.
        for requirement in record.0.requires_dist.iter() {
            if seen.insert(requirement.name.clone()) {
                pending.push(requirement.name.clone());
            }
        }

        // The record is appended next, so its index equals the current length.
        by_name.insert(name, records.len());
        records.push(record.clone());
    }

    Self { records, by_name }
}
}
Loading
Loading