Skip to content

Commit

Permalink
fix: conda transitive dependencies of pypi packages are properly extr…
Browse files Browse the repository at this point in the history
…acted
  • Loading branch information
baszalmstra committed Mar 12, 2024
1 parent a6a7fa4 commit 1333292
Show file tree
Hide file tree
Showing 22 changed files with 6,327 additions and 636 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ human_bytes = "0.4.3"
humantime = "2.1.0"
indexmap = { version = "2.2.5", features = ["serde"] }
indicatif = "0.17.8"
insta = { version = "1.36.1", features = ["yaml"] }

install-wheel-rs = { git = "https://github.com/astral-sh/uv", tag = "0.1.16" }
is_executable = "1.0.1"
Expand Down Expand Up @@ -151,6 +150,8 @@ serde_json = "1.0.114"
serial_test = "3.0.0"
tokio = { version = "1.36.0", features = ["rt"] }
toml = "0.8.10"
insta = { version = "1.36.1", features = ["yaml", "glob"] }


[patch.crates-io]
# For pyproject-toml
Expand Down
95 changes: 47 additions & 48 deletions src/lock_file/package_identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use crate::project::manifest::python::PyPiPackageName;
use crate::pypi_name_mapping;
use pep508_rs::{Requirement, VersionOrUrl};
use rattler_conda_types::{PackageUrl, RepoDataRecord};
use rattler_lock::CondaPackage;
use std::{collections::HashSet, str::FromStr};
use thiserror::Error;
use uv_normalize::{ExtraName, InvalidNameError, PackageName};
Expand All @@ -24,53 +23,53 @@ impl PypiPackageIdentifier {
Ok(result)
}

/// Builds an identifier from a locked PyPI package.
///
/// The name and version are cloned from the package data and the extras are collected from
/// `package.extras()`. The visible body never produces an error; the `Result` return type is
/// kept for callers.
pub fn from_locked_pypi_dependency(
    package: &rattler_lock::PypiPackage,
) -> Result<Self, ConversionError> {
    let data = package.data();
    let name = PyPiPackageName::from_normalized(data.package.name.clone());
    let version = data.package.version.clone();
    let extras = package.extras().iter().cloned().collect();
    Ok(Self {
        name,
        version,
        extras,
    })
}

/// Collects the python packages that are installed when the given locked conda package is
/// installed.
///
/// Every purl of the package record is passed to `Self::try_from_purl`; each identifier it
/// yields is collected and a conversion error is propagated to the caller. If no purl yields
/// an identifier and `pypi_name_mapping::is_conda_forge_url` accepts the package url, the
/// conda name and version are reused as the python name and version when both parse.
pub fn from_locked_conda_dependency(
    package: &CondaPackage,
) -> Result<Vec<Self>, ConversionError> {
    let record = package.package_record();
    let conda_version = record.version.as_str();

    // Gather the identifiers that can be derived from the purls of the package.
    let mut identifiers = Vec::new();
    for purl in record.purls.iter() {
        if let Some(identifier) = Self::try_from_purl(purl, &conda_version)? {
            identifiers.push(identifier);
        }
    }

    // Identifiers from purls take precedence, and the name-based fallback below is only
    // applied to conda-forge packages.
    if !identifiers.is_empty() || !pypi_name_mapping::is_conda_forge_url(package.url()) {
        return Ok(identifiers);
    }

    // Assume the conda package name equals the python package name. If either the name or
    // the version does not convert, the package is treated as not being a python package.
    let name = PackageName::from_str(record.name.as_normalized()).ok();
    let version = pep440_rs::Version::from_str(&conda_version).ok();
    if let Some((name, version)) = name.zip(version) {
        identifiers.push(PypiPackageIdentifier {
            name: PyPiPackageName::from_normalized(name),
            version,
            // TODO: We can't really tell which python extras are enabled in a conda package.
            extras: Default::default(),
        });
    }

    Ok(identifiers)
}
// /// Constructs a new instance from a locked Pypi dependency.
// pub fn from_locked_pypi_dependency(
// package: &rattler_lock::PypiPackage,
// ) -> Result<Self, ConversionError> {
// Ok(Self {
// name: PyPiPackageName::from_normalized(package.data().package.name.clone()),
// version: package.data().package.version.clone(),
// extras: package.extras().iter().cloned().collect(),
// })
// }
//
// /// Determine the python packages that will be installed when the specified locked dependency is
// /// installed.
// pub fn from_locked_conda_dependency(
// package: &CondaPackage,
// ) -> Result<Vec<Self>, ConversionError> {
// let record = package.package_record();
// let mut result = Vec::new();
//
// // Get the PyPI urls from the package
// let mut has_pypi_purl = false;
// for purl in record.purls.iter() {
// if let Some(entry) = Self::try_from_purl(purl, &record.version.as_str())? {
// result.push(entry);
// has_pypi_purl = true;
// }
// }
//
// // If there is no pypi purl, but the package is a conda-forge package, we just assume that
// // the name of the package is equivalent to the name of the python package.
// if !has_pypi_purl && pypi_name_mapping::is_conda_forge_url(package.url()) {
// // Convert the conda package names to pypi package names. If the conversion fails we
// // just assume that its not a valid python package.
// let name = PackageName::from_str(record.name.as_normalized()).ok();
// let version = pep440_rs::Version::from_str(&record.version.as_str()).ok();
// if let (Some(name), Some(version)) = (name, version) {
// result.push(PypiPackageIdentifier {
// name: PyPiPackageName::from_normalized(name),
// version,
// // TODO: We can't really tell which python extras are enabled in a conda package.
// extras: Default::default(),
// });
// }
// }
//
// Ok(result)
// }

/// Helper function that extracts the python packages that will be installed and writes the
/// result into a pre-allocated vector.
Expand Down
192 changes: 119 additions & 73 deletions src/lock_file/records_by_name.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::lock_file::{PypiPackageIdentifier, PypiRecord};
use crate::pypi_tags::is_python_record;
use rattler_conda_types::{PackageName, RepoDataRecord};
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::hash::Hash;

/// A struct that holds both a `Vec` of `RepoDataRecord` and a mapping from name to index.
Expand Down Expand Up @@ -33,11 +34,59 @@ impl RepoDataRecordsByName {
self.by_name.get(key).map(|idx| &self.records[*idx])
}

/// Looks up the position of the record stored under `key`.
///
/// Returns `None` when no record is registered under that name.
pub fn index_by_name<Q>(&self, key: &Q) -> Option<usize>
where
    PackageName: Borrow<Q>,
    Q: Hash + Eq + ?Sized,
{
    let idx = self.by_name.get(key)?;
    Some(*idx)
}

/// Finds the first stored record for which `is_python_record` returns true, i.e. the record
/// of the python interpreter. Returns `None` if there is no such record.
pub fn python_interpreter_record(&self) -> Option<&RepoDataRecord> {
    for record in self.records.iter() {
        if is_python_record(record) {
            return Some(record);
        }
    }
    None
}

/// Returns true if there are no records stored in this instance
pub fn is_empty(&self) -> bool {
self.records.is_empty()
}

/// Returns the number of entries in the mapping.
pub fn len(&self) -> usize {
self.records.len()
}

/// Converts this instance into the internally stored records.
pub fn into_inner(self) -> Vec<RepoDataRecord> {
self.records
}

/// Builds an instance from repodata records that must all have distinct package names.
///
/// Records keep the order in which the iterator yields them. As soon as a record is seen
/// whose package name is already present, that record is returned (boxed) as the error.
pub fn from_unique_iter<I: IntoIterator<Item = RepoDataRecord>>(
    iter: I,
) -> Result<Self, Box<RepoDataRecord>> {
    let iter = iter.into_iter();
    // Pre-allocate for the lower bound of the number of records the iterator reports.
    let (lower_bound, _) = iter.size_hint();
    let mut by_name = HashMap::with_capacity(lower_bound);
    let mut records = Vec::with_capacity(lower_bound);
    for record in iter {
        let name = record.package_record.name.clone();
        if let Entry::Vacant(slot) = by_name.entry(name) {
            // The record is appended next, so its index is the current length.
            slot.insert(records.len());
            records.push(record);
        } else {
            return Err(Box::new(record));
        }
    }
    Ok(Self { records, by_name })
}

/// Constructs a new instance from an iterator of repodata records. The records are
/// deduplicated where the record with the highest version wins.
pub fn from_iter<I: IntoIterator<Item = RepoDataRecord>>(iter: I) -> Self {
Expand Down Expand Up @@ -65,43 +114,28 @@ impl RepoDataRecordsByName {
Self { records, by_name }
}

/// Constructs a subset of the records in this set that only contain the packages with the given
/// names and recursively their dependencies.
pub fn subset(
/// Convert the records into a map of pypi package identifiers mapped to the records they were
/// extracted from.
pub fn by_pypi_name(
&self,
package_names: impl IntoIterator<Item = PackageName>,
virtual_packages: &HashSet<PackageName>,
) -> Self {
let mut queue = package_names.into_iter().collect::<Vec<_>>();
let mut queued_names = queue.iter().cloned().collect::<HashSet<_>>();
let mut records = Vec::new();
let mut by_name = HashMap::new();
while let Some(package) = queue.pop() {
// Find the record in the superset of records
let found_package = if virtual_packages.contains(&package) {
continue;
} else if let Some(record) = self.by_name(&package) {
record
} else {
continue;
};

// Find all the dependencies of the package and add them to the queue
for dependency in found_package.package_record.depends.iter() {
let dependency_name = PackageName::new_unchecked(
dependency.split_once(' ').unwrap_or((&dependency, "")).0,
);
if queued_names.insert(dependency_name.clone()) {
queue.push(dependency_name);
}
}

let idx = records.len();
by_name.insert(package, idx);
records.push(found_package.clone());
}

Self { records, by_name }
) -> HashMap<uv_normalize::PackageName, (PypiPackageIdentifier, usize, &RepoDataRecord)> {
self.records
.iter()
.enumerate()
.filter_map(|(idx, record)| {
PypiPackageIdentifier::from_record(record)
.ok()
.map(move |identifiers| (idx, record, identifiers))
})
.flat_map(|(idx, record, identifiers)| {
identifiers.into_iter().map(move |identifier| {
(
identifier.name.as_normalized().clone(),
(identifier, idx, record),
)
})
})
.collect()
}
}

Expand All @@ -121,11 +155,59 @@ impl PypiRecordsByName {
self.by_name.get(key).map(|idx| &self.records[*idx])
}

/// Looks up the position of the record stored under `key`.
///
/// Returns `None` when no record is registered under that name.
pub fn index_by_name<Q>(&self, key: &Q) -> Option<usize>
where
    uv_normalize::PackageName: Borrow<Q>,
    Q: Hash + Eq + ?Sized,
{
    let idx = self.by_name.get(key)?;
    Some(*idx)
}

/// Returns true if there are no records stored in this instance
pub fn is_empty(&self) -> bool {
self.records.is_empty()
}

/// Returns the number of entries in the mapping.
pub fn len(&self) -> usize {
self.records.len()
}

/// Iterates over the package names under which records are registered. The names are the
/// keys of the internal hash map, so no particular order is guaranteed.
pub fn names(&self) -> impl Iterator<Item = &uv_normalize::PackageName> {
    self.by_name.iter().map(|(name, _)| name)
}

/// Converts this instance into the internally stored records.
pub fn into_inner(self) -> Vec<PypiRecord> {
self.records
}

/// Builds an instance from pypi records that must all have distinct package names.
///
/// Records keep the order in which the iterator yields them. As soon as a record is seen
/// whose package name is already present, that record is returned as the error.
pub fn from_unique_iter<I: IntoIterator<Item = PypiRecord>>(
    iter: I,
) -> Result<Self, PypiRecord> {
    let iter = iter.into_iter();
    // Pre-allocate for the lower bound of the number of records the iterator reports.
    let (lower_bound, _) = iter.size_hint();
    let mut by_name = HashMap::with_capacity(lower_bound);
    let mut records = Vec::with_capacity(lower_bound);
    for record in iter {
        let name = record.0.name.clone();
        if let Entry::Vacant(slot) = by_name.entry(name) {
            // The record is appended next, so its index is the current length.
            slot.insert(records.len());
            records.push(record);
        } else {
            return Err(record);
        }
    }
    Ok(Self { records, by_name })
}

/// Constructs a new instance from an iterator of pypi records. The records are
/// deduplicated where the record with the highest version wins.
pub fn from_iter<I: IntoIterator<Item = PypiRecord>>(iter: I) -> Self {
Expand All @@ -152,40 +234,4 @@ impl PypiRecordsByName {

Self { records, by_name }
}

/// Builds the subset of these records that is reachable from `package_names`.
///
/// Starting from the given names, every record found in this set is copied into the result
/// and the names listed in its `requires_dist` are visited in turn. Names that are keys of
/// `conda_package_identifiers` and names without a record in this set are skipped, and
/// their dependencies are not followed.
pub fn subset(
    &self,
    package_names: impl IntoIterator<Item = uv_normalize::PackageName>,
    conda_package_identifiers: &HashMap<uv_normalize::PackageName, PypiPackageIdentifier>,
) -> Self {
    // Names still to be visited, plus every name that has ever been scheduled.
    let mut pending: Vec<_> = package_names.into_iter().collect();
    let mut scheduled: HashSet<_> = pending.iter().cloned().collect();

    let mut records = Vec::new();
    let mut by_name = HashMap::new();

    while let Some(name) = pending.pop() {
        // Skip names that are keys of `conda_package_identifiers`.
        if conda_package_identifiers.contains_key(&name) {
            continue;
        }

        // Names without a record in the superset are ignored.
        let record = match self.by_name(&name) {
            Some(record) => record,
            None => continue,
        };

        // Schedule every dependency that has not been scheduled before.
        for requirement in record.0.requires_dist.iter() {
            if scheduled.insert(requirement.name.clone()) {
                pending.push(requirement.name.clone());
            }
        }

        // The record is appended next, so its index is the current length.
        by_name.insert(name, records.len());
        records.push(record.clone());
    }

    Self { records, by_name }
}
}
Loading

0 comments on commit 1333292

Please sign in to comment.