Skip to content

Commit

Permalink
fix: add fallback mechanism for missing mapping (#1166)
Browse files Browse the repository at this point in the history
  • Loading branch information
nichmor authored Apr 11, 2024
1 parent 02c8867 commit 3fadd61
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 651 deletions.
348 changes: 8 additions & 340 deletions examples/conda_mapping/pixi.lock

Large diffs are not rendered by default.

520 changes: 243 additions & 277 deletions examples/pypi/pixi.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ mod pypi_marker_env;
mod pypi_tags;
mod uv_reporter;

mod pypi_mapping;
pub mod pypi_mapping;

pub use activation::get_activation_env;
pub use lock_file::load_lock_file;
Expand Down
28 changes: 22 additions & 6 deletions src/lock_file/package_identifier.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::project::manifest::python::PyPiPackageName;
use crate::{project::manifest::python::PyPiPackageName, pypi_mapping};
use pep508_rs::{Requirement, VersionOrUrl};
use rattler_conda_types::{PackageUrl, RepoDataRecord};
use std::{collections::HashSet, str::FromStr};
Expand Down Expand Up @@ -32,17 +32,33 @@ impl PypiPackageIdentifier {
result: &mut Vec<Self>,
) -> Result<(), ConversionError> {
// Check the PURLs for a python package.
let mut has_pypi_purl = false;
for purl in record.package_record.purls.iter() {
if let Some(entry) = Self::try_from_purl(purl, &record.package_record.version.as_str())?
{
result.push(entry);
has_pypi_purl = true;
}
}

// If there is no pypi purl, but the package is a conda-forge package, we just assume that
// the name of the package is equivalent to the name of the python package.
if !has_pypi_purl && pypi_mapping::is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that its not a valid python package.
let name = PackageName::from_str(record.package_record.name.as_source()).ok();
let version =
pep440_rs::Version::from_str(&record.package_record.version.as_str()).ok();
if let (Some(name), Some(version)) = (name, version) {
result.push(PypiPackageIdentifier {
name: PyPiPackageName::from_normalized(name),
version,
url: record.url.clone(),
// TODO: We can't really tell which python extras are enabled in a conda package.
extras: Default::default(),
})
}
}
// // In previous implementation of this function
// // If there was no pypi purl, but the package was a conda-forge package, we assumed that
// // the name of the package is equivalent to the name of the python package.
// // This could potentially lead to issues similar with pandoc https://github.com/prefix-dev/pixi/issues/710
// // With new approach, we add them *only* if they are present in our mapping.

Ok(())
}
Expand Down
56 changes: 47 additions & 9 deletions src/pypi_mapping/custom_pypi_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,44 @@ use miette::{Context, IntoDiagnostic};
use rattler_conda_types::{PackageUrl, RepoDataRecord};
use reqwest_middleware::ClientWithMiddleware;
use std::{collections::HashMap, sync::Arc};
use url::Url;

use async_once_cell::OnceCell;

use crate::pypi_mapping::MappingLocation;

use super::{prefix_pypi_name_mapping, MappingMap, Reporter};

pub async fn fetch_mapping_from_url(
client: &ClientWithMiddleware,
url: &Url,
) -> miette::Result<HashMap<String, String>> {
let response = client
.get(url.clone())
.send()
.await
.into_diagnostic()
.context(format!(
"failed to download pypi mapping from {} location",
url.as_str()
))?;

if !response.status().is_success() {
return Err(miette::miette!(
"Could not request mapping located at {:?}",
url.as_str()
));
}

let mapping_by_name: HashMap<String, String> =
response.json().await.into_diagnostic().context(format!(
"failed to parse pypi name mapping located at {}. Please make sure that it's a valid json",
url
))?;

Ok(mapping_by_name)
}

pub async fn fetch_custom_mapping(
client: &ClientWithMiddleware,
mapping_url: &MappingMap,
Expand All @@ -29,18 +60,19 @@ pub async fn fetch_custom_mapping(
.send()
.await
.into_diagnostic()
.context(format!("failed to download pypi mapping from {} location", url.as_str()))?;
.context(format!(
"failed to download pypi mapping from {} location",
url.as_str()
))?;

if !response.status().is_success() {
return Err(miette::miette!("Could not request mapping located at {:?}", url.as_str()));
return Err(miette::miette!(
"Could not request mapping located at {:?}",
url.as_str()
));
}

let mapping_by_name: HashMap<String, String> = response
.json()
.await
.into_diagnostic()
.context(format!("failed to parse pypi name mapping located at {}. Please make sure that it's a valid json", url))
?;
let mapping_by_name = fetch_mapping_from_url(client, url).await?;

mapping_url_to_name.insert(name.to_string(), mapping_by_name);
}
Expand Down Expand Up @@ -82,12 +114,18 @@ pub async fn amend_pypi_purls(
reporter,
)
.await?;
let compressed_mapping =
prefix_pypi_name_mapping::conda_pypi_name_compressed_mapping(client).await?;

let custom_mapping = fetch_custom_mapping(client, mapping_url).await?;

for record in conda_packages.iter_mut() {
if !mapping_url.contains_key(&record.channel) {
prefix_pypi_name_mapping::amend_pypi_purls_for_record(record, &prefix_mapping)?;
prefix_pypi_name_mapping::amend_pypi_purls_for_record(
record,
&prefix_mapping,
&compressed_mapping,
)?;
} else {
amend_pypi_purls_for_record(record, custom_mapping)?;
}
Expand Down
14 changes: 12 additions & 2 deletions src/pypi_mapping/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, path::PathBuf, sync::Arc};
use std::{collections::HashMap, path::PathBuf, str::FromStr, sync::Arc};

use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
use rattler_conda_types::RepoDataRecord;
Expand All @@ -9,7 +9,7 @@ use url::Url;
use crate::config::get_cache_dir;

mod custom_pypi_mapping;
mod prefix_pypi_name_mapping;
pub mod prefix_pypi_name_mapping;

pub trait Reporter: Send + Sync {
fn download_started(&self, package: &RepoDataRecord, total: usize);
Expand Down Expand Up @@ -68,3 +68,13 @@ pub async fn amend_pypi_purls(

Ok(())
}

/// Returns `true` if the specified record refers to a conda-forge package.
pub fn is_conda_forge_record(record: &RepoDataRecord) -> bool {
Url::from_str(&record.channel).map_or(false, |u| is_conda_forge_url(&u))
}

/// Returns `true` if the specified url refers to a conda-forge channel.
pub fn is_conda_forge_url(url: &Url) -> bool {
url.path().starts_with("/conda-forge")
}
43 changes: 30 additions & 13 deletions src/pypi_mapping/prefix_pypi_name_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@ use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Semaphore;
use url::Url;

use super::Reporter;
use super::{custom_pypi_mapping, Reporter};

const STORAGE_URL: &str = "https://conda-mapping.prefix.dev";
const HASH_DIR: &str = "hash-v0";
const COMPRESSED_MAPPING: &str =
"https://raw.githubusercontent.com/prefix-dev/parselmouth/main/files/mapping_as_grayskull.json";

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Package {
Expand Down Expand Up @@ -137,15 +140,26 @@ pub async fn conda_pypi_name_mapping(
Ok(result_map)
}

/// Downloads and caches prefix.dev conda-pypi mapping.
pub async fn conda_pypi_name_compressed_mapping(
client: &ClientWithMiddleware,
) -> miette::Result<HashMap<String, String>> {
let compressed_mapping_url =
Url::parse(COMPRESSED_MAPPING).expect("COMPRESSED_MAPPING static variable should be valid");

custom_pypi_mapping::fetch_mapping_from_url(client, &compressed_mapping_url).await
}

/// Amend the records with pypi purls if they are not present yet.
pub async fn amend_pypi_purls(
client: &ClientWithMiddleware,
conda_packages: &mut [RepoDataRecord],
reporter: Option<Arc<dyn Reporter>>,
) -> miette::Result<()> {
let conda_mapping = conda_pypi_name_mapping(client, conda_packages, reporter).await?;
let compressed_mapping = conda_pypi_name_compressed_mapping(client).await?;
for record in conda_packages.iter_mut() {
amend_pypi_purls_for_record(record, &conda_mapping)?;
amend_pypi_purls_for_record(record, &conda_mapping, &compressed_mapping)?;
}

Ok(())
Expand All @@ -158,6 +172,7 @@ pub async fn amend_pypi_purls(
pub fn amend_pypi_purls_for_record(
record: &mut RepoDataRecord,
conda_forge_mapping: &HashMap<Sha256Hash, Package>,
compressed_mapping: &HashMap<String, String>,
) -> miette::Result<()> {
// If the package already has a pypi name we can stop here.
if record
Expand All @@ -171,21 +186,23 @@ pub fn amend_pypi_purls_for_record(

if let Some(sha256) = record.package_record.sha256 {
if let Some(mapped_name) = conda_forge_mapping.get(&sha256) {
if let Some(pypi_names_with_versions) = &mapped_name.versions {
for (pypi_name, pypi_version) in pypi_names_with_versions {
let mut purl = PackageUrl::builder(String::from("pypi"), pypi_name);
// sometimes packages are mapped to 0.0.0
// we don't want this version because we can't prove if it's actual or not
let pypi_version_str = pypi_version.to_string();
if pypi_version_str != "0.0.0" {
purl = purl.with_version(pypi_version_str);
};

let built_purl = purl.build().expect("valid pypi package url and version");
if let Some(pypi_names) = &mapped_name.versions {
for pypi_name in pypi_names.keys() {
let purl = PackageUrl::builder(String::from("pypi"), pypi_name);
let built_purl = purl.build().expect("valid pypi package url");
record.package_record.purls.push(built_purl);
}
}
} else if let Some(mapped_name) =
compressed_mapping.get(record.package_record.name.as_normalized())
{
// maybe the packages is not yet updated
// so fallback to the one from compressed mapping
let purl = PackageUrl::builder(String::from("pypi"), mapped_name);
let built_purl = purl.build().expect("valid pypi package url");
record.package_record.purls.push(built_purl);
}
// nothing was matched so we don't add purls for it
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion tests/common/package_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ impl PackageDatabase {
/// Description of a package.
#[derive(Clone, Debug)]
pub struct Package {
package_record: PackageRecord,
pub package_record: PackageRecord,
subdir: Platform,
archive_type: ArchiveType,
}
Expand Down
41 changes: 39 additions & 2 deletions tests/solve_group_tests.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::str::FromStr;
use std::{collections::HashMap, str::FromStr};

use crate::common::{
package_database::{Package, PackageDatabase},
LockFileExt, PixiControl,
};
use rattler_conda_types::{PackageName, Platform};
use pixi::pypi_mapping;
use rattler_conda_types::{PackageName, Platform, RepoDataRecord};
use rattler_lock::DEFAULT_ENVIRONMENT_NAME;
use serial_test::serial;
use tempfile::TempDir;
Expand Down Expand Up @@ -148,3 +149,39 @@ async fn test_purl_are_added_for_pypi() {
"boltons"
));
}

#[tokio::test]
async fn test_compressed_mapping_catch_missing_package() {
let pixi = PixiControl::new().unwrap();
pixi.init().await.unwrap();

let project = pixi.project().unwrap();
let client = project.authenticated_client();
let foo_bar_package = Package::build("foo-bar-car", "2").finish();

let mut repo_data_record = RepoDataRecord {
package_record: foo_bar_package.package_record,
file_name: "foo-bar-car".to_owned(),
url: Url::parse("https://pypi.org/simple/boltons/").unwrap(),
channel: "dummy-channel".to_owned(),
};

let packages = vec![repo_data_record.clone()];

let conda_mapping =
pypi_mapping::prefix_pypi_name_mapping::conda_pypi_name_mapping(client, &packages, None)
.await
.unwrap();
let compressed_mapping = HashMap::from([("foo-bar-car".to_owned(), "my-test-name".to_owned())]);

pypi_mapping::prefix_pypi_name_mapping::amend_pypi_purls_for_record(
&mut repo_data_record,
&conda_mapping,
&compressed_mapping,
)
.unwrap();

let first_purl = repo_data_record.package_record.purls.pop().unwrap();

assert!(first_purl.name() == "my-test-name")
}

0 comments on commit 3fadd61

Please sign in to comment.