From 9c2b2aad48a18f80adaa328cfedb78327d9d238b Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Wed, 30 Oct 2024 17:19:22 +0100 Subject: [PATCH] revamp search --- Cargo.lock | 36 ++++- Cargo.toml | 2 + src/cli/search.rs | 389 +++++++++++----------------------------------- 3 files changed, 127 insertions(+), 300 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a67d68e18..4ee8a4ff7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -582,6 +582,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bytecount" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" + [[package]] name = "byteorder" version = "1.5.0" @@ -1705,9 +1711,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "globset" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" dependencies = [ "aho-corasick", "bstr", @@ -3101,6 +3107,17 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb37767f6569cd834a413442455e0f066d0d522de8630436e2a1761d9726ba56" +[[package]] +name = "papergrid" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7419ad52a7de9b60d33e11085a0fe3df1fbd5926aa3f93d3dd53afbc9e86725" +dependencies = [ + "bytecount", + "fnv", + "unicode-width", +] + [[package]] name = "parking" version = "2.2.1" @@ -3357,6 +3374,7 @@ dependencies = [ "fs-err", "fs_extra", "futures", + "globset", "http 1.1.0", "human_bytes", "humantime", @@ -3409,6 +3427,7 @@ dependencies = [ "shlex", "signal-hook", "strsim", + "tabled", "tabwriter", "tar", "tempfile", @@ -5573,6 +5592,15 @@ dependencies = [ "libc", ] +[[package]] +name = "tabled" +version = 
"0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c9303ee60b9bedf722012ea29ae3711ba13a67c9b9ae28993838b63057cb1b" +dependencies = [ + "papergrid", +] + [[package]] name = "tabwriter" version = "1.4.0" @@ -6005,9 +6033,9 @@ dependencies = [ [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unsafe-libyaml" diff --git a/Cargo.toml b/Cargo.toml index 5f5459fc6..e8e45b4cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -279,6 +279,8 @@ uv-resolver = { workspace = true } uv-types = { workspace = true } xxhash-rust = { workspace = true } zip = { workspace = true, features = ["deflate", "time"] } +tabled = { version = "0.16.0", default-features = false, features = ["std"] } +globset = "0.4.15" [target.'cfg(unix)'.dependencies] diff --git a/src/cli/search.rs b/src/cli/search.rs index ea882b906..dcc9c8254 100644 --- a/src/cli/search.rs +++ b/src/cli/search.rs @@ -5,6 +5,7 @@ use std::io::{self, Write}; use std::str::FromStr; use clap::Parser; +use ignore::gitignore::Glob; use itertools::Itertools; use miette::IntoDiagnostic; use pixi_config::default_channel_config; @@ -15,6 +16,7 @@ use rattler_conda_types::{PackageName, Platform, RepoDataRecord}; use rattler_repodata_gateway::{GatewayError, RepoData}; use regex::Regex; use strsim::jaro; +use tabled::settings::style::{HorizontalLine, VerticalLine}; use url::Url; use crate::cli::cli_config::ProjectConfig; @@ -31,7 +33,7 @@ use super::cli_config::ChannelsConfig; pub struct Args { /// Name of package to search #[arg(required = true)] - pub package: String, + pub search_spec: String, #[clap(flatten)] channels: ChannelsConfig, @@ -97,8 +99,78 @@ where Ok(latest_packages) } +use tabled::{builder::Builder, settings::Style}; + +/// 
Print a table of repodata records to stdout using tabled.
+///
+/// Rows are ordered by ascending version. With `group_by_version`, each version gets
+/// one row: its first build (by build string) plus a `(+N)` count of further builds.
+fn print_table(records: &[RepoDataRecord], group_by_version: bool) {
+    let mut builder = Builder::default();
+
+    let style = Style::modern()
+        .horizontals([(1, HorizontalLine::inherit(Style::modern()).horizontal('═'))])
+        .remove_frame()
+        .remove_horizontal()
+        .remove_vertical();
+
+    // header line
+    builder.push_record(vec!["Name", "Version", "Build", "Channel", "Subdir"]);
+
+    if group_by_version {
+        // Bucket the records by the textual form of their version.
+        let mut version_groups: HashMap<String, Vec<&RepoDataRecord>> = HashMap::new();
+        for record in records {
+            version_groups
+                .entry(record.package_record.version.to_string())
+                .or_default()
+                .push(record);
+        }
+
+        // Order groups by the version's own ordering, not by the string key:
+        // a lexicographic sort would list "1.10" before "1.9". Every group holds
+        // at least one record, so indexing with `[0]` cannot panic.
+        let ordered = version_groups.into_iter().sorted_by(|(_, a), (_, b)| {
+            a[0].package_record.version.cmp(&b[0].package_record.version)
+        });
+        for (version, mut group) in ordered {
+            group.sort_by(|a, b| a.package_record.build.cmp(&b.package_record.build));
+
+            let first = group[0];
+            let build_count = if group.len() > 1 {
+                format!("{} (+{})", first.package_record.build, group.len() - 1)
+            } else {
+                first.package_record.build.to_string()
+            };
+            builder.push_record(vec![
+                first.package_record.name.as_normalized().to_string(),
+                version,
+                build_count,
+                first.channel.to_string(),
+                first.package_record.subdir.to_string(),
+            ]);
+        }
+    } else {
+        // Ungrouped display: one row per record.
+        for record in records
+            .iter()
+            .sorted_by(|a, b| a.package_record.version.cmp(&b.package_record.version))
+        {
+            builder.push_record(vec![
+                record.package_record.name.as_normalized().to_string(),
+                record.package_record.version.to_string(),
+                record.package_record.build.to_string(),
+                record.channel.to_string(),
+                record.package_record.subdir.to_string(),
+            ]);
+        }
+    }
+
+    let mut table = builder.build();
+    println!("{}", table.with(style));
+}
+
 pub async fn execute(args: Args) -> miette::Result<()> {
- let stdout = 
io::stdout(); let project = Project::load_or_else_discover(args.project_config.manifest_path.as_deref()).ok(); // Resolve channels from project / CLI args @@ -108,8 +180,6 @@ pub async fn execute(args: Args) -> miette::Result<()> { channels.iter().map(|c| c.name()).format(", ") ); - let package_name_filter = args.package; - let client = project .as_ref() .map(|p| p.authenticated_client().clone()) @@ -128,309 +198,36 @@ pub async fn execute(args: Args) -> miette::Result<()> { .await .into_diagnostic()?; + let matched_names = match_names(&all_names, &args.search_spec); + // Compute the repodata query function that will be used to fetch the repodata for // filtered package names - - let repodata_query_func = |some_specs: Vec| { + let repodata_query_func = |specs: Vec| { gateway - .query( - channels.clone(), - [args.platform, Platform::NoArch], - some_specs.clone(), - ) + .query(channels.clone(), [args.platform, Platform::NoArch], specs) .into_future() }; - // When package name filter contains * (wildcard), it will search and display a - // list of packages matching this filter - if package_name_filter.contains('*') { - let package_name_without_filter = package_name_filter.replace('*', ""); - let package_name = PackageName::try_from(package_name_without_filter).into_diagnostic()?; - - search_package_by_wildcard( - package_name, - &package_name_filter, - all_names, - repodata_query_func, - args.limit, - stdout, - ) - .await?; - } - // If package name filter doesn't contain * (wildcard), it will search and display specific - // package info (if any package is found) - else { - let package_name = PackageName::try_from(package_name_filter).into_diagnostic()?; - - search_exact_package(package_name, all_names, repodata_query_func, stdout).await?; - } - - Project::warn_on_discovered_from_env(args.project_config.manifest_path.as_deref()); - Ok(()) -} - -async fn search_exact_package( - package_name: PackageName, - 
all_repodata_names: Vec, - repodata_query_func: QF, - out: W, -) -> miette::Result<()> -where - QF: Fn(Vec) -> FR, - FR: Future, GatewayError>>, -{ - let package_name_search = package_name.clone(); - let packages = search_package_by_filter( - &package_name_search, - all_repodata_names, - repodata_query_func, - |pn, n| pn == n, - ) - .await?; - - if packages.is_empty() { - let normalized_package_name = package_name.as_normalized(); - return Err(miette::miette!("Package {normalized_package_name} not found, please use a wildcard '*' in the search name for a broader result.")); - } - - let package = packages.last(); - if let Some(package) = package { - if let Err(e) = print_package_info(package, out) { - if e.kind() != std::io::ErrorKind::BrokenPipe { - return Err(e).into_diagnostic(); - } - } - } - - Ok(()) -} - -fn print_package_info(package: &RepoDataRecord, mut out: W) -> io::Result<()> { - writeln!(out)?; - - let package = package.clone(); - let package_name = package.package_record.name.as_source(); - let build = &package.package_record.build; - let package_info = format!("{} {}", console::style(package_name), console::style(build)); - writeln!(out, "{}", package_info)?; - writeln!(out, "{}\n", "-".repeat(package_info.chars().count()))?; - - writeln!( - out, - "{:19} {:19}", - console::style("Name"), - console::style(package_name) - )?; - - writeln!( - out, - "{:19} {:19}", - console::style("Version"), - console::style(package.package_record.version) - )?; - - writeln!( - out, - "{:19} {:19}", - console::style("Build"), - console::style(build) - )?; - - let size = match package.package_record.size { - Some(size) => size.to_string(), - None => String::from("Not found."), - }; - writeln!( - out, - "{:19} {:19}", - console::style("Size"), - console::style(size) - )?; - - let license = match package.package_record.license { - Some(license) => license, - None => String::from("Not found."), - }; - writeln!( - out, - "{:19} {:19}", - console::style("License"), - 
console::style(license) - )?; - - writeln!( - out, - "{:19} {:19}", - console::style("Subdir"), - console::style(package.package_record.subdir) - )?; - - writeln!( - out, - "{:19} {:19}", - console::style("File Name"), - console::style(package.file_name) - )?; - - writeln!( - out, - "{:19} {:19}", - console::style("URL"), - console::style(package.url) - )?; - - let md5 = match package.package_record.md5 { - Some(md5) => format!("{:x}", md5), - None => "Not available".to_string(), - }; - writeln!( - out, - "{:19} {:19}", - console::style("MD5"), - console::style(md5) - )?; - - let sha256 = match package.package_record.sha256 { - Some(sha256) => format!("{:x}", sha256), - None => "Not available".to_string(), - }; - writeln!( - out, - "{:19} {:19}", - console::style("SHA256"), - console::style(sha256), - )?; - - writeln!(out, "\nDependencies:")?; - for dependency in package.package_record.depends { - writeln!(out, " - {}", dependency)?; - } - - Ok(()) -} - -async fn search_package_by_wildcard( - package_name: PackageName, - package_name_filter: &str, - all_package_names: Vec, - repodata_query_func: QF, - limit: Option, - out: W, -) -> miette::Result<()> -where - QF: Fn(Vec) -> FR + Clone, - FR: Future, GatewayError>>, -{ - let wildcard_pattern = Regex::new(&format!("^{}$", &package_name_filter.replace('*', ".*"))) - .expect("Expect only characters and/or * (wildcard)."); - - let package_name_search = package_name.clone(); - - let mut packages = await_in_progress("searching packages", |_| async { - let packages = search_package_by_filter( - &package_name_search, - all_package_names.clone(), - repodata_query_func.clone(), - |pn, _| wildcard_pattern.is_match(pn.as_normalized()), - ) - .await?; - - if !packages.is_empty() { - return Ok(packages); - } + for name in matched_names { + let result = repodata_query_func(vec![&name]) + .await + .into_diagnostic()?; - tracing::info!("No packages found with wildcard search, trying with fuzzy search."); - let similarity = 0.85; - 
search_package_by_filter( - &package_name_search, - all_package_names, - repodata_query_func, - |pn, n| jaro(pn.as_normalized(), n.as_normalized()) > similarity, - ) - .await - }) - .await?; - - let normalized_package_name = package_name.as_normalized(); - packages.sort_by(|a, b| { - let ord = jaro( - b.package_record.name.as_normalized(), - normalized_package_name, - ) - .partial_cmp(&jaro( - a.package_record.name.as_normalized(), - normalized_package_name, - )); - if let Some(ord) = ord { - ord - } else { - Ordering::Equal + // flatten the records + let mut flattened = Vec::new(); + for repo in result { + flattened.extend(repo.into_iter().cloned()); } - }); - - if packages.is_empty() { - return Err(miette::miette!("Could not find {normalized_package_name}")); - } + print_table(flattened.as_slice(), true); - if let Err(e) = print_matching_packages(&packages, out, limit) { - if e.kind() != std::io::ErrorKind::BrokenPipe { - return Err(e).into_diagnostic(); - } } - + // Emit the discovery warning once, after every match has been printed. + Project::warn_on_discovered_from_env(args.project_config.manifest_path.as_deref()); Ok(()) } -fn print_matching_packages( - packages: &[RepoDataRecord], - mut out: W, - limit: Option, -) -> io::Result<()> { - writeln!( - out, - "{:40} {:19} {:19}", - console::style("Package").bold(), - console::style("Version").bold(), - console::style("Channel").bold(), - )?; - - // split off at `limit`, discard the second half - let limit = limit.unwrap_or(usize::MAX); - - let (packages, remaining_packages) = if limit < packages.len() { - packages.split_at(limit) - } else { - (packages, &[][..]) - }; - - let channel_config = default_channel_config(); - for package in packages { - // TODO: change channel fetch logic to be more robust - // currently it relies on channel field being a url with trailing slash - // https://github.com/mamba-org/rattler/issues/146 - - let channel_name = Url::from_str(&package.channel) - .ok() - .and_then(|url| channel_config.strip_channel_alias(&url)) - .unwrap_or_else(|| 
package.channel.to_string()); - - let channel_name = format!("{}/{}", channel_name, package.package_record.subdir); - - let package_name = &package.package_record.name; - let version = package.package_record.version.as_str(); - - writeln!( - out, - "{:40} {:19} {:19}", - console::style(package_name.as_source()).cyan().bright(), - console::style(version), - console::style(channel_name), - )?; - } - - if !remaining_packages.is_empty() { - println!("... and {} more", remaining_packages.len()); - } - - Ok(())
+/// Return every name in `all_names` whose normalized form matches `search_spec`.
+///
+/// `search_spec` is compiled as a `globset` glob pattern. A pattern that fails to
+/// compile is reported with a warning and matches nothing, so a malformed
+/// user-supplied search string does not panic.
+fn match_names(all_names: &[PackageName], search_spec: &str) -> Vec<PackageName> {
+    let glob = match globset::Glob::new(search_spec) {
+        Ok(glob) => glob.compile_matcher(),
+        Err(err) => {
+            tracing::warn!("invalid search pattern '{}': {}", search_spec, err);
+            return Vec::new();
+        }
+    };
+
+    all_names
+        .iter()
+        .filter(|name| glob.is_match(name.as_normalized()))
+        .cloned()
+        .collect()
 }