From c6bb5aa757af0d6c3782484401555ae1d968e3cc Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Thu, 11 Jul 2024 10:39:43 +0100 Subject: [PATCH] Add bbox for data command, add countries display --- src/cli.rs | 71 +++++++++++++++++++++++++++------------- src/data_request_spec.rs | 70 ++++++++++++++------------------------- src/display.rs | 49 +++++++++++++++++++++++++++ src/geo.rs | 4 +-- src/metadata.rs | 5 ++- src/parquet.rs | 11 ++++--- src/search.rs | 24 +++++++++----- 7 files changed, 148 insertions(+), 86 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 16aed06..37565b1 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -7,12 +7,11 @@ use log::{debug, info}; use nonempty::nonempty; use popgetter::{ config::Config, - data_request_spec::{self, DataRequestSpec, GeometrySpec}, + data_request_spec::{DataRequestSpec, GeometrySpec, RegionSpec}, formatters::{ CSVFormatter, GeoJSONFormatter, GeoJSONSeqFormatter, OutputFormatter, OutputGenerator, }, geo::BBox, - metadata::get_country_names, search::{ Country, DataPublisher, GeometryLevel, MetricId, SearchContext, SearchParams, SearchResults, SearchText, SourceDataRelease, SourceMetricId, YearRange, @@ -25,7 +24,7 @@ use std::fs::File; use std::{io, process}; use strum_macros::EnumString; -use crate::display::display_search_results; +use crate::display::{display_countries, display_search_results}; const DEFAULT_PROGRESS_SPINNER: Spinners = Spinners::Dots; const COMPLETE_PROGRESS_STRING: &str = "✔"; @@ -56,7 +55,8 @@ pub struct DataCommand { #[arg( short, long, - value_name = "MIN_LAT,MIN_LNG,MAX_LAT,MAX_LNG", + value_name = "MIN_LNG,MIN_LAT,MAX_LNG,MAX_LAT", + allow_hyphen_values = true, help = "Bounding box in which to get the results" )] bbox: Option, @@ -85,6 +85,30 @@ pub struct DataCommand { no_geometry: bool, } +impl From<&DataCommand> for DataRequestSpec { + fn from(value: &DataCommand) -> Self { + let region = value + .bbox + .as_ref() + .map(|bbox| vec![RegionSpec::BoundingBox(bbox.clone())]) + .unwrap_or_default(); + let geometry = GeometrySpec { + // If region_spec provided, override and always include_geoms + include_geoms: if region.len().gt(&0) { + true + } else { + !value.no_geometry + }, + ..Default::default() + }; + Self { + geometry, + region, + ..Default::default() + } + } +} + impl From<&OutputFormat> for OutputFormatter { fn from(value: &OutputFormat) -> Self { match value { @@ -116,13 +140,7 @@ impl RunCommand for DataCommand { // Make DataRequestSpec // TODO: possibly implement From<(DataCommand, SearchParams)> for DataRequestSpec - let data_request_spec = DataRequestSpec { - geometry: GeometrySpec { - include_geoms: !self.no_geometry, - ..Default::default() - }, - ..Default::default() - }; + let data_request_spec = self.into(); // sp.stop_and_persist is potentially a better method, but not obvious how to // store the timing. Leaving below until that option is ruled out. @@ -171,13 +189,16 @@ impl RunCommand for DataCommand { /// The set of ways to search will likley increase over time #[derive(Args, Debug)] pub struct MetricsCommand { - #[arg( - short, - long, - value_name = "MIN_LAT,MIN_LNG,MAX_LAT,MAX_LNG", - help = "Bounding box in which to get the results" - )] - bbox: Option, + // TODO: consider implementation of bbox as part of: + // [#67](https://github.com/Urban-Analytics-Technology-Platform/popgetter-cli/issues/67) + // #[arg( + // short, + // long, + // value_name = "MIN_LNG,MIN_LAT,MAX_LNG,MAX_LAT", + // allow_hyphen_values=true, + // help = "Bounding box in which to get the results" + // )] + // bbox: Option, #[arg( short, long, @@ -358,12 +379,16 @@ pub struct CountriesCommand; impl RunCommand for CountriesCommand { async fn run(&self, config: Config) -> Result<()> { + info!("Running `countries` subcommand"); + let spinner_message = "Downloading countries"; + let mut sp = Spinner::with_timer( + DEFAULT_PROGRESS_SPINNER, + spinner_message.to_string() + RUNNING_TAIL_STRING, + ); let popgetter = Popgetter::new_with_config(config).await?; - println!("The following countries are available:"); - get_country_names(&popgetter.config) - .await? - .into_iter() - .for_each(|country| println!("{country}")); + sp.stop_with_symbol(COMPLETE_PROGRESS_STRING); + println!("\nThe following countries are available:"); + display_countries(popgetter.metadata.countries, None)?; Ok(()) } } diff --git a/src/data_request_spec.rs b/src/data_request_spec.rs index 8d65a74..eb6e47c 100644 --- a/src/data_request_spec.rs +++ b/src/data_request_spec.rs @@ -1,11 +1,10 @@ -use anyhow::Result; +// TODO: this module to be refactored following implementation of SearchParams. +// See [#67](https://github.com/Urban-Analytics-Technology-Platform/popgetter-cli/issues/67) + use serde::{Deserialize, Serialize}; -use std::{ - ops::{Index, IndexMut}, - str::FromStr, -}; -use crate::{parquet::MetricRequest, search::MetricId}; +use crate::geo::BBox; +use crate::search::MetricId; #[derive(Serialize, Deserialize, Debug, Default)] pub struct DataRequestSpec { @@ -15,13 +14,13 @@ pub struct DataRequestSpec { pub years: Option>, } -#[derive(Debug)] -pub struct MetricRequestResult { - pub metrics: Vec, - pub selected_geometry: String, - pub years: Vec, -} - +// #[derive(Debug)] +// pub struct MetricRequestResult { +// pub metrics: Vec, +// pub selected_geometry: String, +// pub years: Vec, +// } +// // impl DataRequestSpec { // /// Generates a vector of metric requests from a `DataRequestSpec` and a catalogue. // pub fn metric_requests( @@ -85,46 +84,25 @@ pub enum RegionSpec { NamedArea(String), } -#[derive(Serialize, Deserialize, Debug)] -pub struct Polygon; - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct BBox(pub [f64; 4]); - -impl Index for BBox { - type Output = f64; - - fn index(&self, index: usize) -> &Self::Output { - &self.0[index] - } -} - -impl IndexMut for BBox { - fn index_mut(&mut self, index: usize) -> &mut Self::Output { - &mut self.0[index] - } -} - -impl FromStr for BBox { - type Err = &'static str; - fn from_str(value: &str) -> Result { - let parts: Vec = value - .split(',') - .map(|s| s.trim().parse::().map_err(|_| "Failed to parse bbox")) - .collect::, _>>()?; - - if parts.len() != 4 { - return Err("Bounding boxes need to have 4 coords"); +impl RegionSpec { + pub fn bbox(&self) -> Option { + match self { + RegionSpec::BoundingBox(bbox) => Some(bbox.clone()), + _ => None, } - let mut bbox = [0.0; 4]; - bbox.copy_from_slice(&parts); - Ok(BBox(bbox)) } } +#[derive(Serialize, Deserialize, Debug)] +pub struct Polygon; + #[cfg(test)] mod tests { + + use std::str::FromStr; + use super::*; + #[test] fn bbox_should_parse_if_correct() { let bbox = BBox::from_str("0.0,1.0,2.0,3.0"); diff --git a/src/display.rs b/src/display.rs index 909bd07..066544f 100644 --- a/src/display.rs +++ b/src/display.rs @@ -1,8 +1,57 @@ use comfy_table::{presets::NOTHING, *}; use itertools::izip; +use polars::{frame::DataFrame, prelude::SortMultipleOptions}; use popgetter::{search::SearchResults, COL}; +pub fn display_countries(countries: DataFrame, max_results: Option) -> anyhow::Result<()> { + let df_to_show = match max_results { + Some(max) => countries.head(Some(max)), + None => countries, + }; + let df_to_show = df_to_show.sort([COL::COUNTRY_ID], SortMultipleOptions::default())?; + let mut table = Table::new(); + table + .load_preset(NOTHING) + .set_content_arrangement(ContentArrangement::Dynamic) + .set_header(vec![ + Cell::new("Country ID").add_attribute(Attribute::Bold), + Cell::new("Country Name (official)").add_attribute(Attribute::Bold), + Cell::new("Country Name (short)").add_attribute(Attribute::Bold), + Cell::new("ISO3116-1 alpha-3").add_attribute(Attribute::Bold), + Cell::new("ISO3116-2").add_attribute(Attribute::Bold), + ]) + .set_style(comfy_table::TableComponent::BottomBorder, '─') + .set_style(comfy_table::TableComponent::MiddleHeaderIntersections, '─') + .set_style(comfy_table::TableComponent::HeaderLines, '─') + .set_style(comfy_table::TableComponent::BottomBorderIntersections, '─') + .set_style(comfy_table::TableComponent::TopBorder, '─') + .set_style(comfy_table::TableComponent::TopBorderIntersections, '─'); + for ( + country_id, + country_name_official, + country_name_short_en, + countr_iso3, + country_iso3116_2, + ) in izip!( + df_to_show.column(COL::COUNTRY_ID)?.str()?, + df_to_show.column(COL::COUNTRY_NAME_OFFICIAL)?.str()?, + df_to_show.column(COL::COUNTRY_NAME_SHORT_EN)?.str()?, + df_to_show.column(COL::COUNTRY_ISO3)?.str()?, + df_to_show.column(COL::COUNTRY_ISO3166_2)?.str()?, + ) { + table.add_row(vec![ + country_id.unwrap_or_default(), + country_name_official.unwrap_or_default(), + country_name_short_en.unwrap_or_default(), + countr_iso3.unwrap_or_default(), + country_iso3116_2.unwrap_or_default(), + ]); + } + println!("\n{}", table); + Ok(()) +} + pub fn display_search_results( results: SearchResults, max_results: Option, diff --git a/src/geo.rs b/src/geo.rs index 296779a..7639c06 100644 --- a/src/geo.rs +++ b/src/geo.rs @@ -15,7 +15,7 @@ use std::{ /// `bbox`: an optional bounding box to filter the features by /// /// Returns: a Result object containing a vector of (geometry, properties). -pub async fn get_geometries(file_url: &str, bbox: Option<&BBox>) -> Result { +pub async fn get_geometries(file_url: &str, bbox: Option) -> Result { let fgb = HttpFgbReader::open(file_url).await?; let mut fgb = if let Some(bbox) = bbox { @@ -235,7 +235,7 @@ mod tests { -1.373_095_490_899_146_4, 53.026_908_220_355_35, ]); - let geoms = get_geometries(&server.url("/fgb_example.fgb"), Some(&bbox)).await; + let geoms = get_geometries(&server.url("/fgb_example.fgb"), Some(bbox)).await; assert!(geoms.is_ok(), "The geometry call should not error"); let geoms = geoms.unwrap(); diff --git a/src/metadata.rs b/src/metadata.rs index 8147171..9044903 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -172,10 +172,9 @@ impl CountryMetadataLoader { } } -pub async fn get_country_names(config: &Config) -> anyhow::Result> { - let country_text_file = format!("{}/countries.txt", config.base_path); +async fn get_country_names(config: &Config) -> anyhow::Result> { Ok(reqwest::Client::new() - .get(&country_text_file) + .get(&format!("{}/countries.txt", config.base_path)) .send() .await? .text() diff --git a/src/parquet.rs b/src/parquet.rs index 566dc2f..f7cdd12 100644 --- a/src/parquet.rs +++ b/src/parquet.rs @@ -68,8 +68,7 @@ pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[&str]>) -> Resul // generally true let mut joined_df: Option = None; - // Merge the dataframes from each remove file in to a single - // dataframe + // Merge the dataframes from each remove file in to a single dataframe for df in dfs? { if let Some(prev_dfs) = joined_df { joined_df = Some(prev_dfs.join( @@ -82,8 +81,12 @@ pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[&str]>) -> Resul joined_df = Some(df.clone()); } } - - joined_df.with_context(|| "Failed to combine data queries") + // Return if None, or return df with COL::GEO_ID first + Ok(joined_df + .with_context(|| "Failed to combine data queries")? + .lazy() + .select(&[col(COL::GEO_ID), col("*").exclude([COL::GEO_ID])]) + .collect()?) } #[cfg(test)] diff --git a/src/search.rs b/src/search.rs index 2bd112f..b139f26 100644 --- a/src/search.rs +++ b/src/search.rs @@ -3,7 +3,7 @@ use crate::{ config::Config, data_request_spec::DataRequestSpec, - geo::{get_geometries, BBox}, + geo::get_geometries, metadata::ExpandedMetadata, parquet::{get_metrics, MetricRequest}, COL, @@ -308,11 +308,6 @@ impl From for Option { #[derive(Clone, Debug)] pub struct SearchResults(pub DataFrame); -pub(crate) struct DownloadConfig { - pub bbox: Option, - pub include_geometry: bool, -} - impl SearchResults { /// Convert all the metrics in the dataframe to MetricRequests pub fn to_metric_requests(self, config: &Config) -> Vec { @@ -374,12 +369,25 @@ impl SearchResults { // TODO Handle multiple responses if all_geom_files.len() > 1 { - panic!("Multiple geometries not yet supported"); + todo!("Multiple geometries not yet supported"); } let result = if data_request_spec.geometry.include_geoms { // TODO Pass in the bbox as the second argument here - let geoms = get_geometries(all_geom_files.iter().next().unwrap(), None); + if data_request_spec.region.len() > 1 { + todo!( + "Multiple region specifications are not yet supported: {:#?}", + data_request_spec.region + ); + } + debug!("{:#?}", data_request_spec.region.first().unwrap().bbox()); + let geoms = get_geometries( + all_geom_files.iter().next().unwrap(), + data_request_spec + .region + .first() + .and_then(|region_spec| region_spec.bbox().clone()), + ); // try_join requires us to have the errors from all futures be the same. // We use anyhow to get it back properly