diff --git a/src/cli.rs b/src/cli.rs index 9413d98..16aed06 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -7,6 +7,7 @@ use log::{debug, info}; use nonempty::nonempty; use popgetter::{ config::Config, + data_request_spec::{self, DataRequestSpec, GeometrySpec}, formatters::{ CSVFormatter, GeoJSONFormatter, GeoJSONSeqFormatter, OutputFormatter, OutputGenerator, }, @@ -77,6 +78,11 @@ pub struct DataCommand { help = "Force run without prompt" )] force_run: bool, + #[arg( + long = "no-geometry", + help = "When set, no geometry data is included in the results" + )] + no_geometry: bool, } impl From<&OutputFormat> for OutputFormatter { @@ -108,6 +114,16 @@ impl RunCommand for DataCommand { let popgetter = Popgetter::new_with_config(config).await?; let search_results = popgetter.search(self.search_params_args.clone().into()); + // Make DataRequestSpec + // TODO: possibly implement From<(DataCommand, SearchParams)> for DataRequestSpec + let data_request_spec = DataRequestSpec { + geometry: GeometrySpec { + include_geoms: !self.no_geometry, + ..Default::default() + }, + ..Default::default() + }; + // sp.stop_and_persist is potentially a better method, but not obvious how to // store the timing. Leaving below until that option is ruled out. // sp.stop_and_persist(&COMPLETE_PROGRESS_STRING, spinner_message.into()); @@ -132,7 +148,9 @@ impl RunCommand for DataCommand { DEFAULT_PROGRESS_SPINNER, spinner_message.to_string() + RUNNING_TAIL_STRING, ); - let mut data = search_results.download(&popgetter.config).await?; + let mut data = search_results + .download(&popgetter.config, data_request_spec) + .await?; sp.stop_with_symbol(COMPLETE_PROGRESS_STRING); debug!("{data:#?}"); @@ -142,8 +160,7 @@ impl RunCommand for DataCommand { let mut f = File::create(output_file)?; formatter.save(&mut f, &mut data)?; } else { - let stdout = std::io::stdout(); - let mut stdout_lock = stdout.lock(); + let mut stdout_lock = std::io::stdout().lock(); formatter.save(&mut stdout_lock, &mut data)?; }; Ok(()) diff --git a/src/data_request_spec.rs b/src/data_request_spec.rs new file mode 100644 index 0000000..8d65a74 --- /dev/null +++ b/src/data_request_spec.rs @@ -0,0 +1,149 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::{ + ops::{Index, IndexMut}, + str::FromStr, +}; + +use crate::{parquet::MetricRequest, search::MetricId}; + +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct DataRequestSpec { + pub geometry: GeometrySpec, + pub region: Vec, + pub metrics: Vec, + pub years: Option>, +} + +#[derive(Debug)] +pub struct MetricRequestResult { + pub metrics: Vec, + pub selected_geometry: String, + pub years: Vec, +} + +// impl DataRequestSpec { +// /// Generates a vector of metric requests from a `DataRequestSpec` and a catalogue. +// pub fn metric_requests( +// &self, +// catalogue: &Metadata, +// config: &Config, +// ) -> Result { +// // Find all the metrics which match the requested ones, expanding +// // any regex matches as we do so +// let expanded_metric_ids: Vec = self +// .metrics +// .iter() +// .filter_map(|metric_spec| match metric_spec { +// MetricSpec::Metric(id) => catalogue.expand_regex_metric(id).ok(), +// MetricSpec::DataProduct(_) => None, +// }) +// .flatten() +// .collect::>(); + +// let full_selection_plan = +// catalogue.generate_selection_plan(&expanded_metric_ids, &self.geometry, &self.years)?; + +// info!("Running your query with \n {full_selection_plan}"); + +// let metric_requests = +// catalogue.get_metric_requests(full_selection_plan.explicit_metric_ids, config)?; + +// Ok(MetricRequestResult { +// metrics: metric_requests, +// selected_geometry: full_selection_plan.geometry, +// years: full_selection_plan.year, +// }) +// } +// } + +#[derive(Serialize, Deserialize, Debug)] +pub enum MetricSpec { + Metric(MetricId), + DataProduct(String), +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct GeometrySpec { + pub geometry_level: Option, + pub include_geoms: bool, +} + +impl Default for GeometrySpec { + fn default() -> Self { + Self { + include_geoms: true, + geometry_level: None, + } + } +} + +#[derive(Serialize, Deserialize, Debug)] +pub enum RegionSpec { + BoundingBox(BBox), + Polygon(Polygon), + NamedArea(String), +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Polygon; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct BBox(pub [f64; 4]); + +impl Index for BBox { + type Output = f64; + + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] + } +} + +impl IndexMut for BBox { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + &mut self.0[index] + } +} + +impl FromStr for BBox { + type Err = &'static str; + fn from_str(value: &str) -> Result { + let parts: Vec = value + .split(',') + .map(|s| s.trim().parse::().map_err(|_| "Failed to parse bbox")) + .collect::, _>>()?; + + if parts.len() != 4 { + return Err("Bounding boxes need to have 4 coords"); + } + let mut bbox = [0.0; 4]; + bbox.copy_from_slice(&parts); + Ok(BBox(bbox)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn bbox_should_parse_if_correct() { + let bbox = BBox::from_str("0.0,1.0,2.0,3.0"); + assert!(bbox.is_ok(), "A four coord bbox should parse"); + } + + #[test] + fn bbox_should_not_parse_if_incorrect() { + let bbox = BBox::from_str("0.0,1.0,2.0"); + assert!( + bbox.is_err(), + "A string with fewer than 4 coords should not parse" + ); + let bbox = BBox::from_str("0.0,1.0,2.0,3.0,4.0"); + assert!( + bbox.is_err(), + "A string with 5 or more coords should not parse" + ); + let bbox = BBox::from_str("0.0sdfsd,1.0,2.0"); + assert!(bbox.is_err(), "A string with letters shouldn't parse"); + } +} diff --git a/src/lib.rs b/src/lib.rs index d21216f..225eea4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ pub use column_names as COL; // Modules pub mod column_names; pub mod config; +pub mod data_request_spec; pub mod error; #[cfg(feature = "formatters")] pub mod formatters; diff --git a/src/search.rs b/src/search.rs index 0ace36f..2bd112f 100644 --- a/src/search.rs +++ b/src/search.rs @@ -2,7 +2,8 @@ use crate::{ config::Config, - geo::get_geometries, + data_request_spec::DataRequestSpec, + geo::{get_geometries, BBox}, metadata::ExpandedMetadata, parquet::{get_metrics, MetricRequest}, COL, @@ -307,6 +308,11 @@ impl From for Option { #[derive(Clone, Debug)] pub struct SearchResults(pub DataFrame); +pub(crate) struct DownloadConfig { + pub bbox: Option, + pub include_geometry: bool, +} + impl SearchResults { /// Convert all the metrics in the dataframe to MetricRequests pub fn to_metric_requests(self, config: &Config) -> Vec { @@ -352,7 +358,11 @@ impl SearchResults { // Given a Data Request Spec // Return a DataFrame of the selected dataset - pub async fn download(self, config: &Config) -> anyhow::Result { + pub async fn download( + self, + config: &Config, + data_request_spec: DataRequestSpec, + ) -> anyhow::Result { let metric_requests = self.to_metric_requests(config); debug!("metric_requests = {:#?}", metric_requests); let all_geom_files: HashSet = metric_requests @@ -366,19 +376,26 @@ impl SearchResults { if all_geom_files.len() > 1 { panic!("Multiple geometries not yet supported"); } - // TODO Pass in the bbox as the second argument here - let geoms = get_geometries(all_geom_files.iter().next().unwrap(), None); - - // try_join requires us to have the errors from all futures be the same. - // We use anyhow to get it back properly - let (metrics, geoms) = try_join!( - async move { metrics.await.map_err(anyhow::Error::from) }, - geoms - )?; - debug!("geoms: {geoms:#?}"); - debug!("metrics: {metrics:#?}"); - - let result = geoms.inner_join(&metrics?, [COL::GEO_ID], [COL::GEO_ID])?; + + let result = if data_request_spec.geometry.include_geoms { + // TODO Pass in the bbox as the second argument here + let geoms = get_geometries(all_geom_files.iter().next().unwrap(), None); + + // try_join requires us to have the errors from all futures be the same. + // We use anyhow to get it back properly + let (metrics, geoms) = try_join!( + async move { metrics.await.map_err(anyhow::Error::from) }, + geoms + )?; + debug!("geoms: {geoms:#?}"); + debug!("metrics: {metrics:#?}"); + geoms.inner_join(&metrics?, [COL::GEO_ID], [COL::GEO_ID])? + } else { + let metrics = metrics.await.map_err(anyhow::Error::from)??; + debug!("metrics: {metrics:#?}"); + metrics + }; + Ok(result) } }