Skip to content

Commit

Permalink
Restore DataRequestSpec, add no-geometry flag
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreenbury committed Jul 10, 2024
1 parent 80ca5a1 commit 3e90966
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 18 deletions.
23 changes: 20 additions & 3 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use log::{debug, info};
use nonempty::nonempty;
use popgetter::{
config::Config,
data_request_spec::{self, DataRequestSpec, GeometrySpec},
formatters::{
CSVFormatter, GeoJSONFormatter, GeoJSONSeqFormatter, OutputFormatter, OutputGenerator,
},
Expand Down Expand Up @@ -77,6 +78,11 @@ pub struct DataCommand {
help = "Force run without prompt"
)]
force_run: bool,
#[arg(
long = "no-geometry",
help = "When set, no geometry data is included in the results"
)]
no_geometry: bool,
}

impl From<&OutputFormat> for OutputFormatter {
Expand Down Expand Up @@ -108,6 +114,16 @@ impl RunCommand for DataCommand {
let popgetter = Popgetter::new_with_config(config).await?;
let search_results = popgetter.search(self.search_params_args.clone().into());

// Make DataRequestSpec
// TODO: possibly implement From<(DataCommand, SearchParams)> for DataRequestSpec
let data_request_spec = DataRequestSpec {
geometry: GeometrySpec {
include_geoms: !self.no_geometry,
..Default::default()
},
..Default::default()
};

// sp.stop_and_persist is potentially a better method, but not obvious how to
// store the timing. Leaving below until that option is ruled out.
// sp.stop_and_persist(&COMPLETE_PROGRESS_STRING, spinner_message.into());
Expand All @@ -132,7 +148,9 @@ impl RunCommand for DataCommand {
DEFAULT_PROGRESS_SPINNER,
spinner_message.to_string() + RUNNING_TAIL_STRING,
);
let mut data = search_results.download(&popgetter.config).await?;
let mut data = search_results
.download(&popgetter.config, data_request_spec)
.await?;
sp.stop_with_symbol(COMPLETE_PROGRESS_STRING);

debug!("{data:#?}");
Expand All @@ -142,8 +160,7 @@ impl RunCommand for DataCommand {
let mut f = File::create(output_file)?;
formatter.save(&mut f, &mut data)?;
} else {
let stdout = std::io::stdout();
let mut stdout_lock = stdout.lock();
let mut stdout_lock = std::io::stdout().lock();
formatter.save(&mut stdout_lock, &mut data)?;
};
Ok(())
Expand Down
149 changes: 149 additions & 0 deletions src/data_request_spec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::{
ops::{Index, IndexMut},
str::FromStr,
};

use crate::{parquet::MetricRequest, search::MetricId};

#[derive(Serialize, Deserialize, Debug, Default)]
pub struct DataRequestSpec {
pub geometry: GeometrySpec,
pub region: Vec<RegionSpec>,
pub metrics: Vec<MetricSpec>,
pub years: Option<Vec<String>>,
}

#[derive(Debug)]
pub struct MetricRequestResult {
pub metrics: Vec<MetricRequest>,
pub selected_geometry: String,
pub years: Vec<String>,
}

// impl DataRequestSpec {
// /// Generates a vector of metric requests from a `DataRequestSpec` and a catalogue.
// pub fn metric_requests(
// &self,
// catalogue: &Metadata,
// config: &Config,
// ) -> Result<MetricRequestResult> {
// // Find all the metrics which match the requested ones, expanding
// // any regex matches as we do so
// let expanded_metric_ids: Vec<MetricId> = self
// .metrics
// .iter()
// .filter_map(|metric_spec| match metric_spec {
// MetricSpec::Metric(id) => catalogue.expand_regex_metric(id).ok(),
// MetricSpec::DataProduct(_) => None,
// })
// .flatten()
// .collect::<Vec<_>>();

// let full_selection_plan =
// catalogue.generate_selection_plan(&expanded_metric_ids, &self.geometry, &self.years)?;

// info!("Running your query with \n {full_selection_plan}");

// let metric_requests =
// catalogue.get_metric_requests(full_selection_plan.explicit_metric_ids, config)?;

// Ok(MetricRequestResult {
// metrics: metric_requests,
// selected_geometry: full_selection_plan.geometry,
// years: full_selection_plan.year,
// })
// }
// }

#[derive(Serialize, Deserialize, Debug)]
pub enum MetricSpec {
Metric(MetricId),
DataProduct(String),
}

#[derive(Serialize, Deserialize, Debug)]
pub struct GeometrySpec {
pub geometry_level: Option<String>,
pub include_geoms: bool,
}

impl Default for GeometrySpec {
fn default() -> Self {
Self {
include_geoms: true,
geometry_level: None,
}
}
}

#[derive(Serialize, Deserialize, Debug)]
pub enum RegionSpec {
BoundingBox(BBox),
Polygon(Polygon),
NamedArea(String),
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Polygon;

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BBox(pub [f64; 4]);

impl Index<usize> for BBox {
type Output = f64;

fn index(&self, index: usize) -> &Self::Output {
&self.0[index]
}
}

impl IndexMut<usize> for BBox {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.0[index]
}
}

impl FromStr for BBox {
type Err = &'static str;
fn from_str(value: &str) -> Result<Self, Self::Err> {
let parts: Vec<f64> = value
.split(',')
.map(|s| s.trim().parse::<f64>().map_err(|_| "Failed to parse bbox"))
.collect::<Result<Vec<_>, _>>()?;

if parts.len() != 4 {
return Err("Bounding boxes need to have 4 coords");
}
let mut bbox = [0.0; 4];
bbox.copy_from_slice(&parts);
Ok(BBox(bbox))
}
}

#[cfg(test)]
mod tests {
use super::*;
#[test]
fn bbox_should_parse_if_correct() {
let bbox = BBox::from_str("0.0,1.0,2.0,3.0");
assert!(bbox.is_ok(), "A four coord bbox should parse");
}

#[test]
fn bbox_should_not_parse_if_incorrect() {
let bbox = BBox::from_str("0.0,1.0,2.0");
assert!(
bbox.is_err(),
"A string with fewer than 4 coords should not parse"
);
let bbox = BBox::from_str("0.0,1.0,2.0,3.0,4.0");
assert!(
bbox.is_err(),
"A string with 5 or more coords should not parse"
);
let bbox = BBox::from_str("0.0sdfsd,1.0,2.0");
assert!(bbox.is_err(), "A string with letters shouldn't parse");
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub use column_names as COL;
// Modules
pub mod column_names;
pub mod config;
pub mod data_request_spec;
pub mod error;
#[cfg(feature = "formatters")]
pub mod formatters;
Expand Down
47 changes: 32 additions & 15 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
use crate::{
config::Config,
geo::get_geometries,
data_request_spec::DataRequestSpec,
geo::{get_geometries, BBox},
metadata::ExpandedMetadata,
parquet::{get_metrics, MetricRequest},
COL,
Expand Down Expand Up @@ -307,6 +308,11 @@ impl From<SearchParams> for Option<Expr> {
#[derive(Clone, Debug)]
pub struct SearchResults(pub DataFrame);

pub(crate) struct DownloadConfig {
pub bbox: Option<BBox>,
pub include_geometry: bool,
}

impl SearchResults {
/// Convert all the metrics in the dataframe to MetricRequests
pub fn to_metric_requests(self, config: &Config) -> Vec<MetricRequest> {
Expand Down Expand Up @@ -352,7 +358,11 @@ impl SearchResults {

// Given a Data Request Spec
// Return a DataFrame of the selected dataset
pub async fn download(self, config: &Config) -> anyhow::Result<DataFrame> {
pub async fn download(
self,
config: &Config,
data_request_spec: DataRequestSpec,
) -> anyhow::Result<DataFrame> {
let metric_requests = self.to_metric_requests(config);
debug!("metric_requests = {:#?}", metric_requests);
let all_geom_files: HashSet<String> = metric_requests
Expand All @@ -366,19 +376,26 @@ impl SearchResults {
if all_geom_files.len() > 1 {
panic!("Multiple geometries not yet supported");
}
// TODO Pass in the bbox as the second argument here
let geoms = get_geometries(all_geom_files.iter().next().unwrap(), None);

// try_join requires us to have the errors from all futures be the same.
// We use anyhow to get it back properly
let (metrics, geoms) = try_join!(
async move { metrics.await.map_err(anyhow::Error::from) },
geoms
)?;
debug!("geoms: {geoms:#?}");
debug!("metrics: {metrics:#?}");

let result = geoms.inner_join(&metrics?, [COL::GEO_ID], [COL::GEO_ID])?;

let result = if data_request_spec.geometry.include_geoms {
// TODO Pass in the bbox as the second argument here
let geoms = get_geometries(all_geom_files.iter().next().unwrap(), None);

// try_join requires us to have the errors from all futures be the same.
// We use anyhow to get it back properly
let (metrics, geoms) = try_join!(
async move { metrics.await.map_err(anyhow::Error::from) },
geoms
)?;
debug!("geoms: {geoms:#?}");
debug!("metrics: {metrics:#?}");
geoms.inner_join(&metrics?, [COL::GEO_ID], [COL::GEO_ID])?
} else {
let metrics = metrics.await.map_err(anyhow::Error::from)??;
debug!("metrics: {metrics:#?}");
metrics
};

Ok(result)
}
}
Expand Down

0 comments on commit 3e90966

Please sign in to comment.