Skip to content

Commit

Permalink
Add bbox for data command, add countries display
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreenbury committed Jul 11, 2024
1 parent 3e90966 commit c6bb5aa
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 86 deletions.
71 changes: 48 additions & 23 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@ use log::{debug, info};
use nonempty::nonempty;
use popgetter::{
config::Config,
data_request_spec::{self, DataRequestSpec, GeometrySpec},
data_request_spec::{DataRequestSpec, GeometrySpec, RegionSpec},
formatters::{
CSVFormatter, GeoJSONFormatter, GeoJSONSeqFormatter, OutputFormatter, OutputGenerator,
},
geo::BBox,
metadata::get_country_names,
search::{
Country, DataPublisher, GeometryLevel, MetricId, SearchContext, SearchParams,
SearchResults, SearchText, SourceDataRelease, SourceMetricId, YearRange,
Expand All @@ -25,7 +24,7 @@ use std::fs::File;
use std::{io, process};
use strum_macros::EnumString;

use crate::display::display_search_results;
use crate::display::{display_countries, display_search_results};

const DEFAULT_PROGRESS_SPINNER: Spinners = Spinners::Dots;
const COMPLETE_PROGRESS_STRING: &str = "✔";
Expand Down Expand Up @@ -56,7 +55,8 @@ pub struct DataCommand {
#[arg(
short,
long,
value_name = "MIN_LAT,MIN_LNG,MAX_LAT,MAX_LNG",
value_name = "MIN_LNG,MIN_LAT,MAX_LNG,MAX_LAT",
allow_hyphen_values = true,
help = "Bounding box in which to get the results"
)]
bbox: Option<BBox>,
Expand Down Expand Up @@ -85,6 +85,30 @@ pub struct DataCommand {
no_geometry: bool,
}

impl From<&DataCommand> for DataRequestSpec {
fn from(value: &DataCommand) -> Self {
let region = value
.bbox
.as_ref()
.map(|bbox| vec![RegionSpec::BoundingBox(bbox.clone())])
.unwrap_or_default();
let geometry = GeometrySpec {
// If region_spec provided, override and always include_geoms
include_geoms: if region.len().gt(&0) {
true
} else {
!value.no_geometry
},
..Default::default()
};
Self {
geometry,
region,
..Default::default()
}
}
}

impl From<&OutputFormat> for OutputFormatter {
fn from(value: &OutputFormat) -> Self {
match value {
Expand Down Expand Up @@ -116,13 +140,7 @@ impl RunCommand for DataCommand {

// Make DataRequestSpec
// TODO: possibly implement From<(DataCommand, SearchParams)> for DataRequestSpec
let data_request_spec = DataRequestSpec {
geometry: GeometrySpec {
include_geoms: !self.no_geometry,
..Default::default()
},
..Default::default()
};
let data_request_spec = self.into();

// sp.stop_and_persist is potentially a better method, but not obvious how to
// store the timing. Leaving below until that option is ruled out.
Expand Down Expand Up @@ -171,13 +189,16 @@ impl RunCommand for DataCommand {
/// The set of ways to search will likley increase over time
#[derive(Args, Debug)]
pub struct MetricsCommand {
#[arg(
short,
long,
value_name = "MIN_LAT,MIN_LNG,MAX_LAT,MAX_LNG",
help = "Bounding box in which to get the results"
)]
bbox: Option<BBox>,
// TODO: consider implementation of bbox as part of:
// [#67](https://github.com/Urban-Analytics-Technology-Platform/popgetter-cli/issues/67)
// #[arg(
// short,
// long,
// value_name = "MIN_LNG,MIN_LAT,MAX_LNG,MAX_LAT",
// allow_hyphen_values=true,
// help = "Bounding box in which to get the results"
// )]
// bbox: Option<BBox>,
#[arg(
short,
long,
Expand Down Expand Up @@ -358,12 +379,16 @@ pub struct CountriesCommand;

impl RunCommand for CountriesCommand {
async fn run(&self, config: Config) -> Result<()> {
info!("Running `countries` subcommand");
let spinner_message = "Downloading countries";
let mut sp = Spinner::with_timer(
DEFAULT_PROGRESS_SPINNER,
spinner_message.to_string() + RUNNING_TAIL_STRING,
);
let popgetter = Popgetter::new_with_config(config).await?;
println!("The following countries are available:");
get_country_names(&popgetter.config)
.await?
.into_iter()
.for_each(|country| println!("{country}"));
sp.stop_with_symbol(COMPLETE_PROGRESS_STRING);
println!("\nThe following countries are available:");
display_countries(popgetter.metadata.countries, None)?;
Ok(())
}
}
Expand Down
70 changes: 24 additions & 46 deletions src/data_request_spec.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use anyhow::Result;
// TODO: this module to be refactored following implementation of SearchParams.
// See [#67](https://github.com/Urban-Analytics-Technology-Platform/popgetter-cli/issues/67)

use serde::{Deserialize, Serialize};
use std::{
ops::{Index, IndexMut},
str::FromStr,
};

use crate::{parquet::MetricRequest, search::MetricId};
use crate::geo::BBox;
use crate::search::MetricId;

#[derive(Serialize, Deserialize, Debug, Default)]
pub struct DataRequestSpec {
Expand All @@ -15,13 +14,13 @@ pub struct DataRequestSpec {
pub years: Option<Vec<String>>,
}

#[derive(Debug)]
pub struct MetricRequestResult {
pub metrics: Vec<MetricRequest>,
pub selected_geometry: String,
pub years: Vec<String>,
}

// #[derive(Debug)]
// pub struct MetricRequestResult {
// pub metrics: Vec<MetricRequest>,
// pub selected_geometry: String,
// pub years: Vec<String>,
// }
//
// impl DataRequestSpec {
// /// Generates a vector of metric requests from a `DataRequestSpec` and a catalogue.
// pub fn metric_requests(
Expand Down Expand Up @@ -85,46 +84,25 @@ pub enum RegionSpec {
NamedArea(String),
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Polygon;

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BBox(pub [f64; 4]);

impl Index<usize> for BBox {
type Output = f64;

fn index(&self, index: usize) -> &Self::Output {
&self.0[index]
}
}

impl IndexMut<usize> for BBox {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.0[index]
}
}

impl FromStr for BBox {
type Err = &'static str;
fn from_str(value: &str) -> Result<Self, Self::Err> {
let parts: Vec<f64> = value
.split(',')
.map(|s| s.trim().parse::<f64>().map_err(|_| "Failed to parse bbox"))
.collect::<Result<Vec<_>, _>>()?;

if parts.len() != 4 {
return Err("Bounding boxes need to have 4 coords");
impl RegionSpec {
pub fn bbox(&self) -> Option<BBox> {
match self {
RegionSpec::BoundingBox(bbox) => Some(bbox.clone()),
_ => None,
}
let mut bbox = [0.0; 4];
bbox.copy_from_slice(&parts);
Ok(BBox(bbox))
}
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Polygon;

#[cfg(test)]
mod tests {

use std::str::FromStr;

use super::*;

#[test]
fn bbox_should_parse_if_correct() {
let bbox = BBox::from_str("0.0,1.0,2.0,3.0");
Expand Down
49 changes: 49 additions & 0 deletions src/display.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,57 @@
use comfy_table::{presets::NOTHING, *};
use itertools::izip;

use polars::{frame::DataFrame, prelude::SortMultipleOptions};
use popgetter::{search::SearchResults, COL};

pub fn display_countries(countries: DataFrame, max_results: Option<usize>) -> anyhow::Result<()> {
let df_to_show = match max_results {
Some(max) => countries.head(Some(max)),
None => countries,
};
let df_to_show = df_to_show.sort([COL::COUNTRY_ID], SortMultipleOptions::default())?;
let mut table = Table::new();
table
.load_preset(NOTHING)
.set_content_arrangement(ContentArrangement::Dynamic)
.set_header(vec![
Cell::new("Country ID").add_attribute(Attribute::Bold),
Cell::new("Country Name (official)").add_attribute(Attribute::Bold),
Cell::new("Country Name (short)").add_attribute(Attribute::Bold),
Cell::new("ISO3116-1 alpha-3").add_attribute(Attribute::Bold),
Cell::new("ISO3116-2").add_attribute(Attribute::Bold),
])
.set_style(comfy_table::TableComponent::BottomBorder, '─')
.set_style(comfy_table::TableComponent::MiddleHeaderIntersections, '─')
.set_style(comfy_table::TableComponent::HeaderLines, '─')
.set_style(comfy_table::TableComponent::BottomBorderIntersections, '─')
.set_style(comfy_table::TableComponent::TopBorder, '─')
.set_style(comfy_table::TableComponent::TopBorderIntersections, '─');
for (
country_id,
country_name_official,
country_name_short_en,
countr_iso3,
country_iso3116_2,
) in izip!(
df_to_show.column(COL::COUNTRY_ID)?.str()?,
df_to_show.column(COL::COUNTRY_NAME_OFFICIAL)?.str()?,
df_to_show.column(COL::COUNTRY_NAME_SHORT_EN)?.str()?,
df_to_show.column(COL::COUNTRY_ISO3)?.str()?,
df_to_show.column(COL::COUNTRY_ISO3166_2)?.str()?,
) {
table.add_row(vec![
country_id.unwrap_or_default(),
country_name_official.unwrap_or_default(),
country_name_short_en.unwrap_or_default(),
countr_iso3.unwrap_or_default(),
country_iso3116_2.unwrap_or_default(),
]);
}
println!("\n{}", table);
Ok(())
}

pub fn display_search_results(
results: SearchResults,
max_results: Option<usize>,
Expand Down
4 changes: 2 additions & 2 deletions src/geo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use std::{
/// `bbox`: an optional bounding box to filter the features by
///
/// Returns: a Result object containing a vector of (geometry, properties).
pub async fn get_geometries(file_url: &str, bbox: Option<&BBox>) -> Result<DataFrame> {
pub async fn get_geometries(file_url: &str, bbox: Option<BBox>) -> Result<DataFrame> {
let fgb = HttpFgbReader::open(file_url).await?;

let mut fgb = if let Some(bbox) = bbox {
Expand Down Expand Up @@ -235,7 +235,7 @@ mod tests {
-1.373_095_490_899_146_4,
53.026_908_220_355_35,
]);
let geoms = get_geometries(&server.url("/fgb_example.fgb"), Some(&bbox)).await;
let geoms = get_geometries(&server.url("/fgb_example.fgb"), Some(bbox)).await;

assert!(geoms.is_ok(), "The geometry call should not error");
let geoms = geoms.unwrap();
Expand Down
5 changes: 2 additions & 3 deletions src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,9 @@ impl CountryMetadataLoader {
}
}

pub async fn get_country_names(config: &Config) -> anyhow::Result<Vec<String>> {
let country_text_file = format!("{}/countries.txt", config.base_path);
async fn get_country_names(config: &Config) -> anyhow::Result<Vec<String>> {
Ok(reqwest::Client::new()
.get(&country_text_file)
.get(&format!("{}/countries.txt", config.base_path))
.send()
.await?
.text()
Expand Down
11 changes: 7 additions & 4 deletions src/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[&str]>) -> Resul
// generally true
let mut joined_df: Option<DataFrame> = None;

// Merge the dataframes from each remove file in to a single
// dataframe
// Merge the dataframes from each remove file in to a single dataframe
for df in dfs? {
if let Some(prev_dfs) = joined_df {
joined_df = Some(prev_dfs.join(
Expand All @@ -82,8 +81,12 @@ pub fn get_metrics(metrics: &[MetricRequest], geo_ids: Option<&[&str]>) -> Resul
joined_df = Some(df.clone());
}
}

joined_df.with_context(|| "Failed to combine data queries")
// Return if None, or return df with COL::GEO_ID first
Ok(joined_df
.with_context(|| "Failed to combine data queries")?
.lazy()
.select(&[col(COL::GEO_ID), col("*").exclude([COL::GEO_ID])])
.collect()?)
}

#[cfg(test)]
Expand Down
24 changes: 16 additions & 8 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::{
config::Config,
data_request_spec::DataRequestSpec,
geo::{get_geometries, BBox},
geo::get_geometries,
metadata::ExpandedMetadata,
parquet::{get_metrics, MetricRequest},
COL,
Expand Down Expand Up @@ -308,11 +308,6 @@ impl From<SearchParams> for Option<Expr> {
#[derive(Clone, Debug)]
pub struct SearchResults(pub DataFrame);

pub(crate) struct DownloadConfig {
pub bbox: Option<BBox>,
pub include_geometry: bool,
}

impl SearchResults {
/// Convert all the metrics in the dataframe to MetricRequests
pub fn to_metric_requests(self, config: &Config) -> Vec<MetricRequest> {
Expand Down Expand Up @@ -374,12 +369,25 @@ impl SearchResults {

// TODO Handle multiple responses
if all_geom_files.len() > 1 {
panic!("Multiple geometries not yet supported");
todo!("Multiple geometries not yet supported");
}

let result = if data_request_spec.geometry.include_geoms {
// TODO Pass in the bbox as the second argument here
let geoms = get_geometries(all_geom_files.iter().next().unwrap(), None);
if data_request_spec.region.len() > 1 {
todo!(
"Multiple region specifications are not yet supported: {:#?}",
data_request_spec.region
);
}
debug!("{:#?}", data_request_spec.region.first().unwrap().bbox());
let geoms = get_geometries(
all_geom_files.iter().next().unwrap(),
data_request_spec
.region
.first()
.and_then(|region_spec| region_spec.bbox().clone()),
);

// try_join requires us to have the errors from all futures be the same.
// We use anyhow to get it back properly
Expand Down

0 comments on commit c6bb5aa

Please sign in to comment.