From 38a076e247249cb6a1c90e654f0c77e5ed2e1862 Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 10 May 2024 13:49:04 +0100 Subject: [PATCH] CSV output formatter working --- Cargo.lock | 2 ++ Cargo.toml | 6 ++++ src/cli.rs | 27 +++++++++++++---- src/formatters.rs | 76 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 11 +++++-- 5 files changed, 115 insertions(+), 7 deletions(-) create mode 100644 src/formatters.rs diff --git a/Cargo.lock b/Cargo.lock index 700810b..93f4495 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1180,6 +1180,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5d728c1df1fbf328d74151efe6cb0586f79ee813346ea981add69bd22c9241b" dependencies = [ + "geo-types", "log", "serde", "serde_json", @@ -2588,6 +2589,7 @@ dependencies = [ "clap", "enum_dispatch", "flatgeobuf", + "geojson", "geozero", "httpmock", "polars", diff --git a/Cargo.toml b/Cargo.toml index 520a5b6..b0fb237 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,3 +23,9 @@ enum_dispatch = "0.3" flatgeobuf = "4.1.0" geozero = {version = "0.12.0", features= []} httpmock = "0.7.0-rc.1" +geojson={version="0.24.1", optional=true } + + +[features] +default = ["formatters"] +formatters= ["dep:geojson"] diff --git a/src/cli.rs b/src/cli.rs index 8953899..30f3726 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -4,11 +4,11 @@ use anyhow::Result; use clap::{Args, Parser, Subcommand}; use enum_dispatch::enum_dispatch; use popgetter::{ - data_request_spec::{BBox, DataRequestSpec, MetricSpec, RegionSpec}, - Popgetter, + data_request_spec::{BBox, DataRequestSpec, MetricSpec, RegionSpec}, + formatters::{CSVFormatter, GeoJSONFormatter, OutputFormatter, OutputGenerator}, Popgetter }; use serde::{Deserialize, Serialize}; -use std::str::FromStr; +use std::{fs::File, str::FromStr}; use strum_macros::EnumString; /// Defines the output formats we are able to produce data in. @@ -43,16 +43,33 @@ pub struct DataCommand { #[arg(short, long)] metrics: Option, /// Specify output format - #[arg(short, long)] + #[arg(short='f', long)] output_format: OutputFormat, + + #[arg(short='o',long)] + output_file: String } impl RunCommand for DataCommand { async fn run(&self) -> Result<()> { let popgetter = Popgetter::new()?; let data_request = DataRequestSpec::from(self); - let results = popgetter.get_data_request(&data_request).await?; + let mut results = popgetter.get_data_request(&data_request).await?; + + let formatter = match(&self.output_format){ + OutputFormat::GeoJSON=>{ + OutputFormatter::GeoJSON(GeoJSONFormatter::default()) + }, + OutputFormat::Csv=>{ + OutputFormatter::Csv(CSVFormatter::default()) + }, + _=>todo!("output format not implemented") + }; + println!("{results:#?}"); + let mut f = File::create(&self.output_file)?; + formatter.save(&mut f,&mut results)?; + Ok(()) } } diff --git a/src/formatters.rs b/src/formatters.rs new file mode 100644 index 0000000..2a2cb6a --- /dev/null +++ b/src/formatters.rs @@ -0,0 +1,76 @@ +use anyhow::Result; +use enum_dispatch::enum_dispatch; +use geojson; +use geojson::{Feature, GeoJson, Geometry, Value}; +use polars::prelude::*; +use serde::{Deserialize, Serialize}; +use std::io::Cursor; +use std::{convert::TryFrom, io::Write}; + +#[enum_dispatch] +pub trait OutputGenerator { + fn format(&self, df: &mut DataFrame) -> Result; + fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()>; +} + +#[enum_dispatch(OutputGenerator)] +#[derive(Serialize, Deserialize, Debug)] +pub enum OutputFormatter { + GeoJSON(GeoJSONFormatter), + GeoJSONSeq(GeoJSONSeqFormatter), + Csv(CSVFormatter), +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct GeoJSONSeqFormatter; + +impl OutputGenerator for GeoJSONSeqFormatter { + fn format(&self, df: &mut DataFrame) -> Result { + Ok("Test".into()) + } + + fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()> { + let output = self.format(df)?; + Ok(()) + } +} + +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct GeoJSONFormatter; + +#[derive(Serialize, Deserialize, Debug)] +pub enum GeoFormat { + Wkb, + Wkt, +} + +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct CSVFormatter { + geo_format: Option, +} + +impl OutputGenerator for CSVFormatter { + fn format(&self, df: &mut DataFrame) -> Result { + let mut data: Vec = vec![0; 200]; + let mut buff = Cursor::new(&mut data); + self.save(&mut buff, df)?; + + Ok(String::from_utf8(data)?) + } + + fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()> { + CsvWriter::new(writer).finish(df)?; + Ok(()) + } +} + +impl OutputGenerator for GeoJSONFormatter { + fn format(&self, df: &mut DataFrame) -> Result { + Ok("Test".into()) + } + + fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()> { + let output = self.format(df)?; + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 93938bf..d51948d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,9 @@ pub mod geo; pub mod metadata; pub mod parquet; +#[cfg(feature="formatters")] +pub mod formatters; + pub struct Popgetter { pub metadata: SourceDataRelease, } @@ -33,13 +36,17 @@ impl Popgetter { get_metrics(&metric_requests,None) }); - let geoms = get_geometries(&geom_file, None, None); + /// TODO The custom geoid here is because of the legacy US code + /// This should be standardized on future pipeline outputs + let geoms = get_geometries(&geom_file, None, Some("AFFGEOID".into())); // try_from requires us to have the errors from all futures be the same. // We use anyhow to get it back properly let (metrics,geoms) = try_join!(async move { metrics.await.map_err(anyhow::Error::from)}, geoms)?; + println!("geoms {geoms:#?}"); + println!("metrics {metrics:#?}"); - let result =metrics?.left_join(&geoms,["GEO_ID"],["GEOID"])?; + let result =geoms.inner_join(&metrics?,["GEOID"],["GEO_ID"])?; Ok(result) } }