Skip to content

Commit

Permalink
CSV output formatter working
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartlynn committed May 10, 2024
1 parent 540f683 commit 38a076e
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 7 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,9 @@ enum_dispatch = "0.3"
flatgeobuf = "4.1.0"
geozero = {version = "0.12.0", features= []}
httpmock = "0.7.0-rc.1"
geojson={version="0.24.1", optional=true }


[features]
default = ["formatters"]
formatters= ["dep:geojson"]
27 changes: 22 additions & 5 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use anyhow::Result;
use clap::{Args, Parser, Subcommand};
use enum_dispatch::enum_dispatch;
use popgetter::{
data_request_spec::{BBox, DataRequestSpec, MetricSpec, RegionSpec},
Popgetter,
data_request_spec::{BBox, DataRequestSpec, MetricSpec, RegionSpec},
formatters::{CSVFormatter, GeoJSONFormatter, OutputFormatter, OutputGenerator}, Popgetter
};
use serde::{Deserialize, Serialize};
use std::str::FromStr;
use std::{fs::File, str::FromStr};
use strum_macros::EnumString;

/// Defines the output formats we are able to produce data in.
Expand Down Expand Up @@ -43,16 +43,33 @@ pub struct DataCommand {
#[arg(short, long)]
metrics: Option<String>,
/// Specify output format
#[arg(short, long)]
#[arg(short='f', long)]
output_format: OutputFormat,

#[arg(short='o',long)]
output_file: String
}

impl RunCommand for DataCommand {
async fn run(&self) -> Result<()> {
let popgetter = Popgetter::new()?;
let data_request = DataRequestSpec::from(self);
let results = popgetter.get_data_request(&data_request).await?;
let mut results = popgetter.get_data_request(&data_request).await?;

let formatter = match(&self.output_format){
OutputFormat::GeoJSON=>{
OutputFormatter::GeoJSON(GeoJSONFormatter::default())
},
OutputFormat::Csv=>{
OutputFormatter::Csv(CSVFormatter::default())
},
_=>todo!("output format not implemented")
};

println!("{results:#?}");
let mut f = File::create(&self.output_file)?;
formatter.save(&mut f,&mut results)?;

Ok(())
}
}
Expand Down
76 changes: 76 additions & 0 deletions src/formatters.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use anyhow::Result;
use enum_dispatch::enum_dispatch;
use geojson;
use geojson::{Feature, GeoJson, Geometry, Value};
use polars::prelude::*;
use serde::{Deserialize, Serialize};
use std::io::Cursor;
use std::{convert::TryFrom, io::Write};

/// Interface shared by every output formatter: render a `DataFrame` either
/// to an in-memory string or straight into a writer.
///
/// Registered with `enum_dispatch` so that enums annotated with
/// `#[enum_dispatch(OutputGenerator)]` can forward these calls to their
/// variants.
#[enum_dispatch]
pub trait OutputGenerator {
/// Renders `df` and returns the result as a `String`.
fn format(&self, df: &mut DataFrame) -> Result<String>;
/// Renders `df` into `writer`.
fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()>;
}

/// The set of available formatters, one variant per concrete formatter type.
///
/// The `enum_dispatch(OutputGenerator)` attribute ties this enum to the
/// `OutputGenerator` trait so callers can hold any formatter behind one type.
#[enum_dispatch(OutputGenerator)]
#[derive(Serialize, Deserialize, Debug)]
pub enum OutputFormatter {
/// Wraps a `GeoJSONFormatter`.
GeoJSON(GeoJSONFormatter),
/// Wraps a `GeoJSONSeqFormatter`.
GeoJSONSeq(GeoJSONSeqFormatter),
/// Wraps a `CSVFormatter`.
Csv(CSVFormatter),
}

/// Field-less formatter used by the `OutputFormatter::GeoJSONSeq` variant.
// NOTE(review): presumably intended for newline-delimited GeoJSON sequences;
// the implementation in this file is still a placeholder. Confirm the target format.
#[derive(Serialize, Deserialize, Debug)]
pub struct GeoJSONSeqFormatter;

impl OutputGenerator for GeoJSONSeqFormatter {
    /// Placeholder implementation: ignores the frame and returns the fixed
    /// string `"Test"` until real serialisation is written.
    fn format(&self, _df: &mut DataFrame) -> Result<String> {
        Ok("Test".into())
    }

    /// Formats `df` and writes the result to `writer`.
    ///
    /// # Errors
    ///
    /// Returns an error if formatting fails or the writer rejects the bytes.
    fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()> {
        let output = self.format(df)?;
        // The formatted text must actually reach the writer; previously it
        // was computed and dropped, so `save` succeeded while writing nothing.
        writer.write_all(output.as_bytes())?;
        Ok(())
    }
}

/// Field-less formatter used by the `OutputFormatter::GeoJSON` variant.
/// Derives `Default`, so it can be built with `GeoJSONFormatter::default()`.
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct GeoJSONFormatter;

/// Geometry encoding option held by `CSVFormatter::geo_format`.
// NOTE(review): the variant names suggest Well-Known Binary / Well-Known Text;
// nothing in this file consumes them yet, so confirm the intended meaning.
#[derive(Serialize, Deserialize, Debug)]
pub enum GeoFormat {
Wkb,
Wkt,
}

/// Formatter that writes a `DataFrame` as CSV.
/// Derives `Default`, which leaves `geo_format` as `None`.
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct CSVFormatter {
// Optional geometry encoding. The `OutputGenerator` impl in this file does
// not read this field yet.
geo_format: Option<GeoFormat>,
}

impl OutputGenerator for CSVFormatter {
    /// Renders `df` as CSV and returns it as a `String`.
    ///
    /// # Errors
    ///
    /// Returns an error if CSV serialisation fails or the produced bytes are
    /// not valid UTF-8.
    fn format(&self, df: &mut DataFrame) -> Result<String> {
        // Start from an empty buffer. A buffer pre-filled with zero bytes is
        // only overwritten from the front, so any output shorter than the
        // pre-fill would be returned with trailing NUL characters.
        let mut buff = Cursor::new(Vec::new());
        self.save(&mut buff, df)?;

        Ok(String::from_utf8(buff.into_inner())?)
    }

    /// Writes `df` to `writer` as CSV using polars' `CsvWriter`.
    ///
    /// # Errors
    ///
    /// Returns an error if the CSV writer fails.
    fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()> {
        CsvWriter::new(writer).finish(df)?;
        Ok(())
    }
}

impl OutputGenerator for GeoJSONFormatter {
    /// Placeholder implementation: ignores the frame and returns the fixed
    /// string `"Test"` until real GeoJSON serialisation is written.
    fn format(&self, _df: &mut DataFrame) -> Result<String> {
        Ok("Test".into())
    }

    /// Formats `df` and writes the result to `writer`.
    ///
    /// # Errors
    ///
    /// Returns an error if formatting fails or the writer rejects the bytes.
    fn save(&self, writer: &mut impl Write, df: &mut DataFrame) -> Result<()> {
        let output = self.format(df)?;
        // The formatted text must actually reach the writer; previously it
        // was computed and dropped, so `save` succeeded while writing nothing.
        writer.write_all(output.as_bytes())?;
        Ok(())
    }
}
11 changes: 9 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ pub mod geo;
pub mod metadata;
pub mod parquet;

#[cfg(feature="formatters")]
pub mod formatters;

/// Top-level handle of the library; carries the source data release metadata.
pub struct Popgetter {
/// Metadata for the source data release.
pub metadata: SourceDataRelease,
}
Expand All @@ -33,13 +36,17 @@ impl Popgetter {
get_metrics(&metric_requests,None)
});

let geoms = get_geometries(&geom_file, None, None);
// TODO: The custom geoid column is needed because of the legacy US code.
// This should be standardized in future pipeline outputs.
let geoms = get_geometries(&geom_file, None, Some("AFFGEOID".into()));

// try_join! requires the errors from all futures to be the same type.
// We convert to anyhow::Error so both futures share one error type.
let (metrics,geoms) = try_join!(async move { metrics.await.map_err(anyhow::Error::from)}, geoms)?;
println!("geoms {geoms:#?}");
println!("metrics {metrics:#?}");

let result =metrics?.left_join(&geoms,["GEO_ID"],["GEOID"])?;
let result =geoms.inner_join(&metrics?,["GEOID"],["GEO_ID"])?;
Ok(result)
}
}
Expand Down

0 comments on commit 38a076e

Please sign in to comment.