From a6410a31661c296573ea7b68e2649401ff071368 Mon Sep 17 00:00:00 2001 From: Frank Elsinga Date: Sun, 4 Aug 2024 02:51:23 +0200 Subject: [PATCH] finalised the first steps of the parquet-based initialisations --- server/main-api/src/setup/database/alias.rs | 22 +++++++++++++++------ server/main-api/src/setup/database/data.rs | 7 +++++-- server/main-api/src/setup/database/mod.rs | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/server/main-api/src/setup/database/alias.rs b/server/main-api/src/setup/database/alias.rs index adb38456a..047c1cf07 100644 --- a/server/main-api/src/setup/database/alias.rs +++ b/server/main-api/src/setup/database/alias.rs @@ -58,22 +58,28 @@ pub async fn download_updates() -> Result, crate::BoxedError> for index in 0..id_col.len() { let id = id_col.get(index).unwrap(); let r#type = type_col.get(index).unwrap(); - let visible_id = visible_id_col.get(index).unwrap(); + let visible_id = visible_id_col.get(index); + let visible_id = match visible_id { + Some(v) => v.to_string(), + None => id.to_string(), + }; aliase.push(Alias { alias: id.to_string(), key: id.to_string(), r#type: r#type.to_string(), - visible_id: visible_id.to_string(), + visible_id: visible_id.clone(), }); aliase.push(Alias { - alias: visible_id.to_string(), + alias: visible_id.clone(), key: id.to_string(), r#type: r#type.to_string(), - visible_id: visible_id.to_string(), + visible_id: visible_id.clone(), }); } let df_expanded = df.explode(["aliases"])?; + let mask = df_expanded.column("aliases")?.is_not_null(); + let df_expanded = df_expanded.filter(&mask)?; let id_col = df_expanded.column("id")?.str()?; let type_col = df_expanded.column("type")?.str()?; let visible_id_col = df_expanded.column("visible_id")?.str()?; @@ -82,12 +88,16 @@ pub async fn download_updates() -> Result, crate::BoxedError> let alias = aliases_col.get(index).unwrap(); let id = id_col.get(index).unwrap(); let r#type = type_col.get(index).unwrap(); - let visible_id = 
visible_id_col.get(index).unwrap(); + let visible_id = visible_id_col.get(index); + let visible_id = match visible_id { + Some(v) => v.to_string(), + None => id.to_string(), + }; aliase.push(Alias { alias: alias.to_string(), key: id.to_string(), r#type: r#type.to_string(), - visible_id: visible_id.to_string(), + visible_id, }); } Ok(LimitedVec(aliase)) diff --git a/server/main-api/src/setup/database/data.rs b/server/main-api/src/setup/database/data.rs index efc9754db..789922091 100644 --- a/server/main-api/src/setup/database/data.rs +++ b/server/main-api/src/setup/database/data.rs @@ -150,7 +150,7 @@ pub(super) async fn load_all_to_db( Ok(()) } #[tracing::instrument] -pub async fn download_status() -> Result<(LimitedVec,LimitedVec), crate::BoxedError> { +pub async fn download_status() -> Result<(LimitedVec, LimitedVec), crate::BoxedError> { let cdn_url = std::env::var("CDN_URL").unwrap_or_else(|_| "https://nav.tum.de/cdn".to_string()); let body = reqwest::get(format!("{cdn_url}/status_data.parquet")) .await? 
@@ -161,7 +161,10 @@ pub async fn download_status() -> Result<(LimitedVec,LimitedVec), c file.write_all(&body)?; let df = ParquetReader::new(&mut file).finish().unwrap(); let id_col = Vec::from(df.column("id")?.str()?); - let id_col=id_col.into_iter().filter_map(|s| s.map(String::from)).collect(); + let id_col = id_col + .into_iter() + .filter_map(|s| s.map(String::from)) + .collect(); let hash_col = Vec::from(df.column("hash")?.i64()?); let hash_col = hash_col.into_iter().flatten().collect(); Ok((LimitedVec(id_col), LimitedVec(hash_col))) diff --git a/server/main-api/src/setup/database/mod.rs b/server/main-api/src/setup/database/mod.rs index 9f18d4439..aa763b4e1 100644 --- a/server/main-api/src/setup/database/mod.rs +++ b/server/main-api/src/setup/database/mod.rs @@ -14,7 +14,7 @@ pub async fn setup(pool: &sqlx::PgPool) -> Result<(), crate::BoxedError> { } #[tracing::instrument(skip(pool))] pub async fn load_data(pool: &sqlx::PgPool) -> Result<(), crate::BoxedError> { - let (new_keys,new_hashes) = data::download_status().await?; + let (new_keys, new_hashes) = data::download_status().await?; { let _ = info_span!("deleting old data").enter(); let mut tx = pool.begin().await?;