From 5ff4e00d27f885747b61bd7753bfc4c535d0b83f Mon Sep 17 00:00:00 2001 From: Frank Elsinga Date: Thu, 15 Aug 2024 22:32:53 +0200 Subject: [PATCH] Exploded data storage (#1405) migrated partially to the exploded schema added a migration to explode the schema Necessary to migrate to a parquet based initialisation --- ...2ebc44b6c9061b717e78a97c0a9549b5a84f.json} | 34 ++----- ...15374ee3368f45250987475cce81910eec57.json} | 34 ++----- ...15200325_exploded-room-data-model.down.sql | 20 ++++ ...0815200325_exploded-room-data-model.up.sql | 99 +++++++++++++++++++ server/src/calendar/models.rs | 16 --- server/src/locations/preview.rs | 41 +++++--- server/src/maps/overlay_map.rs | 33 +++---- server/src/models.rs | 15 --- 8 files changed, 180 insertions(+), 112 deletions(-) rename server/.sqlx/{query-39cdd385e12341d2a7f05c50d9c399f6f367e997fd3a7da6c1c48a288202e82d.json => query-1225dec3f33fb9e257be062530e02ebc44b6c9061b717e78a97c0a9549b5a84f.json} (56%) rename server/.sqlx/{query-77fcf65552da24afce976f79421934413c43f978f3608a08883919e3d3a983bf.json => query-4db847e83ba3061db424551812c815374ee3368f45250987475cce81910eec57.json} (56%) create mode 100644 server/migrations/20240815200325_exploded-room-data-model.down.sql create mode 100644 server/migrations/20240815200325_exploded-room-data-model.up.sql diff --git a/server/.sqlx/query-39cdd385e12341d2a7f05c50d9c399f6f367e997fd3a7da6c1c48a288202e82d.json b/server/.sqlx/query-1225dec3f33fb9e257be062530e02ebc44b6c9061b717e78a97c0a9549b5a84f.json similarity index 56% rename from server/.sqlx/query-39cdd385e12341d2a7f05c50d9c399f6f367e997fd3a7da6c1c48a288202e82d.json rename to server/.sqlx/query-1225dec3f33fb9e257be062530e02ebc44b6c9061b717e78a97c0a9549b5a84f.json index 2d3ac985a..b3ea124ae 100644 --- a/server/.sqlx/query-39cdd385e12341d2a7f05c50d9c399f6f367e997fd3a7da6c1c48a288202e82d.json +++ b/server/.sqlx/query-1225dec3f33fb9e257be062530e02ebc44b6c9061b717e78a97c0a9549b5a84f.json @@ -1,47 +1,32 @@ { "db_name": "PostgreSQL", - 
"query": "SELECT key,name,last_calendar_scrape_at,calendar_url,type,type_common_name,lat,lon FROM en WHERE key = $1", + "query": "SELECT type,lat,lon,name,type_common_name FROM de WHERE key = $1", "describe": { "columns": [ { "ordinal": 0, - "name": "key", + "name": "type", "type_info": "Text" }, { "ordinal": 1, - "name": "name", - "type_info": "Text" + "name": "lat", + "type_info": "Float8" }, { "ordinal": 2, - "name": "last_calendar_scrape_at", - "type_info": "Timestamptz" + "name": "lon", + "type_info": "Float8" }, { "ordinal": 3, - "name": "calendar_url", + "name": "name", "type_info": "Text" }, { "ordinal": 4, - "name": "type", - "type_info": "Text" - }, - { - "ordinal": 5, "name": "type_common_name", "type_info": "Text" - }, - { - "ordinal": 6, - "name": "lat", - "type_info": "Float8" - }, - { - "ordinal": 7, - "name": "lon", - "type_info": "Float8" } ], "parameters": { @@ -52,13 +37,10 @@ "nullable": [ false, false, - true, - true, - false, false, false, false ] }, - "hash": "39cdd385e12341d2a7f05c50d9c399f6f367e997fd3a7da6c1c48a288202e82d" + "hash": "1225dec3f33fb9e257be062530e02ebc44b6c9061b717e78a97c0a9549b5a84f" } diff --git a/server/.sqlx/query-77fcf65552da24afce976f79421934413c43f978f3608a08883919e3d3a983bf.json b/server/.sqlx/query-4db847e83ba3061db424551812c815374ee3368f45250987475cce81910eec57.json similarity index 56% rename from server/.sqlx/query-77fcf65552da24afce976f79421934413c43f978f3608a08883919e3d3a983bf.json rename to server/.sqlx/query-4db847e83ba3061db424551812c815374ee3368f45250987475cce81910eec57.json index d0083d63f..7d5f17f87 100644 --- a/server/.sqlx/query-77fcf65552da24afce976f79421934413c43f978f3608a08883919e3d3a983bf.json +++ b/server/.sqlx/query-4db847e83ba3061db424551812c815374ee3368f45250987475cce81910eec57.json @@ -1,47 +1,32 @@ { "db_name": "PostgreSQL", - "query": "SELECT key,name,last_calendar_scrape_at,calendar_url,type,type_common_name,lat,lon FROM de WHERE key = $1", + "query": "SELECT type,lat,lon,name,type_common_name 
FROM en WHERE key = $1", "describe": { "columns": [ { "ordinal": 0, - "name": "key", + "name": "type", "type_info": "Text" }, { "ordinal": 1, - "name": "name", - "type_info": "Text" + "name": "lat", + "type_info": "Float8" }, { "ordinal": 2, - "name": "last_calendar_scrape_at", - "type_info": "Timestamptz" + "name": "lon", + "type_info": "Float8" }, { "ordinal": 3, - "name": "calendar_url", + "name": "name", "type_info": "Text" }, { "ordinal": 4, - "name": "type", - "type_info": "Text" - }, - { - "ordinal": 5, "name": "type_common_name", "type_info": "Text" - }, - { - "ordinal": 6, - "name": "lat", - "type_info": "Float8" - }, - { - "ordinal": 7, - "name": "lon", - "type_info": "Float8" } ], "parameters": { @@ -52,13 +37,10 @@ "nullable": [ false, false, - true, - true, - false, false, false, false ] }, - "hash": "77fcf65552da24afce976f79421934413c43f978f3608a08883919e3d3a983bf" + "hash": "4db847e83ba3061db424551812c815374ee3368f45250987475cce81910eec57" }
diff --git a/server/migrations/20240815200325_exploded-room-data-model.down.sql b/server/migrations/20240815200325_exploded-room-data-model.down.sql
new file mode 100644
index 000000000..f2712a7f6
--- /dev/null
+++ b/server/migrations/20240815200325_exploded-room-data-model.down.sql
@@ -0,0 +1,20 @@
+-- Down migration: reverts 20240815200325_exploded-room-data-model.up.sql
+
+-- Generated columns added by the up migration.
+alter table de drop column coordinate_accuracy;
+alter table en drop column coordinate_accuracy;
+alter table de drop column coordinate_source;
+alter table en drop column coordinate_source;
+alter table de drop column comment;
+alter table en drop column comment;
+
+-- Materialized views created by the up migration. The ranking factors live in
+-- the ranking_factors view; they were never added as columns to de/en.
+DROP MATERIALIZED VIEW ranking_factors;
+DROP MATERIALIZED VIEW operators_de;
+DROP MATERIALIZED VIEW operators_en;
+DROP MATERIALIZED VIEW usage;
+DROP MATERIALIZED VIEW computed_properties;
+DROP MATERIALIZED VIEW urls_de;
+DROP MATERIALIZED VIEW urls_en;
+DROP MATERIALIZED VIEW sources;
diff --git
a/server/migrations/20240815200325_exploded-room-data-model.up.sql b/server/migrations/20240815200325_exploded-room-data-model.up.sql new file mode 100644 index 000000000..701df7eb6 --- /dev/null +++ b/server/migrations/20240815200325_exploded-room-data-model.up.sql @@ -0,0 +1,99 @@ +-- Add up migration script here + +alter table de + add coordinate_accuracy text generated always as ((((data -> 'coords'::text) ->> 'accuracy'::text))::text) stored null; +alter table en + add coordinate_accuracy text generated always as ((((data -> 'coords'::text) ->> 'accuracy'::text))::text) stored null; +alter table de + add coordinate_source text generated always as ((((data -> 'coords'::text) ->> 'source'::text))::text) stored not null; +alter table en + add coordinate_source text generated always as ((((data -> 'coords'::text) ->> 'source'::text))::text) stored not null; +alter table de + add comment text generated always as (((data -> 'props'::text) ->> 'comment'::text)::text) stored null; +alter table en + add comment text generated always as (((data -> 'props'::text) ->> 'comment'::text)::text) stored null;
+
+CREATE MATERIALIZED VIEW ranking_factors AS -- distinct ranking factor rows, read from the German table only
+SELECT DISTINCT
+    data ->> 'id' as id, -- ->> yields text like every other column here; -> would keep the raw JSON value
+    data -> 'ranking_factors' ->> 'rank_type' as rank_type,
+    data -> 'ranking_factors' ->> 'rank_combined' as rank_combined,
+    data -> 'ranking_factors' ->> 'rank_usage' as rank_usage,
+    data -> 'ranking_factors' ->> 'rank_custom' as rank_custom,
+    data -> 'ranking_factors' ->> 'rank_boost' as rank_boost
+from de;
+
+CREATE MATERIALIZED VIEW operators_de AS +SELECT DISTINCT data -> 'props' -> 'operator' ->> 'id' as id, + data -> 'props' -> 'operator' ->> 'url' as url, + data -> 'props' -> 'operator' ->> 'code' as code, + data -> 'props' -> 'operator' ->> 'name' as name +from de; + +CREATE MATERIALIZED VIEW operators_en AS +SELECT DISTINCT data -> 'props' -> 'operator' ->> 'id' as id, + data -> 'props' -> 'operator' ->> 'url' as url, + data -> 'props' -> 'operator' ->> 'code' as code, +
data -> 'props' -> 'operator' ->> 'name' as name +from en; + +CREATE MATERIALIZED VIEW usage AS +SELECT DISTINCT data -> 'usage' ->> 'name' as name, + data -> 'usage' ->> 'din_277' as din_277, + data -> 'usage' ->> 'din_277_desc' as din_277_desc +from de +UNION +DISTINCT +SELECT DISTINCT data -> 'usage' ->> 'name' as name, + data -> 'usage' ->> 'din_277' as din_277, + data -> 'usage' ->> 'din_277_desc' as din_277_desc +from en; + +CREATE MATERIALIZED VIEW computed_properties as +( +with facts(key, fact) as (SELECT key, JSON_ARRAY_ELEMENTS((data -> 'props' -> 'computed')::json) as fact + from de), + extracted_facts(key, name, value) as (Select key, fact ->> 'name' as name, fact ->> 'text' as value + From facts) + +select distinct f.key, + room_keys.value as room_key, + address.value as address, + level.value as level, + arch_name.value as arch_name, + room_cnt.value as room_cnt, + building_cnt.value as building_cnt +from extracted_facts f + left outer join extracted_facts room_keys on f.key = room_keys.key and room_keys.name = 'Gebäudekennungen' + left outer join extracted_facts address on f.key = address.key and address.name = 'Adresse' + left outer join extracted_facts level on f.key = level.key and level.name = 'Stockwerk' + left outer join extracted_facts arch_name on f.key = arch_name.key and arch_name.name = 'Architekten-Name' + left outer join extracted_facts room_cnt on f.key = room_cnt.key and room_cnt.name = 'Anzahl Räume' + left outer join extracted_facts building_cnt + on f.key = building_cnt.key and building_cnt.name = 'Anzahl Gebäude' + ); + +CREATE MATERIALIZED VIEW urls_de as +( +with unrolled_urls(key, url) as (SELECT key, JSON_ARRAY_ELEMENTS((data -> 'props' ->> 'links')::json) as url + from de) +SELECT key, url ->> 'url' as url, url ->> 'text' as text +FROM unrolled_urls); + +CREATE MATERIALIZED VIEW urls_en as +( +with unrolled_urls(key, url) as (SELECT key, JSON_ARRAY_ELEMENTS((data -> 'props' ->> 'links')::json) as url + from en) +SELECT key, 
url ->> 'url' as url, url ->> 'text' as text +FROM unrolled_urls); + +CREATE MATERIALIZED VIEW sources as +( +with unrolled_sources(key, source) as (SELECT key, + JSON_ARRAY_ELEMENTS((data -> 'sources' -> 'base')::json) as source + from de) +SELECT key, + source ->> 'url' as url, + source ->> 'name' as name +FROM unrolled_sources +ORDER BY key, source ->> 'name'); diff --git a/server/src/calendar/models.rs b/server/src/calendar/models.rs index bb06b6a98..37d8e911f 100644 --- a/server/src/calendar/models.rs +++ b/server/src/calendar/models.rs @@ -2,8 +2,6 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use std::fmt::Display; -use crate::models::Location; - #[derive(Serialize, Deserialize, Clone, Debug)] pub(super) struct CalendarLocation { pub key: String, @@ -13,20 +11,6 @@ pub(super) struct CalendarLocation { pub type_common_name: String, pub r#type: String, } - -impl From for CalendarLocation { - fn from(loc: Location) -> Self { - Self { - key: loc.key, - name: loc.name, - last_calendar_scrape_at: loc.last_calendar_scrape_at, - calendar_url: loc.calendar_url, - type_common_name: loc.type_common_name, - r#type: loc.r#type, - } - } -} - #[derive(Serialize, Deserialize, Clone, Debug)] pub(super) struct LocationEvents { pub(super) events: Vec, diff --git a/server/src/locations/preview.rs b/server/src/locations/preview.rs index 073230734..bd9f4258c 100644 --- a/server/src/locations/preview.rs +++ b/server/src/locations/preview.rs @@ -13,9 +13,17 @@ use crate::limited::vec::LimitedVec; use crate::localisation; use crate::maps::overlay_map::OverlayMapTask; use crate::maps::overlay_text::{OverlayText, CANTARELL_BOLD, CANTARELL_REGULAR}; -use crate::models::Location; use crate::models::LocationKeyAlias; +#[derive(Debug)] +struct Location { + name: String, + r#type: String, + type_common_name: String, + lat: f64, + lon: f64, +} + #[tracing::instrument(skip(pool))] async fn get_localised_data( pool: &PgPool, @@ -23,21 +31,29 @@ async fn 
get_localised_data( should_use_english: bool, ) -> Result { let result = if should_use_english { - sqlx::query_as!(Location, "SELECT key,name,last_calendar_scrape_at,calendar_url,type,type_common_name,lat,lon FROM en WHERE key = $1", id) - .fetch_all(pool) - .await + sqlx::query_as!( + Location, + "SELECT type,lat,lon,name,type_common_name FROM en WHERE key = $1", + id + ) + .fetch_all(pool) + .await } else { - sqlx::query_as!(Location, "SELECT key,name,last_calendar_scrape_at,calendar_url,type,type_common_name,lat,lon FROM de WHERE key = $1", id) - .fetch_all(pool) - .await + sqlx::query_as!( + Location, + "SELECT type,lat,lon,name,type_common_name FROM de WHERE key = $1", + id + ) + .fetch_all(pool) + .await }; match result { - Ok(r) => match r.len() { - 0 => Err(HttpResponse::NotFound() + Ok(mut r) => match r.pop() { + None => Err(HttpResponse::NotFound() .content_type("text/plain") .body("Not found")), - _ => Ok(r[0].clone()), + Some(item) => Ok(item), }, Err(e) => { error!("Error preparing statement: {e:?}"); @@ -59,7 +75,10 @@ async fn construct_image_from_data( }; // add the map - if !OverlayMapTask::from(&data).draw_onto(&mut img).await { + if !OverlayMapTask::new(&data.r#type, data.lat, data.lon) + .draw_onto(&mut img) + .await + { return None; } draw_pin(&mut img); diff --git a/server/src/maps/overlay_map.rs b/server/src/maps/overlay_map.rs index 3bcb61c51..632283726 100644 --- a/server/src/maps/overlay_map.rs +++ b/server/src/maps/overlay_map.rs @@ -5,7 +5,6 @@ use futures::{stream::FuturesUnordered, StreamExt}; use tracing::warn; use crate::maps::fetch_tile::FetchTileTask; -use crate::models::Location; pub struct OverlayMapTask { pub x: f64, @@ -13,23 +12,6 @@ pub struct OverlayMapTask { pub z: u32, } -impl From<&Location> for OverlayMapTask { - fn from(entry: &Location) -> Self { - let zoom = match entry.r#type.as_str() { - "campus" => 14, - "area" | "site" => 15, - "building" | "joined_building" => 16, - "virtual_room" | "room" | "poi" => 17, - _ => { 
- warn!("map generation encountered an type for {entry:?}. Assuming it to be a building"); - 16 - } - }; - let (x, y, z) = lat_lon_z_to_xyz(entry.lat, entry.lon, zoom); - Self { x, y, z } - } -} - impl fmt::Debug for OverlayMapTask { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("OverlayMapTask") @@ -43,6 +25,25 @@ impl fmt::Debug for OverlayMapTask { const POSSIBLE_INDEX_RANGE: Range = 0..7; impl OverlayMapTask {
+    /// Builds a task from a location's type and coordinates.
+    ///
+    /// The zoom level is selected from `type` (an unrecognised type is logged and treated like a
+    /// building); `lat`/`lon` and that zoom are converted to `x`/`y`/`z` by `lat_lon_z_to_xyz`.
+    pub fn new(r#type: &str, lat: f64, lon: f64) -> Self {
+        let zoom = match r#type {
+            "campus" => 14,
+            "area" | "site" => 15,
+            "building" | "joined_building" => 16,
+            "virtual_room" | "room" | "poi" => 17,
+            unknown => {
+                // keep rendering instead of failing: fall back to the building zoom level
+                warn!("map generation encountered an unknown type {unknown:?}. Assuming it to be a building");
+                16
+            }
+        };
+        let (x, y, z) = lat_lon_z_to_xyz(lat, lon, zoom);
+        Self { x, y, z }
+    }
+
#[tracing::instrument(skip(img))] pub async fn draw_onto(&self, img: &mut image::RgbaImage) -> bool { // coordinate system is centered around the center of the image diff --git a/server/src/models.rs b/server/src/models.rs index 17e0c26f6..6327a06ff 100644 --- a/server/src/models.rs +++ b/server/src/models.rs @@ -1,18 +1,3 @@ -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Location { - pub key: String, - pub name: String, - pub last_calendar_scrape_at: Option>, - pub calendar_url: Option, - pub r#type: String, - pub type_common_name: String, - pub lat: f64, - pub lon: f64, -} - #[derive(Debug, Clone)] #[allow(dead_code)] // false positive. Clippy can't detect this due to macros pub struct LocationKeyAlias {