From e3e6b3563f833586b26d3ea19102956aaf3fe2c5 Mon Sep 17 00:00:00 2001 From: Binabh Date: Sun, 13 Aug 2023 07:51:23 +0545 Subject: [PATCH] adds support for id_column in tables for auto_publish (#790) Resolves #682 - [x] Get id_column string from config.yaml and use for id column - [x] Support for list of strings - [x] Add info/warnings if column is not there or is of wrong type - [x] if column for the feature ID is found, remove it from properties (see inline comment) - [x] cleanup logging messages - [x] need more tests to catch other edge cases --------- Co-authored-by: Yuri Astrakhan --- docs/src/config-file.md | 4 ++ justfile | 4 ++ martin/src/pg/config.rs | 6 +++ martin/src/pg/configurator.rs | 59 +++++++++++++++++++++- tests/config.yaml | 1 + tests/expected/auto/catalog_auto.json | 5 ++ tests/expected/configured/catalog_cfg.json | 5 ++ tests/expected/generated_config.yaml | 12 +++++ tests/expected/given_config.yaml | 22 +++++++- tests/fixtures/tables/autodetect.sql | 57 +++++++++++++++++++++ 10 files changed, 173 insertions(+), 2 deletions(-) diff --git a/docs/src/config-file.md b/docs/src/config-file.md index 8039aa714..c10bc97c2 100644 --- a/docs/src/config-file.md +++ b/docs/src/config-file.md @@ -67,6 +67,10 @@ postgres: source_id_format: 'table.{schema}.{table}.{column}' # Add more schemas to the ones listed above from_schemas: my_other_schema + # A table column to use as the feature ID + # If a table has no column with this name, `id_column` will not be set for that table. + # If a list of strings is given, the first found column will be treated as a feature ID. + id_columns: feature_id functions: # Optionally set how source ID should be generated based on the function's name and schema source_id_format: '{schema}.{function}' diff --git a/justfile b/justfile index 2b4431b7d..a02b4b0ea 100644 --- a/justfile +++ b/justfile @@ -29,6 +29,10 @@ debug-page *ARGS: start psql *ARGS: psql {{ ARGS }} {{ DATABASE_URL }} +# Run PSQL utility against the test database +pg_dump *ARGS: + pg_dump {{ ARGS }} {{ DATABASE_URL }} + # Perform cargo clean to delete all build files clean: clean-test stop cargo clean diff --git a/martin/src/pg/config.rs b/martin/src/pg/config.rs index 4880fd24d..85df68e91 100644 --- a/martin/src/pg/config.rs +++ b/martin/src/pg/config.rs @@ -76,6 +76,12 @@ pub struct PgCfgPublishType { #[serde(skip_serializing_if = "Option::is_none")] #[serde(alias = "id_format")] pub source_id_format: Option, + /// A table column to use as the feature ID + /// If a table has no column with this name, `id_column` will not be set for that table. + /// If a list of strings is given, the first found column will be treated as a feature ID. + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(alias = "id_column")] + pub id_columns: Option>, } impl PgConfig { diff --git a/martin/src/pg/configurator.rs b/martin/src/pg/configurator.rs index e7b2147b2..4890a5acf 100755 --- a/martin/src/pg/configurator.rs +++ b/martin/src/pg/configurator.rs @@ -14,7 +14,7 @@ use crate::pg::pool::PgPool; use crate::pg::table_source::{ calc_srid, merge_table_info, query_available_tables, table_to_query, }; -use crate::pg::utils::{find_info, normalize_key, InfoMap}; +use crate::pg::utils::{find_info, find_kv_ignore_case, normalize_key, InfoMap}; use crate::pg::PgError::InvalidTableExtent; use crate::pg::Result; use crate::source::Sources; @@ -27,6 +27,7 @@ pub type SqlTableInfoMapMapMap = InfoMap>>; pub struct PgBuilderAuto { source_id_format: String, schemas: Option>, + id_columns: Option>, } #[derive(Debug)] @@ -122,6 +123,7 @@ impl PgBuilder { let id2 = self.resolve_id(&source_id, &db_inf); let Some(srid) = calc_srid(&db_inf.format_id(), &id2, db_inf.srid, 0, self.default_srid) else { continue }; db_inf.srid = srid; + update_id_column(&id2, &mut db_inf, auto_tables); info!("Discovered source {id2} from {}", summary(&db_inf)); pending.push(table_to_query( id2, @@ -232,12 +234,57 @@ impl PgBuilder { } } +/// Try to find any ID column in a list of table columns (properties) that match one of the given `id_column` values. +/// If found, modify `id_column` value on the table info. +fn update_id_column(id: &str, inf: &mut TableInfo, auto_tables: &PgBuilderAuto) { + let Some(props) = inf.properties.as_mut() else { return }; + let Some(try_columns) = &auto_tables.id_columns else { return }; + + for key in try_columns.iter() { + let (column, typ) = if let Some(typ) = props.get(key) { + (key, typ) + } else { + match find_kv_ignore_case(props, key) { + Ok(Some(result)) => { + info!("For source {id}, id_column '{key}' was not found, but found '{result}' instead."); + (result, props.get(result).unwrap()) + } + Ok(None) => continue, + Err(multiple) => { + error!("Unable to configure source {id} because id_column '{key}' has no exact match or more than one potential matches: {}", multiple.join(", ")); + continue; + } + } + }; + // ID column can be any integer type as defined in + // https://github.com/postgis/postgis/blob/559c95d85564fb74fa9e3b7eafb74851810610da/postgis/mvt.c#L387C4-L387C66 + if typ != "int4" && typ != "int8" && typ != "int2" { + warn!("Unable to use column `{key}` in table {}.{} as a tile feature ID because it has a non-integer type `{typ}`.", inf.schema, inf.table); + continue; + } + + inf.id_column = Some(column.to_string()); + let mut final_props = props.clone(); + final_props.remove(column); + inf.properties = Some(final_props); + return; + } + + info!( + "No ID column found for table {}.{} - searched for an integer column named {}.", + inf.schema, + inf.table, + try_columns.join(", ") + ); +} + fn new_auto_publish(config: &PgConfig, is_function: bool) -> Option { let default_id_fmt = |is_func| (if is_func { "{function}" } else { "{table}" }).to_string(); let default = |schemas| { Some(PgBuilderAuto { source_id_format: default_id_fmt(is_function), schemas, + id_columns: None, }) }; @@ -252,6 +299,14 @@ fn new_auto_publish(config: &PgConfig, is_function: bool) -> Option default(merge_opt_hs(&a.from_schemas, &None)), BoolOrObject::Bool(false) => None, @@ -287,6 +342,7 @@ fn summary(info: &TableInfo) -> String { Some(true) => "view", _ => "table", }; + // TODO: add column_id to the summary if it is set format!( "{relkind} {}.{} with {} column ({}, SRID={})", info.schema, @@ -331,6 +387,7 @@ mod tests { Some(PgBuilderAuto { source_id_format: source_id_format.to_string(), schemas: schemas.map(|s| s.iter().map(|s| (*s).to_string()).collect()), + id_columns: None, }) } diff --git a/tests/config.yaml b/tests/config.yaml index 41c6a99dc..29c06eec7 100644 --- a/tests/config.yaml +++ b/tests/config.yaml @@ -22,6 +22,7 @@ postgres: auto_publish: tables: from_schemas: autodetect + id_columns: [feat_id, big_feat_id] # Associative arrays of table sources tables: diff --git a/tests/expected/auto/catalog_auto.json b/tests/expected/auto/catalog_auto.json index 40741ccf9..bbb9bd143 100644 --- a/tests/expected/auto/catalog_auto.json +++ b/tests/expected/auto/catalog_auto.json @@ -9,6 +9,11 @@ "content_type": "application/x-protobuf", "description": "autodetect.auto_table.geom" }, + { + "id": "bigint_table", + "content_type": "application/x-protobuf", + "description": "autodetect.bigint_table.geom" + }, { "id": "function_Mixed_Name", "content_type": "application/x-protobuf", diff --git a/tests/expected/configured/catalog_cfg.json b/tests/expected/configured/catalog_cfg.json index 1df36eae6..6272c6307 100644 --- a/tests/expected/configured/catalog_cfg.json +++ b/tests/expected/configured/catalog_cfg.json @@ -9,6 +9,11 @@ "content_type": "application/x-protobuf", "description": "autodetect.auto_table.geom" }, + { + "id": "bigint_table", + "content_type": "application/x-protobuf", + "description": "autodetect.bigint_table.geom" + }, { "id": "function_zxy_query", "content_type": "application/x-protobuf", diff --git a/tests/expected/generated_config.yaml b/tests/expected/generated_config.yaml index aa46f783e..a20c2cce9 100644 --- a/tests/expected/generated_config.yaml +++ b/tests/expected/generated_config.yaml @@ -28,6 +28,18 @@ postgres: properties: feat_id: int4 gid: int4 + bigint_table: + schema: autodetect + table: bigint_table + srid: 4326 + geometry_column: geom + extent: 4096 + buffer: 64 + clip_geom: true + geometry_type: POINT + properties: + big_feat_id: int8 + id: int4 points1: schema: public table: points1 diff --git a/tests/expected/given_config.yaml b/tests/expected/given_config.yaml index 2e20dc8f8..a7ac72201 100644 --- a/tests/expected/given_config.yaml +++ b/tests/expected/given_config.yaml @@ -8,6 +8,9 @@ postgres: auto_publish: tables: from_schemas: autodetect + id_columns: + - feat_id + - big_feat_id tables: MixPoints: schema: MixedCase @@ -28,6 +31,7 @@ postgres: table: auto_table srid: 4326 geometry_column: geom + id_column: feat_id bounds: - -166.87107126230424 - -53.44747249115674 @@ -38,8 +42,24 @@ postgres: clip_geom: true geometry_type: POINT properties: - feat_id: int4 gid: int4 + bigint_table: + schema: autodetect + table: bigint_table + srid: 4326 + geometry_column: geom + id_column: big_feat_id + bounds: + - -174.89475564568033 + - -77.2579745396886 + - 174.72753224514435 + - 73.80785950599903 + extent: 4096 + buffer: 64 + clip_geom: true + geometry_type: POINT + properties: + id: int4 points1: layer_id: abc schema: public diff --git a/tests/fixtures/tables/autodetect.sql b/tests/fixtures/tables/autodetect.sql index 0bd50b4c0..2a8c9eaf5 100644 --- a/tests/fixtures/tables/autodetect.sql +++ b/tests/fixtures/tables/autodetect.sql @@ -55,3 +55,60 @@ values (1, 71951, '0101000020E6100000211700C9E6DA6140F510E7C8F4DA2740'), CREATE INDEX ON autodetect.auto_table USING GIST (geom); CLUSTER auto_table_geom_idx ON autodetect.auto_table; + + +CREATE TABLE autodetect.bigint_table +( + id SERIAL PRIMARY KEY, + big_feat_id BIGINT, + geom GEOMETRY(POINT, 4326) +); + +-- INSERT INTO autodetect.bigint_table +-- SELECT generate_series(1, 3) as id, +-- (random() * 100000)::bigint as big_feat_id, +-- (ST_DUMP(ST_GENERATEPOINTS(st_transform(st_tileenvelope(18, 235085, 122323), 4326), 3))).geom; +-- INSERT INTO autodetect.bigint_table +-- SELECT generate_series(4, 30) as id, +-- (random() * 100000)::bigint as big_feat_id, +-- (ST_DUMP(ST_GENERATEPOINTS(st_transform(st_tileenvelope(0, 0, 0), 4326), 27))).geom; +-- +-- To dump the data above, uncomment code the above, comment the INSERT code bellow, and run: +-- just restart +-- just pg_dump --data-only --inserts --rows-per-insert=100 --table=autodetect.bigint_table + +INSERT INTO autodetect.bigint_table +VALUES (1, 89698, '0101000020E6100000D4F7A0E7E2DA61402E2E980CB9DA2740'), + (2, 97180, '0101000020E61000007A523359E5DA614005BA7E9C85DA2740'), + (3, 91401, '0101000020E6100000267D6637ECDA61404174148C04DB2740'), + (4, 19522, '0101000020E610000038BE427924075C40562A5373C3172B40'), + (5, 49248, '0101000020E610000056ACFE9B1B195040B046F6B4A23D4440'), + (6, 75734, '0101000020E6100000788397D6A1DC65C08EB80652655A3840'), + (7, 71026, '0101000020E6100000EC1697F79472644072CAA4A7825053C0'), + (8, 97852, '0101000020E6100000C6F5B3F147D7654069A3BA7DDAE75140'), + (9, 69521, '0101000020E6100000FC6AF6D55BBB56409A2D1063813A50C0'), + (10, 87053, '0101000020E6100000ED99CCB0CCE559C08127412EE0294840'), + (11, 12902, '0101000020E61000006217B1200EEB61C0244EB0D2B3343CC0'), + (12, 39932, '0101000020E61000004441096983A141405B2358D3501951C0'), + (13, 46528, '0101000020E61000000440A3130F2F22C0DA24320DBB884940'), + (14, 44983, '0101000020E6100000757C081A6AAF4C40F638AE6D76235240'), + (15, 67146, '0101000020E61000007362E3AC00ED5AC05E54ABF8767342C0'), + (16, 81358, '0101000020E61000002DFD3B5895995E40C2719E2453BC2E40'), + (17, 62090, '0101000020E610000088F832477C074D40D0BCE1674EF0EE3F'), + (18, 89445, '0101000020E6100000F842E68F1C3862C046498B626D844B40'), + (19, 26213, '0101000020E61000005CC3556056316440A8AF47A7B12750C0'), + (20, 32077, '0101000020E61000005A111ACFC82E6040D4815BF8B3735240'), + (21, 61633, '0101000020E6100000106F7D3FA780514078C38538196A31C0'), + (22, 78250, '0101000020E6100000C096DB4E2ECC4CC0BAD71FB916584C40'), + (23, 16594, '0101000020E6100000562AF86EEEAE5BC09253DCA354D24FC0'), + (24, 59198, '0101000020E61000002C1AA37C3C953C40EED3DEACD3904EC0'), + (25, 48619, '0101000020E61000008490A3B7F1984140EEAFFCB7C8C54A40'), + (26, 38695, '0101000020E61000005F575A5F50CD50C08E617B39FBDE3DC0'), + (27, 34060, '0101000020E6100000E8D20BA94C905D40A413D67AEBD73E40'), + (28, 36744, '0101000020E61000007E97BBCB8F2C54401C352F1857913A40'), + (29, 34349, '0101000020E6100000F25A591EB3AA5FC0809D251CD1694540'), + (30, 89548, '0101000020E610000052F29460E5F96140CAA28F5DB33C51C0'); + + +CREATE INDEX ON autodetect.bigint_table USING GIST (geom); +CLUSTER bigint_table_geom_idx ON autodetect.bigint_table;