Skip to content

Commit

Permalink
adds support for id_column in tables for auto_publish (#790)
Browse files Browse the repository at this point in the history
Resolves #682 

- [x] Get id_column string from config.yaml and use for id column
- [x] Support for list of strings
- [x] Add info/warnings if column is not there or is of wrong type
- [x] if column for the feature ID is found, remove it from properties
(see inline comment)
- [x] cleanup logging messages
- [x] need more tests to catch other edge cases

---------

Co-authored-by: Yuri Astrakhan <[email protected]>
  • Loading branch information
Binabh and nyurik authored Aug 13, 2023
1 parent 14ded48 commit e3e6b35
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 2 deletions.
4 changes: 4 additions & 0 deletions docs/src/config-file.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ postgres:
source_id_format: 'table.{schema}.{table}.{column}'
# Add more schemas to the ones listed above
from_schemas: my_other_schema
# A table column to use as the feature ID
# If a table has no column with this name, `id_column` will not be set for that table.
# If a list of strings is given, the first found column will be treated as a feature ID.
id_columns: feature_id
functions:
# Optionally set how source ID should be generated based on the function's name and schema
source_id_format: '{schema}.{function}'
Expand Down
4 changes: 4 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ debug-page *ARGS: start
psql *ARGS:
psql {{ ARGS }} {{ DATABASE_URL }}

# Run PSQL utility against the test database
pg_dump *ARGS:
pg_dump {{ ARGS }} {{ DATABASE_URL }}

# Perform cargo clean to delete all build files
clean: clean-test stop
cargo clean
Expand Down
6 changes: 6 additions & 0 deletions martin/src/pg/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ pub struct PgCfgPublishType {
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "id_format")]
pub source_id_format: Option<String>,
/// A table column to use as the feature ID
/// If a table has no column with this name, `id_column` will not be set for that table.
/// If a list of strings is given, the first found column will be treated as a feature ID.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(alias = "id_column")]
pub id_columns: Option<OneOrMany<String>>,
}

impl PgConfig {
Expand Down
59 changes: 58 additions & 1 deletion martin/src/pg/configurator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::pg::pool::PgPool;
use crate::pg::table_source::{
calc_srid, merge_table_info, query_available_tables, table_to_query,
};
use crate::pg::utils::{find_info, normalize_key, InfoMap};
use crate::pg::utils::{find_info, find_kv_ignore_case, normalize_key, InfoMap};
use crate::pg::PgError::InvalidTableExtent;
use crate::pg::Result;
use crate::source::Sources;
Expand All @@ -27,6 +27,7 @@ pub type SqlTableInfoMapMapMap = InfoMap<InfoMap<InfoMap<TableInfo>>>;
pub struct PgBuilderAuto {
source_id_format: String,
schemas: Option<HashSet<String>>,
id_columns: Option<Vec<String>>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -122,6 +123,7 @@ impl PgBuilder {
let id2 = self.resolve_id(&source_id, &db_inf);
let Some(srid) = calc_srid(&db_inf.format_id(), &id2, db_inf.srid, 0, self.default_srid) else { continue };
db_inf.srid = srid;
update_id_column(&id2, &mut db_inf, auto_tables);
info!("Discovered source {id2} from {}", summary(&db_inf));
pending.push(table_to_query(
id2,
Expand Down Expand Up @@ -232,12 +234,57 @@ impl PgBuilder {
}
}

/// Try to find any ID column in a list of table columns (properties) that match one of the given `id_column` values.
/// If found, modify `id_column` value on the table info.
fn update_id_column(id: &str, inf: &mut TableInfo, auto_tables: &PgBuilderAuto) {
let Some(props) = inf.properties.as_mut() else { return };
let Some(try_columns) = &auto_tables.id_columns else { return };

for key in try_columns.iter() {
let (column, typ) = if let Some(typ) = props.get(key) {
(key, typ)
} else {
match find_kv_ignore_case(props, key) {
Ok(Some(result)) => {
info!("For source {id}, id_column '{key}' was not found, but found '{result}' instead.");
(result, props.get(result).unwrap())
}
Ok(None) => continue,
Err(multiple) => {
error!("Unable to configure source {id} because id_column '{key}' has no exact match or more than one potential matches: {}", multiple.join(", "));
continue;
}
}
};
// ID column can be any integer type as defined in
// https://github.com/postgis/postgis/blob/559c95d85564fb74fa9e3b7eafb74851810610da/postgis/mvt.c#L387C4-L387C66
if typ != "int4" && typ != "int8" && typ != "int2" {
warn!("Unable to use column `{key}` in table {}.{} as a tile feature ID because it has a non-integer type `{typ}`.", inf.schema, inf.table);
continue;
}

inf.id_column = Some(column.to_string());
let mut final_props = props.clone();
final_props.remove(column);
inf.properties = Some(final_props);
return;
}

info!(
"No ID column found for table {}.{} - searched for an integer column named {}.",
inf.schema,
inf.table,
try_columns.join(", ")
);
}

fn new_auto_publish(config: &PgConfig, is_function: bool) -> Option<PgBuilderAuto> {
let default_id_fmt = |is_func| (if is_func { "{function}" } else { "{table}" }).to_string();
let default = |schemas| {
Some(PgBuilderAuto {
source_id_format: default_id_fmt(is_function),
schemas,
id_columns: None,
})
};

Expand All @@ -252,6 +299,14 @@ fn new_auto_publish(config: &PgConfig, is_function: bool) -> Option<PgBuilderAut
.cloned()
.unwrap_or_else(|| default_id_fmt(is_function)),
schemas: merge_opt_hs(&a.from_schemas, &item.from_schemas),
id_columns: item.id_columns.as_ref().and_then(|ids| {
if is_function {
error!("Configuration parameter auto_publish.functions.id_columns is not supported");
None
} else {
Some(ids.iter().cloned().collect())
}
}),
}),
BoolOrObject::Bool(true) => default(merge_opt_hs(&a.from_schemas, &None)),
BoolOrObject::Bool(false) => None,
Expand Down Expand Up @@ -287,6 +342,7 @@ fn summary(info: &TableInfo) -> String {
Some(true) => "view",
_ => "table",
};
// TODO: add column_id to the summary if it is set
format!(
"{relkind} {}.{} with {} column ({}, SRID={})",
info.schema,
Expand Down Expand Up @@ -331,6 +387,7 @@ mod tests {
Some(PgBuilderAuto {
source_id_format: source_id_format.to_string(),
schemas: schemas.map(|s| s.iter().map(|s| (*s).to_string()).collect()),
id_columns: None,
})
}

Expand Down
1 change: 1 addition & 0 deletions tests/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ postgres:
auto_publish:
tables:
from_schemas: autodetect
id_columns: [feat_id, big_feat_id]

# Associative arrays of table sources
tables:
Expand Down
5 changes: 5 additions & 0 deletions tests/expected/auto/catalog_auto.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
"content_type": "application/x-protobuf",
"description": "autodetect.auto_table.geom"
},
{
"id": "bigint_table",
"content_type": "application/x-protobuf",
"description": "autodetect.bigint_table.geom"
},
{
"id": "function_Mixed_Name",
"content_type": "application/x-protobuf",
Expand Down
5 changes: 5 additions & 0 deletions tests/expected/configured/catalog_cfg.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
"content_type": "application/x-protobuf",
"description": "autodetect.auto_table.geom"
},
{
"id": "bigint_table",
"content_type": "application/x-protobuf",
"description": "autodetect.bigint_table.geom"
},
{
"id": "function_zxy_query",
"content_type": "application/x-protobuf",
Expand Down
12 changes: 12 additions & 0 deletions tests/expected/generated_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ postgres:
properties:
feat_id: int4
gid: int4
bigint_table:
schema: autodetect
table: bigint_table
srid: 4326
geometry_column: geom
extent: 4096
buffer: 64
clip_geom: true
geometry_type: POINT
properties:
big_feat_id: int8
id: int4
points1:
schema: public
table: points1
Expand Down
22 changes: 21 additions & 1 deletion tests/expected/given_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ postgres:
auto_publish:
tables:
from_schemas: autodetect
id_columns:
- feat_id
- big_feat_id
tables:
MixPoints:
schema: MixedCase
Expand All @@ -28,6 +31,7 @@ postgres:
table: auto_table
srid: 4326
geometry_column: geom
id_column: feat_id
bounds:
- -166.87107126230424
- -53.44747249115674
Expand All @@ -38,8 +42,24 @@ postgres:
clip_geom: true
geometry_type: POINT
properties:
feat_id: int4
gid: int4
bigint_table:
schema: autodetect
table: bigint_table
srid: 4326
geometry_column: geom
id_column: big_feat_id
bounds:
- -174.89475564568033
- -77.2579745396886
- 174.72753224514435
- 73.80785950599903
extent: 4096
buffer: 64
clip_geom: true
geometry_type: POINT
properties:
id: int4
points1:
layer_id: abc
schema: public
Expand Down
57 changes: 57 additions & 0 deletions tests/fixtures/tables/autodetect.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,60 @@ values (1, 71951, '0101000020E6100000211700C9E6DA6140F510E7C8F4DA2740'),

CREATE INDEX ON autodetect.auto_table USING GIST (geom);
CLUSTER auto_table_geom_idx ON autodetect.auto_table;


CREATE TABLE autodetect.bigint_table
(
id SERIAL PRIMARY KEY,
big_feat_id BIGINT,
geom GEOMETRY(POINT, 4326)
);

-- INSERT INTO autodetect.bigint_table
-- SELECT generate_series(1, 3) as id,
-- (random() * 100000)::bigint as big_feat_id,
-- (ST_DUMP(ST_GENERATEPOINTS(st_transform(st_tileenvelope(18, 235085, 122323), 4326), 3))).geom;
-- INSERT INTO autodetect.bigint_table
-- SELECT generate_series(4, 30) as id,
-- (random() * 100000)::bigint as big_feat_id,
-- (ST_DUMP(ST_GENERATEPOINTS(st_transform(st_tileenvelope(0, 0, 0), 4326), 27))).geom;
--
-- To dump the data above, uncomment code the above, comment the INSERT code bellow, and run:
-- just restart
-- just pg_dump --data-only --inserts --rows-per-insert=100 --table=autodetect.bigint_table

INSERT INTO autodetect.bigint_table
VALUES (1, 89698, '0101000020E6100000D4F7A0E7E2DA61402E2E980CB9DA2740'),
(2, 97180, '0101000020E61000007A523359E5DA614005BA7E9C85DA2740'),
(3, 91401, '0101000020E6100000267D6637ECDA61404174148C04DB2740'),
(4, 19522, '0101000020E610000038BE427924075C40562A5373C3172B40'),
(5, 49248, '0101000020E610000056ACFE9B1B195040B046F6B4A23D4440'),
(6, 75734, '0101000020E6100000788397D6A1DC65C08EB80652655A3840'),
(7, 71026, '0101000020E6100000EC1697F79472644072CAA4A7825053C0'),
(8, 97852, '0101000020E6100000C6F5B3F147D7654069A3BA7DDAE75140'),
(9, 69521, '0101000020E6100000FC6AF6D55BBB56409A2D1063813A50C0'),
(10, 87053, '0101000020E6100000ED99CCB0CCE559C08127412EE0294840'),
(11, 12902, '0101000020E61000006217B1200EEB61C0244EB0D2B3343CC0'),
(12, 39932, '0101000020E61000004441096983A141405B2358D3501951C0'),
(13, 46528, '0101000020E61000000440A3130F2F22C0DA24320DBB884940'),
(14, 44983, '0101000020E6100000757C081A6AAF4C40F638AE6D76235240'),
(15, 67146, '0101000020E61000007362E3AC00ED5AC05E54ABF8767342C0'),
(16, 81358, '0101000020E61000002DFD3B5895995E40C2719E2453BC2E40'),
(17, 62090, '0101000020E610000088F832477C074D40D0BCE1674EF0EE3F'),
(18, 89445, '0101000020E6100000F842E68F1C3862C046498B626D844B40'),
(19, 26213, '0101000020E61000005CC3556056316440A8AF47A7B12750C0'),
(20, 32077, '0101000020E61000005A111ACFC82E6040D4815BF8B3735240'),
(21, 61633, '0101000020E6100000106F7D3FA780514078C38538196A31C0'),
(22, 78250, '0101000020E6100000C096DB4E2ECC4CC0BAD71FB916584C40'),
(23, 16594, '0101000020E6100000562AF86EEEAE5BC09253DCA354D24FC0'),
(24, 59198, '0101000020E61000002C1AA37C3C953C40EED3DEACD3904EC0'),
(25, 48619, '0101000020E61000008490A3B7F1984140EEAFFCB7C8C54A40'),
(26, 38695, '0101000020E61000005F575A5F50CD50C08E617B39FBDE3DC0'),
(27, 34060, '0101000020E6100000E8D20BA94C905D40A413D67AEBD73E40'),
(28, 36744, '0101000020E61000007E97BBCB8F2C54401C352F1857913A40'),
(29, 34349, '0101000020E6100000F25A591EB3AA5FC0809D251CD1694540'),
(30, 89548, '0101000020E610000052F29460E5F96140CAA28F5DB33C51C0');


CREATE INDEX ON autodetect.bigint_table USING GIST (geom);
CLUSTER bigint_table_geom_idx ON autodetect.bigint_table;

0 comments on commit e3e6b35

Please sign in to comment.