diff --git a/api/apps/api/src/migrations/api/1708525796000-PrepareToStoreListOfRelevantFeaturesDataAlongsideFeatures.ts b/api/apps/api/src/migrations/api/1708525796000-PrepareToStoreListOfRelevantFeaturesDataAlongsideFeatures.ts
new file mode 100644
index 0000000000..e697752478
--- /dev/null
+++ b/api/apps/api/src/migrations/api/1708525796000-PrepareToStoreListOfRelevantFeaturesDataAlongsideFeatures.ts
@@ -0,0 +1,25 @@
+import { MigrationInterface, QueryRunner } from 'typeorm';
+
+export class PrepareToStoreListOfRelevantFeaturesDataAlongsideFeatures1708525796000
+  implements MigrationInterface
+{
+  public async up(queryRunner: QueryRunner): Promise<void> {
+    /**
+     * Only create the column; this is going to be populated - for any existing
+     * features - through a separate script to be run outside of the
+     * NestJS/TypeORM db migration flow, as this operation needs to deal with
+     * data across apidb and geoprocessingdb.
+     */
+    await queryRunner.query(`
+ALTER TABLE features
+ADD COLUMN feature_data_stable_ids uuid[];
+    `);
+  }
+
+  public async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+ALTER TABLE features
+DROP COLUMN feature_data_stable_ids;
+    `);
+  }
+}
diff --git a/api/apps/api/src/modules/geo-features/geo-feature.api.entity.ts b/api/apps/api/src/modules/geo-features/geo-feature.api.entity.ts
index 6b2a9327e2..dd19b2cbd9 100644
--- a/api/apps/api/src/modules/geo-features/geo-feature.api.entity.ts
+++ b/api/apps/api/src/modules/geo-features/geo-feature.api.entity.ts
@@ -67,6 +67,14 @@ export class GeoFeature extends BaseEntity {
   @Column('uuid')
   intersection?: string[];
 
+  /**
+   * Array of uuids that match all the stable_ids of (geodb)features_data rows
+   * relevant to the feature.
+   */
+  @ApiPropertyOptional()
+  @Column('uuid', { array: true, name: 'feature_data_stable_ids' })
+  featureDataStableIds?: string[];
+
   @Column('varchar', { name: 'creation_status' })
   creationStatus?: JobStatus;
 
diff --git a/api/apps/api/test/upload-feature/upload-feature.fixtures.ts b/api/apps/api/test/upload-feature/upload-feature.fixtures.ts
index 3122875bf6..e8954fcc87 100644
--- a/api/apps/api/test/upload-feature/upload-feature.fixtures.ts
+++ b/api/apps/api/test/upload-feature/upload-feature.fixtures.ts
@@ -327,6 +327,7 @@ export const getFixtures = async () => {
         {
           id: expect.any(String),
           featureClassName: name,
+          featureDataStableIds: null,
           description,
           alias: null,
           amountMax: 5296399725.20094,
diff --git a/api/apps/geoprocessing/src/export/pieces-exporters/project-custom-features.piece-exporter.ts b/api/apps/geoprocessing/src/export/pieces-exporters/project-custom-features.piece-exporter.ts
index a565406a86..5c1cc0a02a 100644
--- a/api/apps/geoprocessing/src/export/pieces-exporters/project-custom-features.piece-exporter.ts
+++ b/api/apps/geoprocessing/src/export/pieces-exporters/project-custom-features.piece-exporter.ts
@@ -27,6 +27,7 @@ type ProjectCustomFeaturesSelectResult = {
   alias: string;
   description: string;
   property_name: string;
+  feature_data_stable_ids: string[];
   intersection: string[];
   creation_status: CreationStatus;
   list_property_keys: string[];
@@ -38,6 +39,7 @@
 type FeaturesDataSelectResult = {
   feature_id: string;
+  stable_id: string;
   the_geom: string;
   properties: Record<string, unknown>;
   source: GeometrySource;
@@ -80,6 +82,7 @@ export class ProjectCustomFeaturesPieceExporter
         'f.description',
         'f.property_name',
         'f.intersection',
+        'f.feature_data_stable_ids',
         'f.creation_status',
         'f.list_property_keys',
         'f.is_legacy',
@@ -100,6 +103,7 @@ export class ProjectCustomFeaturesPieceExporter
       .createQueryBuilder()
       .select([
         'feature_id',
+        'stable_id',
         'the_geom',
         'properties',
         'source',
@@ -120,7 +124,7 @@ export class ProjectCustomFeaturesPieceExporter
           ...feature,
           data: customFeaturesData
             .filter((data) => data.feature_id === id)
-            .map(({ feature_id, project_pu_id, ...data }) => {
+            .map(({ feature_id: _feature_id, project_pu_id, ...data }) => {
               const puid = project_pu_id
                 ? projectPusMap[project_pu_id]
                 : undefined;
diff --git a/api/apps/geoprocessing/src/import/pieces-importers/project-custom-features.piece-importer.ts b/api/apps/geoprocessing/src/import/pieces-importers/project-custom-features.piece-importer.ts
index 965a6139c8..aa2c274943 100644
--- a/api/apps/geoprocessing/src/import/pieces-importers/project-custom-features.piece-importer.ts
+++ b/api/apps/geoprocessing/src/import/pieces-importers/project-custom-features.piece-importer.ts
@@ -86,7 +86,7 @@ export class ProjectCustomFeaturesPieceImporter
     const featureIdByClassName: Record<string, string> = {};
     const featureInsertValues: any[] = [];
     const featureTagInsertValues: any[] = [];
-    features.forEach(({ data, tag, ...feature }) => {
+    features.forEach(({ data: _data, tag, ...feature }) => {
       const featureId = v4();
       featureIdByClassName[feature.feature_class_name] = featureId;
 
@@ -139,6 +139,7 @@ export class ProjectCustomFeaturesPieceImporter
           theGeom: () => `'${data.the_geom}'`,
           properties: data.properties,
           source: data.source,
+          stableId: data.stable_id,
           featureId: featureIdByClassName[feature_class_name],
           amount: data.amount,
           projectPuId: projectPuPuid
diff --git a/api/apps/geoprocessing/src/migrations/geoprocessing/1708524274000-AddStableIdsToFeaturesDataRows.ts b/api/apps/geoprocessing/src/migrations/geoprocessing/1708524274000-AddStableIdsToFeaturesDataRows.ts
new file mode 100644
index 0000000000..36628d2731
--- /dev/null
+++ b/api/apps/geoprocessing/src/migrations/geoprocessing/1708524274000-AddStableIdsToFeaturesDataRows.ts
@@ -0,0 +1,44 @@
+import { MigrationInterface, QueryRunner } from 'typeorm';
+
+export class AddStableIdsToFeaturesDataRows1708524274000
+  implements MigrationInterface
+{
+  public async up(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+DROP TRIGGER tr_precompute_feature_property_list ON features_data;
+
+CREATE TRIGGER tr_precompute_feature_property_list AFTER INSERT ON features_data
+FOR EACH ROW EXECUTE
+PROCEDURE precompute_feature_property_list();
+    `);
+
+    await queryRunner.query(`
+ALTER TABLE features_data
+ADD COLUMN stable_id uuid;
+
+CREATE INDEX features_data_stable_id__idx ON features_data(stable_id);
+
+CREATE UNIQUE INDEX features_data_unique_stable_ids_within_feature__idx ON features_data(feature_id, stable_id);
+    `);
+
+    await queryRunner.query(`
+UPDATE features_data
+SET stable_id = id;
+    `);
+
+    await queryRunner.query(`
+ALTER TABLE features_data
+ALTER COLUMN stable_id SET NOT NULL;
+
+ALTER TABLE features_data
+ALTER COLUMN stable_id SET DEFAULT gen_random_uuid();
+    `);
+  }
+
+  public async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+ALTER TABLE features_data
+DROP COLUMN stable_id;
+    `);
+  }
+}
diff --git a/api/libs/cloning/src/infrastructure/clone-piece-data/project-custom-features.ts b/api/libs/cloning/src/infrastructure/clone-piece-data/project-custom-features.ts
index 8452fc7083..34fa92d29e 100644
--- a/api/libs/cloning/src/infrastructure/clone-piece-data/project-custom-features.ts
+++ b/api/libs/cloning/src/infrastructure/clone-piece-data/project-custom-features.ts
@@ -15,6 +15,7 @@ export enum JobStatus {
 type FeatureData = {
   the_geom: string;
+  stable_id: string;
   properties: Record<string, unknown>;
   source: GeometrySource;
   amount: number | null;
@@ -27,6 +28,7 @@ export type ProjectCustomFeature = {
   description: string;
   property_name: string;
   intersection: string[];
+  feature_data_stable_ids: string[];
   creation_status: JobStatus;
   list_property_keys: string[];
   is_legacy: boolean;
diff --git a/api/libs/geofeatures/src/geo-feature.geo.entity.ts b/api/libs/geofeatures/src/geo-feature.geo.entity.ts
index eeaff87636..54b9f5804a 100644
--- a/api/libs/geofeatures/src/geo-feature.geo.entity.ts
+++ b/api/libs/geofeatures/src/geo-feature.geo.entity.ts
@@ -10,6 +10,21 @@ export class GeoFeatureGeometry {
   @PrimaryColumn()
   id!: string;
 
+  /**
+   * A stable id is not guaranteed to be globally unique, but it is guaranteed
+   * to be unique within each feature.
+   *
+   * This stable id is kept as is (hence the name) when feature data is copied
+   * over to a new project as part of project cloning. This way, the link
+   * established between features (in apidb) and all the matching
+   * `features_data` rows, via `GeoFeature.featureDataStableIds`, is preserved
+   * across clones (i.e. there is no need to update
+   * `GeoFeature.featureDataStableIds` throughout the import side of a cloning
+   * or project upload flow).
+   */
+  @ApiProperty()
+  @Column('uuid', { name: 'stable_id' })
+  stableId!: string;
+
   @Column('geometry', { name: 'the_geom' })
   theGeom?: Geometry;
 
diff --git a/docs/README_revision-of-feature-split-functionality-in-marxan-cloud-v2.md b/docs/README_revision-of-feature-split-functionality-in-marxan-cloud-v2.md
new file mode 100644
index 0000000000..510471cd29
--- /dev/null
+++ b/docs/README_revision-of-feature-split-functionality-in-marxan-cloud-v2.md
@@ -0,0 +1,111 @@
+# Revising how feature splits work in Marxan Cloud v2 - February 2024
+
+This document outlines the changes needed in order to make the feature split
+functionality work in the v2 release of Marxan Cloud, taking into account the
+breaking change (introduced in the same v2 release) whereby feature amounts
+per planning unit are stored alongside other spatial data, rather than always
+computed on the fly, via spatial intersection, when needed.
+
+## Linking of split features to subsets of features_data
+
+For each feature obtained via split, we need to store a list of unique
+identifiers of the `(geodb)features_data` rows that match the subset of the
+parent feature identified by the K/V pair set for the split feature.
+
+Although `(geodb)features_data.id` would be a natural candidate for these ids,
+in practice it would complicate how we handle copying features over through
+clones of projects: we would need to update all the stored lists of matching
+ids in each split feature being copied over, to match the new
+`(geodb)features_data.id` values generated on insert of the cloned parent
+features from which splits were obtained.
+
+We already handle similar predicaments throughout feature exporters/importers
+(essentially by creating temporary ids on export, which we then reference on
+import), but it may be much more effective (as well as simpler) at this stage
+to assign a stable id to each `(geodb)features_data` row, and then reference
+this from within the list of ids stored with each split feature.
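+
+For illustration, assuming the split criterion is stored as a K/V pair and
+that `features_data.properties` holds each row's attributes, the subset of
+rows to reference could be selected along these lines (the
+`:parent_feature_id`, `:split_key` and `:split_value` placeholders are
+illustrative only):
+
+```
+SELECT stable_id
+FROM features_data
+WHERE feature_id = :parent_feature_id
+  AND properties ->> :split_key = :split_value;
+```
+
+(If property values can be numeric, the comparison would need to account for
+`->>` returning text.)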
+
+As for the linking of split features to `features_data`, this could be done via
+a join table, but in this case it may be simpler to store the list as an array
+of ids within a single column of the `(apidb)features` table itself, not least
+because the join-table alternative would not really provide any benefits from a
+referential integrity point of view, since `features_data` lives in geodb.
+
+Exporting and importing feature metadata would also be more complicated if
+using a join table.
+
+Whereas this plan is specifically about the split functionality, this linking of
+features to `features_data` via arrays of stable ids could be done for _all_
+features (plain ones, those obtained via splits, and those obtained via
+stratification while this operation was enabled on any given instance): this
+way, the same strategy can be used when querying features created in any of the
+three possible ways listed above.
+
+In the case of the current global platform-wide feature, this would mean storing
+a very large array of ids in the feature metadata, because of the large number
+of geometries/`features_data` rows for this specific feature; likewise for
+user-uploaded features with large numbers of geometries. This may be a fair
+tradeoff, however, especially as it applies to a very limited number of
+features, while allowing us to avoid querying `features_data` in different ways
+depending on how a feature was created.
+
+## Updating existing data
+
+DB migrations will be needed to set stable ids for existing `features_data`
+rows.
+
+A self-standing script (such as the Python ones previously created to update
+existing data across apidb and geodb) will be needed to link existing split
+features to the correct subsets of `features_data`.
+
+### Creating stable ids for existing `features_data`
+
+This could be a simple
+
+```
+update features_data set stable_id = id;
+```
+
+as we won't need to enforce any particular values at this stage when
+backfilling the new column for existing features. Making the column `not null
+default gen_random_uuid()` would also work.
+
+### Linking existing split features to `features_data`
+
+Once the above step is done, we can run the self-standing script to query the
+relevant stable ids through the K/V pair stored as part of each split feature,
+and then set the array of relevant stable ids on the feature.
+
+The script may use the same queries already used to calculate subsets within
+`SplitOperation` (and `StratificationOperation`, if we want to do this also for
+features that may be obtained via stratification).
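+
+As a rough sketch, and since `features` lives in apidb while `features_data`
+lives in geodb, the script would need to bridge two queries along these lines
+(parameter names are illustrative only):
+
+```
+-- geodb: collect the stable ids of the subset matching the split's K/V pair
+SELECT array_agg(stable_id) AS stable_ids
+FROM features_data
+WHERE feature_id = :parent_feature_id
+  AND properties ->> :split_key = :split_value;
+
+-- apidb: store the collected ids alongside the split feature
+UPDATE features
+SET feature_data_stable_ids = :stable_ids
+WHERE id = :split_feature_id;
+```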
+
+## Updating the split operation
+
+The main issue preventing the split operation from working correctly without
+the changes outlined in this document is that `SplitOperation` is passed the id
+of the split feature for the step in which it tries to compute the split
+feature's amounts per PU; but since `features_data` geometries are only ever
+linked to "parent" (i.e. whole, not split) features, we end up with no
+geometries, hence no amounts per planning unit, and hence no min/max of amounts
+per PU either.
+
+So this step needs to be changed to:
+
+- firstly, query the subset of `features_data` that matches the K/V pair
+  requested
+- store the list of stable ids of these `features_data` rows alongside the
+  feature being created
+- use the list of `features_data` rows derived above when calculating feature
+  amounts per PU
+
+## Updating piece exporters and importers
+
+Piece exporters and importers will need to:
+
+- export and import the new `(geodb)features_data.stable_id` column
+- export and import the new `(apidb)features.feature_data_stable_ids` column
+
+## Updating queries for tiles of features
+
+Tile queries will need to use the list of relevant `features_data` rows as
+stored alongside each feature.
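+
+Since stable ids are only guaranteed to be unique within a feature (not
+globally), such queries should keep filtering by the id of the feature that the
+geometries are actually linked to (i.e. the parent feature, in the case of
+split features), alongside the new stable id filter. A minimal sketch
+(placeholders are illustrative; the real queries will additionally include the
+usual tile rendering logic):
+
+```
+SELECT the_geom, properties
+FROM features_data
+WHERE feature_id = :parent_feature_id
+  AND stable_id = ANY(:feature_data_stable_ids);
+```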