handle feature to features data links through cloning process [MRXN23-594] [MRXN23-595] [MRXN23-596] [MRXN23-597] #1655

Merged
@@ -0,0 +1,25 @@
import { MigrationInterface, QueryRunner } from 'typeorm';

export class PrepareToStoreListOfRelevantFeaturesDataAlongsideFeatures1708525796000
implements MigrationInterface
{
public async up(queryRunner: QueryRunner): Promise<void> {
/**
* Only create the column; this is going to be populated - for any existing
* features - through a separate script to be run outside of the
* NestJS/TypeORM db migration flow, as this operation needs to deal with
* data across apidb and geoprocessingdb.
*/
await queryRunner.query(`
ALTER TABLE features
ADD COLUMN feature_data_stable_ids uuid[];
`);
}

public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`
ALTER TABLE features
DROP COLUMN feature_data_stable_ids;
`);
}
}
@@ -67,6 +67,14 @@ export class GeoFeature extends BaseEntity {
@Column('uuid')
intersection?: string[];

/**
* Array of uuids that match all the stable_ids of (geodb)features_data rows
* relevant to the feature.
*/
@ApiPropertyOptional()
@Column('uuid', { array: true, name: 'feature_data_stable_ids' })
featureDataStableIds?: string[];

@Column('varchar', { name: 'creation_status' })
creationStatus?: JobStatus;

@@ -327,6 +327,7 @@ export const getFixtures = async () => {
{
id: expect.any(String),
featureClassName: name,
featureDataStableIds: null,
description,
alias: null,
amountMax: 5296399725.20094,
@@ -27,6 +27,7 @@ type ProjectCustomFeaturesSelectResult = {
alias: string;
description: string;
property_name: string;
feature_data_stable_ids: string[];
intersection: string[];
creation_status: CreationStatus;
list_property_keys: string[];
@@ -38,6 +39,7 @@ type ProjectCustomFeaturesSelectResult = {

type FeaturesDataSelectResult = {
feature_id: string;
stable_id: string;
the_geom: string;
properties: Record<string, string | number>;
source: GeometrySource;
@@ -80,6 +82,7 @@ export class ProjectCustomFeaturesPieceExporter
'f.description',
'f.property_name',
'f.intersection',
'f.feature_data_stable_ids',
'f.creation_status',
'f.list_property_keys',
'f.is_legacy',
@@ -100,6 +103,7 @@ export class ProjectCustomFeaturesPieceExporter
.createQueryBuilder()
.select([
'feature_id',
'stable_id',
'the_geom',
'properties',
'source',
@@ -120,7 +124,7 @@ export class ProjectCustomFeaturesPieceExporter
...feature,
data: customFeaturesData
.filter((data) => data.feature_id === id)
.map(({ feature_id, project_pu_id, ...data }) => {
.map(({ feature_id: _feature_id, project_pu_id, ...data }) => {
const puid = project_pu_id
? projectPusMap[project_pu_id]
: undefined;
@@ -86,7 +86,7 @@ export class ProjectCustomFeaturesPieceImporter
const featureIdByClassName: Record<string, string> = {};
const featureInsertValues: any[] = [];
const featureTagInsertValues: any[] = [];
features.forEach(({ data, tag, ...feature }) => {
features.forEach(({ data: _data, tag, ...feature }) => {
const featureId = v4();
featureIdByClassName[feature.feature_class_name] = featureId;

@@ -139,6 +139,7 @@ export class ProjectCustomFeaturesPieceImporter
theGeom: () => `'${data.the_geom}'`,
properties: data.properties,
source: data.source,
stableId: data.stable_id,
featureId: featureIdByClassName[feature_class_name],
amount: data.amount,
projectPuId: projectPuPuid
@@ -0,0 +1,44 @@
import { MigrationInterface, QueryRunner } from 'typeorm';

export class AddStableIdsToFeaturesDataRows1708524274000
implements MigrationInterface
{
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`
DROP TRIGGER tr_precompute_feature_property_list ON features_data;

CREATE TRIGGER tr_precompute_feature_property_list AFTER INSERT ON features_data
FOR EACH ROW EXECUTE
PROCEDURE precompute_feature_property_list();
`);

await queryRunner.query(`
ALTER TABLE features_data
ADD COLUMN stable_id uuid;

CREATE INDEX features_data_stable_id__idx ON features_data(stable_id);

CREATE UNIQUE INDEX features_data_unique_stable_ids_within_feature__idx ON features_data(feature_id, stable_id);
`);

await queryRunner.query(`
UPDATE features_data
SET stable_id = id;
`);

await queryRunner.query(`
ALTER TABLE features_data
ALTER COLUMN stable_id SET NOT NULL;

ALTER TABLE features_data
ALTER COLUMN stable_id SET DEFAULT gen_random_uuid();
`);
}

public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`
ALTER TABLE features_data
DROP COLUMN stable_id;
`);
}
}
@@ -15,6 +15,7 @@ export enum JobStatus {

type FeatureData = {
the_geom: string;
stable_id: string;
properties: Record<string, string | number>;
source: GeometrySource;
amount: number | null;
@@ -27,6 +28,7 @@ export type ProjectCustomFeature = {
description: string;
property_name: string;
intersection: string[];
feature_data_stable_ids: string[];
creation_status: JobStatus;
list_property_keys: string[];
is_legacy: boolean;
15 changes: 15 additions & 0 deletions api/libs/geofeatures/src/geo-feature.geo.entity.ts
@@ -10,6 +10,21 @@ export class GeoFeatureGeometry {
@PrimaryColumn()
id!: string;

/**
* A stable id is not guaranteed to be globally unique, but it is unique
* within each feature.
*
* This stable id is kept as is (hence the name) when feature data is copied
* over to a new project as part of project cloning. This way, the link
* established between features (in apidb) and all the matching
* `features_data` rows, via `GeoFeature.featureDataStableIds`, is preserved
* across clones (i.e. there is no need to update
* `GeoFeature.featureDataStableIds` throughout the import side of a cloning
* or project upload flow).
*/
@ApiProperty()
@Column('uuid', { name: 'stable_id' })
stableId!: string;

@Column('geometry', { name: 'the_geom' })
theGeom?: Geometry;

@@ -0,0 +1,111 @@
# Revising how feature splits work in Marxan Cloud v2 - February 2024

This document outlines the changes needed to make the feature split
functionality work in the v2 release of Marxan Cloud, taking into account the
breaking change (introduced in the same v2 release) whereby feature amounts per
planning unit are stored alongside other spatial data, rather than always
computed on the fly via spatial intersection.

## Linking of split features to subsets of features_data

For each feature obtained via split, we need to store a list of unique
identifiers of the `(geodb)features_data` rows that match the subset of the
parent feature identified by the key/value pair set for the split feature.

Although `(geodb)features_data.id` would be a natural candidate for these ids,
in practice this would complicate how we handle copying features over through
clones of projects, as we would need to update all the stored lists of matching
ids in each split feature we're copying over, to match the
`(geodb)features_data.id` generated on insert of the cloned parent features from
which splits were obtained.

We already work around similar issues throughout feature exporters/importers
(essentially by creating temporary ids on export, which are then referenced on
import), but it may be much more effective (as well as simpler) at this stage
to assign a stable id to each `(geodb)features_data` row, and then reference
these stable ids from within the list stored with each split feature.

As for the linking of split features to `features_data`, this could be done via
a join table, but in this case it may be simpler to store this as an array of
ids within a single column in the `(apidb)features` table itself, not least
because the join-table alternative would not really provide any benefits from a
referential integrity point of view, since `features_data` is in geodb.

Exporting and importing features metadata would also be more complicated if
using a join table.

Whereas this plan is specifically for the split functionality, this linking of
features to features_data via arrays of stable ids could be done for _all_ the
features (plain ones, those obtained via splits, and those obtained via
stratification while this operation was enabled on any given instance): this
way, the same strategy can be used when querying features created in any of the
three possible ways listed above.

In the case of the current global platform-wide feature, this would mean
storing a very large array of ids in the feature metadata, because of the large
number of geometries/`features_data` rows for this specific feature. Likewise
for user-uploaded features with large numbers of geometries. This may be a fair
tradeoff, however, especially as it would apply to a very limited number of
features, while avoiding the need to query `features_data` in different ways
depending on how a feature was created.

## Updating existing data

DB migrations will be needed to set stable ids for existing `features_data` rows.

A self-standing script (such as previous Python ones created to update existing
data across apidb and geodb) will be needed to link existing split features to
the correct subsets of `features_data`.

### Creating stable ids for existing `features_data`

This could be a simple

```sql
update features_data set stable_id = id;
```

as we won't need to enforce any particular values at this stage when
backfilling the new column for existing rows. Making the column `not null
default gen_random_uuid()` would also work.

### Linking existing split features to `features_data`

Once the above step is done, we can run a migration to query the relevant
stable ids via the key/value pair stored as part of each split feature, and
then set the array of relevant stable ids on the feature.

The script may use the same queries already used to calculate subsets within
`SplitOperation` (and `StratificationOperation`, if wanting to do this for
features that may be obtained via stratification as well).
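
A minimal sketch of such a script's core query (assuming the split criterion is
stored as a property key/value pair on `features_data` rows, and that column
names match those proposed here; the parameter placeholders are illustrative):

```sql
-- For a given split feature, collect the stable ids of the parent feature's
-- features_data rows whose properties match the split key/value pair.
-- $1: parent feature id, $2: split property key, $3: split value.
SELECT array_agg(fd.stable_id)
FROM features_data fd
WHERE fd.feature_id = $1
  AND fd.properties ->> $2 = $3;
```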

## Updating the split operation

The main issue preventing the split operation from working correctly without
the changes outlined in this document is that `SplitOperation` is passed the id
of the split feature for the step in which it tries to compute the split
feature's amount per PU; since `features_data` geometries are only ever linked
to "parent" (i.e. whole, not split) features, we end up with no geometries (and
therefore no amounts per planning unit, and hence no min/max of amounts per
PU).

So this needs to be changed to:

- firstly, query the subset of `features_data` that matches the K/V pair requested
- store the list of stable ids of these `features_data` rows alongside the
feature being created
- use the list of `features_data` rows derived above, when calculating feature
amounts per PU
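
The selection step above can be sketched in isolation as plain filtering logic
(the types and function below are illustrative, not the actual `SplitOperation`
API):

```typescript
// Illustrative type: a features_data row reduced to the fields relevant here.
type FeatureDataRow = {
  stableId: string;
  properties: Record<string, string | number>;
};

// Given the parent feature's features_data rows, select the subset matching
// the split key/value pair and collect their stable ids, ready to be stored
// alongside the split feature being created.
function stableIdsForSplit(
  rows: FeatureDataRow[],
  splitKey: string,
  splitValue: string | number,
): string[] {
  return rows
    .filter((row) => row.properties[splitKey] === splitValue)
    .map((row) => row.stableId);
}
```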

## Updating piece exporters and importers

Piece exporters and importers will need to:

- export and import the new `(geodb)features_data.stable_id` column
- export and import the new `(apidb)feature_data_stable_ids` column

## Updating queries for tiles of features

These will need to use the list of relevant `features_data` rows as stored
alongside each feature.
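
A hedged sketch of what such a tile query could look like (using PostGIS MVT
helpers; tile coordinates, layer naming, and SRID handling are simplified, and
the parameter placeholders are illustrative):

```sql
-- Only geometries whose stable_id appears in the feature's stored list
-- ($1, the feature_data_stable_ids array) contribute to the tile.
-- $2, $3, $4: tile zoom/x/y.
WITH tile_geoms AS (
  SELECT ST_AsMVTGeom(
    ST_Transform(fd.the_geom, 3857),
    ST_TileEnvelope($2, $3, $4)
  ) AS geom
  FROM features_data fd
  WHERE fd.stable_id = ANY($1::uuid[])
)
SELECT ST_AsMVT(tile_geoms.*) AS mvt
FROM tile_geoms;
```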