From 9b433dcb98ec17a5ee967887c579a2d30a49a13d Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Thu, 5 Dec 2024 13:53:15 -0800 Subject: [PATCH 1/8] [TM-1401] Implement CSV import for cleaned tree species. --- .../ImportTreeSpeciesAssociations.php | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 app/Console/Commands/ImportTreeSpeciesAssociations.php diff --git a/app/Console/Commands/ImportTreeSpeciesAssociations.php b/app/Console/Commands/ImportTreeSpeciesAssociations.php new file mode 100644 index 00000000..0e36d604 --- /dev/null +++ b/app/Console/Commands/ImportTreeSpeciesAssociations.php @@ -0,0 +1,79 @@ +executeAbortableScript(function () { + $process = new Process(['wc', '-l', $this->argument('file')]); + $process->run(); + $this->assert($process->isSuccessful(), "WC failed {$process->getErrorOutput()}"); + + $lines = ((int)explode(" ", $process->getOutput())[0]) - 1; + + $fileHandle = fopen($this->argument('file'), "r"); + $this->parseHeaders(fgetcsv($fileHandle)); + + $this->withProgressBar($lines, function ($progressBar) use ($fileHandle) { + while ($csvRow = fgetcsv($fileHandle)) { + $treeSpeciesUuid = $csvRow[$this->treeSpeciesUuidColumn]; + $taxonId = $csvRow[$this->taxonIdColumn]; + + if ($taxonId != 'NA') { + TreeSpecies::isUuid($treeSpeciesUuid)->update(['taxon_id' => $taxonId]); + } + $progressBar->advance(); + } + + $progressBar->finish(); + }); + + fclose($fileHandle); + }); + } + + protected function parseHeaders(array $headerRow): void + { + foreach ($headerRow as $index => $header) { + if ($header == 'tree_species_uuid') { + $this->treeSpeciesUuidColumn = $index; + } else if ($header == 'taxon_id') { + $this->taxonIdColumn = $index; + } + } + + $this->assert( + $this->treeSpeciesUuidColumn != null && $this->taxonIdColumn != null, + 'Not all required columns were found' + ); + } +} From 14f9c7a4730be4f9cdbafb20dd3ccdf5df40822d Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Thu, 5 Dec 2024 13:58:10 -0800 Subject: 
[PATCH 2/8] [TM-1401] Establish the belongsTo association between v2_tree_species and tree_species_research. --- app/Models/V2/TreeSpecies/TreeSpecies.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/Models/V2/TreeSpecies/TreeSpecies.php b/app/Models/V2/TreeSpecies/TreeSpecies.php index c1995f25..ff4dc072 100644 --- a/app/Models/V2/TreeSpecies/TreeSpecies.php +++ b/app/Models/V2/TreeSpecies/TreeSpecies.php @@ -38,9 +38,7 @@ class TreeSpecies extends Model implements EntityRelationModel 'speciesable_id', 'collection', 'hidden', - - 'old_id', - 'old_model', + 'taxon_id', ]; public const COLLECTION_DIRECT_SEEDING = 'direct-seeding'; @@ -82,6 +80,11 @@ public function speciesable() return $this->morphTo(); } + public function taxonomicSpecies() + { + return $this->belongsTo(TreeSpeciesResearch::class, 'taxon_id'); + } + public function getRouteKeyName() { return 'uuid'; From 21b8138871539b10602cf10a561636f5b3808fa3 Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Thu, 5 Dec 2024 15:09:51 -0800 Subject: [PATCH 3/8] [TM-1401] lint fix --- app/Console/Commands/ImportTreeSpeciesAssociations.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/Console/Commands/ImportTreeSpeciesAssociations.php b/app/Console/Commands/ImportTreeSpeciesAssociations.php index 0e36d604..55bed7da 100644 --- a/app/Console/Commands/ImportTreeSpeciesAssociations.php +++ b/app/Console/Commands/ImportTreeSpeciesAssociations.php @@ -26,6 +26,7 @@ class ImportTreeSpeciesAssociations extends Command protected $description = 'Imports a CSV that links UUIDs from v2_tree_species to taxon_ids from tree_species_research'; protected int $treeSpeciesUuidColumn; + protected int $taxonIdColumn; /** @@ -38,9 +39,9 @@ public function handle() $process->run(); $this->assert($process->isSuccessful(), "WC failed {$process->getErrorOutput()}"); - $lines = ((int)explode(" ", $process->getOutput())[0]) - 1; + $lines = ((int)explode(' ', 
$process->getOutput())[0]) - 1; - $fileHandle = fopen($this->argument('file'), "r"); + $fileHandle = fopen($this->argument('file'), 'r'); $this->parseHeaders(fgetcsv($fileHandle)); $this->withProgressBar($lines, function ($progressBar) use ($fileHandle) { @@ -66,7 +67,7 @@ protected function parseHeaders(array $headerRow): void foreach ($headerRow as $index => $header) { if ($header == 'tree_species_uuid') { $this->treeSpeciesUuidColumn = $index; - } else if ($header == 'taxon_id') { + } elseif ($header == 'taxon_id') { $this->taxonIdColumn = $index; } } From cb17f0f8f7a873a5fbc1d7c1c54a094ce027d9c1 Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Thu, 5 Dec 2024 15:11:45 -0800 Subject: [PATCH 4/8] [TM-1401] Run the test suite on all pull requests. --- .github/workflows/pull-request.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 91c9a916..46fa9cfd 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -1,7 +1,6 @@ name: pull-request on: pull_request: - branches: [main, staging, release/**] jobs: lintTest: runs-on: ubuntu-latest From 165c7b98160ce97f3108cc51dfb550f39ac6267c Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Tue, 10 Dec 2024 13:14:45 -0800 Subject: [PATCH 5/8] [TM-1402] Include taxon_id in the tree species resource. 
--- app/Http/Resources/V2/TreeSpecies/TreeSpeciesResource.php | 1 + 1 file changed, 1 insertion(+) diff --git a/app/Http/Resources/V2/TreeSpecies/TreeSpeciesResource.php b/app/Http/Resources/V2/TreeSpecies/TreeSpeciesResource.php index 7ce666bf..12cd4daa 100644 --- a/app/Http/Resources/V2/TreeSpecies/TreeSpeciesResource.php +++ b/app/Http/Resources/V2/TreeSpecies/TreeSpeciesResource.php @@ -18,6 +18,7 @@ public function toArray($request) 'amount' => $this->amount, 'type' => $this->type, 'collection' => $this->collection, + 'taxon_id' => $this->taxon_id, ]; } } From 3af8790ed2c35993df788ce2a855bf039d1e29ee Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Tue, 10 Dec 2024 15:08:04 -0800 Subject: [PATCH 6/8] [TM-1402] Updates to data import for tree species. --- .../ImportTreeSpeciesAssociations.php | 24 +++++++++- .../OneOff/PopulateTreeSpeciesResearch.php | 44 ++++++++++++++++--- ...cific_epithet_to_tree_species_research.php | 29 ++++++++++++ 3 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 database/migrations/2024_12_10_220655_add_infraspecific_epithet_to_tree_species_research.php diff --git a/app/Console/Commands/ImportTreeSpeciesAssociations.php b/app/Console/Commands/ImportTreeSpeciesAssociations.php index 55bed7da..6371025e 100644 --- a/app/Console/Commands/ImportTreeSpeciesAssociations.php +++ b/app/Console/Commands/ImportTreeSpeciesAssociations.php @@ -3,7 +3,10 @@ namespace App\Console\Commands; use App\Console\Commands\Traits\Abortable; +use App\Console\Commands\Traits\AbortException; +use App\Console\Commands\Traits\ExceptionLevel; use App\Models\V2\TreeSpecies\TreeSpecies; +use App\Models\V2\TreeSpecies\TreeSpeciesResearch; use Illuminate\Console\Command; use Symfony\Component\Process\Process; @@ -45,17 +48,36 @@ public function handle() $this->parseHeaders(fgetcsv($fileHandle)); $this->withProgressBar($lines, function ($progressBar) use ($fileHandle) { + $abortExceptions = []; while ($csvRow = fgetcsv($fileHandle)) { 
$treeSpeciesUuid = $csvRow[$this->treeSpeciesUuidColumn]; $taxonId = $csvRow[$this->taxonIdColumn]; if ($taxonId != 'NA') { - TreeSpecies::isUuid($treeSpeciesUuid)->update(['taxon_id' => $taxonId]); + try { + $research = TreeSpeciesResearch::find($taxonId); + $this->assert($research != null, "Taxon ID not found: $taxonId", ExceptionLevel::Warning); + + TreeSpecies::isUuid($treeSpeciesUuid)->update([ + 'taxon_id' => $taxonId, + 'name' => $research->name, + ]); + } catch (AbortException $e) { + $abortExceptions[] = $e; + } } + $progressBar->advance(); } $progressBar->finish(); + + if (! empty($abortExceptions)) { + $this->warn("Errors and warnings encountered during parsing CSV Rows:\n"); + foreach ($abortExceptions as $error) { + $this->logException($error); + } + } }); fclose($fileHandle); diff --git a/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php b/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php index 67f4cc12..e16bdc1a 100644 --- a/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php +++ b/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php @@ -4,8 +4,10 @@ use App\Console\Commands\Traits\Abortable; use App\Console\Commands\Traits\AbortException; +use App\Console\Commands\Traits\ExceptionLevel; use App\Models\V2\TreeSpecies\TreeSpeciesResearch; use Illuminate\Console\Command; +use Symfony\Component\Process\Process; class PopulateTreeSpeciesResearch extends Command { @@ -33,6 +35,7 @@ class PopulateTreeSpeciesResearch extends Command 'family' => 'family', 'genus' => 'genus', 'specificEpithet' => 'specific_epithet', + 'infraspecificEpithet' => 'infraspecific_epithet', ]; // Populated by parseHeaders(), a mapping of DB colum name to the index in each row where that data is expected to @@ -45,21 +48,49 @@ class PopulateTreeSpeciesResearch extends Command public function handle() { $this->executeAbortableScript(function () { + $process = new Process(['wc', '-l', $this->argument('file')]); + $process->run(); + 
$this->assert($process->isSuccessful(), "WC failed {$process->getErrorOutput()}"); + + $lines = ((int)explode(' ', $process->getOutput())[0]) - 1; + $fileHandle = fopen($this->argument('file'), 'r'); - $this->parseHeaders(fgetcsv($fileHandle, separator: "\t")); + $this->parseHeaders(fgetcsv($fileHandle)); - // The input file at the time of this writing has 1618549 rows of data - $this->withProgressBar(1618549, function ($progressBar) use ($fileHandle) { - while ($csvRow = fgetcsv($fileHandle, separator: "\t")) { + $this->withProgressBar($lines, function ($progressBar) use ($fileHandle) { + $abortExceptions = []; + while ($csvRow = fgetcsv($fileHandle)) { $data = []; foreach ($this->columns as $column => $index) { $data[$column] = $csvRow[$index]; } - TreeSpeciesResearch::create($data); + + try { + $existing = TreeSpeciesResearch::where('scientific_name', $data['scientific_name'])->first(); + $this->assert( + $existing == null, + "Scientific name already exists, skipping: " . json_encode([ + 'existing_id' => $existing?->taxon_id, + 'new_id' => $data['taxon_id'], + 'scientific_name' => $data['scientific_name'], + ], JSON_PRETTY_PRINT), + ExceptionLevel::Warning + ); + TreeSpeciesResearch::create($data); + } catch (AbortException $e) { + $abortExceptions[] = $e; + } $progressBar->advance(); } $progressBar->finish(); + + if (!empty($abortExceptions)) { + $this->warn("Errors and warnings encountered during parsing CSV Rows:\n"); + foreach ($abortExceptions as $error) { + $this->logException($error); + } + } }); fclose($fileHandle); @@ -72,7 +103,8 @@ public function handle() protected function parseHeaders(array $headerRow): void { foreach ($headerRow as $index => $header) { - $header = trim($header); + // Excel puts some garbage at the beginning of the file that we need to filter out. 
+ $header = trim($header, "\xEF\xBB\xBF\""); if (array_key_exists($header, self::COLUMN_MAPPING)) { $this->columns[self::COLUMN_MAPPING[$header]] = $index; diff --git a/database/migrations/2024_12_10_220655_add_infraspecific_epithet_to_tree_species_research.php b/database/migrations/2024_12_10_220655_add_infraspecific_epithet_to_tree_species_research.php new file mode 100644 index 00000000..4d993dcf --- /dev/null +++ b/database/migrations/2024_12_10_220655_add_infraspecific_epithet_to_tree_species_research.php @@ -0,0 +1,29 @@ +string('infraspecific_epithet'); + $table->unique('scientific_name'); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('tree_species_research', function (Blueprint $table) { + $table->dropColumn('infraspecific_epithet'); + $table->dropIndex('tree_species_research_scientific_name_unique'); + }); + } +}; From 888f1d62a23686d097aa6e8458ade1d8faabde76 Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Tue, 10 Dec 2024 20:52:44 -0800 Subject: [PATCH 7/8] [TM-1402] Update the saved form fields for tree species collection. 
--- .../Commands/OneOff/UpdateTreeCollections.php | 17 +++++++++++++++++ config/wri/linked-fields.php | 1 + 2 files changed, 18 insertions(+) diff --git a/app/Console/Commands/OneOff/UpdateTreeCollections.php b/app/Console/Commands/OneOff/UpdateTreeCollections.php index 2e042e24..d97c206d 100644 --- a/app/Console/Commands/OneOff/UpdateTreeCollections.php +++ b/app/Console/Commands/OneOff/UpdateTreeCollections.php @@ -2,6 +2,7 @@ namespace App\Console\Commands\OneOff; +use App\Models\V2\Forms\FormQuestion; use App\Models\V2\Organisation; use App\Models\V2\ProjectPitch; use App\Models\V2\Projects\Project; @@ -62,5 +63,21 @@ public function handle() $updateRequest->update(['content' => $content]); } }); + + $this->info('Updating form fields'); + FormQuestion::withoutTimestamps(function () { + $relationSets = data_get(config('wri.linked-fields.models'), '*.relations'); + foreach ($relationSets as $relations) { + foreach ($relations as $linkedFieldKey => $properties) { + if ($properties['input_type'] != 'treeSpecies') { + continue; + } + + FormQuestion::withTrashed() + ->where('linked_field_key', $linkedFieldKey) + ->update(['collection' => $properties['collection']]); + } + } + }); } } diff --git a/config/wri/linked-fields.php b/config/wri/linked-fields.php index afbe3169..28c236f9 100644 --- a/config/wri/linked-fields.php +++ b/config/wri/linked-fields.php @@ -151,6 +151,7 @@ 'label' => 'Tree Species', 'resource' => 'App\Http\Resources\V2\TreeSpecies\TreeSpeciesResource', 'input_type' => 'treeSpecies', + 'collection' => 'historical-tree-species' ], 'org-leadership-team' => [ 'property' => 'leadershipTeam', From c8fe1f94624f89cfcbaba82e89e12a569a7d826c Mon Sep 17 00:00:00 2001 From: Nathan Curtis Date: Thu, 12 Dec 2024 16:40:33 -0800 Subject: [PATCH 8/8] [TM-1402] Trim the headers on tree species associations. 
--- app/Console/Commands/ImportTreeSpeciesAssociations.php | 3 ++- app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/Console/Commands/ImportTreeSpeciesAssociations.php b/app/Console/Commands/ImportTreeSpeciesAssociations.php index 6371025e..d6e3aa18 100644 --- a/app/Console/Commands/ImportTreeSpeciesAssociations.php +++ b/app/Console/Commands/ImportTreeSpeciesAssociations.php @@ -87,6 +87,7 @@ public function handle() protected function parseHeaders(array $headerRow): void { foreach ($headerRow as $index => $header) { + $header = trim($header, "\xEF\xBB\xBF\""); if ($header == 'tree_species_uuid') { $this->treeSpeciesUuidColumn = $index; } elseif ($header == 'taxon_id') { @@ -95,7 +96,7 @@ protected function parseHeaders(array $headerRow): void } $this->assert( - $this->treeSpeciesUuidColumn != null && $this->taxonIdColumn != null, + is_numeric($this->treeSpeciesUuidColumn) && is_numeric($this->taxonIdColumn), 'Not all required columns were found' ); } diff --git a/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php b/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php index e16bdc1a..ef2b4250 100644 --- a/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php +++ b/app/Console/Commands/OneOff/PopulateTreeSpeciesResearch.php @@ -69,7 +69,7 @@ public function handle() $existing = TreeSpeciesResearch::where('scientific_name', $data['scientific_name'])->first(); $this->assert( $existing == null, - "Scientific name already exists, skipping: " . json_encode([ + 'Scientific name already exists, skipping: ' . json_encode([ 'existing_id' => $existing?->taxon_id, 'new_id' => $data['taxon_id'], 'scientific_name' => $data['scientific_name'], @@ -85,7 +85,7 @@ public function handle() $progressBar->finish(); - if (!empty($abortExceptions)) { + if (! 
empty($abortExceptions)) { $this->warn("Errors and warnings encountered during parsing CSV Rows:\n"); foreach ($abortExceptions as $error) { $this->logException($error);