diff --git a/modules/harvest/harvest.install b/modules/harvest/harvest.install index 42defc8699..b09b7d2cb6 100644 --- a/modules/harvest/harvest.install +++ b/modules/harvest/harvest.install @@ -1,8 +1,5 @@ select($table_name_temp, 'hrt') - ->fields('hrt', ['id']); - $query->orderBy('id', 'ASC'); - $result = $query->execute()->fetchCol(0); - // Can't do orderBy as the sort end up natural, not numeric. - asort($result, SORT_NUMERIC); - - return $result ?? []; -} - -/** - * Reads a single harvest row from the temp table. - * - * @param string $table_name_temp - * Name of the table to read from. - * - * @param string $time_id - * The id to read from, which was also the timestamp. - * - * @return array - * Elements from the row ['id', 'harvest_plan_id', 'data', 'extract_status']. - */ -function harvest_read_harvest_run(string $table_name_temp, string $time_id): array { - $connection = Database::getConnection(); - $query = $connection->select($table_name_temp, 'hrt') - ->fields('hrt', ['id', 'harvest_plan_id', 'data', 'extract_status']) - ->condition('id', $time_id, '=') - ->orderBy('id', 'ASC'); - $result = $query->execute()->fetchAll(PDO::FETCH_ASSOC); - return reset($result); -} - -function harvest_write_harvest_run(string $id, string $harvest_plan_id, string $data, string $extract_status) { - /** @var \Drupal\Core\Database\Connection $connection */ - $connection = \Drupal::service('database'); - $result = $connection->insert('harvest_runs') - ->fields([ - 'timestamp' => (int) $id, - 'harvest_plan_id' => $harvest_plan_id, - 'data' => $data, - 'extract_status' => $extract_status, - ]) - ->execute(); -} - /** * Uninstall obsolete submodule harvest_dashboard. */ @@ -202,10 +142,12 @@ function harvest_update_8010(&$sandbox) { $table_name_temp = "{$table_name}_temp"; $messages = ''; $schema = \Drupal::database()->schema(); + /** @var \Drupal\harvest\HarvestUtility $harvest_utility */ + $harvest_utility = \Drupal::service('dkan.harvest.utility'); if (!isset($sandbox['total'])) { // Sandbox has not been initiated, so initiate it. - $sandbox['items_to_process'] = harvest_get_temp_run_ids($table_name_temp); + $sandbox['items_to_process'] = $harvest_utility->getTempRunIdsForUpdate($table_name_temp); $sandbox['total'] = count($sandbox['items_to_process']); $sandbox['current'] = 0; } @@ -214,9 +156,9 @@ function harvest_update_8010(&$sandbox) { // Loop through all the entries in temp table and save them new. foreach ($harvest_runs_batch as $key => $time_id) { // Load the old row. - $row = harvest_read_harvest_run($table_name_temp, $time_id); + $row = $harvest_utility->readTempHarvestRunForUpdate($table_name_temp, $time_id); // Write the new harvest run. - harvest_write_harvest_run($row['id'], $row['harvest_plan_id'], $row['data'], $row['extract_status']); + $harvest_utility->writeHarvestRunFromUpdate($row['id'], $row['harvest_plan_id'], $row['data'], $row['extract_status']); // The item has been processed, remove it from the array. unset($sandbox['items_to_process'][$key]); } diff --git a/modules/harvest/harvest.services.yml b/modules/harvest/harvest.services.yml index 8d9cb604eb..10dd0569ce 100644 --- a/modules/harvest/harvest.services.yml +++ b/modules/harvest/harvest.services.yml @@ -19,6 +19,7 @@ services: - '@dkan.harvest.storage.harvest_run_repository' - '@database' - '@dkan.harvest.logger_channel' + - '@uuid' dkan.harvest.harvest_plan_repository: class: Drupal\harvest\Entity\HarvestPlanRepository arguments: diff --git a/modules/harvest/src/HarvestUtility.php b/modules/harvest/src/HarvestUtility.php index 5a8e03b5cb..6c2aad0002 100644 --- a/modules/harvest/src/HarvestUtility.php +++ b/modules/harvest/src/HarvestUtility.php @@ -2,6 +2,7 @@ namespace Drupal\harvest; +use Drupal\Component\Uuid\UuidInterface; use Drupal\Core\Database\Connection; use Drupal\harvest\Entity\HarvestRunRepository; use Drupal\harvest\Storage\DatabaseTableFactory; @@ -11,8 +12,8 @@ /** * DKAN Harvest utility service for maintenance tasks. * - * These methods generally exist to support a thin Drush layer. These are - * methods that we don't need in the HarvestService object. + * These methods generally exist to support a thin Drush layer or hook_update_n. + * These are methods that we don't need in the HarvestService object. */ class HarvestUtility { @@ -58,6 +59,13 @@ class HarvestUtility { */ private LoggerInterface $logger; + /** + * Uuid service. + * + * @var \Drupal\Component\Uuid\UuidInterface + */ + private UuidInterface $uuidService; + /** * Constructor. */ @@ -67,7 +75,8 @@ public function __construct( HarvestHashesDatabaseTableFactory $hashesFactory, HarvestRunRepository $runRepository, Connection $connection, - LoggerInterface $loggerChannel + LoggerInterface $loggerChannel, + UuidInterface $uuid_service ) { $this->harvestService = $harvestService; $this->storeFactory = $storeFactory; @@ -75,6 +84,7 @@ public function __construct( $this->runRepository = $runRepository; $this->connection = $connection; $this->logger = $loggerChannel; + $this->uuidService = $uuid_service; } /** @@ -243,4 +253,74 @@ public function harvestRunsUpdate() { } } + /** + * Get the ids from the temp harvest run table. + * + * Only needed for harvest_update_8010. + * + * @param mixed $table_name_temp + * The name of the temp table. + * + * @return array + * The ids of all the harvest runs in the table sorted oldest to newest. + */ + public function getTempRunIdsForUpdate($table_name_temp) : array { + $query = $this->connection->select($table_name_temp, 'hrt') + ->fields('hrt', ['id']) + ->orderBy('id', 'ASC'); + $result = $query->execute()->fetchCol(0); + // Can't rely on orderBy as the sort ends up natural, not numeric. + asort($result, SORT_NUMERIC); + + return $result ?? []; + } + + /** + * Reads a single harvest row from the harvest run temp table. + * + * Only needed for harvest_update_8010. + * + * @param string $table_name_temp + * Name of the table to read from. + * @param string $timestamp + * The id to read from, which was also the timestamp. + * + * @return array + * Elements from the row['id', 'harvest_plan_id', 'data', 'extract_status']. + */ + public function readTempHarvestRunForUpdate(string $table_name_temp, string $timestamp): array { + $query = $this->connection->select($table_name_temp, 'hrt') + ->fields('hrt', ['id', 'harvest_plan_id', 'data', 'extract_status']) + ->condition('id', $timestamp, '=') + ->orderBy('id', 'ASC'); + $result = $query->execute()->fetchAll(\PDO::FETCH_ASSOC); + return reset($result); + } + + /** + * Creates a new entry in harvest_run based on data from harvest run temp. + * + * Only needed for harvest_update_8010. + * + * @param string $timestamp + * The id from the old harvest run, which was a timestamp. + * @param string $harvest_plan_id + * The harvest plan id. + * @param string $data + * Data about the harvest. + * @param string $extract_status + * The status of the harvest. + */ + public function writeHarvestRunFromUpdate(string $timestamp, string $harvest_plan_id, string $data, string $extract_status): void { + $this->connection->insert('harvest_runs') + ->fields([ + 'timestamp' => (int) $timestamp, + 'harvest_plan_id' => $harvest_plan_id, + 'uuid' => $this->uuidService->generate(), + 'data' => $data, + 'extract_status' => $extract_status, + ]) + ->execute(); + } + }