Skip to content

Commit

Permalink
Clean up leftover harvest data tables (#4049)
Browse files Browse the repository at this point in the history
  • Loading branch information
paul-m authored Nov 16, 2023
1 parent 762a75e commit 7fa499f
Show file tree
Hide file tree
Showing 9 changed files with 444 additions and 148 deletions.
1 change: 1 addition & 0 deletions modules/harvest/drush.services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ services:
arguments:
- '@dkan.harvest.service'
- '@dkan.harvest.logger_channel'
- '@dkan.harvest.utility'
tags:
- { name: drush.command }
24 changes: 24 additions & 0 deletions modules/harvest/harvest.install
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,30 @@
* @file
*/

/**
 * Implements hook_requirements().
 *
 * During the 'runtime' phase, asks the harvest utility service for plan IDs
 * that still have data tables but are no longer registered, and reports them
 * as a warning together with the Drush command that removes them.
 *
 * @param string $phase
 *   The phase in which requirements are checked.
 *
 * @return array
 *   Requirement entries keyed by requirement name. Empty when not in the
 *   runtime phase, when the utility service is unavailable, or when no
 *   leftover data is found.
 */
function harvest_requirements($phase): array {
  $requirements = [];
  if ($phase !== 'runtime') {
    return $requirements;
  }
  // \Drupal::service() throws for an unknown service instead of returning a
  // falsy value, so availability has to be checked explicitly (for example
  // when the container has not been rebuilt since the service was added).
  if (!\Drupal::hasService('dkan.harvest.utility')) {
    return $requirements;
  }
  /** @var \Drupal\harvest\HarvestUtility $harvest_utility */
  $harvest_utility = \Drupal::service('dkan.harvest.utility');
  $leftover_harvest_data_ids = $harvest_utility->findOrphanedHarvestDataIds();
  if (!$leftover_harvest_data_ids) {
    return $requirements;
  }
  $requirements['dkan harvest leftover data'] = [
    'title' => t('DKAN Harvest Leftover Plan Data'),
    'value' => t('Leftover harvest data for plans: @plans', [
      '@plans' => implode(', ', $leftover_harvest_data_ids),
    ]),
    'description' => t(
      'DKAN\'s harvest module has detected extra unneeded data tables.
You can remove them using this Drush command from the CLI:
<code>drush dkan:harvest:cleanup</code>'
    ),
    'severity' => REQUIREMENT_WARNING,
  ];
  return $requirements;
}

/**
* Uninstall obsolete submodule harvest_dashboard.
*/
Expand Down
6 changes: 6 additions & 0 deletions modules/harvest/harvest.services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ services:
- '@entity_type.manager'
calls:
- [ setLoggerFactory, [ '@logger.factory' ] ]
dkan.harvest.utility:
class: Drupal\harvest\HarvestUtility
arguments:
- '@dkan.harvest.service'
- '@dkan.harvest.storage.database_table'
- '@database'
dkan.harvest.storage.database_table:
class: Drupal\harvest\Storage\DatabaseTableFactory
arguments:
Expand Down
58 changes: 56 additions & 2 deletions modules/harvest/src/Commands/HarvestCommands.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Drupal\harvest\Commands;

use Drupal\Core\Logger\LoggerChannelInterface;
use Drupal\harvest\HarvestUtility;
use Drupal\harvest\Load\Dataset;
use Drupal\harvest\HarvestService;
use Drush\Commands\DrushCommands;
Expand All @@ -26,14 +27,26 @@ class HarvestCommands extends DrushCommands {
*/
protected $harvestService;

/**
* Harvest utility service.
*
* @var \Drupal\harvest\HarvestUtility
*/
protected HarvestUtility $harvestUtility;

/**
* Constructor.
*/
public function __construct(HarvestService $service, LoggerChannelInterface $logger) {
public function __construct(
  HarvestService $service,
  LoggerChannelInterface $logger,
  HarvestUtility $harvestUtility
) {
  parent::__construct();
  // Keep each injected collaborator on the command instance.
  // @todo Passing these via arguments doesn't seem to play well with
  // drush.services.yml.
  $this->harvestUtility = $harvestUtility;
  $this->logger = $logger;
  $this->harvestService = $service;
}

/**
Expand All @@ -51,7 +64,7 @@ function ($id) {
return [$id];
},
$this->harvestService->getAllHarvestIds()
);
);
(new Table(new ConsoleOutput()))->setHeaders(['plan id'])->setRows($rows)->render();
}

Expand Down Expand Up @@ -347,6 +360,47 @@ public function orphanDatasets(string $harvestId) : int {
}
}

/**
* Report and cleanup harvest data which may be cluttering your database.
*
* Will print a report. Add -y or --no-interaction to automatically perform
* this cleanup.
*
* @command dkan:harvest:cleanup
*
* @return int
* Bash status code.
*
* @bootstrap full
*/
public function harvestCleanup(): int {
  $log = $this->logger();
  $leftover_plan_ids = $this->harvestUtility->findOrphanedHarvestDataIds();

  // Nothing to report or remove: say so and finish.
  if (!$leftover_plan_ids) {
    $log->notice('No leftover harvest data detected.');
    return DrushCommands::EXIT_SUCCESS;
  }

  // Report what was found, then only remove it once the user agrees. The
  // confirmation defaults to "no".
  $log->notice('Detected leftover harvest data for these plans: ' . implode(', ', $leftover_plan_ids));
  $confirmed = $this->io()->confirm('Do you want to remove this data?', FALSE);
  if ($confirmed) {
    $this->cleanupHarvestDataTables($leftover_plan_ids);
  }
  return DrushCommands::EXIT_SUCCESS;
}

/**
 * Remove leftover harvest data tables for each of the given plans.
 *
 * Logs a notice naming each plan, then hands that plan to the harvest utility
 * service, which performs the actual table removal.
 *
 * @param array $plan_ids
 *   Plan identifiers whose data tables should be removed.
 */
protected function cleanupHarvestDataTables(array $plan_ids) : void {
  foreach ($plan_ids as $orphan_id) {
    $message = 'Cleaning up: ' . $orphan_id;
    $this->logger()->notice($message);
    $this->harvestUtility->destructOrphanTables($orphan_id);
  }
}

/**
* Throw error if Harvest ID does not exist.
*
Expand Down
27 changes: 18 additions & 9 deletions modules/harvest/src/HarvestService.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class HarvestService implements ContainerInjectionInterface {
*/
public static function create(ContainerInterface $container) {
return new self(
$container->get("dkan.harvest.storage.database_table"),
$container->get('dkan.harvest.storage.database_table'),
$container->get('dkan.metastore.service'),
$container->get('entity_type.manager')
);
Expand Down Expand Up @@ -129,17 +129,26 @@ public function registerHarvest($plan) {
/**
* Deregister harvest.
*
* @param string $id
* Id.
* @param string $plan_id
* Plan identifier.
*
* @return bool
* Boolean.
* Whether this happened successfully.
*/
public function deregisterHarvest(string $id) {

$plan_store = $this->storeFactory->getInstance("harvest_plans");

return $plan_store->remove($id);
public function deregisterHarvest(string $plan_id) {
  // Each plan owns three support tables, named harvest_{plan id}{suffix}.
  // Destroy them first, in this order, before forgetting the plan itself.
  $support_table_suffixes = ['_items', '_hashes', '_runs'];
  foreach ($support_table_suffixes as $suffix) {
    /** @var \Drupal\common\Storage\DatabaseTableInterface $support_store */
    $support_store = $this->storeFactory->getInstance('harvest_' . $plan_id . $suffix);
    $support_store->destruct();
  }
  // Finally drop the plan's own record from the harvest_plans table and
  // report the outcome of that removal.
  return $this->storeFactory->getInstance('harvest_plans')->remove($plan_id);
}

/**
Expand Down
155 changes: 155 additions & 0 deletions modules/harvest/src/HarvestUtility.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
<?php

namespace Drupal\harvest;

use Drupal\Core\Database\Connection;
use Drupal\Core\DependencyInjection\ContainerInjectionInterface;
use Drupal\harvest\Storage\DatabaseTableFactory;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
* DKAN Harvest utility service for maintenance tasks.
*
* These methods generally exist to support a thin Drush layer. These are
* methods that we don't need in the HarvestService object.
*/
class HarvestUtility implements ContainerInjectionInterface {

  /**
   * Harvest service.
   *
   * @var \Drupal\harvest\HarvestService
   */
  private HarvestService $harvestService;

  /**
   * Service to instantiate storage objects for Harvest plan storage.
   *
   * @var \Drupal\harvest\Storage\DatabaseTableFactory
   */
  private DatabaseTableFactory $storeFactory;

  /**
   * Database connection.
   *
   * @var \Drupal\Core\Database\Connection
   */
  private Connection $connection;

  /**
   * Create.
   *
   * @inheritdoc
   */
  public static function create(ContainerInterface $container) {
    return new self(
      $container->get('dkan.harvest.service'),
      $container->get('dkan.harvest.storage.database_table'),
      $container->get('database'),
    );
  }

  /**
   * Constructor.
   *
   * @param \Drupal\harvest\HarvestService $harvestService
   *   Harvest service, used to look up the currently registered plan IDs.
   * @param \Drupal\harvest\Storage\DatabaseTableFactory $storeFactory
   *   Factory for the storage objects that wrap harvest data tables.
   * @param \Drupal\Core\Database\Connection $connection
   *   Database connection, used to search the schema for harvest tables.
   */
  public function __construct(
    HarvestService $harvestService,
    DatabaseTableFactory $storeFactory,
    Connection $connection
  ) {
    $this->harvestService = $harvestService;
    $this->storeFactory = $storeFactory;
    $this->connection = $connection;
  }

  /**
   * Get the plan ID from a given harvest table name.
   *
   * Harvest table names are assumed to look like this:
   * harvest_ID_that_might_have_underscores_[something]. For example:
   * 'harvest_ABC_123_runs'.
   *
   * The first and last underscore-separated segments are dropped without
   * being inspected; everything in between is treated as the plan ID.
   *
   * @param string $table_name
   *   The table name.
   *
   * @return string
   *   The ID gleaned from the table name. If no ID could be gleaned, returns
   *   an empty string.
   */
  public static function planIdFromTableName(string $table_name): string {
    $segments = explode('_', $table_name);
    // A usable name needs a prefix, at least one ID segment, and a suffix.
    if (count($segments) < 3) {
      return '';
    }
    // Strip the leading prefix segment and the trailing suffix segment.
    return implode('_', array_slice($segments, 1, -1));
  }

  /**
   * Find harvest IDs with data tables that aren't in the harvest_plans table.
   *
   * Table names from which no plan ID can be derived are ignored. Otherwise
   * they would be reported as an orphan with an empty ID that could never be
   * cleaned up.
   *
   * @return array
   *   Array of orphan plan ids, as both key and value. Empty if there are no
   *   orphaned plan ids.
   */
  public function findOrphanedHarvestDataIds(): array {
    $existing_plans = $this->existingPlanIds();
    $table_names = $this->findAllHarvestDataTables();

    $orphan_ids = [];
    foreach ($table_names as $table_name) {
      $plan_id = static::planIdFromTableName($table_name);
      if ($plan_id === '') {
        // Not a per-plan harvest table (e.g. 'harvest_runs').
        continue;
      }
      // Strict comparison: plan IDs are opaque strings, and loose comparison
      // would treat numeric-looking IDs such as '0123' and '123' as equal.
      if (!in_array($plan_id, $existing_plans, TRUE)) {
        $orphan_ids[$plan_id] = $plan_id;
      }
    }
    return $orphan_ids;
  }

  /**
   * Find all the potential harvest data tables names in the database.
   *
   * @return array
   *   All the table names that might be harvest data tables.
   */
  protected function findAllHarvestDataTables(): array {
    $tables = [];
    foreach ([
      // The expressions use 'harvest%' rather than 'harvest_%' because the
      // underscore is a special character in the table expression. Names
      // lacking the literal 'harvest_' prefix are filtered out below.
      'harvest%runs',
      'harvest%items',
      'harvest%hashes',
    ] as $table_expression) {
      if ($found_tables = $this->connection->schema()->findTables($table_expression)) {
        $tables = array_merge($tables, $found_tables);
      }
    }
    // Keep only names that really start with 'harvest_', so that tables which
    // merely begin with 'harvest' are never mistaken for harvest data.
    return array_filter($tables, static function ($table_name): bool {
      return stripos((string) $table_name, 'harvest_') === 0;
    });
  }

  /**
   * Remove existing harvest data tables for the given plan identifier.
   *
   * Will not remove data tables for existing plans, and does nothing for an
   * empty plan identifier.
   *
   * @param string $plan_id
   *   Plan identifier to work with.
   */
  public function destructOrphanTables(string $plan_id): void {
    if ($plan_id === '' || in_array($plan_id, $this->existingPlanIds(), TRUE)) {
      return;
    }
    foreach (['_runs', '_items', '_hashes'] as $suffix) {
      $this->storeFactory->getInstance('harvest_' . $plan_id . $suffix)->destruct();
    }
  }

  /**
   * Get the currently registered plan IDs, normalized to strings.
   *
   * Normalizing lets callers compare strictly against IDs parsed from table
   * names, whatever scalar type the harvest service returns them as.
   *
   * @return string[]
   *   Registered plan identifiers.
   */
  private function existingPlanIds(): array {
    return array_map('strval', $this->harvestService->getAllHarvestIds());
  }

}
Loading

0 comments on commit 7fa499f

Please sign in to comment.