From 6a388a0f44a3d2d88272b53e693e1110b6501671 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 23 Feb 2023 13:23:27 -0800 Subject: [PATCH 01/56] introduce RecordBuilder concept and re-organize Goals archiving code via RecordBuilders --- core/ArchiveProcessor/Record.php | 162 ++++++ core/ArchiveProcessor/RecordBuilder.php | 175 +++++++ core/Plugin/Archiver.php | 69 +++ core/Segment.php | 2 +- plugins/Goals/Archiver.php | 466 ------------------ plugins/Goals/RecordBuilders/Base.php | 34 ++ .../Goals/RecordBuilders/EcommerceRecords.php | 252 ++++++++++ .../RecordBuilders/GeneralGoalsRecords.php | 244 +++++++++ .../Reports/GetVisitsUntilConversion.php | 3 +- 9 files changed, 939 insertions(+), 468 deletions(-) create mode 100644 core/ArchiveProcessor/Record.php create mode 100644 core/ArchiveProcessor/RecordBuilder.php create mode 100644 plugins/Goals/RecordBuilders/Base.php create mode 100644 plugins/Goals/RecordBuilders/EcommerceRecords.php create mode 100644 plugins/Goals/RecordBuilders/GeneralGoalsRecords.php diff --git a/core/ArchiveProcessor/Record.php b/core/ArchiveProcessor/Record.php new file mode 100644 index 00000000000..b364e9432b3 --- /dev/null +++ b/core/ArchiveProcessor/Record.php @@ -0,0 +1,162 @@ +setType($type); + $record->setName($name); + return $record; + } + + /** + * @param string|null $plugin + * @return Record + */ + public function setPlugin(?string $plugin): Record + { + $this->plugin = $plugin; + return $this; + } + + /** + * @param string $name + * @return Record + */ + public function setName(string $name): Record + { + $this->name = $name; + return $this; + } + + /** + * @param int|string $columnToSortByBeforeTruncation + * @return Record + */ + public function setColumnToSortByBeforeTruncation($columnToSortByBeforeTruncation) + { + $this->columnToSortByBeforeTruncation = $columnToSortByBeforeTruncation; + return $this; + } + + /** + * @param int|null $maxRowsInTable + * @return Record + */ + public function setMaxRowsInTable(?int $maxRowsInTable): Record + { + $this->maxRowsInTable = $maxRowsInTable; + return $this; + } + + /** + * @param int|null $maxRowsInSubtable + * @return Record + */ + public function setMaxRowsInSubtable(?int $maxRowsInSubtable): Record + { + $this->maxRowsInSubtable = $maxRowsInSubtable; + return $this; + } + + /** + * @return string|null + */ + public function getPlugin(): ?string + { + return $this->plugin; + } + + /** + * @return string + */ + public function getName(): string + { + return $this->name; + } + + /** + * @return int|string + */ + public function getColumnToSortByBeforeTruncation() + { + return $this->columnToSortByBeforeTruncation; + } + + /** + * @return int|null + */ + public function getMaxRowsInTable(): ?int + { + return $this->maxRowsInTable; + } + + /** + * @return int|null + */ + public function getMaxRowsInSubtable(): ?int + { + return $this->maxRowsInSubtable; + } + + /** + * @param string $type + * @return Record + */ + public function setType(string $type): Record + { + $this->type = $type; + return $this; + } + + /** + * @return string + */ + public function getType(): string + { + return $this->type; + } +} diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php new file mode 100644 index 00000000000..48bfff84c8e --- /dev/null +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -0,0 +1,175 @@ +maxRowsInTable = $maxRowsInTable; + $this->maxRowsInSubtable = $maxRowsInSubtable; + $this->columnToSortByBeforeTruncation = $columnToSortByBeforeTruncation; + $this->columnAggregationOps = $columnAggregationOps; + } + + /** + * @return void + * @internal + */ + public function setArchiveProcessor(ArchiveProcessor $archiveProcessor) + { + $this->archiveProcessor = $archiveProcessor; + } + + public function isEnabled() + { + return true; + } + + public function build() + { + if (!$this->isEnabled()) { + return; + } + + $numericRecords = []; + + $records = $this->aggregate(); + foreach ($records as $recordName => $recordValue) { + if ($recordValue instanceof DataTable) { + $this->insertRecord($recordName, $recordValue); + + Common::destroy($recordValue); + unset($recordValue); + } else { + // collect numeric records so we can insert them all at once + $numericRecords[$recordName] = $recordValue; + } + } + unset($records); + + if (!empty($numericRecords)) { + // TODO: plugin name replace? + $this->archiveProcessor->insertNumericRecords($numericRecords); + } + } + + public function buildMultiplePeriod() + { + if (!$this->isEnabled()) { + return; + } + + $recordsBuilt = $this->getRecordMetadata(); + + $numericRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_NUMERIC; }); + $blobRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_BLOB; }); + + foreach ($blobRecords as $record) { + $maxRowsInTable = $record->getMaxRowsInTable() ?? $this->maxRowsInTable; + $maxRowsInSubtable = $record->getMaxRowsInSubtable() ?? $this->maxRowsInSubtable; + $columnToSortByBeforeTruncation = $record->getColumnToSortByBeforeTruncation() ?? $this->columnToSortByBeforeTruncation; + + $this->archiveProcessor->aggregateDataTableRecords( + $record->getName(), + $maxRowsInTable, + $maxRowsInSubtable, + $columnToSortByBeforeTruncation, + $this->columnAggregationOps + ); + } + + if (!empty($numericRecords)) { + $numericMetrics = array_map(function (Record $r) { return $r->getName(); }, $numericRecords); + $this->archiveProcessor->aggregateNumericMetrics($numericMetrics, $this->columnAggregationOps); + } + } + + /** + * @return Record[] + */ + public abstract function getRecordMetadata(); + + /** + * @return (DataTable|int|float|string)[] + */ + protected abstract function aggregate(); + + private function insertRecord($recordName, DataTable\DataTableInterface $record) + { + if ($record->getRowsCount() <= 0) { + return; + } + + $serialized = $record->getSerialized($this->maxRowsInTable, $this->maxRowsInSubtable, $this->columnToSortByBeforeTruncation); + $this->archiveProcessor->insertBlobRecord($recordName, $serialized); + unset($serialized); + } + + public function getMaxRowsInTable() + { + return $this->maxRowsInTable; + } + + public function getMaxRowsInSubtable() + { + return $this->maxRowsInSubtable; + } + + public function getColumnToSortByBeforeTruncation() + { + return $this->columnToSortByBeforeTruncation; + } + + private function getQueryDecoration() // TODO: use query decoration in LogAggregator or wherever + { + $recordBuilderName = get_class($this); + $recordBuilderName = explode('\\', $recordBuilderName); + return end($recordBuilderName); + } +} \ No newline at end of file diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 0566b2b1c6a..c4fcf78e977 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -10,8 +10,11 @@ namespace Piwik\Plugin; use Piwik\ArchiveProcessor; +use Piwik\Cache; use Piwik\Config as PiwikConfig; +use Piwik\Container\StaticContainer; use Piwik\ErrorHandler; +use Piwik\Piwik; /** * The base class that should be extended by plugins that compute their own @@ -83,6 +86,52 @@ public function __construct(ArchiveProcessor $processor) $this->enabled = true; } + private function getPluginName() + { + $className = get_class($this); + $parts = explode('\\', $className); + $parts = array_filter($parts); + $plugin = $parts[2]; + + if (!Manager::getInstance()->isPluginLoaded($plugin)) { + throw new \Exception('Unexpected state: archiver plugin \'' . $plugin . '\' is not loaded.'); + } + + return $plugin; + } + + /** + * @return ArchiveProcessor\RecordBuilder[] + * @throws \DI\DependencyException + * @throws \DI\NotFoundException + */ + private function getRecordBuilders() + { + $plugin = $this->getPluginName(); + + $transientCache = Cache::getTransientCache(); + $cacheKey = 'Archiver.RecordBuilders.' . $plugin; + + $recordBuilders = $transientCache->fetch($cacheKey); + if ($recordBuilders === false) { + $recordBuilderClasses = Manager::getInstance()->findMultipleComponents('RecordBuilders', ArchiveProcessor\RecordBuilder::class); + + $recordBuilders = array_map(function ($className) { + return StaticContainer::getContainer()->make($className); + }, $recordBuilderClasses); + + /** + * TODO + * + * @api + */ + Piwik::postEvent('Archiver.addRecordBuilders', [&$recordBuilders, $plugin]); + + $transientCache->save($cacheKey, $recordBuilders); + } + return $recordBuilders; + } + /** * @ignore */ @@ -91,6 +140,16 @@ final public function callAggregateDayReport() try { ErrorHandler::pushFatalErrorBreadcrumb(static::class); + $recordBuilders = $this->getRecordBuilders(); + foreach ($recordBuilders as $recordBuilder) { + if (!$recordBuilder->isEnabled()) { + continue; + } + + $recordBuilder->setArchiveProcessor($this->getProcessor()); + $recordBuilder->build(); + } + $this->aggregateDayReport(); } finally { ErrorHandler::popFatalErrorBreadcrumb(); @@ -105,6 +164,16 @@ final public function callAggregateMultipleReports() try { ErrorHandler::pushFatalErrorBreadcrumb(static::class); + $recordBuilders = $this->getRecordBuilders(); + foreach ($recordBuilders as $recordBuilder) { + if (!$recordBuilder->isEnabled()) { + continue; + } + + $recordBuilder->setArchiveProcessor($this->getProcessor()); + $recordBuilder->buildMultiplePeriod(); + } + $this->aggregateMultipleReports(); } finally { ErrorHandler::popFatalErrorBreadcrumb(); diff --git a/core/Segment.php b/core/Segment.php index 9464d8d24eb..af49f41b9f7 100644 --- a/core/Segment.php +++ b/core/Segment.php @@ -125,7 +125,7 @@ public function __construct($segmentCondition, $idSites, Date $startDate = null, { $this->segmentQueryBuilder = StaticContainer::get('Piwik\DataAccess\LogQueryBuilder'); - $segmentCondition = trim($segmentCondition); + $segmentCondition = trim($segmentCondition ?: ''); if (!SettingsPiwik::isSegmentationEnabled() && !empty($segmentCondition) ) { diff --git a/plugins/Goals/Archiver.php b/plugins/Goals/Archiver.php index de991c3100f..d538bc62551 100644 --- a/plugins/Goals/Archiver.php +++ b/plugins/Goals/Archiver.php @@ -42,43 +42,6 @@ class Archiver extends \Piwik\Plugin\Archiver public static $ARCHIVE_DEPENDENT = true; - /** - * This array stores the ranges to use when displaying the 'visits to conversion' report - */ - public static $visitCountRanges = array( - array(1, 1), - array(2, 2), - array(3, 3), - array(4, 4), - array(5, 5), - array(6, 6), - array(7, 7), - array(8, 8), - array(9, 14), - array(15, 25), - array(26, 50), - array(51, 100), - array(100) - ); - /** - * This array stores the ranges to use when displaying the 'days to conversion' report - */ - public static $daysToConvRanges = array( - array(0, 0), - array(1, 1), - array(2, 2), - array(3, 3), - array(4, 4), - array(5, 5), - array(6, 6), - array(7, 7), - array(8, 14), - array(15, 30), - array(31, 60), - array(61, 120), - array(121, 364), - array(364) - ); protected $dimensionRecord = [ self::SKU_FIELD => self::ITEMS_SKU_RECORD_NAME, self::NAME_FIELD => self::ITEMS_NAME_RECORD_NAME, @@ -94,162 +57,14 @@ class Archiver extends \Piwik\Plugin\Archiver self::CATEGORY5_FIELD => 'idaction_product_cat5', ]; - /** - * Array containing one DataArray for each Ecommerce items dimension (name/sku/category abandoned carts and orders) - * @var DataArray[][] - */ - protected $itemReports = []; - - /** - * @var int - */ - private $productReportsMaximumRows; - - public function __construct(ArchiveProcessor $processor) - { - parent::__construct($processor); - - $general = Config::getInstance()->General; - $this->productReportsMaximumRows = $general['datatable_archiving_maximum_rows_products']; - } - public function aggregateDayReport() { - $this->aggregateGeneralGoalMetrics(); - - if (Manager::getInstance()->isPluginActivated('Ecommerce')) { - $this->aggregateEcommerceItems(); - } - if (self::$ARCHIVE_DEPENDENT) { $this->getProcessor()->processDependentArchive('Goals', VisitFrequencyAPI::NEW_VISITOR_SEGMENT); $this->getProcessor()->processDependentArchive('Goals', VisitFrequencyAPI::RETURNING_VISITOR_SEGMENT); } } - private function hasAnyGoalOrEcommerce($idSite) - { - return $this->usesEcommerce($idSite) || !empty(GoalManager::getGoalIds($idSite)); - } - - private function usesEcommerce($idSite) - { - return Manager::getInstance()->isPluginActivated('Ecommerce') - && Site::isEcommerceEnabledFor($idSite); - } - - private function getSiteId() - { - return $this->getProcessor()->getParams()->getSite()->getId(); - } - - protected function aggregateGeneralGoalMetrics() - { - $prefixes = array( - self::VISITS_UNTIL_RECORD_NAME => 'vcv', - self::DAYS_UNTIL_CONV_RECORD_NAME => 'vdsf', - ); - - $totalConversions = $totalRevenue = 0; - $goals = new DataArray(); - $visitsToConversions = $daysToConversions = []; - - $siteHasEcommerceOrGoals = $this->hasAnyGoalOrEcommerce($this->getSiteId()); - - // Special handling for sites that contain subordinated sites, like in roll up reporting. - // A roll up site, might not have ecommerce enabled or any configured goals, - // but if a subordinated site has, we calculate the overview conversion metrics nevertheless - if ($siteHasEcommerceOrGoals === false) { - $idSitesToArchive = $this->getProcessor()->getParams()->getIdSites(); - - foreach ($idSitesToArchive as $idSite) { - if ($this->hasAnyGoalOrEcommerce($idSite)) { - $siteHasEcommerceOrGoals = true; - break; - } - } - } - - // try to query goal data only, if goals or ecommerce is actually used - // otherwise we simply insert empty records - if ($siteHasEcommerceOrGoals) { - $selects = []; - $selects = array_merge($selects, LogAggregator::getSelectsFromRangedColumn( - self::VISITS_COUNT_FIELD, self::$visitCountRanges, self::LOG_CONVERSION_TABLE, $prefixes[self::VISITS_UNTIL_RECORD_NAME] - )); - $selects = array_merge($selects, LogAggregator::getSelectsFromRangedColumn( - 'FLOOR(log_conversion.' . self::SECONDS_SINCE_FIRST_VISIT_FIELD . ' / 86400)', self::$daysToConvRanges, self::LOG_CONVERSION_TABLE, $prefixes[self::DAYS_UNTIL_CONV_RECORD_NAME] - )); - - $query = $this->getLogAggregator()->queryConversionsByDimension([], false, $selects); - if ($query === false) { - return; - } - - $conversionMetrics = $this->getLogAggregator()->getConversionsMetricFields(); - while ($row = $query->fetch()) { - $idGoal = $row['idgoal']; - unset($row['idgoal']); - unset($row['label']); - - $values = []; - foreach ($conversionMetrics as $field => $statement) { - $values[$field] = $row[$field]; - } - $goals->sumMetrics($idGoal, $values); - - if (empty($visitsToConversions[$idGoal])) { - $visitsToConversions[$idGoal] = new DataTable(); - } - $array = LogAggregator::makeArrayOneColumn($row, Metrics::INDEX_NB_CONVERSIONS, $prefixes[self::VISITS_UNTIL_RECORD_NAME]); - $visitsToConversions[$idGoal]->addDataTable(DataTable::makeFromIndexedArray($array)); - - if (empty($daysToConversions[$idGoal])) { - $daysToConversions[$idGoal] = new DataTable(); - } - $array = LogAggregator::makeArrayOneColumn($row, Metrics::INDEX_NB_CONVERSIONS, $prefixes[self::DAYS_UNTIL_CONV_RECORD_NAME]); - $daysToConversions[$idGoal]->addDataTable(DataTable::makeFromIndexedArray($array)); - - // We don't want to sum Abandoned cart metrics in the overall revenue/conversions/converted visits - // since it is a "negative conversion" - if ($idGoal != GoalManager::IDGOAL_CART) { - $totalConversions += $row[Metrics::INDEX_GOAL_NB_CONVERSIONS]; - $totalRevenue += $row[Metrics::INDEX_GOAL_REVENUE]; - } - } - } - - // Stats by goal, for all visitors - $numericRecords = $this->getConversionsNumericMetrics($goals); - $this->getProcessor()->insertNumericRecords($numericRecords); - - $this->insertReports(self::VISITS_UNTIL_RECORD_NAME, $visitsToConversions); - $this->insertReports(self::DAYS_UNTIL_CONV_RECORD_NAME, $daysToConversions); - - // Stats for all goals - $nbConvertedVisits = $this->getProcessor()->getNumberOfVisitsConverted(); - $metrics = array( - self::getRecordName('nb_conversions') => $totalConversions, - self::getRecordName('nb_visits_converted') => $nbConvertedVisits, - self::getRecordName('revenue') => $totalRevenue, - ); - $this->getProcessor()->insertNumericRecords($metrics); - } - - protected function getConversionsNumericMetrics(DataArray $goals) - { - $numericRecords = array(); - $goals = $goals->getDataArray(); - foreach ($goals as $idGoal => $array) { - foreach ($array as $metricId => $value) { - $metricName = Metrics::$mappingFromIdToNameGoal[$metricId]; - $recordName = self::getRecordName($metricName, $idGoal); - $numericRecords[$recordName] = $value; - } - } - return $numericRecords; - } - /** * @param string $recordName 'nb_conversions' * @param int|bool $idGoal idGoal to return the metrics for, or false to return overall @@ -264,219 +79,6 @@ public static function getRecordName($recordName, $idGoal = false) return 'Goal_' . $idGoalStr . $recordName; } - protected function insertReports($recordName, $visitsToConversions) - { - foreach ($visitsToConversions as $idGoal => $table) { - $record = self::getRecordName($recordName, $idGoal); - $this->getProcessor()->insertBlobRecord($record, $table->getSerialized()); - } - $overviewTable = $this->getOverviewFromGoalTables($visitsToConversions); - $this->getProcessor()->insertBlobRecord(self::getRecordName($recordName), $overviewTable->getSerialized()); - } - - protected function getOverviewFromGoalTables($tableByGoal) - { - $overview = new DataTable(); - foreach ($tableByGoal as $idGoal => $table) { - if ($this->isStandardGoal($idGoal)) { - $overview->addDataTable($table); - } - } - return $overview; - } - - protected function isStandardGoal($idGoal) - { - return !in_array($idGoal, $this->getEcommerceIdGoals()); - } - - protected function aggregateEcommerceItems() - { - $this->initItemReports(); - - // try to query ecommerce items only, if ecommerce is actually used - // otherwise we simply insert empty records - if ($this->usesEcommerce($this->getSiteId())) { - foreach ($this->getItemsDimensions() as $dimension) { - $query = $this->getLogAggregator()->queryEcommerceItems($dimension); - if ($query !== false) { - $this->aggregateFromEcommerceItems($query, $dimension); - } - - $query = $this->queryItemViewsForDimension($dimension); - if ($query !== false) { - $this->aggregateFromEcommerceViews($query, $dimension); - } - } - } - $this->insertItemReports(); - return true; - } - - protected function queryItemViewsForDimension($dimension) - { - $column = $this->actionMapping[$dimension]; - $where = "log_link_visit_action.$column is not null"; - - return $this->getLogAggregator()->queryActionsByDimension( - ['label' => 'log_action1.name'], - $where, - ['AVG(log_link_visit_action.product_price) AS `avg_price_viewed`'], - false, - null, - [$column] - ); - } - - protected function initItemReports() - { - foreach ($this->getEcommerceIdGoals() as $ecommerceType) { - foreach ($this->dimensionRecord as $dimension => $record) { - $this->itemReports[$dimension][$ecommerceType] = new DataArray(); - } - } - } - - protected function insertItemReports() - { - foreach ($this->itemReports as $dimension => $itemAggregatesByType) { - foreach ($itemAggregatesByType as $ecommerceType => $itemAggregate) { - $recordName = $this->dimensionRecord[$dimension]; - if ($ecommerceType == GoalManager::IDGOAL_CART) { - $recordName = self::getItemRecordNameAbandonedCart($recordName); - } - $table = $itemAggregate->asDataTable(); - $blobData = $table->getSerialized($this->productReportsMaximumRows, $this->productReportsMaximumRows, - Metrics::INDEX_ECOMMERCE_ITEM_REVENUE); - $this->getProcessor()->insertBlobRecord($recordName, $blobData); - - Common::destroy($table); - } - } - } - - protected function getItemsDimensions() - { - $dimensions = array_keys($this->dimensionRecord); - foreach ($this->getItemExtraCategories() as $category) { - $dimensions[] = $category; - } - return $dimensions; - } - - protected function getItemExtraCategories() - { - return array(self::CATEGORY2_FIELD, self::CATEGORY3_FIELD, self::CATEGORY4_FIELD, self::CATEGORY5_FIELD); - } - - protected function isItemExtraCategory($field) - { - return in_array($field, $this->getItemExtraCategories()); - } - - protected function aggregateFromEcommerceItems($query, $dimension) - { - while ($row = $query->fetch()) { - $ecommerceType = $row['ecommerceType']; - - $label = $this->cleanupRowGetLabel($row, $dimension); - if ($label === false) { - continue; - } - - // Aggregate extra categories in the Item categories array - if ($this->isItemExtraCategory($dimension)) { - $array = $this->itemReports[self::CATEGORY_FIELD][$ecommerceType]; - } else { - $array = $this->itemReports[$dimension][$ecommerceType]; - } - - $this->roundColumnValues($row); - $array->sumMetrics($label, $row); - } - } - - protected function aggregateFromEcommerceViews($query, $dimension) - { - while ($row = $query->fetch()) { - - $label = $this->getRowLabel($row, $dimension); - if ($label === false) { - continue; // ignore empty additional categories - } - - // Aggregate extra categories in the Item categories array - if ($this->isItemExtraCategory($dimension)) { - $array = $this->itemReports[self::CATEGORY_FIELD]; - } else { - $array = $this->itemReports[$dimension]; - } - - unset($row['label']); - - if (isset($row['avg_price_viewed'])) { - $row['avg_price_viewed'] = round($row['avg_price_viewed'], GoalManager::REVENUE_PRECISION); - } - - // add views to all types - foreach ($array as $ecommerceType => $dataArray) { - $dataArray->sumMetrics($label, $row); - } - } - } - - protected function cleanupRowGetLabel(&$row, $currentField) - { - $label = $this->getRowLabel($row, $currentField); - - if (isset($row['ecommerceType']) && $row['ecommerceType'] == GoalManager::IDGOAL_CART) { - // abandoned carts are the number of visits with an abandoned cart - $row[Metrics::INDEX_ECOMMERCE_ORDERS] = $row[Metrics::INDEX_NB_VISITS]; - } - - unset($row[Metrics::INDEX_NB_VISITS]); - unset($row['label']); - unset($row['labelIdAction']); - unset($row['ecommerceType']); - - return $label; - } - - protected function getRowLabel(&$row, $currentField) - { - $label = $row['label']; - if (empty($label)) { - // An empty additional category -> skip this iteration - if ($this->isItemExtraCategory($currentField)) { - return false; - } - $label = "Value not defined"; - } - return $label; - } - - protected function roundColumnValues(&$row) - { - $columnsToRound = array( - Metrics::INDEX_ECOMMERCE_ITEM_REVENUE, - Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY, - Metrics::INDEX_ECOMMERCE_ITEM_PRICE, - Metrics::INDEX_ECOMMERCE_ITEM_PRICE_VIEWED, - ); - foreach ($columnsToRound as $column) { - if (isset($row[$column]) - && $row[$column] == round($row[$column]) - ) { - $row[$column] = round($row[$column]); - } - } - } - - protected function getEcommerceIdGoals() - { - return array(GoalManager::IDGOAL_CART, GoalManager::IDGOAL_ORDER); - } - public static function getItemRecordNameAbandonedCart($recordName) { return $recordName . '_Cart'; @@ -487,74 +89,6 @@ public static function getItemRecordNameAbandonedCart($recordName) */ public function aggregateMultipleReports() { - /* - * Archive Ecommerce Items - */ - if (Manager::getInstance()->isPluginActivated('Ecommerce')) { - $dataTableToSum = $this->dimensionRecord; - foreach ($this->dimensionRecord as $recordName) { - $dataTableToSum[] = self::getItemRecordNameAbandonedCart($recordName); - } - $columnsAggregationOperation = null; - - $this->getProcessor()->aggregateDataTableRecords($dataTableToSum, - $maximumRowsInDataTableLevelZero = $this->productReportsMaximumRows, - $maximumRowsInSubDataTable = $this->productReportsMaximumRows, - $columnToSortByBeforeTruncation = Metrics::INDEX_ECOMMERCE_ITEM_REVENUE, - $columnsAggregationOperation, - $columnsToRenameAfterAggregation = null, - $countRowsRecursive = []); - } - - /* - * Archive General Goal metrics - */ - $goalIdsToSum = GoalManager::getGoalIds($this->getProcessor()->getParams()->getSite()->getId()); - - //Ecommerce - if (Manager::getInstance()->isPluginActivated('Ecommerce')) { - $goalIdsToSum = array_merge($goalIdsToSum, $this->getEcommerceIdGoals()); - } - - // Overall goal metrics - $goalIdsToSum[] = false; - - $fieldsToSum = array(); - foreach ($goalIdsToSum as $goalId) { - $metricsToSum = Goals::getGoalColumns($goalId); - foreach ($metricsToSum as $metricName) { - $fieldsToSum[] = self::getRecordName($metricName, $goalId); - } - } - $this->getProcessor()->aggregateNumericMetrics($fieldsToSum); - - $columnsAggregationOperation = null; - - foreach ($goalIdsToSum as $goalId) { - // sum up the visits to conversion data table & the days to conversion data table - $this->getProcessor()->aggregateDataTableRecords( - array(self::getRecordName(self::VISITS_UNTIL_RECORD_NAME, $goalId), - self::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME, $goalId)), - $maximumRowsInDataTableLevelZero = null, - $maximumRowsInSubDataTable = null, - $columnToSortByBeforeTruncation = null, - $columnsAggregationOperation, - $columnsToRenameAfterAggregation = null, - $countRowsRecursive = array()); - } - - $columnsAggregationOperation = null; - // sum up goal overview reports - $this->getProcessor()->aggregateDataTableRecords( - array(self::getRecordName(self::VISITS_UNTIL_RECORD_NAME), - self::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME)), - $maximumRowsInDataTableLevelZero = null, - $maximumRowsInSubDataTable = null, - $columnToSortByBeforeTruncation = null, - $columnsAggregationOperation, - $columnsToRenameAfterAggregation = null, - $countRowsRecursive = array()); - if (self::$ARCHIVE_DEPENDENT) { $this->getProcessor()->processDependentArchive('Goals', VisitFrequencyAPI::NEW_VISITOR_SEGMENT); $this->getProcessor()->processDependentArchive('Goals', VisitFrequencyAPI::RETURNING_VISITOR_SEGMENT); diff --git a/plugins/Goals/RecordBuilders/Base.php b/plugins/Goals/RecordBuilders/Base.php new file mode 100644 index 00000000000..03c771f0a43 --- /dev/null +++ b/plugins/Goals/RecordBuilders/Base.php @@ -0,0 +1,34 @@ +archiveProcessor->getParams()->getSite()->getId(); + } + + protected function usesEcommerce($idSite) + { + return Manager::getInstance()->isPluginActivated('Ecommerce') + && Site::isEcommerceEnabledFor($idSite); + } + + protected function getEcommerceIdGoals() + { + return array(GoalManager::IDGOAL_CART, GoalManager::IDGOAL_ORDER); + } +} \ No newline at end of file diff --git a/plugins/Goals/RecordBuilders/EcommerceRecords.php b/plugins/Goals/RecordBuilders/EcommerceRecords.php new file mode 100644 index 00000000000..b781a221dcd --- /dev/null +++ b/plugins/Goals/RecordBuilders/EcommerceRecords.php @@ -0,0 +1,252 @@ + self::ITEMS_SKU_RECORD_NAME, + self::NAME_FIELD => self::ITEMS_NAME_RECORD_NAME, + self::CATEGORY_FIELD => self::ITEMS_CATEGORY_RECORD_NAME + ]; + + protected $actionMapping = [ + self::SKU_FIELD => 'idaction_product_sku', + self::NAME_FIELD => 'idaction_product_name', + self::CATEGORY_FIELD => 'idaction_product_cat', + self::CATEGORY2_FIELD => 'idaction_product_cat2', + self::CATEGORY3_FIELD => 'idaction_product_cat3', + self::CATEGORY4_FIELD => 'idaction_product_cat4', + self::CATEGORY5_FIELD => 'idaction_product_cat5', + ]; + + public function __construct() + { + $general = Config::getInstance()->General; + $productReportsMaximumRows = $general['datatable_archiving_maximum_rows_products']; + + parent::__construct($productReportsMaximumRows, $productReportsMaximumRows, Metrics::INDEX_ECOMMERCE_ITEM_REVENUE); + } + + public function isEnabled() + { + return Manager::getInstance()->isPluginActivated('Ecommerce'); + } + + public function getRecordMetadata() + { + $result = []; + foreach ($this->dimensionRecord as $recordName) { + $result[] = Record::make(Record::TYPE_BLOB, $recordName); + + $abandonedCartRecordName = Archiver::getItemRecordNameAbandonedCart($recordName); + $result[] = Record::make(Record::TYPE_BLOB, $abandonedCartRecordName); + } + return $result; + } + + protected function aggregate() + { + $itemReports = []; + foreach ($this->getEcommerceIdGoals() as $ecommerceType) { + foreach ($this->dimensionRecord as $dimension => $record) { + $itemReports[$dimension][$ecommerceType] = new DataArray(); + } + } + + $logAggregator = $this->archiveProcessor->getLogAggregator(); + + // try to query ecommerce items only, if ecommerce is actually used + // otherwise we simply insert empty records + if ($this->usesEcommerce($this->getSiteId())) { + foreach ($this->getItemsDimensions() as $dimension) { + $query = $logAggregator->queryEcommerceItems($dimension); + if ($query !== false) { + $this->aggregateFromEcommerceItems($itemReports, $query, $dimension); + } + + $query = $this->queryItemViewsForDimension($dimension); + if ($query !== false) { + $this->aggregateFromEcommerceViews($itemReports, $query, $dimension); + } + } + } + + $records = []; + foreach ($itemReports as $dimension => $itemAggregatesByType) { + foreach ($itemAggregatesByType as $ecommerceType => $itemAggregate) { + $recordName = $this->dimensionRecord[$dimension]; + if ($ecommerceType == GoalManager::IDGOAL_CART) { + $recordName = Archiver::getItemRecordNameAbandonedCart($recordName); + } + + $table = $itemAggregate->asDataTable(); + $records[$recordName] = $table; + } + } + return $records; + } + + protected function getItemsDimensions() + { + $dimensions = array_keys($this->dimensionRecord); + foreach ($this->getItemExtraCategories() as $category) { + $dimensions[] = $category; + } + return $dimensions; + } + + protected function getItemExtraCategories() + { + return array(self::CATEGORY2_FIELD, self::CATEGORY3_FIELD, self::CATEGORY4_FIELD, self::CATEGORY5_FIELD); + } + + protected function isItemExtraCategory($field) + { + return in_array($field, $this->getItemExtraCategories()); + } + + protected function aggregateFromEcommerceItems($itemReports, $query, $dimension) + { + while ($row = $query->fetch()) { + $ecommerceType = $row['ecommerceType']; + + $label = $this->cleanupRowGetLabel($row, $dimension); + if ($label === false) { + continue; + } + + // Aggregate extra categories in the Item categories array + if ($this->isItemExtraCategory($dimension)) { + $array = $itemReports[self::CATEGORY_FIELD][$ecommerceType]; + } else { + $array = $itemReports[$dimension][$ecommerceType]; + } + + $this->roundColumnValues($row); + $array->sumMetrics($label, $row); + } + } + + protected function aggregateFromEcommerceViews($itemReports, $query, $dimension) + { + while ($row = $query->fetch()) { + + $label = $this->getRowLabel($row, $dimension); + if ($label === false) { + continue; // ignore empty additional categories + } + + // Aggregate extra categories in the Item categories array + if ($this->isItemExtraCategory($dimension)) { + $array = $itemReports[self::CATEGORY_FIELD]; + } else { + $array = $itemReports[$dimension]; + } + + unset($row['label']); + + if (isset($row['avg_price_viewed'])) { + $row['avg_price_viewed'] = round($row['avg_price_viewed'], GoalManager::REVENUE_PRECISION); + } + + // add views to all types + foreach ($array as $ecommerceType => $dataArray) { + $dataArray->sumMetrics($label, $row); + } + } + } + + protected function queryItemViewsForDimension($dimension) + { + $column = $this->actionMapping[$dimension]; + $where = "log_link_visit_action.$column is not null"; + + $logAggregator = $this->archiveProcessor->getLogAggregator(); + return $logAggregator->queryActionsByDimension( + ['label' => 'log_action1.name'], + $where, + ['AVG(log_link_visit_action.product_price) AS `avg_price_viewed`'], + false, + null, + [$column] + ); + } + + protected function roundColumnValues(&$row) + { + $columnsToRound = array( + Metrics::INDEX_ECOMMERCE_ITEM_REVENUE, + Metrics::INDEX_ECOMMERCE_ITEM_QUANTITY, + Metrics::INDEX_ECOMMERCE_ITEM_PRICE, + Metrics::INDEX_ECOMMERCE_ITEM_PRICE_VIEWED, + ); + foreach ($columnsToRound as $column) { + if (isset($row[$column]) + && $row[$column] == round($row[$column]) + ) { + $row[$column] = round($row[$column]); + } + } + } + + protected function getRowLabel(&$row, $currentField) + { + $label = $row['label']; + if (empty($label)) { + // An empty additional category -> skip this iteration + if ($this->isItemExtraCategory($currentField)) { + return false; + } + $label = "Value not defined"; + } + return $label; + } + + protected function cleanupRowGetLabel(&$row, $currentField) + { + $label = $this->getRowLabel($row, $currentField); + + if (isset($row['ecommerceType']) && $row['ecommerceType'] == GoalManager::IDGOAL_CART) { + // abandoned carts are the number of visits with an abandoned cart + $row[Metrics::INDEX_ECOMMERCE_ORDERS] = $row[Metrics::INDEX_NB_VISITS]; + } + + unset($row[Metrics::INDEX_NB_VISITS]); + unset($row['label']); + unset($row['labelIdAction']); + unset($row['ecommerceType']); + + return $label; + } +} \ No newline at end of file diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php new file mode 100644 index 00000000000..dc5895b81be --- /dev/null +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -0,0 +1,244 @@ + 'vcv', + self::DAYS_UNTIL_CONV_RECORD_NAME => 'vdsf', + ); + + $totalConversions = 0; + $totalRevenue = 0; + + $goals = new DataArray(); + + $visitsToConversions = []; + $daysToConversions = []; + + $siteHasEcommerceOrGoals = $this->hasAnyGoalOrEcommerce($this->getSiteId()); + + // Special handling for sites that contain subordinated sites, like in roll up reporting. + // A roll up site, might not have ecommerce enabled or any configured goals, + // but if a subordinated site has, we calculate the overview conversion metrics nevertheless + if ($siteHasEcommerceOrGoals === false) { + $idSitesToArchive = $this->archiveProcessor->getParams()->getIdSites(); + + foreach ($idSitesToArchive as $idSite) { + if ($this->hasAnyGoalOrEcommerce($idSite)) { + $siteHasEcommerceOrGoals = true; + break; + } + } + } + + $logAggregator = $this->archiveProcessor->getLogAggregator(); + + // try to query goal data only, if goals or ecommerce is actually used + // otherwise we simply insert empty records + if ($siteHasEcommerceOrGoals) { + $selects = []; + $selects = array_merge($selects, LogAggregator::getSelectsFromRangedColumn( + self::VISITS_COUNT_FIELD, self::$visitCountRanges, self::LOG_CONVERSION_TABLE, $prefixes[self::VISITS_UNTIL_RECORD_NAME] + )); + $selects = array_merge($selects, LogAggregator::getSelectsFromRangedColumn( + 'FLOOR(log_conversion.' . self::SECONDS_SINCE_FIRST_VISIT_FIELD . ' / 86400)', self::$daysToConvRanges, self::LOG_CONVERSION_TABLE, $prefixes[self::DAYS_UNTIL_CONV_RECORD_NAME] + )); + + $query = $logAggregator->queryConversionsByDimension([], false, $selects); + if ($query === false) { + return []; + } + + $conversionMetrics = $logAggregator->getConversionsMetricFields(); + while ($row = $query->fetch()) { + $idGoal = $row['idgoal']; + unset($row['idgoal']); + unset($row['label']); + + $values = []; + foreach ($conversionMetrics as $field => $statement) { + $values[$field] = $row[$field]; + } + $goals->sumMetrics($idGoal, $values); + + if (empty($visitsToConversions[$idGoal])) { + $visitsToConversions[$idGoal] = new DataTable(); + } + $array = LogAggregator::makeArrayOneColumn($row, Metrics::INDEX_NB_CONVERSIONS, $prefixes[self::VISITS_UNTIL_RECORD_NAME]); + $visitsToConversions[$idGoal]->addDataTable(DataTable::makeFromIndexedArray($array)); + + if (empty($daysToConversions[$idGoal])) { + $daysToConversions[$idGoal] = new DataTable(); + } + $array = LogAggregator::makeArrayOneColumn($row, Metrics::INDEX_NB_CONVERSIONS, $prefixes[self::DAYS_UNTIL_CONV_RECORD_NAME]); + $daysToConversions[$idGoal]->addDataTable(DataTable::makeFromIndexedArray($array)); + + // We don't want to sum Abandoned cart metrics in the overall revenue/conversions/converted visits + // since it is a "negative conversion" + if ($idGoal != GoalManager::IDGOAL_CART) { + $totalConversions += $row[Metrics::INDEX_GOAL_NB_CONVERSIONS]; + $totalRevenue += $row[Metrics::INDEX_GOAL_REVENUE]; + } + } + } + + // Stats by goal, for all visitors + $numericRecords = $this->getConversionsNumericMetrics($goals); + + $nbConvertedVisits = $this->archiveProcessor->getNumberOfVisitsConverted(); + + $result = array_merge([ + // Stats for all goals + Archiver::getRecordName('nb_conversions') => $totalConversions, + Archiver::getRecordName('nb_visits_converted') => $nbConvertedVisits, + Archiver::getRecordName('revenue') => $totalRevenue, + ], $numericRecords); + + // TODO: Remove use of DataArray everywhere, just directly build DataTables (less memory use overall) + + foreach ($visitsToConversions as $idGoal => $table) { + $recordName = Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME, $idGoal); + $result[$recordName] = $table; + } + $result[Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME)] = $this->getOverviewFromGoalTables($visitsToConversions); + + foreach ($daysToConversions as $idGoal => $table) { + $recordName = Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME, $idGoal); + $result[$recordName] = $table; + } + $result[Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME)] = $this->getOverviewFromGoalTables($daysToConversions); + + return $result; + } + + protected function getOverviewFromGoalTables($tableByGoal) + { + $overview = new DataTable(); + foreach ($tableByGoal as $idGoal => $table) { + if ($this->isStandardGoal($idGoal)) { + $overview->addDataTable($table); + } + } + return $overview; + } + + protected function isStandardGoal($idGoal) + { + return !in_array($idGoal, $this->getEcommerceIdGoals()); + } + + public function getRecordMetadata() + { + $records = [ + Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME)), + Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME)), + + Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName('nb_conversions')), + Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName('nb_visits_converted')), + Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName('revenue')), + ]; + + $goals = API::getInstance()->getGoals($this->getSiteId()); + $goals = array_keys($goals); + + if (Manager::getInstance()->isPluginActivated('Ecommerce')) { + $goals = array_merge($goals, $this->getEcommerceIdGoals()); + } + + foreach ($goals as $idGoal) { + foreach (Metrics::$mappingFromIdToNameGoal as $metricName) { + $records[] = Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName($metricName, $idGoal)); + + $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME, $idGoal)); + $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME, $idGoal)); + } + } + + return $records; + } + + private function hasAnyGoalOrEcommerce($idSite) // TODO: this & other methods like this should probably be in a base class + { + return $this->usesEcommerce($idSite) || !empty(GoalManager::getGoalIds($idSite)); + } + + protected function getConversionsNumericMetrics(DataArray $goals) + { + $numericRecords = array(); + $goals = $goals->getDataArray(); + foreach ($goals as $idGoal => $array) { + foreach ($array as $metricId => $value) { + $metricName = Metrics::$mappingFromIdToNameGoal[$metricId]; + $recordName = Archiver::getRecordName($metricName, $idGoal); + $numericRecords[$recordName] = $value; + } + } + return $numericRecords; + } +} \ No newline at end of file diff --git a/plugins/Goals/Reports/GetVisitsUntilConversion.php b/plugins/Goals/Reports/GetVisitsUntilConversion.php index cd0a04d70bf..8b5933bd624 100644 --- a/plugins/Goals/Reports/GetVisitsUntilConversion.php +++ b/plugins/Goals/Reports/GetVisitsUntilConversion.php @@ -12,6 +12,7 @@ use Piwik\Plugin\ViewDataTable; use Piwik\Plugins\Goals\Columns\VisitsUntilConversion; use Piwik\Plugins\Goals\Archiver; +use Piwik\Plugins\Goals\RecordBuilders\GeneralGoalsRecords; class GetVisitsUntilConversion extends Base { @@ -44,7 +45,7 @@ public function configureView(ViewDataTable $view) $view->requestConfig->filter_sort_column = 'label'; $view->requestConfig->filter_sort_order = 'asc'; - $view->requestConfig->filter_limit = count(Archiver::$visitCountRanges); + $view->requestConfig->filter_limit = count(GeneralGoalsRecords::$visitCountRanges); $view->config->addTranslations(array('label' => $this->dimension->getName())); } From 5f4297501bb03709d836b2848e16844b4304a83a Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 23 Feb 2023 14:52:32 -0800 Subject: [PATCH 02/56] fix loop iteration bug --- plugins/Goals/RecordBuilders/EcommerceRecords.php | 2 -- plugins/Goals/RecordBuilders/GeneralGoalsRecords.php | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/plugins/Goals/RecordBuilders/EcommerceRecords.php b/plugins/Goals/RecordBuilders/EcommerceRecords.php index b781a221dcd..fc2741c672e 100644 --- a/plugins/Goals/RecordBuilders/EcommerceRecords.php +++ b/plugins/Goals/RecordBuilders/EcommerceRecords.php @@ -9,9 +9,7 @@ namespace Piwik\Plugins\Goals\RecordBuilders; -use Piwik\ArchiveProcessor; use Piwik\ArchiveProcessor\Record; -use Piwik\Common; use Piwik\Config; use Piwik\DataArray; use Piwik\Metrics; diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index dc5895b81be..d27475a6c54 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -214,10 +214,10 @@ public function getRecordMetadata() foreach ($goals as $idGoal) { foreach (Metrics::$mappingFromIdToNameGoal as $metricName) { $records[] = Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName($metricName, $idGoal)); - - $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME, $idGoal)); - $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME, $idGoal)); } + + $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME, $idGoal)); + $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME, $idGoal)); } return $records; From 5921f9d6a3538ca175db6b91874454d6552eb1bb Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 25 Feb 2023 16:46:24 -0800 Subject: [PATCH 03/56] split ecommerce records recordbuilder into 3 separate records --- core/ArchiveProcessor/RecordBuilder.php | 10 +- core/Plugin/Archiver.php | 17 ++- plugins/Goals/Goals.php | 14 +++ ...EcommerceRecords.php => ProductRecord.php} | 108 +++++++----------- 4 files changed, 75 insertions(+), 74 deletions(-) rename plugins/Goals/RecordBuilders/{EcommerceRecords.php => ProductRecord.php} (66%) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 48bfff84c8e..29ef23bcc10 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -94,7 +94,6 @@ public function build() unset($records); if (!empty($numericRecords)) { - // TODO: plugin name replace? $this->archiveProcessor->insertNumericRecords($numericRecords); } } @@ -166,6 +165,15 @@ public function getColumnToSortByBeforeTruncation() return $this->columnToSortByBeforeTruncation; } + public function getPluginName() + { + $className = get_class($this); + $parts = explode('\\', $className); + $parts = array_filter($parts); + $plugin = $parts[2]; + return $plugin; + } + private function getQueryDecoration() // TODO: use query decoration in LogAggregator or wherever { $recordBuilderName = get_class($this); diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index c4fcf78e977..595f4b0705a 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -107,25 +107,26 @@ private function getPluginName() */ private function getRecordBuilders() { - $plugin = $this->getPluginName(); - $transientCache = Cache::getTransientCache(); - $cacheKey = 'Archiver.RecordBuilders.' . $plugin; + $cacheKey = 'Archiver.RecordBuilders'; $recordBuilders = $transientCache->fetch($cacheKey); if ($recordBuilders === false) { $recordBuilderClasses = Manager::getInstance()->findMultipleComponents('RecordBuilders', ArchiveProcessor\RecordBuilder::class); $recordBuilders = array_map(function ($className) { - return StaticContainer::getContainer()->make($className); + if ((new \ReflectionClass($className))->getConstructor()->getNumberOfRequiredParameters() == 0) { + return StaticContainer::getContainer()->make($className); + } }, $recordBuilderClasses); + $recordBuilders = array_filter($recordBuilders); /** * TODO * * @api */ - Piwik::postEvent('Archiver.addRecordBuilders', [&$recordBuilders, $plugin]); + Piwik::postEvent('Archiver.addRecordBuilders', [&$recordBuilders]); $transientCache->save($cacheKey, $recordBuilders); } @@ -140,9 +141,13 @@ final public function callAggregateDayReport() try { ErrorHandler::pushFatalErrorBreadcrumb(static::class); + $pluginName = $this->getPluginName(); + $recordBuilders = $this->getRecordBuilders(); foreach ($recordBuilders as $recordBuilder) { - if (!$recordBuilder->isEnabled()) { + if ($recordBuilder->getPluginName() != $pluginName + || !$recordBuilder->isEnabled() + ) { continue; } diff --git a/plugins/Goals/Goals.php b/plugins/Goals/Goals.php index 1e3d04b031b..22b0a158f3a 100644 --- a/plugins/Goals/Goals.php +++ b/plugins/Goals/Goals.php @@ -18,6 +18,7 @@ use Piwik\Plugin\ComputedMetric; use Piwik\Plugin\ReportsProvider; use Piwik\Plugins\CoreHome\SystemSummary; +use Piwik\Plugins\Goals\RecordBuilders\ProductRecord; use Piwik\Tracker\GoalManager; use Piwik\Category\Subcategory; @@ -104,10 +105,23 @@ public function registerEvents() 'Metric.addMetrics' => 'addMetrics', 'Metric.addComputedMetrics' => 'addComputedMetrics', 'System.addSystemSummaryItems' => 'addSystemSummaryItems', + 'Archiver.addRecordBuilders' => 'addRecordBuilders', ); return $hooks; } + public function addRecordBuilders(&$recordBuilders) + { + $recordBuilders[] = new ProductRecord(ProductRecord::SKU_FIELD, ProductRecord::ITEMS_SKU_RECORD_NAME); + $recordBuilders[] = new ProductRecord(ProductRecord::NAME_FIELD, ProductRecord::ITEMS_NAME_RECORD_NAME); + $recordBuilders[] = new ProductRecord(ProductRecord::CATEGORY_FIELD, ProductRecord::ITEMS_CATEGORY_RECORD_NAME, [ + ProductRecord::CATEGORY2_FIELD, + ProductRecord::CATEGORY3_FIELD, + ProductRecord::CATEGORY4_FIELD, + ProductRecord::CATEGORY5_FIELD, + ]); + } + public function addSystemSummaryItems(&$systemSummary) { $goalModel = new Model(); diff --git a/plugins/Goals/RecordBuilders/EcommerceRecords.php b/plugins/Goals/RecordBuilders/ProductRecord.php similarity index 66% rename from plugins/Goals/RecordBuilders/EcommerceRecords.php rename to plugins/Goals/RecordBuilders/ProductRecord.php index fc2741c672e..5316769140e 100644 --- a/plugins/Goals/RecordBuilders/EcommerceRecords.php +++ b/plugins/Goals/RecordBuilders/ProductRecord.php @@ -17,8 +17,7 @@ use Piwik\Plugins\Goals\Archiver; use Piwik\Tracker\GoalManager; -// TODO: this can be split up further -class EcommerceRecords extends Base +class ProductRecord extends Base { const SKU_FIELD = 'idaction_sku'; const NAME_FIELD = 'idaction_name'; @@ -32,12 +31,6 @@ class EcommerceRecords extends Base const ITEMS_NAME_RECORD_NAME = 'Goals_ItemsName'; const ITEMS_CATEGORY_RECORD_NAME = 'Goals_ItemsCategory'; - protected $dimensionRecord = [ - self::SKU_FIELD => self::ITEMS_SKU_RECORD_NAME, - self::NAME_FIELD => self::ITEMS_NAME_RECORD_NAME, - self::CATEGORY_FIELD => self::ITEMS_CATEGORY_RECORD_NAME - ]; - protected $actionMapping = [ self::SKU_FIELD => 'idaction_product_sku', self::NAME_FIELD => 'idaction_product_name', @@ -48,12 +41,31 @@ class EcommerceRecords extends Base self::CATEGORY5_FIELD => 'idaction_product_cat5', ]; - public function __construct() + /** + * @var string + */ + private $dimension; + + /** + * @var string + */ + private $recordName; + + /** + * @var string[] + */ + private $dimensionsToAggregate; + + public function __construct($dimension, $recordName, $otherDimensionsToAggregate = []) { $general = Config::getInstance()->General; $productReportsMaximumRows = $general['datatable_archiving_maximum_rows_products']; parent::__construct($productReportsMaximumRows, $productReportsMaximumRows, Metrics::INDEX_ECOMMERCE_ITEM_REVENUE); + + $this->dimension = $dimension; + $this->recordName = $recordName; + $this->dimensionsToAggregate = array_merge([$dimension], $otherDimensionsToAggregate); } public function isEnabled() @@ -63,23 +75,19 @@ public function isEnabled() public function getRecordMetadata() { - $result = []; - foreach ($this->dimensionRecord as $recordName) { - $result[] = Record::make(Record::TYPE_BLOB, $recordName); + $abandonedCartRecordName = Archiver::getItemRecordNameAbandonedCart($this->recordName); - $abandonedCartRecordName = Archiver::getItemRecordNameAbandonedCart($recordName); - $result[] = Record::make(Record::TYPE_BLOB, $abandonedCartRecordName); - } - return $result; + return [ + Record::make(Record::TYPE_BLOB, $this->recordName), + Record::make(Record::TYPE_BLOB, $abandonedCartRecordName), + ]; } protected function aggregate() { $itemReports = []; foreach ($this->getEcommerceIdGoals() as $ecommerceType) { - foreach ($this->dimensionRecord as $dimension => $record) { - $itemReports[$dimension][$ecommerceType] = new DataArray(); - } + $itemReports[$ecommerceType] = new DataArray(); } $logAggregator = $this->archiveProcessor->getLogAggregator(); @@ -87,7 +95,7 @@ protected function aggregate() // try to query ecommerce items only, if ecommerce is actually used // otherwise we simply insert empty records if ($this->usesEcommerce($this->getSiteId())) { - foreach ($this->getItemsDimensions() as $dimension) { + foreach ($this->dimensionsToAggregate as $dimension) { $query = $logAggregator->queryEcommerceItems($dimension); if ($query !== false) { $this->aggregateFromEcommerceItems($itemReports, $query, $dimension); @@ -101,37 +109,16 @@ protected function aggregate() } $records = []; - foreach ($itemReports as $dimension => $itemAggregatesByType) { - foreach ($itemAggregatesByType as $ecommerceType => $itemAggregate) { - $recordName = $this->dimensionRecord[$dimension]; - if ($ecommerceType == GoalManager::IDGOAL_CART) { - $recordName = Archiver::getItemRecordNameAbandonedCart($recordName); - } - - $table = $itemAggregate->asDataTable(); - $records[$recordName] = $table; + foreach ($itemReports as $ecommerceType => $itemAggregate) { + $recordName = $this->recordName; + if ($ecommerceType == GoalManager::IDGOAL_CART) { + $recordName = Archiver::getItemRecordNameAbandonedCart($recordName); } - } - return $records; - } - protected function getItemsDimensions() - { - $dimensions = array_keys($this->dimensionRecord); - foreach ($this->getItemExtraCategories() as $category) { - $dimensions[] = $category; + $table = $itemAggregate->asDataTable(); + $records[$recordName] = $table; } - return $dimensions; - } - - protected function getItemExtraCategories() - { - return array(self::CATEGORY2_FIELD, self::CATEGORY3_FIELD, self::CATEGORY4_FIELD, self::CATEGORY5_FIELD); - } - - protected function isItemExtraCategory($field) - { - return in_array($field, $this->getItemExtraCategories()); + return $records; } protected function aggregateFromEcommerceItems($itemReports, $query, $dimension) @@ -144,12 +131,7 @@ protected function aggregateFromEcommerceItems($itemReports, $query, $dimension) continue; } - // Aggregate extra categories in the Item categories array - if ($this->isItemExtraCategory($dimension)) { - $array = $itemReports[self::CATEGORY_FIELD][$ecommerceType]; - } else { - $array = $itemReports[$dimension][$ecommerceType]; - } + $array = $itemReports[$ecommerceType]; $this->roundColumnValues($row); $array->sumMetrics($label, $row); @@ -159,19 +141,11 @@ protected function aggregateFromEcommerceItems($itemReports, $query, $dimension) protected function aggregateFromEcommerceViews($itemReports, $query, $dimension) { while ($row = $query->fetch()) { - $label = $this->getRowLabel($row, $dimension); if ($label === false) { continue; // ignore empty additional categories } - // Aggregate extra categories in the Item categories array - if ($this->isItemExtraCategory($dimension)) { - $array = $itemReports[self::CATEGORY_FIELD]; - } else { - $array = $itemReports[$dimension]; - } - unset($row['label']); if (isset($row['avg_price_viewed'])) { @@ -179,7 +153,7 @@ protected function aggregateFromEcommerceViews($itemReports, $query, $dimension) } // add views to all types - foreach ($array as $ecommerceType => $dataArray) { + foreach ($itemReports as $ecommerceType => $dataArray) { $dataArray->sumMetrics($label, $row); } } @@ -218,12 +192,12 @@ protected function roundColumnValues(&$row) } } - protected function getRowLabel(&$row, $currentField) + protected function getRowLabel(&$row, $dimension) { $label = $row['label']; if (empty($label)) { // An empty additional category -> skip this iteration - if ($this->isItemExtraCategory($currentField)) { + if ($dimension != $this->dimension) { return false; } $label = "Value not defined"; @@ -231,9 +205,9 @@ protected function getRowLabel(&$row, $currentField) return $label; } - protected function cleanupRowGetLabel(&$row, $currentField) + protected function cleanupRowGetLabel(&$row, $dimension) { - $label = $this->getRowLabel($row, $currentField); + $label = $this->getRowLabel($row, $dimension); if (isset($row['ecommerceType']) && $row['ecommerceType'] == GoalManager::IDGOAL_CART) { // abandoned carts are the number of visits with an abandoned cart From b2d8e10ba97ae414e6cb300c8fd5543ad359420a Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 25 Feb 2023 17:01:11 -0800 Subject: [PATCH 04/56] make sure Goals::getRecordMetadata() behaves like old archiver code --- .../RecordBuilders/GeneralGoalsRecords.php | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index d27475a6c54..fe44caa8a99 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -17,6 +17,7 @@ use Piwik\Plugin\Manager; use Piwik\Plugins\Goals\API; use Piwik\Plugins\Goals\Archiver; +use Piwik\Plugins\Goals\Goals; use Piwik\Tracker\GoalManager; class GeneralGoalsRecords extends Base @@ -195,15 +196,6 @@ protected function isStandardGoal($idGoal) public function getRecordMetadata() { - $records = [ - Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME)), - Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME)), - - Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName('nb_conversions')), - Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName('nb_visits_converted')), - Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName('revenue')), - ]; - $goals = API::getInstance()->getGoals($this->getSiteId()); $goals = array_keys($goals); @@ -211,15 +203,19 @@ public function getRecordMetadata() $goals = array_merge($goals, $this->getEcommerceIdGoals()); } + // Overall goal metrics + $goals[] = false; + + $records = []; foreach ($goals as $idGoal) { - foreach (Metrics::$mappingFromIdToNameGoal as $metricName) { + $metricsToSum = Goals::getGoalColumns($idGoal); + foreach ($metricsToSum as $metricName) { $records[] = Record::make(Record::TYPE_NUMERIC, Archiver::getRecordName($metricName, $idGoal)); } $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME, $idGoal)); $records[] = Record::make(Record::TYPE_BLOB, Archiver::getRecordName(self::DAYS_UNTIL_CONV_RECORD_NAME, $idGoal)); } - return $records; } From 2be26f224655a9b6d3ec15070d77acbc1303ef24 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 26 Feb 2023 11:26:42 -0800 Subject: [PATCH 05/56] make sure recordbuilder archive processor is restored after being used since archiving is a recursive process --- core/ArchiveProcessor/RecordBuilder.php | 7 ++++++- core/Plugin/Archiver.php | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 29ef23bcc10..6665e4524f7 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -57,11 +57,16 @@ public function __construct($maxRowsInTable = null, $maxRowsInSubtable = null, $this->columnAggregationOps = $columnAggregationOps; } + public function getProcessor() + { + return $this->archiveProcessor; + } + /** * @return void * @internal */ - public function setArchiveProcessor(ArchiveProcessor $archiveProcessor) + public function setArchiveProcessor(ArchiveProcessor $archiveProcessor = null) { $this->archiveProcessor = $archiveProcessor; } diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 595f4b0705a..acfef87edd4 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -151,8 +151,13 @@ final public function callAggregateDayReport() continue; } - $recordBuilder->setArchiveProcessor($this->getProcessor()); - $recordBuilder->build(); + $oldArchiveProcessor = $recordBuilder->getProcessor(); + try { + $recordBuilder->setArchiveProcessor($this->getProcessor()); + $recordBuilder->build(); + } finally { + $recordBuilder->setArchiveProcessor($oldArchiveProcessor); + } } $this->aggregateDayReport(); @@ -175,8 +180,13 @@ final public function callAggregateMultipleReports() continue; } - $recordBuilder->setArchiveProcessor($this->getProcessor()); - $recordBuilder->buildMultiplePeriod(); + $oldArchiveProcessor = $recordBuilder->getProcessor(); + try { + $recordBuilder->setArchiveProcessor($this->getProcessor()); + $recordBuilder->buildMultiplePeriod(); + } finally { + $recordBuilder->setArchiveProcessor($oldArchiveProcessor); + } } $this->aggregateMultipleReports(); From 0db7cb573c3fc7abe95d37c832737dbb012146df Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 26 Feb 2023 13:36:59 -0800 Subject: [PATCH 06/56] just make ArchiveProcessor a parameter --- core/ArchiveProcessor/RecordBuilder.php | 43 ++++++------------- core/Plugin/Archiver.php | 16 +------ plugins/Goals/RecordBuilders/Base.php | 5 ++- .../RecordBuilders/GeneralGoalsRecords.php | 15 ++++--- .../Goals/RecordBuilders/ProductRecord.php | 15 ++++--- 5 files changed, 33 insertions(+), 61 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 6665e4524f7..45dbe3c422c 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -18,11 +18,6 @@ */ abstract class RecordBuilder { - /** - * @var ArchiveProcessor - */ - protected $archiveProcessor; - /** * @var int */ @@ -57,26 +52,12 @@ public function __construct($maxRowsInTable = null, $maxRowsInSubtable = null, $this->columnAggregationOps = $columnAggregationOps; } - public function getProcessor() - { - return $this->archiveProcessor; - } - - /** - * @return void - * @internal - */ - public function setArchiveProcessor(ArchiveProcessor $archiveProcessor = null) - { - $this->archiveProcessor = $archiveProcessor; - } - public function isEnabled() { return true; } - public function build() + public function build(ArchiveProcessor $archiveProcessor) { if (!$this->isEnabled()) { return; @@ -84,10 +65,10 @@ public function build() $numericRecords = []; - $records = $this->aggregate(); + $records = $this->aggregate($archiveProcessor); foreach ($records as $recordName => $recordValue) { if ($recordValue instanceof DataTable) { - $this->insertRecord($recordName, $recordValue); + $this->insertRecord($archiveProcessor, $recordName, $recordValue); Common::destroy($recordValue); unset($recordValue); @@ -99,17 +80,17 @@ public function build() unset($records); if (!empty($numericRecords)) { - $this->archiveProcessor->insertNumericRecords($numericRecords); + $archiveProcessor->insertNumericRecords($numericRecords); } } - public function buildMultiplePeriod() + public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) { if (!$this->isEnabled()) { return; } - $recordsBuilt = $this->getRecordMetadata(); + $recordsBuilt = $this->getRecordMetadata($archiveProcessor); $numericRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_NUMERIC; }); $blobRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_BLOB; }); @@ -119,7 +100,7 @@ public function buildMultiplePeriod() $maxRowsInSubtable = $record->getMaxRowsInSubtable() ?? $this->maxRowsInSubtable; $columnToSortByBeforeTruncation = $record->getColumnToSortByBeforeTruncation() ?? $this->columnToSortByBeforeTruncation; - $this->archiveProcessor->aggregateDataTableRecords( + $archiveProcessor->aggregateDataTableRecords( $record->getName(), $maxRowsInTable, $maxRowsInSubtable, @@ -130,28 +111,28 @@ public function buildMultiplePeriod() if (!empty($numericRecords)) { $numericMetrics = array_map(function (Record $r) { return $r->getName(); }, $numericRecords); - $this->archiveProcessor->aggregateNumericMetrics($numericMetrics, $this->columnAggregationOps); + $archiveProcessor->aggregateNumericMetrics($numericMetrics, $this->columnAggregationOps); } } /** * @return Record[] */ - public abstract function getRecordMetadata(); + public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor); /** * @return (DataTable|int|float|string)[] */ - protected abstract function aggregate(); + protected abstract function aggregate(ArchiveProcessor $archiveProcessor); - private function insertRecord($recordName, DataTable\DataTableInterface $record) + private function insertRecord(ArchiveProcessor $archiveProcessor, $recordName, DataTable\DataTableInterface $record) { if ($record->getRowsCount() <= 0) { return; } $serialized = $record->getSerialized($this->maxRowsInTable, $this->maxRowsInSubtable, $this->columnToSortByBeforeTruncation); - $this->archiveProcessor->insertBlobRecord($recordName, $serialized); + $archiveProcessor->insertBlobRecord($recordName, $serialized); unset($serialized); } diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index acfef87edd4..3daff682612 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -151,13 +151,7 @@ final public function callAggregateDayReport() continue; } - $oldArchiveProcessor = $recordBuilder->getProcessor(); - try { - $recordBuilder->setArchiveProcessor($this->getProcessor()); - $recordBuilder->build(); - } finally { - $recordBuilder->setArchiveProcessor($oldArchiveProcessor); - } + $recordBuilder->build($this->getProcessor()); } $this->aggregateDayReport(); @@ -180,13 +174,7 @@ final public function callAggregateMultipleReports() continue; } - $oldArchiveProcessor = $recordBuilder->getProcessor(); - try { - $recordBuilder->setArchiveProcessor($this->getProcessor()); - $recordBuilder->buildMultiplePeriod(); - } finally { - $recordBuilder->setArchiveProcessor($oldArchiveProcessor); - } + $recordBuilder->buildMultiplePeriod($this->getProcessor()); } $this->aggregateMultipleReports(); diff --git a/plugins/Goals/RecordBuilders/Base.php b/plugins/Goals/RecordBuilders/Base.php index 03c771f0a43..b9401a4b40c 100644 --- a/plugins/Goals/RecordBuilders/Base.php +++ b/plugins/Goals/RecordBuilders/Base.php @@ -9,6 +9,7 @@ namespace Piwik\Plugins\Goals\RecordBuilders; +use Piwik\ArchiveProcessor; use Piwik\ArchiveProcessor\RecordBuilder; use Piwik\Plugin\Manager; use Piwik\Site; @@ -16,9 +17,9 @@ abstract class Base extends RecordBuilder { - protected function getSiteId() + protected function getSiteId(ArchiveProcessor $archiveProcessor) { - return $this->archiveProcessor->getParams()->getSite()->getId(); + return $archiveProcessor->getParams()->getSite()->getId(); } protected function usesEcommerce($idSite) diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index fe44caa8a99..89234783119 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -9,6 +9,7 @@ namespace Piwik\Plugins\Goals\RecordBuilders; +use Piwik\ArchiveProcessor; use Piwik\ArchiveProcessor\Record; use Piwik\DataAccess\LogAggregator; use Piwik\DataArray; @@ -67,7 +68,7 @@ class GeneralGoalsRecords extends Base array(364) ); - protected function aggregate() + protected function aggregate(ArchiveProcessor $archiveProcessor) { $prefixes = array( self::VISITS_UNTIL_RECORD_NAME => 'vcv', @@ -82,13 +83,13 @@ protected function aggregate() $visitsToConversions = []; $daysToConversions = []; - $siteHasEcommerceOrGoals = $this->hasAnyGoalOrEcommerce($this->getSiteId()); + $siteHasEcommerceOrGoals = $this->hasAnyGoalOrEcommerce($this->getSiteId($archiveProcessor)); // Special handling for sites that contain subordinated sites, like in roll up reporting. // A roll up site, might not have ecommerce enabled or any configured goals, // but if a subordinated site has, we calculate the overview conversion metrics nevertheless if ($siteHasEcommerceOrGoals === false) { - $idSitesToArchive = $this->archiveProcessor->getParams()->getIdSites(); + $idSitesToArchive = $archiveProcessor->getParams()->getIdSites(); foreach ($idSitesToArchive as $idSite) { if ($this->hasAnyGoalOrEcommerce($idSite)) { @@ -98,7 +99,7 @@ protected function aggregate() } } - $logAggregator = $this->archiveProcessor->getLogAggregator(); + $logAggregator = $archiveProcessor->getLogAggregator(); // try to query goal data only, if goals or ecommerce is actually used // otherwise we simply insert empty records @@ -152,7 +153,7 @@ protected function aggregate() // Stats by goal, for all visitors $numericRecords = $this->getConversionsNumericMetrics($goals); - $nbConvertedVisits = $this->archiveProcessor->getNumberOfVisitsConverted(); + $nbConvertedVisits = $archiveProcessor->getNumberOfVisitsConverted(); $result = array_merge([ // Stats for all goals @@ -194,9 +195,9 @@ protected function isStandardGoal($idGoal) return !in_array($idGoal, $this->getEcommerceIdGoals()); } - public function getRecordMetadata() + public function getRecordMetadata(ArchiveProcessor $archiveProcessor) { - $goals = API::getInstance()->getGoals($this->getSiteId()); + $goals = API::getInstance()->getGoals($this->getSiteId($archiveProcessor)); $goals = array_keys($goals); if (Manager::getInstance()->isPluginActivated('Ecommerce')) { diff --git a/plugins/Goals/RecordBuilders/ProductRecord.php b/plugins/Goals/RecordBuilders/ProductRecord.php index 5316769140e..ef0e316f030 100644 --- a/plugins/Goals/RecordBuilders/ProductRecord.php +++ b/plugins/Goals/RecordBuilders/ProductRecord.php @@ -9,8 +9,10 @@ namespace Piwik\Plugins\Goals\RecordBuilders; +use Piwik\ArchiveProcessor; use Piwik\ArchiveProcessor\Record; use Piwik\Config; +use Piwik\DataAccess\LogAggregator; use Piwik\DataArray; use Piwik\Metrics; use Piwik\Plugin\Manager; @@ -73,7 +75,7 @@ public function isEnabled() return Manager::getInstance()->isPluginActivated('Ecommerce'); } - public function getRecordMetadata() + public function getRecordMetadata(ArchiveProcessor $archiveProcessor) { $abandonedCartRecordName = Archiver::getItemRecordNameAbandonedCart($this->recordName); @@ -83,25 +85,25 @@ public function getRecordMetadata() ]; } - protected function aggregate() + protected function aggregate(ArchiveProcessor $archiveProcessor) { $itemReports = []; foreach ($this->getEcommerceIdGoals() as $ecommerceType) { $itemReports[$ecommerceType] = new DataArray(); } - $logAggregator = $this->archiveProcessor->getLogAggregator(); + $logAggregator = $archiveProcessor->getLogAggregator(); // try to query ecommerce items only, if ecommerce is actually used // otherwise we simply insert empty records - if ($this->usesEcommerce($this->getSiteId())) { + if ($this->usesEcommerce($this->getSiteId($archiveProcessor))) { foreach ($this->dimensionsToAggregate as $dimension) { $query = $logAggregator->queryEcommerceItems($dimension); if ($query !== false) { $this->aggregateFromEcommerceItems($itemReports, $query, $dimension); } - $query = $this->queryItemViewsForDimension($dimension); + $query = $this->queryItemViewsForDimension($logAggregator, $dimension); if ($query !== false) { $this->aggregateFromEcommerceViews($itemReports, $query, $dimension); } @@ -159,12 +161,11 @@ protected function aggregateFromEcommerceViews($itemReports, $query, $dimension) } } - protected function queryItemViewsForDimension($dimension) + protected function queryItemViewsForDimension(LogAggregator $logAggregator, $dimension) { $column = $this->actionMapping[$dimension]; $where = "log_link_visit_action.$column is not null"; - $logAggregator = $this->archiveProcessor->getLogAggregator(); return $logAggregator->queryActionsByDimension( ['label' => 'log_action1.name'], $where, From fda7ee7f7f97df5030fd53bcf7ba8aadad905ca7 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 26 Feb 2023 14:00:37 -0800 Subject: [PATCH 07/56] check for plugin before calling buildMultiplePeriod() --- core/Plugin/Archiver.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 3daff682612..e3df8ebc401 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -168,9 +168,13 @@ final public function callAggregateMultipleReports() try { ErrorHandler::pushFatalErrorBreadcrumb(static::class); + $pluginName = $this->getPluginName(); + $recordBuilders = $this->getRecordBuilders(); foreach ($recordBuilders as $recordBuilder) { - if (!$recordBuilder->isEnabled()) { + if ($recordBuilder->getPluginName() != $pluginName + || !$recordBuilder->isEnabled() + ) { continue; } From b2ea2521bc3238f782158a58645998c18081f914 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 26 Feb 2023 14:46:06 -0800 Subject: [PATCH 08/56] do not invoke record builders if archiver has no plugin (happens during tests) --- core/Plugin/Archiver.php | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index e3df8ebc401..2e8f157735c 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -94,7 +94,7 @@ private function getPluginName() $plugin = $parts[2]; if (!Manager::getInstance()->isPluginLoaded($plugin)) { - throw new \Exception('Unexpected state: archiver plugin \'' . $plugin . '\' is not loaded.'); + return null; } return $plugin; @@ -143,15 +143,17 @@ final public function callAggregateDayReport() $pluginName = $this->getPluginName(); - $recordBuilders = $this->getRecordBuilders(); - foreach ($recordBuilders as $recordBuilder) { - if ($recordBuilder->getPluginName() != $pluginName - || !$recordBuilder->isEnabled() - ) { - continue; - } + if (!empty($pluginName)) { + $recordBuilders = $this->getRecordBuilders(); + foreach ($recordBuilders as $recordBuilder) { + if ($recordBuilder->getPluginName() != $pluginName + || !$recordBuilder->isEnabled() + ) { + continue; + } - $recordBuilder->build($this->getProcessor()); + $recordBuilder->build($this->getProcessor()); + } } $this->aggregateDayReport(); @@ -170,15 +172,17 @@ final public function callAggregateMultipleReports() $pluginName = $this->getPluginName(); - $recordBuilders = $this->getRecordBuilders(); - foreach ($recordBuilders as $recordBuilder) { - if ($recordBuilder->getPluginName() != $pluginName - || !$recordBuilder->isEnabled() - ) { - continue; - } + if (!empty($pluginName)) { + $recordBuilders = $this->getRecordBuilders(); + foreach ($recordBuilders as $recordBuilder) { + if ($recordBuilder->getPluginName() != $pluginName + || !$recordBuilder->isEnabled() + ) { + continue; + } - $recordBuilder->buildMultiplePeriod($this->getProcessor()); + $recordBuilder->buildMultiplePeriod($this->getProcessor()); + } } $this->aggregateMultipleReports(); From 6cfe3ee3120faf5b86829810befc7d280b4a1af8 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 26 Feb 2023 14:50:03 -0800 Subject: [PATCH 09/56] insert empty DataTables (as this appears to be the existing behavior before this change) --- core/ArchiveProcessor/RecordBuilder.php | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 45dbe3c422c..1e610d64a78 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -127,10 +127,6 @@ protected abstract function aggregate(ArchiveProcessor $archiveProcessor); private function insertRecord(ArchiveProcessor $archiveProcessor, $recordName, DataTable\DataTableInterface $record) { - if ($record->getRowsCount() <= 0) { - return; - } - $serialized = $record->getSerialized($this->maxRowsInTable, $this->maxRowsInSubtable, $this->columnToSortByBeforeTruncation); $archiveProcessor->insertBlobRecord($recordName, $serialized); unset($serialized); From 43bd3463adfb9f432a1f9744f79b564cd40182ff Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 26 Feb 2023 14:57:54 -0800 Subject: [PATCH 10/56] add RecordBuilder class name to aggregation query hint --- core/ArchiveProcessor/RecordBuilder.php | 4 ++-- core/DataAccess/LogAggregator.php | 5 +++++ core/Plugin/Archiver.php | 18 ++++++++++++++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 1e610d64a78..8a868db155c 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -13,7 +13,7 @@ use Piwik\DataTable; /** - * TODO docs + * TODO docs (including members) * @api */ abstract class RecordBuilder @@ -156,7 +156,7 @@ public function getPluginName() return $plugin; } - private function getQueryDecoration() // TODO: use query decoration in LogAggregator or wherever + public function getQueryOriginHint() { $recordBuilderName = get_class($this); $recordBuilderName = explode('\\', $recordBuilderName); diff --git a/core/DataAccess/LogAggregator.php b/core/DataAccess/LogAggregator.php index 63a0df741bb..3db3b393711 100644 --- a/core/DataAccess/LogAggregator.php +++ b/core/DataAccess/LogAggregator.php @@ -206,6 +206,11 @@ public function setQueryOriginHint($nameOfOrigin) $this->queryOriginHint = $nameOfOrigin; } + public function getQueryOriginHint() + { + return $this->queryOriginHint; + } + public function getSegmentTmpTableName() { $bind = $this->getGeneralQueryBindParams(); diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 2e8f157735c..d809bec31c5 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -152,7 +152,14 @@ final public function callAggregateDayReport() continue; } - $recordBuilder->build($this->getProcessor()); + $originalQueryHint = $this->getProcessor()->getLogAggregator()->getQueryOriginHint(); + $newQueryHint = $originalQueryHint . ' ' . $recordBuilder->getQueryOriginHint(); + try { + $this->getProcessor()->getLogAggregator()->setQueryOriginHint($newQueryHint); + $recordBuilder->build($this->getProcessor()); + } finally { + $this->getProcessor()->getLogAggregator()->setQueryOriginHint($originalQueryHint); + } } } @@ -181,7 +188,14 @@ final public function callAggregateMultipleReports() continue; } - $recordBuilder->buildMultiplePeriod($this->getProcessor()); + $originalQueryHint = $this->getProcessor()->getLogAggregator()->getQueryOriginHint(); + $newQueryHint = $originalQueryHint . ' ' . $recordBuilder->getQueryOriginHint(); + try { + $this->getProcessor()->getLogAggregator()->setQueryOriginHint($newQueryHint); + $recordBuilder->buildMultiplePeriod($this->getProcessor()); + } finally { + $this->getProcessor()->getLogAggregator()->setQueryOriginHint($originalQueryHint); + } } } From c24d3aa740744ec1a81c3a16109216ba383d93a3 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 26 Feb 2023 15:25:02 -0800 Subject: [PATCH 11/56] clear up in-source todo --- core/ArchiveProcessor/Record.php | 4 +-- core/ArchiveProcessor/RecordBuilder.php | 31 ++++++++++++++++--- core/Plugin/Archiver.php | 12 ++++++- plugins/Goals/RecordBuilders/Base.php | 5 +++ .../RecordBuilders/GeneralGoalsRecords.php | 7 ----- 5 files changed, 44 insertions(+), 15 deletions(-) diff --git a/core/ArchiveProcessor/Record.php b/core/ArchiveProcessor/Record.php index b364e9432b3..273fe948198 100644 --- a/core/ArchiveProcessor/Record.php +++ b/core/ArchiveProcessor/Record.php @@ -8,11 +8,9 @@ namespace Piwik\ArchiveProcessor; -use Piwik\DataTable; - /** - * TODO docs * @api + * @since 5.0.0 */ class Record { diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 8a868db155c..af58c05e8ed 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -13,8 +13,9 @@ use Piwik\DataTable; /** - * TODO docs (including members) - * @api + * Inherit from this class to define archiving logic for one or more records. + * + * @since 5.0.0 */ abstract class RecordBuilder { @@ -43,6 +44,13 @@ abstract class RecordBuilder */ protected $columnAggregationOps; + /** + * @param int|null $maxRowsInTable + * @param int|null $maxRowsInSubtable + * @param string|int|null $columnToSortByBeforeTruncation + * @param array|null $columnAggregationOps + * @api + */ public function __construct($maxRowsInTable = null, $maxRowsInSubtable = null, $columnToSortByBeforeTruncation = null, $columnAggregationOps = null) { @@ -116,12 +124,20 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) } /** + * Returns metadata for records primarily used when aggregating over non-day periods. Every numeric/blob + * record your RecordBuilder creates should have an associated piece of record metadata. + * * @return Record[] + * @api */ public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor); /** - * @return (DataTable|int|float|string)[] + * Derived classes should define this method to aggregate log data for a single day and return the records + * to store indexed by record names. + * + * @return (DataTable|int|float|string)[] Record values indexed by their record name, eg, `['MyPlugin_MyRecord' => new DataTable()]` + * @api */ protected abstract function aggregate(ArchiveProcessor $archiveProcessor); @@ -156,10 +172,17 @@ public function getPluginName() return $plugin; } + /** + * Returns an extra hint for LogAggregator to add to log aggregation SQL. Can be overridden if you'd + * like the origin hint to have more information. + * + * @return string + * @api + */ public function getQueryOriginHint() { $recordBuilderName = get_class($this); $recordBuilderName = explode('\\', $recordBuilderName); return end($recordBuilderName); } -} \ No newline at end of file +} diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index d809bec31c5..afac8e10776 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -122,8 +122,18 @@ private function getRecordBuilders() $recordBuilders = array_filter($recordBuilders); /** - * TODO + * Triggered to add new RecordBuilders that cannot be picked up automatically by the platform. + * If you define RecordBuilders that take a parameter, for example, an ID to an entity your plugin + * manages, use this event to add instances of that RecordBuilder to the global list. * + * **Example** + * + * public function addRecordBuilders(&$recordBuilders) + * { + * $recordBuilders[] = new MyParameterizedRecordBuilder($idOfThingToArchiveFor); + * } + * + * @param ArchiveProcessor\RecordBuilder[] $reports An array of dimensions * @api */ Piwik::postEvent('Archiver.addRecordBuilders', [&$recordBuilders]); diff --git a/plugins/Goals/RecordBuilders/Base.php b/plugins/Goals/RecordBuilders/Base.php index b9401a4b40c..5d7ac90a126 100644 --- a/plugins/Goals/RecordBuilders/Base.php +++ b/plugins/Goals/RecordBuilders/Base.php @@ -28,6 +28,11 @@ protected function usesEcommerce($idSite) && Site::isEcommerceEnabledFor($idSite); } + protected function hasAnyGoalOrEcommerce($idSite) + { + return $this->usesEcommerce($idSite) || !empty(GoalManager::getGoalIds($idSite)); + } + protected function getEcommerceIdGoals() { return array(GoalManager::IDGOAL_CART, GoalManager::IDGOAL_ORDER); diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index 89234783119..3e98d4e16de 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -162,8 +162,6 @@ protected function aggregate(ArchiveProcessor $archiveProcessor) Archiver::getRecordName('revenue') => $totalRevenue, ], $numericRecords); - // TODO: Remove use of DataArray everywhere, just directly build DataTables (less memory use overall) - foreach ($visitsToConversions as $idGoal => $table) { $recordName = Archiver::getRecordName(self::VISITS_UNTIL_RECORD_NAME, $idGoal); $result[$recordName] = $table; @@ -220,11 +218,6 @@ public function getRecordMetadata(ArchiveProcessor $archiveProcessor) return $records; } - private function hasAnyGoalOrEcommerce($idSite) // TODO: this & other methods like this should probably be in a base class - { - return $this->usesEcommerce($idSite) || !empty(GoalManager::getGoalIds($idSite)); - } - protected function getConversionsNumericMetrics(DataArray $goals) { $numericRecords = array(); From c2de81344ff419bb9174bf472bf8fb27c4cd5d8e Mon Sep 17 00:00:00 2001 From: diosmosis Date: Fri, 3 Mar 2023 06:54:18 -0800 Subject: [PATCH 12/56] attempt only archiving requested report if range archive and the record needed is created by a RecordBuilder --- core/Archive.php | 51 ++++--- core/ArchiveProcessor/Parameters.php | 17 ++- core/ArchiveProcessor/RecordBuilder.php | 34 +++++ core/ArchiveProcessor/Rules.php | 9 ++ core/DataAccess/ArchiveSelector.php | 9 +- core/DataAccess/Model.php | 21 +++ core/Plugin/Archiver.php | 23 +++ .../Archive/PartialArchiveTest.php | 139 ++++++++++++++++++ 8 files changed, 283 insertions(+), 20 deletions(-) create mode 100644 tests/PHPUnit/Integration/Archive/PartialArchiveTest.php diff --git a/core/Archive.php b/core/Archive.php index 09e6c3bb360..da49b5bc1ab 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -422,20 +422,23 @@ public function getDataTableExpanded($name, $idSubtable = null, $depth = null, $ } /** - * Returns the list of plugins that archive the given reports. + * Returns the given reports grouped by the plugin name that archives them. * * @param array $archiveNames - * @return array + * @return array `['MyPlugin' => ['MyPlugin_metric1', 'MyPlugin_report1'], ...]` */ private function getRequestedPlugins($archiveNames) { $result = []; foreach ($archiveNames as $name) { - $result[] = self::getPluginForReport($name); + $plugin = self::getPluginForReport($name); + $result[$plugin][] = $name; } - return array_unique($result); + $result = array_map('array_unique', $result); + + return $result; } /** @@ -582,12 +585,13 @@ protected function get($archiveNames, $archiveDataType, $idSubtable = null) * query archive tables for IDs w/o launching archiving, or launch archiving and * get the idarchive from ArchiveProcessor instances. * - * @param string $archiveNames + * @param string[] $archiveNames * @return array */ private function getArchiveIds($archiveNames) { - $plugins = $this->getRequestedPlugins($archiveNames); + $archiveNamesByPlugin = $this->getRequestedPlugins($archiveNames); + $plugins = array_keys($archiveNamesByPlugin); // figure out which archives haven't been processed (if an archive has been processed, // then we have the archive IDs in $this->idarchives) @@ -613,15 +617,13 @@ private function getArchiveIds($archiveNames) $globalDoneFlag = Rules::getDoneFlagArchiveContainsAllPlugins($this->params->getSegment()); $doneFlags[$globalDoneFlag] = true; - $archiveGroups = array_unique($archiveGroups); - // cache id archives for plugins we haven't processed yet if (!empty($archiveGroups)) { if ( Rules::isArchivingEnabledFor($this->params->getIdSites(), $this->params->getSegment(), $this->getPeriodLabel()) && !$this->forceFetchingWithoutLaunchingArchiving ) { - $this->cacheArchiveIdsAfterLaunching($archiveGroups, $plugins); + $this->cacheArchiveIdsAfterLaunching($archiveNamesByPlugin); } else { $this->cacheArchiveIdsWithoutLaunching($plugins); } @@ -637,10 +639,9 @@ private function getArchiveIds($archiveNames) * This function will launch the archiving process for each period/site/plugin if * metrics/reports have not been calculated/archived already. * - * @param array $archiveGroups @see getArchiveGroupOfReport - * @param array $plugins List of plugin names to archive. + * @param array $archiveNamesByPlugin @see getRequestedPlugins */ - private function cacheArchiveIdsAfterLaunching($archiveGroups, $plugins) + private function cacheArchiveIdsAfterLaunching($archiveNamesByPlugin) { foreach ($this->params->getPeriods() as $period) { $twoDaysAfterPeriod = $period->getDateEnd()->addDay(2); @@ -683,7 +684,7 @@ private function cacheArchiveIdsAfterLaunching($archiveGroups, $plugins) continue; } - $this->prepareArchive($archiveGroups, $site, $period); + $this->prepareArchive($archiveNamesByPlugin, $site, $period); } } } @@ -730,6 +731,19 @@ private function cacheArchiveIdsWithoutLaunching($plugins) */ private function getDoneStringForPlugin($plugin, $idSites) { + // TODO: code redundancy w/ cacheArchiveIdsAfterLaunching + $requestedReport = null; + if (SettingsServer::isArchivePhpTriggered()) { + $requestedReport = Common::getRequestVar('requestedReport', '', 'string'); + } + + $shouldOnlyProcessRequestedRecords = empty($requestedReport) + && Rules::shouldProcessOnlyReportsRequestedInArchiveQuery($this->getPeriodLabel()); + + if ($shouldOnlyProcessRequestedRecords) { + return Rules::getDoneFlagArchiveContainsOnePlugin($this->params->getSegment(), $plugin); + } + return Rules::getDoneStringFlagFor( $idSites, $this->params->getSegment(), @@ -867,11 +881,11 @@ public static function getPluginForReport($report) } /** - * @param $archiveGroups + * @param $archiveNamesByPlugin * @param $site * @param $period */ - private function prepareArchive(array $archiveGroups, Site $site, Period $period) + private function prepareArchive(array $archiveNamesByPlugin, Site $site, Period $period) { $coreAdminHomeApi = API::getInstance(); @@ -880,13 +894,16 @@ private function prepareArchive(array $archiveGroups, Site $site, Period $period $requestedReport = Common::getRequestVar('requestedReport', '', 'string'); } + $shouldOnlyProcessRequestedArchives = empty($requestedReport) + && Rules::shouldProcessOnlyReportsRequestedInArchiveQuery($period->getLabel()); + $periodString = $period->getRangeString(); $periodDateStr = $period->getLabel() == 'range' ? $periodString : $period->getDateStart()->toString(); $idSites = [$site->getId()]; // process for each plugin as well - foreach ($archiveGroups as $plugin) { + foreach ($archiveNamesByPlugin as $plugin => $archiveNames) { $doneFlag = $this->getDoneStringForPlugin($plugin, $idSites); $this->initializeArchiveIdCache($doneFlag); @@ -896,7 +913,7 @@ private function prepareArchive(array $archiveGroups, Site $site, Period $period $periodDateStr, $this->params->getSegment()->getOriginalString(), $plugin, - $requestedReport + empty($requestedReport) && $shouldOnlyProcessRequestedArchives ? $archiveNames : $requestedReport ); if ( diff --git a/core/ArchiveProcessor/Parameters.php b/core/ArchiveProcessor/Parameters.php index 4d9975ca9eb..1797dfd45a1 100644 --- a/core/ArchiveProcessor/Parameters.php +++ b/core/ArchiveProcessor/Parameters.php @@ -76,7 +76,7 @@ public function __construct(Site $site, Period $period, Segment $segment) * If we want to archive only a single report, we can request that via this method. * It is up to each plugin's archiver to respect the setting. * - * @param string $archiveOnlyReport + * @param string|string[] $archiveOnlyReport * @api */ public function setArchiveOnlyReport($archiveOnlyReport) @@ -295,7 +295,11 @@ public function setIsRootArchiveRequest($isRootArchiveRequest) public function __toString() { - return "[idSite = {$this->getSite()->getId()}, period = {$this->getPeriod()->getLabel()} {$this->getPeriod()->getRangeString()}, segment = {$this->getSegment()->getString()}, plugin = {$this->getRequestedPlugin()}, report = {$this->getArchiveOnlyReport()}]"; + $requestedReports = $this->getArchiveOnlyReport(); + if (is_array($requestedReports)) { + $requestedReports = json_encode($requestedReports); + } + return "[idSite = {$this->getSite()->getId()}, period = {$this->getPeriod()->getLabel()} {$this->getPeriod()->getRangeString()}, segment = {$this->getSegment()->getString()}, plugin = {$this->getRequestedPlugin()}, report = {$requestedReports}]"; } /** @@ -323,4 +327,13 @@ public function setIsPartialArchive($isArchiveOnlyReportHandled) { $this->isArchiveOnlyReportHandled = $isArchiveOnlyReportHandled; } + + public function getArchiveOnlyReportAsArray() + { + $requestedReport = $this->getArchiveOnlyReport(); + if (empty($requestedReport)) { + return []; + } + return is_array($requestedReport) ? $requestedReport : [$requestedReport]; + } } diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index af58c05e8ed..534a61f7150 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -98,12 +98,21 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) return; } + $requestedReports = $archiveProcessor->getParams()->getArchiveOnlyReportAsArray(); + $recordsBuilt = $this->getRecordMetadata($archiveProcessor); $numericRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_NUMERIC; }); $blobRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_BLOB; }); + // TODO: dependsOn in record metadata foreach ($blobRecords as $record) { + if (!empty($requestedReports) + && !in_array($record->getName(), $requestedReports) + ) { + continue; + } + $maxRowsInTable = $record->getMaxRowsInTable() ?? $this->maxRowsInTable; $maxRowsInSubtable = $record->getMaxRowsInSubtable() ?? $this->maxRowsInSubtable; $columnToSortByBeforeTruncation = $record->getColumnToSortByBeforeTruncation() ?? $this->columnToSortByBeforeTruncation; @@ -119,6 +128,11 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) if (!empty($numericRecords)) { $numericMetrics = array_map(function (Record $r) { return $r->getName(); }, $numericRecords); + if (!empty($requestedReport)) { + $numericMetrics = array_filter($numericMetrics, function ($name) use ($requestedReports) { + return in_array($name, $requestedReports); + }); + } $archiveProcessor->aggregateNumericMetrics($numericMetrics, $this->columnAggregationOps); } } @@ -185,4 +199,24 @@ public function getQueryOriginHint() $recordBuilderName = explode('\\', $recordBuilderName); return end($recordBuilderName); } + + /** + * Returns true if at least one of the given reports is handled by this RecordBuilder instance + * when invoked with the given ArchiveProcessor. + * + * @param ArchiveProcessor $archiveProcessor Archiving parameters, like idSite, can influence the list of + * all records a RecordBuilder produces, so it is required here. + * @param string[] $requestedReports The list of requested reports to check for. + * @return bool + */ + public function isBuilderForAtLeastOneOf(ArchiveProcessor $archiveProcessor, array $requestedReports) + { + $recordMetadata = $this->getRecordMetadata($archiveProcessor); + foreach ($recordMetadata as $record) { + if (in_array($record->getName(), $requestedReports)) { + return true; + } + } + return false; + } } diff --git a/core/ArchiveProcessor/Rules.php b/core/ArchiveProcessor/Rules.php index d1ff2e0880e..3d443c7befc 100644 --- a/core/ArchiveProcessor/Rules.php +++ b/core/ArchiveProcessor/Rules.php @@ -381,4 +381,13 @@ public static function isSegmentPluginArchivingDisabled($pluginName, $siteId = n return in_array(strtolower($pluginName), $pluginArchivingSetting); } + + public static function shouldProcessOnlyReportsRequestedInArchiveQuery($periodLabel) + { + if (SettingsServer::isArchivePhpTriggered()) { + return false; + } + + return $periodLabel === 'range'; + } } diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index 3af14f0201e..7149a0b0302 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -70,6 +70,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $numericTable = ArchiveTableCreator::getNumericTable($dateStart); $requestedPlugin = $params->getRequestedPlugin(); + $requestedReport = $params->getArchiveOnlyReport(); $segment = $params->getSegment(); $plugins = array("VisitsSummary", $requestedPlugin); $plugins = array_filter($plugins); @@ -94,7 +95,13 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $value = isset($result['value']) ? $result['value'] : false; $result['idarchive'] = empty($result['idarchive']) ? [] : [$result['idarchive']]; - if (isset($result['partial'])) { + if (!empty($result['partial']) + // TODO: test and comment for this + && (!empty($result['idarchive']) + || empty($requestedReport) + || self::getModel()->isRecordContainedInArchives($dateStart, $result['partial'], $requestedReport) + ) + ) { $result['idarchive'] = array_merge($result['idarchive'], $result['partial']); } diff --git a/core/DataAccess/Model.php b/core/DataAccess/Model.php index 4323481b996..61e49706db3 100644 --- a/core/DataAccess/Model.php +++ b/core/DataAccess/Model.php @@ -973,6 +973,27 @@ public function resetFailedArchivingJobs() return $query->rowCount(); } + public function isRecordContainedInArchives(Date $archiveStartDate, $idArchives, $recordName) + { + $idArchives = array_map('intval', $idArchives); + $idArchives = implode(',', $idArchives); + + $countSql = "SELECT name FROM %s WHERE idarchive IN ($idArchives) AND name = ? LIMIT 1"; + + $numericTable = ArchiveTableCreator::getNumericTable($archiveStartDate); + $blobTable = ArchiveTableCreator::getBlobTable($archiveStartDate); + + foreach ([$numericTable, $blobTable] as $tableName) { + $sql = sprintf($countSql, $tableName); + $rows = Db::fetchAll($sql, [$recordName]); + if (!empty($rows)) { + return true; + } + } + + return false; + } + private function isCutOffGroupConcatResult($pair) { $position = strpos($pair, '.'); diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index afac8e10776..2343cc6579a 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -140,6 +140,16 @@ private function getRecordBuilders() $transientCache->save($cacheKey, $recordBuilders); } + + $requestedReports = $this->processor->getParams()->getArchiveOnlyReport(); + if (!empty($requestedReports)) { + $requestedReports = is_string($requestedReports) ? [$requestedReports] : $requestedReports; + + $recordBuilders = array_filter($recordBuilders, function (ArchiveProcessor\RecordBuilder $builder) use ($requestedReports) { + return $builder->isBuilderForAtLeastOneOf($this->processor, $requestedReports); + }); + } + return $recordBuilders; } @@ -155,6 +165,7 @@ final public function callAggregateDayReport() if (!empty($pluginName)) { $recordBuilders = $this->getRecordBuilders(); + foreach ($recordBuilders as $recordBuilder) { if ($recordBuilder->getPluginName() != $pluginName || !$recordBuilder->isEnabled() @@ -162,6 +173,12 @@ final public function callAggregateDayReport() continue; } + // if automatically handling "archive only report" in RecordBuilders, make sure the archive + // will be marked as partial + if ($this->processor->getParams()->getArchiveOnlyReport()) { + $this->processor->getParams()->setIsPartialArchive(true); // make sure archive will be marked as partial + } + $originalQueryHint = $this->getProcessor()->getLogAggregator()->getQueryOriginHint(); $newQueryHint = $originalQueryHint . ' ' . $recordBuilder->getQueryOriginHint(); try { @@ -198,6 +215,12 @@ final public function callAggregateMultipleReports() continue; } + // if automatically handling "archive only report" in RecordBuilders, make sure the archive + // will be marked as partial + if ($this->processor->getParams()->getArchiveOnlyReport()) { + $this->processor->getParams()->setIsPartialArchive(true); // make sure archive will be marked as partial + } + $originalQueryHint = $this->getProcessor()->getLogAggregator()->getQueryOriginHint(); $newQueryHint = $originalQueryHint . ' ' . $recordBuilder->getQueryOriginHint(); try { diff --git a/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php b/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php new file mode 100644 index 00000000000..2c32f4eb48e --- /dev/null +++ b/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php @@ -0,0 +1,139 @@ +getDaysToConversion(1, 'day', '2020-04-07', false, $this->idGoal); // day first + $data = GoalsApi::getInstance()->getDaysToConversion(1, 'range', '2020-04-06,2020-04-09', false, $this->idGoal); + $this->assertEquals(['label' => '0-0', Metrics::INDEX_NB_CONVERSIONS => 1], $data->getFirstRow()->getColumns()); + + // check archive is all plugins archive as expected + [$idArchives, $archiveInfo] = $this->getArchiveInfo('2020_04', 5, false); + $this->assertEquals([ + ['idsite' => 1, 'date1' => '2020-04-06', 'date2' => '2020-04-09', 'period' => 5, 'name' => 'done', 'value' => 1, 'blob_count' => 57], + ], $archiveInfo); + + $maxIdArchive = $this->getMaxIdArchive('2020_04'); + + self::trackAnotherVisit(); + + // trigger browser archiving for range + GoalsApi::getInstance()->getDaysToConversion(1, 'day', '2020-04-08', false, $this->idGoal); // first day + unset($_GET['trigger']); + StaticContainer::get(ArchiveInvalidator::class)->markArchivesAsInvalidated([1], ['2020-04-06,2020-04-09'], 'range'); + $data = GoalsApi::getInstance()->getDaysToConversion(1, 'range', '2020-04-06,2020-04-09', false, $this->idGoal); + $this->assertEquals(['label' => '0-0', Metrics::INDEX_NB_CONVERSIONS => 2], $data->getFirstRow()->getColumns()); + + [$idArchives, $archiveInfo] = $this->getArchiveInfo('2020_04', 5, false, $maxIdArchive); + + $archiveNames = $this->getArchiveNames('2020_04', $idArchives[0]); + $this->assertEquals(['Goal_1_days_until_conv'], $archiveNames); + + $this->assertEquals([ + // expect only one blob for new range partial archive + ['idsite' => 1, 'date1' => '2020-04-06', 'date2' => '2020-04-09', 'period' => 5, 'name' => 'done.Goals', 'value' => 5, 'blob_count' => 1], + ], $archiveInfo); + } + + private static function createWebsite() + { + Fixture::createWebsite('2018-05-05 09:00:00'); + GoalsApi::getInstance()->addGoal(1, 'test goal', 'url', 'http', 'contains'); + } + + private static function trackVisit() + { + $t = Fixture::getTracker(1, self::$dateTime); + $t->setUrl('http://site.com/path'); + Fixture::checkResponse($t->doTrackPageView('page title')); + } + + private static function trackAnotherVisit() + { + $t = Fixture::getTracker(1, Date::factory(self::$dateTime)->addDay(1)->getDatetime()); + $t->setUrl('http://site.com/path2'); + Fixture::checkResponse($t->doTrackPageView('page title 2')); + } + + private function getArchiveInfo($yearMonth, $period, $segmentHash = false, $idArchiveGreaterThan = 0) + { + $sql = 'SELECT idarchive, idsite, date1, date2, period, name, value FROM ' + . Common::prefixTable('archive_numeric_' . $yearMonth) + . ' WHERE (name = \'done' . $segmentHash . '\' OR name LIKE \'done' . $segmentHash . '.%\') AND period = ? AND idarchive > ?'; + $archiveNumericInfo = Db::fetchAll($sql, [$period, $idArchiveGreaterThan]); + + $sql = 'SELECT idarchive, COUNT(DISTINCT name) AS blob_count FROM ' . Common::prefixTable('archive_blob_' . $yearMonth) + . ' GROUP BY idarchive'; + $archiveBlobInfo = Db::fetchAll($sql); + $archiveBlobInfo = array_column($archiveBlobInfo, 'blob_count', 'idarchive'); + + $idArchives = []; + foreach ($archiveNumericInfo as &$row) { + $row['blob_count'] = $archiveBlobInfo[$row['idarchive']] ?? 0; + + // archives can randomly be created out of order despite not using core:archive, so we don't check their + // value. we still need to use it, though, so we return the values. + $idArchives[] = $row['idarchive']; + unset($row['idarchive']); + } + + return [$idArchives, $archiveNumericInfo]; + } + + private function getArchiveNames($yearMonth, $idArchive) + { + $sql = 'SELECT DISTINCT name FROM ' . Common::prefixTable('archive_blob_' . $yearMonth) + . ' WHERE idarchive = ?'; + $rows = Db::fetchAll($sql, [$idArchive]); + $rows = array_column($rows, 'name'); + return $rows; + } + + protected static function configureFixture($fixture) + { + parent::configureFixture($fixture); + $fixture->createSuperUser = true; + } + + private function getMaxIdArchive($yearMonth) + { + return Db::fetchOne('SELECT MAX(idarchive) FROM ' . Common::prefixTable('archive_numeric_' . $yearMonth)); + } +} \ No newline at end of file From 0139de3768eebd0425297fc5fc3d27fe55f1b0b8 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 11:10:37 -0800 Subject: [PATCH 13/56] refactor ArchiveSelector::getArchiveIds() to provide result with string keys --- core/ArchiveProcessor/Loader.php | 16 +++++- core/CronArchive.php | 5 +- core/CronArchive/QueueConsumer.php | 7 +-- core/DataAccess/ArchiveSelector.php | 53 +++++++++++++++++-- plugins/SegmentEditor/SegmentEditor.php | 4 +- .../DataAccess/ArchiveSelectorTest.php | 6 +-- 6 files changed, 76 insertions(+), 15 deletions(-) diff --git a/core/ArchiveProcessor/Loader.php b/core/ArchiveProcessor/Loader.php index 961426b811b..1dfb963e3e0 100644 --- a/core/ArchiveProcessor/Loader.php +++ b/core/ArchiveProcessor/Loader.php @@ -207,7 +207,12 @@ protected function loadArchiveData() // this hack was used to check the main function goes to return or continue // NOTE: $idArchives will contain the latest DONE_OK/DONE_INVALIDATED archive as well as any partial archives // with a ts_archived >= the DONE_OK/DONE_INVALIDATED date. - list($idArchives, $visits, $visitsConverted, $isAnyArchiveExists, $tsArchived, $value) = $this->loadExistingArchiveIdFromDb(); + $archiveInfo = $this->loadExistingArchiveIdFromDb(); + $idArchives = $archiveInfo['idArchives']; + $visits = $archiveInfo['visits']; + $visitsConverted = $archiveInfo['visitsConverted']; + $tsArchived = $archiveInfo['tsArchived']; + $value = $archiveInfo['value']; if (!empty($idArchives) && !Rules::isActuallyForceArchivingSinglePlugin() @@ -330,7 +335,14 @@ public function loadExistingArchiveIdFromDb() // return no usable archive found, and no existing archive. this will skip invalidation, which should // be fine since we just force archiving. - return [false, false, false, false, false, false]; + return [ + 'idArchives' => false, + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => false, + 'tsArchived' => false, + 'doneFlagValue' => false, + ]; } $minDatetimeArchiveProcessedUTC = $this->getMinTimeArchiveProcessed(); diff --git a/core/CronArchive.php b/core/CronArchive.php index 81eae542ac9..2f4190ea147 100644 --- a/core/CronArchive.php +++ b/core/CronArchive.php @@ -994,12 +994,13 @@ public function canWeSkipInvalidatingBecauseThereIsAUsablePeriod(Parameters $par Date::now()->subSeconds(Rules::getPeriodArchiveTimeToLiveDefault($params->getPeriod()->getLabel())); // empty plugins param since we only check for an 'all' archive - list($idArchive, $visits, $visitsConverted, $ignore, $tsArchived) = ArchiveSelector::getArchiveIdAndVisits($params, $minArchiveProcessedTime, $includeInvalidated = $isPeriodIncludesToday); + $archiveInfo = ArchiveSelector::getArchiveIdAndVisits($params, $minArchiveProcessedTime, $includeInvalidated = $isPeriodIncludesToday); + $idArchive = $archiveInfo['idArchives']; // day has changed since the archive was created, we need to reprocess it if ($isYesterday && !empty($idArchive) - && Date::factory($tsArchived)->toString() != $today->toString() + && Date::factory($archiveInfo['tsArchived'])->toString() != $today->toString() ) { return false; } diff --git a/core/CronArchive/QueueConsumer.php b/core/CronArchive/QueueConsumer.php index cff23bca560..92e3c43080b 100644 --- a/core/CronArchive/QueueConsumer.php +++ b/core/CronArchive/QueueConsumer.php @@ -616,11 +616,12 @@ public function usableArchiveExists(array $invalidatedArchive) // if valid archive already exists, do not re-archive $minDateTimeProcessedUTC = Date::now()->subSeconds(Rules::getPeriodArchiveTimeToLiveDefault($periodLabel)); $archiveIdAndVisits = ArchiveSelector::getArchiveIdAndVisits($params, $minDateTimeProcessedUTC, $includeInvalidated = false); + $idArchives = $archiveIdAndVisits['idArchives']; + $tsArchived = $archiveIdAndVisits['tsArchived']; - $tsArchived = !empty($archiveIdAndVisits[4]) ? Date::factory($archiveIdAndVisits[4])->getDatetime() : null; + $tsArchived = !empty($tsArchived) ? Date::factory($tsArchived)->getDatetime() : null; - $idArchive = $archiveIdAndVisits[0]; - if (empty($idArchive)) { + if (empty($idArchives)) { return [false, $tsArchived]; } diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index 7149a0b0302..d892d2551b3 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -71,6 +71,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $requestedPlugin = $params->getRequestedPlugin(); $requestedReport = $params->getArchiveOnlyReport(); + $segment = $params->getSegment(); $plugins = array("VisitsSummary", $requestedPlugin); $plugins = array_filter($plugins); @@ -84,7 +85,14 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $results = self::getModel()->getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, null, $doneFlags); if (empty($results)) { // no archive found - return [false, false, false, false, false, false]; + return self::archiveInfoBcResult([ + 'idArchives' => false, + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => false, + 'tsArchived' => false, + 'doneFlagValue' => false, + ]); } $result = self::findArchiveDataWithLatestTsArchived($results, $requestedPluginDoneFlags, $allPluginsDoneFlag); @@ -109,7 +117,14 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params || (isset($result['value']) && !in_array($result['value'], $doneFlagValues)) ) { // the archive cannot be considered valid for this request (has wrong done flag value) - return [false, $visits, $visitsConverted, true, $tsArchived, $value]; + return self::archiveInfoBcResult([ + 'idArchives' => false, + 'visits' => $visits, + 'visitsConverted' => $visitsConverted, + 'archiveExists' => true, + 'tsArchived' => $tsArchived, + 'doneFlagValue' => $value, + ]); } if (!empty($minDatetimeArchiveProcessedUTC) && !is_object($minDatetimeArchiveProcessedUTC)) { @@ -121,12 +136,26 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params && !empty($result['idarchive']) && Date::factory($tsArchived)->isEarlier($minDatetimeArchiveProcessedUTC) ) { - return [false, $visits, $visitsConverted, true, $tsArchived, $value]; + return self::archiveInfoBcResult([ + 'idArchives' => false, + 'visits' => $visits, + 'visitsConverted' => $visitsConverted, + 'archiveExists' => true, + 'tsArchived' => $tsArchived, + 'doneFlagValue' => $value, + ]); } $idArchives = !empty($result['idarchive']) ? $result['idarchive'] : false; - return [$idArchives, $visits, $visitsConverted, true, $tsArchived, $value]; + return self::archiveInfoBcResult([ + 'idArchives' => $idArchives, + 'visits' => $visits, + 'visitsConverted' => $visitsConverted, + 'archiveExists' => true, + 'tsArchived' => $tsArchived, + 'doneFlagValue' => $value, + ]); } /** @@ -505,4 +534,20 @@ private static function findArchiveDataWithLatestTsArchived($results, $requested return $archiveData; } + + /** + * provides BC result for getArchiveIdAndVisits + * @param array $archiveInfo + * @return array + */ + private static function archiveInfoBcResult(array $archiveInfo) + { + $archiveInfo[0] = $archiveInfo['idArchives']; + $archiveInfo[1] = $archiveInfo['visits']; + $archiveInfo[2] = $archiveInfo['visitsConverted']; + $archiveInfo[3] = $archiveInfo['archiveExists']; + $archiveInfo[4] = $archiveInfo['tsArchived']; + $archiveInfo[5] = $archiveInfo['doneFlagValue']; + return $archiveInfo; + } } diff --git a/plugins/SegmentEditor/SegmentEditor.php b/plugins/SegmentEditor/SegmentEditor.php index 9f2943f8802..7a60c7c82a0 100644 --- a/plugins/SegmentEditor/SegmentEditor.php +++ b/plugins/SegmentEditor/SegmentEditor.php @@ -253,7 +253,9 @@ private function getSegmentIfIsUnprocessed() // check if segment archive does not exist $processorParams = new \Piwik\ArchiveProcessor\Parameters(new Site($idSite), $period, $segment); $archiveIdAndStats = ArchiveSelector::getArchiveIdAndVisits($processorParams, null); - if (!empty($archiveIdAndStats[0]) || !empty($archiveIdAndStats[1])) { + if (!empty($archiveIdAndStats['idArchives']) + || !empty($archiveIdAndStats['visits']) + ) { return null; } diff --git a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php index cf32511fd9f..525bbf135b6 100644 --- a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php +++ b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php @@ -187,11 +187,11 @@ public function test_getArchiveIdAndVisits_returnsCorrectResult($period, $date, $params = new \Piwik\ArchiveProcessor\Parameters(new Site(1), Factory::build($period, $date), new Segment($segment, [1])); $result = ArchiveSelector::getArchiveIdAndVisits($params, $minDateProcessed, $includeInvalidated); - if ($result[4] !== false) { - Date::factory($result[4]); + if ($result['tsArchived'] !== false) { + Date::factory($result['tsArchived']); } - unset($result[4]); + unset($result['tsArchived']); $result = array_values($result); $this->assertEquals($expected, $result); From f7e383166c3732a147cc438356fd5ce45e30be7d Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 12:37:06 -0800 Subject: [PATCH 14/56] when all found archives are partial archives, check that requested data is present within them. if some are not present, only archive those in a new partial archive. --- core/ArchiveProcessor/Loader.php | 36 ++++++++++++++++++------- core/ArchiveProcessor/Parameters.php | 15 +++++++++++ core/ArchiveProcessor/RecordBuilder.php | 6 +++-- core/DataAccess/ArchiveSelector.php | 21 +++++++++++---- core/DataAccess/Model.php | 9 ++++--- 5 files changed, 67 insertions(+), 20 deletions(-) diff --git a/core/ArchiveProcessor/Loader.php b/core/ArchiveProcessor/Loader.php index 1dfb963e3e0..64f2c4b57b0 100644 --- a/core/ArchiveProcessor/Loader.php +++ b/core/ArchiveProcessor/Loader.php @@ -133,7 +133,7 @@ private function prepareArchiveImpl($pluginName) if (sizeof($data) == 2) { return $data; } - list($idArchives, $visits, $visitsConverted) = $data; + list($idArchives, $visits, $visitsConverted, $foundRecords) = $data; // only lock meet those conditions if ($this->params->isRootArchiveRequest() && !SettingsServer::isArchivePhpTriggered()) { @@ -152,15 +152,15 @@ private function prepareArchiveImpl($pluginName) return $data; } - list($idArchives, $visits, $visitsConverted) = $data; + list($idArchives, $visits, $visitsConverted, $existingArchives) = $data; - return $this->insertArchiveData($visits, $visitsConverted); + return $this->insertArchiveData($visits, $visitsConverted, $idArchives, $foundRecords); } finally { $lock->unlock(); } } else { - return $this->insertArchiveData($visits, $visitsConverted); + return $this->insertArchiveData($visits, $visitsConverted, $idArchives, $foundRecords); } } @@ -170,17 +170,27 @@ private function prepareArchiveImpl($pluginName) * @param $visitsConverted * @return array|false[] */ - protected function insertArchiveData($visits, $visitsConverted) + protected function insertArchiveData($visits, $visitsConverted, $existingArchives, $foundRecords) { if (SettingsServer::isArchivePhpTriggered()) { $this->logger->info("initiating archiving via core:archive for " . $this->params); } + if (!empty($foundRecords)) { + $this->params->setFoundRequestedReports($foundRecords); + } + list($visits, $visitsConverted) = $this->prepareCoreMetricsArchive($visits, $visitsConverted); list($idArchive, $visits) = $this->prepareAllPluginsArchive($visits, $visitsConverted); - if ($this->isThereSomeVisits($visits) || PluginsArchiver::doesAnyPluginArchiveWithoutVisits()) { - return [[$idArchive], $visits]; + if ($this->isThereSomeVisits($visits) + || PluginsArchiver::doesAnyPluginArchiveWithoutVisits() + ) { + $idArchivesToQuery = [$idArchive]; + if (!empty($foundRecords)) { + $idArchivesToQuery = array_merge($idArchivesToQuery, $existingArchives); + } + return [$idArchivesToQuery, $visits]; } return [false, false]; @@ -212,11 +222,17 @@ protected function loadArchiveData() $visits = $archiveInfo['visits']; $visitsConverted = $archiveInfo['visitsConverted']; $tsArchived = $archiveInfo['tsArchived']; - $value = $archiveInfo['value']; + $value = $archiveInfo['doneFlagValue']; + $existingArchives = $archiveInfo['existingRecords']; + + $requestedRecords = $this->params->getArchiveOnlyReportAsArray(); + $isMissingRequestedRecords = !empty($requestedRecords) && count($requestedRecords) != count($existingArchives ?: []); if (!empty($idArchives) && !Rules::isActuallyForceArchivingSinglePlugin() - && !$this->shouldForceInvalidatedArchive($value, $tsArchived)) { + && !$this->shouldForceInvalidatedArchive($value, $tsArchived) + && !$isMissingRequestedRecords + ) { // we have a usable idarchive (it's not invalidated and it's new enough), and we are not archiving // a single report return [$idArchives, $visits]; @@ -236,7 +252,7 @@ protected function loadArchiveData() } } - return [$idArchives, $visits, $visitsConverted]; + return [$idArchives, $visits, $visitsConverted, $existingArchives]; } /** diff --git a/core/ArchiveProcessor/Parameters.php b/core/ArchiveProcessor/Parameters.php index 1797dfd45a1..545788bab51 100644 --- a/core/ArchiveProcessor/Parameters.php +++ b/core/ArchiveProcessor/Parameters.php @@ -60,6 +60,11 @@ class Parameters */ private $isArchiveOnlyReportHandled; + /** + * @var string[]|null + */ + private $foundRequestedReports; + /** * Constructor. * @@ -336,4 +341,14 @@ public function getArchiveOnlyReportAsArray() } return is_array($requestedReport) ? $requestedReport : [$requestedReport]; } + + public function setFoundRequestedReports(array $foundRecords) + { + $this->foundRequestedReports = $foundRecords; + } + + public function getFoundRequestedReports() + { + return $this->foundRequestedReports ?: []; + } } diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 534a61f7150..9f60ffab787 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -99,6 +99,7 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) } $requestedReports = $archiveProcessor->getParams()->getArchiveOnlyReportAsArray(); + $foundRequestedReports = $archiveProcessor->getParams()->getFoundRequestedReports(); $recordsBuilt = $this->getRecordMetadata($archiveProcessor); @@ -109,6 +110,7 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) foreach ($blobRecords as $record) { if (!empty($requestedReports) && !in_array($record->getName(), $requestedReports) + && !in_array($record->getName(), $foundRequestedReports) ) { continue; } @@ -129,8 +131,8 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) if (!empty($numericRecords)) { $numericMetrics = array_map(function (Record $r) { return $r->getName(); }, $numericRecords); if (!empty($requestedReport)) { - $numericMetrics = array_filter($numericMetrics, function ($name) use ($requestedReports) { - return in_array($name, $requestedReports); + $numericMetrics = array_filter($numericMetrics, function ($name) use ($requestedReports, $foundRequestedReports) { + return in_array($name, $requestedReports) && !in_array($name, $foundRequestedReports); }); } $archiveProcessor->aggregateNumericMetrics($numericMetrics, $this->columnAggregationOps); diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index d892d2551b3..a2f5fd87ac5 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -92,6 +92,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params 'archiveExists' => false, 'tsArchived' => false, 'doneFlagValue' => false, + 'existingRecords' => null, ]); } @@ -102,15 +103,22 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $visitsConverted = isset($result['nb_visits_converted']) ? $result['nb_visits_converted'] : false; $value = isset($result['value']) ? $result['value'] : false; + $existingRecords = null; + $result['idarchive'] = empty($result['idarchive']) ? [] : [$result['idarchive']]; if (!empty($result['partial']) + ) { // TODO: test and comment for this - && (!empty($result['idarchive']) + if (!empty($result['idarchive']) || empty($requestedReport) - || self::getModel()->isRecordContainedInArchives($dateStart, $result['partial'], $requestedReport) - ) - ) { - $result['idarchive'] = array_merge($result['idarchive'], $result['partial']); + ) { + $result['idarchive'] = array_merge($result['idarchive'], $result['partial']); + } else { + $existingRecords = self::getModel()->getRecordsContainedInArchives($dateStart, $result['partial'], $requestedReport); + if (!empty($existingRecords)) { + $result['idarchive'] = array_merge($result['idarchive'], $result['partial']); + } + } } if (empty($result['idarchive']) @@ -124,6 +132,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params 'archiveExists' => true, 'tsArchived' => $tsArchived, 'doneFlagValue' => $value, + 'existingRecords' => $existingRecords, ]); } @@ -143,6 +152,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params 'archiveExists' => true, 'tsArchived' => $tsArchived, 'doneFlagValue' => $value, + 'existingRecords' => $existingRecords, ]); } @@ -155,6 +165,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params 'archiveExists' => true, 'tsArchived' => $tsArchived, 'doneFlagValue' => $value, + 'existingRecords' => $existingRecords, ]); } diff --git a/core/DataAccess/Model.php b/core/DataAccess/Model.php index 61e49706db3..16b97ccacd1 100644 --- a/core/DataAccess/Model.php +++ b/core/DataAccess/Model.php @@ -973,19 +973,22 @@ public function resetFailedArchivingJobs() return $query->rowCount(); } - public function isRecordContainedInArchives(Date $archiveStartDate, $idArchives, $recordName) + public function getRecordsContainedInArchives(Date $archiveStartDate, $idArchives, $requestedRecords) { $idArchives = array_map('intval', $idArchives); $idArchives = implode(',', $idArchives); - $countSql = "SELECT name FROM %s WHERE idarchive IN ($idArchives) AND name = ? LIMIT 1"; + $requestedRecords = is_string($requestedRecords) ? [$requestedRecords] : $requestedRecords; + $placeholders = Common::getSqlStringFieldsArray($requestedRecords); + + $countSql = "SELECT DISTINCT name FROM %s WHERE idarchive IN ($idArchives) AND name IN ($placeholders) LIMIT " . count($requestedRecords); $numericTable = ArchiveTableCreator::getNumericTable($archiveStartDate); $blobTable = ArchiveTableCreator::getBlobTable($archiveStartDate); foreach ([$numericTable, $blobTable] as $tableName) { $sql = sprintf($countSql, $tableName); - $rows = Db::fetchAll($sql, [$recordName]); + $rows = Db::fetchAll($sql, $requestedRecords); if (!empty($rows)) { return true; } From 70b5bab9db965f8e213a89b474063faa35f21791 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 13:25:52 -0800 Subject: [PATCH 15/56] return correct value in Model::getRecordsContainedInArchives() --- core/DataAccess/Model.php | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/core/DataAccess/Model.php b/core/DataAccess/Model.php index 16b97ccacd1..db346f20505 100644 --- a/core/DataAccess/Model.php +++ b/core/DataAccess/Model.php @@ -986,15 +986,13 @@ public function getRecordsContainedInArchives(Date $archiveStartDate, $idArchive $numericTable = ArchiveTableCreator::getNumericTable($archiveStartDate); $blobTable = ArchiveTableCreator::getBlobTable($archiveStartDate); + $existingRecords = []; foreach ([$numericTable, $blobTable] as $tableName) { $sql = sprintf($countSql, $tableName); $rows = Db::fetchAll($sql, $requestedRecords); - if (!empty($rows)) { - return true; - } + $existingRecords = array_merge($existingRecords, array_column($rows, 'name')); } - - return false; + return $existingRecords; } private function isCutOffGroupConcatResult($pair) From d05f243138e3f586c1acd65fea4356024524f257 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 13:26:00 -0800 Subject: [PATCH 16/56] fix if formatting --- core/DataAccess/ArchiveSelector.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index a2f5fd87ac5..2c689c6d29b 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -106,8 +106,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $existingRecords = null; $result['idarchive'] = empty($result['idarchive']) ? [] : [$result['idarchive']]; - if (!empty($result['partial']) - ) { + if (!empty($result['partial'])) { // TODO: test and comment for this if (!empty($result['idarchive']) || empty($requestedReport) From 1609d31d9c59ab7d38cefc6a58d69f1373493c1f Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 13:49:46 -0800 Subject: [PATCH 17/56] existingArchives can be falsy --- core/ArchiveProcessor/Loader.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ArchiveProcessor/Loader.php b/core/ArchiveProcessor/Loader.php index 64f2c4b57b0..33c04a8bb9c 100644 --- a/core/ArchiveProcessor/Loader.php +++ b/core/ArchiveProcessor/Loader.php @@ -188,7 +188,7 @@ protected function insertArchiveData($visits, $visitsConverted, $existingArchive ) { $idArchivesToQuery = [$idArchive]; if (!empty($foundRecords)) { - $idArchivesToQuery = array_merge($idArchivesToQuery, $existingArchives); + $idArchivesToQuery = array_merge($idArchivesToQuery, $existingArchives ?: []); } return [$idArchivesToQuery, $visits]; } From 234467afcf8e6c494fa3f0056958662605ad4e05 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 18:37:30 -0800 Subject: [PATCH 18/56] existing archives can be null if the check is not relevant to the current archive request --- core/ArchiveProcessor/Loader.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ArchiveProcessor/Loader.php b/core/ArchiveProcessor/Loader.php index 33c04a8bb9c..58d85cba463 100644 --- a/core/ArchiveProcessor/Loader.php +++ b/core/ArchiveProcessor/Loader.php @@ -226,7 +226,7 @@ protected function loadArchiveData() $existingArchives = $archiveInfo['existingRecords']; $requestedRecords = $this->params->getArchiveOnlyReportAsArray(); - $isMissingRequestedRecords = !empty($requestedRecords) && count($requestedRecords) != count($existingArchives ?: []); + $isMissingRequestedRecords = !empty($requestedRecords) && is_array($existingArchives) && count($requestedRecords) != count($existingArchives); if (!empty($idArchives) && !Rules::isActuallyForceArchivingSinglePlugin() From cb1cf08e1e3d513f63a3b76f7d5d1893c8ad6a2c Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 18:41:54 -0800 Subject: [PATCH 19/56] do not archive dependent segments if only processing the specific requested report --- core/ArchiveProcessor.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/ArchiveProcessor.php b/core/ArchiveProcessor.php index 2fa72d2dbbb..1a2bfc1263a 100644 --- a/core/ArchiveProcessor.php +++ b/core/ArchiveProcessor.php @@ -671,6 +671,12 @@ public function processDependentArchive($plugin, $segment) return; } + // range archives are always processed on demand, so pre-processing dependent archives is not required + // here + if (Rules::shouldProcessOnlyReportsRequestedInArchiveQuery($params->getPeriod()->getLabel())) { + return; + } + $idSites = [$params->getSite()->getId()]; // important to use the original segment string when combining. As the API itself would combine the original string. From 21b62d89376bc1b40b219f27ad5d7bf4df7014e9 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 21:59:19 -0800 Subject: [PATCH 20/56] fix more tests --- core/DataAccess/ArchiveSelector.php | 2 +- .../ArchiveProcessor/LoaderTest.php | 118 +++++++-- .../DataAccess/ArchiveSelectorTest.php | 235 ++++++++++++++++-- ...bsiteSeveralDaysDateRangeArchivingTest.php | 9 +- 4 files changed, 328 insertions(+), 36 deletions(-) diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index 2c689c6d29b..aa37771edbc 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -107,7 +107,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $result['idarchive'] = empty($result['idarchive']) ? [] : [$result['idarchive']]; if (!empty($result['partial'])) { - // TODO: test and comment for this + // TODO: comment for this if (!empty($result['idarchive']) || empty($requestedReport) ) { diff --git a/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php b/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php index f7b972ef313..07a4733b6eb 100644 --- a/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php +++ b/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php @@ -1054,7 +1054,22 @@ public function test_loadExistingArchiveIdFromDb_returnsFalsesIfNoArchiveFound() $archiveInfo = $loader->loadExistingArchiveIdFromDb(); - $this->assertEquals([false, false, false, false, false, false], $archiveInfo); + // unset numeric index keys kept for BC + unset($archiveInfo[0]); + unset($archiveInfo[1]); + unset($archiveInfo[2]); + unset($archiveInfo[3]); + unset($archiveInfo[4]); + unset($archiveInfo[5]); + + $this->assertEquals([ + 'idArchives' => false, + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => false, + 'doneFlagValue' => false, + 'existingRecords' => false, + ], $archiveInfo); } /** @@ -1070,17 +1085,48 @@ public function test_loadExistingArchiveIdFromDb_returnsFalsesPeriodIsForcedToAr $archiveInfo = $loader->loadExistingArchiveIdFromDb(); - $this->assertNotEmpty($archiveInfo[4]); - $this->assertLessThanOrEqual(time(), strtotime($archiveInfo[4])); + $this->assertNotEmpty($archiveInfo['tsArchived']); + $this->assertLessThanOrEqual(time(), strtotime($archiveInfo['tsArchived'])); + + unset($archiveInfo['tsArchived']); + + // unset numeric index keys kept for BC + unset($archiveInfo[0]); + unset($archiveInfo[1]); + unset($archiveInfo[2]); + unset($archiveInfo[3]); unset($archiveInfo[4]); - $archiveInfo = array_values($archiveInfo); + unset($archiveInfo[5]); - $this->assertNotEquals([false, false, false, false, false, false], $archiveInfo); + $this->assertEquals([ + 'idArchives' => false, + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => false, + 'doneFlagValue' => false, + 'existingRecords' => false, + ], $archiveInfo); Config::getInstance()->Debug[$configSetting] = 1; $archiveInfo = $loader->loadExistingArchiveIdFromDb(); - $this->assertEquals([false, false, false, false, false, false], $archiveInfo); + + // unset numeric index keys kept for BC + unset($archiveInfo[0]); + unset($archiveInfo[1]); + unset($archiveInfo[2]); + unset($archiveInfo[3]); + unset($archiveInfo[4]); + unset($archiveInfo[5]); + + $this->assertEquals([ + 'idArchives' => false, + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => false, + 'doneFlagValue' => false, + 'existingRecords' => false, + ], $archiveInfo); } public function getTestDataForLoadExistingArchiveIdFromDbDebugConfig() @@ -1103,11 +1149,25 @@ public function test_loadExistingArchiveIdFromDb_returnsArchiveIfArchiveInThePas $archiveInfo = $loader->loadExistingArchiveIdFromDb(); - $this->assertNotEmpty($archiveInfo[4]); + $this->assertNotEmpty($archiveInfo['tsArchived']); + unset($archiveInfo['tsArchived']); + + // unset numeric index keys kept for BC + unset($archiveInfo[0]); + unset($archiveInfo[1]); + unset($archiveInfo[2]); + unset($archiveInfo[3]); unset($archiveInfo[4]); - $archiveInfo = array_values($archiveInfo); + unset($archiveInfo[5]); - $this->assertEquals([['1'], '10', '0', true, '1'], $archiveInfo); + $this->assertEquals([ + 'idArchives' => ['1'], + 'visits' => '10', + 'visitsConverted' => '0', + 'archiveExists' => true, + 'doneFlagValue' => '1', + 'existingRecords' => null, + ], $archiveInfo); } public function test_loadExistingArchiveIdFromDb_returnsArchiveIfForACurrentPeriod_AndNewEnough() @@ -1119,11 +1179,25 @@ public function test_loadExistingArchiveIdFromDb_returnsArchiveIfForACurrentPeri $archiveInfo = $loader->loadExistingArchiveIdFromDb(); - $this->assertNotEmpty($archiveInfo[4]); + $this->assertNotEmpty($archiveInfo['tsArchived']); + unset($archiveInfo['tsArchived']); + + // unset numeric index keys kept for BC + unset($archiveInfo[0]); + unset($archiveInfo[1]); + unset($archiveInfo[2]); + unset($archiveInfo[3]); unset($archiveInfo[4]); - $archiveInfo = array_values($archiveInfo); + unset($archiveInfo[5]); - $this->assertEquals([['1'], '10', '0', true, '1'], $archiveInfo); + $this->assertEquals([ + 'idArchives' => ['1'], + 'visits' => '10', + 'visitsConverted' => '0', + 'archiveExists' => true, + 'doneFlagValue' => '1', + 'existingRecords' => null, + ], $archiveInfo); } public function test_loadExistingArchiveIdFromDb_returnsNoArchiveIfForACurrentPeriod_AndNoneAreNewEnough() @@ -1135,11 +1209,25 @@ public function test_loadExistingArchiveIdFromDb_returnsNoArchiveIfForACurrentPe $archiveInfo = $loader->loadExistingArchiveIdFromDb(); - $this->assertNotEmpty($archiveInfo[4]); + $this->assertNotEmpty($archiveInfo['tsArchived']); + unset($archiveInfo['tsArchived']); + + // unset numeric index keys kept for BC + unset($archiveInfo[0]); + unset($archiveInfo[1]); + unset($archiveInfo[2]); + unset($archiveInfo[3]); unset($archiveInfo[4]); - $archiveInfo = array_values($archiveInfo); + unset($archiveInfo[5]); - $this->assertEquals([false, '10', '0', true, '1'], $archiveInfo); // visits are still returned as this was the original behavior + $this->assertEquals([ + 'idArchives' => false, + 'visits' => '10', + 'visitsConverted' => '0', + 'archiveExists' => true, + 'doneFlagValue' => '1', + 'existingRecords' => null, + ], $archiveInfo); // visits are still returned as this was the original behavior } /** diff --git a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php index 525bbf135b6..747012ad5de 100644 --- a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php +++ b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php @@ -192,7 +192,14 @@ public function test_getArchiveIdAndVisits_returnsCorrectResult($period, $date, } unset($result['tsArchived']); - $result = array_values($result); + + // remove BC indexed values + unset($result[0]); + unset($result[1]); + unset($result[2]); + unset($result[3]); + unset($result[4]); + unset($result[5]); $this->assertEquals($expected, $result); } @@ -212,7 +219,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, true, - [false, false, false, false, false], + ['idArchives' => false, 'visits' => false, 'visitsConverted' => false, 'archiveExists' => false, 'doneFlagValue' => false, 'existingRecords' => null], ], [ 'day', @@ -225,7 +232,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, true, - [false, false, false, false, false], + ['idArchives' => false, 'visits' => false, 'visitsConverted' => false, 'archiveExists' => false, 'doneFlagValue' => false, 'existingRecords' => null], ], // value is not valid @@ -241,7 +248,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, false, false, true, '99'], + ['idArchives' => false, 'visits' => false, 'visitsConverted' => false, 'archiveExists' => true, 'doneFlagValue' => '99', 'existingRecords' => null], ], [ 'day', @@ -257,7 +264,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, 0, 0, true, '99'], + ['idArchives' => false, 'visits' => 0, 'visitsConverted' => 0, 'archiveExists' => true, 'doneFlagValue' => '99', 'existingRecords' => null], ], [ 'day', @@ -270,7 +277,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, 20, 40, true, false], + ['idArchives' => false, 'visits' => 20, 'visitsConverted' => 40, 'archiveExists' => true, 'doneFlagValue' => false, 'existingRecords' => null], ], [ 'day', @@ -283,7 +290,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, 30, 50, true, false], + ['idArchives' => false, 'visits' => 30, 'visitsConverted' => 50, 'archiveExists' => true, 'doneFlagValue' => false, 'existingRecords' => null], ], [ 'day', @@ -297,7 +304,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, false, false, true, false], + ['idArchives' => false, 'visits' => false, 'visitsConverted' => false, 'archiveExists' => true, 'doneFlagValue' => false, 'existingRecords' => null], ], // archive is too old @@ -311,7 +318,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, false, false, true, '1'], + ['idArchives' => false, 'visits' => false, 'visitsConverted' => false, 'archiveExists' => true, 'doneFlagValue' => '1', 'existingRecords' => null], ], [ 'day', @@ -325,7 +332,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, 1, false, true, '1'], + ['idArchives' => false, 'visits' => 1, 'visitsConverted' => false, 'archiveExists' => true, 'doneFlagValue' => '1', 'existingRecords' => null], ], // no archive done flags, but metric @@ -339,7 +346,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, false, false, false, false], + ['idArchives' => false, 'visits' => false, 'visitsConverted' => false, 'archiveExists' => false, 'doneFlagValue' => false, 'existingRecords' => null], ], [ 'day', @@ -352,7 +359,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, false, false, false, false], + ['idArchives' => false, 'visits' => false, 'visitsConverted' => false, 'archiveExists' => false, 'doneFlagValue' => false, 'existingRecords' => null], ], // archive exists and is usable @@ -365,7 +372,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [[1], 0, 0, true, '1'], + ['idArchives' => [1], 'visits' => 0, 'visitsConverted' => 0, 'archiveExists' => true, 'doneFlagValue' => '1', 'existingRecords' => null], ], [ 'day', @@ -378,7 +385,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [[1], 5, 10, true, '1'], + ['idArchives' => [1], 'visits' => 5, 'visitsConverted' => 10, 'archiveExists' => true, 'doneFlagValue' => '1', 'existingRecords' => null], ], // range archive, valid @@ -393,7 +400,7 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [[1], 5, 10, true, '1'], + ['idArchives' => [1], 'visits' => 5, 'visitsConverted' => 10, 'archiveExists' => true, 'doneFlagValue' => '1', 'existingRecords' => null], ], // range archive, invalid @@ -408,7 +415,203 @@ public function getTestDataForGetArchiveIdAndVisits() '', $minDateProcessed, false, - [false, 5, 10, true, '4'], // forcing archiving since invalid + browser archiving of ranges allowed + // forcing archiving since invalid + browser archiving of ranges allowed + ['idArchives' => false, 'visits' => 5, 'visitsConverted' => 10, 'archiveExists' => true, 'doneFlagValue' => '4', 'existingRecords' => null], + ], + ]; + } + + /** + * @dataProvider getTestDataForGetArchiveIdAndVisitsWithOnlyPartialArchives + */ + public function test_getArchiveIdAndVisits_whenThereAreOnlyPartialArchives($archiveRows, $requestedReports, $expected) + { + Fixture::createWebsite('2010-02-02 00:00:00'); + + Rules::setBrowserTriggerArchiving(false); + API::getInstance()->add('test segment', self::TEST_SEGMENT, 0, 0); // processed in real time + + $this->insertArchiveData($archiveRows); + + $params = new \Piwik\ArchiveProcessor\Parameters(new Site(1), Factory::build('range', '2020-03-04,2020-03-08'), new Segment('', [1])); + $params->setRequestedPlugin('TestPlugin'); + $params->setArchiveOnlyReport($requestedReports); + + $result = ArchiveSelector::getArchiveIdAndVisits($params); + + if ($result['tsArchived'] !== false) { + Date::factory($result['tsArchived']); + } + + unset($result['tsArchived']); + + // remove BC indexed values + unset($result[0]); + unset($result[1]); + unset($result[2]); + unset($result[3]); + unset($result[4]); + unset($result[5]); + + $this->assertEquals($expected, $result); + } + + public function getTestDataForGetArchiveIdAndVisitsWithOnlyPartialArchives() + { + // $archiveRows, $plugin, $requestedReports, $expected + return [ + // only partial archives, no requested reports + [ + [ + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf', 'is_blob_data' => true], + + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf2', 'is_blob_data' => true], + + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf3', 'is_blob_data' => true], + ], + null, + [ + 'idArchives' => [1], + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => true, + 'doneFlagValue' => false, + 'existingRecords' => null, + ], + ], + + // only partial archives, requested reports, no existing reports + [ + [ + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf', 'is_blob_data' => true], + + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf2', 'is_blob_data' => true], + + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf3', 'is_blob_data' => true], + ], + 'TestPlugin_otherMetric', + [ + 'idArchives' => false, + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => true, + 'doneFlagValue' => false, + 'existingRecords' => [], + ], + ], + + // only partial archives, requested reports, some existing reports (both numeric and blob) + [ + [ + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 1', 'is_blob_data' => true], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric2', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 2', 'is_blob_data' => true], + + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 3', 'is_blob_data' => true], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric2', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 4', 'is_blob_data' => true], + + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 5', 'is_blob_data' => true], + ['idarchive' => 3, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric2', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 6', 'is_blob_data' => true], + ], + ['TestPlugin_metric', 'TestPlugin_blob'], + [ + 'idArchives' => [1], + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => true, + 'doneFlagValue' => false, + 'existingRecords' => ['TestPlugin_metric', 'TestPlugin_blob'], + ], + ], + + // only partial archives, requested reports, all existing reports (both numeric and blob) + [ + [ + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf', 'is_blob_data' => true], + + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf2', 'is_blob_data' => true], + + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf3', 'is_blob_data' => true], + ], + ['TestPlugin_metric', 'TestPlugin_blob'], + [ + 'idArchives' => [1], + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => true, + 'doneFlagValue' => false, + 'existingRecords' => ['TestPlugin_metric', 'TestPlugin_blob'], + ], + ], + + // only partial archives, requested reports, some existing reports (both numeric and blob) across multiple partial archives + [ + [ + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5], + + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 1', 'is_blob_data' => true], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric2', 'value' => 5], + + ['idarchive' => 3, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 2', 'is_blob_data' => true], + + ['idarchive' => 4, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 4, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric', 'value' => 5], + + ['idarchive' => 5, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 5, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 3', 'is_blob_data' => true], + + ['idarchive' => 6, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 6, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric2', 'value' => 5], + ['idarchive' => 6, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 4', 'is_blob_data' => true], + + ['idarchive' => 7, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 7, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 7, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 5', 'is_blob_data' => true], + + ['idarchive' => 8, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 8, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric2', 'value' => 5], + + ['idarchive' => 9, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 9, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 6', 'is_blob_data' => true], + ], + ['TestPlugin_metric', 'TestPlugin_blob', 'TestPlugin_blob5'], + [ + 'idArchives' => [3, 2, 1], + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => true, + 'doneFlagValue' => false, + 'existingRecords' => ['TestPlugin_metric', 'TestPlugin_blob'], + ], ], ]; } diff --git a/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php b/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php index baf0fd2fb56..5f10dc427ee 100644 --- a/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php +++ b/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php @@ -138,13 +138,14 @@ public function test_checkArchiveRecords_whenPeriodIsRange() * and VisitFrequency creates two more. * * So each period=range will have = 11 records + (5 metrics + 2 flags // VisitsSummary + 3 metrics // VisitorInterest) - * = 18 * - * Total expected records = count unique archives * records per archive + * Total expected records = count unique segments * records per segment * = 3 * 21 - * = 21 + * = 63 + * + * + 3 * 10 extra done flags for partial archives since range archives only archive single reports */ - 'archive_numeric_2010_12' => 63, + 'archive_numeric_2010_12' => 93, /** * In the January date range, From 99599e934bcdd2da2b583e81daf9b2b96808ef69 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 22:14:07 -0800 Subject: [PATCH 21/56] fix LoaderTest --- .../Integration/ArchiveProcessor/LoaderTest.php | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php b/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php index 07a4733b6eb..e65e67f1cb1 100644 --- a/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php +++ b/tests/PHPUnit/Integration/ArchiveProcessor/LoaderTest.php @@ -1068,7 +1068,8 @@ public function test_loadExistingArchiveIdFromDb_returnsFalsesIfNoArchiveFound() 'visitsConverted' => false, 'archiveExists' => false, 'doneFlagValue' => false, - 'existingRecords' => false, + 'tsArchived' => false, + 'existingRecords' => null, ], $archiveInfo); } @@ -1099,12 +1100,12 @@ public function test_loadExistingArchiveIdFromDb_returnsFalsesPeriodIsForcedToAr unset($archiveInfo[5]); $this->assertEquals([ - 'idArchives' => false, - 'visits' => false, - 'visitsConverted' => false, - 'archiveExists' => false, - 'doneFlagValue' => false, - 'existingRecords' => false, + 'idArchives' => [1], + 'visits' => 10, + 'visitsConverted' => 0, + 'archiveExists' => true, + 'doneFlagValue' => 1, + 'existingRecords' => null, ], $archiveInfo); Config::getInstance()->Debug[$configSetting] = 1; @@ -1125,7 +1126,7 @@ public function test_loadExistingArchiveIdFromDb_returnsFalsesPeriodIsForcedToAr 'visitsConverted' => false, 'archiveExists' => false, 'doneFlagValue' => false, - 'existingRecords' => false, + 'tsArchived' => false, ], $archiveInfo); } From 46abe79e0e250a1f6e742da516ad1135ba87efea Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 4 Mar 2023 23:20:53 -0800 Subject: [PATCH 22/56] make sure if archiving specific reports for a single plugin that archiver class instances will not be created --- core/ArchiveProcessor/PluginsArchiver.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/ArchiveProcessor/PluginsArchiver.php b/core/ArchiveProcessor/PluginsArchiver.php index b62c91eec2c..ed7e24c4514 100644 --- a/core/ArchiveProcessor/PluginsArchiver.php +++ b/core/ArchiveProcessor/PluginsArchiver.php @@ -137,7 +137,19 @@ public function callAggregateAllPlugins($visits, $visitsConverted, $forceArchivi $archivers = static::getPluginArchivers(); + $archiveOnlyPlugin = $this->params->getRequestedPlugin(); + $archiveOnlyReports = $this->params->getArchiveOnlyReport(); + foreach ($archivers as $pluginName => $archiverClass) { + // if we are archiving specific reports for a single plugin then we don't need or want to create + // Archiver instances, since they will set the archive to partial even if the requested reports aren't + // handled by the Archiver + if (!empty($archiveOnlyReports) + && $archiveOnlyPlugin != $pluginName + ) { + continue; + } + // We clean up below all tables created during this function call (and recursive calls) $latestUsedTableId = Manager::getInstance()->getMostRecentTableId(); From 4c2701ba68da4de360f6f75bf94348841ff3cbde Mon Sep 17 00:00:00 2001 From: diosmosis Date: Wed, 15 Mar 2023 00:40:11 -0700 Subject: [PATCH 23/56] add filterRecordBuilders event --- core/Plugin/Archiver.php | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 2343cc6579a..bc1d32fa511 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -133,11 +133,30 @@ private function getRecordBuilders() * $recordBuilders[] = new MyParameterizedRecordBuilder($idOfThingToArchiveFor); * } * - * @param ArchiveProcessor\RecordBuilder[] $reports An array of dimensions + * @param ArchiveProcessor\RecordBuilder[] $recordBuilders An array of RecordBuilder instances * @api */ Piwik::postEvent('Archiver.addRecordBuilders', [&$recordBuilders]); + /** + * Triggered to filter / restrict reports. + * + * **Example** + * + * public function filterRecordBuilders(&$recordBuilders) + * { + * foreach ($reports as $index => $recordBuilder) { + * if ($recordBuilders instanceof AnotherPluginRecordBuilder) { + * unset($reports[$index]); + * } + * } + * } + * + * @param ArchiveProcessor\RecordBuilder[] $recordBuilders An array of RecordBuilder instances + * @api + */ + Piwik::postEvent('Archiver.filterRecordBuilders', [&$recordBuilders]); + $transientCache->save($cacheKey, $recordBuilders); } From c817f790a8cb918d0acbce7ecc62d179053ce205 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Wed, 15 Mar 2023 01:59:03 -0700 Subject: [PATCH 24/56] if it looks like the requested records are numeric, prioritize the numeric archive table, otherwise blob archive table --- core/DataAccess/Model.php | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/core/DataAccess/Model.php b/core/DataAccess/Model.php index db346f20505..ff3304f3ab9 100644 --- a/core/DataAccess/Model.php +++ b/core/DataAccess/Model.php @@ -973,7 +973,7 @@ public function resetFailedArchivingJobs() return $query->rowCount(); } - public function getRecordsContainedInArchives(Date $archiveStartDate, $idArchives, $requestedRecords) + public function getRecordsContainedInArchives(Date $archiveStartDate, array $idArchives, $requestedRecords): array { $idArchives = array_map('intval', $idArchives); $idArchives = implode(',', $idArchives); @@ -986,11 +986,23 @@ public function getRecordsContainedInArchives(Date $archiveStartDate, $idArchive $numericTable = ArchiveTableCreator::getNumericTable($archiveStartDate); $blobTable = ArchiveTableCreator::getBlobTable($archiveStartDate); + // if the requested metrics look numeric, prioritize the numeric table, otherwise the blob table. this way, if all the metrics are + // found in this table (which will be most of the time), we don't have to query the other table + if ($this->doRequestedRecordsLookNumeric($requestedRecords)) { + $tablesToSearch = [$numericTable, $blobTable]; + } else { + $tablesToSearch = [$blobTable, $numericTable]; + } + $existingRecords = []; - foreach ([$numericTable, $blobTable] as $tableName) { + foreach ($tablesToSearch as $tableName) { $sql = sprintf($countSql, $tableName); $rows = Db::fetchAll($sql, $requestedRecords); $existingRecords = array_merge($existingRecords, array_column($rows, 'name')); + + if (count($existingRecords) == count($requestedRecords)) { + break; + } } return $existingRecords; } @@ -1006,4 +1018,14 @@ private function getHashFromDoneFlag($doneFlag) preg_match('/^done([a-zA-Z0-9]+)/', $doneFlag, $matches); return $matches[1] ?? ''; } + + private function doRequestedRecordsLookNumeric(array $requestedRecords): bool + { + foreach ($requestedRecords as $record) { + if (preg_match('/^nb_/', $record)) { + return true; + } + } + return false; + } } From d18b61e02c8d97045dd967f2f03d2904794414dc Mon Sep 17 00:00:00 2001 From: diosmosis Date: Wed, 15 Mar 2023 02:00:05 -0700 Subject: [PATCH 25/56] fix copy-paste error --- core/ArchiveProcessor/Loader.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ArchiveProcessor/Loader.php b/core/ArchiveProcessor/Loader.php index 58d85cba463..4d9ed0416df 100644 --- a/core/ArchiveProcessor/Loader.php +++ b/core/ArchiveProcessor/Loader.php @@ -152,7 +152,7 @@ private function prepareArchiveImpl($pluginName) return $data; } - list($idArchives, $visits, $visitsConverted, $existingArchives) = $data; + list($idArchives, $visits, $visitsConverted, $foundRecords) = $data; return $this->insertArchiveData($visits, $visitsConverted, $idArchives, $foundRecords); } finally { From f6714dcddfdf8e9284d3736fdd08e693961eaac6 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Wed, 15 Mar 2023 17:41:46 -0700 Subject: [PATCH 26/56] add dummy test for numeric values --- tests/PHPUnit/Integration/Archive/PartialArchiveTest.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php b/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php index 2c32f4eb48e..d25ac2ac99c 100644 --- a/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php +++ b/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php @@ -36,7 +36,12 @@ protected static function beforeTableDataCached() self::trackVisit(); } - public function test_rangeArchiving_onlyArchivesSingleRecord() + public function test_rangeArchiving_onlyArchivesSingleRecord_whenQueryingNumerics() + { + // TODO + } + + public function test_rangeArchiving_onlyArchivesSingleRecord_whenQueryingBlobs() { // first trigger all plugins archiving $_GET['trigger'] = 'archivephp'; From 52c4e932858ca666ade3e1287065914b7d53cc5a Mon Sep 17 00:00:00 2001 From: diosmosis Date: Wed, 15 Mar 2023 20:52:51 -0700 Subject: [PATCH 27/56] add test for partial archiving of numeric records for ranges and fix typo causing this to fail --- core/ArchiveProcessor/RecordBuilder.php | 26 ++++++- .../Archive/PartialArchiveTest.php | 73 +++++++++++++++++-- 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 9f60ffab787..a962f803f68 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -71,12 +71,25 @@ public function build(ArchiveProcessor $archiveProcessor) return; } + $recordsBuilt = $this->getRecordMetadata($archiveProcessor); + + $recordMetadataByName = []; + foreach ($recordsBuilt as $recordMetadata) { + $recordMetadataByName[$recordMetadata->getName()] = $recordMetadata; + } + $numericRecords = []; $records = $this->aggregate($archiveProcessor); foreach ($records as $recordName => $recordValue) { if ($recordValue instanceof DataTable) { - $this->insertRecord($archiveProcessor, $recordName, $recordValue); + $record = $recordMetadataByName[$recordName]; + + $maxRowsInTable = $record->getMaxRowsInTable() ?? $this->maxRowsInTable; + $maxRowsInSubtable = $record->getMaxRowsInSubtable() ?? $this->maxRowsInSubtable; + $columnToSortByBeforeTruncation = $record->getColumnToSortByBeforeTruncation() ?? $this->columnToSortByBeforeTruncation; + + $this->insertRecord($archiveProcessor, $recordName, $recordValue, $maxRowsInTable, $maxRowsInSubtable, $columnToSortByBeforeTruncation); Common::destroy($recordValue); unset($recordValue); @@ -130,7 +143,7 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) if (!empty($numericRecords)) { $numericMetrics = array_map(function (Record $r) { return $r->getName(); }, $numericRecords); - if (!empty($requestedReport)) { + if (!empty($requestedReports)) { $numericMetrics = array_filter($numericMetrics, function ($name) use ($requestedReports, $foundRequestedReports) { return in_array($name, $requestedReports) && !in_array($name, $foundRequestedReports); }); @@ -157,9 +170,14 @@ public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor); */ protected abstract function aggregate(ArchiveProcessor $archiveProcessor); - private function insertRecord(ArchiveProcessor $archiveProcessor, $recordName, DataTable\DataTableInterface $record) + private function insertRecord(ArchiveProcessor $archiveProcessor, $recordName, DataTable\DataTableInterface $record, + $maxRowsInTable, $maxRowsInSubtable, $columnToSortByBeforeTruncation) { - $serialized = $record->getSerialized($this->maxRowsInTable, $this->maxRowsInSubtable, $this->columnToSortByBeforeTruncation); + $serialized = $record->getSerialized( + $maxRowsInTable ?: $this->maxRowsInTable, + $maxRowsInSubtable ?: $this->maxRowsInSubtable, + $columnToSortByBeforeTruncation ?: $this->columnToSortByBeforeTruncation + ); $archiveProcessor->insertBlobRecord($recordName, $serialized); unset($serialized); } diff --git a/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php b/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php index d25ac2ac99c..d64c567483e 100644 --- a/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php +++ b/tests/PHPUnit/Integration/Archive/PartialArchiveTest.php @@ -24,7 +24,6 @@ */ class PartialArchiveTest extends IntegrationTestCase { - private static $chrome = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'; private static $dateTime = '2020-04-07 10:03:04'; private $idGoal = 1; @@ -38,7 +37,71 @@ protected static function beforeTableDataCached() public function test_rangeArchiving_onlyArchivesSingleRecord_whenQueryingNumerics() { - // TODO + // first trigger all plugins archiving + $_GET['trigger'] = 'archivephp'; + GoalsApi::getInstance()->get(1, 'day', '2020-04-07', false, $this->idGoal); // day first + $data = GoalsApi::getInstance()->get(1, 'range', '2020-04-06,2020-04-09', false, $this->idGoal); + $this->assertEquals([ + 'nb_conversions' => 1, + 'nb_visits_converted' => 1, + 'revenue' => 0, + 'conversion_rate' => 'Intl_NumberFormatPercent', + 'nb_conversions_new_visit' => 1, + 'nb_visits_converted_new_visit' => 1, + 'revenue_new_visit' => 0, + 'conversion_rate_new_visit' => 'Intl_NumberFormatPercent', + 'nb_conversions_returning_visit' => 0, + 'nb_visits_converted_returning_visit' => 0, + 'revenue_returning_visit' => 0, + 'conversion_rate_returning_visit' => 'Intl_NumberFormatPercent', + ], $data->getFirstRow()->getColumns()); + + // check archive is all plugins archive as expected + [$idArchives, $archiveInfo] = $this->getArchiveInfo('2020_04', 5, false); + $this->assertEquals([ + ['idsite' => 1, 'date1' => '2020-04-06', 'date2' => '2020-04-09', 'period' => 5, 'name' => 'done', 'value' => 1, 'blob_count' => 57], + ], $archiveInfo); + + $maxIdArchive = $this->getMaxIdArchive('2020_04'); + + self::trackAnotherVisit(); + + // trigger browser archiving for range + GoalsApi::getInstance()->get(1, 'day', '2020-04-08', false, $this->idGoal); // day first + unset($_GET['trigger']); + StaticContainer::get(ArchiveInvalidator::class)->markArchivesAsInvalidated([1], ['2020-04-06,2020-04-09'], 'range'); + $data = GoalsApi::getInstance()->get(1, 'range', '2020-04-06,2020-04-09', false, $this->idGoal); + $this->assertEquals([ + 'nb_conversions' => 2, + 'nb_visits_converted' => 2, + 'revenue' => 0, + 'conversion_rate' => 'Intl_NumberFormatPercent', + 'nb_conversions_new_visit' => 2, + 'nb_visits_converted_new_visit' => 2, + 'revenue_new_visit' => 0, + 'conversion_rate_new_visit' => 'Intl_NumberFormatPercent', + 'nb_conversions_returning_visit' => 0, + 'nb_visits_converted_returning_visit' => 0, + 'revenue_returning_visit' => 0, + 'conversion_rate_returning_visit' => 'Intl_NumberFormatPercent', + ], $data->getFirstRow()->getColumns()); + + [$idArchives, $archiveInfo] = $this->getArchiveInfo('2020_04', 5, false, $maxIdArchive); + + $archiveNames = $this->getArchiveNames('2020_04', $idArchives[0], 'numeric'); + $this->assertEquals([ + 'Goal_1_nb_conversions', + 'Goal_1_nb_visits_converted', + ], $archiveNames); + + $blobArchiveNames = $this->getArchiveNames('2020_04', $idArchives[0]); + $this->assertEquals([], $blobArchiveNames); + + $this->assertEquals([ + // expect only one blob for new range partial archive + ['idsite' => 1, 'date1' => '2020-04-06', 'date2' => '2020-04-09', 'period' => 5, 'name' => 'done.Goals', 'value' => 5, 'blob_count' => 0], + ['idsite' => 1, 'date1' => '2020-04-06', 'date2' => '2020-04-09', 'period' => 5, 'name' => 'done.VisitsSummary', 'value' => 1, 'blob_count' => 0], + ], $archiveInfo); } public function test_rangeArchiving_onlyArchivesSingleRecord_whenQueryingBlobs() @@ -122,10 +185,10 @@ private function getArchiveInfo($yearMonth, $period, $segmentHash = false, $idAr return [$idArchives, $archiveNumericInfo]; } - private function getArchiveNames($yearMonth, $idArchive) + private function getArchiveNames($yearMonth, $idArchive, $type = 'blob') { - $sql = 'SELECT DISTINCT name FROM ' . Common::prefixTable('archive_blob_' . $yearMonth) - . ' WHERE idarchive = ?'; + $sql = 'SELECT DISTINCT name FROM ' . Common::prefixTable('archive_' . $type . '_' . $yearMonth) + . ' WHERE idarchive = ? AND name NOT LIKE \'done%\''; $rows = Db::fetchAll($sql, [$idArchive]); $rows = array_column($rows, 'name'); return $rows; From ce8f7a013d63fbb5eb70dbeba52fcfbbaf1ac6ab Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 20 Apr 2023 04:42:17 -0700 Subject: [PATCH 28/56] lessen code redundancy in Archive.php, use Piwik\\Request and do not yet mark RecordBuilder as api --- core/Archive.php | 20 +++++++++++--------- core/ArchiveProcessor/RecordBuilder.php | 6 ------ 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/core/Archive.php b/core/Archive.php index 0722505222b..0c4cd0309a3 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -745,11 +745,7 @@ private function cacheArchiveIdsWithoutLaunching($plugins) */ private function getDoneStringForPlugin($plugin, $idSites) { - // TODO: code redundancy w/ cacheArchiveIdsAfterLaunching - $requestedReport = null; - if (SettingsServer::isArchivePhpTriggered()) { - $requestedReport = Common::getRequestVar('requestedReport', '', 'string'); - } + $requestedReport = $this->getRequestedReport(); $shouldOnlyProcessRequestedRecords = empty($requestedReport) && Rules::shouldProcessOnlyReportsRequestedInArchiveQuery($this->getPeriodLabel()); @@ -903,10 +899,7 @@ private function prepareArchive(array $archiveNamesByPlugin, Site $site, Period { $coreAdminHomeApi = API::getInstance(); - $requestedReport = null; - if (SettingsServer::isArchivePhpTriggered()) { - $requestedReport = Common::getRequestVar('requestedReport', '', 'string'); - } + $requestedReport = $this->getRequestedReport(); $shouldOnlyProcessRequestedArchives = empty($requestedReport) && Rules::shouldProcessOnlyReportsRequestedInArchiveQuery($period->getLabel()); @@ -973,4 +966,13 @@ public function forceFetchingWithoutLaunchingArchiving() { $this->forceFetchingWithoutLaunchingArchiving = true; } + + private function getRequestedReport(): string + { + $requestedReport = null; + if (SettingsServer::isArchivePhpTriggered()) { + $requestedReport = Request::fromRequest()->getStringParameter('requestedReport', ''); + } + return $requestedReport; + } } diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index a962f803f68..72581c0dff4 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -14,8 +14,6 @@ /** * Inherit from this class to define archiving logic for one or more records. - * - * @since 5.0.0 */ abstract class RecordBuilder { @@ -49,7 +47,6 @@ abstract class RecordBuilder * @param int|null $maxRowsInSubtable * @param string|int|null $columnToSortByBeforeTruncation * @param array|null $columnAggregationOps - * @api */ public function __construct($maxRowsInTable = null, $maxRowsInSubtable = null, $columnToSortByBeforeTruncation = null, $columnAggregationOps = null) @@ -157,7 +154,6 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) * record your RecordBuilder creates should have an associated piece of record metadata. * * @return Record[] - * @api */ public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor); @@ -166,7 +162,6 @@ public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor); * to store indexed by record names. * * @return (DataTable|int|float|string)[] Record values indexed by their record name, eg, `['MyPlugin_MyRecord' => new DataTable()]` - * @api */ protected abstract function aggregate(ArchiveProcessor $archiveProcessor); @@ -211,7 +206,6 @@ public function getPluginName() * like the origin hint to have more information. * * @return string - * @api */ public function getQueryOriginHint() { From 5b719b1bfc0d583c4456d2e5049c467d0c49b4e7 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 20 Apr 2023 04:49:11 -0700 Subject: [PATCH 29/56] fix type hint --- core/Archive.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Archive.php b/core/Archive.php index 0c4cd0309a3..921d543576f 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -967,7 +967,7 @@ public function forceFetchingWithoutLaunchingArchiving() $this->forceFetchingWithoutLaunchingArchiving = true; } - private function getRequestedReport(): string + private function getRequestedReport(): ?string { $requestedReport = null; if (SettingsServer::isArchivePhpTriggered()) { From 8b48227b0501884dddffc179a700ad7311c014dc Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 20 Apr 2023 05:08:19 -0700 Subject: [PATCH 30/56] fix php-cs errors --- plugins/Goals/Archiver.php | 10 ---------- plugins/Goals/Reports/GetVisitsUntilConversion.php | 1 - 2 files changed, 11 deletions(-) diff --git a/plugins/Goals/Archiver.php b/plugins/Goals/Archiver.php index d538bc62551..bdd913a62d4 100644 --- a/plugins/Goals/Archiver.php +++ b/plugins/Goals/Archiver.php @@ -9,16 +9,6 @@ namespace Piwik\Plugins\Goals; -use Piwik\ArchiveProcessor; -use Piwik\Common; -use Piwik\Config; -use Piwik\DataAccess\LogAggregator; -use Piwik\DataArray; -use Piwik\DataTable; -use Piwik\Metrics; -use Piwik\Plugin\Manager; -use Piwik\Site; -use Piwik\Tracker\GoalManager; use Piwik\Plugins\VisitFrequency\API as VisitFrequencyAPI; class Archiver extends \Piwik\Plugin\Archiver diff --git a/plugins/Goals/Reports/GetVisitsUntilConversion.php b/plugins/Goals/Reports/GetVisitsUntilConversion.php index 8b5933bd624..2ae3deaebc3 100644 --- a/plugins/Goals/Reports/GetVisitsUntilConversion.php +++ b/plugins/Goals/Reports/GetVisitsUntilConversion.php @@ -11,7 +11,6 @@ use Piwik\Piwik; use Piwik\Plugin\ViewDataTable; use Piwik\Plugins\Goals\Columns\VisitsUntilConversion; -use Piwik\Plugins\Goals\Archiver; use Piwik\Plugins\Goals\RecordBuilders\GeneralGoalsRecords; class GetVisitsUntilConversion extends Base From 16c58573605e25a50cfae21476dbc3dea5be2d12 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 20 Apr 2023 06:02:09 -0700 Subject: [PATCH 31/56] fix failing tests --- .../DataAccess/ArchiveSelectorTest.php | 14 +++++++++ ...bsiteSeveralDaysDateRangeArchivingTest.php | 29 +++++++------------ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php index 7759b64576a..e4d184b3590 100644 --- a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php +++ b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php @@ -201,6 +201,13 @@ public function test_getArchiveIdAndVisits_returnsCorrectResult($period, $date, unset($result[4]); unset($result[5]); + if (isset($result['existingRecords'])) { + sort($result['existingRecords']); + } + if (isset($expected['existingRecords'])) { + sort($expected['existingRecords']); + } + $this->assertEquals($expected, $result); } @@ -453,6 +460,13 @@ public function test_getArchiveIdAndVisits_whenThereAreOnlyPartialArchives($arch unset($result[4]); unset($result[5]); + if (isset($result['existingRecords'])) { + sort($result['existingRecords']); + } + if (isset($expected['existingRecords'])) { + sort($expected['existingRecords']); + } + $this->assertEquals($expected, $result); } diff --git a/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php b/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php index 5f10dc427ee..d1679ba6363 100644 --- a/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php +++ b/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php @@ -125,25 +125,18 @@ public function test_checkArchiveRecords_whenPeriodIsRange() + 2 /* VisitTime */) * 3, /** - * In Each "Period=range" Archive, we expect following non zero numeric entries: - * 5 metrics + 1 flag // VisitsSummary - * + 2 metrics + 1 flag // Actions - * + 1 flag // Resolution - * + 1 flag // VisitTime - * = 11 + * segments: 9 (including all visits) + * plugins: 4 different plugins + * VisitsSummary: 9 archives (8 segments + all visits) (4 metrics in each + 3 bounce_counts across 3 archives) + * Actions: 3 archives (2 segments + all visits) (2 metrics in each) + * Resolution: 3 archives (2 segments + all visits) (0 metrics in each) + * VisitTime: 3 archives (2 segments + all visits) (0 metrics in each) * - * because we call VisitFrequency.get, this creates archives for visitorType==returning - * and visitorType==new segment. - * -> There are two archives for each segment (one for "countryCode!=aa" - * and VisitFrequency creates two more. - * - * So each period=range will have = 11 records + (5 metrics + 2 flags // VisitsSummary + 3 metrics // VisitorInterest) - * - * Total expected records = count unique segments * records per segment - * = 3 * 21 - * = 63 - * - * + 3 * 10 extra done flags for partial archives since range archives only archive single reports + * Total: 9 VisitsSummary done flags + ((4 * 9) + 3) VisitsSummary metrics + * + 3 Actions done flags + 3 * 2 Actions metrics + * + 3 Resolution done flags + * + 3 VisitTime done flags + * = 63 */ 'archive_numeric_2010_12' => 93, From 0d5bd3d380d8e2c433c4a821b8e4cc9eb2dcd5ab Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 20 Apr 2023 06:16:15 -0700 Subject: [PATCH 32/56] fix failing tests (really) --- .../OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php b/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php index d1679ba6363..1b0fba5e356 100644 --- a/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php +++ b/tests/PHPUnit/System/OneVisitorOneWebsiteSeveralDaysDateRangeArchivingTest.php @@ -138,7 +138,7 @@ public function test_checkArchiveRecords_whenPeriodIsRange() * + 3 VisitTime done flags * = 63 */ - 'archive_numeric_2010_12' => 93, + 'archive_numeric_2010_12' => 63, /** * In the January date range, From 81e9ead6c88f4151c5e00bbaca070899caa745c7 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 11 May 2023 13:55:37 -0700 Subject: [PATCH 33/56] fix isEnabled calls --- core/Plugin/Archiver.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index bc1d32fa511..c139eea0bd7 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -187,7 +187,7 @@ final public function callAggregateDayReport() foreach ($recordBuilders as $recordBuilder) { if ($recordBuilder->getPluginName() != $pluginName - || !$recordBuilder->isEnabled() + || !$recordBuilder->isEnabled($this->getProcessor()) ) { continue; } @@ -229,7 +229,7 @@ final public function callAggregateMultipleReports() $recordBuilders = $this->getRecordBuilders(); foreach ($recordBuilders as $recordBuilder) { if ($recordBuilder->getPluginName() != $pluginName - || !$recordBuilder->isEnabled() + || !$recordBuilder->isEnabled($this->getProcessor()) ) { continue; } From 94b98da9bcaa2269daa2f2b40b3d3b3371e17bd9 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 11 May 2023 16:40:15 -0700 Subject: [PATCH 34/56] only add idarchive to Archive.php idarchive cache if it is not already there (makes debugging a little less confusing) --- core/Archive.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/Archive.php b/core/Archive.php index 921d543576f..265b7807a9d 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -928,7 +928,11 @@ private function prepareArchive(array $archiveNamesByPlugin, Site $site, Period && !empty($prepareResult['idarchives']) ) { foreach ($prepareResult['idarchives'] as $idArchive) { - $this->idarchives[$doneFlag][$periodString][] = $idArchive; + if (empty($this->idarchives[$doneFlag][$periodString]) + || !in_array($idArchive, $this->idarchives[$doneFlag][$periodString]) + ) { + $this->idarchives[$doneFlag][$periodString][] = $idArchive; + } } } } From 63146a3ad39250f167763bcc91dea708fc50e3f7 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 11 May 2023 16:40:32 -0700 Subject: [PATCH 35/56] remove unneeded TODO --- core/ArchiveProcessor/RecordBuilder.php | 1 - 1 file changed, 1 deletion(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index b0cc17db702..a43f9d64aae 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -116,7 +116,6 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) $numericRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_NUMERIC; }); $blobRecords = array_filter($recordsBuilt, function (Record $r) { return $r->getType() == Record::TYPE_BLOB; }); - // TODO: dependsOn in record metadata foreach ($blobRecords as $record) { if (!empty($requestedReports) && !in_array($record->getName(), $requestedReports) From 6b09b25eb64fcfc41879541806ec1f4e36c6767e Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 11 May 2023 16:41:10 -0700 Subject: [PATCH 36/56] when forcing new archive because timestamp is too old, do not report any existing archives --- core/DataAccess/ArchiveSelector.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index 06cde78fd96..fd898043133 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -151,7 +151,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params 'archiveExists' => true, 'tsArchived' => $tsArchived, 'doneFlagValue' => $value, - 'existingRecords' => $existingRecords, + 'existingRecords' => [], ]); } From 23e2e371189c4b65ea9ebff64a6bd437ad398c55 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 11 May 2023 18:09:55 -0700 Subject: [PATCH 37/56] report no existing archives if done flag is different + add tests --- core/DataAccess/ArchiveSelector.php | 4 +- .../DataAccess/ArchiveSelectorTest.php | 50 ++++++++++++++++--- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index fd898043133..9ee1819efed 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -131,7 +131,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params 'archiveExists' => true, 'tsArchived' => $tsArchived, 'doneFlagValue' => $value, - 'existingRecords' => $existingRecords, + 'existingRecords' => null, ]); } @@ -151,7 +151,7 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params 'archiveExists' => true, 'tsArchived' => $tsArchived, 'doneFlagValue' => $value, - 'existingRecords' => [], + 'existingRecords' => null, ]); } diff --git a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php index e4d184b3590..f2c6cc212da 100644 --- a/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php +++ b/tests/PHPUnit/Integration/DataAccess/ArchiveSelectorTest.php @@ -431,7 +431,7 @@ public function getTestDataForGetArchiveIdAndVisits() /** * @dataProvider getTestDataForGetArchiveIdAndVisitsWithOnlyPartialArchives */ - public function test_getArchiveIdAndVisits_whenThereAreOnlyPartialArchives($archiveRows, $requestedReports, $expected) + public function test_getArchiveIdAndVisits_whenThereAreOnlyPartialArchives($archiveRows, $requestedReports, $expected, $minDatetimeArchiveProcessedUTC = false) { Fixture::createWebsite('2010-02-02 00:00:00'); @@ -444,7 +444,7 @@ public function test_getArchiveIdAndVisits_whenThereAreOnlyPartialArchives($arch $params->setRequestedPlugin('TestPlugin'); $params->setArchiveOnlyReport($requestedReports); - $result = ArchiveSelector::getArchiveIdAndVisits($params); + $result = ArchiveSelector::getArchiveIdAndVisits($params, $minDatetimeArchiveProcessedUTC); if ($result['tsArchived'] !== false) { Date::factory($result['tsArchived']); @@ -522,18 +522,18 @@ public function getTestDataForGetArchiveIdAndVisitsWithOnlyPartialArchives() 'visitsConverted' => false, 'archiveExists' => true, 'doneFlagValue' => false, - 'existingRecords' => [], + 'existingRecords' => null, ], ], // only partial archives, requested reports, some existing reports (both numeric and blob) [ [ - ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5], - ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5], - ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 1', 'is_blob_data' => true], - ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric2', 'value' => 5], - ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 2', 'is_blob_data' => true], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 1', 'is_blob_data' => true, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric2', 'value' => 5, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 2', 'is_blob_data' => true, 'ts_archived' => '2020-03-08 03:00:00'], ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric', 'value' => 5], @@ -556,6 +556,40 @@ public function getTestDataForGetArchiveIdAndVisitsWithOnlyPartialArchives() 'doneFlagValue' => false, 'existingRecords' => ['TestPlugin_metric', 'TestPlugin_blob'], ], + '2020-03-08 00:00:00', + ], + + // only partial archives, requested reports, some existing reports (both numeric and blob), but archive is too old + [ + [ + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'done.TestPlugin', 'value' => 5, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric', 'value' => 5, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 1', 'is_blob_data' => true, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_metric2', 'value' => 5, 'ts_archived' => '2020-03-08 03:00:00'], + ['idarchive' => 1, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-04', 'date2' => '2020-03-08', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 2', 'is_blob_data' => true, 'ts_archived' => '2020-03-08 03:00:00'], + + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 3', 'is_blob_data' => true], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric2', 'value' => 5], + ['idarchive' => 2, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 4', 'is_blob_data' => true], + + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'done.TestPlugin', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_metric', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04', 'name' => 'TestPlugin_blob', 'value' => 'slkdjf 5', 'is_blob_data' => true], + ['idarchive' => 3, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_metric2', 'value' => 5], + ['idarchive' => 3, 'idsite' => 1, 'period' => 5, 'date1' => '2020-03-03', 'date2' => '2020-03-09', 'name' => 'TestPlugin_blob2', 'value' => 'slkdjf 6', 'is_blob_data' => true], + ], + ['TestPlugin_metric', 'TestPlugin_blob'], + [ + 'idArchives' => false, + 'visits' => false, + 'visitsConverted' => false, + 'archiveExists' => true, + 'doneFlagValue' => false, + 'existingRecords' => null, + ], + '2020-03-08 09:00:00', ], // only partial archives, requested reports, all existing reports (both numeric and blob) From eea32113a8cfb39a8b4d13c416c6702da5ec020b Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 14 May 2023 09:05:03 -0700 Subject: [PATCH 38/56] remove unneeded unset --- core/ArchiveProcessor/RecordBuilder.php | 1 - 1 file changed, 1 deletion(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index a43f9d64aae..9288696bff4 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -89,7 +89,6 @@ public function build(ArchiveProcessor $archiveProcessor) $this->insertRecord($archiveProcessor, $recordName, $recordValue, $maxRowsInTable, $maxRowsInSubtable, $columnToSortByBeforeTruncation); Common::destroy($recordValue); - unset($recordValue); } else { // collect numeric records so we can insert them all at once $numericRecords[$recordName] = $recordValue; From 6c610fcdd9be1b41cb7dbb6ccd2a245cd088126e Mon Sep 17 00:00:00 2001 From: sgiehl Date: Mon, 15 May 2023 11:28:00 +0200 Subject: [PATCH 39/56] fix phpcs --- plugins/Goals/RecordBuilders/Base.php | 2 +- plugins/Goals/RecordBuilders/GeneralGoalsRecords.php | 2 +- plugins/Goals/RecordBuilders/ProductRecord.php | 3 ++- tests/PHPUnit/Integration/Archive/PartialArchiveTest.php | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/Goals/RecordBuilders/Base.php b/plugins/Goals/RecordBuilders/Base.php index 5d7ac90a126..0b275643761 100644 --- a/plugins/Goals/RecordBuilders/Base.php +++ b/plugins/Goals/RecordBuilders/Base.php @@ -37,4 +37,4 @@ protected function getEcommerceIdGoals() { return array(GoalManager::IDGOAL_CART, GoalManager::IDGOAL_ORDER); } -} \ No newline at end of file +} diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index 0c121d726ee..2f2f1e027cb 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -236,4 +236,4 @@ public function isEnabled(ArchiveProcessor $archiveProcessor) { return $archiveProcessor->getNumberOfVisitsConverted() > 0; } -} \ No newline at end of file +} diff --git a/plugins/Goals/RecordBuilders/ProductRecord.php b/plugins/Goals/RecordBuilders/ProductRecord.php index 0cedf995d14..a39b4e007af 100644 --- a/plugins/Goals/RecordBuilders/ProductRecord.php +++ b/plugins/Goals/RecordBuilders/ProductRecord.php @@ -1,3 +1,4 @@ + Date: Mon, 15 May 2023 07:54:09 -0700 Subject: [PATCH 40/56] remove unneeded newline --- plugins/Goals/RecordBuilders/ProductRecord.php | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/Goals/RecordBuilders/ProductRecord.php b/plugins/Goals/RecordBuilders/ProductRecord.php index a39b4e007af..85669250884 100644 --- a/plugins/Goals/RecordBuilders/ProductRecord.php +++ b/plugins/Goals/RecordBuilders/ProductRecord.php @@ -1,4 +1,3 @@ - Date: Wed, 17 May 2023 12:47:10 -0700 Subject: [PATCH 41/56] use siteAware cache for RecordBuilder array --- core/Plugin/Archiver.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index c139eea0bd7..467cae86be3 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -11,6 +11,7 @@ use Piwik\ArchiveProcessor; use Piwik\Cache; +use Piwik\CacheId; use Piwik\Config as PiwikConfig; use Piwik\Container\StaticContainer; use Piwik\ErrorHandler; @@ -108,7 +109,7 @@ private function getPluginName() private function getRecordBuilders() { $transientCache = Cache::getTransientCache(); - $cacheKey = 'Archiver.RecordBuilders'; + $cacheKey = CacheId::siteAware('Archiver.RecordBuilders'); $recordBuilders = $transientCache->fetch($cacheKey); if ($recordBuilders === false) { From ef4ca18ba8ae20c1b52c0167fe57466921e0ec32 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Thu, 18 May 2023 17:04:09 -0700 Subject: [PATCH 42/56] better typehints in RecordBuilder --- core/ArchiveProcessor/RecordBuilder.php | 32 +++++++++---------- .../RecordBuilders/GeneralGoalsRecords.php | 6 ++-- .../Goals/RecordBuilders/ProductRecord.php | 6 ++-- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 9288696bff4..b28dcb6905a 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -28,7 +28,7 @@ abstract class RecordBuilder protected $maxRowsInSubtable; /** - * @var string|int + * @var string|null */ protected $columnToSortByBeforeTruncation; @@ -45,11 +45,11 @@ abstract class RecordBuilder /** * @param int|null $maxRowsInTable * @param int|null $maxRowsInSubtable - * @param string|int|null $columnToSortByBeforeTruncation + * @param string|null $columnToSortByBeforeTruncation * @param array|null $columnAggregationOps */ - public function __construct($maxRowsInTable = null, $maxRowsInSubtable = null, - $columnToSortByBeforeTruncation = null, $columnAggregationOps = null) + public function __construct(?int $maxRowsInTable = null, ?int $maxRowsInSubtable = null, + ?string $columnToSortByBeforeTruncation = null, ?array $columnAggregationOps = null) { $this->maxRowsInTable = $maxRowsInTable; $this->maxRowsInSubtable = $maxRowsInSubtable; @@ -57,12 +57,12 @@ public function __construct($maxRowsInTable = null, $maxRowsInSubtable = null, $this->columnAggregationOps = $columnAggregationOps; } - public function isEnabled(ArchiveProcessor $archiveProcessor) + public function isEnabled(ArchiveProcessor $archiveProcessor): bool { return true; } - public function build(ArchiveProcessor $archiveProcessor) + public function build(ArchiveProcessor $archiveProcessor): void { if (!$this->isEnabled($archiveProcessor)) { return; @@ -101,7 +101,7 @@ public function build(ArchiveProcessor $archiveProcessor) } } - public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) + public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor): void { if (!$this->isEnabled($archiveProcessor)) { return; @@ -153,7 +153,7 @@ public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor) * * @return Record[] */ - public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor); + public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor): array; /** * Derived classes should define this method to aggregate log data for a single day and return the records @@ -161,10 +161,10 @@ public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor); * * @return (DataTable|int|float|string)[] Record values indexed by their record name, eg, `['MyPlugin_MyRecord' => new DataTable()]` */ - protected abstract function aggregate(ArchiveProcessor $archiveProcessor); + protected abstract function aggregate(ArchiveProcessor $archiveProcessor): array; private function insertRecord(ArchiveProcessor $archiveProcessor, $recordName, DataTable\DataTableInterface $record, - $maxRowsInTable, $maxRowsInSubtable, $columnToSortByBeforeTruncation) + ?int $maxRowsInTable, ?int $maxRowsInSubtable, ?string $columnToSortByBeforeTruncation): void { $serialized = $record->getSerialized( $maxRowsInTable ?: $this->maxRowsInTable, @@ -175,22 +175,22 @@ private function insertRecord(ArchiveProcessor $archiveProcessor, $recordName, D unset($serialized); } - public function getMaxRowsInTable() + public function getMaxRowsInTable(): ?int { return $this->maxRowsInTable; } - public function getMaxRowsInSubtable() + public function getMaxRowsInSubtable(): ?int { return $this->maxRowsInSubtable; } - public function getColumnToSortByBeforeTruncation() + public function getColumnToSortByBeforeTruncation(): ?string { return $this->columnToSortByBeforeTruncation; } - public function getPluginName() + public function getPluginName(): ?string { $className = get_class($this); $parts = explode('\\', $className); @@ -205,7 +205,7 @@ public function getPluginName() * * @return string */ - public function getQueryOriginHint() + public function getQueryOriginHint(): ?string { $recordBuilderName = get_class($this); $recordBuilderName = explode('\\', $recordBuilderName); @@ -221,7 +221,7 @@ public function getQueryOriginHint() * @param string[] $requestedReports The list of requested reports to check for. * @return bool */ - public function isBuilderForAtLeastOneOf(ArchiveProcessor $archiveProcessor, array $requestedReports) + public function isBuilderForAtLeastOneOf(ArchiveProcessor $archiveProcessor, array $requestedReports): bool { $recordMetadata = $this->getRecordMetadata($archiveProcessor); foreach ($recordMetadata as $record) { diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index 2f2f1e027cb..8b97dcf963a 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -68,7 +68,7 @@ class GeneralGoalsRecords extends Base array(364) ); - protected function aggregate(ArchiveProcessor $archiveProcessor) + protected function aggregate(ArchiveProcessor $archiveProcessor): array { $prefixes = array( self::VISITS_UNTIL_RECORD_NAME => 'vcv', @@ -193,7 +193,7 @@ protected function isStandardGoal($idGoal) return !in_array($idGoal, $this->getEcommerceIdGoals()); } - public function getRecordMetadata(ArchiveProcessor $archiveProcessor) + public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array { $goals = API::getInstance()->getGoals($this->getSiteId($archiveProcessor)); $goals = array_keys($goals); @@ -232,7 +232,7 @@ protected function getConversionsNumericMetrics(DataArray $goals) return $numericRecords; } - public function isEnabled(ArchiveProcessor $archiveProcessor) + public function isEnabled(ArchiveProcessor $archiveProcessor): bool { return $archiveProcessor->getNumberOfVisitsConverted() > 0; } diff --git a/plugins/Goals/RecordBuilders/ProductRecord.php b/plugins/Goals/RecordBuilders/ProductRecord.php index 85669250884..e0f12cff437 100644 --- a/plugins/Goals/RecordBuilders/ProductRecord.php +++ b/plugins/Goals/RecordBuilders/ProductRecord.php @@ -70,12 +70,12 @@ public function __construct($dimension, $recordName, $otherDimensionsToAggregate $this->dimensionsToAggregate = array_merge([$dimension], $otherDimensionsToAggregate); } - public function isEnabled(ArchiveProcessor $archiveProcessor) + public function isEnabled(ArchiveProcessor $archiveProcessor): bool { return Manager::getInstance()->isPluginActivated('Ecommerce'); } - public function getRecordMetadata(ArchiveProcessor $archiveProcessor) + public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array { $abandonedCartRecordName = Archiver::getItemRecordNameAbandonedCart($this->recordName); @@ -85,7 +85,7 @@ public function getRecordMetadata(ArchiveProcessor $archiveProcessor) ]; } - protected function aggregate(ArchiveProcessor $archiveProcessor) + protected function aggregate(ArchiveProcessor $archiveProcessor): array { $itemReports = []; foreach ($this->getEcommerceIdGoals() as $ecommerceType) { From 80a270e102ac244f0b7d3c8cfc90aeb5b09931a7 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sun, 21 May 2023 14:28:25 -0700 Subject: [PATCH 43/56] ignore any records that are not declared in the record metadata (which can happen, for instance, when a goal has been deleted but is still referred to in log data) --- core/ArchiveProcessor/RecordBuilder.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index b28dcb6905a..20c18a6e926 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -79,6 +79,13 @@ public function build(ArchiveProcessor $archiveProcessor): void $records = $this->aggregate($archiveProcessor); foreach ($records as $recordName => $recordValue) { + if (empty($recordMetadataByName[$recordName])) { + if ($recordValue instanceof DataTable) { + Common::destroy($recordValue); + } + continue; + } + if ($recordValue instanceof DataTable) { $record = $recordMetadataByName[$recordName]; From 8a7c75d03d1d2267ba555d19b461d2e1de496738 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Mon, 22 May 2023 12:00:26 -0700 Subject: [PATCH 44/56] apply review feedback --- core/Archive.php | 4 +- core/ArchiveProcessor/Loader.php | 4 +- core/ArchiveProcessor/Parameters.php | 2 +- core/ArchiveProcessor/PluginsArchiver.php | 1 + core/ArchiveProcessor/RecordBuilder.php | 18 ++++- core/CronArchive.php | 3 +- core/Plugin/Archiver.php | 30 ++++---- .../RecordBuilders/GeneralGoalsRecords.php | 74 +++++++++---------- 8 files changed, 76 insertions(+), 60 deletions(-) diff --git a/core/Archive.php b/core/Archive.php index 265b7807a9d..367b28f2471 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -914,13 +914,15 @@ private function prepareArchive(array $archiveNamesByPlugin, Site $site, Period $doneFlag = $this->getDoneStringForPlugin($plugin, $idSites); $this->initializeArchiveIdCache($doneFlag); + $reportsToArchiveForThisPlugin = (empty($requestedReport) && $shouldOnlyProcessRequestedArchives) ? $archiveNames : $requestedReport; + $prepareResult = $coreAdminHomeApi->archiveReports( $site->getId(), $period->getLabel(), $periodDateStr, $this->params->getSegment()->getOriginalString(), $plugin, - empty($requestedReport) && $shouldOnlyProcessRequestedArchives ? $archiveNames : $requestedReport + $reportsToArchiveForThisPlugin ); if ( diff --git a/core/ArchiveProcessor/Loader.php b/core/ArchiveProcessor/Loader.php index 0fc34485b1f..4a3b60e8130 100644 --- a/core/ArchiveProcessor/Loader.php +++ b/core/ArchiveProcessor/Loader.php @@ -223,7 +223,7 @@ protected function loadArchiveData() $visits = $archiveInfo['visits']; $visitsConverted = $archiveInfo['visitsConverted']; $tsArchived = $archiveInfo['tsArchived']; - $value = $archiveInfo['doneFlagValue']; + $doneFlagValue = $archiveInfo['doneFlagValue']; $existingArchives = $archiveInfo['existingRecords']; $requestedRecords = $this->params->getArchiveOnlyReportAsArray(); @@ -231,7 +231,7 @@ protected function loadArchiveData() if (!empty($idArchives) && !Rules::isActuallyForceArchivingSinglePlugin() - && !$this->shouldForceInvalidatedArchive($value, $tsArchived) + && !$this->shouldForceInvalidatedArchive($doneFlagValue, $tsArchived) && !$isMissingRequestedRecords ) { // we have a usable idarchive (it's not invalidated and it's new enough), and we are not archiving diff --git a/core/ArchiveProcessor/Parameters.php b/core/ArchiveProcessor/Parameters.php index b4e7bd1d623..ea8c6703f23 100644 --- a/core/ArchiveProcessor/Parameters.php +++ b/core/ArchiveProcessor/Parameters.php @@ -274,7 +274,7 @@ public function __toString() { $requestedReports = $this->getArchiveOnlyReport(); if (is_array($requestedReports)) { - $requestedReports = json_encode($requestedReports); + $requestedReports = implode(', ', $requestedReports); } return "[idSite = {$this->getSite()->getId()}, period = {$this->getPeriod()->getLabel()} {$this->getPeriod()->getRangeString()}, segment = {$this->getSegment()->getString()}, plugin = {$this->getRequestedPlugin()}, report = {$requestedReports}]"; } diff --git a/core/ArchiveProcessor/PluginsArchiver.php b/core/ArchiveProcessor/PluginsArchiver.php index ed7e24c4514..408389f404e 100644 --- a/core/ArchiveProcessor/PluginsArchiver.php +++ b/core/ArchiveProcessor/PluginsArchiver.php @@ -197,6 +197,7 @@ public function callAggregateAllPlugins($visits, $visitsConverted, $forceArchivi $this->params->getSegment() ? sprintf("(for segment = '%s')", $this->params->getSegment()->getString()) : '' ); } catch (Exception $e) { + throw $e; throw new PluginsArchiverException($e->getMessage() . " - in plugin $pluginName.", $e->getCode(), $e); } finally { self::$currentPluginBeingArchived = null; diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 20c18a6e926..12c4fcb226f 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -62,7 +62,14 @@ public function isEnabled(ArchiveProcessor $archiveProcessor): bool return true; } - public function build(ArchiveProcessor $archiveProcessor): void + /** + * Used the `aggregate()` protected function to build records by aggregating log table data directly, then + * inserts them as archive data. + * + * @param ArchiveProcessor $archiveProcessor + * @return void + */ + public function buildFromLogs(ArchiveProcessor $archiveProcessor): void { if (!$this->isEnabled($archiveProcessor)) { return; @@ -108,7 +115,14 @@ public function build(ArchiveProcessor $archiveProcessor): void } } - public function buildMultiplePeriod(ArchiveProcessor $archiveProcessor): void + /** + * Builds records for non-day periods by aggregating day records together, then inserts + * them as archive data. + * + * @param ArchiveProcessor $archiveProcessor + * @return void + */ + public function buildForNonDayPeriod(ArchiveProcessor $archiveProcessor): void { if (!$this->isEnabled($archiveProcessor)) { return; diff --git a/core/CronArchive.php b/core/CronArchive.php index 2b53d27be9f..78068de9d9f 100644 --- a/core/CronArchive.php +++ b/core/CronArchive.php @@ -1025,11 +1025,12 @@ public function canWeSkipInvalidatingBecauseThereIsAUsablePeriod(Parameters $par // empty plugins param since we only check for an 'all' archive $archiveInfo = ArchiveSelector::getArchiveIdAndVisits($params, $minArchiveProcessedTime, $includeInvalidated = $isPeriodIncludesToday); $idArchive = $archiveInfo['idArchives']; + $tsArchived = $archiveInfo['tsArchived']; // day has changed since the archive was created, we need to reprocess it if ($isYesterday && !empty($idArchive) - && Date::factory($archiveInfo['tsArchived'])->toString() != $today->toString() + && Date::factory($tsArchived)->toString() != $today->toString() ) { return false; } diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 467cae86be3..da98a56fc95 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -93,11 +93,6 @@ private function getPluginName() $parts = explode('\\', $className); $parts = array_filter($parts); $plugin = $parts[2]; - - if (!Manager::getInstance()->isPluginLoaded($plugin)) { - return null; - } - return $plugin; } @@ -114,13 +109,11 @@ private function getRecordBuilders() $recordBuilders = $transientCache->fetch($cacheKey); if ($recordBuilders === false) { $recordBuilderClasses = Manager::getInstance()->findMultipleComponents('RecordBuilders', ArchiveProcessor\RecordBuilder::class); + $recordBuilderClasses = $this->getDefaultConstructibleClasses($recordBuilderClasses); $recordBuilders = array_map(function ($className) { - if ((new \ReflectionClass($className))->getConstructor()->getNumberOfRequiredParameters() == 0) { - return StaticContainer::getContainer()->make($className); - } + return StaticContainer::getContainer()->make($className); }, $recordBuilderClasses); - $recordBuilders = array_filter($recordBuilders); /** * Triggered to add new RecordBuilders that cannot be picked up automatically by the platform. @@ -161,10 +154,8 @@ private function getRecordBuilders() $transientCache->save($cacheKey, $recordBuilders); } - $requestedReports = $this->processor->getParams()->getArchiveOnlyReport(); + $requestedReports = $this->processor->getParams()->getArchiveOnlyReportAsArray(); if (!empty($requestedReports)) { - $requestedReports = is_string($requestedReports) ? [$requestedReports] : $requestedReports; - $recordBuilders = array_filter($recordBuilders, function (ArchiveProcessor\RecordBuilder $builder) use ($requestedReports) { return $builder->isBuilderForAtLeastOneOf($this->processor, $requestedReports); }); @@ -183,7 +174,7 @@ final public function callAggregateDayReport() $pluginName = $this->getPluginName(); - if (!empty($pluginName)) { + if (Manager::getInstance()->isPluginLoaded($pluginName)) { $recordBuilders = $this->getRecordBuilders(); foreach ($recordBuilders as $recordBuilder) { @@ -203,7 +194,7 @@ final public function callAggregateDayReport() $newQueryHint = $originalQueryHint . ' ' . $recordBuilder->getQueryOriginHint(); try { $this->getProcessor()->getLogAggregator()->setQueryOriginHint($newQueryHint); - $recordBuilder->build($this->getProcessor()); + $recordBuilder->buildFromLogs($this->getProcessor()); } finally { $this->getProcessor()->getLogAggregator()->setQueryOriginHint($originalQueryHint); } @@ -226,7 +217,7 @@ final public function callAggregateMultipleReports() $pluginName = $this->getPluginName(); - if (!empty($pluginName)) { + if (Manager::getInstance()->isPluginLoaded($pluginName)) { $recordBuilders = $this->getRecordBuilders(); foreach ($recordBuilders as $recordBuilder) { if ($recordBuilder->getPluginName() != $pluginName @@ -245,7 +236,7 @@ final public function callAggregateMultipleReports() $newQueryHint = $originalQueryHint . ' ' . $recordBuilder->getQueryOriginHint(); try { $this->getProcessor()->getLogAggregator()->setQueryOriginHint($newQueryHint); - $recordBuilder->buildMultiplePeriod($this->getProcessor()); + $recordBuilder->buildForNonDayPeriod($this->getProcessor()); } finally { $this->getProcessor()->getLogAggregator()->setQueryOriginHint($originalQueryHint); } @@ -341,4 +332,11 @@ protected function isRequestedReport(string $reportName) return empty($requestedReport) || $requestedReport == $reportName; } + + private function getDefaultConstructibleClasses(array $classes): array + { + return array_filter($classes, function ($className) { + return (new \ReflectionClass($className))->getConstructor()->getNumberOfRequiredParameters() == 0; + }); + } } diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index 8b97dcf963a..22d0fffc45e 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -32,48 +32,48 @@ class GeneralGoalsRecords extends Base /** * This array stores the ranges to use when displaying the 'visits to conversion' report */ - public static $visitCountRanges = array( - array(1, 1), - array(2, 2), - array(3, 3), - array(4, 4), - array(5, 5), - array(6, 6), - array(7, 7), - array(8, 8), - array(9, 14), - array(15, 25), - array(26, 50), - array(51, 100), - array(100) - ); + public static $visitCountRanges = [ + [1, 1], + [2, 2], + [3, 3], + [4, 4], + [5, 5], + [6, 6], + [7, 7], + [8, 8], + [9, 14], + [15, 25], + [26, 50], + [51, 100], + [100], + ]; /** * This array stores the ranges to use when displaying the 'days to conversion' report */ - public static $daysToConvRanges = array( - array(0, 0), - array(1, 1), - array(2, 2), - array(3, 3), - array(4, 4), - array(5, 5), - array(6, 6), - array(7, 7), - array(8, 14), - array(15, 30), - array(31, 60), - array(61, 120), - array(121, 364), - array(364) - ); + public static $daysToConvRanges = [ + [0, 0], + [1, 1], + [2, 2], + [3, 3], + [4, 4], + [5, 5], + [6, 6], + [7, 7], + [8, 14], + [15, 30], + [31, 60], + [61, 120], + [121, 364], + [364], + ]; protected function aggregate(ArchiveProcessor $archiveProcessor): array { - $prefixes = array( + $prefixes = [ self::VISITS_UNTIL_RECORD_NAME => 'vcv', self::DAYS_UNTIL_CONV_RECORD_NAME => 'vdsf', - ); + ]; $totalConversions = 0; $totalRevenue = 0; @@ -177,7 +177,7 @@ protected function aggregate(ArchiveProcessor $archiveProcessor): array return $result; } - protected function getOverviewFromGoalTables($tableByGoal) + private function getOverviewFromGoalTables($tableByGoal) { $overview = new DataTable(); foreach ($tableByGoal as $idGoal => $table) { @@ -188,7 +188,7 @@ protected function getOverviewFromGoalTables($tableByGoal) return $overview; } - protected function isStandardGoal($idGoal) + private function isStandardGoal($idGoal) { return !in_array($idGoal, $this->getEcommerceIdGoals()); } @@ -218,9 +218,9 @@ public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array return $records; } - protected function getConversionsNumericMetrics(DataArray $goals) + private function getConversionsNumericMetrics(DataArray $goals) { - $numericRecords = array(); + $numericRecords = []; $goals = $goals->getDataArray(); foreach ($goals as $idGoal => $array) { foreach ($array as $metricId => $value) { From 831e000450ed479712cb02161af628341b93d09f Mon Sep 17 00:00:00 2001 From: diosmosis Date: Mon, 22 May 2023 12:21:21 -0700 Subject: [PATCH 45/56] remove stray debugging change --- core/ArchiveProcessor/PluginsArchiver.php | 1 - 1 file changed, 1 deletion(-) diff --git a/core/ArchiveProcessor/PluginsArchiver.php b/core/ArchiveProcessor/PluginsArchiver.php index 408389f404e..ed7e24c4514 100644 --- a/core/ArchiveProcessor/PluginsArchiver.php +++ b/core/ArchiveProcessor/PluginsArchiver.php @@ -197,7 +197,6 @@ public function callAggregateAllPlugins($visits, $visitsConverted, $forceArchivi $this->params->getSegment() ? sprintf("(for segment = '%s')", $this->params->getSegment()->getString()) : '' ); } catch (Exception $e) { - throw $e; throw new PluginsArchiverException($e->getMessage() . " - in plugin $pluginName.", $e->getCode(), $e); } finally { self::$currentPluginBeingArchived = null; From b8bccd78b1094beedefe4eb8b2946267ceba4ca9 Mon Sep 17 00:00:00 2001 From: Michal Kleiner Date: Fri, 26 May 2023 17:39:44 +1200 Subject: [PATCH 46/56] Update variable name for consistency --- core/Archive.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/Archive.php b/core/Archive.php index 367b28f2471..2aaea504f55 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -747,10 +747,10 @@ private function getDoneStringForPlugin($plugin, $idSites) { $requestedReport = $this->getRequestedReport(); - $shouldOnlyProcessRequestedRecords = empty($requestedReport) + $shouldOnlyProcessRequestedArchives = empty($requestedReport) && Rules::shouldProcessOnlyReportsRequestedInArchiveQuery($this->getPeriodLabel()); - if ($shouldOnlyProcessRequestedRecords) { + if ($shouldOnlyProcessRequestedArchives) { return Rules::getDoneFlagArchiveContainsOnePlugin($this->params->getSegment(), $plugin); } From 76a2823afe5921cdb81655e65461d237fd12eac8 Mon Sep 17 00:00:00 2001 From: Michal Kleiner Date: Fri, 26 May 2023 17:43:36 +1200 Subject: [PATCH 47/56] Remove unnecessary array_filter since a valid class name never has an empty segment --- core/ArchiveProcessor/RecordBuilder.php | 1 - core/Plugin/Archiver.php | 1 - 2 files changed, 2 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 12c4fcb226f..7a5355684ab 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -215,7 +215,6 @@ public function getPluginName(): ?string { $className = get_class($this); $parts = explode('\\', $className); - $parts = array_filter($parts); $plugin = $parts[2]; return $plugin; } diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index da98a56fc95..4a8bc0bb675 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -91,7 +91,6 @@ private function getPluginName() { $className = get_class($this); $parts = explode('\\', $className); - $parts = array_filter($parts); $plugin = $parts[2]; return $plugin; } From e59621d323ed23cf06897a665142aaaa7055b920 Mon Sep 17 00:00:00 2001 From: Michal Kleiner Date: Fri, 26 May 2023 17:47:59 +1200 Subject: [PATCH 48/56] Add TODOs --- core/ArchiveProcessor/RecordBuilder.php | 1 + core/Plugin/Archiver.php | 1 + 2 files changed, 2 insertions(+) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 7a5355684ab..c3cb4a44f93 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -213,6 +213,7 @@ public function getColumnToSortByBeforeTruncation(): ?string public function getPluginName(): ?string { + // TODO: consider extracting to a reusable method or a trait, or use another approach to getting plugin's name $className = get_class($this); $parts = explode('\\', $className); $plugin = $parts[2]; diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 4a8bc0bb675..08a61d91f6b 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -89,6 +89,7 @@ public function __construct(ArchiveProcessor $processor) private function getPluginName() { + // TODO: consider extracting to a reusable method or a trait, or use another approach to getting plugin's name $className = get_class($this); $parts = explode('\\', $className); $plugin = $parts[2]; From bb1b2f3a2d89f62bb8958444a1ccd35a78503d5f Mon Sep 17 00:00:00 2001 From: diosmosis Date: Fri, 26 May 2023 08:50:43 -0700 Subject: [PATCH 49/56] add comment on why we look for data within partial archives prior to reporting whether archives were found or not --- core/DataAccess/ArchiveSelector.php | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/core/DataAccess/ArchiveSelector.php b/core/DataAccess/ArchiveSelector.php index 9ee1819efed..983227ed097 100644 --- a/core/DataAccess/ArchiveSelector.php +++ b/core/DataAccess/ArchiveSelector.php @@ -107,9 +107,16 @@ public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params $result['idarchive'] = empty($result['idarchive']) ? [] : [$result['idarchive']]; if (!empty($result['partial'])) { - // TODO: comment for this - if (!empty($result['idarchive']) - || empty($requestedReport) + // when we are not looking for a specific report, or if we have found a non-partial archive + // that we expect to have the full set of reports for the requested plugin, then we can just + // return it with the additionally found partial archives. + // + // if, however, there is no full archive, and only a set of partial archives, then + // we have to check whether the requested data is actually within them. if we just report the + // partial archives, Archive.php will find no archive data and simply report this. returning no + // idarchive here, however, will initiate archiving, causing the missing data to populate. + if (empty($requestedReport) + || !empty($result['idarchive']) ) { $result['idarchive'] = array_merge($result['idarchive'], $result['partial']); } else { From 94c9796a795194f7e0a87b37e285f0023328313e Mon Sep 17 00:00:00 2001 From: diosmosis Date: Fri, 26 May 2023 09:07:17 -0700 Subject: [PATCH 50/56] typehint fixes + make insertBlobRecord (formerly insertRecord) protected for use in RecordBuilders that need to manually insert data --- core/ArchiveProcessor/RecordBuilder.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index c3cb4a44f93..14a242934ae 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -63,7 +63,7 @@ public function isEnabled(ArchiveProcessor $archiveProcessor): bool } /** - * Used the `aggregate()` protected function to build records by aggregating log table data directly, then + * Uses the protected `aggregate()` function to build records by aggregating log table data directly, then * inserts them as archive data. * * @param ArchiveProcessor $archiveProcessor @@ -100,7 +100,7 @@ public function buildFromLogs(ArchiveProcessor $archiveProcessor): void $maxRowsInSubtable = $record->getMaxRowsInSubtable() ?? $this->maxRowsInSubtable; $columnToSortByBeforeTruncation = $record->getColumnToSortByBeforeTruncation() ?? $this->columnToSortByBeforeTruncation; - $this->insertRecord($archiveProcessor, $recordName, $recordValue, $maxRowsInTable, $maxRowsInSubtable, $columnToSortByBeforeTruncation); + $this->insertBlobRecord($archiveProcessor, $recordName, $recordValue, $maxRowsInTable, $maxRowsInSubtable, $columnToSortByBeforeTruncation); Common::destroy($recordValue); } else { @@ -184,8 +184,8 @@ public abstract function getRecordMetadata(ArchiveProcessor $archiveProcessor): */ protected abstract function aggregate(ArchiveProcessor $archiveProcessor): array; - private function insertRecord(ArchiveProcessor $archiveProcessor, $recordName, DataTable\DataTableInterface $record, - ?int $maxRowsInTable, ?int $maxRowsInSubtable, ?string $columnToSortByBeforeTruncation): void + protected function insertBlobRecord(ArchiveProcessor $archiveProcessor, string $recordName, DataTable $record, + ?int $maxRowsInTable, ?int $maxRowsInSubtable, ?string $columnToSortByBeforeTruncation): void { $serialized = $record->getSerialized( $maxRowsInTable ?: $this->maxRowsInTable, @@ -211,7 +211,7 @@ public function getColumnToSortByBeforeTruncation(): ?string return $this->columnToSortByBeforeTruncation; } - public function getPluginName(): ?string + public function getPluginName(): string { // TODO: consider extracting to a reusable method or a trait, or use another approach to getting plugin's name $className = get_class($this); @@ -226,7 +226,7 @@ public function getPluginName(): ?string * * @return string */ - public function getQueryOriginHint(): ?string + public function getQueryOriginHint(): string { $recordBuilderName = get_class($this); $recordBuilderName = explode('\\', $recordBuilderName); From 0a3089562e2d97b3be0b9e2c49221af668a34a1e Mon Sep 17 00:00:00 2001 From: diosmosis Date: Fri, 26 May 2023 18:57:12 -0700 Subject: [PATCH 51/56] more typehints --- core/Archive.php | 4 +--- core/ArchiveProcessor/Rules.php | 2 +- core/Plugin/Archiver.php | 7 ++++--- plugins/Goals/Goals.php | 2 +- plugins/Goals/RecordBuilders/Base.php | 8 ++++---- .../Goals/RecordBuilders/GeneralGoalsRecords.php | 13 +++++++++---- plugins/Goals/RecordBuilders/ProductRecord.php | 16 ++++++++-------- 7 files changed, 28 insertions(+), 24 deletions(-) diff --git a/core/Archive.php b/core/Archive.php index 2aaea504f55..43249603fd8 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -450,9 +450,7 @@ private function getRequestedPlugins($archiveNames) $result[$plugin][] = $name; } - $result = array_map('array_unique', $result); - - return $result; + return array_map('array_unique', $result); } /** diff --git a/core/ArchiveProcessor/Rules.php b/core/ArchiveProcessor/Rules.php index 3d443c7befc..82f55b5331d 100644 --- a/core/ArchiveProcessor/Rules.php +++ b/core/ArchiveProcessor/Rules.php @@ -382,7 +382,7 @@ public static function isSegmentPluginArchivingDisabled($pluginName, $siteId = n return in_array(strtolower($pluginName), $pluginArchivingSetting); } - public static function shouldProcessOnlyReportsRequestedInArchiveQuery($periodLabel) + public static function shouldProcessOnlyReportsRequestedInArchiveQuery(string $periodLabel): bool { if (SettingsServer::isArchivePhpTriggered()) { return false; diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index 08a61d91f6b..b93321e4d23 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -87,12 +87,13 @@ public function __construct(ArchiveProcessor $processor) $this->enabled = true; } - private function getPluginName() + private function getPluginName(): string { // TODO: consider extracting to a reusable method or a trait, or use another approach to getting plugin's name $className = get_class($this); $parts = explode('\\', $className); - $plugin = $parts[2]; + $parts = array_filter($parts); + $plugin = $parts[2] ?? ''; return $plugin; } @@ -101,7 +102,7 @@ private function getPluginName() * @throws \DI\DependencyException * @throws \DI\NotFoundException */ - private function getRecordBuilders() + private function getRecordBuilders(): array { $transientCache = Cache::getTransientCache(); $cacheKey = CacheId::siteAware('Archiver.RecordBuilders'); diff --git a/plugins/Goals/Goals.php b/plugins/Goals/Goals.php index 169eda8f4c8..e1268a0e0bf 100644 --- a/plugins/Goals/Goals.php +++ b/plugins/Goals/Goals.php @@ -111,7 +111,7 @@ public function registerEvents() return $hooks; } - public function addRecordBuilders(&$recordBuilders) + public function addRecordBuilders(array &$recordBuilders): void { $recordBuilders[] = new ProductRecord(ProductRecord::SKU_FIELD, ProductRecord::ITEMS_SKU_RECORD_NAME); $recordBuilders[] = new ProductRecord(ProductRecord::NAME_FIELD, ProductRecord::ITEMS_NAME_RECORD_NAME); diff --git a/plugins/Goals/RecordBuilders/Base.php b/plugins/Goals/RecordBuilders/Base.php index 0b275643761..738f4d75226 100644 --- a/plugins/Goals/RecordBuilders/Base.php +++ b/plugins/Goals/RecordBuilders/Base.php @@ -17,23 +17,23 @@ abstract class Base extends RecordBuilder { - protected function getSiteId(ArchiveProcessor $archiveProcessor) + protected function getSiteId(ArchiveProcessor $archiveProcessor): ?int { return $archiveProcessor->getParams()->getSite()->getId(); } - protected function usesEcommerce($idSite) + protected function usesEcommerce(int $idSite): bool { return Manager::getInstance()->isPluginActivated('Ecommerce') && Site::isEcommerceEnabledFor($idSite); } - protected function hasAnyGoalOrEcommerce($idSite) + protected function hasAnyGoalOrEcommerce(int $idSite): bool { return $this->usesEcommerce($idSite) || !empty(GoalManager::getGoalIds($idSite)); } - protected function getEcommerceIdGoals() + protected function getEcommerceIdGoals(): array { return array(GoalManager::IDGOAL_CART, GoalManager::IDGOAL_ORDER); } diff --git a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php index 22d0fffc45e..b4aa3cef973 100644 --- a/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php +++ b/plugins/Goals/RecordBuilders/GeneralGoalsRecords.php @@ -70,6 +70,11 @@ class GeneralGoalsRecords extends Base protected function aggregate(ArchiveProcessor $archiveProcessor): array { + $idSite = $this->getSiteId($archiveProcessor); + if (empty($idSite)) { + return []; + } + $prefixes = [ self::VISITS_UNTIL_RECORD_NAME => 'vcv', self::DAYS_UNTIL_CONV_RECORD_NAME => 'vdsf', @@ -83,7 +88,7 @@ protected function aggregate(ArchiveProcessor $archiveProcessor): array $visitsToConversions = []; $daysToConversions = []; - $siteHasEcommerceOrGoals = $this->hasAnyGoalOrEcommerce($this->getSiteId($archiveProcessor)); + $siteHasEcommerceOrGoals = $this->hasAnyGoalOrEcommerce($idSite); // Special handling for sites that contain subordinated sites, like in roll up reporting. // A roll up site, might not have ecommerce enabled or any configured goals, @@ -177,7 +182,7 @@ protected function aggregate(ArchiveProcessor $archiveProcessor): array return $result; } - private function getOverviewFromGoalTables($tableByGoal) + private function getOverviewFromGoalTables(array $tableByGoal): DataTable { $overview = new DataTable(); foreach ($tableByGoal as $idGoal => $table) { @@ -188,7 +193,7 @@ private function getOverviewFromGoalTables($tableByGoal) return $overview; } - private function isStandardGoal($idGoal) + private function isStandardGoal(int $idGoal): bool { return !in_array($idGoal, $this->getEcommerceIdGoals()); } @@ -218,7 +223,7 @@ public function getRecordMetadata(ArchiveProcessor $archiveProcessor): array return $records; } - private function getConversionsNumericMetrics(DataArray $goals) + private function getConversionsNumericMetrics(DataArray $goals): array { $numericRecords = []; $goals = $goals->getDataArray(); diff --git a/plugins/Goals/RecordBuilders/ProductRecord.php b/plugins/Goals/RecordBuilders/ProductRecord.php index e0f12cff437..dc889e8532c 100644 --- a/plugins/Goals/RecordBuilders/ProductRecord.php +++ b/plugins/Goals/RecordBuilders/ProductRecord.php @@ -123,13 +123,13 @@ protected function aggregate(ArchiveProcessor $archiveProcessor): array return $records; } - protected function aggregateFromEcommerceItems($itemReports, $query, $dimension) + protected function aggregateFromEcommerceItems(array $itemReports, $query, string $dimension): void { while ($row = $query->fetch()) { $ecommerceType = $row['ecommerceType']; $label = $this->cleanupRowGetLabel($row, $dimension); - if ($label === false) { + if ($label === null) { continue; } @@ -140,7 +140,7 @@ protected function aggregateFromEcommerceItems($itemReports, $query, $dimension) } } - protected function aggregateFromEcommerceViews($itemReports, $query, $dimension) + protected function aggregateFromEcommerceViews(array $itemReports, $query, string $dimension): void { while ($row = $query->fetch()) { $label = $this->getRowLabel($row, $dimension); @@ -161,7 +161,7 @@ protected function aggregateFromEcommerceViews($itemReports, $query, $dimension) } } - protected function queryItemViewsForDimension(LogAggregator $logAggregator, $dimension) + protected function queryItemViewsForDimension(LogAggregator $logAggregator, string $dimension) { $column = $this->actionMapping[$dimension]; $where = "log_link_visit_action.$column is not null"; @@ -176,7 +176,7 @@ protected function queryItemViewsForDimension(LogAggregator $logAggregator, $dim ); } - protected function roundColumnValues(&$row) + protected function roundColumnValues(array &$row): void { $columnsToRound = array( Metrics::INDEX_ECOMMERCE_ITEM_REVENUE, @@ -193,20 +193,20 @@ protected function roundColumnValues(&$row) } } - protected function getRowLabel(&$row, $dimension) + protected function getRowLabel(array &$row, string $dimension): ?string { $label = $row['label']; if (empty($label)) { // An empty additional category -> skip this iteration if ($dimension != $this->dimension) { - return false; + return null; } $label = "Value not defined"; } return $label; } - protected function cleanupRowGetLabel(&$row, $dimension) + protected function cleanupRowGetLabel(array &$row, string $dimension): ?string { $label = $this->getRowLabel($row, $dimension); From 6a5c2fdfb741d31b4947a37d6e12c68e76a3f588 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 27 May 2023 00:33:46 -0700 Subject: [PATCH 52/56] in aggregateNumericMetrics() allow operationsToApply to be array mapping column name to op --- core/ArchiveProcessor.php | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/core/ArchiveProcessor.php b/core/ArchiveProcessor.php index 121bd6cb17a..0542306f624 100644 --- a/core/ArchiveProcessor.php +++ b/core/ArchiveProcessor.php @@ -262,7 +262,8 @@ public function aggregateDataTableRecords($recordNames, * as metrics for the current period. * * @param array|string $columns Array of metric names to aggregate. - * @param bool|string $operationToApply The operation to apply to the metric. Either `'sum'`, `'max'` or `'min'`. + * @param bool|string|string[] $operationToApply The operation to apply to the metric. Either `'sum'`, `'max'` or `'min'`. + * Can also be an array mapping record names to operations. * @return array|int Returns the array of aggregate values. If only one metric was aggregated, * the aggregate value will be returned as is, not in an array. * For example, if `array('nb_visits', 'nb_hits')` is supplied for `$columns`, @@ -276,9 +277,9 @@ public function aggregateDataTableRecords($recordNames, * then `3040` would be returned. * @api */ - public function aggregateNumericMetrics($columns, $operationToApply = false) + public function aggregateNumericMetrics($columns, $operationsToApply = false) { - $metrics = $this->getAggregatedNumericMetrics($columns, $operationToApply); + $metrics = $this->getAggregatedNumericMetrics($columns, $operationsToApply); foreach ($metrics as $column => $value) { $this->insertNumericRecord($column, $value); @@ -489,7 +490,7 @@ protected function getOperationForColumns($columns, $defaultOperation) { $operationForColumn = array(); foreach ($columns as $name) { - $operation = $defaultOperation; + $operation = is_array($defaultOperation) ? $defaultOperation[$name] : $defaultOperation; if (empty($operation)) { $operation = $this->guessOperationForColumn($name); } @@ -688,13 +689,13 @@ public function renameColumnsAfterAggregation(DataTable $table, $columnsToRename } } - protected function getAggregatedNumericMetrics($columns, $operationToApply) + protected function getAggregatedNumericMetrics($columns, $operationsToApply) { if (!is_array($columns)) { $columns = array($columns); } - $operationForColumn = $this->getOperationForColumns($columns, $operationToApply); + $operationForColumn = $this->getOperationForColumns($columns, $operationsToApply); $dataTable = $this->getArchive()->getDataTableFromNumeric($columns); From b99f84de4c1ebd2174e8fe7c5431f363defd0c59 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 27 May 2023 14:03:49 -0700 Subject: [PATCH 53/56] optimization: when getting recordbuilders, only post Archiver.addRecordBuilders event for requested plugin since it is expected for those event handlers to perform queries --- core/ArchiveProcessor/RecordBuilder.php | 7 +- core/Piwik.php | 17 +++++ core/Plugin/Archiver.php | 86 ++++++++++++++----------- 3 files changed, 67 insertions(+), 43 deletions(-) diff --git a/core/ArchiveProcessor/RecordBuilder.php b/core/ArchiveProcessor/RecordBuilder.php index 14a242934ae..87ade05c713 100644 --- a/core/ArchiveProcessor/RecordBuilder.php +++ b/core/ArchiveProcessor/RecordBuilder.php @@ -11,6 +11,7 @@ use Piwik\ArchiveProcessor; use Piwik\Common; use Piwik\DataTable; +use Piwik\Piwik; /** * Inherit from this class to define archiving logic for one or more records. @@ -213,11 +214,7 @@ public function getColumnToSortByBeforeTruncation(): ?string public function getPluginName(): string { - // TODO: consider extracting to a reusable method or a trait, or use another approach to getting plugin's name - $className = get_class($this); - $parts = explode('\\', $className); - $plugin = $parts[2]; - return $plugin; + return Piwik::getPluginNameOfMatomoClass(get_class($this)); } /** diff --git a/core/Piwik.php b/core/Piwik.php index e42a00adbec..ee3749a5b65 100644 --- a/core/Piwik.php +++ b/core/Piwik.php @@ -968,4 +968,21 @@ public static function getEarliestDateToRearchive() return Date::yesterday()->subMonth($lastNMonthsToInvalidate)->setDay(1); } + + /** + * Given the fully qualified name of a class located within a Matomo plugin, + * returns the name of the plugin. + * + * Uses the fact that Matomo plugins have namespaces like Piwik\Plugins\MyPlugin. + * + * @param string $className the name of a class located within a Matomo plugin + * @return string the plugin name + */ + public static function getPluginNameOfMatomoClass(string $className): string + { + $parts = explode('\\', $className); + $parts = array_filter($parts); + $plugin = $parts[2] ?? ''; + return $plugin; + } } diff --git a/core/Plugin/Archiver.php b/core/Plugin/Archiver.php index b93321e4d23..ffa6e68407e 100644 --- a/core/Plugin/Archiver.php +++ b/core/Plugin/Archiver.php @@ -89,12 +89,7 @@ public function __construct(ArchiveProcessor $processor) private function getPluginName(): string { - // TODO: consider extracting to a reusable method or a trait, or use another approach to getting plugin's name - $className = get_class($this); - $parts = explode('\\', $className); - $parts = array_filter($parts); - $plugin = $parts[2] ?? ''; - return $plugin; + return Piwik::getPluginNameOfMatomoClass(get_class($this)); } /** @@ -102,15 +97,19 @@ private function getPluginName(): string * @throws \DI\DependencyException * @throws \DI\NotFoundException */ - private function getRecordBuilders(): array + private function getRecordBuilders(string $pluginName): array { $transientCache = Cache::getTransientCache(); - $cacheKey = CacheId::siteAware('Archiver.RecordBuilders'); + $cacheKey = CacheId::siteAware('Archiver.RecordBuilders') . '.' . $pluginName; $recordBuilders = $transientCache->fetch($cacheKey); if ($recordBuilders === false) { - $recordBuilderClasses = Manager::getInstance()->findMultipleComponents('RecordBuilders', ArchiveProcessor\RecordBuilder::class); - $recordBuilderClasses = $this->getDefaultConstructibleClasses($recordBuilderClasses); + $recordBuilderClasses = $this->getAllRecordBuilderClasses(); + + // only select RecordBuilders for the selected plugin + $recordBuilderClasses = array_filter($recordBuilderClasses, function ($className) use ($pluginName) { + return Piwik::getPluginNameOfMatomoClass($className) == $pluginName; + }); $recordBuilders = array_map(function ($className) { return StaticContainer::getContainer()->make($className); @@ -131,30 +130,30 @@ private function getRecordBuilders(): array * @param ArchiveProcessor\RecordBuilder[] $recordBuilders An array of RecordBuilder instances * @api */ - Piwik::postEvent('Archiver.addRecordBuilders', [&$recordBuilders]); - - /** - * Triggered to filter / restrict reports. - * - * **Example** - * - * public function filterRecordBuilders(&$recordBuilders) - * { - * foreach ($reports as $index => $recordBuilder) { - * if ($recordBuilders instanceof AnotherPluginRecordBuilder) { - * unset($reports[$index]); - * } - * } - * } - * - * @param ArchiveProcessor\RecordBuilder[] $recordBuilders An array of RecordBuilder instances - * @api - */ - Piwik::postEvent('Archiver.filterRecordBuilders', [&$recordBuilders]); + Piwik::postEvent('Archiver.addRecordBuilders', [&$recordBuilders], false, [$pluginName]); $transientCache->save($cacheKey, $recordBuilders); } + /** + * Triggered to filter / restrict reports. + * + * **Example** + * + * public function filterRecordBuilders(&$recordBuilders) + * { + * foreach ($reports as $index => $recordBuilder) { + * if ($recordBuilders instanceof AnotherPluginRecordBuilder) { + * unset($reports[$index]); + * } + * } + * } + * + * @param ArchiveProcessor\RecordBuilder[] $recordBuilders An array of RecordBuilder instances + * @api + */ + Piwik::postEvent('Archiver.filterRecordBuilders', [&$recordBuilders]); + $requestedReports = $this->processor->getParams()->getArchiveOnlyReportAsArray(); if (!empty($requestedReports)) { $recordBuilders = array_filter($recordBuilders, function (ArchiveProcessor\RecordBuilder $builder) use ($requestedReports) { @@ -176,12 +175,10 @@ final public function callAggregateDayReport() $pluginName = $this->getPluginName(); if (Manager::getInstance()->isPluginLoaded($pluginName)) { - $recordBuilders = $this->getRecordBuilders(); + $recordBuilders = $this->getRecordBuilders($pluginName); foreach ($recordBuilders as $recordBuilder) { - if ($recordBuilder->getPluginName() != $pluginName - || !$recordBuilder->isEnabled($this->getProcessor()) - ) { + if (!$recordBuilder->isEnabled($this->getProcessor())) { continue; } @@ -219,11 +216,9 @@ final public function callAggregateMultipleReports() $pluginName = $this->getPluginName(); if (Manager::getInstance()->isPluginLoaded($pluginName)) { - $recordBuilders = $this->getRecordBuilders(); + $recordBuilders = $this->getRecordBuilders($pluginName); foreach ($recordBuilders as $recordBuilder) { - if ($recordBuilder->getPluginName() != $pluginName - || !$recordBuilder->isEnabled($this->getProcessor()) - ) { + if (!$recordBuilder->isEnabled($this->getProcessor())) { continue; } @@ -340,4 +335,19 @@ private function getDefaultConstructibleClasses(array $classes): array return (new \ReflectionClass($className))->getConstructor()->getNumberOfRequiredParameters() == 0; }); } + + private function getAllRecordBuilderClasses(): array + { + $transientCache = Cache::getTransientCache(); + $cacheKey = CacheId::siteAware('RecordBuilders.allRecordBuilders'); + + $recordBuilderClasses = $transientCache->fetch($cacheKey); + if ($recordBuilderClasses === false) { + $recordBuilderClasses = Manager::getInstance()->findMultipleComponents('RecordBuilders', ArchiveProcessor\RecordBuilder::class); + $recordBuilderClasses = $this->getDefaultConstructibleClasses($recordBuilderClasses); + + $transientCache->save($cacheKey, $recordBuilderClasses); + } + return $recordBuilderClasses; + } } From 0b197bb90b3c011e5b43e8cb19a969415352aa8c Mon Sep 17 00:00:00 2001 From: diosmosis Date: Tue, 30 May 2023 02:22:58 -0700 Subject: [PATCH 54/56] default to null if default column aggregation operation is not specified --- core/ArchiveProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ArchiveProcessor.php b/core/ArchiveProcessor.php index 0542306f624..1d335840481 100644 --- a/core/ArchiveProcessor.php +++ b/core/ArchiveProcessor.php @@ -490,7 +490,7 @@ protected function getOperationForColumns($columns, $defaultOperation) { $operationForColumn = array(); foreach ($columns as $name) { - $operation = is_array($defaultOperation) ? $defaultOperation[$name] : $defaultOperation; + $operation = is_array($defaultOperation) ? ($defaultOperation[$name] ?? null) : $defaultOperation; if (empty($operation)) { $operation = $this->guessOperationForColumn($name); } From 264c500bf75be6329dde14bd3d3f9e6a00893a15 Mon Sep 17 00:00:00 2001 From: diosmosis Date: Fri, 2 Jun 2023 13:31:34 -0700 Subject: [PATCH 55/56] add check for invalid record name to Record --- core/ArchiveProcessor/Record.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/ArchiveProcessor/Record.php b/core/ArchiveProcessor/Record.php index 273fe948198..eadf7e7f5d2 100644 --- a/core/ArchiveProcessor/Record.php +++ b/core/ArchiveProcessor/Record.php @@ -66,6 +66,10 @@ public function setPlugin(?string $plugin): Record */ public function setName(string $name): Record { + if (!preg_match('/^[a-zA-Z0-9_]+$/', $name)) { + throw new \Exception('Invalid record name: ' . $name . '. Only alphanumeric characters and underscores are allowed.'); + } + $this->name = $name; return $this; } From 540e6a2bf5bebaa486e8c5678812a38f0c3cc79b Mon Sep 17 00:00:00 2001 From: diosmosis Date: Sat, 3 Jun 2023 00:55:35 -0700 Subject: [PATCH 56/56] allow dashes in record name since entity IDs can be used in them --- core/ArchiveProcessor/Record.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ArchiveProcessor/Record.php b/core/ArchiveProcessor/Record.php index eadf7e7f5d2..d66ab379f6d 100644 --- a/core/ArchiveProcessor/Record.php +++ b/core/ArchiveProcessor/Record.php @@ -66,7 +66,7 @@ public function setPlugin(?string $plugin): Record */ public function setName(string $name): Record { - if (!preg_match('/^[a-zA-Z0-9_]+$/', $name)) { + if (!preg_match('/^[a-zA-Z0-9_-]+$/', $name)) { throw new \Exception('Invalid record name: ' . $name . '. Only alphanumeric characters and underscores are allowed.'); }