Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce data aggregation (#194) #234

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 71 additions & 6 deletions inc/class-statify-cron.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@ class Statify_Cron extends Statify {
*
* @since 0.3.0
* @version 1.4.0
* @wp-hook boolean statify__skip_aggregation
*/
public static function cleanup_data() {

// Global.
global $wpdb;

// Remove items.
// Remove old items.
$wpdb->query(
$wpdb->prepare(
"DELETE FROM `$wpdb->statify` WHERE created <= SUBDATE(%s, %d)",
Expand All @@ -38,9 +37,75 @@ public static function cleanup_data() {
)
);

// Aggregate data.
if ( ! apply_filters( 'statify__skip_aggregation', false ) ) {
self::aggregate_data();
}

// Optimize DB.
$wpdb->query(
"OPTIMIZE TABLE `$wpdb->statify`"
);
$wpdb->query( "OPTIMIZE TABLE `$wpdb->statify`" );
}

/**
 * Aggregate data in database.
 *
 * For every day from the last aggregation date (or, if none is stored, the
 * oldest entry in the table) up to and including the current day, all rows
 * sharing the same `created`, `referrer` and `target` values are replaced by
 * a single row whose `hits` column holds the sum of the replaced rows.
 *
 * Each day is rewritten inside its own transaction. If the rewrite of a day
 * fails, the transaction is rolled back, processing stops and that day is
 * stored as the resume point, so the next run retries it instead of skipping it.
 *
 * @since 1.9
 */
public static function aggregate_data() {
	global $wpdb;

	// Get date of last aggregation.
	if ( isset( self::$_options['last_aggregation'] ) ) {
		// Value saved, use it.
		$start = self::$_options['last_aggregation'];
	} else {
		// No? We need to clean up all data. Let's determine the oldest data in the database.
		$start = $wpdb->get_var( "SELECT MIN(`created`) FROM `$wpdb->statify`" );
	}

	if ( is_null( $start ) ) {
		// No data available, i.e. not aggregated yet and no data in database.
		return;
	}

	// Entries are stamped with the site-local date (current_time) when tracked,
	// so the iteration has to end on the same calendar day. Using the server
	// clock here could move the resume point past rows still being written.
	$today  = current_time( 'Y-m-d' );
	$end    = new DateTime( $today );
	$date   = new DateTime( $start );
	$resume = $today;

	// Iterate over every day from start til today (both inclusive).
	while ( $date <= $end ) {
		$day = $date->format( 'Y-m-d' );
		$agg = $wpdb->get_results(
			$wpdb->prepare(
				"SELECT `created`, `referrer`, `target`, SUM(`hits`) as `hits` FROM `$wpdb->statify` WHERE `created` = %s GROUP BY `created`, `referrer`, `target`",
				$day
			),
			ARRAY_A
		);

		// Nothing selected means there is nothing to replace. Skipping the
		// delete also guarantees rows are never removed without their sums.
		if ( ! empty( $agg ) ) {
			// Remove non-aggregated data and insert aggregates within one transaction.
			$wpdb->query( 'START TRANSACTION' );

			$success = false !== $wpdb->query(
				$wpdb->prepare(
					"DELETE FROM `$wpdb->statify` WHERE `created` = %s",
					$day
				)
			);

			if ( $success ) {
				foreach ( $agg as $a ) {
					if ( false === $wpdb->insert( $wpdb->statify, $a ) ) {
						$success = false;
						break;
					}
				}
			}

			if ( ! $success ) {
				// Restore the original rows and retry this day on the next run.
				$wpdb->query( 'ROLLBACK' );
				$resume = $day;
				break;
			}

			$wpdb->query( 'COMMIT' );
		}

		// Continue with next day.
		$date->modify( '+1 day' );
	}

	// Remember where the next aggregation has to start.
	self::$_options['last_aggregation'] = $resume;
	update_option( 'statify', self::$_options );
}
}
14 changes: 7 additions & 7 deletions inc/class-statify-dashboard.php
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ private static function _select_data() {
$data = array(
'visits' => $wpdb->get_results(
$wpdb->prepare(
"SELECT `created` as `date`, COUNT(`created`) as `count` FROM `$wpdb->statify` GROUP BY `created` ORDER BY `created` DESC LIMIT %d",
"SELECT `created` as `date`, SUM(`hits`) as `count` FROM `$wpdb->statify` GROUP BY `created` ORDER BY `created` DESC LIMIT %d",
$days_show
),
ARRAY_A
Expand All @@ -332,15 +332,15 @@ private static function _select_data() {
if ( $today ) {
$data['target'] = $wpdb->get_results(
$wpdb->prepare(
"SELECT COUNT(`target`) as `count`, `target` as `url` FROM `$wpdb->statify` WHERE created = %s GROUP BY `target` ORDER BY `count` DESC, `url` ASC LIMIT %d",
"SELECT SUM(`hits`) as `count`, `target` as `url` FROM `$wpdb->statify` WHERE created = %s GROUP BY `target` ORDER BY `count` DESC, `url` ASC LIMIT %d",
$current_date,
$limit
),
ARRAY_A
);
$data['referrer'] = $wpdb->get_results(
$wpdb->prepare(
"SELECT COUNT(`referrer`) as `count`, `referrer` as `url`, SUBSTRING_INDEX(SUBSTRING_INDEX(TRIM(LEADING 'www.' FROM(TRIM(LEADING 'https://' FROM TRIM(LEADING 'http://' FROM TRIM(`referrer`))))), '/', 1), ':', 1) as `host` FROM `$wpdb->statify` WHERE `referrer` != '' AND created = %s GROUP BY `host` ORDER BY `count` DESC, `url` ASC LIMIT %d",
"SELECT SUM(`hits`) as `count`, `referrer` as `url`, SUBSTRING_INDEX(SUBSTRING_INDEX(TRIM(LEADING 'www.' FROM(TRIM(LEADING 'https://' FROM TRIM(LEADING 'http://' FROM TRIM(`referrer`))))), '/', 1), ':', 1) as `host` FROM `$wpdb->statify` WHERE `referrer` != '' AND created = %s GROUP BY `host` ORDER BY `count` DESC, `url` ASC LIMIT %d",
$current_date,
$limit
),
Expand All @@ -349,7 +349,7 @@ private static function _select_data() {
} else {
$data['target'] = $wpdb->get_results(
$wpdb->prepare(
"SELECT COUNT(`target`) as `count`, `target` as `url` FROM `$wpdb->statify` WHERE created > DATE_SUB(%s, INTERVAL %d DAY) GROUP BY `target` ORDER BY `count` DESC, `url` ASC LIMIT %d",
"SELECT SUM(`hits`) as `count`, `target` as `url` FROM `$wpdb->statify` WHERE created > DATE_SUB(%s, INTERVAL %d DAY) GROUP BY `target` ORDER BY `count` DESC, `url` ASC LIMIT %d",
$current_date,
$days_show,
$limit
Expand All @@ -358,7 +358,7 @@ private static function _select_data() {
);
$data['referrer'] = $wpdb->get_results(
$wpdb->prepare(
"SELECT COUNT(`referrer`) as `count`, `referrer` as `url`, SUBSTRING_INDEX(SUBSTRING_INDEX(TRIM(LEADING 'www.' FROM(TRIM(LEADING 'https://' FROM TRIM(LEADING 'http://' FROM TRIM(`referrer`))))), '/', 1), ':', 1) as `host` FROM `$wpdb->statify` WHERE `referrer` != '' AND created > DATE_SUB(%s, INTERVAL %d DAY) GROUP BY `host` ORDER BY `count` DESC, `url` ASC LIMIT %d",
"SELECT SUM(`hits`) as `count`, `referrer` as `url`, SUBSTRING_INDEX(SUBSTRING_INDEX(TRIM(LEADING 'www.' FROM(TRIM(LEADING 'https://' FROM TRIM(LEADING 'http://' FROM TRIM(`referrer`))))), '/', 1), ':', 1) as `host` FROM `$wpdb->statify` WHERE `referrer` != '' AND created > DATE_SUB(%s, INTERVAL %d DAY) GROUP BY `host` ORDER BY `count` DESC, `url` ASC LIMIT %d",
$current_date,
$days_show,
$limit
Expand All @@ -371,12 +371,12 @@ private static function _select_data() {
$data['visit_totals'] = array(
'today' => $wpdb->get_var(
$wpdb->prepare(
"SELECT COUNT(`created`) FROM `$wpdb->statify` WHERE created = %s",
"SELECT SUM(`hits`) FROM `$wpdb->statify` WHERE created = %s",
$current_date
)
),
'since_beginning' => $wpdb->get_row(
"SELECT COUNT(`created`) AS `count`, MIN(`created`) AS `date` FROM `$wpdb->statify`",
"SELECT SUM(`hits`) AS `count`, MIN(`created`) AS `date` FROM `$wpdb->statify`",
ARRAY_A
),
);
Expand Down
2 changes: 1 addition & 1 deletion inc/class-statify-table.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ public static function init() {
* @version 1.2.4
*/
public static function create() {

global $wpdb;

// If existent.
Expand All @@ -65,6 +64,7 @@ public static function create() {
`created` date NOT NULL default '0000-00-00',
`referrer` varchar(255) NOT NULL default '',
`target` varchar(255) NOT NULL default '',
`hits` integer NOT NULL default 1,
PRIMARY KEY (`id`),
KEY `referrer` (`referrer`),
KEY `target` (`target`),
Expand Down
1 change: 1 addition & 0 deletions inc/class-statify.php
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ protected static function track( $referrer, $target ) {
'created' => current_time( 'Y-m-d' ),
'referrer' => $referrer,
'target' => $target,
'hits' => 1,
);

// Insert.
Expand Down
66 changes: 65 additions & 1 deletion tests/test-cron.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ public function set_up() {
* @preserveGlobalState disabled
*/
public function test_cronjob() {
global $wpdb;

// Initialize normal cycle, configure storage period of 3 days.
$this->init_statify_widget( 3 );
$this->assertNotFalse(
Expand Down Expand Up @@ -61,7 +63,8 @@ public function test_cronjob() {
$this->assertEquals( 2, $v['count'], 'Unexpected visit count' );
}

// Run the cron job.
// Run the cron job without aggregation.
add_filter( 'statify__skip_aggregation', '__return_true' );
Statify_Cron::cleanup_data();

// Verify that 2 days have been deleted.
Expand All @@ -72,5 +75,66 @@ public function test_cronjob() {
$this->assertContains( $v['date'], $remaining_dates, 'Unexpected remaining date in stats' );
$this->assertEquals( 2, $v['count'], 'Unexpected visit count' );
}
$this->assertEquals(
6,
$wpdb->get_var( "SELECT COUNT(*) FROM `$wpdb->statify`" ),
'Unexpected number of entries after cleanup without aggregation'
);

// Run the cron job with aggregation (default).
remove_filter( 'statify__skip_aggregation', '__return_true' );
Statify_Cron::cleanup_data();
$this->assertEquals(
3,
$wpdb->get_var( "SELECT COUNT(*) FROM `$wpdb->statify`" ),
'Unexpected number of entries after cleanup with aggregation'
);
}

/**
 * Test aggregation of stored entries into per-day hit counters.
 *
 * @runInSeparateProcess Must not preserve global constant.
 * @preserveGlobalState disabled
 */
public function test_aggregation() {
	global $wpdb;

	$today     = new DateTime();
	$yesterday = clone $today;
	$yesterday->modify( '-1 days' );

	$current  = $today->format( 'Y-m-d' );
	$previous = $yesterday->format( 'Y-m-d' );

	// Fixture rows: date, referrer, target, number of entries to insert.
	// Today carries 4 distinct referrer/target combinations, yesterday 3.
	$fixtures = array(
		array( $current, '', '', 2 ),
		array( $current, 'https://statify.pluginkollektiv.org/', '/', 3 ),
		array( $current, 'https://statify.pluginkollektiv.org/', '/test/', 4 ),
		array( $current, 'https://pluginkollektiv.org/', '/', 5 ),
		array( $previous, 'https://statify.pluginkollektiv.org/', '/', 4 ),
		array( $previous, 'https://statify.pluginkollektiv.org/', '/test/', 3 ),
		array( $previous, 'https://pluginkollektiv.org/', '/', 2 ),
	);
	foreach ( $fixtures as $fixture ) {
		$this->insert_test_data( $fixture[0], $fixture[1], $fixture[2], $fixture[3] );
	}

	$count_query = "SELECT COUNT(*) FROM `$wpdb->statify`";

	// Baseline: one table row per inserted entry.
	$this->assertEquals( 23, $wpdb->get_var( $count_query ), 'Unexpected number of entries before aggregation' );
	$stats_before = $this->get_stats();

	// Run the aggregation directly.
	Statify_Cron::aggregate_data();

	// One row per distinct combination must remain, with unchanged statistics.
	$this->assertEquals( 7, $wpdb->get_var( $count_query ), 'Unexpected number of entries after aggregation' );
	$stats_after = $this->get_stats();
	$this->assertEquals( $stats_before, $stats_after, 'Statistics data should be the same after aggregation' );

	// Spot-check the hit counter of a single aggregated row from yesterday.
	$hits = $wpdb->get_var(
		$wpdb->prepare(
			"SELECT hits FROM `$wpdb->statify` WHERE created = %s AND referrer = %s AND target = %s",
			$previous,
			'https://statify.pluginkollektiv.org/',
			'/test/'
		)
	);
	$this->assertEquals( 3, $hits, 'Unexpected hit count after aggregation' );
}
}
3 changes: 2 additions & 1 deletion tests/test-tracking.php
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,11 @@ function ( $data, $id ) use ( &$capture ) {
$this->assertNotNull( $stats['visits'][0]['count'], 'Request not tracked' );
$this->assertNotEmpty( $capture, 'Hook stativy__visit_saved has not fired' );
$this->assertTrue( is_numeric( $capture['id'] ) && $capture['id'] > 0, 'unexpected entry ID' );
$this->assertCount( 3, $capture['data'], 'unexpected number of data fields' );
$this->assertCount( 4, $capture['data'], 'unexpected number of data fields' );
$this->assertEquals( ( new DateTime() )->format( 'Y-m-d' ), $capture['data']['created'], 'unexpected creation date' );
$this->assertEquals( 'https://statify.pluginkollektiv.org/', $capture['data']['referrer'], 'unexpected referrer' );
$this->assertEquals( '/page', $capture['data']['target'], 'unexpected target' );
$this->assertEquals( 1, $capture['data']['hits'], 'unexpected hits' );
}

/**
Expand Down