From 9db6f75ad7e4e1754b0af29b30916c0c596ffa75 Mon Sep 17 00:00:00 2001 From: "m.kurzeja" <m.kurzeja@clearcode.cc> Date: Wed, 17 Sep 2014 08:09:43 +0200 Subject: [PATCH] Small changes in purgeInvalidatedArchives query --- core/DataAccess/ArchivePurger.php | 194 +++++++++--------- .../Integration/ArchiveInvalidationTest.php | 1 - 2 files changed, 100 insertions(+), 95 deletions(-) diff --git a/core/DataAccess/ArchivePurger.php b/core/DataAccess/ArchivePurger.php index 80af0bbb13..02c741b366 100644 --- a/core/DataAccess/ArchivePurger.php +++ b/core/DataAccess/ArchivePurger.php @@ -20,108 +20,114 @@ use Piwik\Piwik; * * @package Piwik\DataAccess */ -class ArchivePurger -{ - public static function purgeInvalidatedArchives() - { - $archiveTables = ArchiveTableCreator::getTablesArchivesInstalled(); - - foreach ($archiveTables as $archiveTable) { - $query = ' +class ArchivePurger { + public static function purgeInvalidatedArchives() { + $archiveTables = ArchiveTableCreator::getTablesArchivesInstalled(); + + foreach ( $archiveTables as $archiveTable ) { + /** + * Select the archives that have already been invalidated and have been since re-processed. + * It purges records for each distinct { archive name (includes segment hash) , idsite, date, period } tuple. + */ + $query = ' SELECT t1.idarchive FROM `' . $archiveTable . '` t1 INNER JOIN `' . $archiveTable . '` t2 ON t1.name = t2.name AND t1.idsite=t2.idsite AND t1.date1=t2.date1 AND t1.date2=t2.date2 AND t1.period=t2.period - WHERE t1.value = 4 AND t2.value IN(' . ArchiveWriter::DONE_OK . ', ' . ArchiveWriter::DONE_OK_TEMPORARY . ') AND t1.name LIKE \'done%\''; - $result = Db::fetchAll($query); - - if (count($result) > 0) { - $archiveIds = array_map( - function ($elm) { - return $elm['idarchive']; - }, - $result - ); - - $date = ArchiveTableCreator::getDateFromTableName($archiveTable); - $date = Date::factory(str_replace('_','-', $date) . '-01'); - - self::deleteArchiveIds($date, $archiveIds); - } - - } - } - - - public static function purgeOutdatedArchives(Date $dateStart) - { - $purgeArchivesOlderThan = Rules::shouldPurgeOutdatedArchives($dateStart); - if (!$purgeArchivesOlderThan) { - return; - } - - $idArchivesToDelete = self::getTemporaryArchiveIdsOlderThan($dateStart, $purgeArchivesOlderThan); - if (!empty($idArchivesToDelete)) { - self::deleteArchiveIds($dateStart, $idArchivesToDelete); - } - self::deleteArchivesWithPeriodRange($dateStart); - - Log::debug("Purging temporary archives: done [ purged archives older than %s in %s ] [Deleted IDs: %s]", - $purgeArchivesOlderThan, $dateStart->toString("Y-m"), implode(',', $idArchivesToDelete)); - } - - protected static function getTemporaryArchiveIdsOlderThan(Date $date, $purgeArchivesOlderThan) - { - $query = "SELECT idarchive - FROM " . ArchiveTableCreator::getNumericTable($date) . " + WHERE t1.value = ' . ArchiveWriter::DONE_INVALIDATED . ' + AND t2.value IN(' . ArchiveWriter::DONE_OK . ', ' . ArchiveWriter::DONE_OK_TEMPORARY . ') + AND t1.ts_archived < t2.ts_archived AND t1.name LIKE \'done%\''; + + $result = Db::fetchAll( $query ); + + if ( count( $result ) > 0 ) { + $archiveIds = array_map( + function ( $elm ) { + return $elm['idarchive']; + }, + $result + ); + + $date = ArchiveTableCreator::getDateFromTableName( $archiveTable ); + $date = Date::factory( str_replace( '_', '-', $date ) . '-01' ); + + self::deleteArchiveIds( $date, $archiveIds ); + } + + } + } + + + public static function purgeOutdatedArchives( Date $dateStart ) { + $purgeArchivesOlderThan = Rules::shouldPurgeOutdatedArchives( $dateStart ); + if ( ! $purgeArchivesOlderThan ) { + return; + } + + $idArchivesToDelete = self::getTemporaryArchiveIdsOlderThan( $dateStart, $purgeArchivesOlderThan ); + if ( ! empty( $idArchivesToDelete ) ) { + self::deleteArchiveIds( $dateStart, $idArchivesToDelete ); + } + self::deleteArchivesWithPeriodRange( $dateStart ); + + Log::debug( "Purging temporary archives: done [ purged archives older than %s in %s ] [Deleted IDs: %s]", + $purgeArchivesOlderThan, + $dateStart->toString( "Y-m" ), + implode( ',', $idArchivesToDelete ) ); + } + + protected static function getTemporaryArchiveIdsOlderThan( Date $date, $purgeArchivesOlderThan ) { + $query = "SELECT idarchive + FROM " . ArchiveTableCreator::getNumericTable( $date ) . " WHERE name LIKE 'done%' AND (( value = " . ArchiveWriter::DONE_OK_TEMPORARY . " AND ts_archived < ?) OR value = " . ArchiveWriter::DONE_ERROR . ")"; - $result = Db::fetchAll($query, array($purgeArchivesOlderThan)); - $idArchivesToDelete = array(); - if (!empty($result)) { - foreach ($result as $row) { - $idArchivesToDelete[] = $row['idarchive']; - } - } - return $idArchivesToDelete; - } - - /* - * Deleting "Custom Date Range" reports after 1 day, since they can be re-processed and would take up un-necessary space - */ - protected static function deleteArchivesWithPeriodRange(Date $date) - { - $query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?"; - - $yesterday = Date::factory('yesterday')->getDateTime(); - $bind = array(Piwik::$idPeriods['range'], $yesterday); - $numericTable = ArchiveTableCreator::getNumericTable($date); - Db::query(sprintf($query, $numericTable), $bind); - Log::debug("Purging Custom Range archives: done [ purged archives older than %s from %s / blob ]", $yesterday, $numericTable); - try { - Db::query(sprintf($query, ArchiveTableCreator::getBlobTable($date)), $bind); - } catch (Exception $e) { - // Individual blob tables could be missing - } - } - - protected static function deleteArchiveIds(Date $date, $idArchivesToDelete) - { - $batches = array_chunk($idArchivesToDelete, 1000); - foreach($batches as $idsToDelete) { - $query = "DELETE FROM %s WHERE idarchive IN (" . implode(',', $idsToDelete) . ")"; - - Db::query(sprintf($query, ArchiveTableCreator::getNumericTable($date))); - try { - Db::query(sprintf($query, ArchiveTableCreator::getBlobTable($date))); - } catch (Exception $e) { - // Individual blob tables could be missing - } - } - - } + $result = Db::fetchAll( $query, array( $purgeArchivesOlderThan ) ); + $idArchivesToDelete = array(); + if ( ! empty( $result ) ) { + foreach ( $result as $row ) { + $idArchivesToDelete[] = $row['idarchive']; + } + } + + return $idArchivesToDelete; + } + + /* + * Deleting "Custom Date Range" reports after 1 day, since they can be re-processed and would take up un-necessary space + */ + protected static function deleteArchivesWithPeriodRange( Date $date ) { + $query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?"; + + $yesterday = Date::factory( 'yesterday' )->getDateTime(); + $bind = array( Piwik::$idPeriods['range'], $yesterday ); + $numericTable = ArchiveTableCreator::getNumericTable( $date ); + Db::query( sprintf( $query, $numericTable ), $bind ); + Log::debug( "Purging Custom Range archives: done [ purged archives older than %s from %s / blob ]", + $yesterday, + $numericTable ); + try { + Db::query( sprintf( $query, ArchiveTableCreator::getBlobTable( $date ) ), $bind ); + } catch ( Exception $e ) { + // Individual blob tables could be missing + } + } + + protected static function deleteArchiveIds( Date $date, $idArchivesToDelete ) { + $batches = array_chunk( $idArchivesToDelete, 1000 ); + foreach ( $batches as $idsToDelete ) { + $query = "DELETE FROM %s WHERE idarchive IN (" . implode( ',', $idsToDelete ) . ")"; + + Db::query( sprintf( $query, ArchiveTableCreator::getNumericTable( $date ) ) ); + try { + Db::query( sprintf( $query, ArchiveTableCreator::getBlobTable( $date ) ) ); + } catch ( Exception $e ) { + // Individual blob tables could be missing + } + } + + } } diff --git a/tests/PHPUnit/Integration/ArchiveInvalidationTest.php b/tests/PHPUnit/Integration/ArchiveInvalidationTest.php index c9f350ba72..380d60e832 100644 --- a/tests/PHPUnit/Integration/ArchiveInvalidationTest.php +++ b/tests/PHPUnit/Integration/ArchiveInvalidationTest.php @@ -81,7 +81,6 @@ class ArchiveInvalidationTest extends IntegrationTestCase public function testAnotherApi($api, $params) { $this->setBrowserArchivingTriggering(1); - $this->invalidateTestArchives(); $this->runApiTests($api, $params); } -- GitLab