From 30ac243f3644bb32eda74b1c38c586ce6c81a7d8 Mon Sep 17 00:00:00 2001 From: diosmosis <benaka@piwik.pro> Date: Thu, 7 May 2015 16:58:56 -0700 Subject: [PATCH] Move deleteUnusedLogAction logic to RawLogDao class. --- core/DataAccess/RawLogDao.php | 143 ++++++++++++++++++ core/LogPurger.php | 7 +- .../Dimension}/DimensionMetadataProvider.php | 4 +- plugins/PrivacyManager/LogDataPurger.php | 130 +--------------- tests/PHPUnit/System/PrivacyManagerTest.php | 8 +- .../DimensionMetadataProviderTest.php | 4 +- 6 files changed, 153 insertions(+), 143 deletions(-) rename {plugins/PrivacyManager => core/Plugin/Dimension}/DimensionMetadataProvider.php (97%) rename {plugins/PrivacyManager/tests/Unit => tests/PHPUnit/Unit/Plugin/Dimension}/DimensionMetadataProviderTest.php (97%) diff --git a/core/DataAccess/RawLogDao.php b/core/DataAccess/RawLogDao.php index aeefa8e553..3af2085ec5 100644 --- a/core/DataAccess/RawLogDao.php +++ b/core/DataAccess/RawLogDao.php @@ -9,13 +9,33 @@ namespace Piwik\DataAccess; use Piwik\Common; +use Piwik\Container\StaticContainer; use Piwik\Db; +use Piwik\Piwik; +use Piwik\Plugin\Dimension\DimensionMetadataProvider; /** * DAO that queries log tables. */ class RawLogDao { + const TEMP_TABLE_NAME = 'tmp_log_actions_to_keep'; // TODO: rename TEMP_TABLE_NAME const name for the method its used in + + /** + * @var DimensionMetadataProvider + */ + private $dimensionMetadataProvider; + + /** + * The max set of rows each table scan select should query at one time. TODO: this was copied from LogDataPurger. should be specified on construction. + */ + public static $selectSegmentSize = 100000; + + public function __construct(DimensionMetadataProvider $provider = null) + { + $this->dimensionMetadataProvider = $provider ?: StaticContainer::get('Piwik\Plugin\Dimension\DimensionMetadataProvider'); + } + /** * @param array $values * @param string $idVisit @@ -178,6 +198,34 @@ class RawLogDao return $statement->rowCount(); } + /** + * TODO + * @throws \Exception + */ + public function deleteUnusedLogActions() + { + if (!Db::isLockPrivilegeGranted()) { + throw new \Exception("RawLogDao.deleteUnusedLogActions() requires table locking permission in order to complete without error."); + } + + // get current max ID in log tables w/ idaction references. + $maxIds = $this->getMaxIdsInLogTables(); + + $this->createTempTable(); + + // do large insert (inserting everything before maxIds) w/o locking tables... + $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true); + + // ... then do small insert w/ locked tables to minimize the amount of time tables are locked. + $this->lockLogTables(); + $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false); + + // delete before unlocking tables so there's no chance a new log row that references an + // unused action will be inserted. + $this->deleteUnusedActions(); + Db::unlockAllTables(); + } + /** * @param array $columnsToSet * @return string @@ -269,4 +317,99 @@ class RawLogDao return $sql; } + + + private function getMaxIdsInLogTables() + { + $tables = array('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item'); + $idColumns = $this->getTableIdColumns(); + + $result = array(); + foreach ($tables as $table) { + $idCol = $idColumns[$table]; + $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); + } + + return $result; + } + + private function createTempTable() + { + $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::TEMP_TABLE_NAME) . " ( + idaction INT(11), + PRIMARY KEY (idaction) + )"; + Db::query($sql); + } + + private function insertActionsToKeep($maxIds, $olderThan = true) + { + $tempTableName = Common::prefixTable(self::TEMP_TABLE_NAME); + + $idColumns = $this->getTableIdColumns(); + foreach ($this->dimensionMetadataProvider->getActionReferenceColumnsByTable() as $table => $columns) { + $idCol = $idColumns[$table]; + + foreach ($columns as $col) { + $select = "SELECT $col FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?"; + $sql = "INSERT IGNORE INTO $tempTableName $select"; + + if ($olderThan) { + $start = 0; + $finish = $maxIds[$table]; + } else { + $start = $maxIds[$table]; + $finish = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); + } + + Db::segmentedQuery($sql, $start, $finish, self::$selectSegmentSize); + } + } + + // allow code to be executed after data is inserted. for concurrency testing purposes. + if ($olderThan) { + /** + * @ignore + */ + Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.olderThan"); // TODO: use DI/descendant class instead + } else { + /** + * @ignore + */ + Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.newerThan"); + } + } + + private function lockLogTables() + { + Db::lockTables( + $readLocks = Common::prefixTables('log_conversion', + 'log_link_visit_action', + 'log_visit', + 'log_conversion_item'), + $writeLocks = Common::prefixTables('log_action') + ); + } + + private function deleteUnusedActions() + { + list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::TEMP_TABLE_NAME); + + $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable + FROM $logActionTable + LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction + WHERE tmp.idaction IS NULL"; + + Db::query($deleteSql); + } + + private function getTableIdColumns() + { + return array( + 'log_link_visit_action' => 'idlink_va', + 'log_conversion' => 'idvisit', + 'log_visit' => 'idvisit', + 'log_conversion_item' => 'idvisit' + ); + } } \ No newline at end of file diff --git a/core/LogPurger.php b/core/LogPurger.php index 21a5346233..6b3848c99f 100644 --- a/core/LogPurger.php +++ b/core/LogPurger.php @@ -14,7 +14,7 @@ use Piwik\DataAccess\RawLogDao; * TODO * TODO: change name to LogDeleter * - * TODO: class docs + * TODO: class + method docs */ class LogPurger { @@ -52,9 +52,4 @@ class LogPurger { return $this->rawLogDao->deleteConversionItems($visitIds); } - - public function deleteActions($actionIds) - { - // TODO (should this cascade as well? what about existing references? revisit when refactoring log purging class) - } } \ No newline at end of file diff --git a/plugins/PrivacyManager/DimensionMetadataProvider.php b/core/Plugin/Dimension/DimensionMetadataProvider.php similarity index 97% rename from plugins/PrivacyManager/DimensionMetadataProvider.php rename to core/Plugin/Dimension/DimensionMetadataProvider.php index 6f2b4e9afd..089c60c0a6 100644 --- a/plugins/PrivacyManager/DimensionMetadataProvider.php +++ b/core/Plugin/Dimension/DimensionMetadataProvider.php @@ -6,9 +6,7 @@ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later */ -namespace Piwik\Plugins\PrivacyManager; - -use Piwik\Plugin\Dimension\ActionDimension; +namespace Piwik\Plugin\Dimension; /** * Provides metadata about dimensions for the LogDataPurger class. diff --git a/plugins/PrivacyManager/LogDataPurger.php b/plugins/PrivacyManager/LogDataPurger.php index 81b4aaebaa..4705a9091d 100755 --- a/plugins/PrivacyManager/LogDataPurger.php +++ b/plugins/PrivacyManager/LogDataPurger.php @@ -22,18 +22,11 @@ use Piwik\Piwik; */ class LogDataPurger { - const TEMP_TABLE_NAME = 'tmp_log_actions_to_keep'; - /** * The max set of rows each table scan select should query at one time. */ public static $selectSegmentSize = 100000; - /** - * @param DimensionMetadataProvider - */ - private $dimensionMetadataProvider; - /** * TODO * @@ -58,9 +51,8 @@ class LogDataPurger /** * Constructor. */ - public function __construct(DimensionMetadataProvider $dimensionMetadataProvider, LogPurger $logPurger, RawLogDao $rawLogDao) + public function __construct(LogPurger $logPurger, RawLogDao $rawLogDao) { - $this->dimensionMetadataProvider = $dimensionMetadataProvider; $this->logPurger = $logPurger; $this->rawLogDao = $rawLogDao; } @@ -96,7 +88,7 @@ class LogDataPurger // delete unused actions from the log_action table (but only if we can lock tables) if (Db::isLockPrivilegeGranted()) { - $this->purgeUnusedLogActions(); // TODO: move actual code to DAO/service in core + $this->rawLogDao->deleteUnusedLogActions(); } else { $logMessage = get_class($this) . ": LOCK TABLES privilege not granted; skipping unused actions purge"; Log::warning($logMessage); @@ -142,29 +134,6 @@ class LogDataPurger return $result; } - /** - * Safely delete all unused log_action rows. - */ - private function purgeUnusedLogActions() - { - $this->createTempTable(); - - // get current max ID in log tables w/ idaction references. - $maxIds = $this->getMaxIdsInLogTables(); - - // do large insert (inserting everything before maxIds) w/o locking tables... - $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true); - - // ... then do small insert w/ locked tables to minimize the amount of time tables are locked. - $this->lockLogTables(); - $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false); - - // delete before unlocking tables so there's no chance a new log row that references an - // unused action will be inserted. - $this->deleteUnusedActions(); - Db::unlockAllTables(); - } - /** * get highest idVisit to delete rows from * @return string @@ -198,101 +167,6 @@ class LogDataPurger return (int) Db::fetchOne($sql, array($maxIdVisit)); } - private function createTempTable() - { - $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::TEMP_TABLE_NAME) . " ( - idaction INT(11), - PRIMARY KEY (idaction) - )"; - Db::query($sql); - } - - private function getMaxIdsInLogTables() - { - $tables = array('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item'); - $idColumns = $this->getTableIdColumns(); - - $result = array(); - foreach ($tables as $table) { - $idCol = $idColumns[$table]; - $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); - } - - return $result; - } - - private function insertActionsToKeep($maxIds, $olderThan = true) - { - $tempTableName = Common::prefixTable(self::TEMP_TABLE_NAME); - - $idColumns = $this->getTableIdColumns(); - $idActionColumnsByTable = $this->dimensionMetadataProvider->getActionReferenceColumnsByTable(); - foreach ($idActionColumnsByTable as $table => $columns) { - $idCol = $idColumns[$table]; - - foreach ($columns as $col) { - $select = "SELECT $col FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?"; - $sql = "INSERT IGNORE INTO $tempTableName $select"; - - if ($olderThan) { - $start = 0; - $finish = $maxIds[$table]; - } else { - $start = $maxIds[$table]; - $finish = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); - } - - Db::segmentedQuery($sql, $start, $finish, self::$selectSegmentSize); - } - } - - // allow code to be executed after data is inserted. for concurrency testing purposes. - if ($olderThan) { - /** - * @ignore - */ - Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.olderThan"); - } else { - /** - * @ignore - */ - Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.newerThan"); - } - } - - private function lockLogTables() - { - Db::lockTables( - $readLocks = Common::prefixTables('log_conversion', - 'log_link_visit_action', - 'log_visit', - 'log_conversion_item'), - $writeLocks = Common::prefixTables('log_action') - ); - } - - private function deleteUnusedActions() - { - list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::TEMP_TABLE_NAME); - - $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable - FROM $logActionTable - LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction - WHERE tmp.idaction IS NULL"; - - Db::query($deleteSql); - } - - private function getTableIdColumns() - { - return array( - 'log_link_visit_action' => 'idlink_va', - 'log_conversion' => 'idvisit', - 'log_visit' => 'idvisit', - 'log_conversion_item' => 'idvisit' - ); - } - // let's hardcode, since these are not dynamically created tables public static function getDeleteTableLogTables() { diff --git a/tests/PHPUnit/System/PrivacyManagerTest.php b/tests/PHPUnit/System/PrivacyManagerTest.php index 528c4b4fc8..9a7571c2da 100644 --- a/tests/PHPUnit/System/PrivacyManagerTest.php +++ b/tests/PHPUnit/System/PrivacyManagerTest.php @@ -19,7 +19,7 @@ use Piwik\LogPurger; use Piwik\Option; use Piwik\Plugins\Goals\API as APIGoals; use Piwik\Plugins\Goals\Archiver; -use Piwik\Plugins\PrivacyManager\DimensionMetadataProvider; +use Piwik\Plugin\Dimension\DimensionMetadataProvider; use Piwik\Plugins\PrivacyManager\LogDataPurger; use Piwik\Plugins\PrivacyManager\PrivacyManager; use Piwik\Plugins\PrivacyManager\ReportsPurger; @@ -122,7 +122,7 @@ class PrivacyManagerTest extends IntegrationTestCase public function tearDown() { - $tempTableName = Common::prefixTable(LogDataPurger::TEMP_TABLE_NAME); + $tempTableName = Common::prefixTable(RawLogDao::TEMP_TABLE_NAME); Db::query("DROP TABLE IF EXISTS " . $tempTableName); parent::tearDown(); @@ -486,8 +486,8 @@ class PrivacyManagerTest extends IntegrationTestCase { \Piwik\Piwik::addAction("LogDataPurger.ActionsToKeepInserted.olderThan", array($this, 'addReferenceToUnusedAction')); - $rawLogDao = new RawLogDao(); - $purger = new LogDataPurger(new DimensionMetadataProvider(), new LogPurger($rawLogDao), $rawLogDao); + $rawLogDao = new RawLogDao(new DimensionMetadataProvider()); + $purger = new LogDataPurger(new LogPurger($rawLogDao), $rawLogDao); $this->unusedIdAction = Db::fetchOne( "SELECT idaction FROM " . Common::prefixTable('log_action') . " WHERE name = ?", diff --git a/plugins/PrivacyManager/tests/Unit/DimensionMetadataProviderTest.php b/tests/PHPUnit/Unit/Plugin/Dimension/DimensionMetadataProviderTest.php similarity index 97% rename from plugins/PrivacyManager/tests/Unit/DimensionMetadataProviderTest.php rename to tests/PHPUnit/Unit/Plugin/Dimension/DimensionMetadataProviderTest.php index 7c015a9206..db94963fd9 100644 --- a/plugins/PrivacyManager/tests/Unit/DimensionMetadataProviderTest.php +++ b/tests/PHPUnit/Unit/Plugin/Dimension/DimensionMetadataProviderTest.php @@ -6,9 +6,9 @@ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later */ -namespace Piwik\Plugins\PrivacyManager\tests\Unit; +namespace Piwik\Tests\Unit\Plugin\Dimension; -use Piwik\Plugins\PrivacyManager\DimensionMetadataProvider; +use Piwik\Plugin\Dimension\DimensionMetadataProvider; use Piwik\Tests\Framework\TestCase\UnitTestCase; use Piwik\Plugin\Manager as PluginManager; -- GitLab