diff --git a/config/global.ini.php b/config/global.ini.php index 71d3345436b0d04da408adf82313317c1c77ca33..bd6383ce6e683612c32e007b573a1ebb1da9bb9a 100644 --- a/config/global.ini.php +++ b/config/global.ini.php @@ -225,6 +225,8 @@ allow_adding_segments_for_all_websites = 1 ; When archiving segments for the first time, this determines the oldest date that will be archived. ; This option can be used to avoid archiving (for isntance) the lastN years for every new segment. ; Valid option values include: "beginning_of_time" (start date of archiving will not be changed) +; "segment_last_edit_time" (start date of archiving will be the latest last edit date found, +; if none is found, the created date is used) ; "segment_creation_time" (start date of archiving will be the creation date of the segment) ; lastN where N is an integer (eg "last10" to archive for 10 days before the segment creation date) process_new_segments_from = "beginning_of_time" diff --git a/core/CronArchive/SegmentArchivingRequestUrlProvider.php b/core/CronArchive/SegmentArchivingRequestUrlProvider.php index f60b7e073a737e3b3a6d3dcde09cccea8263782e..65abe86c3284e7129b759c4cde003370d7c5bd74 100644 --- a/core/CronArchive/SegmentArchivingRequestUrlProvider.php +++ b/core/CronArchive/SegmentArchivingRequestUrlProvider.php @@ -25,6 +25,7 @@ class SegmentArchivingRequestUrlProvider { const BEGINNING_OF_TIME = 'beginning_of_time'; const CREATION_TIME = 'segment_creation_time'; + const LAST_EDIT_TIME = 'segment_last_edit_time'; /** * @var Model @@ -60,9 +61,7 @@ class SegmentArchivingRequestUrlProvider public function getUrlParameterDateString($idSite, $period, $date, $segment) { - $segmentCreatedTime = $this->getCreatedTimeOfSegment($idSite, $segment); - - $oldestDateToProcessForNewSegment = $this->getOldestDateToProcessForNewSegment($segmentCreatedTime); + $oldestDateToProcessForNewSegment = $this->getOldestDateToProcessForNewSegment($idSite, $segment); if (empty($oldestDateToProcessForNewSegment)) { return 
$date; } @@ -96,12 +95,31 @@ class SegmentArchivingRequestUrlProvider return $date; } - private function getOldestDateToProcessForNewSegment(Date $segmentCreatedTime) + private function getOldestDateToProcessForNewSegment($idSite, $segment) { + /** + * @var Date $segmentCreatedTime + * @var Date $segmentLastEditedTime + */ + list($segmentCreatedTime, $segmentLastEditedTime) = $this->getCreatedTimeOfSegment($idSite, $segment); + if ($this->processNewSegmentsFrom == self::CREATION_TIME) { $this->logger->debug("process_new_segments_from set to segment_creation_time, oldest date to process is {time}", array('time' => $segmentCreatedTime)); return $segmentCreatedTime; + } elseif ($this->processNewSegmentsFrom == self::LAST_EDIT_TIME) { + $this->logger->debug("process_new_segments_from set to segment_last_edit_time, segment last edit time is {time}", + array('time' => $segmentLastEditedTime)); + + if ($segmentLastEditedTime === null + || $segmentLastEditedTime->getTimestamp() < $segmentCreatedTime->getTimestamp() + ) { + $this->logger->debug("segment last edit time is older than created time, using created time instead"); + + $segmentLastEditedTime = $segmentCreatedTime; + } + + return $segmentLastEditedTime; } elseif (preg_match("/^last([0-9]+)$/", $this->processNewSegmentsFrom, $matches)) { $lastN = $matches[1]; @@ -122,6 +140,8 @@ class SegmentArchivingRequestUrlProvider { $segments = $this->getAllSegments(); + /** @var Date $latestEditTime */ + $latestEditTime = null; $earliestCreatedTime = $this->now; foreach ($segments as $segment) { if (empty($segment['ts_created']) @@ -138,6 +158,15 @@ class SegmentArchivingRequestUrlProvider if ($createdTime->getTimestamp() < $earliestCreatedTime->getTimestamp()) { $earliestCreatedTime = $createdTime; } + + if (!empty($segment['ts_last_edit'])) { + $lastEditTime = Date::factory($segment['ts_last_edit']); + if ($latestEditTime === null + || $latestEditTime->getTimestamp() < $lastEditTime->getTimestamp() + ) { + $latestEditTime = 
$lastEditTime; + } + } } } @@ -147,7 +176,7 @@ class SegmentArchivingRequestUrlProvider 'time' => $earliestCreatedTime )); - return $earliestCreatedTime; + return array($earliestCreatedTime, $latestEditTime); } private function getAllSegments() diff --git a/tests/PHPUnit/Unit/CronArchive/SegmentArchivingRequestUrlProviderTest.php b/tests/PHPUnit/Unit/CronArchive/SegmentArchivingRequestUrlProviderTest.php index e449024326a3327b474e7e5d8be18903dabcf687..10be9a2111a1462c357d8e27a8ee427d4d0904de 100644 --- a/tests/PHPUnit/Unit/CronArchive/SegmentArchivingRequestUrlProviderTest.php +++ b/tests/PHPUnit/Unit/CronArchive/SegmentArchivingRequestUrlProviderTest.php @@ -28,44 +28,58 @@ class SegmentArchivingRequestUrlProviderTest extends \PHPUnit_Framework_TestCase array( 'ts_created' => '2014-01-01', 'definition' => 'browserName==FF', - 'enable_only_idsite' => 1 + 'enable_only_idsite' => 1, + 'ts_last_edit' => '2014-05-05 00:22:33', ), array( 'ts_created' => '2014-01-01', 'definition' => 'countryCode==us', - 'enable_only_idsite' => 1 + 'enable_only_idsite' => 1, + 'ts_last_edit' => '2014-02-02 00:33:44', ), array( 'ts_created' => '2012-01-01', 'definition' => 'countryCode==us', - 'enable_only_idsite' => 1 + 'enable_only_idsite' => 1, + 'ts_last_edit' => '2014-02-03', ), array( 'ts_created' => '2014-01-01', 'definition' => 'countryCode==ca', - 'enable_only_idsite' => 2 + 'enable_only_idsite' => 2, + 'ts_last_edit' => '2013-01-01', ), array( 'ts_created' => '2012-01-01', 'definition' => 'countryCode==ca', - 'enable_only_idsite' => 2 + 'enable_only_idsite' => 2, + 'ts_last_edit' => '2011-01-01', ), array( 'ts_created' => '2011-01-01', 'definition' => 'countryCode==ca', - 'enable_only_idsite' => 0 + 'enable_only_idsite' => 0, + 'ts_last_edit' => null, ), array( 'ts_created' => '2015-03-01', 'definition' => 'pageUrl==a', - 'enable_only_idsite' => 1 - ) + 'enable_only_idsite' => 1, + 'ts_last_edit' => '2014-01-01', + ), + + array( + 'ts_created' => '2015-02-01', + 'definition' => 
'pageUrl==b', + 'enable_only_idsite' => 1, + 'ts_last_edit' => null, + ), ); } @@ -183,6 +197,51 @@ class SegmentArchivingRequestUrlProviderTest extends \PHPUnit_Framework_TestCase 'countryCode==us', '2015-02-01,2015-03-01' ), + // $idSite, $date, $period, $segment, $expected + array( // test segment_last_edit_time uses last edit time + 'segment_last_edit_time', + 1, + $dateRange, + 'week', + 'browserName==FF', + '2014-05-05,2015-03-01', + ), + + array( // test segment_last_edit_time uses greatest last edit time when found + 'segment_last_edit_time', + 1, + $dateRange, + 'week', + 'countryCode==us', + '2014-02-03,2015-03-01', + ), + + array( // test segment_last_edit_time uses last edit time when greatest last edit is newer than oldest created time + 'segment_last_edit_time', + 2, + $dateRange, + 'week', + 'countryCode==ca', + '2013-01-01,2015-03-01', + ), + + array( // test segment_last_edit_time uses creation time when last edit time is older than creation time + 'segment_last_edit_time', + 1, + $dateRange, + 'week', + 'pageUrl==a', + '2015-03-01,2015-03-01', + ), + + array( // test segment_last_edit_time uses creation time when last edit time is not set + 'segment_last_edit_time', + 1, + $dateRange, + 'week', + 'pageUrl==b', + '2015-02-01,2015-03-01', + ), ); }