<?php
    /**
     * Piwik - free/libre analytics platform
     *
     * @link http://piwik.org
     * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
     *
     */
    namespace Piwik;
    
    
    use Exception;
    
    use Piwik\ArchiveProcessor\Rules;
    
    use Piwik\Archiver\Request;
    
    use Piwik\Container\StaticContainer;
    
    use Piwik\CronArchive\FixedSiteIds;
    use Piwik\CronArchive\SharedSiteIds;
    
    use Piwik\Exception\UnexpectedWebsiteFoundException;
    
    use Piwik\Period\Factory as PeriodFactory;
    
    use Piwik\CronArchive\SitesToReprocessDistributedList;
    
    use Piwik\CronArchive\SegmentArchivingRequestUrlProvider;
    
    use Piwik\Plugins\CoreAdminHome\API as CoreAdminHomeAPI;
    
    use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel;
    
    use Piwik\Plugins\SitesManager\API as APISitesManager;
    
    use Piwik\Plugins\UsersManager\API as APIUsersManager;
    use Piwik\Plugins\UsersManager\UserPreferences;
    
    use Psr\Log\LoggerInterface;
    
    /**
     * ./console core:archive runs as a cron and is a useful tool for general maintenance,
     * and pre-process reports for a Fast dashboard rendering.
     */
    class CronArchive
    {
        // the url can be set here before the init, and it will be used instead of --url=
    
        public static $url = false;
    
        // Max parallel requests for a same site's segments
        const MAX_CONCURRENT_API_REQUESTS = 3;
    
    
        // force-timeout-for-periods default (1 hour)
        const SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES = 3600;
    
        // force-all-periods default (7 days)
        const ARCHIVE_SITES_WITH_TRAFFIC_SINCE = 604800;
    
        // By default, will process last 52 days and months
        // It will be overwritten by the number of days since last archiving ran until completion.
        const DEFAULT_DATE_LAST = 52;
    
        // Since weeks are not used in yearly archives, we make sure that all possible weeks are processed
        const DEFAULT_DATE_LAST_WEEKS = 260;
    
        const DEFAULT_DATE_LAST_YEARS = 7;
    
        // Flag to know when the archive cron is calling the API
        const APPEND_TO_API_REQUEST = '&trigger=archivephp';
    
        // Flag used to record timestamp in Option::
        const OPTION_ARCHIVING_FINISHED_TS = "LastCompletedFullArchiving";
    
    
        // Name of option used to store starting timestamp
        const OPTION_ARCHIVING_STARTED_TS = "LastFullArchivingStartTime";
    
    
        // Show only first N characters from Piwik API output in case of errors
        const TRUNCATE_ERROR_MESSAGE_SUMMARY = 6000;
    
    
        // archiving  will be triggered on all websites with traffic in the last $shouldArchiveOnlySitesWithTrafficSince seconds
        private $shouldArchiveOnlySitesWithTrafficSince;
    
        // By default, we only process the current week/month/year at most once an hour
        private $processPeriodsMaximumEverySeconds;
        private $todayArchiveTimeToLive;
        private $websiteDayHasFinishedSinceLastRun = array();
        private $idSitesInvalidatedOldReports = array();
    
        private $shouldArchiveOnlySpecificPeriods = array();
    
        private $websites = array();
        private $allWebsites = array();
        private $segments = array();
    
        private $requests = 0;
        private $archiveAndRespectTTL = true;
    
        private $lastSuccessRunTimestamp = false;
        private $errors = array();
    
    
        /**
         * The list of IDs for sites for whom archiving should be initiated. If supplied, only these
         * sites will be archived.
         *
         * @var int[]
         */
        public $shouldArchiveSpecifiedSites = array();
    
        /**
         * The list of IDs of sites to ignore when launching archiving. Archiving will not be launched
         * for any site whose ID is in this list (even if the ID is supplied in {@link $shouldArchiveSpecifiedSites}
         * or if {@link $shouldArchiveAllSites} is true).
    
         * @var int[]
         */
        public $shouldSkipSpecifiedSites = array();
    
        /**
         * If true, archiving will be launched for every site.
         *
         * @var bool
         */
        public $shouldArchiveAllSites = false;
    
        /**
         * If true, xhprof will be initiated for the archiving run. Only for development/testing.
         *
         * @var bool
         */
        public $shouldStartProfiler = false;
    
    
        /**
         * Given options will be forwarded to the PHP command if the archiver is executed via CLI.
         * @var string
         */
        public $phpCliConfigurationOptions = '';
    
    
        /**
         * If HTTP requests are used to initiate archiving, this controls whether invalid SSL certificates should
         * be accepted or not by each request.
         *
         * @var bool
         */
        public $acceptInvalidSSLCertificate = false;
    
        /**
         * If set to true, scheduled tasks will not be run.
         *
         * @var bool
         */
        public $disableScheduledTasks = false;
    
        /**
         * The amount of seconds between non-day period archiving. That is, if archiving has been launched within
         * the past [$forceTimeoutPeriod] seconds, Piwik will not initiate archiving for week, month and year periods.
         *
         * @var int|false
         */
        public $forceTimeoutPeriod = false;
    
        /**
         * If supplied, archiving will be launched for sites that have had visits within the last [$shouldArchiveAllPeriodsSince]
         * seconds. If set to `true`, the value defaults to {@link ARCHIVE_SITES_WITH_TRAFFIC_SINCE}.
         *
         * @var int|bool
         */
        public $shouldArchiveAllPeriodsSince = false;
    
        /**
         * If supplied, archiving will be launched only for periods that fall within this date range. For example,
         * `"2012-01-01,2012-03-15"` would result in January 2012, February 2012 being archived but not April 2012.
         *
         * @var string|false eg, `"2012-01-01,2012-03-15"`
         */
        public $restrictToDateRange = false;
    
        /**
         * A list of periods to launch archiving for. By default, day, week, month and year periods
         * are considered. This variable can limit the periods to, for example, week & month only.
         *
         * @var string[] eg, `array("day","week","month","year")`
         */
        public $restrictToPeriods = array();
    
    
        /**
         * Forces CronArchive to retrieve data for the last [$dateLastForced] periods when initiating archiving.
         * When archiving weeks, for example, if 10 is supplied, the API will be called w/ last10. This will potentially
         * initiate archiving for the last 10 weeks.
         *
         * @var int|false
         */
        public $dateLastForced = false;
    
        /**
         * The number of concurrent requests to issue per website. Defaults to {@link MAX_CONCURRENT_API_REQUESTS}.
         *
         * Used when archiving a site's segments concurrently.
         *
         * @var int|false
         */
        public $concurrentRequestsPerWebsite = false;
    
    
        /**
         * List of segment strings to force archiving for. If a stored segment is not in this list, it will not
         * be archived.
         *
         * @var string[]
         */
        public $segmentsToForce = array();
    
    
        private $websitesWithVisitsSinceLastRun = 0;
        private $skippedPeriodsArchivesWebsite = 0;
    
        private $skippedPeriodsNoDataInPeriod = 0;
    
        private $skippedDayArchivesWebsites = 0;
    
        private $skippedDayNoRecentData = 0;
        private $skippedDayOnApiError = 0;
    
        private $skipped = 0;
        private $processed = 0;
        private $archivedPeriodsArchivesWebsite = 0;
    
    
        /**
         * @var SegmentArchivingRequestUrlProvider
         */
        private $segmentArchivingRequestUrlProvider;
    
    
        /**
         * @var LoggerInterface
         */
        private $logger;
    
    
        /**
         * Only used when archiving using HTTP requests.
         *
         * @var string
         */
        private $urlToPiwik = null;
    
    
        /**
         * @var ArchiveInvalidator
         */
        private $invalidator;
    
    
         * Returns the option name of the option that stores the time core:archive was last executed.
    
    mattab's avatar
    mattab a validé
         * @param int $idSite
    
         * @param string $period
         * @return string
         */
    
    Christian Raue's avatar
    Christian Raue a validé
        public static function lastRunKey($idSite, $period)
    
    mattab's avatar
    mattab a validé
            return "lastRunArchive" . $period . "_" . $idSite;
    
         * @param string|null $processNewSegmentsFrom When to archive new segments from. See [General] process_new_segments_from
         *                                            for possible values.
    
    Emir Beganovic's avatar
    Emir Beganovic a validé
         * @param LoggerInterface|null $logger
    
        public function __construct($processNewSegmentsFrom = null, LoggerInterface $logger = null)
    
            $this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
    
            $processNewSegmentsFrom = $processNewSegmentsFrom ?: StaticContainer::get('ini.General.process_new_segments_from');
    
            $this->segmentArchivingRequestUrlProvider = new SegmentArchivingRequestUrlProvider($processNewSegmentsFrom);
    
    diosmosis's avatar
    diosmosis a validé
    
            $this->invalidator = StaticContainer::get('Piwik\Archive\ArchiveInvalidator');
    
        /**
         * Initializes and runs the cron archiver.
         *
         * The four phases (init, run, scheduled tasks, end) are executed in that order
         * inside a single Access::doAsSuperUser() callback.
         */
        public function main()
        {
            // $this is passed through a local variable because the closure captures it with `use`
            $self = $this;
            Access::doAsSuperUser(function () use ($self) {
                $self->init();
                $self->run();
                $self->runScheduledTasks();
                $self->end();
            });
        }
    
    Patryk Andrzejewski's avatar
    Patryk Andrzejewski a validé
            /**
             * This event is triggered during initializing archiving.
             *
             * @param CronArchive $this
             */
    
            Piwik::postEvent('CronArchive.init.start', array($this));
    
    Patryk Andrzejewski's avatar
    Patryk Andrzejewski a validé
    
    
            SettingsServer::setMaxExecutionTime(0);
    
    
            // Note: the order of methods call matters here.
            $this->initStateFromParameters();
    
            $this->logInitInfo();
            $this->logArchiveTimeoutInfo();
    
    
            // record archiving start time
            Option::set(self::OPTION_ARCHIVING_STARTED_TS, time());
    
    
            $this->segments    = $this->initSegmentsToArchive();
    
            $this->allWebsites = APISitesManager::getInstance()->getAllSitesId();
    
            if (!empty($this->shouldArchiveOnlySpecificPeriods)) {
    
                $this->logger->info("- Will only process the following periods: " . implode(", ", $this->shouldArchiveOnlySpecificPeriods) . " (--force-periods)");
    
            $this->invalidateArchivedReportsForSitesThatNeedToBeArchivedAgain();
    
    
            $websitesIds = $this->initWebsiteIds();
            $this->filterWebsiteIds($websitesIds);
    
            $this->websites = $this->createSitesToArchiveQueue($websitesIds);
    
    
            if ($this->websites->getInitialSiteIds() != $websitesIds) {
    
                $this->logger->info('Will ignore websites and help finish a previous started queue instead. IDs: ' . implode(', ', $this->websites->getInitialSiteIds()));
    
            /**
             * This event is triggered after a CronArchive instance is initialized.
             *
             * @param array $websiteIds The list of website IDs this CronArchive instance is processing.
    
    mattab's avatar
    mattab a validé
             *                          This will be the entire list of IDs regardless of whether some have
    
             *                          already been processed.
             */
            Piwik::postEvent('CronArchive.init.finish', array($this->websites->getInitialSiteIds()));
    
        }
    
        /**
         * Main function, runs archiving on all websites with new activity
         *
         * Takes site IDs from the queue one at a time, skips the sites that are excluded or
         * do not need archiving, archives the others through archiveSingleSite(), then logs
         * a summary of the run.
         */
        public function run()
        {
            $timer = new Timer;

            $this->logSection("START");
            $this->logger->info("Starting Piwik reports archiving...");

            do {
                $idSite = $this->websites->getNextSiteId();

                // a null id means the queue has nothing left to hand out
                if (null === $idSite) {
                    break;
                }

                flush();
                $requestsBefore = $this->requests;

                // `continue` re-evaluates the loop condition, so an id of 0 also ends the loop
                if ($idSite <= 0) {
                    continue;
                }

                $skipWebsiteForced = in_array($idSite, $this->shouldSkipSpecifiedSites);
                if ($skipWebsiteForced) {
                    $this->logger->info("Skipped website id $idSite, found in --skip-idsites ");
                    $this->skipped++;
                    continue;
                }

                $shouldCheckIfArchivingIsNeeded    = !$this->shouldArchiveSpecifiedSites && !$this->shouldArchiveAllSites && !$this->dateLastForced;
                $hasWebsiteDayFinishedSinceLastRun = in_array($idSite, $this->websiteDayHasFinishedSinceLastRun);
                $isOldReportInvalidatedForWebsite  = $this->isOldReportInvalidatedForWebsite($idSite);

                if ($shouldCheckIfArchivingIsNeeded) {
                    // if not specific sites and not all websites should be archived, we check whether we actually have
                    // to process the archives for this website (only if there were visits since midnight)
                    if (!$hasWebsiteDayFinishedSinceLastRun && !$isOldReportInvalidatedForWebsite) {
                        if ($this->isWebsiteUsingTheTracker($idSite)) {
                            if (!$this->hadWebsiteTrafficSinceMidnightInTimezone($idSite)) {
                                $this->logger->info("Skipped website id $idSite as archiving is not needed");

                                $this->skippedDayNoRecentData++;
                                $this->skipped++;
                                continue;
                            }
                        } else {
                            $this->logger->info("- website id $idSite is not using the tracker");
                        }
                    } elseif ($hasWebsiteDayFinishedSinceLastRun) {
                        $this->logger->info("Day has finished for website id $idSite since last run");
                    } elseif ($isOldReportInvalidatedForWebsite) {
                        $this->logger->info("Old report was invalidated for website id $idSite");
                    }
                }

                /**
                 * This event is triggered before the cron archiving process starts archiving data for a single
                 * site.
                 *
                 * @param int $idSite The ID of the site we're archiving data for.
                 */
                Piwik::postEvent('CronArchive.archiveSingleSite.start', array($idSite));

                $completed = $this->archiveSingleSite($idSite, $requestsBefore);

                /**
                 * This event is triggered immediately after the cron archiving process finishes archiving
                 * data for a single site.
                 *
                 * @param int $idSite The ID of the site we're archiving data for.
                 * @param bool $completed The value returned by archiveSingleSite() for this site.
                 */
                Piwik::postEvent('CronArchive.archiveSingleSite.finish', array($idSite, $completed));
            } while (!empty($idSite));

            $this->logger->info("Done archiving!");

            $this->logger->info("Total visits for today across archived websites: " . $this->visitsToday);

            // the running skip counter is replaced by a figure derived from the totals
            $totalWebsites = count($this->allWebsites);
            $this->skipped = $totalWebsites - $this->websitesWithVisitsSinceLastRun;

            $this->logger->info("Archived today's reports for {$this->websitesWithVisitsSinceLastRun} websites");
            $this->logger->info("Archived week/month/year for {$this->archivedPeriodsArchivesWebsite} websites");
            $this->logger->info("Skipped {$this->skipped} websites");
            $this->logger->info("- {$this->skippedDayNoRecentData} skipped because no new visit since the last script execution");
            $this->logger->info("- {$this->skippedDayArchivesWebsites} skipped because existing daily reports are less than {$this->todayArchiveTimeToLive} seconds old");
            $this->logger->info("- {$this->skippedPeriodsArchivesWebsite} skipped because existing week/month/year periods reports are less than {$this->processPeriodsMaximumEverySeconds} seconds old");

            if ($this->skippedPeriodsNoDataInPeriod) {
                $this->logger->info("- {$this->skippedPeriodsNoDataInPeriod} skipped periods archiving because no visit in recent days");
            }

            if ($this->skippedDayOnApiError) {
                $this->logger->info("- {$this->skippedDayOnApiError} skipped because got an error while querying reporting API");
            }

            $this->logger->info("Total API requests: {$this->requests}");

            //DONE: done/total, visits, wtoday, wperiods, reqs, time, errors[count]: first eg.
            // no percentage is printed when the queue holds no site (avoids a division by zero)
            $percent = $this->websites->getNumSites() == 0
                ? ""
                : " " . round($this->processed * 100 / $this->websites->getNumSites(), 0) . "%";

            // NOTE(review): the text used when there are no errors was not visible in the
            // reviewed source; "no error" is a reconstruction - confirm against upstream.
            $this->logger->info("done: " .
                $this->processed . "/" . $this->websites->getNumSites() . "" . $percent . ", " .
                $this->visitsToday . " vtoday, $this->websitesWithVisitsSinceLastRun wtoday, {$this->archivedPeriodsArchivesWebsite} wperiods, " .
                $this->requests . " req, " . round($timer->getTimeMs()) . " ms, " .
                (empty($this->errors)
                    ? "no error"
                    : (count($this->errors) . " errors."))
            );

            $this->logger->info($timer->__toString());
        }
    
        /**
         * End of the script
         *
         * Posts `CronArchive.end`. Without errors, the completion timestamp is stored and
         * nothing else happens; otherwise every collected error is logged and the run is
         * reported through logFatalError().
         */
        public function end()
        {
            /**
             * This event is triggered after archiving.
             *
             * @param CronArchive $this
             */
            Piwik::postEvent('CronArchive.end', array($this));

            if (empty($this->errors)) {
                // No error -> Logs the successful script execution until completion
                Option::set(self::OPTION_ARCHIVING_FINISHED_TS, time());
                return;
            }

            $this->logSection("SUMMARY OF ERRORS");
            foreach ($this->errors as $error) {
                // do not logError since errors are already in stderr
                $this->logger->info("Error: " . $error);
            }

            $summary = count($this->errors) . " total errors during this script execution, please investigate and try and fix these errors.";
            $this->logFatalError($summary);
        }
    
        /**
         * Sets {@link $segmentsToForce} to the definitions of the stored segments whose
         * `idsegment` appears in $idSegments.
         *
         * @param int[] $idSegments
         */
        public function setSegmentsToForceFromSegmentIds($idSegments)
        {
            /** @var SegmentEditorModel $model */
            $model = StaticContainer::get('Piwik\Plugins\SegmentEditor\Model');

            $definitions = array();
            foreach ($model->getAllSegmentsAndIgnoreVisibility() as $index => $storedSegment) {
                if (!in_array($storedSegment['idsegment'], $idSegments)) {
                    continue;
                }

                // each definition stays under the key its segment had in the model's result
                $definitions[$index] = $storedSegment['definition'];
            }

            $this->segmentsToForce = $definitions;
        }
    
    
        /**
         * Runs the scheduled tasks through the CoreAdminHome API, unless
         * {@link $disableScheduledTasks} is set.
         */
        public function runScheduledTasks()
        {
            $this->logSection("SCHEDULED TASKS");

            if ($this->disableScheduledTasks) {
                $this->logger->info("Scheduled tasks are disabled with --disable-scheduled-tasks");
                return;
            }

            // TODO: this is a HACK to get the purgeOutdatedArchives task to work when run below. without
            //       it, the task will not run because we no longer run the tasks through CliMulti.
            //       harder to implement alternatives include:
            //       - moving CronArchive logic to DI and setting a flag in the class when the whole process
            //         runs
            //       - setting a new DI environment for core:archive which CoreAdminHome can use to conditionally
            //         enable/disable the task
            $_GET['trigger'] = 'archivephp';

            CoreAdminHomeAPI::getInstance()->runScheduledTasks();
        }
    
    mattab's avatar
    mattab a validé
        private function archiveSingleSite($idSite, $requestsBefore)
    
        {
            $timerWebsite = new Timer;
    
            $lastTimestampWebsiteProcessedPeriods = $lastTimestampWebsiteProcessedDay = false;
    
            if ($this->archiveAndRespectTTL) {
    
    mattab's avatar
    mattab a validé
                Option::clearCachedOption($this->lastRunKey($idSite, "periods"));
    
                $lastTimestampWebsiteProcessedPeriods = $this->getPeriodLastProcessedTimestamp($idSite);
    
    mattab's avatar
    mattab a validé
                Option::clearCachedOption($this->lastRunKey($idSite, "day"));
    
                $lastTimestampWebsiteProcessedDay = $this->getDayLastProcessedTimestamp($idSite);
    
            }
    
            $this->updateIdSitesInvalidatedOldReports();
    
            // For period other than days, we only re-process the reports at most
            // 1) every $processPeriodsMaximumEverySeconds
            $secondsSinceLastExecution = time() - $lastTimestampWebsiteProcessedPeriods;
    
            // if timeout is more than 10 min, we account for a 5 min processing time, and allow trigger 1 min earlier
            if ($this->processPeriodsMaximumEverySeconds > 10 * 60) {
                $secondsSinceLastExecution += 5 * 60;
            }
    
            $shouldArchivePeriods = $secondsSinceLastExecution > $this->processPeriodsMaximumEverySeconds;
            if (empty($lastTimestampWebsiteProcessedPeriods)) {
                // 2) OR always if script never executed for this website before
                $shouldArchivePeriods = true;
            }
    
            // (*) If the website is archived because it is a new day in its timezone
            // We make sure all periods are archived, even if there is 0 visit today
    
    mattab's avatar
    mattab a validé
            $dayHasEndedMustReprocess = in_array($idSite, $this->websiteDayHasFinishedSinceLastRun);
    
            if ($dayHasEndedMustReprocess) {
                $shouldArchivePeriods = true;
            }
    
            // (*) If there was some old reports invalidated for this website
            // we make sure all these old reports are triggered at least once
    
            $websiteInvalidatedShouldReprocess = $this->isOldReportInvalidatedForWebsite($idSite);
    
            if ($websiteInvalidatedShouldReprocess) {
    
                $shouldArchivePeriods = true;
            }
    
    
    mattab's avatar
    mattab a validé
            $websiteIdIsForced = in_array($idSite, $this->shouldArchiveSpecifiedSites);
    
                $shouldArchivePeriods = true;
            }
    
            // Test if we should process this website at all
            $elapsedSinceLastArchiving = time() - $lastTimestampWebsiteProcessedDay;
    
            // Skip this day archive if last archive was older than TTL
            $existingArchiveIsValid = ($elapsedSinceLastArchiving < $this->todayArchiveTimeToLive);
    
            $skipDayArchive = $existingArchiveIsValid;
    
            // Invalidate old website forces the archiving for this site
    
            $skipDayArchive = $skipDayArchive && !$websiteInvalidatedShouldReprocess;
    
    
            // Also reprocess when day has ended since last run
            if ($dayHasEndedMustReprocess
    
    diosmosis's avatar
    diosmosis a validé
                // it might have reprocessed for that day by another cron
    
    mattab's avatar
    mattab a validé
                && !$this->hasBeenProcessedSinceMidnight($idSite, $lastTimestampWebsiteProcessedDay)
    
                && !$existingArchiveIsValid) {
                $skipDayArchive = false;
            }
    
            if ($websiteIdIsForced) {
                $skipDayArchive = false;
            }
    
    
            if ($skipDayArchive) {
    
                $this->logger->info("Skipped website id $idSite, already done "
    
                    . $this->formatter->getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true)
    
                    . " ago, " . $timerWebsite->__toString());
                $this->skippedDayArchivesWebsites++;
                $this->skipped++;
                return false;
            }
    
    
            /**
             * Trigger archiving for days
             */
    
            try {
                $shouldProceed = $this->processArchiveDays($idSite, $lastTimestampWebsiteProcessedDay, $shouldArchivePeriods, $timerWebsite);
    
            } catch (UnexpectedWebsiteFoundException $e) {
    
                // this website was deleted in the meantime
                $shouldProceed = false;
    
                $this->logger->info("Skipped website id $idSite, got: UnexpectedWebsiteFoundException, " . $timerWebsite->__toString());
    
                return false;
            }
    
            if (!$shouldArchivePeriods) {
    
                $this->logger->info("Skipped website id $idSite periods processing, already done "
    
                    . $this->formatter->getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true)
    
                    . " ago, " . $timerWebsite->__toString());
    
                $this->skippedPeriodsArchivesWebsite++;
    
                $this->skipped++;
                return false;
            }
    
    
            /**
             * Trigger archiving for non-day periods
             */
    
            $success = $this->processArchiveForPeriods($idSite, $lastTimestampWebsiteProcessedPeriods);
    
            // Record successful run of this website's periods archiving
    
    mattab's avatar
    mattab a validé
                Option::set($this->lastRunKey($idSite, "periods"), time());
    
            if (!$success) {
    
                // cancel marking the site as reprocessed
    
                if ($websiteInvalidatedShouldReprocess) {
    
                    $store = new SitesToReprocessDistributedList();
                    $store->add($idSite);
    
            $this->archivedPeriodsArchivesWebsite++;
    
            $requestsWebsite = $this->requests - $requestsBefore;
    
            $this->logger->info("Archived website id = $idSite, "
    
                . $requestsWebsite . " API requests, "
    
                . $timerWebsite->__toString()
                . " [" . $this->websites->getNumProcessedWebsites() . "/"
                . $this->websites->getNumSites()
                . " done]");
    
            return true;
        }
    
    
        /**
         * Archives the week, month and year periods of a site, then its custom date ranges.
         *
         * @param int $idSite
         * @param int|false $lastTimestampWebsiteProcessedPeriods Passed to getApiDateParameter() to
         *                                                       build the date for each period.
         * @return bool true only if no period was skipped and every archiving request succeeded
         */
        private function processArchiveForPeriods($idSite, $lastTimestampWebsiteProcessedPeriods)
        {
            $success = true;

            foreach (array('week', 'month', 'year') as $period) {
                if (!$this->shouldProcessPeriod($period)) {
                    // if any period was skipped, we do not mark the Periods archiving as successful
                    $success = false;
                    continue;
                }

                $timer = new Timer();

                $date = $this->getApiDateParameter($idSite, $period, $lastTimestampWebsiteProcessedPeriods);
                $periodArchiveWasSuccessful = $this->archiveReportsFor($idSite, $period, $date, $archiveSegments = true, $timer);
                $success = $periodArchiveWasSuccessful && $success;
            }

            if ($this->shouldProcessPeriod('range')) {
                // period=range
                $customDateRangesToPreProcessForSite = $this->getCustomDateRangeToPreProcess($idSite);
                foreach ($customDateRangesToPreProcessForSite as $dateRange) {
                    $timer = new Timer();
                    $archiveSegments = false; // do not pre-process segments for period=range #7611
                    $periodArchiveWasSuccessful = $this->archiveReportsFor($idSite, 'range', $dateRange, $archiveSegments, $timer);
                    $success = $periodArchiveWasSuccessful && $success;
                }
            }

            return $success;
        }
    
    mattab's avatar
    mattab a validé
         * Returns base URL to process reports for the $idSite on a given $period
    
    Emir Beganovic's avatar
    Emir Beganovic a validé
         *
         * @param string $idSite
         * @param string $period
         * @param string $date
         * @param bool|false $segment
         * @return string
    
        private function getVisitsRequestUrl($idSite, $period, $date, $segment = false)
    
            $request = "?module=API&method=API.get&idSite=$idSite&period=$period&date=" . $date . "&format=php";
    
            if ($segment) {
                $request .= '&segment=' . urlencode($segment);
                ;
    
        }
    
        /**
         * Loads the segments returned by SettingsPiwik::getKnownSegmentsToArchive() and logs them.
         *
         * @return array the segments, or an empty array when there are none
         */
        private function initSegmentsToArchive()
        {
            $segments = \Piwik\SettingsPiwik::getKnownSegmentsToArchive();

            if (empty($segments)) {
                return array();
            }

            $this->logger->info("- Will pre-process " . count($segments) . " Segments for each website and each period: " . implode(", ", $segments));

            return $segments;
        }
    
        /**
         * Runs the "day" archiving step for one website.
         *
         * Marks the run via Option::set() before issuing the request, asks the API for the day visits,
         * and either cancels that mark on an empty/invalid response or updates the processed/skipped
         * counters before handing over to archiveReportsFor().
         *
         * @param $idSite id of the website to process
         * @param $lastTimestampWebsiteProcessedDay passed to getApiDateParameter() as the "process days since" value,
         *        unless the site was invalidated or all sites are being archived, in which case false is passed
         * @param $shouldArchivePeriods when false, a website without any visit today is skipped
         * @param $timerWebsite Timer whose string form is appended to the log messages
         * @return bool true when day archiving is disabled by shouldProcessPeriod("day"); false when the API
         *         response is empty/invalid or the website is skipped.
         *         NOTE(review): the return value after a successful day archive is not visible in this excerpt - confirm.
         */
    
        protected function processArchiveDays($idSite, $lastTimestampWebsiteProcessedDay, $shouldArchivePeriods, Timer $timerWebsite)
    
    mattab's avatar
    mattab a validé
            if (!$this->shouldProcessPeriod("day")) {
                // skip day archiving and proceed to period processing
                return true;
            }
    
    
            $timer = new Timer();
    
    
            // Fake that the request is already done, so that other core:archive commands
            // running do not grab the same website from the queue
            Option::set($this->lastRunKey($idSite, "day"), time());
    
            // Remove this website from the list of websites to be invalidated
            // since it's now just about to being re-processed, makes sure another running cron archiving process
            // does not archive the same idSite
    
            $websiteInvalidatedShouldReprocess = $this->isOldReportInvalidatedForWebsite($idSite);
            if ($websiteInvalidatedShouldReprocess) {
    
                $store = new SitesToReprocessDistributedList();
                $store->remove($idSite);
    
            }
    
            // when some data was purged from this website
            // we make sure we query all previous days/weeks/months
            $processDaysSince = $lastTimestampWebsiteProcessedDay;
    
            if ($websiteInvalidatedShouldReprocess
    
                // when --force-all-websites option,
                // also forces to archive last52 days to be safe
                || $this->shouldArchiveAllSites) {
                $processDaysSince = false;
            }
    
    
            $date = $this->getApiDateParameter($idSite, "day", $processDaysSince);
            $url = $this->getVisitsRequestUrl($idSite, "day", $date);
    
            $content = $this->request($url);
    
            $daysResponse = @unserialize($content);
    
    
            if (empty($content)
    
                || !is_array($daysResponse)
                || count($daysResponse) == 0
    
                // cancel the successful run flag
    
                Option::set($this->lastRunKey($idSite, "day"), 0);
    
    
                // cancel marking the site as reprocessed
    
                if ($websiteInvalidatedShouldReprocess) {
    
                    $store = new SitesToReprocessDistributedList();
                    $store->add($idSite);
    
                $this->logError("Empty or invalid response '$content' for website id $idSite, " . $timerWebsite->__toString() . ", skipping");
    
                $this->skippedDayOnApiError++;
    
                $this->skipped++;
                return false;
            }
    
    
            $visitsToday = $this->getVisitsLastPeriodFromApiResponse($daysResponse);
            $visitsLastDays = $this->getVisitsFromApiResponse($daysResponse);
    
            $this->requests++;
            $this->processed++;
    
            // If there is no visit today and we don't need to process this website, we can skip remaining archives
    
            if (
                0 == $visitsToday
    
                && !$shouldArchivePeriods
            ) {
    
                $this->logger->info("Skipped website id $idSite, no visit today, " . $timerWebsite->__toString());
    
                $this->skippedDayNoRecentData++;
    
                $this->skipped++;
                return false;
            }
    
    
                && !$shouldArchivePeriods
                && $this->shouldArchiveAllSites
            ) {
    
                $humanReadableDate = $this->formatReadableDateRange($date);
                $this->logger->info("Skipped website id $idSite, no visits in the $humanReadableDate days, " . $timerWebsite->__toString());
    
                $this->skippedPeriodsNoDataInPeriod++;
    
                $this->skipped++;
                return false;
            }
    
            $this->visitsToday += $visitsToday;
            $this->websitesWithVisitsSinceLastRun++;
    
    mattab's avatar
    mattab a validé
            $this->archiveReportsFor($idSite, "day", $this->getApiDateParameter($idSite, "day", $processDaysSince), $archiveSegments = true, $timer, $visitsToday, $visitsLastDays);
    
    Emir Beganovic's avatar
    Emir Beganovic a validé
        /**
         * Builds the list of segments to pre-process for one website.
         *
         * The list held in $this->segments (shared by all websites) is merged with the
         * segments known for this website only, and duplicate values are removed.
         *
         * @param $idSite
         * @return array
         */
        private function getSegmentsForSite($idSite)
        {
            return array_unique(
                array_merge(
                    $this->segments,
                    SettingsPiwik::getKnownSegmentsToArchiveForSite($idSite)
                )
            );
        }
    
    
        /**
         * Turns an API date parameter such as "last52" or "previous30" into a readable
         * label ("last 52", "previous 30"). Any other value is prefixed with "last ".
         *
         * @param string $date
         * @return string
         */
        private function formatReadableDateRange($date)
        {
            // "last" is tested before "previous"; the first keyword found at offset 0 wins.
            foreach (array('last', 'previous') as $keyword) {
                if (strpos($date, $keyword) === 0) {
                    return $keyword . ' ' . str_replace($keyword, '', $date);
                }
            }

            return 'last ' . $date;
        }
    
    
    mattab's avatar
    mattab a validé
         * Will trigger API requests for the specified Website $idSite,
    
         * for the specified $period, for all segments that are pre-processed for this website.
         * Requests are triggered using cURL multi handle
         *
    
    mattab's avatar
    mattab a validé
         * @param $idSite int
    
    mattab's avatar
    mattab a validé
         * @param $period string
         * @param $date string
    
         * @param $archiveSegments bool Whether to pre-process all custom segments
    
         * @param Timer $periodTimer
    
    mattab's avatar
    mattab a validé
         * @param $visitsToday int Visits for the "day" period of today
         * @param $visitsLastDays int Visits for the last N days periods
    
         * @return bool True on success, false if some request failed
         */
    
    mattab's avatar
    mattab a validé
        private function archiveReportsFor($idSite, $period, $date, $archiveSegments, Timer $periodTimer, $visitsToday = 0, $visitsLastDays = 0)
    
            // Base API.get request for the website without any segment filter.
            $url = $this->getVisitsRequestUrl($idSite, $period, $date, $segment = false);
            // NOTE(review): makeRequestUrl() is defined outside this excerpt; presumably it completes the
            // relative query string into the final request URL - confirm.
            $url = $this->makeRequestUrl($url);
    
    mattab's avatar
    mattab a validé
            $visitsInLastPeriod = $visitsToday;
            $visitsInLastPeriods = $visitsLastDays;
    
            // already processed above for "day"
            if ($period != "day") {
    
                $this->logArchiveWebsite($idSite, $period, $date);
    
            $segmentRequestsCount = 0;
    
            if ($archiveSegments) {
    
                $urlsWithSegment = $this->getUrlsWithSegment($idSite, $period, $date);
                $urls = array_merge($urls, $urlsWithSegment);
    
                $segmentRequestsCount = count($urlsWithSegment);
    
                // in case several segment URLs for period=range had the date= rewritten to the same value, we only call API once
                $urls = array_unique($urls);
    
            $cliMulti->setConcurrentProcessesLimit($this->getConcurrentRequestsPerWebsite());
    
            $response = $cliMulti->request($urls);
    
            foreach ($urls as $index => $url) {
    
                $content = array_key_exists($index, $response) ? $response[$index] : null;
    
                $success = $success && $this->checkResponse($content, $url);
    
                if ($noSegmentUrl === $url && $success) {
    
                    $stats = @unserialize($content);
                    if (!is_array($stats)) {
                        $this->logError("Error unserializing the following response from $url: " . $content);
                    }
    
                    if ($period == 'range') {
    
                        // range returns one dataset (the sum of data between the two dates),
                        // whereas other periods return lastN which is N datasets in an array. Here we make our period=range dataset look like others:
                        $stats = array($stats);
                    }
    
    
                    $visitsInLastPeriods = $this->getVisitsFromApiResponse($stats);
    
    mattab's avatar
    mattab a validé
                    $visitsInLastPeriod = $this->getVisitsLastPeriodFromApiResponse($stats);
    
    mattab's avatar
    mattab a validé
            $this->logArchivedWebsite($idSite, $period, $date, $segmentRequestsCount, $visitsInLastPeriods, $visitsInLastPeriod, $periodTimer);
    
            return $success;
        }
    
        /**
         * Logs a section in the output
    
    Emir Beganovic's avatar
    Emir Beganovic a validé
         *
         * @param string $title
    
         */
        private function logSection($title = "")
        {
    
            // Separator line written at info level before the section title.
            $this->logger->info("---------------------------");
    
                // NOTE(review): the deeper indentation suggests this call sits inside a conditional
                // that is not visible in this excerpt - confirm.
                $this->logger->info($title);
    
        /**
         * Records an error message and sends it to the logger at error level.
         *
         * Unless the PIWIK_ARCHIVE_NO_TRUNCATE constant is defined, the message is first cut
         * to self::TRUNCATE_ERROR_MESSAGE_SUMMARY bytes. Newlines and tabs are then replaced
         * by spaces so the message stays on one line.
         *
         * @param string $m
         */
        public function logError($m)
        {
            $truncate = !defined('PIWIK_ARCHIVE_NO_TRUNCATE');
            $summary  = $truncate ? substr($m, 0, self::TRUNCATE_ERROR_MESSAGE_SUMMARY) : $m;
            $summary  = str_replace(array("\n", "\t"), " ", $summary);

            // Keep the message in the error list, then log it.
            $this->errors[] = $summary;
            $this->logger->error($summary);
        }
    
        // Builds a diagnostic message for an API request whose response was invalid.
        // NOTE(review): the end of this method (what is done with $message) is not visible in this excerpt.
        private function logNetworkError($url, $response)
        {
            $message = "Got invalid response from API request: $url. ";
            // An empty response gets troubleshooting hints; otherwise the raw response is quoted.
            if (empty($response)) {
    
                $message .= "The response was empty. This usually means a server error. A solution to this error is generally to increase the value of 'memory_limit' in your php.ini file. ";
    
                // The hint about which error log to inspect depends on whether CliMulti reports async support.
                if($this->makeCliMulti()->supportsAsync()) {
                    $message .= " For more information and the error message please check in your PHP CLI error log file. As this core:archive command triggers PHP processes over the CLI, you can find where PHP CLI logs are stored by running this command: php -i | grep error_log";
                } else {
                    $message .= " For more information and the error message please check your web server's error Log file. As this core:archive command triggers PHP processes over HTTP, you can find the error message in your Piwik's web server error logs. ";
                }
    
            } else {
                $message .= "Response was '$response'";
            }
    
    mattab's avatar
    mattab a validé
    
    
         * Issues a request to $url eg. "?module=API&method=API.getDefaultMetricTranslations&format=original&serialize=1"
         *
    
    Emir Beganovic's avatar
    Emir Beganovic a validé
         * @param string $url
         * @return string
    
         */
        private function request($url)
        {
    
            $url = $this->makeRequestUrl($url);