Skip to content
Extraits de code Groupes Projets
archive.php 36,3 ko
Newer Older
mattpiwik's avatar
mattpiwik a validé
<?php
use Piwik\ArchiveProcessor\Rules;
mattab's avatar
mattab a validé
use Piwik\Config;
use Piwik\Piwik;
use Piwik\Common;
use Piwik\FrontController;
use Piwik\Http;
use Piwik\Version;
use Piwik\Url;
use Piwik\Timer;
mattpiwik's avatar
mattpiwik a validé
// Help text printed by Archiving::usage(). The script name is taken from argv when
// available (it is absent when the script is triggered over HTTP).
$USAGE = "
Usage: 
	/path/to/cli/php \"" . (isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : '') . "\" --url=http://your-website.org/path/to/piwik/ [arguments]

Arguments:
	--url=[piwik-server-url]
			Mandatory argument. Must be set to the Piwik base URL. 
			For example: --url=http://analytics.example.org/ or --url=https://example.org/piwik/ 
	--force-all-websites
			If specified, the script will trigger archiving on all websites.
			This can be used along with --force-all-periods to trigger archiving on all websites and all periods.
	--force-all-periods[=seconds]
			Triggers archiving on websites with some traffic in the last [seconds] seconds.
			[seconds] defaults to 604800 which is 7 days.
			For example: --force-all-periods=86400 will archive websites that had visits in the last 24 hours.
	--force-timeout-for-periods=[seconds]
			The current week/ current month/ current year will be processed at most every [seconds].
			If not specified, defaults to 3600.
	--accept-invalid-ssl-certificate
			It is _NOT_ recommended to use this argument. Instead, you should use a valid SSL certificate!
			It can be useful if you specified --url=https://... or if you are using Piwik with force_ssl=1
	--help
			Displays usage

Notes:
	* It is recommended to run the script with the argument --url=[piwik-server-url] only. Other arguments are not required. 
	* This script should be executed every hour via crontab, or as a deamon.
	* You can also run it via http:// by specifying the Super User &token_auth=XYZ as a parameter ('Web Cron'), 
	  but it is recommended to run it via command line/CLI instead.
	* If you use Piwik to track dozens/hundreds of websites, please let the team know at hello@piwik.org
	  it makes us happy to learn successful user stories :)
	* Enjoy!
";
mattpiwik's avatar
mattpiwik a validé
/*
Ideas for improvements:
	- Feature request: Add option to log completion even with errors: archive_script_ignore_errors = 0
	- Known bug: when adding new segments to preprocess, script will assume that data was processed for this segment in the past
	- Document: how to run the script as a daemon for near real time / constant processing
	- The script can be executed multiple times in parallel but with known issues:
	  - scheduled task could send multiple reports 
	  - there is no documentation
	  - it does not work well with --force-all-periods etc.
	- Possible performance improvement: Run first websites which are faster to process (weighted by visits and/or time to generate the last daily report)
	  This would make sure that huge websites do not 'block' processing of smaller websites' reports.  
	- Core: check that on the first day of the month, if last month is requested from the UI, 
	  it returns the last temporary monthly report generated, if last month has not yet been processed / finalized
 */
// Bootstrap constants, defined before index.php is loaded.
// NOTE(review): the PIWIK_ENABLE_* flags presumably tell index.php not to dispatch a
// request, install its error handler or start a session -- confirm against index.php.
define('PIWIK_INCLUDE_PATH', realpath(dirname(__FILE__) . "/../.."));
define('PIWIK_USER_PATH', PIWIK_INCLUDE_PATH);
define('PIWIK_ENABLE_DISPATCH', false);
define('PIWIK_ENABLE_ERROR_HANDLER', false);
define('PIWIK_ENABLE_SESSION_START', false);
if (!defined('PIWIK_MODE_ARCHIVE')) {
    define('PIWIK_MODE_ARCHIVE', true);
}

require_once PIWIK_INCLUDE_PATH . "/index.php";
require_once PIWIK_INCLUDE_PATH . "/core/API/Request.php";

// Instantiate outside of the try block so that $archiving is guaranteed to be
// defined when the catch handler reports the failure.
$archiving = new Archiving;
try {
    $archiving->init();
    $archiving->run();
    $archiving->end();
} catch (Exception $e) {
    $archiving->logFatalError($e->getMessage());
}

mattpiwik's avatar
mattpiwik a validé
class Archiving
{
    const OPTION_ARCHIVING_FINISHED_TS = "LastCompletedFullArchiving";
    const TRUNCATE_ERROR_MESSAGE_SUMMARY = 400;

    // Seconds window to look back to define "active websites" to archive on the first archive.php script execution
mattab's avatar
mattab a validé
    private $firstRunActiveWebsitesWithTraffic = 604800; // 7 days

    // By default, we only process the current week/month/year at most once an hour
mattab's avatar
mattab a validé
    private $processPeriodsMaximumEverySeconds = 3600;

    private $websiteDayHasFinishedSinceLastRun = array();
    private $idSitesInvalidatedOldReports = array();
    private $piwikUrl = false;
    private $token_auth = false;
    private $visits = 0;
    private $requests = 0;
    private $output = '';
    private $shouldResetState = false;
    private $shouldArchiveAllWebsites = false;
    private $acceptInvalidSSLCertificate = false;
    /**
     * By default, will process last 52 days/weeks/months/year.
     * It will be overwritten by the number of days since last archiving ran until completion.
     */
    const DEFAULT_DATE_LAST = 52;
    // Since weeks are not used in yearly archives, we make sure that all possible weeks are processed
    const DEFAULT_DATE_LAST_WEEKS = 520;

mattab's avatar
mattab a validé
    private $timeLastCompleted = false;
    private $requestPrepend = '&trigger=archivephp';
    private $errors = array();

    /**
     * Prepares an archiving run.
     *
     * Runs the init* steps in order, switches to Super User, checks that the configured
     * Piwik URL answers an API request with a serialized array, logs the effective
     * timeouts and finally builds the list of websites to process.
     *
     * Calls logFatalError() when the Piwik URL does not return a valid API response.
     */
    public function init()
    {
        $this->initCore();
        $this->initTokenAuth();
        $this->initCheckCli();
        $this->initLog();
        $this->displayHelp();
        $this->initPiwikHost();
        $this->initStateFromParameters();
        Piwik::setUserIsSuperUser(true);

        $this->logSection("INIT");
        $this->log("Querying Piwik API at: {$this->piwikUrl}");
        $this->log("Running Piwik " . Version::VERSION . " as Super User: " . $this->login);

        // Must be set before the first request() call, which reads this flag
        $this->acceptInvalidSSLCertificate = $this->isParameterSet("accept-invalid-ssl-certificate");

        // Test the specified piwik URL is valid: the probe must unserialize to an array
        $response = $this->request("?module=API&method=API.getDefaultMetricTranslations&format=original&serialize=1");
        $responseUnserialized = @unserialize($response);
        if ($response === false
            || !is_array($responseUnserialized)
        ) {
            $this->logFatalError("The Piwik URL {$this->piwikUrl} does not seem to be pointing to a Piwik server. Response was '$response'.");
        }

        $this->log("Notes");
        // Information about timeout
        $this->todayArchiveTimeToLive = Rules::getTodayArchiveTimeToLive();
        $this->log("- Reports for today will be processed at most every " . $this->todayArchiveTimeToLive
            . " seconds. You can change this value in Piwik UI > Settings > General Settings.");
        $this->log("- Reports for the current week/month/year will be refreshed at most every "
            . $this->processPeriodsMaximumEverySeconds . " seconds.");

        $this->initSegmentsToArchive();

        // Report how long ago the last error-free run completed, when known
        if ($this->timeLastCompleted !== false) {
            $dateLast = time() - $this->timeLastCompleted;
            $this->log("- Archiving was last executed without error " . Piwik::getPrettyTimeFromSeconds($dateLast, true, false) . " ago");
        }

        $this->initWebsitesToProcess();
        flush();
    }

    /**
     * Returns URL to process reports for the $idsite on a given period with no segment
     */
mattab's avatar
mattab a validé
    private function getVisitsRequestUrl($idsite, $period, $lastTimestampWebsiteProcessed = false)
    {
        if (!empty($lastTimestampWebsiteProcessed)) {
            // Number of whole days since the last run, plus 2 so that at least "last2"
            // is requested; never more than the default window.
            $daysToRequest = floor((time() - $lastTimestampWebsiteProcessed) / 86400) + 2;
            $dateLast = ($daysToRequest > self::DEFAULT_DATE_LAST)
                ? self::DEFAULT_DATE_LAST
                : $daysToRequest;
        } else {
            // No previous run known: request the full default window (larger for weeks)
            $dateLast = ($period == 'week')
                ? self::DEFAULT_DATE_LAST_WEEKS
                : self::DEFAULT_DATE_LAST;
        }

        $query = "?module=API&method=VisitsSummary.getVisits&idSite=$idsite&period=$period&date=last" . $dateLast;
        $query .= "&format=php&token_auth=" . $this->token_auth;
        return $query;
    }

mattab's avatar
mattab a validé
    /**
     * Returns the option name under which the last run timestamp is stored
     * for the given website and period.
     *
     * @param int $idsite
     * @param string $period This file passes "day" or "periods"
     * @return string
     */
    private function lastRunKey($idsite, $period)
    {
        return Piwik::getArchiveCronLastRunOptionName($period, $idsite);
    }

    /**
     * Main function, runs archiving on all websites with new activity
     */
    public function run()
    {
        $websitesWithVisitsSinceLastRun =
        $skippedPeriodsArchivesWebsite =
        $skippedDayArchivesWebsites =
        $skipped =
        $processed =
        $archivedPeriodsArchivesWebsite = 0;

        // Timer for the whole run, reported in the summary
        $timer = new Timer;

        $this->logSection("START");
        $this->log("Starting Piwik reports archiving...");

        $this->websites = array_unique($this->websites);
        foreach ($this->websites as $idsite) {
            flush();
            $requestsBefore = $this->requests;
            if ($idsite <= 0) {
                continue;
            }

            // Timer for this website only, appended to the per-website log lines
            $timerWebsite = new Timer;

            $lastTimestampWebsiteProcessedPeriods = $lastTimestampWebsiteProcessedDay = false;
            if (!$this->shouldResetState) {
                $lastTimestampWebsiteProcessedPeriods = Piwik_GetOption($this->lastRunKey($idsite, "periods"));
                $lastTimestampWebsiteProcessedDay = Piwik_GetOption($this->lastRunKey($idsite, "day"));
            }

            // For period other than days, we only re-process the reports at most
            // 1) every $processPeriodsMaximumEverySeconds
            $secondsSinceLastExecution = time() - $lastTimestampWebsiteProcessedPeriods;

            // When the timeout is above 10 minutes, pad the elapsed time by 5 minutes
            // so that a run triggered slightly early still refreshes the periods
            if ($this->processPeriodsMaximumEverySeconds > 10 * 60) {
                $secondsSinceLastExecution += 5 * 60;
            }
            $shouldArchivePeriods = $secondsSinceLastExecution > $this->processPeriodsMaximumEverySeconds;
            if (empty($lastTimestampWebsiteProcessedPeriods)) {
                // 2) OR always if script never executed for this website before
                $shouldArchivePeriods = true;
            }

            // (*) If the website is archived because it is a new day in its timezone
            // We make sure all periods are archived, even if there is 0 visit today
            $dayHasEndedMustReprocess = in_array($idsite, $this->websiteDayHasFinishedSinceLastRun);
            if ($dayHasEndedMustReprocess) {
                $shouldArchivePeriods = true;
            }

            // (*) If there was some old reports invalidated for this website
            // we make sure all these old reports are triggered at least once
            $websiteIsOldDataInvalidate = in_array($idsite, $this->idSitesInvalidatedOldReports);
            if ($websiteIsOldDataInvalidate) {
                $shouldArchivePeriods = true;
            }

            // Test if we should process this website at all
            $elapsedSinceLastArchiving = time() - $lastTimestampWebsiteProcessedDay;
            if (!$websiteIsOldDataInvalidate // Invalidate old website forces the archiving for this site
                && !$dayHasEndedMustReprocess // Also reprocess when day has ended since last run
                && $elapsedSinceLastArchiving < $this->todayArchiveTimeToLive
            ) {
                $this->log("Skipped website id $idsite, already processed today's report in recent run, "
                    . Piwik::getPrettyTimeFromSeconds($elapsedSinceLastArchiving, true, false)
                    . " ago, " . $timerWebsite->__toString());
                $skippedDayArchivesWebsites++;
                $skipped++;
                continue;
            }

            // Fake that the request is already done, so that other archive.php
            // running do not grab the same website from the queue
            Piwik_SetOption($this->lastRunKey($idsite, "day"), time());

            $url = $this->getVisitsRequestUrl($idsite, "day",
                // when some data was purged from this website
                // we make sure we query all previous days/weeks/months
                ($websiteIsOldDataInvalidate
                    // when --force-all-websites option,
                    // also forces to archive last52 days to be safe
                    || $this->shouldArchiveAllWebsites)
                    ? false
                    : $lastTimestampWebsiteProcessedDay
            );
            $content = $this->request($url);
            $response = @unserialize($content);

            if (empty($content)
                || !is_array($response)
                || count($response) == 0
            ) {
                // cancel the successful run flag
                Piwik_SetOption($this->lastRunKey($idsite, "day"), 0);

                $this->log("WARNING: Empty or invalid response '$content' for website id $idsite, " . $timerWebsite->__toString() . ", skipping");
                $skipped++;
                continue;
            }
            // The last entry of the response is treated as today's visit count
            $visitsToday = end($response);
            $this->requests++;
            $processed++;

            // If there is no visit today and we don't need to process this website, we can skip remaining archives
            if ($visitsToday <= 0
                && !$shouldArchivePeriods
            ) {
                $this->log("Skipped website id $idsite, no visit today, " . $timerWebsite->__toString());
                $skipped++;
                continue;
            }

            $visitsAllDays = array_sum($response);
            if ($visitsAllDays == 0
                && !$shouldArchivePeriods
                && $this->shouldArchiveAllWebsites
            ) {
                $this->log("Skipped website id $idsite, no visits in the last " . count($response) . " days, " . $timerWebsite->__toString());
                $skipped++;
                continue;
            }
            $this->visits += $visitsToday;
            $websitesWithVisitsSinceLastRun++;
            $this->archiveVisitsAndSegments($idsite, "day", $lastTimestampWebsiteProcessedDay, $timerWebsite);

            if ($shouldArchivePeriods) {
                $success = true;
                foreach (array('week', 'month', 'year') as $period) {
                    $success = $this->archiveVisitsAndSegments($idsite, $period, $lastTimestampWebsiteProcessedPeriods) && $success;
                }
                // Record successful run of this website's periods archiving
                if ($success) {
                    Piwik_SetOption($this->lastRunKey($idsite, "periods"), time());

                    // Remove this website from the list of websites to be invalidated
                    // since it's now just been re-processing the reports, job is done!
                    if ($websiteIsOldDataInvalidate) {
                        $websiteIdsInvalidated = Piwik_CoreAdminHome_API::getWebsiteIdsToInvalidate();
                        if (count($websiteIdsInvalidated)) {
                            $found = array_search($idsite, $websiteIdsInvalidated);
                            if ($found !== false) {
                                unset($websiteIdsInvalidated[$found]);
                                Piwik_SetOption(Piwik_CoreAdminHome_API::OPTION_INVALIDATED_IDSITES, serialize($websiteIdsInvalidated));
                            }
                        }
                    }
                }
                // Counted only when week/month/year archiving was actually attempted
                $archivedPeriodsArchivesWebsite++;
            } else {
                // Periods are still fresh: only the day report was archived
                $skippedPeriodsArchivesWebsite++;
            }

            $requestsWebsite = $this->requests - $requestsBefore;
            $debug = $this->shouldArchiveAllWebsites ? ", last days = $visitsAllDays visits" : "";
            // Logged through $this->log() so the line is also kept in the output buffer
            // that logFatalError() prints
            $this->log("Archived website id = $idsite, today = $visitsToday visits"
                . $debug . ", $requestsWebsite API requests, "
                . $timerWebsite->__toString()
                . " [" . ($websitesWithVisitsSinceLastRun + $skipped) . "/"
                . count($this->websites)
                . " done]");
        }

        $this->log("Done archiving!");

        $this->logSection("SUMMARY");
        $this->log("Total daily visits archived: " . $this->visits);

        $totalWebsites = count($this->allWebsites);
        $skipped = $totalWebsites - $websitesWithVisitsSinceLastRun;
        $this->log("Archived today's reports for $websitesWithVisitsSinceLastRun websites");
        $this->log("Archived week/month/year for $archivedPeriodsArchivesWebsite websites. ");
        $this->log("Skipped $skipped websites: no new visit since the last script execution");
        $this->log("Skipped $skippedDayArchivesWebsites websites day archiving: existing daily reports are less than {$this->todayArchiveTimeToLive} seconds old");
        $this->log("Skipped $skippedPeriodsArchivesWebsite websites week/month/year archiving: existing periods reports are less than {$this->processPeriodsMaximumEverySeconds} seconds old");
        $this->log("Total API requests: $this->requests");

        //DONE: done/total, visits, wtoday, wperiods, reqs, time, errors[count]: first eg.
        $percent = count($this->websites) == 0
            ? ""
            : " " . round($processed * 100 / count($this->websites), 0) . "%";
        $this->log("done: " .
                $processed . "/" . count($this->websites) . "" . $percent . ", " .
                $this->visits . " v, $websitesWithVisitsSinceLastRun wtoday, $archivedPeriodsArchivesWebsite wperiods, " .
                $this->requests . " req, " . round($timer->getTimeMs()) . " ms, " .
                (empty($this->errors)
                    ? "no error"
                    : (count($this->errors) . " errors. eg. '" . reset($this->errors) . "'"))
        );
        $this->log($timer->__toString());
        $this->logSection("SCHEDULED TASKS");
        $this->log("Starting Scheduled tasks... ");

        $tasksOutput = $this->request("?module=API&method=CoreAdminHome.runScheduledTasks&format=csv&convertToUnicode=0&token_auth=" . $this->token_auth);
        if ($tasksOutput == "No data available") {
            $tasksOutput = " No task to run";
        }
        $this->log($tasksOutput);
        $this->log("done");
    }
mattab's avatar
mattab a validé

    /**
     * Loads the segments to pre-process for every website into $this->segments
     * (always an array afterwards) and logs them when there are any.
     */
    private function initSegmentsToArchive()
    {
        $knownSegments = Piwik_CoreAdminHome_API::getInstance()->getKnownSegmentsToArchive();

        if (empty($knownSegments)) {
            $this->segments = array();
            return;
        }

        $this->segments = $knownSegments;
        $this->log("- Will pre-process " . count($this->segments) . " Segments for each website and each period: " . implode(", ", $this->segments));
    }

    /**
     * Returns the de-duplicated union of the segments configured for all websites
     * and the segments configured for the given website.
     *
     * @param int $idsite
     * @return array
     */
    private function getSegmentsForSite($idsite)
    {
        $siteSpecificSegments = Piwik::getKnownSegmentsToArchiveForSite($idsite);

        $hasSiteSpecificSegments = !empty($siteSpecificSegments);
        if ($hasSiteSpecificSegments) {
            $this->log("Will pre-process the following " . count($siteSpecificSegments) . " Segments for this website (id = $idsite): " . implode(", ", $siteSpecificSegments));
        }

        $combined = array_merge($this->segments, $siteSpecificSegments);
        return array_unique($combined);
    }

    /**
     * Archive visits and segments.
     *
     * @param $idsite int
     * @param $period
     * @param $lastTimestampWebsiteProcessed
     * @return bool True on success, false if some request failed
     */
    private function archiveVisitsAndSegments($idsite, $period, $lastTimestampWebsiteProcessed, Timer $timerWebsite = null)
    {
        // Used for the final log line when the caller did not pass a per-website timer
        $timer = new Timer;
        $aCurl = array();
        $url = $this->piwikUrl . $this->getVisitsRequestUrl($idsite, $period, $lastTimestampWebsiteProcessed) . $this->requestPrepend;

        // already processed above for "day"
        if ($period != "day") {
            $aCurl[$url] = $this->getNewCurlHandle($url);
            $this->requests++;
        }
        $urlNoSegment = $url;

        // One additional request per segment configured for this website
        foreach ($this->getSegmentsForSite($idsite) as $segment) {
            $segmentUrl = $url . '&segment=' . urlencode($segment);
            $aCurl[$segmentUrl] = $this->getNewCurlHandle($segmentUrl);
            $this->requests++;
        }

        $success = true;
        $visitsAllDaysInPeriod = false;

        // FIXME: This code used to execute multiple curl requests asynchronously. This caused
        // deadlocks since archive tables are locked for the entire archiving process. Moving back
        // to synchronous requests is a quick fix, but the locking mechanism can be changed to
        // only lock when getting the new archive ID. When that is done, this code should be changed
        // back to use asynchronous requests.
        foreach ($aCurl as $requestUrl => $ch) {
            $content = curl_exec($ch);
            // Release the handle as soon as its response has been read
            curl_close($ch);

            $successResponse = $this->checkResponse($content, $requestUrl);
            $success = $successResponse && $success;

            // Only the un-segmented request provides the visit count that is logged
            if ($requestUrl == $urlNoSegment
                && $successResponse
            ) {
                $stats = unserialize($content);
                if (is_array($stats)) {
                    $visitsAllDaysInPeriod = array_sum($stats);
                } else {
                    $this->logError("Error unserializing the following response: " . $content);
                }
            }
        }

        $this->log("Archived website id = $idsite, period = $period, "
            . ($period != "day" ? (int)$visitsAllDaysInPeriod . " visits, " : "")
            . (!empty($timerWebsite) ? $timerWebsite->__toString() : $timer->__toString()));
        return $success;
    }
    /**
     * Adds a curl handle to a multi handle, creating the multi handle on first use.
     *
     * @param mixed $mh Multi handle, passed by reference; initialised here when falsy
     * @param mixed $ch Curl handle to add
     */
    private function addCurlHandleToMulti(&$mh, $ch)
    {
        $mh = $mh ?: curl_multi_init();
        curl_multi_add_handle($mh, $ch);
    }

    /**
     * Creates a curl handle for $url that returns the response body as a string.
     *
     * SSL host and peer verification are disabled only when the
     * --accept-invalid-ssl-certificate flag was read into $acceptInvalidSSLCertificate.
     *
     * @param string $url
     * @return mixed The configured curl handle
     */
    private function getNewCurlHandle($url)
    {
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

        if ($this->acceptInvalidSSLCertificate) {
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        }
        curl_setopt($ch, CURLOPT_USERAGENT, Http::getUserAgent());
        Http::configCurlCertificate($ch);
        return $ch;
    }


    /**
     * Logs a section in the output
     */
    private function logSection($title = "")
    {
        // A separator line followed by the section title
        $sectionLines = array("---------------------------", $title);
        foreach ($sectionLines as $sectionLine) {
            $this->log($sectionLine);
        }
    }

    /**
     * End of the script
     */
    public function end()
    {
        // No error -> record the time of this successful, complete execution
        if (empty($this->errors)) {
            Piwik_SetOption(self::OPTION_ARCHIVING_FINISHED_TS, time());
            return;
        }

        // How to exercise this error path manually:
        // - Generate some hits since the last archive.php run
        // - Start the script and, while it runs, shut down apache, then restore it
        // Errors should be logged, the script should finish, report them and trigger a PHP error
        $this->logSection("SUMMARY OF ERRORS");
        foreach ($this->errors as $loggedError) {
            $this->log("Error: " . $loggedError);
        }

        $summary = count($this->errors) . " total errors during this script execution, please investigate and try and fix these errors";
        $this->log($summary);

        $firstError = reset($this->errors);
        $this->logFatalError($summary . '. First error was: ' . $firstError);
    }


    /**
     * Appends the message to the in-memory output buffer and forwards it to Piwik::log().
     *
     * @param string $m
     */
    private function log($m)
    {
        $bufferedLine = $m . "\n";
        $this->output = $this->output . $bufferedLine;

        Piwik::log($m);
    }

    /**
     * Issues a request to $url
     */
mattab's avatar
mattab a validé
    private function request($url)
    {
        $fullUrl = $this->piwikUrl . $url . $this->requestPrepend;

        try {
            $response = Http::sendHttpRequestBy(
                'curl',
                $fullUrl,
                300,   // timeout
                null,  // userAgent
                null,  // destinationPath
                null,  // file
                0,     // followDepth
                false, // acceptLanguage
                $this->acceptInvalidSSLCertificate
            );
        } catch (Exception $e) {
            return $this->logNetworkError($fullUrl, $e->getMessage());
        }

        // Invalid responses are logged by checkResponse() and reported as false
        return $this->checkResponse($response, $fullUrl) ? $response : false;
    }

    /**
     * Checks an API response: it is invalid when empty or when it contains the
     * word "error" (case-insensitive) anywhere, including at the very start.
     *
     * @param string|bool $response Response body
     * @param string $url URL that was requested, used in the error message
     * @return bool True when the response looks valid; false after logging a network error
     */
    private function checkResponse($response, $url)
    {
        // stripos() returns 0 for a match at the start of the string, so the result
        // must be compared against false rather than used as a boolean
        if (empty($response)
            || stripos($response, 'error') !== false
        ) {
            return $this->logNetworkError($url, $response);
        }
        return true;
    }

    /**
     * Records an error for the end-of-run summary (truncated to
     * TRUNCATE_ERROR_MESSAGE_SUMMARY characters) and logs the full message.
     *
     * @param string $m
     */
    private function logError($m)
    {
        $shortened = substr($m, 0, self::TRUNCATE_ERROR_MESSAGE_SUMMARY);
        array_push($this->errors, $shortened);

        $this->log("ERROR: $m");
    }

    /**
     * Logs the error, writes it to stderr (optionally followed by everything logged
     * so far), triggers an E_USER_ERROR and terminates the script.
     *
     * @param string $m Error message
     * @param bool $backtrace When true, the accumulated log output is appended on stderr
     */
    public function logFatalError($m, $backtrace = true)
    {
        $this->logError($m);

        $fe = fopen('php://stderr', 'w');
        if ($fe !== false) {
            fwrite($fe, "Error in the last Piwik archive.php run: \n" . $m
                . ($backtrace ? "\n\n Here is the full errors output:\n\n" . $this->output : '')
            );
            fclose($fe);
        }

        trigger_error($m, E_USER_ERROR);
        // Reached only if an error handler lets execution continue: exit with a
        // non-zero status so that callers such as cron can detect the failure
        exit(1);
    }

    /**
     * Logs an error describing an invalid API response.
     *
     * @param string $url URL that was requested
     * @param string|bool $response Response received, possibly empty
     * @return bool Always false, so that callers can return the result directly
     */
    private function logNetworkError($url, $response)
    {
        $details = empty($response)
            ? "The response was empty. This usually means a server error. This solution to this error is generally to increase the value of 'memory_limit' in your php.ini file. Please check your Web server Error Log file for more details."
            : "Response was '$response'";

        $this->logError("Got invalid response from API request: $url. " . $details);
        return false;
    }

    /**
     * Displays script usage
     */
mattab's avatar
mattab a validé
    private function usage()
    {
        // The help text is defined at the top of this file as a global variable
        global $USAGE;

        $helpText = $USAGE;
        $this->logLines($helpText);
    }

    /**
     * Logs a multi-line text, one log entry per line.
     *
     * Lines are split on \r\n, \r or \n rather than on PHP_EOL, so the text is split
     * correctly whatever line endings this file was saved with.
     *
     * @param string $t
     */
    private function logLines($t)
    {
        foreach (preg_split('/\r\n|\r|\n/', $t) as $line) {
            $this->log($line);
        }
    }

    /**
     * Overrides two entries of the log configuration and aborts with the usage
     * text when the curl extension is not available.
     */
    private function initLog()
    {
        $piwikConfig = Config::getInstance();
        $piwikConfig->log['log_only_when_debug_parameter'] = 0;
        $piwikConfig->log['logger_message'] = array("logger_message" => "screen");

        $curlAvailable = function_exists("curl_multi_init");
        if ($curlAvailable) {
            return;
        }

        $this->log("ERROR: this script requires curl extension php_curl enabled in your CLI php.ini");
        $this->usage();
        exit;
    }

    /**
     * Script does run on http:// ONLY if the SU token is specified
     */
    private function initCheckCli()
    {
        // Nothing to verify when running from the command line
        if (Common::isPhpCliMode()) {
            return;
        }

        $token_auth = Common::getRequestVar('token_auth', '', 'string');

        // Strict string comparison: with a loose != two different tokens that both look
        // like numeric strings (e.g. "0e12..." and "0e34...") would compare as equal
        if ($token_auth !== (string)$this->token_auth
            || strlen($token_auth) != 32
        ) {
            die('<b>You must specify the Super User token_auth as a parameter to this script, eg. <code>?token_auth=XYZ</code> if you wish to run this script through the browser. </b><br>
					However it is recommended to run it <a href="http://piwik.org/docs/setup-auto-archiving/">via cron in the command line</a>, since it can take a long time to run.<br/>
					In a shell, execute for example the following to trigger archiving on the local Piwik server:<br/>
					<code>$ /path/to/php /path/to/piwik/misc/cron/archive.php --url=http://your-website.org/path/to/piwik/</code>');
        }
    }

    /**
     * Init Piwik, connect DB, create log & config objects, etc.
     */
    private function initCore()
    {
        try {
            $frontController = FrontController::getInstance();
            $frontController->init();
        } catch (Exception $e) {
            // Print the failure reason and stop: nothing else can run without the core
            exit("ERROR: During Piwik init, Message: " . $e->getMessage());
        }
    }

    /**
     * Prints the usage text and stops the script when the help or h parameter is set.
     */
    private function displayHelp()
    {
        if (!$this->isParameterSet('help') && !$this->isParameterSet('h')) {
            return;
        }

        $this->usage();
        exit;
    }

mattab's avatar
mattab a validé
    /**
     * Reads the --force-* parameters and sets the matching state:
     * the periods timeout, the reset flag, the "all websites" flag and the
     * timestamp of the last completed run.
     */
    private function initStateFromParameters()
    {
        // Read all three switches first; the second argument allows "--name=value"
        $forceAllPeriods = $this->isParameterSet("force-all-periods", true);
        $forceAllWebsites = $this->isParameterSet("force-all-websites");
        $forceTimeoutPeriod = $this->isParameterSet("force-timeout-for-periods", true);

        // A value of exactly true means the switch was given without [seconds]
        $timeoutValueGiven = !empty($forceTimeoutPeriod) && $forceTimeoutPeriod !== true;
        if ($timeoutValueGiven) {
            // Ensure the cache for periods is at least as high as cache for today
            $todayTimeToLive = Rules::getTodayArchiveTimeToLive();
            if ($forceTimeoutPeriod < $todayTimeToLive) {
                $this->log("WARNING: Automatically increasing --force-timeout-for-periods from $forceTimeoutPeriod to "
                    . $todayTimeToLive
                    . " to match the cache timeout for Today's report specified in Piwik UI > Settings > General Settings");
                $forceTimeoutPeriod = $todayTimeToLive;
            }
            $this->processPeriodsMaximumEverySeconds = $forceTimeoutPeriod;
        }

        // Recommend to disable browser archiving when using this script
        if (Rules::isBrowserTriggerEnabled()) {
            $this->log("NOTE: if you execute this script at least once per hour (or more often) in a crontab, you may disable 'Browser trigger archiving' in Piwik UI > Settings > General Settings. ");
            $this->log("      see doc at: http://piwik.org/docs/setup-auto-archiving/");
        }

        if ($forceAllPeriods) {
            $this->log("--force-all-periods was detected: the script will run as if it was its first run, and will trigger archiving for all periods.");
            $this->shouldResetState = true;

            // A positive numeric value overrides the look-back window,
            // unless all websites are forced anyway
            $windowOverridden = !$forceAllWebsites && is_numeric($forceAllPeriods) && $forceAllPeriods > 0;
            if ($windowOverridden) {
                $this->firstRunActiveWebsitesWithTraffic = (int)$forceAllPeriods;
            }
        }

        if ($forceAllWebsites) {
            $this->log("--force-all-websites was detected: the script will archive all websites and all periods sequentially");
            $this->shouldArchiveAllWebsites = true;
        }

        // When state is reset, the script behaves as if it had never completed
        $lastCompleted = Piwik_GetOption(self::OPTION_ARCHIVING_FINISHED_TS);
        $this->timeLastCompleted = $this->shouldResetState ? false : $lastCompleted;
    }

    // Fetching websites to process
mattab's avatar
mattab a validé
    /**
     * Builds the list of website IDs to archive in this run and stores it in $this->websites.
     *
     * When $this->shouldArchiveAllWebsites is set, every known website is selected.
     * Otherwise the list is the union of:
     *  1) websites with visits since the last completed run (or, when no previous run
     *     timestamp is available, within the last $this->firstRunActiveWebsitesWithTraffic seconds),
     *  2) websites whose old reports have been invalidated,
     *  3) websites in a timezone where the calendar day has changed since the last run.
     *
     * Side effects: always sets $this->allWebsites and $this->websites; when not archiving
     * all websites it also sets $this->idSitesInvalidatedOldReports and
     * $this->websiteDayHasFinishedSinceLastRun.
     */
    private function initWebsitesToProcess()
    {
        $this->allWebsites = Piwik_SitesManager_API::getInstance()->getAllSitesId();

        if ($this->shouldArchiveAllWebsites) {
            $this->websites = $this->allWebsites;
            $this->log("Will process " . count($this->websites) . " websites");
            return;
        }

        // 1) All websites with visits since the last archive.php execution
        $timestampActiveTraffic = $this->timeLastCompleted;
        if (empty($timestampActiveTraffic)) {
            // No usable "last completed" timestamp: fall back to a fixed look-back window
            $timestampActiveTraffic = time() - $this->firstRunActiveWebsitesWithTraffic;
            $this->log("--force-all-periods was detected: we will process websites with visits in the last "
                    . Piwik::getPrettyTimeFromSeconds($this->firstRunActiveWebsitesWithTraffic, true, false)
            );
        }
        $this->websites = Piwik_SitesManager_API::getInstance()->getSitesIdWithVisits($timestampActiveTraffic);
        $websiteIds = !empty($this->websites) ? ", IDs: " . implode(", ", $this->websites) : "";
        $prettySeconds = Piwik::getPrettyTimeFromSeconds(empty($this->timeLastCompleted)
                ? $this->firstRunActiveWebsitesWithTraffic
                : (time() - $this->timeLastCompleted),
            true, false);
        $this->log("Will process " . count($this->websites) . " websites with new visits since "
            . $prettySeconds
            . " "
            . $websiteIds);

        // 2) All websites that had reports in the past invalidated recently
        //	eg. when using Python log import script
        $this->idSitesInvalidatedOldReports = Piwik_CoreAdminHome_API::getWebsiteIdsToInvalidate();
        // Ignore invalidated IDs that do not belong to an existing website
        $this->idSitesInvalidatedOldReports = array_intersect($this->idSitesInvalidatedOldReports, $this->allWebsites);

        if (count($this->idSitesInvalidatedOldReports) > 0) {
            $websiteIds = ", IDs: " . implode(", ", $this->idSitesInvalidatedOldReports);
            $this->log("Will process " . count($this->idSitesInvalidatedOldReports) . " other websites because some old data reports have been invalidated (eg. using the Log Import script) " . $websiteIds);

            // A website may have both new visits and invalidated reports:
            // de-duplicate so that it is not queued (and archived) twice.
            $this->websites = array_values(array_unique(
                array_merge($this->websites, $this->idSitesInvalidatedOldReports)
            ));
        }

        // 3) Also process all other websites which days have finished since the last run.
        //    This ensures we process the previous day/week/month/year that just finished, even if there was no new visit
        $uniqueTimezones = Piwik_SitesManager_API::getInstance()->getUniqueSiteTimezones();
        $timezoneToProcess = array();
        // Iterate by value: the timezones are only read, so no reference is needed
        foreach ($uniqueTimezones as $timezone) {
            $processedDateInTz = Date::factory((int)$timestampActiveTraffic, $timezone);
            $currentDateInTz = Date::factory('now', $timezone);

            // Different date strings mean a day boundary was crossed in this timezone
            if ($processedDateInTz->toString() != $currentDateInTz->toString()) {
                $timezoneToProcess[] = $timezone;
            }
        }

        $websiteDayHasFinishedSinceLastRun = Piwik_SitesManager_API::getInstance()->getSitesIdFromTimezones($timezoneToProcess);
        // Keep only the websites that were not already selected by steps 1) and 2)
        $websiteDayHasFinishedSinceLastRun = array_diff($websiteDayHasFinishedSinceLastRun, $this->websites);
        $this->websiteDayHasFinishedSinceLastRun = $websiteDayHasFinishedSinceLastRun;
        if (count($websiteDayHasFinishedSinceLastRun) > 0) {
            $websiteIds = ", IDs: " . implode(", ", $websiteDayHasFinishedSinceLastRun);
            $this->log("Will process " . count($websiteDayHasFinishedSinceLastRun) . " other websites because the last time they were archived was on a different day (in the website's timezone) " . $websiteIds);

            $this->websites = array_merge($this->websites, $websiteDayHasFinishedSinceLastRun);
        }
    }

mattab's avatar
mattab a validé
    /**
     * Computes the token_auth from the super user credentials found in the configuration.
     *
     * Sets $this->token_auth to md5(login . password) and $this->login to the super user login.
     * The configured password is presumably already stored as an MD5 hash (hence the
     * variable name) -- confirm against the config file format.
     */
    private function initTokenAuth()
    {
        $login = Config::getInstance()->superuser['login'];
        $md5Password = Config::getInstance()->superuser['password'];
        $this->token_auth = md5($login . $md5Password);
        $this->login = $login;
    }

    /**
     * Determines the URL of the Piwik index.php entry point and stores it in $this->piwikUrl.
     *
     * In CLI mode the base URL is taken from the --url argument (a fatal error is logged
     * when it is missing or does not look like a URL). Otherwise the base URL is derived
     * from the URL of the current request. When force_ssl is enabled in the General config
     * section, 'http://' is rewritten to 'https://'.
     */
    private function initPiwikHost()
    {
        if (Common::isPhpCliMode()) {
            // Run from CLI/shell: the Piwik URL has to be given on the command line
            $baseUrl = $this->isParameterSet("url", true);
            $looksUsable = $baseUrl && Common::isLookLikeUrl($baseUrl);
            if (!$looksUsable) {
                $this->logFatalError("archive.php expects the argument --url to be set to your Piwik URL, for example: --url=http://example.org/piwik/ ", false);
            }
            // the base URL must end with a slash before index.php is appended
            if (substr($baseUrl, -1) !== '/') {
                $baseUrl .= '/';
            }
        } else {
            // Run as a web cron: start from the current URL, eg. example.org/piwik/misc/cron/
            $currentDirectoryUrl = Common::sanitizeInputValue(Url::getCurrentUrlWithoutFileName());
            // ...and go two directories up, eg. example.org/piwik/
            $baseUrl = $currentDirectoryUrl . "../../";
        }

        $sslIsForced = (Config::getInstance()->General['force_ssl'] == 1);
        if ($sslIsForced) {
            $baseUrl = str_replace('http://', 'https://', $baseUrl);
        }

        $this->piwikUrl = $baseUrl . "index.php";
    }


    /**
     * Tells whether a parameter was passed on the command line.
     *
     * The parameter is looked up in $_SERVER['argv'] under the forms "--name", "-name"
     * and "name"; an argument matches when it starts with one of these forms.
     * When $valuePossible is true and the matching argument contains '=', the text after
     * the first '=' is returned, eg. --force-timeout-for-periods=3600 would return "3600".
     *
     * Always returns false when PHP is not running in CLI mode.
     *
     * @param string $parameter Parameter name, without leading dashes
     * @param bool $valuePossible Whether the parameter may carry a value after '='
     * @return bool|string the value if one is set, true if the parameter is present without a value, false otherwise
     */
    private function isParameterSet($parameter, $valuePossible = false)
    {
        if (!Common::isPhpCliMode()) {
            return false;
        }

        // Accepted spellings, tried in this order
        $spellings = array('--' . $parameter, '-' . $parameter, $parameter);

        foreach ($spellings as $spelling) {
            foreach ($_SERVER['argv'] as $argument) {
                // The argument has to begin with the spelling currently being tried
                if (strpos($argument, $spelling) !== 0) {
                    continue;
                }
                if (!$valuePossible) {
                    return true;
                }

                $equalsPosition = strpos($argument, '=');
                if ($equalsPosition === false) {
                    return true;
                }

                // Everything after the first '=' is the value
                $value = substr($argument, $equalsPosition + 1);
                return ($value !== false) ? $value : true;
            }
        }

        return false;
    }

mattpiwik's avatar
mattpiwik a validé
}