From b03f7c82734b3a0121a7aa10128f4baa8ed5d8f9 Mon Sep 17 00:00:00 2001
From: Thomas Steur <thomas.steur@googlemail.com>
Date: Mon, 15 Sep 2014 11:26:57 +0200
Subject: [PATCH] refs #4996 cache all site urls in tracker

---
 core/Url.php                                |  15 ++
 misc/internal-docs/content-tracking.md      |   2 +-
 plugins/SitesManager/API.php                |   2 +
 plugins/SitesManager/SiteUrls.php           |  57 ++++++++
 plugins/SitesManager/tests/SiteUrlsTest.php | 145 ++++++++++++++++++++
 5 files changed, 220 insertions(+), 1 deletion(-)
 create mode 100644 plugins/SitesManager/SiteUrls.php
 create mode 100644 plugins/SitesManager/tests/SiteUrlsTest.php

diff --git a/core/Url.php b/core/Url.php
index 376138b608..a82dfebd8a 100644
--- a/core/Url.php
+++ b/core/Url.php
@@ -599,6 +599,12 @@ class Url
         return $hosts;
     }
 
+    /**
+     * Returns the host part of any valid URL.
+     *
+     * @param string $url  Any fully qualified URL
+     * @return string|null The actual host in lower case or null if $url is not a valid fully qualified URL.
+     */
     public static function getHostFromUrl($url)
     {
         $parsedUrl = parse_url($url);
@@ -610,6 +616,15 @@ class Url
         return Common::mb_strtolower($parsedUrl['host']);
     }
 
+    /**
+     * Checks whether any of the given URLs has the given host. If not, we will also check whether any URL uses a
+     * subdomain of the given host. For instance if host is "example.com" and a URL is "http://www.example.com" we
+     * consider this as valid and return true. The always trusted hosts such as "127.0.0.1" are considered valid as well.
+     *
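+     * @param string   $host The host to look for, eg "example.com"
+     * @param string[] $urls The URLs whose hosts are compared against $host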
+     * @return bool
+     */
     public static function isHostInUrls($host, $urls)
     {
         if (empty($host)) {
diff --git a/misc/internal-docs/content-tracking.md b/misc/internal-docs/content-tracking.md
index a94dcc4733..0a149013c2 100644
--- a/misc/internal-docs/content-tracking.md
+++ b/misc/internal-docs/content-tracking.md
@@ -429,11 +429,11 @@ Yes it seems most logical to create an action entry for each Content.
 Nothing special here I think. We would probably automatically detect the type of content (image, video, text, sound, ...) depending on the content eg in case it ends with [.jpg, .png, .gif] it could be recognized as image content and show a banner in the report.
 
 ## TODO
-* Cache allowed site urls for redirects
 * When a user clicks on an interaction, we should check whether we have already tracked the impression as the content is visible now. If not tracked before, we should track the impression as well
   * There can be a scroll or timer event that detects the same content became visible as well. This would not be a problem since we do not track same content block twice
   * Maybe v2
 * Content piece undefined vs Unknown?
+* UI / PHP tests
 * Make JS tests work if piwik is installed in a path such as localhost/piwik
 
 ## V2:
diff --git a/plugins/SitesManager/API.php b/plugins/SitesManager/API.php
index 5a30448493..5b856cd862 100644
--- a/plugins/SitesManager/API.php
+++ b/plugins/SitesManager/API.php
@@ -22,6 +22,7 @@ use Piwik\SettingsPiwik;
 use Piwik\SettingsServer;
 use Piwik\Site;
 use Piwik\TaskScheduler;
+use Piwik\Tracker;
 use Piwik\Tracker\Cache;
 use Piwik\Url;
 use Piwik\UrlHelper;
@@ -594,6 +595,7 @@ class API extends \Piwik\Plugin\API
     {
         Site::clearCache();
         Cache::regenerateCacheWebsiteAttributes($idSite);
+        SiteUrls::clearSitesCache();
     }
 
     /**
diff --git a/plugins/SitesManager/SiteUrls.php b/plugins/SitesManager/SiteUrls.php
new file mode 100644
index 0000000000..02d7846d11
--- /dev/null
+++ b/plugins/SitesManager/SiteUrls.php
@@ -0,0 +1,85 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+namespace Piwik\Plugins\SitesManager;
+
+use Piwik\CacheFile;
+
+/**
+ * Provides the URLs of all sites, indexed by site ID, either freshly fetched through the Model or served from a
+ * CacheFile based cache.
+ */
+class SiteUrls
+{
+    /**
+     * Id of the cache entry holding the URLs of all sites.
+     */
+    private static $allUrlsCacheKey = 'allSiteUrlsPerSite';
+
+    /**
+     * Deletes the cached site URLs so the next call to getAllCachedSiteUrls() fetches them again.
+     */
+    public static function clearSitesCache()
+    {
+        self::getCache()->delete(self::$allUrlsCacheKey);
+    }
+
+    /**
+     * Returns the URLs of all sites from the cache. If nothing is cached yet, the URLs are fetched through
+     * getAllSiteUrls() and stored in the cache before they are returned.
+     *
+     * @return array Site URLs indexed by site ID, eg array(1 => array('http://www.example.com'))
+     */
+    public function getAllCachedSiteUrls()
+    {
+        $cache    = self::getCache();
+        $siteUrls = $cache->get(self::$allUrlsCacheKey);
+
+        // An empty array is a valid cached result (there are no sites) and must count as a hit, otherwise the
+        // URLs would be fetched and written again on every single call. Only a non-array value is a miss.
+        if (!is_array($siteUrls)) {
+            $siteUrls = $this->getAllSiteUrls();
+            $cache->set(self::$allUrlsCacheKey, $siteUrls);
+        }
+
+        return $siteUrls;
+    }
+
+    /**
+     * Fetches the URLs of all sites through the Model, without touching the cache.
+     *
+     * @return array Site URLs indexed by integer site ID, an empty array if there are no sites.
+     */
+    public function getAllSiteUrls()
+    {
+        $model   = new Model();
+        $siteIds = $model->getSitesId();
+
+        if (empty($siteIds)) {
+            return array();
+        }
+
+        $siteUrls = array();
+        foreach ($siteIds as $siteId) {
+            $siteId = (int) $siteId;
+            $siteUrls[$siteId] = $model->getSiteUrlsFromId($siteId);
+        }
+
+        return $siteUrls;
+    }
+
+    /**
+     * Creates the cache the site URLs are stored in.
+     *
+     * @return CacheFile
+     */
+    private static function getCache()
+    {
+        return new CacheFile('tracker', 1800);
+    }
+}
diff --git a/plugins/SitesManager/tests/SiteUrlsTest.php b/plugins/SitesManager/tests/SiteUrlsTest.php
new file mode 100644
index 0000000000..e459efb0a6
--- /dev/null
+++ b/plugins/SitesManager/tests/SiteUrlsTest.php
@@ -0,0 +1,196 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ */
+
+namespace Piwik\Plugins\SitesManager\tests;
+use Piwik\CacheFile;
+use Piwik\Plugins\SitesManager\API;
+use Piwik\Plugins\SitesManager\SiteUrls;
+
+/**
+ * Verifies that SiteUrls returns the URLs of all sites and keeps its cache in sync with changes made via the API.
+ *
+ * @group SitesManager
+ * @group SiteUrlsTest
+ * @group Database
+ */
+class SiteUrlsTest extends \DatabaseTestCase
+{
+    /**
+     * The object under test.
+     *
+     * @var SiteUrls
+     */
+    private $siteUrls;
+
+    /**
+     * API used to create and modify the sites the tests work with.
+     *
+     * @var API
+     */
+    private $api;
+
+    /**
+     * Prepares a fresh SiteUrls instance and makes sure no site URLs are cached when a test starts.
+     */
+    public function setUp()
+    {
+        parent::setUp();
+
+        $this->api      = API::getInstance();
+        $this->siteUrls = new SiteUrls();
+
+        SiteUrls::clearSitesCache();
+    }
+
+    public function testGetAllSiteUrls_shouldReturnAnEmptyArray_IfThereAreNoSites()
+    {
+        $this->assertSiteUrls(array());
+    }
+
+    public function testGetAllSiteUrls_shouldReturnUrlsForEachSiteId()
+    {
+        $mainUrlOnly = array('http://www.example.com');
+
+        // first site: only one main URL
+        $this->addSite('http://www.example.com');
+        $this->assertSiteUrls(array(1 => $mainUrlOnly));
+
+        // second site: main URL and alias URL
+        $this->addSite('http://www.example.com', 'http://www.piwik.org');
+        $this->assertSiteUrls(array(
+            1 => $mainUrlOnly,
+            2 => array('http://www.example.com', 'http://www.piwik.org'),
+        ));
+
+        // one more alias URL for the second site
+        $this->api->addSiteAliasUrls(2, 'http://piwik.org');
+        $this->assertSiteUrls(array(
+            1 => $mainUrlOnly,
+            2 => array('http://www.example.com', 'http://piwik.org', 'http://www.piwik.org'),
+        ));
+
+        // no alias URLs anymore for the second site
+        $this->api->setSiteAliasUrls(2, array());
+        $this->assertSiteUrls(array(1 => $mainUrlOnly, 2 => $mainUrlOnly));
+    }
+
+    public function testGetAllCachedSiteUrls_shouldReturnAnEmptyArray_IfThereAreNoSites()
+    {
+        $this->assertCachedSiteUrls(array());
+    }
+
+    public function testGetAllCachedSiteUrls_ShouldReturnCorrectResultEvenIfItIsCachedAsWeClearTheCacheOnAnyChange()
+    {
+        $mainUrlOnly = array('http://www.example.com');
+
+        // first site: only one main URL
+        $this->addSite('http://www.example.com');
+        $this->assertCachedSiteUrls(array(1 => $mainUrlOnly));
+
+        // second site: main URL and alias URL
+        $this->addSite('http://www.example.com', 'http://www.piwik.org');
+        $this->assertCachedSiteUrls(array(
+            1 => $mainUrlOnly,
+            2 => array('http://www.example.com', 'http://www.piwik.org'),
+        ));
+
+        // one more alias URL for the second site
+        $this->api->addSiteAliasUrls(2, 'http://piwik.org');
+        $this->assertCachedSiteUrls(array(
+            1 => $mainUrlOnly,
+            2 => array('http://www.example.com', 'http://piwik.org', 'http://www.piwik.org'),
+        ));
+
+        // no alias URLs anymore for the second site
+        $this->api->setSiteAliasUrls(2, array());
+        $this->assertCachedSiteUrls(array(1 => $mainUrlOnly, 2 => $mainUrlOnly));
+
+        // new URLs for the first site
+        $this->api->updateSite(1, 'siteName3', array('http://updated.example.com', 'http://2.example.com'));
+        $this->assertCachedSiteUrls(array(
+            1 => array('http://updated.example.com', 'http://2.example.com'),
+            2 => $mainUrlOnly,
+        ));
+    }
+
+    public function testGetAllCachedSiteUrls_ShouldWriteACacheFile()
+    {
+        // nothing is cached before the first lookup
+        $this->assertValueInCache(false);
+
+        $this->addSite('http://www.example.com');
+        $this->siteUrls->getAllCachedSiteUrls();
+
+        // the lookup has to leave its result in the cache
+        $this->assertValueInCache(array(1 => array('http://www.example.com')));
+    }
+
+    public function test_clearSitesCache_ShouldActuallyDeleteACache()
+    {
+        $this->addSite('http://www.example.com');
+        $this->siteUrls->getAllCachedSiteUrls();
+
+        // there has to be a cached result we can delete
+        $this->assertValueInCache(array(1 => array('http://www.example.com')));
+
+        SiteUrls::clearSitesCache();
+
+        // and it has to be gone afterwards
+        $this->assertValueInCache(false);
+    }
+
+    public function testGetAllCachedSiteUrls_ShouldReadFromTheCacheFile()
+    {
+        // seed the cache with a faked value
+        $fakedUrls = array(1 => 'Whatever');
+        $this->createCache()->set('allSiteUrlsPerSite', $fakedUrls);
+
+        $this->assertEquals($fakedUrls, $this->siteUrls->getAllCachedSiteUrls());
+    }
+
+    /**
+     * Asserts the URLs fetched without the cache equal the expected ones.
+     */
+    private function assertSiteUrls($expectedUrls)
+    {
+        $this->assertEquals($expectedUrls, $this->siteUrls->getAllSiteUrls());
+    }
+
+    /**
+     * Asserts the URLs fetched through the cache equal the expected ones.
+     */
+    private function assertCachedSiteUrls($expectedUrls)
+    {
+        $this->assertEquals($expectedUrls, $this->siteUrls->getAllCachedSiteUrls());
+    }
+
+    /**
+     * Adds a site named "siteName" that uses all given arguments as its URLs.
+     */
+    private function addSite($urls)
+    {
+        $allUrls = func_get_args();
+        $this->api->addSite('siteName', $allUrls);
+    }
+
+    /**
+     * Asserts the cache entry for the site URLs currently holds the given value.
+     */
+    private function assertValueInCache($value)
+    {
+        $this->assertEquals($value, $this->createCache()->get('allSiteUrlsPerSite'));
+    }
+
+    /**
+     * Creates a cache object for the cache the site URLs are stored in.
+     */
+    private function createCache()
+    {
+        return new CacheFile('tracker', 600);
+    }
+}
-- 
GitLab