From 2e9aaa9496d38b586d9495c0a39473328630d903 Mon Sep 17 00:00:00 2001
From: sgiehl <stefan@piwik.org>
Date: Sat, 31 Oct 2015 19:51:52 +0100
Subject: [PATCH] moved definitions of search engines to new repo and converted
 them to more readable yml format

---
 composer.json                                 |  19 +-
 composer.lock                                 |  12 +
 core/Common.php                               |  54 ---
 core/UrlHelper.php                            | 236 -----------
 plugins/CoreAdminHome/Tasks.php               |   1 +
 plugins/Referrers/Columns/Base.php            |   5 +-
 plugins/Referrers/SearchEngine.php            | 397 ++++++++++++++++++
 plugins/Referrers/Tasks.php                   |  35 ++
 plugins/Referrers/functions.php               |   6 +-
 .../Referrers/tests/Unit/ReferrersTest.php    |  33 +-
 .../Referrers/tests/Unit/SearchEngineTest.php |  81 ++++
 tests/PHPUnit/Framework/Fixture.php           |   1 -
 tests/PHPUnit/Unit/CommonTest.php             |  35 --
 tests/PHPUnit/Unit/UrlHelperTest.php          |  31 --
 14 files changed, 569 insertions(+), 377 deletions(-)
 create mode 100644 plugins/Referrers/SearchEngine.php
 create mode 100644 plugins/Referrers/Tasks.php
 create mode 100644 plugins/Referrers/tests/Unit/SearchEngineTest.php

diff --git a/composer.json b/composer.json
index 42e741f8fd..28b407ed5a 100644
--- a/composer.json
+++ b/composer.json
@@ -54,7 +54,8 @@
         "symfony/event-dispatcher": "~2.6.0",
         "pear/pear_exception": "~1.0.0",
         "piwik/referrer-spam-blacklist": "~1.0",
-        "tecnickcom/tcpdf": "~6.0"
+        "tecnickcom/tcpdf": "~6.0",
+        "piwik/searchengine-and-social-definitions": "dev-master"
     },
     "require-dev": {
         "aws/aws-sdk-php": "2.7.1",
@@ -90,8 +91,20 @@
                     "reference": "master"
                 }
             }
-        }
-    ],
+        },
+        {
+            "type": "package",
+            "package": {
+                "name": "piwik/searchengine-and-social-definitions",
+                "type": "library",
+                "version": "master",
+                "source": {
+                    "type": "git",
+                    "url": "https://github.com/sgiehl/searchengine-and-social-definitions",
+                    "reference": "master"
+                }
+            }
+        }    ],
     "scripts": {
         "pre-update-cmd": [
             "Piwik\\Composer\\ScriptHandler::cleanXhprof"
diff --git a/composer.lock b/composer.lock
index a6ee66b134..1a5ba52b51 100644
--- a/composer.lock
+++ b/composer.lock
@@ -958,6 +958,17 @@
             "description": "Community-contributed list of referrer spammers",
             "time": "2015-10-07 10:17:59"
         },
+        {
+            "name": "piwik/searchengine-and-social-definitions",
+            "version": "master",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/sgiehl/searchengine-and-social-definitions",
+                "reference": "master"
+            },
+            "type": "library",
+            "time": "2015-10-31 15:36:36"
+        },
         {
             "name": "psr/log",
             "version": "1.0.0",
@@ -2645,6 +2656,7 @@
     "minimum-stability": "stable",
     "stability-flags": {
         "php-di/php-di": 10,
+        "piwik/searchengine-and-social-definitions": 20,
         "facebook/xhprof": 20
     },
     "prefer-stable": false,
diff --git a/core/Common.php b/core/Common.php
index 7e3296bee1..6bb4298e93 100644
--- a/core/Common.php
+++ b/core/Common.php
@@ -815,60 +815,6 @@ class Common
         return $dataProvider->getLanguageToCountryList();
     }
 
-    /**
-     * Returns list of search engines by URL
-     *
-     * @see core/DataFiles/SearchEngines.php
-     *
-     * @return array  Array of ( URL => array( searchEngineName, keywordParameter, path, charset ) )
-     */
-    public static function getSearchEngineUrls()
-    {
-        $cacheId = 'Common.getSearchEngineUrls';
-        $cache = Cache::getTransientCache();
-        $searchEngines = $cache->fetch($cacheId);
-
-        if (empty($searchEngines)) {
-            require_once PIWIK_INCLUDE_PATH . '/core/DataFiles/SearchEngines.php';
-
-            $searchEngines = $GLOBALS['Piwik_SearchEngines'];
-
-            Piwik::postEvent('Referrer.addSearchEngineUrls', array(&$searchEngines));
-
-            $cache->save($cacheId, $searchEngines);
-        }
-
-        return $searchEngines;
-    }
-
-    /**
-     * Returns list of search engines by name
-     *
-     * @see core/DataFiles/SearchEngines.php
-     *
-     * @return array  Array of ( searchEngineName => URL )
-     */
-    public static function getSearchEngineNames()
-    {
-        $cacheId = 'Common.getSearchEngineNames';
-        $cache = Cache::getTransientCache();
-        $nameToUrl = $cache->fetch($cacheId);
-
-        if (empty($nameToUrl)) {
-            $searchEngines = self::getSearchEngineUrls();
-
-            $nameToUrl = array();
-            foreach ($searchEngines as $url => $info) {
-                if (!isset($nameToUrl[$info[0]])) {
-                    $nameToUrl[$info[0]] = $url;
-                }
-            }
-            $cache->save($cacheId, $nameToUrl);
-        }
-
-        return $nameToUrl;
-    }
-
     /**
      * Returns list of social networks by URL
      *
diff --git a/core/UrlHelper.php b/core/UrlHelper.php
index 4a0ac0fa0a..66a0e64e25 100644
--- a/core/UrlHelper.php
+++ b/core/UrlHelper.php
@@ -258,242 +258,6 @@ class UrlHelper
         return $result;
     }
 
-    /**
-     * Extracts a keyword from a raw not encoded URL.
-     * Will only extract keyword if a known search engine has been detected.
-     * Returns the keyword:
-     * - in UTF8: automatically converted from other charsets when applicable
-     * - strtolowered: "QUErY test!" will return "query test!"
-     * - trimmed: extra spaces before and after are removed
-     *
-     * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php
-     * The function returns false when a keyword couldn't be found.
-     *     eg. if the url is "http://www.google.com/partners.html" this will return false,
-     *       as the google keyword parameter couldn't be found.
-     *
-     * @see unit tests in /tests/core/Common.test.php
-     * @param string $referrerUrl URL referrer URL, eg. $_SERVER['HTTP_REFERER']
-     * @return array|bool   false if a keyword couldn't be extracted,
-     *                        or array(
-     *                            'name' => 'Google',
-     *                            'keywords' => 'my searched keywords')
-     */
-    public static function extractSearchEngineInformationFromUrl($referrerUrl)
-    {
-        $referrerParsed = @parse_url($referrerUrl);
-        $referrerHost = '';
-        if (isset($referrerParsed['host'])) {
-            $referrerHost = $referrerParsed['host'];
-        }
-        if (empty($referrerHost)) {
-            return false;
-        }
-        // some search engines (eg. Bing Images) use the same domain
-        // as an existing search engine (eg. Bing), we must also use the url path
-        $referrerPath = '';
-        if (isset($referrerParsed['path'])) {
-            $referrerPath = $referrerParsed['path'];
-        }
-
-        // no search query
-        if (!isset($referrerParsed['query'])) {
-            $referrerParsed['query'] = '';
-        }
-        $query = $referrerParsed['query'];
-
-        // Google Referrers URLs sometimes have the fragment which contains the keyword
-        if (!empty($referrerParsed['fragment'])) {
-            $query .= '&' . $referrerParsed['fragment'];
-        }
-
-        $searchEngines = Common::getSearchEngineUrls();
-
-        $hostPattern = self::getLossyUrl($referrerHost);
-        /*
-         * Try to get the best matching 'host' in definitions
-         * 1. check if host + path matches an definition
-         * 2. check if host only matches
-         * 3. check if host pattern + path matches
-         * 4. check if host pattern matches
-         * 5. special handling
-         */
-        if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
-            $referrerHost = $referrerHost . $referrerPath;
-        } elseif (array_key_exists($referrerHost, $searchEngines)) {
-            // no need to change host
-        } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
-            $referrerHost = $hostPattern . $referrerPath;
-        } elseif (array_key_exists($hostPattern, $searchEngines)) {
-            $referrerHost = $hostPattern;
-        } elseif (!array_key_exists($referrerHost, $searchEngines)) {
-            if (!strncmp($query, 'cx=partner-pub-', 15)) {
-                // Google custom search engine
-                $referrerHost = 'google.com/cse';
-            } elseif (!strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28)) {
-                // private-label search powered by InfoSpace Metasearch
-                $referrerHost = 'wsdsold.infospace.com';
-            } elseif (strpos($referrerHost, '.images.search.yahoo.com') != false) {
-                // Yahoo! Images
-                $referrerHost = 'images.search.yahoo.com';
-            } elseif (strpos($referrerHost, '.search.yahoo.com') != false) {
-                // Yahoo!
-                $referrerHost = 'search.yahoo.com';
-            } else {
-                return false;
-            }
-        }
-        $searchEngineName = $searchEngines[$referrerHost][0];
-        $variableNames = null;
-        if (isset($searchEngines[$referrerHost][1])) {
-            $variableNames = $searchEngines[$referrerHost][1];
-        }
-        if (!$variableNames) {
-            $searchEngineNames = Common::getSearchEngineNames();
-            $url = $searchEngineNames[$searchEngineName];
-            $variableNames = $searchEngines[$url][1];
-        }
-        if (!is_array($variableNames)) {
-            $variableNames = array($variableNames);
-        }
-
-        $key = null;
-        if ($searchEngineName === 'Google Images'
-            || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
-        ) {
-            if (strpos($query, '&prev') !== false) {
-                $query = urldecode(trim(self::getParameterFromQueryString($query, 'prev')));
-                $query = str_replace('&', '&amp;', strstr($query, '?'));
-            }
-            $searchEngineName = 'Google Images';
-        } elseif ($searchEngineName === 'Google'
-            && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
-        ) {
-            $keys = array();
-            $key = self::getParameterFromQueryString($query, 'as_q');
-            if (!empty($key)) {
-                array_push($keys, $key);
-            }
-            $key = self::getParameterFromQueryString($query, 'as_oq');
-            if (!empty($key)) {
-                array_push($keys, str_replace('+', ' OR ', $key));
-            }
-            $key = self::getParameterFromQueryString($query, 'as_epq');
-            if (!empty($key)) {
-                array_push($keys, "\"$key\"");
-            }
-            $key = self::getParameterFromQueryString($query, 'as_eq');
-            if (!empty($key)) {
-                array_push($keys, "-$key");
-            }
-            $key = trim(urldecode(implode(' ', $keys)));
-        }
-
-        if ($searchEngineName === 'Google') {
-            // top bar menu
-            $tbm = self::getParameterFromQueryString($query, 'tbm');
-            switch ($tbm) {
-                case 'isch':
-                    $searchEngineName = 'Google Images';
-                    break;
-                case 'vid':
-                    $searchEngineName = 'Google Video';
-                    break;
-                case 'shop':
-                    $searchEngineName = 'Google Shopping';
-                    break;
-            }
-        }
-
-        if (empty($key)) {
-            foreach ($variableNames as $variableName) {
-                if ($variableName[0] == '/') {
-                    // regular expression match
-                    if (preg_match($variableName, $referrerUrl, $matches)) {
-                        $key = trim(urldecode($matches[1]));
-                        break;
-                    }
-                } else {
-                    // search for keywords now &vname=keyword
-                    $key = self::getParameterFromQueryString($query, $variableName);
-                    $key = trim(urldecode($key));
-
-                    // Special cases: empty or no keywords
-                    if (empty($key)
-                        && (
-                            // Google search with no keyword
-                            ($searchEngineName == 'Google'
-                                && (empty($query) && (empty($referrerPath) || $referrerPath == '/') && empty($referrerParsed['fragment']))
-                            )
-
-                            // Yahoo search with no keyword
-                            || ($searchEngineName == 'Yahoo!'
-                                && ($referrerParsed['host'] == 'r.search.yahoo.com')
-                            )
-
-                            // empty keyword parameter
-                            || strpos($query, sprintf('&%s=', $variableName)) !== false
-                            || strpos($query, sprintf('?%s=', $variableName)) !== false
-
-                            // search engines with no keyword
-                            || $searchEngineName == 'Ixquick'
-                            || $searchEngineName == 'Google Images'
-                            || $searchEngineName == 'DuckDuckGo')
-                    ) {
-                        $key = false;
-                    }
-                    if (!empty($key)
-                        || $key === false
-                    ) {
-                        break;
-                    }
-                }
-            }
-        }
-
-        // $key === false is the special case "No keyword provided" which is a Search engine match
-        if ($key === null
-            || $key === ''
-        ) {
-            return false;
-        }
-
-        if (!empty($key)) {
-            if (function_exists('iconv')
-                && isset($searchEngines[$referrerHost][3])
-            ) {
-                // accepts string, array, or comma-separated list string in preferred order
-                $charsets = $searchEngines[$referrerHost][3];
-                if (!is_array($charsets)) {
-                    $charsets = explode(',', $charsets);
-                }
-
-                if (!empty($charsets)) {
-                    $charset = $charsets[0];
-                    if (count($charsets) > 1
-                        && function_exists('mb_detect_encoding')
-                    ) {
-                        $charset = mb_detect_encoding($key, $charsets);
-                        if ($charset === false) {
-                            $charset = $charsets[0];
-                        }
-                    }
-
-                    $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
-                    if (!empty($newkey)) {
-                        $key = $newkey;
-                    }
-                }
-            }
-
-            $key = Common::mb_strtolower($key);
-        }
-
-        return array(
-            'name'     => $searchEngineName,
-            'keywords' => $key,
-        );
-    }
-
     /**
      * Returns the query part from any valid url and adds additional parameters to the query part if needed.
      *
diff --git a/plugins/CoreAdminHome/Tasks.php b/plugins/CoreAdminHome/Tasks.php
index 01290f0cff..30f66a995d 100644
--- a/plugins/CoreAdminHome/Tasks.php
+++ b/plugins/CoreAdminHome/Tasks.php
@@ -49,6 +49,7 @@ class Tasks extends \Piwik\Plugin\Tasks
         $this->daily('optimizeArchiveTable', null, self::LOWEST_PRIORITY);
 
         $this->weekly('updateSpammerBlacklist');
+        // search engine definitions are refreshed weekly by the Referrers plugin's own Tasks class
     }
 
     /**
diff --git a/plugins/Referrers/Columns/Base.php b/plugins/Referrers/Columns/Base.php
index 78fe27516c..1f4a0c7210 100644
--- a/plugins/Referrers/Columns/Base.php
+++ b/plugins/Referrers/Columns/Base.php
@@ -11,6 +11,7 @@ namespace Piwik\Plugins\Referrers\Columns;
 use Piwik\Common;
 use Piwik\Piwik;
 use Piwik\Plugin\Dimension\VisitDimension;
+use Piwik\Plugins\Referrers\SearchEngine;
 use Piwik\Tracker\PageUrl;
 use Piwik\Tracker\Request;
 use Piwik\Tracker\Visit;
@@ -139,7 +140,7 @@ abstract class Base extends VisitDimension
      */
     protected function detectReferrerSearchEngine()
     {
-        $searchEngineInformation = UrlHelper::extractSearchEngineInformationFromUrl($this->referrerUrl);
+        $searchEngineInformation = SearchEngine::getInstance()->extractInformationFromUrl($this->referrerUrl);
 
         /**
          * Triggered when detecting the search engine of a referrer URL.
@@ -277,7 +278,7 @@ abstract class Base extends VisitDimension
 
         // Set the Campaign keyword to the keyword found in the Referrer URL if any
         if (!empty($this->nameReferrerAnalyzed)) {
-            $referrerUrlInfo = UrlHelper::extractSearchEngineInformationFromUrl($this->referrerUrl);
+            $referrerUrlInfo = SearchEngine::getInstance()->extractInformationFromUrl($this->referrerUrl);
             if (!empty($referrerUrlInfo['keywords'])) {
                 $this->keywordReferrerAnalyzed = $referrerUrlInfo['keywords'];
             }
diff --git a/plugins/Referrers/SearchEngine.php b/plugins/Referrers/SearchEngine.php
new file mode 100644
index 0000000000..f2f12fe63c
--- /dev/null
+++ b/plugins/Referrers/SearchEngine.php
@@ -0,0 +1,397 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+namespace Piwik\Plugins\Referrers;
+use Piwik\Cache;
+use Piwik\Common;
+use Piwik\Option;
+use Piwik\Piwik;
+use Piwik\Singleton;
+use Piwik\UrlHelper;
+
+/**
+ * Contains methods to access search engine definition data.
+ */
+class SearchEngine extends Singleton
+{
+    const OPTION_STORAGE_NAME = 'SearchEngineDefinitions';
+
+    /** @var string location of definition file (relative to PIWIK_INCLUDE_PATH) */
+    const DEFINITION_FILE = '/vendor/piwik/searchengine-and-social-definitions/SearchEngines.yml';
+
+    protected $definitionList = null;
+
+    /**
+     * Returns the search engine definitions indexed by URL, served from the eager cache when available.
+     *
+     * @return array  Array of ( URL => array( 'name' => searchEngineName, ... further definition keys ) )
+     */
+    public function getSearchEngineDefinitions()
+    {
+        $eagerCache = Cache::getEagerCache();
+        $key = 'SearchEngine-' . self::OPTION_STORAGE_NAME;
+
+        if ($eagerCache->contains($key)) {
+            return $eagerCache->fetch($key);
+        }
+
+        // cache miss: build the list once and remember it for subsequent calls
+        $definitions = $this->loadSearchEngineDefinitions();
+        $eagerCache->save($key, $definitions);
+        return $definitions;
+    }
+
+    /**
+     * Loads the definitions once per instance (stored option first, bundled YML file as fallback)
+     * and posts `Referrer.addSearchEngineUrls` with the list passed by reference on every call.
+     */
+    private function loadSearchEngineDefinitions()
+    {
+        if ($this->definitionList === null) {
+            // Read first from the auto-updated list in database
+            $list = Option::get(self::OPTION_STORAGE_NAME);
+            $definitions = $list ? unserialize($list) : false;
+            if (empty($definitions) || !is_array($definitions)) {
+                // Nothing usable stored (missing, empty or corrupt): fall back to the bundled list
+                $yml = file_get_contents(PIWIK_INCLUDE_PATH . self::DEFINITION_FILE);
+                $definitions = $this->loadYmlData($yml);
+                Option::set(self::OPTION_STORAGE_NAME, serialize($definitions));
+            }
+            $this->definitionList = $definitions;
+        }
+        Piwik::postEvent('Referrer.addSearchEngineUrls', array(&$this->definitionList));
+        return $this->definitionList;
+    }
+
+    /**
+     * Converts a YML definition string into the URL-indexed list and keeps it on this instance.
+     *
+     * @param string $yml raw YML content
+     * @return array the transformed definitions, also stored in $this->definitionList
+     */
+    public function loadYmlData($yml)
+    {
+        // parse first, then flatten the per-engine structure into a per-URL lookup
+        $parsedYml = \Spyc::YAMLLoadString($yml);
+        $definitionsByUrl = $this->transformData($parsedYml);
+
+        return $this->definitionList = $definitionsByUrl;
+    }
+
+    protected function transformData($searchEngines)
+    {
+        // flatten ( engine name => definition groups ) into ( url => group data plus 'name', minus 'urls' )
+        $definitionsByUrl = array();
+        foreach ($searchEngines as $engineName => $definitionGroups) {
+            foreach ($definitionGroups as $group) {
+                $data = array_diff_key($group, array('urls' => true));
+                $data['name'] = $engineName;
+                foreach ($group['urls'] as $url) {
+                    $definitionsByUrl[$url] = $data;
+                }
+            }
+        }
+        return $definitionsByUrl;
+    }
+
+    /**
+     * Returns a map from search engine name to the first URL defined for that engine.
+     *
+     * The map is derived from getSearchEngineDefinitions() and kept in the transient cache.
+     *
+     * @return array  Array of ( searchEngineName => URL )
+     */
+    public function getSearchEngineNames()
+    {
+        $transientCache = Cache::getTransientCache();
+        $key = 'SearchEngine.getSearchEngineNames';
+        $cached = $transientCache->fetch($key);
+        if (!empty($cached)) {
+            return $cached;
+        }
+
+        $urlByName = array();
+        foreach ($this->getSearchEngineDefinitions() as $url => $definition) {
+            $engineName = $definition['name'];
+            if (!isset($urlByName[$engineName])) {
+                $urlByName[$engineName] = $url;
+            }
+        }
+        $transientCache->save($key, $urlByName);
+
+        return $urlByName;
+    }
+
+    /**
+     * Looks up the definition registered for a search engine host (or host + path) key.
+     *
+     * @param string $host key as used in the definition list
+     * @return array the definition, or an empty array when the key is unknown
+     */
+    public function getDefinitionByHost($host)
+    {
+        $definitions = $this->getSearchEngineDefinitions();
+        $isKnownHost = array_key_exists($host, $definitions);
+
+        // unknown keys yield an empty definition rather than an error
+        $definition = $isKnownHost ? $definitions[$host] : array();
+
+        return $definition;
+    }
+
+    /**
+     * Returns the 'params' entry (keyword parameter names) of the definition for the given host
+     * @param string $host
+     * @return array empty when the host is unknown or defines no parameters
+     */
+    public function getParameterNamesByHost($host)
+    {
+        $hostDefinition = $this->getDefinitionByHost($host);
+        $hasParameters = !empty($hostDefinition['params']);
+
+        // fall back to an empty list so callers can iterate unconditionally
+        $parameterNames = $hasParameters ? $hostDefinition['params'] : array();
+
+        return $parameterNames;
+    }
+
+    /**
+     * Returns the 'charsets' entry of the definition for the given search engine host
+     *
+     * @param string $host
+     * @return array empty when the host is unknown or defines no charsets
+     */
+    public function getCharsetsByHost($host)
+    {
+        $hostDefinition = $this->getDefinitionByHost($host);
+
+        if (!empty($hostDefinition['charsets'])) {
+            return $hostDefinition['charsets'];
+        }
+
+        return array();
+    }
+
+    /**
+     * Extracts a keyword from a raw not encoded URL.
+     * Will only extract keyword if a known search engine has been detected.
+     * Returns the keyword:
+     * - in UTF8: automatically converted from other charsets when applicable
+     * - strtolowered: "QUErY test!" will return "query test!"
+     * - trimmed: extra spaces before and after are removed
+     *
+     * The supported search engines are the ones returned by getSearchEngineDefinitions().
+     * The function returns false when a keyword couldn't be found.
+     *     eg. if the url is "http://www.google.com/partners.html" this will return false,
+     *       as the google keyword parameter couldn't be found.
+     *
+     * @see unit tests in plugins/Referrers/tests/Unit/SearchEngineTest.php
+     * @param string $referrerUrl URL referrer URL, eg. $_SERVER['HTTP_REFERER']
+     * @return array|bool   false if a keyword couldn't be extracted,
+     *                        or array(
+     *                            'name' => 'Google',
+     *                            'keywords' => 'my searched keywords')
+     */
+    public function extractInformationFromUrl($referrerUrl)
+    {
+        $referrerParsed = @parse_url($referrerUrl);
+        $referrerHost = '';
+        if (isset($referrerParsed['host'])) {
+            $referrerHost = $referrerParsed['host'];
+        }
+        if (empty($referrerHost)) {
+            return false;
+        }
+        // some search engines (eg. Bing Images) use the same domain
+        // as an existing search engine (eg. Bing), we must also use the url path
+        $referrerPath = '';
+        if (isset($referrerParsed['path'])) {
+            $referrerPath = $referrerParsed['path'];
+        }
+
+        // no search query
+        if (!isset($referrerParsed['query'])) {
+            $referrerParsed['query'] = '';
+        }
+        $query = $referrerParsed['query'];
+
+        // Google Referrers URLs sometimes have the fragment which contains the keyword
+        if (!empty($referrerParsed['fragment'])) {
+            $query .= '&' . $referrerParsed['fragment'];
+        }
+
+        $searchEngines = $this->getSearchEngineDefinitions();
+
+        $hostPattern = UrlHelper::getLossyUrl($referrerHost);
+        /*
+         * Try to get the best matching 'host' in definitions
+         * 1. check if host + path matches a definition
+         * 2. check if host only matches
+         * 3. check if host pattern + path matches
+         * 4. check if host pattern matches
+         * 5. special handling
+         */
+        if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
+            $referrerHost = $referrerHost . $referrerPath;
+        } elseif (array_key_exists($referrerHost, $searchEngines)) {
+            // no need to change host
+        } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
+            $referrerHost = $hostPattern . $referrerPath;
+        } elseif (array_key_exists($hostPattern, $searchEngines)) {
+            $referrerHost = $hostPattern;
+        } else {
+            if (!strncmp($query, 'cx=partner-pub-', 15)) {
+                // Google custom search engine
+                $referrerHost = 'google.com/cse';
+            } elseif (!strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28)) {
+                // private-label search powered by InfoSpace Metasearch
+                $referrerHost = 'wsdsold.infospace.com';
+            } elseif (strpos($referrerHost, '.images.search.yahoo.com') != false) {
+                // Yahoo! Images
+                $referrerHost = 'images.search.yahoo.com';
+            } elseif (strpos($referrerHost, '.search.yahoo.com') != false) {
+                // Yahoo!
+                $referrerHost = 'search.yahoo.com';
+            } else {
+                return false;
+            }
+        }
+        // hosts chosen by the special handling above are not guaranteed to exist in the loaded definitions
+        if (!isset($searchEngines[$referrerHost])) {
+            return false;
+        }
+        $searchEngineName = $searchEngines[$referrerHost]['name'];
+        $variableNames = $this->getParameterNamesByHost($referrerHost);
+
+        $key = null;
+        if ($searchEngineName === 'Google Images'
+            || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
+        ) {
+            if (strpos($query, '&prev') !== false) {
+                $query = urldecode(trim(UrlHelper::getParameterFromQueryString($query, 'prev')));
+                $query = str_replace('&', '&amp;', strstr($query, '?'));
+            }
+            $searchEngineName = 'Google Images';
+        } elseif ($searchEngineName === 'Google'
+            && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
+        ) {
+            $keys = array();
+            $key = UrlHelper::getParameterFromQueryString($query, 'as_q');
+            if (!empty($key)) {
+                array_push($keys, $key);
+            }
+            $key = UrlHelper::getParameterFromQueryString($query, 'as_oq');
+            if (!empty($key)) {
+                array_push($keys, str_replace('+', ' OR ', $key));
+            }
+            $key = UrlHelper::getParameterFromQueryString($query, 'as_epq');
+            if (!empty($key)) {
+                array_push($keys, "\"$key\"");
+            }
+            $key = UrlHelper::getParameterFromQueryString($query, 'as_eq');
+            if (!empty($key)) {
+                array_push($keys, "-$key");
+            }
+            $key = trim(urldecode(implode(' ', $keys)));
+        }
+
+        if ($searchEngineName === 'Google') {
+            // top bar menu
+            $tbm = UrlHelper::getParameterFromQueryString($query, 'tbm');
+            switch ($tbm) {
+                case 'isch':
+                    $searchEngineName = 'Google Images';
+                    break;
+                case 'vid':
+                    $searchEngineName = 'Google Video';
+                    break;
+                case 'shop':
+                    $searchEngineName = 'Google Shopping';
+                    break;
+            }
+        }
+
+        if (empty($key)) {
+            foreach ($variableNames as $variableName) {
+                if ($variableName[0] == '/') {
+                    // regular expression match
+                    if (preg_match($variableName, $referrerUrl, $matches)) {
+                        $key = trim(urldecode($matches[1]));
+                        break;
+                    }
+                } else {
+                    // search for keywords now &vname=keyword
+                    $key = UrlHelper::getParameterFromQueryString($query, $variableName);
+                    $key = trim(urldecode($key));
+
+                    // Special cases: empty or no keywords
+                    if (empty($key)
+                        && (
+                            // Google search with no keyword
+                            ($searchEngineName == 'Google'
+                                && (empty($query) && (empty($referrerPath) || $referrerPath == '/') && empty($referrerParsed['fragment']))
+                            )
+
+                            // Yahoo search with no keyword
+                            || ($searchEngineName == 'Yahoo!'
+                                && ($referrerParsed['host'] == 'r.search.yahoo.com')
+                            )
+
+                            // empty keyword parameter
+                            || strpos($query, sprintf('&%s=', $variableName)) !== false
+                            || strpos($query, sprintf('?%s=', $variableName)) !== false
+
+                            // search engines with no keyword
+                            || $searchEngineName == 'Ixquick'
+                            || $searchEngineName == 'Google Images'
+                            || $searchEngineName == 'DuckDuckGo')
+                    ) {
+                        $key = false;
+                    }
+                    if (!empty($key) || $key === false) {
+                        break;
+                    }
+                }
+            }
+        }
+
+        // $key === false is the special case "No keyword provided" which is a Search engine match
+        if ($key === null || $key === '') {
+            return false;
+        }
+
+        if (!empty($key)) {
+            $charsets = $this->getCharsetsByHost($referrerHost);
+
+            if (function_exists('iconv') && !empty($charsets)) {
+                $charset = $charsets[0];
+                if (count($charsets) > 1
+                    && function_exists('mb_detect_encoding')
+                ) {
+                    $charset = mb_detect_encoding($key, $charsets);
+                    if ($charset === false) {
+                        $charset = $charsets[0];
+                    }
+                }
+
+                $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
+                if (!empty($newkey)) {
+                    $key = $newkey;
+                }
+            }
+
+            $key = Common::mb_strtolower($key);
+        }
+
+        return array(
+            'name' => $searchEngineName,
+            'keywords' => $key,
+        );
+    }
+
+}
diff --git a/plugins/Referrers/Tasks.php b/plugins/Referrers/Tasks.php
new file mode 100644
index 0000000000..7481dbdca3
--- /dev/null
+++ b/plugins/Referrers/Tasks.php
@@ -0,0 +1,52 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+namespace Piwik\Plugins\Referrers;
+
+use Piwik\Http;
+use Piwik\Option;
+
+/**
+ * Scheduled tasks of the Referrers plugin.
+ */
+class Tasks extends \Piwik\Plugin\Tasks
+{
+    /**
+     * Schedules the tasks of this plugin: the search engine definitions are updated weekly.
+     */
+    public function schedule()
+    {
+        $this->weekly('updateSearchEngines');
+        // TODO: schedule 'updateSocials' once that task is implemented
+    }
+
+    /**
+     * Downloads the search engine definitions and stores them as an option.
+     *
+     * Nothing is stored when the download yields no content or when no
+     * definitions can be loaded from it, so that a failed update does not
+     * replace the previously stored definitions with an empty list.
+     *
+     * @see https://github.com/piwik/searchengine-and-social-definitions
+     */
+    public function updateSearchEngines()
+    {
+        $url = 'https://raw.githubusercontent.com/piwik/searchengine-and-social-definitions/master/SearchEngines.yml';
+        $list = Http::sendHttpRequest($url, 30);
+        if (!is_string($list) || trim($list) === '') {
+            return;
+        }
+
+        $searchEngines = SearchEngine::getInstance()->loadYmlData($list);
+        if (empty($searchEngines)) {
+            return;
+        }
+
+        Option::set(SearchEngine::OPTION_STORAGE_NAME, serialize($searchEngines));
+    }
+}
\ No newline at end of file
diff --git a/plugins/Referrers/functions.php b/plugins/Referrers/functions.php
index e0fee30833..2a39f8d1d4 100644
--- a/plugins/Referrers/functions.php
+++ b/plugins/Referrers/functions.php
@@ -120,7 +120,7 @@ function getSocialsLogoFromUrl($domain)
  */
 function getSearchEngineUrlFromName($name)
 {
-    $searchEngineNames = Common::getSearchEngineNames();
+    $searchEngineNames = SearchEngine::getInstance()->getSearchEngineNames();
     if (isset($searchEngineNames[$name])) {
         $url = 'http://' . $searchEngineNames[$name];
     } else {
@@ -190,10 +190,10 @@ function getSearchEngineUrlFromUrlAndKeyword($url, $keyword)
     if ($keyword === API::LABEL_KEYWORD_NOT_DEFINED) {
         return 'http://piwik.org/faq/general/#faq_144';
     }
-    $searchEngineUrls = Common::getSearchEngineUrls();
+    $searchEngineUrls = SearchEngine::getInstance()->getSearchEngineDefinitions();
     $keyword = urlencode($keyword);
     $keyword = str_replace(urlencode('+'), urlencode(' '), $keyword);
-    $path = @$searchEngineUrls[getSearchEngineHostPathFromUrl($url)][2];
+    $path = @$searchEngineUrls[getSearchEngineHostPathFromUrl($url)]['backlink'];
     if (empty($path)) {
         return false;
     }
diff --git a/plugins/Referrers/tests/Unit/ReferrersTest.php b/plugins/Referrers/tests/Unit/ReferrersTest.php
index 909e6bf65e..22e7dbe216 100644
--- a/plugins/Referrers/tests/Unit/ReferrersTest.php
+++ b/plugins/Referrers/tests/Unit/ReferrersTest.php
@@ -11,20 +11,32 @@ namespace Piwik\Plugins\Referrers\tests;
 use Piwik\DataTable;
 use Piwik\DataTable\Row;
 use Piwik\Period;
+use Piwik\Plugins\Referrers\SearchEngine;
 
 require_once PIWIK_INCLUDE_PATH . '/plugins/Referrers/Referrers.php';
 
+/**
+ * @group Referererer
+ */
 class ReferrersTest extends \PHPUnit_Framework_TestCase
 {
+
+    public static function setUpBeforeClass()
+    {
+        // inject definitions to avoid database usage
+        $yml = file_get_contents(PIWIK_INCLUDE_PATH . SearchEngine::DEFINITION_FILE);
+        SearchEngine::getInstance()->loadYmlData($yml);
+
+        parent::setUpBeforeClass();
+    }
+
     /**
      * Dataprovider serving all search engine data
      */
     public function getSearchEngines()
     {
-        include PIWIK_PATH_TEST_TO_ROOT . '/core/DataFiles/SearchEngines.php';
-
         $searchEngines = array();
-        foreach ($GLOBALS['Piwik_SearchEngines'] as $url => $searchEngine) {
+        foreach (SearchEngine::getInstance()->getSearchEngineDefinitions() as $url => $searchEngine) {
             $searchEngines[] = array($url, $searchEngine);
         }
         return $searchEngines;
@@ -43,10 +55,10 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
         static $searchEngines = array();
 
         $name = parse_url('http://' . $url);
-        if (!array_key_exists($searchEngine[0], $searchEngines)) {
-            $searchEngines[$searchEngine[0]] = $url;
+        if (!array_key_exists($searchEngine['name'], $searchEngines)) {
+            $searchEngines[$searchEngine['name']] = $url;
 
-            $this->assertTrue(!empty($searchEngine[1]), $name['host']);
+            $this->assertTrue(!empty($searchEngine['params']), $name['host']);
         }
     }
 
@@ -66,8 +78,8 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
         static $searchEngines = array();
 
         $name = parse_url('http://' . $url);
-        if (!array_key_exists($searchEngine[0], $searchEngines)) {
-            $searchEngines[$searchEngine[0]] = $url;
+        if (!array_key_exists($searchEngine['name'], $searchEngines)) {
+            $searchEngines[$searchEngine['name']] = $url;
 
             $this->assertTrue(in_array($name['host'] . '.png', $favicons), $name['host']);
         }
@@ -80,11 +92,9 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
      */
     public function testObsoleteSearchEngineIcons()
     {
-        include PIWIK_PATH_TEST_TO_ROOT . '/core/DataFiles/SearchEngines.php';
-
         // Get list of search engines and first appearing URL
         $searchEngines = array();
-        foreach ($GLOBALS['Piwik_SearchEngines'] as $url => $searchEngine) {
+        foreach (SearchEngine::getInstance()->getSearchEngineDefinitions() as $url => $searchEngine) {
             $name = parse_url('http://' . $url);
             if (!array_key_exists($name['host'], $searchEngines)) {
                 $searchEngines[$name['host']] = true;
@@ -142,7 +152,6 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
      */
     public function testGetSearchEngineUrlFromUrlAndKeyword($url, $keyword, $expected)
     {
-        include PIWIK_PATH_TEST_TO_ROOT . '/core/DataFiles/SearchEngines.php';
         $this->assertEquals($expected, \Piwik\Plugins\Referrers\getSearchEngineUrlFromUrlAndKeyword($url, $keyword));
     }
 
diff --git a/plugins/Referrers/tests/Unit/SearchEngineTest.php b/plugins/Referrers/tests/Unit/SearchEngineTest.php
new file mode 100644
index 0000000000..508068feda
--- /dev/null
+++ b/plugins/Referrers/tests/Unit/SearchEngineTest.php
@@ -0,0 +1,97 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ */
+
+namespace Piwik\Plugins\Referrers\tests;
+
+use Piwik\Plugins\Referrers\SearchEngine;
+use Spyc;
+
+/**
+ * Tests search engine detection against the definitions loaded from
+ * SearchEngine::DEFINITION_FILE.
+ *
+ * @group SearchEngine
+ */
+class SearchEngineTest extends \PHPUnit_Framework_TestCase
+{
+    /**
+     * Dataprovider for testExtractInformationFromUrl
+     */
+    public function getSearchEngineUrls()
+    {
+        return Spyc::YAMLLoad(PIWIK_PATH_TEST_TO_ROOT .'/tests/resources/extractSearchEngineInformationFromUrlTests.yml');
+    }
+
+    /**
+     * Loads the search engine definitions from the definition file before the tests run.
+     */
+    public static function setUpBeforeClass()
+    {
+        // inject definitions to avoid database usage
+        $yml = file_get_contents(PIWIK_INCLUDE_PATH . SearchEngine::DEFINITION_FILE);
+        SearchEngine::getInstance()->loadYmlData($yml);
+
+        parent::setUpBeforeClass();
+    }
+
+    /**
+     * Checks that the expected engine name and keywords are extracted from a URL,
+     * or that false is returned when the data set names no engine.
+     *
+     * @dataProvider getSearchEngineUrls
+     * @group Core
+     */
+    public function testExtractInformationFromUrl($url, $engine, $keywords)
+    {
+        $returnedValue = SearchEngine::getInstance()->extractInformationFromUrl($url);
+
+        $expectedValue = false;
+
+        if (!empty($engine)) {
+            $expectedValue = array('name' => $engine, 'keywords' => $keywords);
+        }
+
+        $this->assertEquals($expectedValue, $returnedValue);
+    }
+
+    /**
+     * Checks every definition: a backlink must contain the {k} macro, the first
+     * host listed for an engine name must not contain {}, and charsets must be a
+     * non-empty string or array without spaces or empty entries.
+     */
+    public function testSearchEnginesDefinedCorrectly()
+    {
+        $searchEngines = array();
+        foreach (SearchEngine::getInstance()->getSearchEngineDefinitions() as $host => $info) {
+            if (isset($info['backlink']) && $info['backlink'] !== false) {
+                $this->assertTrue(strrpos($info['backlink'], "{k}") !== false, $host . " search URL is not defined correctly, must contain the macro {k}");
+            }
+
+            if (!array_key_exists($info['name'], $searchEngines)) {
+                $searchEngines[$info['name']] = true;
+
+                $this->assertTrue(strpos($host, '{}') === false, $host . " search URL is the master record and should not contain {}");
+            }
+
+            if (isset($info['charsets']) && $info['charsets'] !== false) {
+                $this->assertTrue(is_array($info['charsets']) || is_string($info['charsets']), $host . ' charsets must be either a string or an array');
+
+                if (is_string($info['charsets'])) {
+                    $this->assertTrue(trim($info['charsets']) !== '', $host . ' charsets cannot be an empty string');
+                    $this->assertTrue(strpos($info['charsets'], ' ') === false, $host . ' charsets cannot contain spaces');
+                }
+
+                if (is_array($info['charsets'])) {
+                    $this->assertTrue(count($info['charsets']) > 0, $host . ' charsets cannot be an empty array');
+                    $this->assertTrue(strpos(serialize($info['charsets']), '""') === false, $host . ' charsets in array cannot be empty strings');
+                    $this->assertTrue(strpos(serialize($info['charsets']), ' ') === false, $host . ' charsets in array cannot contain spaces');
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/tests/PHPUnit/Framework/Fixture.php b/tests/PHPUnit/Framework/Fixture.php
index 04c52371da..d66cfdac6e 100644
--- a/tests/PHPUnit/Framework/Fixture.php
+++ b/tests/PHPUnit/Framework/Fixture.php
@@ -243,7 +243,6 @@ class Fixture extends \PHPUnit_Framework_Assert
             static::fail("TEST INITIALIZATION FAILED: " . $e->getMessage() . "\n" . $e->getTraceAsString());
         }
 
-        include "DataFiles/SearchEngines.php";
         include "DataFiles/Socials.php";
         include "DataFiles/Providers.php";
 
diff --git a/tests/PHPUnit/Unit/CommonTest.php b/tests/PHPUnit/Unit/CommonTest.php
index 8aa85ed550..d8817ec3db 100644
--- a/tests/PHPUnit/Unit/CommonTest.php
+++ b/tests/PHPUnit/Unit/CommonTest.php
@@ -465,39 +465,4 @@ class CommonTest extends PHPUnit_Framework_TestCase
     {
         $this->assertEquals($expected, Common::extractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages), "test with {$browserLanguage} failed, expected {$expected}");
     }
-
-    public function testSearchEnginesDefinedCorrectly()
-    {
-        include "DataFiles/SearchEngines.php";
-
-        $searchEngines = array();
-        foreach ($GLOBALS['Piwik_SearchEngines'] as $host => $info) {
-            if (isset($info[2]) && $info[2] !== false) {
-                $this->assertTrue(strrpos($info[2], "{k}") !== false, $host . " search URL is not defined correctly, must contain the macro {k}");
-            }
-
-            if (!array_key_exists($info[0], $searchEngines)) {
-                $searchEngines[$info[0]] = true;
-
-                $this->assertTrue(strpos($host, '{}') === false, $host . " search URL is the master record and should not contain {}");
-            }
-
-            if (isset($info[3]) && $info[3] !== false) {
-                $this->assertTrue(is_array($info[3]) || is_string($info[3]), $host . ' encoding must be either a string or an array');
-
-                if (is_string($info[3])) {
-                    $this->assertTrue(trim($info[3]) !== '', $host . ' encoding cannot be an empty string');
-                    $this->assertTrue(strpos($info[3], ' ') === false, $host . ' encoding cannot contain spaces');
-
-                }
-
-                if (is_array($info[3])) {
-                    $this->assertTrue(count($info[3]) > 0, $host . ' encodings cannot be an empty array');
-                    $this->assertTrue(strpos(serialize($info[3]), '""') === false, $host . ' encodings in array cannot be empty stringss');
-                    $this->assertTrue(strpos(serialize($info[3]), ' ') === false, $host . ' encodings in array cannot contain spaces');
-                }
-            }
-        }
-    }
-
 }
diff --git a/tests/PHPUnit/Unit/UrlHelperTest.php b/tests/PHPUnit/Unit/UrlHelperTest.php
index e972d20d54..cc15d6eeb1 100644
--- a/tests/PHPUnit/Unit/UrlHelperTest.php
+++ b/tests/PHPUnit/Unit/UrlHelperTest.php
@@ -149,32 +149,6 @@ class UrlHelperTest extends \PHPUnit_Framework_TestCase
         $this->assertEquals(serialize($expected), serialize(UrlHelper::getArrayFromQueryString('a&b=&c=1&d[]&e[]=&f[]=a&g[]=b&g[]=c')));
     }
 
-    /**
-     * Dataprovider for testExtractSearchEngineInformationFromUrl
-     */
-    public function getSearchEngineUrls()
-    {
-        return Spyc::YAMLLoad(PIWIK_PATH_TEST_TO_ROOT .'/tests/resources/extractSearchEngineInformationFromUrlTests.yml');
-    }
-
-    /**
-     * @dataProvider getSearchEngineUrls
-     * @group Core
-     */
-    public function testExtractSearchEngineInformationFromUrl($url, $engine, $keywords)
-    {
-        $this->includeDataFilesForSearchEngineTest();
-        $returnedValue = UrlHelper::extractSearchEngineInformationFromUrl($url);
-
-        $exptectedValue = false;
-
-        if (!empty($engine)) {
-            $exptectedValue = array('name' => $engine, 'keywords' => $keywords);
-        }
-
-        $this->assertEquals($exptectedValue, $returnedValue);
-    }
-
     /**
      * Dataprovider for testGetLossyUrl
      */
@@ -203,11 +177,6 @@ class UrlHelperTest extends \PHPUnit_Framework_TestCase
         $this->assertEquals($expected, UrlHelper::getLossyUrl($input));
     }
 
-    private function includeDataFilesForSearchEngineTest()
-    {
-        include "DataFiles/SearchEngines.php";
-    }
-
     /**
      * @group Core
      */
-- 
GitLab