diff --git a/core/API/Request.php b/core/API/Request.php index 65801233a85e5973d828b0801eca4aeac7150643..5e9ba6d850f58325930d78ecf7a66f9a7b78222a 100644 --- a/core/API/Request.php +++ b/core/API/Request.php @@ -18,6 +18,7 @@ use Piwik\PluginDeactivatedException; use Piwik\PluginsManager; use Piwik\SettingsServer; use Piwik\Url; +use Piwik\UrlHelper; /** * An API request is the object used to make a call to the API and get the result. @@ -78,7 +79,7 @@ class Request $request = trim($request); $request = str_replace(array("\n", "\t"), '', $request); - $requestParsed = Common::getArrayFromQueryString($request); + $requestParsed = UrlHelper::getArrayFromQueryString($request); $requestArray = $requestParsed + $defaultRequest; } @@ -229,7 +230,7 @@ class Request if (empty($_SERVER['QUERY_STRING'])) { return array(); } - $GET = Common::getArrayFromQueryString($_SERVER['QUERY_STRING']); + $GET = UrlHelper::getArrayFromQueryString($_SERVER['QUERY_STRING']); return $GET; } diff --git a/core/Common.php b/core/Common.php index 36ff82061e352bf93defbccca5c7d10d7dcff04c..4055ea8b6336fe5d5f1a8f14922efa47096bc2a3 100644 --- a/core/Common.php +++ b/core/Common.php @@ -125,147 +125,6 @@ class Common return PluginsManager::getInstance()->isPluginActivated('Goals'); } - /* - * URLs - */ - - /** - * Returns the path and query part from a URL. - * Eg. http://piwik.org/test/index.php?module=CoreHome will return /test/index.php?module=CoreHome - * - * @param string $url either http://piwik.org/test or / - * @return string - */ - public static function getPathAndQueryFromUrl($url) - { - $parsedUrl = parse_url($url); - $result = ''; - if (isset($parsedUrl['path'])) { - $result .= substr($parsedUrl['path'], 1); - } - if (isset($parsedUrl['query'])) { - $result .= '?' . $parsedUrl['query']; - } - return $result; - } - - /** - * Returns the value of a GET parameter $parameter in an URL query $urlQuery - * - * @param string $urlQuery result of parse_url()['query'] and htmlentitied (& is &) eg. module=test&action=toto or ?page=test - * @param string $parameter - * @return string|bool Parameter value if found (can be the empty string!), null if not found - */ - public static function getParameterFromQueryString($urlQuery, $parameter) - { - $nameToValue = self::getArrayFromQueryString($urlQuery); - if (isset($nameToValue[$parameter])) { - return $nameToValue[$parameter]; - } - return null; - } - - /** - * Returns an URL query string in an array format - * - * @param string $urlQuery - * @return array array( param1=> value1, param2=>value2) - */ - public static function getArrayFromQueryString($urlQuery) - { - if (strlen($urlQuery) == 0) { - return array(); - } - if ($urlQuery[0] == '?') { - $urlQuery = substr($urlQuery, 1); - } - $separator = '&'; - - $urlQuery = $separator . $urlQuery; - // $urlQuery = str_replace(array('%20'), ' ', $urlQuery); - $refererQuery = trim($urlQuery); - - $values = explode($separator, $refererQuery); - - $nameToValue = array(); - - foreach ($values as $value) { - $pos = strpos($value, '='); - if ($pos !== false) { - $name = substr($value, 0, $pos); - $value = substr($value, $pos + 1); - if ($value === false) { - $value = ''; - } - } else { - $name = $value; - $value = false; - } - if (!empty($name)) { - $name = self::sanitizeInputValue($name); - } - if (!empty($value)) { - $value = self::sanitizeInputValue($value); - } - - // if array without indexes - $count = 0; - $tmp = preg_replace('/(\[|%5b)(]|%5d)$/i', '', $name, -1, $count); - if (!empty($tmp) && $count) { - $name = $tmp; - if (isset($nameToValue[$name]) == false || is_array($nameToValue[$name]) == false) { - $nameToValue[$name] = array(); - } - array_push($nameToValue[$name], $value); - } else if (!empty($name)) { - $nameToValue[$name] = $value; - } - } - return $nameToValue; - } - - /** - * Builds a URL from the result of parse_url function - * Copied from the PHP comments at http://php.net/parse_url - * @param array $parsed - * @return bool|string - */ - public static function getParseUrlReverse($parsed) - { - if (!is_array($parsed)) { - return false; - } - - $uri = !empty($parsed['scheme']) ? $parsed['scheme'] . ':' . (!strcasecmp($parsed['scheme'], 'mailto') ? '' : '//') : ''; - $uri .= !empty($parsed['user']) ? $parsed['user'] . (!empty($parsed['pass']) ? ':' . $parsed['pass'] : '') . '@' : ''; - $uri .= !empty($parsed['host']) ? $parsed['host'] : ''; - $uri .= !empty($parsed['port']) ? ':' . $parsed['port'] : ''; - - if (!empty($parsed['path'])) { - $uri .= (!strncmp($parsed['path'], '/', 1)) - ? $parsed['path'] - : ((!empty($uri) ? '/' : '') . $parsed['path']); - } - - $uri .= !empty($parsed['query']) ? '?' . $parsed['query'] : ''; - $uri .= !empty($parsed['fragment']) ? '#' . $parsed['fragment'] : ''; - return $uri; - } - - /** - * Returns true if the string passed may be a URL. - * We don't need a precise test here because the value comes from the website - * tracked source code and the URLs may look very strange. - * - * @param string $url - * @return bool - */ - public static function isLookLikeUrl($url) - { - return preg_match('~^(ftp|news|http|https)?://(.*)$~D', $url, $matches) !== 0 - && strlen($matches[2]) > 0; - } - /* * String operations */ @@ -1022,266 +881,6 @@ class Common * Referrer */ - /** - * Reduce URL to more minimal form. 2 letter country codes are - * replaced by '{}', while other parts are simply removed. - * - * Examples: - * www.example.com -> example.com - * search.example.com -> example.com - * m.example.com -> example.com - * de.example.com -> {}.example.com - * example.de -> example.{} - * example.co.uk -> example.{} - * - * @param string $url - * @return string - */ - public static function getLossyUrl($url) - { - static $countries; - if (!isset($countries)) { - $countries = implode('|', array_keys(self::getCountriesList(true))); - } - - return preg_replace( - array( - '/^(w+[0-9]*|search)\./', - '/(^|\.)m\./', - '/(\.(com|org|net|co|it|edu))?\.(' . $countries . ')(\/|$)/', - '/(^|\.)(' . $countries . ')\./', - ), - array( - '', - '$1', - '.{}$4', - '$1{}.', - ), - $url); - } - - /** - * Extracts a keyword from a raw not encoded URL. - * Will only extract keyword if a known search engine has been detected. - * Returns the keyword: - * - in UTF8: automatically converted from other charsets when applicable - * - strtolowered: "QUErY test!" will return "query test!" - * - trimmed: extra spaces before and after are removed - * - * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php - * The function returns false when a keyword couldn't be found. - * eg. if the url is "http://www.google.com/partners.html" this will return false, - * as the google keyword parameter couldn't be found. - * - * @see unit tests in /tests/core/Common.test.php - * @param string $referrerUrl URL referer URL, eg. $_SERVER['HTTP_REFERER'] - * @return array|bool false if a keyword couldn't be extracted, - * or array( - * 'name' => 'Google', - * 'keywords' => 'my searched keywords') - */ - public static function extractSearchEngineInformationFromUrl($referrerUrl) - { - $refererParsed = @parse_url($referrerUrl); - $refererHost = ''; - if (isset($refererParsed['host'])) { - $refererHost = $refererParsed['host']; - } - if (empty($refererHost)) { - return false; - } - // some search engines (eg. Bing Images) use the same domain - // as an existing search engine (eg. Bing), we must also use the url path - $refererPath = ''; - if (isset($refererParsed['path'])) { - $refererPath = $refererParsed['path']; - } - - // no search query - if (!isset($refererParsed['query'])) { - $refererParsed['query'] = ''; - } - $query = $refererParsed['query']; - - // Google Referrers URLs sometimes have the fragment which contains the keyword - if (!empty($refererParsed['fragment'])) { - $query .= '&' . $refererParsed['fragment']; - } - - $searchEngines = self::getSearchEngineUrls(); - - $hostPattern = self::getLossyUrl($refererHost); - if (array_key_exists($refererHost . $refererPath, $searchEngines)) { - $refererHost = $refererHost . $refererPath; - } elseif (array_key_exists($hostPattern . $refererPath, $searchEngines)) { - $refererHost = $hostPattern . $refererPath; - } elseif (array_key_exists($hostPattern, $searchEngines)) { - $refererHost = $hostPattern; - } elseif (!array_key_exists($refererHost, $searchEngines)) { - if (!strncmp($query, 'cx=partner-pub-', 15)) { - // Google custom search engine - $refererHost = 'google.com/cse'; - } elseif (!strncmp($refererPath, '/pemonitorhosted/ws/results/', 28)) { - // private-label search powered by InfoSpace Metasearch - $refererHost = 'wsdsold.infospace.com'; - } elseif (strpos($refererHost, '.images.search.yahoo.com') != false) { - // Yahoo! Images - $refererHost = 'images.search.yahoo.com'; - } elseif (strpos($refererHost, '.search.yahoo.com') != false) { - // Yahoo! - $refererHost = 'search.yahoo.com'; - } else { - return false; - } - } - $searchEngineName = $searchEngines[$refererHost][0]; - $variableNames = null; - if (isset($searchEngines[$refererHost][1])) { - $variableNames = $searchEngines[$refererHost][1]; - } - if (!$variableNames) { - $searchEngineNames = self::getSearchEngineNames(); - $url = $searchEngineNames[$searchEngineName]; - $variableNames = $searchEngines[$url][1]; - } - if (!is_array($variableNames)) { - $variableNames = array($variableNames); - } - - $key = null; - if ($searchEngineName === 'Google Images' - || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false) - ) { - if (strpos($query, '&prev') !== false) { - $query = urldecode(trim(self::getParameterFromQueryString($query, 'prev'))); - $query = str_replace('&', '&', strstr($query, '?')); - } - $searchEngineName = 'Google Images'; - } else if ($searchEngineName === 'Google' - && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0) - ) { - $keys = array(); - $key = self::getParameterFromQueryString($query, 'as_q'); - if (!empty($key)) { - array_push($keys, $key); - } - $key = self::getParameterFromQueryString($query, 'as_oq'); - if (!empty($key)) { - array_push($keys, str_replace('+', ' OR ', $key)); - } - $key = self::getParameterFromQueryString($query, 'as_epq'); - if (!empty($key)) { - array_push($keys, "\"$key\""); - } - $key = self::getParameterFromQueryString($query, 'as_eq'); - if (!empty($key)) { - array_push($keys, "-$key"); - } - $key = trim(urldecode(implode(' ', $keys))); - } - - if ($searchEngineName === 'Google') { - // top bar menu - $tbm = self::getParameterFromQueryString($query, 'tbm'); - switch ($tbm) { - case 'isch': - $searchEngineName = 'Google Images'; - break; - case 'vid': - $searchEngineName = 'Google Video'; - break; - case 'shop': - $searchEngineName = 'Google Shopping'; - break; - } - } - - if (empty($key)) { - foreach ($variableNames as $variableName) { - if ($variableName[0] == '/') { - // regular expression match - if (preg_match($variableName, $referrerUrl, $matches)) { - $key = trim(urldecode($matches[1])); - break; - } - } else { - // search for keywords now &vname=keyword - $key = self::getParameterFromQueryString($query, $variableName); - $key = trim(urldecode($key)); - - // Special case: Google & empty q parameter - if (empty($key) - && $variableName == 'q' - - && ( - // Google search with no keyword - ($searchEngineName == 'Google' - && ( // First, they started putting an empty q= parameter - strpos($query, '&q=') !== false - || strpos($query, '?q=') !== false - // then they started sending the full host only (no path/query string) - || (empty($query) && (empty($refererPath) || $refererPath == '/') && empty($refererParsed['fragment'])) - ) - ) - // search engines with no keyword - || $searchEngineName == 'Google Images' - || $searchEngineName == 'DuckDuckGo') - ) { - $key = false; - } - if (!empty($key) - || $key === false - ) { - break; - } - } - } - } - - // $key === false is the special case "No keyword provided" which is a Search engine match - if ($key === null - || $key === '' - ) { - return false; - } - - if (!empty($key)) { - if (function_exists('iconv') - && isset($searchEngines[$refererHost][3]) - ) { - // accepts string, array, or comma-separated list string in preferred order - $charsets = $searchEngines[$refererHost][3]; - if (!is_array($charsets)) { - $charsets = explode(',', $charsets); - } - - if (!empty($charsets)) { - $charset = $charsets[0]; - if (count($charsets) > 1 - && function_exists('mb_detect_encoding') - ) { - $charset = mb_detect_encoding($key, $charsets); - if ($charset === false) { - $charset = $charsets[0]; - } - } - - $newkey = @iconv($charset, 'UTF-8//IGNORE', $key); - if (!empty($newkey)) { - $key = $newkey; - } - } - } - - $key = self::mb_strtolower($key); - } - - return array( - 'name' => $searchEngineName, - 'keywords' => $key, - ); - } - /** * Takes a list of fields defining numeric values and returns the corresponding * unnamed parameters to be bound to the field names in the where clause of a SQL query diff --git a/core/SettingsPiwik.php b/core/SettingsPiwik.php index 8067c8a21546d0e3371eeb54d14eb19780029a42..1e51bd6c13a2553b5ab613b7df696c1b6279aa62 100644 --- a/core/SettingsPiwik.php +++ b/core/SettingsPiwik.php @@ -114,8 +114,8 @@ class SettingsPiwik public static function getPiwikUrl() { // Only set in tests - if (Piwik::$piwikUrlCache !== null) { - return Piwik::$piwikUrlCache; + if (self::$piwikUrlCache !== null) { + return self::$piwikUrlCache; } $key = 'piwikUrl'; diff --git a/core/Tracker/Action.php b/core/Tracker/Action.php index ee434789ecdd4a83ab134c8189b76ae5ea670671..7d46923481acc4d1d8df738d86fd2f355784e34c 100644 --- a/core/Tracker/Action.php +++ b/core/Tracker/Action.php @@ -14,8 +14,7 @@ use Exception; use Piwik\Common; use Piwik\Config; use Piwik\Tracker; -use Piwik\Tracker\Cache; -use Piwik\Tracker\Request; +use Piwik\UrlHelper; /** * Handles an action (page view, download or outlink) by the visitor. @@ -113,7 +112,7 @@ class Action implements ActionInterface // Clean up host & hash tags, for URLs $parsedUrl = @parse_url($fullUrl); $parsedUrl = self::cleanupHostAndHashTag($parsedUrl); - $url = Common::getParseUrlReverse($parsedUrl); + $url = UrlHelper::getParseUrlReverse($parsedUrl); if (!empty($url)) { return $url; } @@ -323,17 +322,17 @@ class Action implements ActionInterface if (empty($parsedUrl['query'])) { if (empty($parsedUrl['fragment'])) { - return Common::getParseUrlReverse($parsedUrl); + return UrlHelper::getParseUrlReverse($parsedUrl); } // Exclude from the hash tag as well - $queryParameters = Common::getArrayFromQueryString($parsedUrl['fragment']); - $parsedUrl['fragment'] = self::getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude); - $url = Common::getParseUrlReverse($parsedUrl); + $queryParameters = UrlHelper::getArrayFromQueryString($parsedUrl['fragment']); + $parsedUrl['fragment'] = UrlHelper::getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude); + $url = UrlHelper::getParseUrlReverse($parsedUrl); return $url; } - $queryParameters = Common::getArrayFromQueryString($parsedUrl['query']); - $parsedUrl['query'] = self::getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude); - $url = Common::getParseUrlReverse($parsedUrl); + $queryParameters = UrlHelper::getArrayFromQueryString($parsedUrl['query']); + $parsedUrl['query'] = UrlHelper::getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude); + $url = UrlHelper::getParseUrlReverse($parsedUrl); return $url; } @@ -369,43 +368,6 @@ class Action implements ActionInterface return $parametersToExclude; } - /** - * Returns a Query string, - * Given an array of input parameters, and an array of parameter names to exclude - * - * @static - * @param $queryParameters - * @param $parametersToExclude - * @return string - */ - public static function getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude) - { - $validQuery = ''; - $separator = '&'; - foreach ($queryParameters as $name => $value) { - // decode encoded square brackets - $name = str_replace(array('%5B', '%5D'), array('[', ']'), $name); - - if (!in_array(strtolower($name), $parametersToExclude)) { - if (is_array($value)) { - foreach ($value as $param) { - if ($param === false) { - $validQuery .= $name . '[]' . $separator; - } else { - $validQuery .= $name . '[]=' . $param . $separator; - } - } - } else if ($value === false) { - $validQuery .= $name . $separator; - } else { - $validQuery .= $name . '=' . $value . $separator; - } - } - } - $validQuery = substr($validQuery, 0, -strlen($separator)); - return $validQuery; - } - protected function init() { $this->pageEncoding = $this->request->getParam('cs'); @@ -773,7 +735,7 @@ class Action implements ActionInterface } $url = self::cleanupString($url); - if (!Common::isLookLikeUrl($url)) { + if (!UrlHelper::isLookLikeUrl($url)) { Common::printDebug("WARNING: URL looks invalid and is discarded"); $url = ''; } @@ -890,7 +852,7 @@ class Action implements ActionInterface ? $website['sitesearch_keyword_parameters'] : array(); $queryString = (!empty($parsedUrl['query']) ? $parsedUrl['query'] : '') . (!empty($parsedUrl['fragment']) ? $separator . $parsedUrl['fragment'] : ''); - $parametersRaw = Common::getArrayFromQueryString($queryString); + $parametersRaw = UrlHelper::getArrayFromQueryString($queryString); // strtolower the parameter names for smooth site search detection $parameters = array(); @@ -935,10 +897,10 @@ class Action implements ActionInterface // @see excludeQueryParametersFromUrl() // Excluded the detected parameters from the URL $parametersToExclude = array($categoryParameterRaw, $keywordParameterRaw); - $parsedUrl['query'] = self::getQueryStringWithExcludedParameters(Common::getArrayFromQueryString($parsedUrl['query']), $parametersToExclude); - $parsedUrl['fragment'] = self::getQueryStringWithExcludedParameters(Common::getArrayFromQueryString($parsedUrl['fragment']), $parametersToExclude); + $parsedUrl['query'] = UrlHelper::getQueryStringWithExcludedParameters(UrlHelper::getArrayFromQueryString($parsedUrl['query']), $parametersToExclude); + $parsedUrl['fragment'] = UrlHelper::getQueryStringWithExcludedParameters(UrlHelper::getArrayFromQueryString($parsedUrl['fragment']), $parametersToExclude); } - $url = Common::getParseUrlReverse($parsedUrl); + $url = UrlHelper::getParseUrlReverse($parsedUrl); if (is_array($actionName)) { $actionName = reset($actionName); } diff --git a/core/Tracker/Referrer.php b/core/Tracker/Referrer.php index bd30880432d350d2272626d14a7773612c8ef824..0490a3e74e1eb798fe8f8dafe5b5bc88e430c1a5 100644 --- a/core/Tracker/Referrer.php +++ b/core/Tracker/Referrer.php @@ -11,7 +11,7 @@ namespace Piwik\Tracker; use Piwik\Common; -use Piwik\Tracker\Action; +use Piwik\UrlHelper; /** * @package Piwik @@ -68,7 +68,7 @@ class Referrer // default values for the referer_* fields $refererUrl = Common::unsanitizeInputValue($refererUrl); if (!empty($refererUrl) - && !Common::isLookLikeUrl($refererUrl) + && !UrlHelper::isLookLikeUrl($refererUrl) ) { $refererUrl = ''; } @@ -126,7 +126,7 @@ class Referrer */ protected function detectRefererSearchEngine() { - $searchEngineInformation = Common::extractSearchEngineInformationFromUrl($this->refererUrl); + $searchEngineInformation = UrlHelper::extractSearchEngineInformationFromUrl($this->refererUrl); Piwik_PostEvent('Tracker.detectRefererSearchEngine', array(&$searchEngineInformation, $this->refererUrl)); if ($searchEngineInformation === false) { return false; @@ -144,7 +144,7 @@ class Referrer protected function detectCampaignFromString($string) { foreach ($this->campaignNames as $campaignNameParameter) { - $campaignName = trim(urldecode(Common::getParameterFromQueryString($string, $campaignNameParameter))); + $campaignName = trim(urldecode(UrlHelper::getParameterFromQueryString($string, $campaignNameParameter))); if (!empty($campaignName)) { break; } @@ -155,7 +155,7 @@ class Referrer $this->nameRefererAnalyzed = $campaignName; foreach ($this->campaignKeywords as $campaignKeywordParameter) { - $campaignKeyword = Common::getParameterFromQueryString($string, $campaignKeywordParameter); + $campaignKeyword = UrlHelper::getParameterFromQueryString($string, $campaignKeywordParameter); if (!empty($campaignKeyword)) { $this->keywordRefererAnalyzed = trim(urldecode($campaignKeyword)); break; @@ -165,7 +165,7 @@ class Referrer // if the campaign keyword is empty, try to get a keyword from the referrer URL if (empty($this->keywordRefererAnalyzed)) { // Set the Campaign keyword to the keyword found in the Referrer URL if any - $referrerUrlInfo = Common::extractSearchEngineInformationFromUrl($this->refererUrl); + $referrerUrlInfo = UrlHelper::extractSearchEngineInformationFromUrl($this->refererUrl); if (!empty($referrerUrlInfo['keywords'])) { $this->keywordRefererAnalyzed = $referrerUrlInfo['keywords']; } @@ -178,7 +178,7 @@ class Referrer ) { // This parameter sometimes is found & contains the page with the adsense ad bringing visitor to our site $adsenseReferrerParameter = 'url'; - $value = trim(urldecode(Common::getParameterFromQueryString($this->refererUrlParse['query'], $adsenseReferrerParameter))); + $value = trim(urldecode(UrlHelper::getParameterFromQueryString($this->refererUrlParse['query'], $adsenseReferrerParameter))); if (!empty($value)) { $parsedAdsenseReferrerUrl = parse_url($value); if (!empty($parsedAdsenseReferrerUrl['host'])) { diff --git a/core/Url.php b/core/Url.php index fd961a3189f8212985864cf6dd82088a8fd8835f..f2cb601d459d5ad9489780dad2b41115ce7bfc96 100644 --- a/core/Url.php +++ b/core/Url.php @@ -11,10 +11,6 @@ namespace Piwik; use Exception; -use Piwik\Config; -use Piwik\Piwik; -use Piwik\Common; -use Piwik\IP; /** * Class to retrieve absolute URL or URI components of the current URL, @@ -322,7 +318,7 @@ class Url static function getArrayFromCurrentQueryString() { $queryString = self::getCurrentQueryString(); - $urlValues = Common::getArrayFromQueryString($queryString); + $urlValues = UrlHelper::getArrayFromQueryString($queryString); return $urlValues; } @@ -393,7 +389,7 @@ class Url */ static public function redirectToUrl($url) { - if (Common::isLookLikeUrl($url) + if (UrlHelper::isLookLikeUrl($url) || strpos($url, 'index.php') === 0 ) { @header("Location: $url"); diff --git a/core/UrlHelper.php b/core/UrlHelper.php new file mode 100644 index 0000000000000000000000000000000000000000..a5a6db76ad19ad702493fcfbb0b016bfe0b0d6a1 --- /dev/null +++ b/core/UrlHelper.php @@ -0,0 +1,449 @@ +<?php +/** + * Piwik - Open source web analytics + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + * + * @category Piwik + * @package Piwik + */ +namespace Piwik; + +class UrlHelper +{ + /** + * Returns a Query string, + * Given an array of input parameters, and an array of parameter names to exclude + * + * @static + * @param $queryParameters + * @param $parametersToExclude + * @return string + */ + public static function getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude) + { + $validQuery = ''; + $separator = '&'; + foreach ($queryParameters as $name => $value) { + // decode encoded square brackets + $name = str_replace(array('%5B', '%5D'), array('[', ']'), $name); + + if (!in_array(strtolower($name), $parametersToExclude)) { + if (is_array($value)) { + foreach ($value as $param) { + if ($param === false) { + $validQuery .= $name . '[]' . $separator; + } else { + $validQuery .= $name . '[]=' . $param . $separator; + } + } + } else if ($value === false) { + $validQuery .= $name . $separator; + } else { + $validQuery .= $name . '=' . $value . $separator; + } + } + } + $validQuery = substr($validQuery, 0, -strlen($separator)); + return $validQuery; + } + + /** + * Reduce URL to more minimal form. 2 letter country codes are + * replaced by '{}', while other parts are simply removed. + * + * Examples: + * www.example.com -> example.com + * search.example.com -> example.com + * m.example.com -> example.com + * de.example.com -> {}.example.com + * example.de -> example.{} + * example.co.uk -> example.{} + * + * @param string $url + * @return string + */ + public static function getLossyUrl($url) + { + static $countries; + if (!isset($countries)) { + $countries = implode('|', array_keys(Common::getCountriesList(true))); + } + + return preg_replace( + array( + '/^(w+[0-9]*|search)\./', + '/(^|\.)m\./', + '/(\.(com|org|net|co|it|edu))?\.(' . $countries . ')(\/|$)/', + '/(^|\.)(' . $countries . ')\./', + ), + array( + '', + '$1', + '.{}$4', + '$1{}.', + ), + $url); + } + + /** + * Returns true if the string passed may be a URL. + * We don't need a precise test here because the value comes from the website + * tracked source code and the URLs may look very strange. + * + * @param string $url + * @return bool + */ + public static function isLookLikeUrl($url) + { + return preg_match('~^(ftp|news|http|https)?://(.*)$~D', $url, $matches) !== 0 + && strlen($matches[2]) > 0; + } + + /** + * Builds a URL from the result of parse_url function + * Copied from the PHP comments at http://php.net/parse_url + * @param array $parsed + * @return bool|string + */ + public static function getParseUrlReverse($parsed) + { + if (!is_array($parsed)) { + return false; + } + + $uri = !empty($parsed['scheme']) ? $parsed['scheme'] . ':' . (!strcasecmp($parsed['scheme'], 'mailto') ? '' : '//') : ''; + $uri .= !empty($parsed['user']) ? $parsed['user'] . (!empty($parsed['pass']) ? ':' . $parsed['pass'] : '') . '@' : ''; + $uri .= !empty($parsed['host']) ? $parsed['host'] : ''; + $uri .= !empty($parsed['port']) ? ':' . $parsed['port'] : ''; + + if (!empty($parsed['path'])) { + $uri .= (!strncmp($parsed['path'], '/', 1)) + ? $parsed['path'] + : ((!empty($uri) ? '/' : '') . $parsed['path']); + } + + $uri .= !empty($parsed['query']) ? '?' . $parsed['query'] : ''; + $uri .= !empty($parsed['fragment']) ? '#' . $parsed['fragment'] : ''; + return $uri; + } + + /** + * Returns an URL query string in an array format + * + * @param string $urlQuery + * @return array array( param1=> value1, param2=>value2) + */ + public static function getArrayFromQueryString($urlQuery) + { + if (strlen($urlQuery) == 0) { + return array(); + } + if ($urlQuery[0] == '?') { + $urlQuery = substr($urlQuery, 1); + } + $separator = '&'; + + $urlQuery = $separator . $urlQuery; + // $urlQuery = str_replace(array('%20'), ' ', $urlQuery); + $refererQuery = trim($urlQuery); + + $values = explode($separator, $refererQuery); + + $nameToValue = array(); + + foreach ($values as $value) { + $pos = strpos($value, '='); + if ($pos !== false) { + $name = substr($value, 0, $pos); + $value = substr($value, $pos + 1); + if ($value === false) { + $value = ''; + } + } else { + $name = $value; + $value = false; + } + if (!empty($name)) { + $name = Common::sanitizeInputValue($name); + } + if (!empty($value)) { + $value = Common::sanitizeInputValue($value); + } + + // if array without indexes + $count = 0; + $tmp = preg_replace('/(\[|%5b)(]|%5d)$/i', '', $name, -1, $count); + if (!empty($tmp) && $count) { + $name = $tmp; + if (isset($nameToValue[$name]) == false || is_array($nameToValue[$name]) == false) { + $nameToValue[$name] = array(); + } + array_push($nameToValue[$name], $value); + } else if (!empty($name)) { + $nameToValue[$name] = $value; + } + } + return $nameToValue; + } + + /** + * Returns the value of a GET parameter $parameter in an URL query $urlQuery + * + * @param string $urlQuery result of parse_url()['query'] and htmlentitied (& is &) eg. module=test&action=toto or ?page=test + * @param string $parameter + * @return string|bool Parameter value if found (can be the empty string!), null if not found + */ + public static function getParameterFromQueryString($urlQuery, $parameter) + { + $nameToValue = self::getArrayFromQueryString($urlQuery); + if (isset($nameToValue[$parameter])) { + return $nameToValue[$parameter]; + } + return null; + } + + /** + * Returns the path and query part from a URL. + * Eg. http://piwik.org/test/index.php?module=CoreHome will return /test/index.php?module=CoreHome + * + * @param string $url either http://piwik.org/test or / + * @return string + */ + public static function getPathAndQueryFromUrl($url) + { + $parsedUrl = parse_url($url); + $result = ''; + if (isset($parsedUrl['path'])) { + $result .= substr($parsedUrl['path'], 1); + } + if (isset($parsedUrl['query'])) { + $result .= '?' . $parsedUrl['query']; + } + return $result; + } + + + /** + * Extracts a keyword from a raw not encoded URL. + * Will only extract keyword if a known search engine has been detected. + * Returns the keyword: + * - in UTF8: automatically converted from other charsets when applicable + * - strtolowered: "QUErY test!" will return "query test!" + * - trimmed: extra spaces before and after are removed + * + * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php + * The function returns false when a keyword couldn't be found. + * eg. if the url is "http://www.google.com/partners.html" this will return false, + * as the google keyword parameter couldn't be found. + * + * @see unit tests in /tests/core/Common.test.php + * @param string $referrerUrl URL referer URL, eg. $_SERVER['HTTP_REFERER'] + * @return array|bool false if a keyword couldn't be extracted, + * or array( + * 'name' => 'Google', + * 'keywords' => 'my searched keywords') + */ + public static function extractSearchEngineInformationFromUrl($referrerUrl) + { + $refererParsed = @parse_url($referrerUrl); + $refererHost = ''; + if (isset($refererParsed['host'])) { + $refererHost = $refererParsed['host']; + } + if (empty($refererHost)) { + return false; + } + // some search engines (eg. Bing Images) use the same domain + // as an existing search engine (eg. Bing), we must also use the url path + $refererPath = ''; + if (isset($refererParsed['path'])) { + $refererPath = $refererParsed['path']; + } + + // no search query + if (!isset($refererParsed['query'])) { + $refererParsed['query'] = ''; + } + $query = $refererParsed['query']; + + // Google Referrers URLs sometimes have the fragment which contains the keyword + if (!empty($refererParsed['fragment'])) { + $query .= '&' . $refererParsed['fragment']; + } + + $searchEngines = Common::getSearchEngineUrls(); + + $hostPattern = self::getLossyUrl($refererHost); + if (array_key_exists($refererHost . $refererPath, $searchEngines)) { + $refererHost = $refererHost . $refererPath; + } elseif (array_key_exists($hostPattern . $refererPath, $searchEngines)) { + $refererHost = $hostPattern . $refererPath; + } elseif (array_key_exists($hostPattern, $searchEngines)) { + $refererHost = $hostPattern; + } elseif (!array_key_exists($refererHost, $searchEngines)) { + if (!strncmp($query, 'cx=partner-pub-', 15)) { + // Google custom search engine + $refererHost = 'google.com/cse'; + } elseif (!strncmp($refererPath, '/pemonitorhosted/ws/results/', 28)) { + // private-label search powered by InfoSpace Metasearch + $refererHost = 'wsdsold.infospace.com'; + } elseif (strpos($refererHost, '.images.search.yahoo.com') != false) { + // Yahoo! Images + $refererHost = 'images.search.yahoo.com'; + } elseif (strpos($refererHost, '.search.yahoo.com') != false) { + // Yahoo! + $refererHost = 'search.yahoo.com'; + } else { + return false; + } + } + $searchEngineName = $searchEngines[$refererHost][0]; + $variableNames = null; + if (isset($searchEngines[$refererHost][1])) { + $variableNames = $searchEngines[$refererHost][1]; + } + if (!$variableNames) { + $searchEngineNames = Common::getSearchEngineNames(); + $url = $searchEngineNames[$searchEngineName]; + $variableNames = $searchEngines[$url][1]; + } + if (!is_array($variableNames)) { + $variableNames = array($variableNames); + } + + $key = null; + if ($searchEngineName === 'Google Images' + || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false) + ) { + if (strpos($query, '&prev') !== false) { + $query = urldecode(trim(self::getParameterFromQueryString($query, 'prev'))); + $query = str_replace('&', '&', strstr($query, '?')); + } + $searchEngineName = 'Google Images'; + } else if ($searchEngineName === 'Google' + && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0) + ) { + $keys = array(); + $key = self::getParameterFromQueryString($query, 'as_q'); + if (!empty($key)) { + array_push($keys, $key); + } + $key = self::getParameterFromQueryString($query, 'as_oq'); + if (!empty($key)) { + array_push($keys, str_replace('+', ' OR ', $key)); + } + $key = self::getParameterFromQueryString($query, 'as_epq'); + if (!empty($key)) { + array_push($keys, "\"$key\""); + } + $key = self::getParameterFromQueryString($query, 'as_eq'); + if (!empty($key)) { + array_push($keys, "-$key"); + } + $key = trim(urldecode(implode(' ', $keys))); + } + + if ($searchEngineName === 'Google') { + // top bar menu + $tbm = self::getParameterFromQueryString($query, 'tbm'); + switch ($tbm) { + case 'isch': + $searchEngineName = 'Google Images'; + break; + case 'vid': + $searchEngineName = 'Google Video'; + break; + case 'shop': + $searchEngineName = 'Google Shopping'; + break; + } + } + + if (empty($key)) { + foreach ($variableNames as $variableName) { + if ($variableName[0] == '/') { + // regular expression match + if (preg_match($variableName, $referrerUrl, $matches)) { + $key = trim(urldecode($matches[1])); + break; + } + } else { + // search for keywords now &vname=keyword + $key = self::getParameterFromQueryString($query, $variableName); + $key = trim(urldecode($key)); + + // Special case: Google & empty q parameter + if (empty($key) + && $variableName == 'q' + + && ( + // Google search with no keyword + ($searchEngineName == 'Google' + && ( // First, they started putting an empty q= parameter + strpos($query, '&q=') !== false + || strpos($query, '?q=') !== false + // then they started sending the full host only (no path/query string) + || (empty($query) && (empty($refererPath) || $refererPath == '/') && empty($refererParsed['fragment'])) + ) + ) + // search engines with no keyword + || $searchEngineName == 'Google Images' + || $searchEngineName == 'DuckDuckGo') + ) { + $key = false; + } + if (!empty($key) + || $key === false + ) { + break; + } + } + } + } + + // $key === false is the special case "No keyword provided" which is a Search engine match + if ($key === null + || $key === '' + ) { + return false; + } + + if (!empty($key)) { + if (function_exists('iconv') + && isset($searchEngines[$refererHost][3]) + ) { + // accepts string, array, or comma-separated list string in preferred order + $charsets = $searchEngines[$refererHost][3]; + if (!is_array($charsets)) { + $charsets = explode(',', $charsets); + } + + if (!empty($charsets)) { + $charset = $charsets[0]; + if (count($charsets) > 1 + && function_exists('mb_detect_encoding') + ) { + $charset = mb_detect_encoding($key, $charsets); + if ($charset === false) { + $charset = $charsets[0]; + } + } + + $newkey = @iconv($charset, 'UTF-8//IGNORE', $key); + if (!empty($newkey)) { + $key = $newkey; + } + } + } + + $key = Common::mb_strtolower($key); + } + + return array( + 'name' => $searchEngineName, + 'keywords' => $key, + ); + } +} \ No newline at end of file diff --git a/misc/cron/archive.php b/misc/cron/archive.php index 7b8580dceaf0f38690513d6d8c7496db7bee74bd..2308c727239e203b2e4e96e78c0288edb5b9b47f 100644 --- a/misc/cron/archive.php +++ b/misc/cron/archive.php @@ -801,7 +801,7 @@ class Archiving else { $piwikUrl = $this->isParameterSet("url", true); if (!$piwikUrl - || !Common::isLookLikeUrl($piwikUrl) + || !\Piwik\UrlHelper::isLookLikeUrl($piwikUrl) ) { $this->logFatalError("archive.php expects the argument --url to be set to your Piwik URL, for example: --url=http://example.org/piwik/ ", $backtrace = false); } diff --git a/misc/others/iframeWidget_localhost.php b/misc/others/iframeWidget_localhost.php index d83fb26c31b3bef9f4e81e97451e012a48e6671c..73bfc5c59bebd9f3cc99d024c3823abd383f65fb 100644 --- a/misc/others/iframeWidget_localhost.php +++ b/misc/others/iframeWidget_localhost.php @@ -1,8 +1,8 @@ <?php -use Piwik\Common; use Piwik\FrontController; -use Piwik\WidgetsList; use Piwik\Url; +use Piwik\UrlHelper; +use Piwik\WidgetsList; exit; $date = date('Y-m-d'); @@ -39,7 +39,7 @@ foreach ($widgets as $category => $widgetsInCategory) { echo '<h2>' . $category . '</h2>'; foreach ($widgetsInCategory as $widget) { echo '<h3>' . $widget['name'] . '</h3>'; - $widgetUrl = Common::getArrayFromQueryString($url); + $widgetUrl = UrlHelper::getArrayFromQueryString($url); $widgetUrl['moduleToWidgetize'] = $widget['parameters']['module']; $widgetUrl['actionToWidgetize'] = $widget['parameters']['action']; $parameters = $widget['parameters']; diff --git a/plugins/Live/Visitor.php b/plugins/Live/Visitor.php index 70076fd1c3e9ca853dd760ec568a5e13871b6579..73508301ea79946e6b4b8c59b28d3b486da67619 100644 --- a/plugins/Live/Visitor.php +++ b/plugins/Live/Visitor.php @@ -17,6 +17,7 @@ use Piwik\Plugins\Referers\API as ReferersAPI; use Piwik\Plugins\UserCountry\LocationProvider\GeoIp; use Piwik\Tracker; use Piwik\Tracker\Visit; +use Piwik\UrlHelper; /** * @see plugins/Referers/functions.php @@ -376,7 +377,7 @@ class Visitor } } } - if (Common::isLookLikeUrl($this->details['referer_url'])) { + if (\Piwik\UrlHelper::isLookLikeUrl($this->details['referer_url'])) { return $this->details['referer_url']; } return null; @@ -391,7 +392,7 @@ class Visitor if (empty($url['query'])) { return null; } - $position = Common::getParameterFromQueryString($url['query'], 'cd'); + $position = UrlHelper::getParameterFromQueryString($url['query'], 'cd'); if (!empty($position)) { return $position; } diff --git a/plugins/Proxy/Controller.php b/plugins/Proxy/Controller.php index 0e514d692ec4539e2778310301d29a4aa1f8a9fc..f7e29362b2af63bcf7c5fae5ce702bf20c7942e0 100644 --- a/plugins/Proxy/Controller.php +++ b/plugins/Proxy/Controller.php @@ -15,6 +15,7 @@ use Piwik\Common; use Piwik\Piwik; use Piwik\ProxyHttp; use Piwik\Url; +use Piwik\UrlHelper; /** * Controller for proxy services @@ -92,7 +93,7 @@ class Controller extends \Piwik\Controller if (!self::isPiwikUrl($url)) { Piwik::checkUserHasSomeViewAccess(); } - if (!Common::isLookLikeUrl($url)) { + if (!UrlHelper::isLookLikeUrl($url)) { die('Please check the &url= parameter: it should to be a valid URL'); } @header('Content-Type: text/html; charset=utf-8'); diff --git a/plugins/Referers/functions.php b/plugins/Referers/functions.php index 930be01b7e5159ec1337498795ef4b6aa0502846..6fb0f69ccb01cb6a7c992e3e2619eaca2ba68a76 100644 --- a/plugins/Referers/functions.php +++ b/plugins/Referers/functions.php @@ -11,7 +11,7 @@ namespace Piwik\Plugins\Referers; use Piwik\Common; -use Piwik\Plugins\Referers\API; +use Piwik\UrlHelper; /** * Returns path component from a URL @@ -21,7 +21,7 @@ use Piwik\Plugins\Referers\API; */ function getPathFromUrl($url) { - $path = Common::getPathAndQueryFromUrl($url); + $path = UrlHelper::getPathAndQueryFromUrl($url); if (empty($path)) { return 'index'; } diff --git a/plugins/SEO/Controller.php b/plugins/SEO/Controller.php index ee23bde1f444a85856e8a90b321037af86ac3a5f..9ae07949fec79ceef857e56eaf1676675d4a0659 100644 --- a/plugins/SEO/Controller.php +++ b/plugins/SEO/Controller.php @@ -12,10 +12,9 @@ namespace Piwik\Plugins\SEO; use Piwik\Common; use Piwik\DataTable\Renderer; -use Piwik\Plugins\SEO\API; -use Piwik\View; use Piwik\Site; -use Piwik\Plugins\SEO\RankChecker; +use Piwik\UrlHelper; +use Piwik\View; /** * @package SEO @@ -33,7 +32,7 @@ class Controller extends \Piwik\Controller $url = 'http://' . $url; } - if (empty($url) || !Common::isLookLikeUrl($url)) { + if (empty($url) || !UrlHelper::isLookLikeUrl($url)) { $url = $site->getMainUrl(); } diff --git a/plugins/SitesManager/API.php b/plugins/SitesManager/API.php index 9017a70af2dec74dc8b9899ef3874f5376d4891c..875189b77bf2ea3924fde415afd047fc2ba19f10 100644 --- a/plugins/SitesManager/API.php +++ b/plugins/SitesManager/API.php @@ -24,6 +24,7 @@ use Piwik\Site; use Piwik\TaskScheduler; use Piwik\Tracker\Cache; use Piwik\Url; +use Piwik\UrlHelper; /** * The SitesManager API gives you full control on Websites in Piwik (create, update and delete), and many methods to retrieve websites based on various attributes. @@ -1234,7 +1235,7 @@ class API */ private function isValidUrl($url) { - return Common::isLookLikeUrl($url); + return UrlHelper::isLookLikeUrl($url); } /** diff --git a/plugins/SitesManager/Controller.php b/plugins/SitesManager/Controller.php index 5629e009e98dd6ab21c125e80f46ad0d3e0f1d80..d68e582c6e377795434f4ee6db77d0e4f28aaab7 100644 --- a/plugins/SitesManager/Controller.php +++ b/plugins/SitesManager/Controller.php @@ -20,6 +20,7 @@ use Piwik\Piwik; use Piwik\SettingsServer; use Piwik\Site; use Piwik\Url; +use Piwik\UrlHelper; use Piwik\View; /** @@ -161,7 +162,7 @@ class Controller extends \Piwik\Controller\Admin $view->idSite = Common::getRequestVar('idSite'); $url = Common::getRequestVar('piwikUrl', '', 'string'); if (empty($url) - || !Common::isLookLikeUrl($url) + || !UrlHelper::isLookLikeUrl($url) ) { $url = $view->piwikUrl; } diff --git a/tests/PHPUnit/Core/CommonTest.php b/tests/PHPUnit/Core/CommonTest.php index e666ad9b5d280f57679817b9858e3c8821d20075..a89bd81c2fd9ef3190593e111c53dc3ed8aae2f0 100644 --- a/tests/PHPUnit/Core/CommonTest.php +++ b/tests/PHPUnit/Core/CommonTest.php @@ -1,6 +1,7 @@ <?php use Piwik\Common; use Piwik\Filesystem; +use Piwik\UrlHelper; /** * Piwik - Open source web analytics @@ -45,7 +46,7 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase */ public function testIsUrl($url, $isValid) { - $this->assertEquals($isValid, Common::isLookLikeUrl($url)); + $this->assertEquals($isValid, UrlHelper::isLookLikeUrl($url)); } /** @@ -341,7 +342,7 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase */ public function testGetParameterFromQueryString($queryString, $parameter, $expected) { - $this->assertSame($expected, Common::getParameterFromQueryString($queryString, $parameter)); + $this->assertSame($expected, UrlHelper::getParameterFromQueryString($queryString, $parameter)); } /** @@ -351,7 +352,7 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase */ public function testGetPathAndQueryFromUrl() { - $this->assertEquals('test/index.php?module=CoreHome', Common::getPathAndQueryFromUrl('http://piwik.org/test/index.php?module=CoreHome')); + $this->assertEquals('test/index.php?module=CoreHome', UrlHelper::getPathAndQueryFromUrl('http://piwik.org/test/index.php?module=CoreHome')); } /** @@ -370,7 +371,7 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase 'f' => array('a'), 'g' => array('b', 'c'), ); - $this->assertEquals(serialize($expected), serialize(Common::getArrayFromQueryString('a&b=&c=1&d[]&e[]=&f[]=a&g[]=b&g[]=c'))); + $this->assertEquals(serialize($expected), serialize(UrlHelper::getArrayFromQueryString('a&b=&c=1&d[]&e[]=&f[]=a&g[]=b&g[]=c'))); } /** @@ -608,7 +609,7 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase public function testExtractSearchEngineInformationFromUrl($url, $engine, $keywords) { $this->includeDataFilesForSearchEngineTest(); - $returnedValue = Common::extractSearchEngineInformationFromUrl($url); + $returnedValue = UrlHelper::extractSearchEngineInformationFromUrl($url); $exptectedValue = false; @@ -646,7 +647,7 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase */ public function testGetLossyUrl($input, $expected) { - $this->assertEquals($expected, Common::getLossyUrl($input)); + $this->assertEquals($expected, UrlHelper::getLossyUrl($input)); } private function includeDataFilesForSearchEngineTest() diff --git a/tests/PHPUnit/IntegrationTestCase.php b/tests/PHPUnit/IntegrationTestCase.php index c1d7e9e76fc672be8261d6e5699e6661fe0c8f0c..7f6a94b76471fffa7d16c26515c65b2a8fc330ca 100755 --- a/tests/PHPUnit/IntegrationTestCase.php +++ b/tests/PHPUnit/IntegrationTestCase.php @@ -5,24 +5,24 @@ * @link http://piwik.org * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later */ +use Piwik\Access; use Piwik\API\DocumentationGenerator; -use Piwik\API\Request; use Piwik\API\Proxy; +use Piwik\API\Request; use Piwik\ArchiveProcessor\Rules; +use Piwik\Common; use Piwik\Config; use Piwik\DataAccess\ArchiveTableCreator; use Piwik\DataTable\Manager; -use Piwik\Db\Adapter\Mysqli; -use Piwik\Piwik; -use Piwik\Common; -use Piwik\Access; +use Piwik\Db; use Piwik\Option; +use Piwik\Piwik; use Piwik\Plugins\LanguagesManager\API; use Piwik\ReportRenderer; use Piwik\Site; use Piwik\Tracker\Cache; use Piwik\Translate; -use Piwik\Db; +use Piwik\UrlHelper; require_once PIWIK_INCLUDE_PATH . '/libs/PiwikTracker/PiwikTracker.php'; @@ -719,7 +719,7 @@ abstract class IntegrationTestCase extends PHPUnit_Framework_TestCase && $isTestLogImportReverseChronological; $request = new Request($requestUrl); - $dateTime = Common::getRequestVar('date', '', 'string', Common::getArrayFromQueryString($requestUrl)); + $dateTime = Common::getRequestVar('date', '', 'string', UrlHelper::getArrayFromQueryString($requestUrl)); list($processedFilePath, $expectedFilePath) = $this->getProcessedAndExpectedPaths($testName, $apiId);