diff --git a/core/Tracker/Action.php b/core/Tracker/Action.php index af872891036131e2df3e9f364a161dc769f43170..2cfacb66cd3952669dad74d806233095129ec486 100644 --- a/core/Tracker/Action.php +++ b/core/Tracker/Action.php @@ -931,12 +931,19 @@ class Piwik_Tracker_Action implements Piwik_Tracker_Action_Interface ? $website['sitesearch_keyword_parameters'] : array(); $queryString = (!empty($parsedUrl['query']) ? $parsedUrl['query'] : '') . (!empty($parsedUrl['fragment']) ? $separator . $parsedUrl['fragment'] : ''); - $parameters = Piwik_Common::getArrayFromQueryString($queryString); - + $parametersRaw = Piwik_Common::getArrayFromQueryString($queryString); + + // strtolower the parameter names for smooth site search detection + $parameters = array(); + foreach($parametersRaw as $k => $v) { + $parameters[Piwik_Common::mb_strtolower($k)] = $v; + } + // decode values if they were sent from a client using another charset self::reencodeParameters($parameters, $this->pageEncoding); - // Detect Site Search - foreach ($keywordParameters as $keywordParameter) { + // Detect Site Search keyword + foreach ($keywordParameters as $keywordParameterRaw) { + $keywordParameter = Piwik_Common::mb_strtolower($keywordParameterRaw); if (!empty($parameters[$keywordParameter])) { $actionName = $parameters[$keywordParameter]; break; @@ -952,7 +959,8 @@ class Piwik_Tracker_Action implements Piwik_Tracker_Action_Interface ? $website['sitesearch_category_parameters'] : array(); - foreach ($categoryParameters as $categoryParameter) { + foreach ($categoryParameters as $categoryParameterRaw) { + $categoryParameter = Piwik_Common::mb_strtolower($categoryParameterRaw); if (!empty($parameters[$categoryParameter])) { $categoryName = $parameters[$categoryParameter]; break; @@ -969,7 +977,7 @@ class Piwik_Tracker_Action implements Piwik_Tracker_Action_Interface { // @see excludeQueryParametersFromUrl() // Excluded the detected parameters from the URL - $parametersToExclude = array($categoryParameter, $keywordParameter); + $parametersToExclude = array($categoryParameterRaw, $keywordParameterRaw); $parsedUrl['query'] = self::getQueryStringWithExcludedParameters(Piwik_Common::getArrayFromQueryString($parsedUrl['query']), $parametersToExclude); $parsedUrl['fragment'] = self::getQueryStringWithExcludedParameters(Piwik_Common::getArrayFromQueryString($parsedUrl['fragment']), $parametersToExclude); } @@ -1021,17 +1029,36 @@ class Piwik_Tracker_Action implements Piwik_Tracker_Action_Interface // if query params are encoded w/ non-utf8 characters (due to browser bug or whatever), // encode to UTF-8. if ($encoding !== false - && !in_array( strtolower($encoding), array('utf-8', 'iso-8859-1', 'iso-8859-15')) + && strtolower($encoding) != 'utf-8' && function_exists('mb_check_encoding')) { - foreach ($queryParameters as $key => &$value) - { - $decoded = urldecode($value); - if (@mb_check_encoding($decoded, $encoding)) - { - $value = urlencode(mb_convert_encoding($decoded, 'UTF-8', $encoding)); - } + $queryParameters = self::reencodeParametersArray($queryParameters, $encoding); + } + return $queryParameters; + } + + private static function reencodeParametersArray($queryParameters, $encoding) + { + foreach($queryParameters as $key => &$value) + { + if(is_array($value)) { + $value = self::reencodeParametersArray($value, $encoding); + } else { + $value = self::reencodeParameterValue($value, $encoding); + } + } + return $queryParameters; + } + + private static function reencodeParameterValue($value, $encoding) + { + if(is_string($value)) + { + $decoded = urldecode($value); + if (@mb_check_encoding($decoded, $encoding)) { + $value = urlencode(mb_convert_encoding($decoded, 'UTF-8', $encoding)); } } + return $value; } } diff --git a/libs/PiwikTracker/PiwikTracker.php b/libs/PiwikTracker/PiwikTracker.php index 44e92c38c5e97c7b27d65d6efed4a07e51339d3a..bb506baec0c1ed4bd59d0b00566151abf25a0375 100644 --- a/libs/PiwikTracker/PiwikTracker.php +++ b/libs/PiwikTracker/PiwikTracker.php @@ -51,6 +51,13 @@ class PiwikTracker * @ignore */ const LENGTH_VISITOR_ID = 16; + + /** + * By default, Piwik expects utf-8 encoded values, for example for the page URL parameter values, Page Title, etc. + * + * @ignore + */ + const DEFAULT_CHARSET_PARAMETER_VALUES = 'utf-8'; /** * Builds a PiwikTracker object, used to track visits, pages and Goal conversions @@ -81,6 +88,7 @@ class PiwikTracker $this->requestCookie = ''; $this->idSite = $idSite; $this->urlReferrer = @$_SERVER['HTTP_REFERER']; + $this->pageCharset = self::DEFAULT_CHARSET_PARAMETER_VALUES; $this->pageUrl = self::getCurrentUrl(); $this->ip = @$_SERVER['REMOTE_ADDR']; $this->acceptLanguage = @$_SERVER['HTTP_ACCEPT_LANGUAGE']; @@ -96,7 +104,12 @@ class PiwikTracker $this->doBulkRequests = false; $this->storedTrackingActions = array(); } - + + public function setPageCharset( $charset = false ) + { + $this->pageCharset = $charset; + } + /** * Sets the current URL being tracked * @@ -1038,6 +1051,7 @@ class PiwikTracker // URL parameters '&url=' . urlencode($this->pageUrl) . '&urlref=' . urlencode($this->urlReferrer) . + ((!empty($this->pageCharset) && $this->pageCharset != self::DEFAULT_CHARSET_PARAMETER_VALUES) ? '&cs=' . $this->pageCharset : '') . // Attribution information, so that Goal conversions are attributed to the right referrer or campaign // Campaign name diff --git a/tests/PHPUnit/Integration/NonUnicodeTests.php b/tests/PHPUnit/Integration/NonUnicodeTests.php index 20b180dec96ffe8c74e83a9fbfbd5b7ce3bc2223..17374b6c4d820e6deb939fcacc9c091bc8cec99c 100755 --- a/tests/PHPUnit/Integration/NonUnicodeTests.php +++ b/tests/PHPUnit/Integration/NonUnicodeTests.php @@ -78,27 +78,38 @@ class Test_Piwik_Integration_NonUnicodeTests extends IntegrationTestCase // Test w/ iso-8859-15 $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(0.3)->getDatetime()); $visitor->setUrlReferrer('http://anothersite.com/whatever.html?whatever=Ato%FC'); - $visitor->setUrl('http://example.org/index.htm?random=param&mykwd=Search 2%FC&test&cats= Search Category &search_count=INCORRECT!'); - $visitor->setDebugStringAppend('&cs=iso-8859-15'); + // Also testing that the value is encoded when passed as an array + $visitor->setUrl('http://example.org/index.htm?random=param&mykwd[]=Search 2%FC&test&cats= Search Category &search_count=INCORRECT!'); + $visitor->setPageCharset('iso-8859-15'); self::checkResponse($visitor->doTrackPageView('Site Search results')); - $visitor->setDebugStringAppend(''); + $visitor->setPageCharset(''); // Test w/ windows-1251 $visitor = self::getTracker(self::$idSite1, self::$dateTime, $defaultInit = true); $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(0.3)->getDatetime()); $visitor->setUrlReferrer('http://anothersite.com/whatever.html?txt=%EC%E5%F8%EA%EE%E2%FB%E5'); $visitor->setUrl('http://example.org/page/index.htm?whatever=%EC%E5%F8%EA%EE%E2%FB%E5'); - $visitor->setDebugStringAppend('&cs=windows-1251'); + $visitor->setPageCharset('windows-1251'); self::checkResponse($visitor->doTrackPageView('Page title is always UTF-8')); - $visitor->setDebugStringAppend(''); + + $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(0.4)->getDatetime()); + $visitor->setUrl('http://example.org/page/index.htm?q=%EC%E5%F8%EA%EE%E2%FB%E5'); + $visitor->setPageCharset('windows-1251'); + self::checkResponse($visitor->doTrackPageView('Site Search')); + + $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(0.5)->getDatetime()); + $visitor->setUrl('http://example.org/page/index.htm?q=non unicode keyword %EC%E5%F8%EA%EE%E2%FB%E5 was there'); + $visitor->setPageCharset('utf-8'); + self::checkResponse($visitor->doTrackPageView('Site Search')); + $visitor->setPageCharset(''); // Test invalid char set $visitor = self::getTracker(self::$idSite1, self::$dateTime, $defaultInit = true); $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(1)->getDatetime()); $visitor->setUrlReferrer('http://anothersite.com/whatever.html'); $visitor->setUrl('http://example.org/index.htm?random=param&mykwd=a+keyword&test&cats= Search Category &search_count=INCORRECT!'); - $visitor->setDebugStringAppend('&cs=GTF-42'); // galactic transformation format + $visitor->setPageCharset('GTF-42'); // galactic transformation format self::checkResponse($visitor->doTrackPageView('Site Search results')); - $visitor->setDebugStringAppend(''); + $visitor->setPageCharset(''); } } diff --git a/tests/PHPUnit/Integration/SiteSearchTest.php b/tests/PHPUnit/Integration/SiteSearchTest.php index 12c1814063d3569047bfe62bad8b96fa6ebbb153..f2a5fbda9234b8a52d70852e3991109054ccb874 100755 --- a/tests/PHPUnit/Integration/SiteSearchTest.php +++ b/tests/PHPUnit/Integration/SiteSearchTest.php @@ -156,12 +156,14 @@ class Test_Piwik_Integration_SiteSearch extends IntegrationTestCase // Testing with non urlencoded values $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(0.3)->getDatetime()); - $visitor->setUrl('http://example.org/index.htm?random=param&mykwd=Search 2&test&cats= Search Category &search_count=INCORRECT!'); + // ALso testing that array[] notation is detected + $visitor->setUrl('http://example.org/index.htm?random=param&mykwd[]=Search 2&test&cats= Search Category &search_count=INCORRECT!'); self::checkResponse($visitor->doTrackPageView('Site Search results')); // Testing with urlencoded values $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(0.32)->getDatetime()); - $visitor->setUrl('http://example.org/index.htm?random=param&mykwd=Search 1&test&cats='.urlencode(' Search Category '). ' &search_count=0'); + // Also testing with random case 'myKwd' + $visitor->setUrl('http://example.org/index.htm?random=param&myKwd=Search 1&test&cats='.urlencode(' Search Category '). ' &search_count=0'); self::checkResponse($visitor->doTrackPageView('Site Search results')); // IS_FOLLOWING_SEARCH: Yes @@ -184,8 +186,11 @@ class Test_Piwik_Integration_SiteSearch extends IntegrationTestCase $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(24.43)->getDatetime()); self::checkResponse($visitor->doTrackSiteSearch("No Result Keyword!", "Bad No Result Category :(", $count = 0)); + // Keyword in iso-8859-15 charset with funny character $visitor->setForceVisitDateTime(Piwik_Date::factory(self::$dateTime)->addHour(24.5)->getDatetime()); - self::checkResponse($visitor->doTrackSiteSearch("Final Keyword Searched for now.", false, $count = 10)); + $visitor->setPageCharset('iso-8859-15'); + $visitor->setUrl('http://example.org/index.htm?q=Final%20t%FCte%20Keyword%20Searched%20for%20now&search_count=10'); + self::checkResponse($visitor->doTrackPageView(false)); // - // Visitor BIS diff --git a/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageTitles_day.xml b/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageTitles_day.xml index 5e4d9bf417608036d1085067acd72295dc385f67..11c271fe53232fdb90f6b4f3c8c5ff151e54c773 100755 --- a/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageTitles_day.xml +++ b/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageTitles_day.xml @@ -5,12 +5,10 @@ <nb_visits>1</nb_visits> <nb_uniq_visitors>1</nb_uniq_visitors> <nb_hits>1</nb_hits> - <sum_time_spent>0</sum_time_spent> + <sum_time_spent>360</sum_time_spent> <nb_hits_following_search>1</nb_hits_following_search> - <exit_nb_uniq_visitors>1</exit_nb_uniq_visitors> - <exit_nb_visits>1</exit_nb_visits> - <avg_time_on_page>0</avg_time_on_page> + <avg_time_on_page>360</avg_time_on_page> <bounce_rate>0%</bounce_rate> - <exit_rate>100%</exit_rate> + <exit_rate>0%</exit_rate> </row> </result> \ No newline at end of file diff --git a/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageUrls_day.xml b/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageUrls_day.xml index 69bf5dfd19490bb5f9773aba76b0f8fd22d3b1cd..e3fc104f077131ef1868a1137d082619a628c816 100755 --- a/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageUrls_day.xml +++ b/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getPageUrls_day.xml @@ -4,25 +4,22 @@ <label>page</label> <nb_visits>1</nb_visits> <nb_hits>1</nb_hits> - <sum_time_spent>0</sum_time_spent> + <sum_time_spent>360</sum_time_spent> <nb_hits_following_search>1</nb_hits_following_search> - <exit_nb_visits>1</exit_nb_visits> - <avg_time_on_page>0</avg_time_on_page> + <avg_time_on_page>360</avg_time_on_page> <bounce_rate>0%</bounce_rate> - <exit_rate>100%</exit_rate> + <exit_rate>0%</exit_rate> <subtable> <row> <label>/index.htm?whatever=</label> <nb_visits>1</nb_visits> <nb_uniq_visitors>1</nb_uniq_visitors> <nb_hits>1</nb_hits> - <sum_time_spent>0</sum_time_spent> + <sum_time_spent>360</sum_time_spent> <nb_hits_following_search>1</nb_hits_following_search> - <exit_nb_uniq_visitors>1</exit_nb_uniq_visitors> - <exit_nb_visits>1</exit_nb_visits> - <avg_time_on_page>0</avg_time_on_page> + <avg_time_on_page>360</avg_time_on_page> <bounce_rate>0%</bounce_rate> - <exit_rate>100%</exit_rate> + <exit_rate>0%</exit_rate> <url>http://example.org/page/index.htm?whatever=%EC%E5%F8%EA%EE%E2%FB%E5</url> </row> </subtable> diff --git a/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getSiteSearchKeywords_day.xml b/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getSiteSearchKeywords_day.xml index 4996356cb9d1b7fe812b2b84253d26b759df7f3a..33535c7b1f888d9de8afab149260722cb288c195 100755 --- a/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getSiteSearchKeywords_day.xml +++ b/tests/PHPUnit/Integration/expected/test_NonUnicode__Actions.getSiteSearchKeywords_day.xml @@ -11,6 +11,17 @@ <bounce_rate>0%</bounce_rate> <exit_rate>100%</exit_rate> </row> + <row> + <label>non unicode keyword was there</label> + <nb_visits>1</nb_visits> + <nb_hits>1</nb_hits> + <sum_time_spent>0</sum_time_spent> + <exit_nb_visits>1</exit_nb_visits> + <nb_pages_per_search>1</nb_pages_per_search> + <avg_time_on_page>0</avg_time_on_page> + <bounce_rate>0%</bounce_rate> + <exit_rate>100%</exit_rate> + </row> <row> <label>Search 2ü</label> <nb_visits>1</nb_visits> @@ -21,4 +32,14 @@ <bounce_rate>0%</bounce_rate> <exit_rate>0%</exit_rate> </row> + <row> + <label>мешковые</label> + <nb_visits>1</nb_visits> + <nb_hits>1</nb_hits> + <sum_time_spent>360</sum_time_spent> + <nb_pages_per_search>1</nb_pages_per_search> + <avg_time_on_page>360</avg_time_on_page> + <bounce_rate>0%</bounce_rate> + <exit_rate>0%</exit_rate> + </row> </result> \ No newline at end of file diff --git a/tests/PHPUnit/Integration/expected/test_NonUnicode__Referers.getWebsites_day.xml b/tests/PHPUnit/Integration/expected/test_NonUnicode__Referers.getWebsites_day.xml index a8581a98d3494b526229ce68a063e2e1a0d7a505..9d1211be043e52b5cdf3e5f606311577d6e4402f 100755 --- a/tests/PHPUnit/Integration/expected/test_NonUnicode__Referers.getWebsites_day.xml +++ b/tests/PHPUnit/Integration/expected/test_NonUnicode__Referers.getWebsites_day.xml @@ -4,10 +4,10 @@ <label>anothersite.com</label> <nb_uniq_visitors>2</nb_uniq_visitors> <nb_visits>2</nb_visits> - <nb_actions>1</nb_actions> - <max_actions>1</max_actions> - <sum_visit_length>1</sum_visit_length> - <bounce_count>2</bounce_count> + <nb_actions>3</nb_actions> + <max_actions>3</max_actions> + <sum_visit_length>721</sum_visit_length> + <bounce_count>1</bounce_count> <nb_visits_converted>0</nb_visits_converted> <subtable> <row> @@ -24,10 +24,10 @@ <label>http://anothersite.com/whatever.html?whatever=Ato</label> <nb_uniq_visitors>1</nb_uniq_visitors> <nb_visits>1</nb_visits> - <nb_actions>1</nb_actions> - <max_actions>1</max_actions> - <sum_visit_length>1</sum_visit_length> - <bounce_count>1</bounce_count> + <nb_actions>3</nb_actions> + <max_actions>3</max_actions> + <sum_visit_length>721</sum_visit_length> + <bounce_count>0</bounce_count> <nb_visits_converted>0</nb_visits_converted> </row> </subtable> diff --git a/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_day.xml b/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_day.xml index 50a09d0f372446f0906cec76297eced1cd87da65..c4cc0ca63029610d7ec0659f80cc8ed7e65b124b 100644 --- a/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_day.xml +++ b/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_day.xml @@ -56,7 +56,7 @@ </result> <result prettyDate="Monday 4 January 2010"> <row> - <label>Final Keyword Searched for now.</label> + <label>Final tüte Keyword Searched for now</label> <nb_visits>1</nb_visits> <nb_pages_per_search>1</nb_pages_per_search> <exit_rate>100%</exit_rate> diff --git a/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_month.xml b/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_month.xml index b6298078d92a05fabc649f1a4a067e03217994f4..dc6e356798209faeae482ad852764ab7d1eb1146 100644 --- a/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_month.xml +++ b/tests/PHPUnit/Integration/expected/test_SiteSearch_Actions.getSiteSearchKeywords_firstSite_lastN__API.getProcessedReport_month.xml @@ -54,7 +54,7 @@ <exit_rate>50%</exit_rate> </row> <row> - <label>Final Keyword Searched for now.</label> + <label>Final tüte Keyword Searched for now</label> <nb_visits>1</nb_visits> <nb_pages_per_search>1</nb_pages_per_search> <exit_rate>100%</exit_rate> diff --git a/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_day.xml b/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_day.xml index 807b530ba41a8652251c1673c2ede377957980e1..df088f8ae0a34f9276bccf2e8b8fd2f8e4ebc903 100644 --- a/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_day.xml +++ b/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_day.xml @@ -36,7 +36,7 @@ </result> <result date="2010-01-04"> <row> - <label>Final Keyword Searched for now.</label> + <label>Final tüte Keyword Searched for now</label> <nb_visits>1</nb_visits> <nb_hits>1</nb_hits> <sum_time_spent>0</sum_time_spent> diff --git a/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_month.xml b/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_month.xml index 3d60272474a3b9bc4426844ba2a5ca6e3701db76..777e92604627c1801c91e8794676bae4ce8923ac 100644 --- a/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_month.xml +++ b/tests/PHPUnit/Integration/expected/test_SiteSearch_AllSites__Actions.getSiteSearchKeywords_month.xml @@ -34,7 +34,7 @@ <exit_rate>50%</exit_rate> </row> <row> - <label>Final Keyword Searched for now.</label> + <label>Final tüte Keyword Searched for now</label> <nb_visits>1</nb_visits> <nb_hits>1</nb_hits> <sum_time_spent>0</sum_time_spent> diff --git a/tests/PHPUnit/Integration/expected/test_SiteSearch_NotLastNPeriods__Actions.getSiteSearchKeywords_month.xml b/tests/PHPUnit/Integration/expected/test_SiteSearch_NotLastNPeriods__Actions.getSiteSearchKeywords_month.xml index e1e9b882321b65e0559f224bf511afede1ceb226..7de408b2e215cdaeb8ee012673e2d5fa1bdd406f 100644 --- a/tests/PHPUnit/Integration/expected/test_SiteSearch_NotLastNPeriods__Actions.getSiteSearchKeywords_month.xml +++ b/tests/PHPUnit/Integration/expected/test_SiteSearch_NotLastNPeriods__Actions.getSiteSearchKeywords_month.xml @@ -32,7 +32,7 @@ <exit_rate>50%</exit_rate> </row> <row> - <label>Final Keyword Searched for now.</label> + <label>Final tüte Keyword Searched for now</label> <nb_visits>1</nb_visits> <nb_hits>1</nb_hits> <sum_time_spent>0</sum_time_spent>