Skip to content
Extraits de code Groupes Projets
Valider dba06599 rédigé par Matthieu Aubry's avatar Matthieu Aubry
Parcourir les fichiers

Merge pull request #6657 from piwik/bugfix/5365

 Do not store Accept-language full string, only store one detected language
parents 52443868 6c9573cf
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -34,6 +34,7 @@ class Common
/*
* Database
*/
const LANGUAGE_CODE_INVALID = 'xx';
/**
* Hashes a string into an integer, with a very low risk of collisions
......@@ -937,8 +938,8 @@ class Common
*/
public static function getCountry($lang, $enableLanguageToCountryGuess, $ip)
{
if (empty($lang) || strlen($lang) < 2 || $lang == 'xx') {
return 'xx';
if (empty($lang) || strlen($lang) < 2 || $lang == self::LANGUAGE_CODE_INVALID) {
return self::LANGUAGE_CODE_INVALID;
}
$validCountries = self::getCountriesList();
......@@ -974,35 +975,73 @@ class Common
}
}
}
return 'xx';
return self::LANGUAGE_CODE_INVALID;
}
/**
* Returns the visitor language based only on the Browser 'accepted language' information
* Returns the language and region string, based only on the Browser 'accepted language' information.
* * The language tag is defined by ISO 639-1
*
* @param string $browserLanguage Browser's accepted language header
* @param array $validLanguages array of valid language codes
* @return string 2 letter ISO 639 code
* @return string 2 letter ISO 639 code 'es' (Spanish)
*/
public static function extractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages)
public static function extractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages = array())
{
// assumes language preference is sorted;
// does not handle language-script-region tags or language range (*)
if (!empty($validLanguages) && preg_match_all('/(?:^|,)([a-z]{2,3})([-][a-z]{2})?/', $browserLanguage, $matches, PREG_SET_ORDER)) {
foreach ($matches as $parts) {
if (count($parts) == 3) {
// match locale (language and location)
if (in_array($parts[1] . $parts[2], $validLanguages)) {
return $parts[1] . $parts[2];
}
$validLanguages = self::checkValidLanguagesIsSet($validLanguages);
$languageRegionCode = self::extractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages);
if(strlen($languageRegionCode) == 2) {
$languageCode = $languageRegionCode;
} else {
$languageCode = substr($languageRegionCode, 0, 2);
}
if(in_array($languageCode, $validLanguages)) {
return $languageCode;
}
return self::LANGUAGE_CODE_INVALID;
}
/**
* Returns the language and region string, based only on the Browser 'accepted language' information.
* * The language tag is defined by ISO 639-1
* * The region tag is defined by ISO 3166-1
*
* @param string $browserLanguage Browser's accepted language header
* @param array $validLanguages array of valid language codes. Note that if the array includes "fr" then it will consider all regional variants of this language valid, such as "fr-ca" etc.
* @return string 2 letter ISO 639 code, e.g. 'es' (Spanish); when a region is found it is appended, e.g. 'es-ar'. Returns 'xx' when no valid language is detected.
*/
public static function extractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages = array() )
{
$validLanguages = self::checkValidLanguagesIsSet($validLanguages);
if(!preg_match_all('/(?:^|,)([a-z]{2,3})([-][a-z]{2})?/', $browserLanguage, $matches, PREG_SET_ORDER)) {
return self::LANGUAGE_CODE_INVALID;
}
foreach ($matches as $parts) {
$langIso639 = $parts[1];
if(empty($langIso639)) {
continue;
}
// If a region tag is found eg. "fr-ca"
if (count($parts) == 3) {
$regionIso3166 = $parts[2]; // eg. "-ca"
if (in_array($langIso639 . $regionIso3166, $validLanguages)) {
return $langIso639 . $regionIso3166;
}
// match language only (where no region provided)
if (in_array($parts[1], $validLanguages)) {
return $parts[1];
if (in_array($langIso639, $validLanguages)) {
return $langIso639 . $regionIso3166;
}
}
// eg. "fr" or "es"
if (in_array($langIso639, $validLanguages)) {
return $langIso639;
}
}
return 'xx';
return self::LANGUAGE_CODE_INVALID;
}
/**
......@@ -1161,4 +1200,17 @@ class Common
}
}
}
/**
 * Supplies a default list of language codes when the caller did not provide one.
 *
 * @param array $validLanguages language codes given by the caller; may be empty
 * @return array the given codes when non-empty, otherwise the keys of Common::getLanguagesList()
 */
protected static function checkValidLanguagesIsSet($validLanguages)
{
    // A caller-supplied list always wins and is handed back untouched.
    if (!empty($validLanguages)) {
        return $validLanguages;
    }

    // Nothing supplied: fall back to the keys of the languages list.
    return array_keys(Common::getLanguagesList());
}
}
......@@ -140,12 +140,11 @@ class Archiver extends \Piwik\Plugin\Archiver
protected function aggregateByLanguage()
{
$query = $this->getLogAggregator()->queryVisitsByDimension(array("label" => self::LANGUAGE_DIMENSION));
$languageCodes = array_keys(Common::getLanguagesList());
$countryCodes = Common::getCountriesList($includeInternalCodes = true);
$metricsByLanguage = new DataArray();
while ($row = $query->fetch()) {
$langCode = Common::extractLanguageCodeFromBrowserLanguage($row['label'], $languageCodes);
$langCode = Common::extractLanguageCodeFromBrowserLanguage($row['label']);
$countryCode = Common::extractCountryCodeFromBrowserLanguage($row['label'], $countryCodes, $enableLanguageToCountryGuess = true);
if ($countryCode == 'xx' || $countryCode == $langCode) {
......
......@@ -8,6 +8,7 @@
*/
namespace Piwik\Plugins\UserSettings\Columns;
use Piwik\Common;
use Piwik\Piwik;
use Piwik\Plugin\Dimension\VisitDimension;
use Piwik\Tracker\Action;
......@@ -32,12 +33,22 @@ class Language extends VisitDimension
*/
public function onNewVisit(Request $request, Visitor $visitor, $action)
{
$language = $request->getBrowserLanguage();
return $this->getSingleLanguageFromAcceptedLanguages($request->getBrowserLanguage());
}
if (empty($language)) {
/**
* For better privacy we store only the main language code, instead of the whole browser language string.
*
* @param $acceptLanguagesString
* @return string
*/
protected function getSingleLanguageFromAcceptedLanguages($acceptLanguagesString)
{
if (empty($acceptLanguagesString)) {
return '';
}
return substr($language, 0, 20);
$languageCode = Common::extractLanguageAndRegionCodeFromBrowserLanguage($acceptLanguagesString);
return $languageCode;
}
}
\ No newline at end of file
}
......@@ -2,7 +2,7 @@
<result>
<row>
<label>Polish (pl)</label>
<nb_uniq_visitors>2</nb_uniq_visitors>
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>3</nb_visits>
<nb_actions>3</nb_actions>
<nb_users>0</nb_users>
......@@ -12,7 +12,7 @@
<nb_visits_converted>0</nb_visits_converted>
</row>
<row>
<label>English - United States (en-us)</label>
<label>English (en)</label>
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>2</nb_visits>
<nb_actions>2</nb_actions>
......@@ -133,7 +133,7 @@
<nb_visits_converted>0</nb_visits_converted>
</row>
<row>
<label>Unknown - Liberia (xx-lr)</label>
<label>Unknown (xx)</label>
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
......
......@@ -2,7 +2,7 @@
<result>
<row>
<label>Polish</label>
<nb_uniq_visitors>2</nb_uniq_visitors>
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>3</nb_visits>
<nb_actions>3</nb_actions>
<nb_users>0</nb_users>
......
......@@ -395,25 +395,29 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase
}
/**
* Dataprovider for testExtractLanguageCodeFromBrowserLanguage
* Dataprovider for testExtractLanguageAndRegionCodeFromBrowserLanguage
*/
public function getLanguageDataToExtract()
public function getLanguageDataToExtractLanguageRegionCode()
{
return array(
// browser language, valid languages, expected result
array("fr-ca", array("fr"), "fr"),
// browser language, valid languages (with optional region), expected result
array("fr-ca", array("fr"), "fr-ca"),
array("fr-ca", array("ca"), "xx"),
array("", array(), "xx"),
array("", array("en"), "xx"),
array("fr", array("en"), "xx"),
array("en", array("en"), "en"),
array("en", array("en-ca"), "xx"),
array("en-ca", array("en-ca"), "en-ca"),
array("en-ca", array("en"), "en"),
array("en-ca", array("en"), "en-ca"),
array("fr,en-us", array("fr", "en"), "fr"),
array("fr,en-us", array("en", "fr"), "fr"),
array("fr-fr,fr-ca", array("fr"), "fr"),
array("fr-fr,fr-ca", array("fr"), "fr-fr"),
array("fr-fr,fr-ca", array("fr-ca"), "fr-ca"),
array("-ca", array("fr","ca"), "xx"),
array("fr-fr;q=1.0,fr-ca;q=0.9", array("fr-ca"), "fr-ca"),
array("es,en,fr;q=0.7,de;q=0.3", array("fr", "es", "de", "en"), "es"),
array("zh-sg,de;q=0.3", array("zh", "es", "de"), "zh-sg"),
array("fr-ca,fr;q=0.1", array("fr-ca"), "fr-ca"),
array("r5,fr;q=1,de", array("fr", "de"), "fr"),
array("Zen§gq1", array("en"), "xx"),
......@@ -421,7 +425,42 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase
}
/**
* @dataProvider getLanguageDataToExtract
* @dataProvider getLanguageDataToExtractLanguageRegionCode
* @group Core
*/
public function testExtractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages, $expected)
{
    // Run the extraction once, then compare it with the expectation from the data provider.
    $actual = Common::extractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages);
    $failureMessage = "test with {$browserLanguage} failed, expected {$expected}";

    $this->assertEquals($expected, $actual, $failureMessage);
}
/**
 * Data provider for testExtractLanguageCodeFromBrowserLanguage.
 *
 * Every data set holds three values, in this order: the browser language
 * string, the list of valid language codes, and the expected result.
 *
 * @return array
 */
public function getLanguageDataToExtractLanguageCode()
{
    $dataSets = array(
        // browser language, valid languages, expected result
        array("fr-ca", array("fr"), "fr"),
        array("fr-ca", array("ca"), "xx"),
        array("", array("en"), "xx"),
        array("fr", array("en"), "xx"),
        array("en", array("en"), "en"),
        array("en", array("en-ca"), "xx"),
        array("en-ca", array("en"), "en"),
        array("fr,en-us", array("fr", "en"), "fr"),
        array("fr,en-us", array("en", "fr"), "fr"),
        array("fr-fr,fr-ca", array("fr"), "fr"),
        array("-ca", array("fr","ca"), "xx"),
        array("es,en,fr;q=0.7,de;q=0.3", array("fr", "es", "de", "en"), "es"),
        array("zh-sg,de;q=0.3", array("zh", "es", "de"), "zh"),
        array("r5,fr;q=1,de", array("fr", "de"), "fr"),
        array("Zen§gq1", array("en"), "xx"),
    );

    return $dataSets;
}
/**
* @dataProvider getLanguageDataToExtractLanguageCode
* @group Core
*/
public function testExtractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages, $expected)
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter