diff --git a/CHANGELOG.md b/CHANGELOG.md index ce013f131c20c23174775c3f766a165d8e55b644..4f7004f45af1c0c8e3bc685aa8d6f8aa317548cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,10 @@ This is a changelog for Piwik platform developers. All changes for our HTTP API' * In 2.6.0 we added the possibility to use `filter_limit` and `filter_offset` if an API returns an indexed array. This was not working in all cases and is fixed now. * The API parameter `filter_pattern` and `filter_offset[]` can now be used if an API returns an indexed array. +### Internal changes + +* The referrer spam filter has moved from the `referrer_urls_spam` INI option (in `global.ini.php`) to a separate package (see [https://github.com/piwik/referrer-spam-blacklist](https://github.com/piwik/referrer-spam-blacklist)). + ## Piwik 2.12.0 ### Breaking Changes @@ -85,7 +89,7 @@ This is a changelog for Piwik platform developers. All changes for our HTTP API' * During UI tests we do now add a CSS class to the HTML element called `uiTest`. This allows you do hide content when screenshots are captured. ### New commands -* A new command (core:fix-duplicate-log-actions) has been added which can be used to remove duplicate actions and correct references to them in other tables. Duplicates were caused by this bug: https://github.com/piwik/piwik/issues/6436 +* A new command (core:fix-duplicate-log-actions) has been added which can be used to remove duplicate actions and correct references to them in other tables. 
Duplicates were caused by this bug: [#6436](https://github.com/piwik/piwik/issues/6436) ### Library updates * Updated AngularJS from 1.2.26 to 1.2.28 diff --git a/composer.json b/composer.json index 7acbf4508f90f423181cde0fb2477ba8e65a6bec..f452c42abc5120987fddf3e11522de79092babd7 100644 --- a/composer.json +++ b/composer.json @@ -53,7 +53,8 @@ "symfony/monolog-bridge": "~2.6", "symfony/event-dispatcher": "~2.6", "pear/pear_exception": "~1.0.0", - "pear/pear-core-minimal": "~1.9.5" + "pear/pear-core-minimal": "~1.9.5", + "piwik/referrer-spam-blacklist": "~1.0" }, "require-dev": { "aws/aws-sdk-php": "2.7.1", diff --git a/composer.lock b/composer.lock index fadd1acc2e1ab6f2b0038c9f2afcd98e94f0b106..82da13c21adb228d7b67e56a84bf2ac604b6abe9 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "f0c9c70091696d6197a4c438b9c12860", + "hash": "5033f5c1e5d1d67add5f5e75bbe59aaf", "packages": [ { "name": "container-interop/container-interop", @@ -926,6 +926,28 @@ ], "time": "2014-10-23 03:30:23" }, + { + "name": "piwik/referrer-spam-blacklist", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/piwik/referrer-spam-blacklist.git", + "reference": "4508617435cd8b93c0fbd0fd430dc24d2a898cdb" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/piwik/referrer-spam-blacklist/zipball/4508617435cd8b93c0fbd0fd430dc24d2a898cdb", + "reference": "4508617435cd8b93c0fbd0fd430dc24d2a898cdb", + "shasum": "" + }, + "type": "library", + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Public Domain" + ], + "description": "Community-contributed list of referrer spammers", + "time": "2015-04-19 23:20:51" + }, { "name": "psr/log", "version": "1.0.0", diff --git a/config/global.ini.php b/config/global.ini.php index 
/**
 * Filters out tracking requests issued by spammers.
 *
 * The blacklist is read from the `piwik/referrer-spam-blacklist` package
 * (one entry per line) and cached on the instance after the first lookup.
 */
class ReferrerSpamFilter
{
    /**
     * Cached blacklist entries; null until the list has been loaded once.
     *
     * @var string[]|null
     */
    private $spammerList;

    /**
     * Check if the request is from a known spammer host.
     *
     * A request is considered spam when its `urlref` parameter contains one of
     * the blacklisted entries as a substring.
     *
     * @param Request $request
     * @return bool true if the referrer matches a blacklisted entry, false otherwise
     */
    public function isSpam(Request $request)
    {
        $referrerUrl = $request->getParam('urlref');

        // Without a usable referrer nothing can match, so skip loading the
        // blacklist from disk altogether.
        if (!is_string($referrerUrl) || $referrerUrl === '') {
            return false;
        }

        foreach ($this->loadSpammerList() as $spammerHost) {
            if (strpos($referrerUrl, $spammerHost) !== false) {
                Common::printDebug('Referrer URL is a known spam: ' . $spammerHost);
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the blacklist entries, reading them from disk on first use.
     *
     * An unreadable or missing blacklist file yields an empty list, so tracking
     * keeps working (without spam filtering) instead of raising warnings on
     * every request.
     *
     * @return string[]
     */
    private function loadSpammerList()
    {
        if ($this->spammerList !== null) {
            return $this->spammerList;
        }

        $file = PIWIK_INCLUDE_PATH . '/vendor/piwik/referrer-spam-blacklist/spammers.txt';

        $lines = false;
        if (is_readable($file)) {
            $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        }

        if ($lines === false) {
            Common::printDebug('Referrer spam blacklist could not be read: ' . $file);
            $lines = array();
        }

        // Strip stray whitespace (e.g. "\r" from CRLF line endings) and drop
        // entries that end up empty: an empty needle would either raise a
        // warning or match every referrer, depending on the PHP version.
        $this->spammerList = array_values(array_filter(array_map('trim', $lines), 'strlen'));

        return $this->spammerList;
    }
}
$spamHost); - return true; - } - } - - return false; + return $this->spamFilter->isSpam($this->request); } }