diff --git a/config/global.ini.php b/config/global.ini.php
index 31b8beb8c53155fffd3dc7b18abdf72a80deb40c..939ce65c17f51ed6cc799c276d5596a2d0003b22 100644
--- a/config/global.ini.php
+++ b/config/global.ini.php
@@ -497,6 +497,11 @@ tracker_cache_file_ttl = 300
 ; Whether Bulk tracking requests to the Tracking API requires the token_auth to be set.
 bulk_requests_require_authentication = 0
 
+; All Visits with a Referrer URL host set to one of these will be excluded.
+; If you find new spam entries in Referrers>Websites, please report them here: http://dev.piwik.org/trac/ticket/2268
+; Comma separated list of known Referrer Spammers, i.e. bot visits that set a fake Referrer field:
+referrer_urls_spam = "semalt.com"
+
 ; DO NOT USE THIS SETTING ON PUBLICLY AVAILABLE PIWIK SERVER
 ; !!! Security risk: if set to 0, it would allow anyone to push data to Piwik with custom dates in the past/future and even with fake IPs!
 ; When using the Tracking API, to override either the datetime and/or the visitor IP,
diff --git a/core/Tracker/VisitExcluded.php b/core/Tracker/VisitExcluded.php
index 25d05250807efb836ddb7646d69b23c2d1a7902e..8ca1e2fbaa3c9f0a8cf16f25949a53199faa0989 100644
--- a/core/Tracker/VisitExcluded.php
+++ b/core/Tracker/VisitExcluded.php
@@ -9,6 +9,7 @@
 namespace Piwik\Tracker;
 
 use Piwik\Common;
+use Piwik\Config;
 use Piwik\IP;
 use Piwik\Piwik;
 
@@ -241,4 +242,33 @@ class VisitExcluded
         }
         return false;
     }
+
+    /**
+     * Returns true if the Referrer is a known spammer.
+     *
+     * Each entry of the comma separated `referrer_urls_spam` Tracker setting is compared against the
+     * request's `urlref` parameter with a case-insensitive substring search.
+     * Entries are trimmed and blank ones skipped: an empty needle raises a warning on PHP < 8
+     * and matches every string on PHP >= 8, which would exclude all visits.
+     *
+     * @return bool
+     */
+    protected function isReferrerSpamExcluded()
+    {
+        $spamHosts = Config::getInstance()->Tracker['referrer_urls_spam'];
+        $spamHosts = explode(",", $spamHosts);
+
+        $referrerUrl = $this->request->getParam('urlref');
+        foreach ($spamHosts as $spamHost) {
+            // tolerate lists written as "a.com, b.com" or with a trailing comma
+            $spamHost = trim($spamHost);
+            if ($spamHost === '') {
+                continue;
+            }
+            if (stripos($referrerUrl, $spamHost) !== false) {
+                return true;
+            }
+        }
+        return false;
+    }
 }
diff --git a/tests/PHPUnit/Integration/Core/Tracker/VisitTest.php b/tests/PHPUnit/Integration/Core/Tracker/VisitTest.php
index 65a3ecede84ff49b4aa0fe6479326f4850813e3e..516913efc6882ea7e05600ce6c6bc15ce9eb5375 100644
--- a/tests/PHPUnit/Integration/Core/Tracker/VisitTest.php
+++ b/tests/PHPUnit/Integration/Core/Tracker/VisitTest.php
@@ -136,7 +136,39 @@ class Core_Tracker_VisitTest extends DatabaseTestCase
 
         foreach ($tests as $ua => $expected) {
             $excluded = new VisitExcluded_public($request, $ip = false, $ua);
-            $this->assertSame($expected, $excluded->public_isUserAgentExcluded($ua), "Result if isUserAgentExcluded('$ua') was not " . ($expected ? 'true' : 'false') . ".");
+            $this->assertSame($expected, $excluded->public_isUserAgentExcluded(), "Result if isUserAgentExcluded('$ua') was not " . ($expected ? 'true' : 'false') . ".");
+        }
+    }
+
+    /**
+     * @group Core
+     * @group referrerIsKnownSpam
+     */
+    public function testIsVisitor_referrerIsKnownSpam()
+    {
+        $knownSpammers = array(
+            'http://semalt.com' => true,
+            'http://semalt.com/random/sub/page' => true,
+            'http://semalt.com/out/of/here?mate' => true,
+            'http://SEMALT.com/mixed/case' => true,
+            'http://valid.domain/' => false,
+            'http://valid.domain/page' => false,
+        );
+        API::getInstance()->setSiteSpecificUserAgentExcludeEnabled(true);
+
+        $idsite = API::getInstance()->addSite("name", "http://piwik.net/");
+
+
+        // test that visits whose referrer URL contains a known spam host are excluded
+        foreach ($knownSpammers as $spamUrl => $expectedIsReferrerSpam) {
+            $spamUrl = urlencode($spamUrl);
+            $request = new Request(array(
+                'idsite' => $idsite,
+                'urlref' => $spamUrl
+            ));
+            $excluded = new VisitExcluded_public($request);
+
+            $this->assertSame($expectedIsReferrerSpam, $excluded->public_isReferrerSpamExcluded(), $spamUrl);
         }
     }
 }
@@ -148,8 +180,12 @@ class VisitExcluded_public extends VisitExcluded
         return $this->isVisitorIpExcluded($ip);
     }
 
-    public function public_isUserAgentExcluded($ua)
+    public function public_isUserAgentExcluded()
+    {
+        return $this->isUserAgentExcluded();
+    }
+    public function public_isReferrerSpamExcluded()
     {
-        return $this->isUserAgentExcluded($ua);
+        return $this->isReferrerSpamExcluded();
     }
 }