From 060fe92c83ae9631cab08186b50c25ede88ecb4c Mon Sep 17 00:00:00 2001
From: Matthieu Napoli <matthieu@mnapoli.fr>
Date: Sun, 19 Apr 2015 23:48:14 +1200
Subject: [PATCH] #7674 Begun moving the spammer list into a file + separate package

---
 CHANGELOG.md                      |   6 ++
 config/global.ini.php             |   5 --
 core/Tracker/Visit/SpamFilter.php | 103 ++++++++++++++++++++++++++++++
 core/Tracker/VisitExcluded.php    |  20 ++----
 4 files changed, 115 insertions(+), 19 deletions(-)
 create mode 100644 core/Tracker/Visit/SpamFilter.php

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6a300ac3d8..d72b877b22 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 This is a changelog for Piwik platform developers. All changes for our HTTP API's, Plugins, Themes, etc will be listed here.
 
+## Next
+
+### Internal changes
+
+* The referer spam filter has moved from the `referrer_urls_spam` INI option (in `global.ini.php`) to a separate package (see https://github.com/piwik/referer-spam-blacklist).
+
 ## Piwik 2.13.0
 
 ### Deprecations
diff --git a/config/global.ini.php b/config/global.ini.php
index 526f8b3e24..f366e27820 100644
--- a/config/global.ini.php
+++ b/config/global.ini.php
@@ -651,11 +651,6 @@ bulk_requests_require_authentication = 0
 ; This greatly increases performance of Log Analytics and in general any Bulk Tracking API requests.
 bulk_requests_use_transaction = 1
 
-; Comma separated list of known Referrer Spammers, ie. bot visits that set a fake Referrer field.
-; All Visits with a Referrer URL host set to one of these will be excluded.
-; If you find new spam entries in Referrers>Websites, please report them here: https://github.com/piwik/piwik/issues/5099
-referrer_urls_spam = "4webmasters.org,7makemoneyonline.com,anticrawler.org,best-seo-solution.com,bestwebsitesawards.com,blackhatworth.com,buttons-for-website.com,darodar.com,econom.co,hulfingtonpost.com,ilovevitaly.com,kambasoft.com,o-o-6-o-o.com,priceg.com,ranksonic.info,ranksonic.org,savetubevideo.com,semalt.com"
-
 ; DO NOT USE THIS SETTING ON PUBLICLY AVAILABLE PIWIK SERVER
 ; !!! Security risk: if set to 0, it would allow anyone to push data to Piwik with custom dates in the past/future and even with fake IPs!
 ; When using the Tracking API, to override either the datetime and/or the visitor IP,
diff --git a/core/Tracker/Visit/SpamFilter.php b/core/Tracker/Visit/SpamFilter.php
new file mode 100644
index 0000000000..5ec458014d
--- /dev/null
+++ b/core/Tracker/Visit/SpamFilter.php
@@ -0,0 +1,103 @@
+<?php
+
+namespace Piwik\Tracker\Visit;
+
+use Piwik\Common;
+use Piwik\Container\StaticContainer;
+use Piwik\Tracker\Request;
+
+/**
+ * Filters out tracking requests issued by spammers.
+ *
+ * A request is treated as spam when its referrer URL (the `urlref` tracking
+ * parameter) contains one of the known spammer hosts.
+ */
+class SpamFilter
+{
+    /**
+     * Known spammer hosts, loaded lazily on first use (null until then).
+     *
+     * @var string[]|null
+     */
+    private $spammerList;
+
+    /**
+     * Check if the request is from a known spammer host.
+     *
+     * @param Request $request
+     * @return bool True when the referrer URL contains a known spammer host.
+     * @throws \Exception If the user-provided spammer file exists but cannot be read.
+     */
+    public function isSpam(Request $request)
+    {
+        $spammers = $this->loadSpammerList();
+
+        $referrerUrl = $request->getParam('urlref');
+
+        foreach ($spammers as $spammerHost) {
+            // Plain substring match against the whole referrer URL.
+            if (strpos($referrerUrl, $spammerHost) !== false) {
+                Common::printDebug('Referrer URL is a known spam: ' . $spammerHost);
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns the list of spammer hosts, reading it at most once per instance.
+     *
+     * The list comes from `spammers.txt` (one host per line) under the path stored in
+     * the `path.tmp` container entry when that file exists, otherwise from the defaults.
+     *
+     * @return string[]
+     * @throws \Exception If the file exists but cannot be read.
+     */
+    private function loadSpammerList()
+    {
+        if ($this->spammerList !== null) {
+            return $this->spammerList;
+        }
+
+        $userFile = StaticContainer::get('path.tmp') . '/spammers.txt';
+        if (file_exists($userFile)) {
+            $lines = file($userFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+
+            // file() returns false when the file cannot be read.
+            if (!is_array($lines)) {
+                throw new \Exception(sprintf('The file %s could not be read', $userFile));
+            }
+
+            // Trim each entry (stray spaces, "\r" from CRLF files) and drop blank ones:
+            // an empty needle would make strpos() warn or match every referrer.
+            $this->spammerList = array_values(array_filter(array_map('trim', $lines), 'strlen'));
+        } else {
+            // TODO
+            // Until the list is loaded from elsewhere, fall back to the hosts that the
+            // removed `referrer_urls_spam` INI option listed.
+            $this->spammerList = array(
+                '4webmasters.org',
+                '7makemoneyonline.com',
+                'anticrawler.org',
+                'best-seo-solution.com',
+                'bestwebsitesawards.com',
+                'blackhatworth.com',
+                'buttons-for-website.com',
+                'darodar.com',
+                'econom.co',
+                'hulfingtonpost.com',
+                'ilovevitaly.com',
+                'kambasoft.com',
+                'o-o-6-o-o.com',
+                'priceg.com',
+                'ranksonic.info',
+                'ranksonic.org',
+                'savetubevideo.com',
+                'semalt.com',
+            );
+        }
+
+        return $this->spammerList;
+    }
+}
diff --git a/core/Tracker/VisitExcluded.php b/core/Tracker/VisitExcluded.php
index ae380cac42..8aae3bbf00 100644
--- a/core/Tracker/VisitExcluded.php
+++ b/core/Tracker/VisitExcluded.php
@@ -13,12 +13,15 @@ use Piwik\Config;
 use Piwik\DeviceDetectorFactory;
 use Piwik\Network\IP;
 use Piwik\Piwik;
+use Piwik\Tracker\Visit\SpamFilter;
 
 /**
  * This class contains the logic to exclude some visitors from being tracked as per user settings
  */
 class VisitExcluded
 {
+    private $spamFilter;
+
     /**
      * @param Request $request
      * @param bool|string $ip
@@ -26,6 +29,8 @@ class VisitExcluded
      */
     public function __construct(Request $request, $ip = false, $userAgent = false)
     {
+        $this->spamFilter = new SpamFilter();
+
         if (false === $ip) {
             $ip = $request->getIp();
         }
@@ -266,19 +271,6 @@ class VisitExcluded
      */
     protected function isReferrerSpamExcluded()
     {
-        $spamHosts = Config::getInstance()->Tracker['referrer_urls_spam'];
-        $spamHosts = explode(",", $spamHosts);
-
-        $referrerUrl = $this->request->getParam('urlref');
-
-        foreach($spamHosts as $spamHost) {
-            $spamHost = trim($spamHost);
-            if ( strpos($referrerUrl, $spamHost) !== false) {
-                Common::printDebug('Referrer URL is a known spam: ' . $spamHost);
-                return true;
-            }
-        }
-
-        return false;
+        return $this->spamFilter->isSpam($this->request);
     }
 }
-- 
GitLab