Db
6 years ago
Handler
6 years ago
TableLogAction
6 years ago
Visit
6 years ago
Action.php
6 years ago
ActionPageview.php
6 years ago
Cache.php
6 years ago
Db.php
6 years ago
Failures.php
6 years ago
FingerprintSalt.php
6 years ago
GoalManager.php
6 years ago
Handler.php
6 years ago
IgnoreCookie.php
6 years ago
LogTable.php
6 years ago
Model.php
6 years ago
PageUrl.php
6 years ago
Request.php
5 years ago
RequestProcessor.php
6 years ago
RequestSet.php
6 years ago
Response.php
6 years ago
ScheduledTasksRunner.php
6 years ago
Settings.php
5 years ago
TableLogAction.php
6 years ago
TrackerCodeGenerator.php
6 years ago
TrackerConfig.php
6 years ago
Visit.php
5 years ago
VisitExcluded.php
6 years ago
VisitInterface.php
6 years ago
Visitor.php
6 years ago
VisitorNotFoundInDb.php
6 years ago
VisitorRecognizer.php
6 years ago
VisitExcluded.php
369 lines
| 1 | <?php |
| 2 | /** |
| 3 | * Piwik - free/libre analytics platform |
| 4 | * |
| 5 | * @link https://matomo.org |
| 6 | * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 7 | * |
| 8 | */ |
| 9 | namespace Piwik\Tracker; |
| 10 | |
| 11 | use Piwik\Cache as PiwikCache; |
| 12 | use Piwik\Common; |
| 13 | use Piwik\Container\StaticContainer; |
| 14 | use Piwik\DeviceDetector\DeviceDetectorFactory; |
| 15 | use Piwik\Exception\UnexpectedWebsiteFoundException; |
| 16 | use Piwik\Network\IP; |
| 17 | use Piwik\Piwik; |
| 18 | use Piwik\Plugins\SitesManager\SiteUrls; |
| 19 | use Piwik\Tracker\Visit\ReferrerSpamFilter; |
| 20 | |
| 21 | /** |
| 22 | * This class contains the logic to exclude some visitors from being tracked as per user settings |
| 23 | */ |
| 24 | class VisitExcluded |
| 25 | { |
/**
 * Filter used to decide whether the request's referrer is known spam.
 *
 * @var ReferrerSpamFilter
 */
private $spamFilter;

/**
 * Website attributes already loaded by this instance, indexed by site id.
 *
 * @var array
 */
private $siteCache = array();

/**
 * The tracking request being evaluated.
 *
 * @var Request
 */
public $request;

/**
 * Site id as returned by Request::getIdSite(), or 0 when the request does not
 * resolve to a known website.
 *
 * @var int
 */
public $idSite;

/**
 * User agent of the request after Common::unsanitizeInputValue().
 *
 * @var string
 */
public $userAgent;

/**
 * Visitor IP as returned by Request::getIp(); the rest of this class feeds it
 * to IP::fromBinaryIP().
 */
public $ip;

/**
 * Captures everything the exclusion checks need from the tracking request.
 *
 * The request, idSite, userAgent and ip properties used to be created dynamically
 * by the assignments below. They are now declared explicitly (dynamic properties
 * are deprecated as of PHP 8.2) and stay public so their visibility is unchanged.
 *
 * @param Request $request
 */
public function __construct(Request $request)
{
    $this->spamFilter = new ReferrerSpamFilter();
    $this->request    = $request;

    try {
        $this->idSite = $request->getIdSite();
    } catch (UnexpectedWebsiteFoundException $e) {
        // most checks will still work on a global scope and we still want to be able to test if this is a valid
        // visit or not
        $this->idSite = 0;
    }

    $userAgent       = $request->getUserAgent();
    $this->userAgent = Common::unsanitizeInputValue($userAgent);
    $this->ip        = $request->getIp();
}
| 52 | |
/**
 * Test if the current visitor is excluded from the statistics.
 *
 * The checks run in this order and stop changing the result once one of them excludes
 * the request: bot detection, missing `rec` parameter, the `Tracker.isExcludedVisit`
 * event (plugins), ignore cookie, excluded IPs, excluded user agents, referrer spam,
 * unknown URLs and finally browser prefetch requests.
 *
 * Plugins can for example exclude visitors based on the
 *  - IP
 *  - If a given cookie is found
 *
 * @return bool True if the visit must not be saved, false otherwise
 */
public function isExcluded()
{
    $excluded = false;

    if ($this->isNonHumanBot()) {
        Common::printDebug('Search bot detected, visit excluded');
        $excluded = true;
    }

    /*
     * Requests built with piwik.js will contain a rec=1 parameter. This is used as
     * an indication that the request is made by a JS enabled device. By default, Piwik
     * doesn't track non-JS visitors.
     */
    if (!$excluded) {
        $toRecord = $this->request->getParam($parameterForceRecord = 'rec');
        if (!$toRecord) {
            // REQUEST_METHOD may be unset; check explicitly instead of silencing the notice with "@".
            $requestMethod = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : '';
            // A single message is logged here; a second, redundant "parameter not found" line was removed.
            Common::printDebug($requestMethod . ' parameter ' . $parameterForceRecord . ' not found in URL, request excluded');
            $excluded = true;
        }
    }

    /**
     * Triggered on every tracking request.
     *
     * This event can be used to tell the Tracker not to record this particular action or visit.
     *
     * @param bool &$excluded Whether the request should be excluded or not. Initialized
     *                        to `false`. Event subscribers should set it to `true` in
     *                        order to exclude the request.
     * @param Request $request The request object which contains all of the request's information
     *
     */
    Piwik::postEvent('Tracker.isExcludedVisit', array(&$excluded, $this->request));

    /*
     * Following exclude operations happen after the hook.
     * These are of higher priority and should not be overwritten by plugins.
     */

    // Checking if the Piwik ignore cookie is set
    if (!$excluded) {
        $excluded = $this->isIgnoreCookieFound();
        if ($excluded) {
            Common::printDebug("Ignore cookie found.");
        }
    }

    // Checking for excluded IPs
    if (!$excluded) {
        $excluded = $this->isVisitorIpExcluded();
        if ($excluded) {
            Common::printDebug("IP excluded.");
        }
    }

    // Check if user agent should be excluded
    if (!$excluded) {
        $excluded = $this->isUserAgentExcluded();
        if ($excluded) {
            Common::printDebug("User agent excluded.");
        }
    }

    // Check if Referrer URL is a known spam
    if (!$excluded) {
        $excluded = $this->isReferrerSpamExcluded();
        if ($excluded) {
            Common::printDebug("Referrer URL is blacklisted as spam.");
        }
    }

    // Check if request URL is excluded
    if (!$excluded) {
        $excluded = $this->isUrlExcluded();
        if ($excluded) {
            Common::printDebug("Unknown URL is not allowed to track.");
        }
    }

    // Browser prefetch/preview requests are not real page views
    if (!$excluded) {
        if ($this->isPrefetchDetected()) {
            $excluded = true;
            Common::printDebug("Prefetch request detected, not a real visit so we Ignore this visit/pageview");
        }
    }

    if ($excluded) {
        Common::printDebug("Visitor excluded.");
        return true;
    }

    return false;
}
| 157 | |
/**
 * Tells whether the request headers mark this request as a browser prefetch/preview.
 *
 * Looks at the X-Purpose header ("preview" or "instant") and at the X-Moz header ("prefetch").
 *
 * @return bool
 */
protected function isPrefetchDetected()
{
    $purposeValues = array('preview', 'instant');

    if (isset($_SERVER['HTTP_X_PURPOSE']) && in_array($_SERVER['HTTP_X_PURPOSE'], $purposeValues)) {
        return true;
    }

    if (!isset($_SERVER['HTTP_X_MOZ'])) {
        return false;
    }

    return $_SERVER['HTTP_X_MOZ'] == 'prefetch';
}
| 165 | |
/**
 * Live/Bing/MSN bot and Googlebot are evolving to detect cloaked websites.
 * As a result, these sophisticated bots exhibit characteristics of
 * browsers (cookies enabled, executing JavaScript, etc).
 *
 * A request counts as a bot when the `bots` request parameter is not set to a truthy value
 * and either the device detector flags the user agent as a bot or the visitor IP is in one
 * of the known bot ranges.
 *
 * @see \DeviceDetector\Parser\Bot
 *
 * @return boolean
 */
protected function isNonHumanBot()
{
    $botsAllowed = $this->request->getParam('bots');

    // The detector is created before looking at $botsAllowed, as before.
    $factory  = StaticContainer::get(DeviceDetectorFactory::class);
    $detector = $factory->makeInstance($this->userAgent);

    if ($botsAllowed) {
        return false;
    }

    if ($detector->isBot()) {
        return true;
    }

    return (bool) $this->isIpInRange();
}
| 184 | |
/**
 * Tells whether the visitor IP belongs to one of the known bot IP ranges.
 *
 * The answer is memoized per IP in the transient cache. Requests that look like they come
 * through the Chrome data saver proxy are never reported as being in range.
 *
 * @return bool
 */
private function isIpInRange()
{
    $transientCache = PiwikCache::getTransientCache();

    $visitorIp = IP::fromBinaryIP($this->ip);
    $cacheKey  = 'VisitExcludedIsIpInRange' . $visitorIp->toString();

    if ($transientCache->contains($cacheKey)) {
        return $transientCache->fetch($cacheKey);
    }

    $matchesBotRange = $this->isChromeDataSaverUsed($visitorIp)
        ? false
        : $visitorIp->isInRanges($this->getBotIpRanges());

    $transientCache->save($cacheKey, $matchesBotRange);

    return $matchesBotRange;
}
| 206 | |
/**
 * Detects requests relayed by the Chrome data saver (compression) proxy: the Via header
 * mentions "chrome-compression-proxy" and the IP is in one of the Google ranges.
 *
 * @see https://github.com/piwik/piwik/issues/7733
 *
 * @param IP $ip
 * @return bool
 */
private function isChromeDataSaverUsed(IP $ip)
{
    if (empty($_SERVER['HTTP_VIA'])) {
        return false;
    }

    $viaHeader = strtolower($_SERVER['HTTP_VIA']);
    if (strpos($viaHeader, 'chrome-compression-proxy') === false) {
        return false;
    }

    return (bool) $ip->isInRanges($this->getGoogleBotIpRanges());
}
| 214 | |
/**
 * Returns every IP range treated as belonging to a bot: the Google ranges first,
 * followed by the Live/Bing/MSN, Yahoo and miscellaneous entries.
 *
 * @return string[]
 */
protected function getBotIpRanges()
{
    // Live/Bing/MSN
    $microsoftRanges = array(
        '64.4.0.0/18',
        '65.52.0.0/14',
        '157.54.0.0/15',
        '157.56.0.0/14',
        '157.60.0.0/16',
        '207.46.0.0/16',
        '207.68.128.0/18',
        '207.68.192.0/20',
        '131.253.26.0/20',
        '131.253.24.0/20',
    );

    // Yahoo
    $yahooRanges = array(
        '72.30.198.0/20',
        '72.30.196.0/20',
        '98.137.207.0/20',
    );

    // Chinese bot hammering websites
    $otherBots = array(
        '1.202.218.8',
    );

    return array_merge($this->getGoogleBotIpRanges(), $microsoftRanges, $yahooRanges, $otherBots);
}
| 238 | |
/**
 * Returns the IP ranges attributed to Google; used both for bot detection and for
 * recognising the Chrome data saver proxy.
 *
 * @return string[]
 */
private function getGoogleBotIpRanges()
{
    $googleRanges = array();

    $googleRanges[] = '216.239.32.0/19';
    $googleRanges[] = '64.233.160.0/19';
    $googleRanges[] = '66.249.80.0/20';
    $googleRanges[] = '72.14.192.0/18';
    $googleRanges[] = '209.85.128.0/17';
    $googleRanges[] = '66.102.0.0/20';
    $googleRanges[] = '74.125.0.0/16';
    $googleRanges[] = '64.18.0.0/20';
    $googleRanges[] = '207.126.144.0/20';
    $googleRanges[] = '173.194.0.0/16';

    return $googleRanges;
}
| 254 | |
/**
 * Checks for the ignore cookie that users can set in the Piwik admin screen.
 * Logs a debug message when the cookie is present.
 *
 * @return bool
 */
protected function isIgnoreCookieFound()
{
    if (!IgnoreCookie::isIgnoreCookieFound()) {
        return false;
    }

    Common::printDebug('Matomo ignore cookie was found, visit not tracked.');

    return true;
}
| 268 | |
/**
 * Tells whether the visitor IP matches one of the excluded IPs/ranges configured for the
 * site (or the global list when the site is unknown).
 *
 * @return bool
 */
protected function isVisitorIpExcluded()
{
    $excludedIps = $this->getAttributes('excluded_ips', 'global_excluded_ips');
    if (empty($excludedIps)) {
        return false;
    }

    $visitorIp = IP::fromBinaryIP($this->ip);
    if (!$visitorIp->isInRanges($excludedIps)) {
        return false;
    }

    Common::printDebug('Visitor IP ' . $visitorIp->toString() . ' is excluded from being tracked');

    return true;
}
| 288 | |
/**
 * Reads a tracker-cache attribute for the current site.
 *
 * The site's attributes are loaded once per instance and kept in $this->siteCache. The global
 * attribute is only consulted when loading the site attributes throws
 * UnexpectedWebsiteFoundException.
 *
 * @param string      $siteAttribute   Key to look up in the website attributes.
 * @param string|null $globalAttribute Key to look up in the general cache as a fallback, or null for none.
 * @return mixed|null The attribute value, or null when nothing was found.
 */
private function getAttributes($siteAttribute, $globalAttribute)
{
    $idSite = $this->idSite;

    if (!isset($this->siteCache[$idSite])) {
        $this->siteCache[$idSite] = array();
    }

    try {
        if (empty($this->siteCache[$idSite])) {
            $this->siteCache[$idSite] = Cache::getCacheWebsiteAttributes($idSite);
        }

        if (isset($this->siteCache[$idSite][$siteAttribute])) {
            return $this->siteCache[$idSite][$siteAttribute];
        }
    } catch (UnexpectedWebsiteFoundException $e) {
        $generalCache = Cache::getCacheGeneral();

        if ($globalAttribute && isset($generalCache[$globalAttribute])) {
            return $generalCache[$globalAttribute];
        }
    }

    return null;
}
| 308 | |
/**
 * Checks if the request URL is excluded.
 *
 * Only applies when the site has `exclude_unknown_urls` enabled and has URLs configured; the
 * request is then excluded unless its `url` parameter matches one of the site's URLs.
 *
 * @return bool
 */
protected function isUrlExcluded()
{
    $excludeUnknownUrls = $this->getAttributes('exclude_unknown_urls', null);
    $siteUrls           = $this->getAttributes('urls', null);

    if (empty($excludeUnknownUrls) || empty($siteUrls)) {
        return false;
    }

    $requestUrl = $this->request->getParam('url');
    $urlParts   = parse_url($requestUrl);

    $siteUrlsHelper = new SiteUrls();
    $urlsByHost     = $siteUrlsHelper->groupUrlsByHost(array($this->idSite => $siteUrls));
    $matchingSites  = $siteUrlsHelper->getIdSitesMatchingUrl($urlParts, $urlsByHost);

    $belongsToSite = isset($matchingSites) && in_array($this->idSite, $matchingSites);

    return !$belongsToSite;
}
| 333 | |
/**
 * Returns true if the user agent of this request should be excluded for the current site.
 *
 * Visits whose user agent string contains (case-insensitively) one of the excluded_user_agents
 * strings for the site being tracked (or one of the global strings) will be excluded.
 *
 * Empty entries in the exclusion list are ignored: since PHP 8.0 stripos() accepts an empty
 * needle and reports a match at offset 0, which would exclude every single visit, while older
 * PHP versions raised a warning for it.
 *
 * @return bool
 */
protected function isUserAgentExcluded()
{
    $excludedAgents = $this->getAttributes('excluded_user_agents', 'global_excluded_user_agents');

    if (!empty($excludedAgents)) {
        foreach ($excludedAgents as $excludedUserAgent) {
            // skip blank entries, an empty needle must never match
            if ((string) $excludedUserAgent === '') {
                continue;
            }

            // if the excluded user agent string part is in this visit's user agent, this visit should be excluded
            if (stripos($this->userAgent, $excludedUserAgent) !== false) {
                return true;
            }
        }
    }

    return false;
}
| 358 | |
/**
 * Asks the referrer spam filter whether this request comes from a known spammer.
 *
 * @return bool
 */
protected function isReferrerSpamExcluded()
{
    $request = $this->request;
    $isSpam  = $this->spamFilter->isSpam($request);

    return $isSpam;
}
| 368 | } |
| 369 |