PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / 1.3.1
Matomo Analytics – Powerful, Privacy-First Insights for WordPress v1.3.1
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / Tracker / VisitExcluded.php
matomo / app / core / Tracker Last commit date
Db 6 years ago Handler 6 years ago TableLogAction 6 years ago Visit 6 years ago Action.php 6 years ago ActionPageview.php 6 years ago Cache.php 6 years ago Db.php 6 years ago Failures.php 6 years ago FingerprintSalt.php 6 years ago GoalManager.php 6 years ago Handler.php 6 years ago IgnoreCookie.php 6 years ago LogTable.php 6 years ago Model.php 6 years ago PageUrl.php 6 years ago Request.php 5 years ago RequestProcessor.php 6 years ago RequestSet.php 6 years ago Response.php 6 years ago ScheduledTasksRunner.php 6 years ago Settings.php 5 years ago TableLogAction.php 6 years ago TrackerCodeGenerator.php 6 years ago TrackerConfig.php 6 years ago Visit.php 5 years ago VisitExcluded.php 6 years ago VisitInterface.php 6 years ago Visitor.php 6 years ago VisitorNotFoundInDb.php 6 years ago VisitorRecognizer.php 6 years ago
VisitExcluded.php
369 lines
1 <?php
2 /**
3 * Piwik - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9 namespace Piwik\Tracker;
10
11 use Piwik\Cache as PiwikCache;
12 use Piwik\Common;
13 use Piwik\Container\StaticContainer;
14 use Piwik\DeviceDetector\DeviceDetectorFactory;
15 use Piwik\Exception\UnexpectedWebsiteFoundException;
16 use Piwik\Network\IP;
17 use Piwik\Piwik;
18 use Piwik\Plugins\SitesManager\SiteUrls;
19 use Piwik\Tracker\Visit\ReferrerSpamFilter;
20
/**
 * Decides whether a tracking request must be ignored instead of being recorded.
 *
 * A request is excluded when it comes from a detected bot, lacks the `rec` parameter, is vetoed by a
 * `Tracker.isExcludedVisit` subscriber, carries the ignore cookie, matches an excluded IP or user agent,
 * has a spam referrer, targets a URL that is not allowed for the site, or is a browser prefetch.
 */
class VisitExcluded
{
    /**
     * @var ReferrerSpamFilter
     */
    private $spamFilter;

    /**
     * Website attributes already loaded by getAttributes(), indexed by site id.
     *
     * @var array
     */
    private $siteCache = array();

    /**
     * The tracking request being evaluated.
     *
     * Declared explicitly (it used to be created dynamically in the constructor, which is deprecated
     * since PHP 8.2). Kept public because dynamically created properties were publicly accessible.
     *
     * @var Request
     */
    public $request;

    /**
     * Site id returned by the request, or 0 when the request does not resolve to a known website.
     */
    public $idSite;

    /**
     * User agent of the request after Common::unsanitizeInputValue() was applied.
     */
    public $userAgent;

    /**
     * Visitor IP as returned by Request::getIp(); it is later handed to IP::fromBinaryIP().
     */
    public $ip;

    /**
     * @param Request $request
     */
    public function __construct(Request $request)
    {
        $this->spamFilter = new ReferrerSpamFilter();
        $this->request    = $request;

        try {
            $this->idSite = $request->getIdSite();
        } catch (UnexpectedWebsiteFoundException $e) {
            // most checks will still work on a global scope and we still want to be able to test if this
            // is a valid visit or not
            $this->idSite = 0;
        }

        $this->userAgent = Common::unsanitizeInputValue($request->getUserAgent());
        $this->ip        = $request->getIp();
    }

    /**
     * Test if the current visitor is excluded from the statistics.
     *
     * Plugins can for example exclude visitors based on the
     *  - IP
     *  - If a given cookie is found
     *
     * The checks run in a fixed order and stop influencing the result as soon as one of them excludes the
     * request. The checks performed after the `Tracker.isExcludedVisit` event only run while the request
     * is still not excluded, so a plugin can add an exclusion but cannot undo one of those later checks.
     *
     * @return bool True if the visit must not be saved, false otherwise
     */
    public function isExcluded()
    {
        $excluded = false;

        if ($this->isNonHumanBot()) {
            Common::printDebug('Search bot detected, visit excluded');
            $excluded = true;
        }

        /*
         * Requests built with piwik.js will contain a rec=1 parameter. This is used as
         * an indication that the request is made by a JS enabled device. By default, Piwik
         * doesn't track non-JS visitors.
         */
        if (!$excluded) {
            $parameterForceRecord = 'rec';
            $toRecord             = $this->request->getParam($parameterForceRecord);

            if (!$toRecord) {
                // REQUEST_METHOD is not guaranteed to be present (e.g. outside of a web request)
                $requestMethod = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : '';

                Common::printDebug($requestMethod . ' parameter ' . $parameterForceRecord . ' not found in URL, request excluded');
                $excluded = true;
                Common::printDebug("'$parameterForceRecord' parameter not found.");
            }
        }

        /**
         * Triggered on every tracking request.
         *
         * This event can be used to tell the Tracker not to record this particular action or visit.
         *
         * @param bool &$excluded Whether the request should be excluded or not. Initialized
         *                        to `false`. Event subscribers should set it to `true` in
         *                        order to exclude the request.
         * @param Request $request The request object which contains all of the request's information
         *
         */
        Piwik::postEvent('Tracker.isExcludedVisit', array(&$excluded, $this->request));

        /*
         * Following exclude operations happen after the hook.
         * These are of higher priority and should not be overwritten by plugins.
         */

        // Checking if the Piwik ignore cookie is set
        if (!$excluded) {
            $excluded = $this->isIgnoreCookieFound();
            if ($excluded) {
                Common::printDebug("Ignore cookie found.");
            }
        }

        // Checking for excluded IPs
        if (!$excluded) {
            $excluded = $this->isVisitorIpExcluded();
            if ($excluded) {
                Common::printDebug("IP excluded.");
            }
        }

        // Check if user agent should be excluded
        if (!$excluded) {
            $excluded = $this->isUserAgentExcluded();
            if ($excluded) {
                Common::printDebug("User agent excluded.");
            }
        }

        // Check if Referrer URL is a known spam
        if (!$excluded) {
            $excluded = $this->isReferrerSpamExcluded();
            if ($excluded) {
                Common::printDebug("Referrer URL is blacklisted as spam.");
            }
        }

        // Check if request URL is excluded
        if (!$excluded) {
            $excluded = $this->isUrlExcluded();
            if ($excluded) {
                Common::printDebug("Unknown URL is not allowed to track.");
            }
        }

        if (!$excluded && $this->isPrefetchDetected()) {
            $excluded = true;
            Common::printDebug("Prefetch request detected, not a real visit so we Ignore this visit/pageview");
        }

        if ($excluded) {
            Common::printDebug("Visitor excluded.");
            return true;
        }

        return false;
    }

    /**
     * Tells whether the request headers mark this request as a prefetch/preview rather than a real view.
     *
     * Looks at the `X-Purpose` header (values "preview" or "instant") and the `X-Moz` header
     * (value "prefetch") as exposed through $_SERVER.
     *
     * @return bool
     */
    protected function isPrefetchDetected()
    {
        $hasPrefetchPurpose = isset($_SERVER['HTTP_X_PURPOSE'])
            && in_array($_SERVER['HTTP_X_PURPOSE'], array('preview', 'instant'), true);

        $isMozPrefetch = isset($_SERVER['HTTP_X_MOZ'])
            && $_SERVER['HTTP_X_MOZ'] === 'prefetch';

        return $hasPrefetchPurpose || $isMozPrefetch;
    }

    /**
     * Live/Bing/MSN bot and Googlebot are evolving to detect cloaked websites.
     * As a result, these sophisticated bots exhibit characteristics of
     * browsers (cookies enabled, executing JavaScript, etc).
     *
     * A request is reported as a bot when the `bots` request parameter is not truthy and either the
     * device detector flags the user agent as a bot or the visitor IP is inside a known bot range.
     *
     * @see \DeviceDetector\Parser\Bot
     *
     * @return boolean
     */
    protected function isNonHumanBot()
    {
        $allowBots = $this->request->getParam('bots');

        $deviceDetector = StaticContainer::get(DeviceDetectorFactory::class)->makeInstance($this->userAgent);

        return !$allowBots
            && ($deviceDetector->isBot() || $this->isIpInRange());
    }

    /**
     * Tells whether the visitor IP belongs to one of the known bot IP ranges.
     *
     * The result is stored in the transient cache under a key derived from the IP string, so the
     * range lookup is not repeated for the same IP while that cache entry exists. Requests identified
     * by isChromeDataSaverUsed() are never reported as being in range.
     *
     * @return bool
     */
    private function isIpInRange()
    {
        $cache = PiwikCache::getTransientCache();

        $ip  = IP::fromBinaryIP($this->ip);
        $key = 'VisitExcludedIsIpInRange' . $ip->toString();

        if ($cache->contains($key)) {
            return $cache->fetch($key);
        }

        $isInRanges = $this->isChromeDataSaverUsed($ip)
            ? false
            : $ip->isInRanges($this->getBotIpRanges());

        $cache->save($key, $isInRanges);

        return $isInRanges;
    }

    /**
     * Tells whether the request went through the Chrome compression proxy: the `Via` header mentions
     * "chrome-compression-proxy" (case-insensitive) and the IP is inside one of the Google ranges.
     *
     * @param IP $ip
     * @return bool
     */
    private function isChromeDataSaverUsed(IP $ip)
    {
        // see https://github.com/piwik/piwik/issues/7733
        return !empty($_SERVER['HTTP_VIA'])
            && false !== strpos(strtolower($_SERVER['HTTP_VIA']), 'chrome-compression-proxy')
            && $ip->isInRanges($this->getGoogleBotIpRanges());
    }

    /**
     * Returns every IP range (or single IP) treated as belonging to a bot: the Google ranges followed
     * by the ranges listed below.
     *
     * @return string[]
     */
    protected function getBotIpRanges()
    {
        return array_merge($this->getGoogleBotIpRanges(), array(
            // Live/Bing/MSN
            '64.4.0.0/18',
            '65.52.0.0/14',
            '157.54.0.0/15',
            '157.56.0.0/14',
            '157.60.0.0/16',
            '207.46.0.0/16',
            '207.68.128.0/18',
            '207.68.192.0/20',
            '131.253.26.0/20',
            '131.253.24.0/20',

            // Yahoo
            '72.30.198.0/20',
            '72.30.196.0/20',
            '98.137.207.0/20',
            // Chinese bot hammering websites
            '1.202.218.8'
        ));
    }

    /**
     * Returns the IP ranges attributed to Google. They are used both for bot detection and for
     * recognising the Chrome compression proxy.
     *
     * @return string[]
     */
    private function getGoogleBotIpRanges()
    {
        return array(
            '216.239.32.0/19',
            '64.233.160.0/19',
            '66.249.80.0/20',
            '72.14.192.0/18',
            '209.85.128.0/17',
            '66.102.0.0/20',
            '74.125.0.0/16',
            '64.18.0.0/20',
            '207.126.144.0/20',
            '173.194.0.0/16'
        );
    }

    /**
     * Looks for the ignore cookie that users can set in the Piwik admin screen.
     * @return bool
     */
    protected function isIgnoreCookieFound()
    {
        if (IgnoreCookie::isIgnoreCookieFound()) {
            Common::printDebug('Matomo ignore cookie was found, visit not tracked.');
            return true;
        }

        return false;
    }

    /**
     * Checks if the visitor ip is in the excluded list
     *
     * @return bool
     */
    protected function isVisitorIpExcluded()
    {
        $excludedIps = $this->getAttributes('excluded_ips', 'global_excluded_ips');

        if (empty($excludedIps)) {
            return false;
        }

        $ip = IP::fromBinaryIP($this->ip);
        if ($ip->isInRanges($excludedIps)) {
            Common::printDebug('Visitor IP ' . $ip->toString() . ' is excluded from being tracked');
            return true;
        }

        return false;
    }

    /**
     * Reads a setting for the current site, loading the site's attributes on first use.
     *
     * The global attribute is consulted only when loading the website attributes throws an
     * UnexpectedWebsiteFoundException; it is not a fallback for a site attribute that is merely unset.
     *
     * @param string      $siteAttribute   Key to read from the website attributes.
     * @param string|null $globalAttribute Key to read from the general tracker cache when the website
     *                                     is not found, or null to skip that lookup.
     * @return mixed|null The attribute value, or null when nothing was found.
     */
    private function getAttributes($siteAttribute, $globalAttribute)
    {
        if (!isset($this->siteCache[$this->idSite])) {
            $this->siteCache[$this->idSite] = array();
        }

        try {
            if (empty($this->siteCache[$this->idSite])) {
                $this->siteCache[$this->idSite] = Cache::getCacheWebsiteAttributes($this->idSite);
            }
            if (isset($this->siteCache[$this->idSite][$siteAttribute])) {
                return $this->siteCache[$this->idSite][$siteAttribute];
            }
        } catch (UnexpectedWebsiteFoundException $e) {
            $cached = Cache::getCacheGeneral();
            if ($globalAttribute && isset($cached[$globalAttribute])) {
                return $cached[$globalAttribute];
            }
        }

        return null;
    }

    /**
     * Checks if request URL is excluded
     *
     * Only applies when the site has a non-empty `exclude_unknown_urls` attribute and at least one
     * configured URL. The request is then excluded unless its `url` parameter matches the current site.
     *
     * @return bool
     */
    protected function isUrlExcluded()
    {
        $excludedUrls = $this->getAttributes('exclude_unknown_urls', null);
        $siteUrls     = $this->getAttributes('urls', null);

        if (empty($excludedUrls) || empty($siteUrls)) {
            return false;
        }

        $parsedUrl = parse_url($this->request->getParam('url'));

        $trackingUrl = new SiteUrls();
        $urls        = $trackingUrl->groupUrlsByHost(array($this->idSite => $siteUrls));
        $idSites     = $trackingUrl->getIdSitesMatchingUrl($parsedUrl, $urls);

        // NOTE(review): loose in_array() is kept on purpose; the id types on both sides are not
        // visible here, so a strict comparison could change which requests get excluded.
        return !isset($idSites) || !in_array($this->idSite, $idSites);
    }

    /**
     * Returns true if the specified user agent should be excluded for the current site or not.
     *
     * Visits whose user agent string contains one of the excluded_user_agents strings for the
     * site being tracked (or one of the global strings) will be excluded. The comparison is
     * case-insensitive.
     *
     * @internal param string $this ->userAgent The user agent string.
     * @return bool
     */
    protected function isUserAgentExcluded()
    {
        $excludedAgents = $this->getAttributes('excluded_user_agents', 'global_excluded_user_agents');

        if (empty($excludedAgents)) {
            return false;
        }

        foreach ($excludedAgents as $excludedUserAgent) {
            // An empty needle is found at offset 0 of every string on PHP 8 (and raises a warning on
            // PHP 7), so a blank entry must never be treated as a match.
            if ((string) $excludedUserAgent === '') {
                continue;
            }

            // if the excluded user agent string part is in this visit's user agent, this visit should be excluded
            if (stripos($this->userAgent, $excludedUserAgent) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns true if the Referrer is a known spammer.
     *
     * @return bool
     */
    protected function isReferrerSpamExcluded()
    {
        return $this->spamFilter->isSpam($this->request);
    }
}
369