PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / 1.3.1
Matomo Analytics – Powerful, Privacy-First Insights for WordPress v1.3.1
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / Tracker / PageUrl.php
matomo / app / core / Tracker Last commit date
Db 6 years ago Handler 6 years ago TableLogAction 6 years ago Visit 6 years ago Action.php 6 years ago ActionPageview.php 6 years ago Cache.php 6 years ago Db.php 6 years ago Failures.php 6 years ago FingerprintSalt.php 6 years ago GoalManager.php 6 years ago Handler.php 6 years ago IgnoreCookie.php 6 years ago LogTable.php 6 years ago Model.php 6 years ago PageUrl.php 6 years ago Request.php 5 years ago RequestProcessor.php 6 years ago RequestSet.php 6 years ago Response.php 6 years ago ScheduledTasksRunner.php 6 years ago Settings.php 5 years ago TableLogAction.php 6 years ago TrackerCodeGenerator.php 6 years ago TrackerConfig.php 6 years ago Visit.php 5 years ago VisitExcluded.php 6 years ago VisitInterface.php 6 years ago Visitor.php 6 years ago VisitorNotFoundInDb.php 6 years ago VisitorRecognizer.php 6 years ago
PageUrl.php
385 lines
1 <?php
2 /**
3 * Piwik - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9
10 namespace Piwik\Tracker;
11
12 use Piwik\Common;
13 use Piwik\Config;
14 use Piwik\Piwik;
15 use Piwik\UrlHelper;
16
17 class PageUrl
18 {
19
/**
 * Lookup table assigning an integer ID to every recognised URL prefix
 * (scheme, optionally followed by "www.").
 *
 * The "www." variant of each scheme is listed before the bare scheme;
 * self::normalizeUrl() walks the table in this order and stops at the
 * first prefix that matches.
 *
 * @see self::normalizeUrl(), self::reconstructNormalizedUrl()
 */
public static $urlPrefixMap = [
    'http://www.'  => 1,
    'http://'      => 0,
    'https://www.' => 3,
    'https://'     => 2,
];
30
/**
 * Strips the excluded query parameters from the given URL.
 *
 * The URL is first cleaned up (see self::cleanupUrl()) and parsed. If it has a
 * query string, the excluded parameters are removed from it. If it has no query
 * string but does have a fragment, the fragment is treated as a query string and
 * filtered instead. A URL with neither is simply rebuilt from its parsed parts.
 *
 * @static
 * @param string $originalUrl
 * @param int $idSite
 * @param array $additionalParametersToExclude Extra parameter names to remove on top of
 *                                             those returned by self::getQueryParametersToExclude().
 * @return bool|string Returned URL is HTML entities decoded
 */
public static function excludeQueryParametersFromUrl($originalUrl, $idSite, $additionalParametersToExclude = [])
{
    $cleanedUrl = self::cleanupUrl($originalUrl);
    $urlParts   = self::cleanupHostAndHashTag(@parse_url($cleanedUrl), $idSite);

    // Resolved up front (even when nothing ends up being filtered) because the
    // lookup posts an event and prints debug output.
    $excludedNames = array_merge(self::getQueryParametersToExclude($idSite), $additionalParametersToExclude);

    // Pick the URL component to filter: the query string wins, the fragment is the fallback.
    if (!empty($urlParts['query'])) {
        $component = 'query';
    } elseif (!empty($urlParts['fragment'])) {
        // Exclude from the hash tag as well
        $component = 'fragment';
    } else {
        return UrlHelper::getParseUrlReverse($urlParts);
    }

    $parameters           = UrlHelper::getArrayFromQueryString($urlParts[$component]);
    $urlParts[$component] = UrlHelper::getQueryStringWithExcludedParameters($parameters, $excludedNames);

    return UrlHelper::getParseUrlReverse($urlParts);
}
65
/**
 * Builds the lower-cased list of parameter names that must be removed from the
 * query string of every tracked URL for the given site.
 *
 * The list combines, in this order: the site's own excluded parameters, the
 * globally configured ones, and the campaign name/keyword parameters. Plugins
 * may alter it through the Tracker.PageUrl.getQueryParametersToExclude event.
 *
 * @static
 * @param int $idSite
 * @return array
 */
public static function getQueryParametersToExclude($idSite)
{
    // index 0: campaign name parameters, index 1: campaign keyword parameters
    list($campaignNameParameters, $campaignKeywordParameters) = Common::getCampaignParameters();
    $campaignParameters = array_merge($campaignNameParameters, $campaignKeywordParameters);

    $siteAttributes = Cache::getCacheWebsiteAttributes($idSite);

    $parametersToExclude = array_merge(
        self::getExcludedParametersFromWebsite($siteAttributes),
        self::getUrlParameterNamesToExcludeFromUrl(),
        $campaignParameters
    );

    /**
     * Triggered before setting the action url in Piwik\Tracker\Action so plugins can register
     * parameters to be excluded from the tracking URL (e.g. campaign parameters).
     *
     * @param array &$parametersToExclude An array of parameters to exclude from the tracking url.
     */
    Piwik::postEvent('Tracker.PageUrl.getQueryParametersToExclude', [&$parametersToExclude]);

    if (!empty($parametersToExclude)) {
        // Logged before lower-casing, i.e. with the names as they were registered.
        Common::printDebug(sprintf('Excluding parameters "%s" from URL', implode(',', $parametersToExclude)));
    }

    return array_map('strtolower', $parametersToExclude);
}
103
/**
 * Reads the comma-separated `url_query_parameter_to_exclude_from_url` setting of the
 * Tracker config section and returns its entries with surrounding whitespace trimmed.
 *
 * @return array
 */
protected static function getUrlParameterNamesToExcludeFromUrl()
{
    $configuredList = Config::getInstance()->Tracker['url_query_parameter_to_exclude_from_url'];

    return array_map('trim', explode(',', $configuredList));
}
116
/**
 * Tells whether URL fragments should be dropped for a site.
 *
 * The decision is read from the site attributes held in the Tracker cache:
 * fragments are removed unless the `keep_url_fragment` attribute is set to a
 * non-empty value.
 *
 * @param $idSite int The ID of the site to check for.
 * @return bool true if fragments should be removed, false otherwise.
 */
public static function shouldRemoveURLFragmentFor($idSite)
{
    $siteAttributes = Cache::getCacheWebsiteAttributes($idSite);
    $keepFragment   = !empty($siteAttributes['keep_url_fragment']);

    return !$keepFragment;
}
131
/**
 * Normalises a URL fragment, or discards it when the site is set up that way.
 *
 * @param $urlFragment string The URL fragment to process.
 * @param $idSite int|bool When not false, the site with this ID is checked and, if
 *                         fragments are to be removed for it, an empty string is returned.
 *
 * @return string The processed URL fragment.
 */
public static function processUrlFragment($urlFragment, $idSite = false)
{
    $siteGiven = ($idSite !== false);

    // The site is configured to discard fragments: the processed fragment is empty.
    if ($siteGiven && self::shouldRemoveURLFragmentFor($idSite)) {
        return '';
    }

    // Remove trailing Hash tag in ?query#hash#
    if (substr($urlFragment, -1) == '#') {
        return substr($urlFragment, 0, -1);
    }

    return $urlFragment;
}
158
/**
 * Cleans up the hostname (some browsers do not lower-case it) and deals with
 * the hash tag of an incoming URL based on the website setting.
 *
 * An empty $parsedUrl (e.g. a failed parse) is returned untouched.
 *
 * @param $parsedUrl array|bool URL components as produced by parse_url().
 * @param $idSite int|bool The site ID of the current visit. This parameter is
 *                         only used by the tracker to see if we should remove
 *                         the URL fragment for this site.
 * @return array
 */
protected static function cleanupHostAndHashTag($parsedUrl, $idSite = false)
{
    if (!empty($parsedUrl)) {
        if (!empty($parsedUrl['host'])) {
            $parsedUrl['host'] = Common::mb_strtolower($parsedUrl['host']);
        }

        if (!empty($parsedUrl['fragment'])) {
            $parsedUrl['fragment'] = self::processUrlFragment($parsedUrl['fragment'], $idSite);
        }
    }

    return $parsedUrl;
}
185
/**
 * Converts a Matrix-style URL into a regular query-string URL, i.e.
 *   from http://example.org/thing;paramA=1;paramB=6542
 *   to   http://example.org/thing?paramA=1&paramB=6542
 *
 * Three cases are handled:
 *  - no ";" at all, or a "?" located before the first ";": the URL is returned unchanged;
 *  - no "?" at all: the first ";" becomes "?" and every other ";" becomes "&";
 *  - a "?" located after the first ";": that "?" is treated as one more separator,
 *    then the conversion above is applied.
 *
 * @param string $originalUrl
 * @return string
 */
public static function convertMatrixUrl($originalUrl)
{
    $firstSemicolon = strpos($originalUrl, ';');
    if ($firstSemicolon === false) {
        return $originalUrl;
    }

    $questionMark = strpos($originalUrl, '?');
    if ($questionMark !== false && $questionMark < $firstSemicolon) {
        // A regular query string starts before any ";": nothing to convert.
        return $originalUrl;
    }

    $converted = $originalUrl;
    if ($questionMark !== false) {
        // The "?" sits inside the matrix parameters: demote it to a plain separator.
        $converted = substr_replace($converted, ';', $questionMark, 1);
    }

    // The first ";" opens the query string, the remaining ones separate parameters.
    $converted = substr_replace($converted, '?', $firstSemicolon, 1);

    return str_replace(';', '&', $converted);
}
217
/**
 * Clean up string contents (filter, truncate, ...)
 *
 * Surrounding whitespace is trimmed, line breaks and NUL bytes are removed, and
 * the result is cut to the `page_maximum_length` Tracker config setting, which is
 * applied as a number of bytes.
 *
 * When the string really has to be shortened and mbstring is available, the cut is
 * made with mb_strcut() so that a multi-byte UTF-8 character is never split in half
 * (a plain substr() could leave an invalid trailing byte sequence). Strings that
 * fit within the limit are returned exactly as before.
 *
 * @param string $string Dirty string
 * @return string
 */
public static function cleanupString($string)
{
    $string = trim($string);
    $string = str_replace(array("\n", "\r", "\0"), '', $string);

    $limit = Config::getInstance()->Tracker['page_maximum_length'];
    $clean = substr($string, 0, $limit);

    // Only redo the cut when bytes were actually dropped, so untruncated
    // results stay identical to what substr() returned.
    if (is_string($clean)
        && strlen($clean) < strlen($string)
        && function_exists('mb_strcut')
    ) {
        // Same byte budget, but the end is moved back to a character boundary.
        $clean = mb_strcut($string, 0, $limit, 'UTF-8');
    }

    return $clean;
}
233
/**
 * Re-encodes a single URL-encoded parameter value from $encoding to UTF-8.
 *
 * Non-string values are returned as they are. A string is URL-decoded and, when
 * mbstring is available and the decoded bytes are valid in $encoding, converted
 * to UTF-8 and URL-encoded again. Otherwise the original value is returned.
 *
 * @param mixed $value
 * @param string $encoding Charset the value is expected to be encoded in.
 * @return mixed
 */
protected static function reencodeParameterValue($value, $encoding)
{
    if (!is_string($value)) {
        return $value;
    }

    $decoded   = urldecode($value);
    $canVerify = function_exists('mb_check_encoding');

    if ($canVerify && @mb_check_encoding($decoded, $encoding)) {
        return urlencode(mb_convert_encoding($decoded, 'UTF-8', $encoding));
    }

    return $value;
}
246
/**
 * Re-encodes every value of a (possibly nested) parameter array from $encoding
 * to UTF-8. Nested arrays are processed recursively, all other values go through
 * self::reencodeParameterValue(). Keys and their order are left as they are.
 *
 * @param array $queryParameters
 * @param string $encoding
 * @return array
 */
protected static function reencodeParametersArray($queryParameters, $encoding)
{
    foreach ($queryParameters as $name => $value) {
        $queryParameters[$name] = is_array($value)
            ? self::reencodeParametersArray($value, $encoding)
            : self::reencodeParameterValue($value, $encoding);
    }

    return $queryParameters;
}
259
/**
 * Converts query parameter values from a non-UTF-8 encoding to UTF-8.
 *
 * This works around browser/webapp bugs (see #3450): when a browser fails to
 * encode query parameters in UTF-8, the tracker sends the charset of the viewed
 * page, which sometimes lets us avoid storing invalid data.
 *
 * Nothing is converted when no encoding is given or when it is already UTF-8.
 * Without the mbstring extension only a debug message is printed.
 *
 * @param array $queryParameters Name/value mapping of query parameters. Passed by
 *                               reference and updated in place.
 * @param bool|string $encoding of the HTML page the URL is for. Used to workaround
 *                              browser bugs & mis-coded webapps. See #3450.
 *
 * @return array The (possibly converted) query parameters.
 */
public static function reencodeParameters(&$queryParameters, $encoding = false)
{
    if (!function_exists('mb_check_encoding')) {
        Common::printDebug("Page charset supplied in tracking request, but mbstring extension is not available.");

        return $queryParameters;
    }

    // if query params are encoded w/ non-utf8 characters (due to browser bug or whatever),
    // encode to UTF-8.
    $alreadyUtf8 = (strtolower($encoding) == 'utf-8');
    if (!$alreadyUtf8 && $encoding != false) {
        Common::printDebug("Encoding page URL query parameters to $encoding.");

        $queryParameters = self::reencodeParametersArray($queryParameters, $encoding);
    }

    return $queryParameters;
}
292
/**
 * Prepares a raw URL for further processing: the input value is unsanitized,
 * cleaned up as a string (see self::cleanupString()) and finally converted from
 * Matrix URL format (see self::convertMatrixUrl()).
 *
 * @param string $url
 * @return string
 */
public static function cleanupUrl($url)
{
    $unsanitized = Common::unsanitizeInputValue($url);
    $cleaned     = self::cleanupString($unsanitized);

    return self::convertMatrixUrl($cleaned);
}
301
/**
 * Rebuilds the full URL from a prefix ID and the remainder of the URL.
 *
 * The prefix belonging to $prefixId (see self::$urlPrefixMap) is put back in
 * front of $url; an unknown or null ID leaves $url without prefix. The result
 * is then parsed, has its host and hash tag cleaned up and is reassembled. If
 * reassembling yields nothing, the plain concatenated URL is returned instead.
 *
 * @param string $url
 * @param integer $prefixId
 * @return string
 */
public static function reconstructNormalizedUrl($url, $prefixId)
{
    $prefixById = array_flip(self::$urlPrefixMap);

    $fullUrl = $url;
    if ($prefixId !== null && isset($prefixById[$prefixId])) {
        $fullUrl = $prefixById[$prefixId] . $url;
    }

    // Clean up host & hash tags, for URLs
    $urlParts = self::cleanupHostAndHashTag(@parse_url($fullUrl));
    $rebuilt  = UrlHelper::getParseUrlReverse($urlParts);

    return !empty($rebuilt) ? $rebuilt : $fullUrl;
}
330
/**
 * Splits a URL into a known prefix and the rest.
 *
 * The prefixes of self::$urlPrefixMap are tried in order, ignoring case. On the
 * first match the ID of that prefix is returned together with the URL stripped
 * of the prefix. Without a match the URL is returned whole with a null prefix ID.
 *
 * @param string $url
 * @return array Array with the keys 'url' and 'prefixId'.
 */
public static function normalizeUrl($url)
{
    foreach (self::$urlPrefixMap as $prefix => $id) {
        $prefixLength = strlen($prefix);

        if (strncasecmp($url, $prefix, $prefixLength) !== 0) {
            continue;
        }

        return [
            'url'      => substr($url, $prefixLength),
            'prefixId' => $id,
        ];
    }

    return ['url' => $url, 'prefixId' => null];
}
351
/**
 * Cleans up the given URL string and returns it if it looks like a URL.
 *
 * @param string $url
 * @return string|bool The cleaned-up URL, or false (after printing a debug
 *                     warning) when it does not look like a URL.
 */
public static function getUrlIfLookValid($url)
{
    $candidate = self::cleanupString($url);

    if (UrlHelper::isLookLikeUrl($candidate)) {
        return $candidate;
    }

    Common::printDebug("WARNING: URL looks invalid and is discarded");

    return false;
}
364
/**
 * Returns the `excluded_parameters` entry of the given website attributes,
 * or an empty array when that entry is not set.
 *
 * @param array $website
 * @return array
 */
private static function getExcludedParametersFromWebsite($website)
{
    return isset($website['excluded_parameters'])
        ? $website['excluded_parameters']
        : [];
}
373
/**
 * URL-decodes a value, but only keeps the decoded form when it is valid UTF-8.
 *
 * If mbstring is available and the decoded bytes are not valid UTF-8, the
 * decoded value is URL-encoded again and returned in that form.
 *
 * @param string $value
 * @return string
 */
public static function urldecodeValidUtf8($value)
{
    $decoded = urldecode($value);

    if (!function_exists('mb_check_encoding')) {
        return $decoded;
    }

    $isValidUtf8 = @mb_check_encoding($decoded, 'utf-8');

    return $isValidUtf8 ? $decoded : urlencode($decoded);
}
384 }
385