API
1 month ago
Access
3 months ago
Application
1 month ago
Archive
1 month ago
ArchiveProcessor
1 month ago
Archiver
2 years ago
AssetManager
1 month ago
Auth
6 months ago
Category
6 months ago
Changes
1 month ago
CliMulti
1 year ago
Columns
1 month ago
Concurrency
1 month ago
Config
1 month ago
Container
1 month ago
CronArchive
3 months ago
DataAccess
1 month ago
DataFiles
2 years ago
DataTable
2 weeks ago
Db
2 weeks ago
DeviceDetector
1 year ago
Email
2 years ago
Exception
4 months ago
Http
4 months ago
Intl
3 months ago
Log
2 years ago
Mail
1 year ago
Measurable
6 months ago
Menu
1 month ago
Metrics
3 months ago
Notification
6 months ago
Period
1 month ago
Plugin
2 weeks ago
Policy
1 month ago
ProfessionalServices
1 year ago
Report
1 year ago
ReportRenderer
3 months ago
Request
3 months ago
Scheduler
1 month ago
Segment
1 month ago
Session
2 weeks ago
Settings
1 month ago
Tracker
2 weeks ago
Translation
1 month ago
Twig
1 year ago
UpdateCheck
3 months ago
Updater
1 month ago
Updates
2 days ago
Validators
1 year ago
View
1 month ago
ViewDataTable
2 weeks ago
Visualization
1 year ago
Widget
1 month ago
.htaccess
2 years ago
Access.php
1 month ago
Archive.php
1 month ago
ArchiveProcessor.php
1 month ago
AssetManager.php
1 month ago
Auth.php
6 months ago
AuthResult.php
6 months ago
BaseFactory.php
2 years ago
Cache.php
2 years ago
CacheId.php
4 months ago
CliMulti.php
1 month ago
Common.php
2 weeks ago
Config.php
1 month ago
Console.php
3 months ago
Context.php
2 years ago
Cookie.php
1 year ago
CronArchive.php
1 month ago
DI.php
3 months ago
DataArray.php
1 month ago
DataTable.php
1 month ago
Date.php
1 month ago
Db.php
1 month ago
DbHelper.php
1 month ago
Development.php
1 year ago
ErrorHandler.php
6 months ago
EventDispatcher.php
1 month ago
ExceptionHandler.php
4 months ago
FileIntegrity.php
1 month ago
Filechecks.php
1 year ago
Filesystem.php
1 month ago
FrontController.php
4 months ago
Http.php
1 month ago
IP.php
1 year ago
Log.php
3 months ago
LogDeleter.php
1 year ago
Mail.php
1 year ago
Metrics.php
1 month ago
NoAccessException.php
2 years ago
Nonce.php
6 months ago
Notification.php
1 month ago
NumberFormatter.php
5 months ago
Option.php
5 months ago
Period.php
1 month ago
Piwik.php
1 month ago
Plugin.php
1 month ago
Process.php
1 month ago
Profiler.php
6 months ago
ProxyHeaders.php
4 months ago
ProxyHttp.php
5 months ago
QuickForm2.php
3 months ago
RankingQuery.php
1 month ago
ReportRenderer.php
1 month ago
Request.php
1 month ago
Segment.php
1 month ago
Sequence.php
6 months ago
Session.php
2 weeks ago
SettingsPiwik.php
1 month ago
SettingsServer.php
1 year ago
Singleton.php
2 years ago
Site.php
1 month ago
SiteContentDetector.php
1 month ago
SupportedBrowser.php
2 years ago
TCPDF.php
1 year ago
Theme.php
1 year ago
Timer.php
1 month ago
Tracker.php
1 month ago
Twig.php
1 month ago
Unzip.php
1 year ago
UpdateCheck.php
1 month ago
Updater.php
1 month ago
UpdaterErrorException.php
2 years ago
Updates.php
3 months ago
Url.php
3 months ago
UrlHelper.php
1 month ago
Version.php
2 days ago
View.php
1 month ago
bootstrap.php
1 year ago
dispatch.php
2 years ago
testMinimumPhpVersion.php
6 months ago
UrlHelper.php
327 lines
| 1 | <?php |
| 2 | |
| 3 | /** |
| 4 | * Matomo - free/libre analytics platform |
| 5 | * |
| 6 | * @link https://matomo.org |
| 7 | * @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 8 | */ |
| 9 | namespace Piwik; |
| 10 | |
| 11 | use Piwik\Container\StaticContainer; |
| 12 | use Piwik\Intl\Data\Provider\RegionDataProvider; |
/**
 * Contains less commonly needed URL helper methods.
 */
class UrlHelper
{
    /**
     * Protocols accepted by {@see UrlHelper::isLookLikeSafeUrl()} when a URL contains a colon.
     *
     * @var string[]
     */
    private static $validLinkProtocols = ['http', 'https', 'tel', 'sms', 'mailto', 'callto'];

    /**
     * Checks if a string matches/is equal to one of the patterns/strings.
     *
     * Each entry of `$patterns` that compiles as a regular expression is matched with
     * `preg_match()`; every other entry is compared case-insensitively as a plain string.
     *
     * @static
     * @param string $test String to test.
     * @param string[] $patterns Array of strings or regexs.
     *
     * @return bool true if $test matches or is equal to one of the regex/string in $patterns, false otherwise.
     */
    protected static function inArrayMatchesRegex($test, $patterns): bool
    {
        foreach ($patterns as $pattern) {
            // Probing with an empty subject is how we find out whether $pattern is a usable regex:
            // preg_match() returns false (and raises a warning, hence the @) when it cannot compile it.
            $isRegex = @preg_match($pattern, '') !== false;

            if ($isRegex) {
                if (preg_match($pattern, $test) === 1) {
                    return true;
                }
            } elseif (strcasecmp($pattern, $test) === 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Converts an array of query parameter name/value mappings into a query string.
     * Parameters that are in `$parametersToExclude` will not appear in the result.
     *
     * Values are appended as given (no URL-encoding is applied here). A value of `false` produces
     * a bare parameter name without `=`, and array values are emitted as repeated `name[]` entries.
     *
     * @static
     * @param array<string, string|false|array<string|false>> $queryParameters Array of query parameters, eg, `array('site' => '0', 'date' => '2012-01-01')`.
     * @param string[] $parametersToExclude Array of query parameter names (or regexes) that shouldn't be
     *                                      in the result query string, eg, `array('date', 'period')`.
     * @return string A query string without a leading `?`, eg, `"site=0"`. Empty string if nothing is left.
     * @api
     */
    public static function getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude)
    {
        $parts = [];

        foreach ($queryParameters as $name => $value) {
            // decode encoded square brackets
            $name = str_replace(['%5B', '%5D'], ['[', ']'], $name);

            if (self::inArrayMatchesRegex(strtolower($name), $parametersToExclude)) {
                continue;
            }

            if (is_array($value)) {
                foreach ($value as $param) {
                    $parts[] = $param === false ? $name . '[]' : $name . '[]=' . $param;
                }
            } elseif ($value === false) {
                $parts[] = $name;
            } else {
                $parts[] = $name . '=' . $value;
            }
        }

        // implode() always yields a string, including '' for no parts (substr('', 0, -1) is false on PHP < 8).
        return implode('&', $parts);
    }

    /**
     * Reduce URL to more minimal form. 2 letter country codes are
     * replaced by '{}', while other parts are simply removed.
     *
     * Examples:
     *   www.example.com -> example.com
     *   search.example.com -> example.com
     *   m.example.com -> example.com
     *   de.example.com -> {}.example.com
     *   example.de -> example.{}
     *   example.co.uk -> example.{}
     *
     * @param string $url
     * @return string
     */
    public static function getLossyUrl($url)
    {
        // The country-code alternation is built once per process and reused on later calls.
        static $countries;
        if (!isset($countries)) {
            /** @var RegionDataProvider $regionDataProvider */
            $regionDataProvider = StaticContainer::get('Piwik\\Intl\\Data\\Provider\\RegionDataProvider');
            $countries = implode('|', array_keys($regionDataProvider->getCountryList(true)));
        }

        $patterns = [
            // leading "www", "www2", ... or "search" label
            '/^(w+[0-9]*|search)\\./',
            // "m." mobile label at the start or after a dot
            '/(^|\\.)m\\./',
            // country code as TLD, optionally preceded by a generic second-level label
            '/(\\.(com|org|net|co|it|edu))?\\.(' . $countries . ')(\\/|$)/',
            // country code as a label followed by a dot
            '/(^|\\.)(' . $countries . ')\\./',
        ];
        $replacements = ['', '$1', '.{}$4', '$1{}.'];

        return preg_replace($patterns, $replacements, $url);
    }

    /**
     * Returns true if the string passed may be a URL ie. it starts with protocol://.
     * We don't need a precise test here because the value comes from the website
     * tracked source code and the URLs may look very strange.
     *
     * Protocol-relative values (`//host`) are accepted; `javascript:`, `vbscript:` and `data:`
     * schemes are rejected, as is anything with nothing after the `//`.
     *
     * @api
     * @param string $url
     * @return bool
     */
    public static function isLookLikeUrl($url)
    {
        if (!$url) {
            return false;
        }

        // Require an actual match (=== 1): preg_match() returns false on error, in which case
        // $matches is not populated and must not be read.
        if (preg_match('~^(([[:alpha:]][[:alnum:]+.-]*)?:)?//(.*)$~D', $url, $matches) !== 1) {
            return false;
        }

        // $matches[3] is everything after "//"
        if ($matches[3] === '') {
            return false;
        }

        // $matches[1] is the scheme including its trailing colon (or '' for protocol-relative URLs)
        return !preg_match('/^(javascript:|vbscript:|data:)/i', $matches[1]);
    }

    /**
     * Returns true if the given string is acceptable as a link target: it must not contain ASCII
     * control characters, and if it contains a colon, the part before the first colon must be one
     * of the allowed link protocols (compared case-insensitively).
     *
     * @param string $url
     * @return bool
     */
    public static function isLookLikeSafeUrl($url)
    {
        if (preg_match('/[\\x00-\\x1F\\x7F]/', $url)) {
            return false;
        }

        // No colon means no protocol part to validate.
        if (strpos($url, ':') === false) {
            return true;
        }

        $protocol = explode(':', $url, 2)[0];
        $allowedProtocols = implode('|', self::$validLinkProtocols);

        return (bool) preg_match('/^(' . $allowedProtocols . ')$/i', $protocol);
    }

    /**
     * Returns a URL created from the result of the [parse_url](https://php.net/manual/en/function.parse-url.php)
     * function.
     *
     * Copied from the PHP comments at [https://php.net/parse_url](https://php.net/parse_url).
     *
     * Components that are missing or an empty string are skipped. A component equal to `"0"`
     * (eg the query of `http://example.com/?0`) is kept.
     *
     * @param array $parsed Result of [parse_url](https://php.net/manual/en/function.parse-url.php).
     * @return false|string The URL or `false` if `$parsed` isn't an array.
     * @api
     */
    public static function getParseUrlReverse($parsed)
    {
        if (!is_array($parsed)) {
            return false;
        }

        // According to RFC 1738, the chars ':', '@' and '/' need to be encoded in username or password part of an url
        // We also encode '\' as a username or password containing that char, might be handled incorrectly by browsers
        $escapeSpecialChars = static function ($value) {
            return str_replace([':', '@', '/', '\\'], ['%3A', '%40', '%2F', '%5C'], $value);
        };

        // empty() would also discard the valid component "0", so test for a non-empty scalar explicitly.
        $has = static function ($key) use ($parsed) {
            return isset($parsed[$key]) && is_scalar($parsed[$key]) && (string) $parsed[$key] !== '';
        };

        $uri = '';

        if ($has('scheme')) {
            $uri .= $parsed['scheme'] . ':';
            if (strcasecmp($parsed['scheme'], 'mailto') !== 0) {
                $uri .= '//';
            }
        }

        if ($has('user')) {
            $uri .= $escapeSpecialChars($parsed['user']);
            if ($has('pass')) {
                $uri .= ':' . $escapeSpecialChars($parsed['pass']);
            }
            $uri .= '@';
        }

        if ($has('host')) {
            $uri .= $parsed['host'];
        }

        // Port deliberately stays on the truthiness check, so a port of 0 is still omitted.
        if (!empty($parsed['port'])) {
            $uri .= ':' . $parsed['port'];
        }

        if ($has('path')) {
            $path = (string) $parsed['path'];
            // A relative path is separated from whatever precedes it by a slash.
            // NOTE(review): this also applies to "mailto:" URIs (giving "mailto:/addr") - kept as is, confirm intent.
            if (strncmp($path, '/', 1) !== 0 && $uri !== '') {
                $uri .= '/';
            }
            $uri .= $path;
        }

        if ($has('query')) {
            $uri .= '?' . $parsed['query'];
        }

        if ($has('fragment')) {
            $uri .= '#' . $parsed['fragment'];
        }

        return $uri;
    }

    /**
     * Returns a URL query string as an array.
     *
     * Names and values are passed through `Common::sanitizeInputValue()`. A parameter without `=`
     * gets the value `false`; parameters named `name[]` (or `name%5B%5D`) are collected into an array
     * under `name`. Results are memoized in the transient cache, keyed by the query string.
     *
     * @param string $urlQuery The query string, eg, `'?param1=value1&param2=value2'`.
     * @return array eg, `array('param1' => 'value1', 'param2' => 'value2')`
     * @api
     */
    public static function getArrayFromQueryString($urlQuery)
    {
        if (empty($urlQuery)) {
            return [];
        }

        // TODO: this method should not use a cache. callers should instead have their own cache, configured through DI.
        // one undesirable side effect of using a cache here, is that this method can now init the StaticContainer, which makes setting
        // test environment for RequestCommand more complicated.
        $cache = \Piwik\Cache::getTransientCache();
        $cacheKey = 'arrayFromQuery' . $urlQuery;
        if ($cache->contains($cacheKey)) {
            return $cache->fetch($cacheKey);
        }

        if ($urlQuery[0] === '?') {
            $urlQuery = (string) substr($urlQuery, 1);
        }

        $nameToValue = [];

        // Only trailing whitespace is stripped; leading whitespace stays part of the first name.
        foreach (explode('&', rtrim($urlQuery)) as $pair) {
            $equalsPos = strpos($pair, '=');
            if ($equalsPos === false) {
                $name = $pair;
                $value = false;
            } else {
                $name = substr($pair, 0, $equalsPos);
                $value = (string) substr($pair, $equalsPos + 1);
            }

            if (!empty($name)) {
                $name = \Piwik\Common::sanitizeInputValue($name);
            }
            if (!empty($value)) {
                $value = \Piwik\Common::sanitizeInputValue($value);
            }

            // if array without indexes
            $count = 0;
            $baseName = preg_replace('/(\\[|%5b)(]|%5d)$/i', '', $name, -1, $count);

            if (!empty($baseName) && $count) {
                // A scalar stored earlier under the same name is replaced by the array.
                if (!isset($nameToValue[$baseName]) || !is_array($nameToValue[$baseName])) {
                    $nameToValue[$baseName] = [];
                }
                $nameToValue[$baseName][] = $value;
            } elseif (!empty($name)) {
                $nameToValue[$name] = $value;
            }
        }

        $cache->save($cacheKey, $nameToValue);

        return $nameToValue;
    }

    /**
     * Returns the value of a single query parameter from the supplied query string.
     *
     * @param string $urlQuery The query string.
     * @param string $parameter The query parameter name to return.
     * @return string|false|array|null Parameter value if found (can be the empty string!), `false` if the
     *                                 parameter is present without `=`, an array for `name[]` parameters,
     *                                 null if not found.
     * @api
     */
    public static function getParameterFromQueryString($urlQuery, $parameter)
    {
        $nameToValue = self::getArrayFromQueryString($urlQuery);

        return $nameToValue[$parameter] ?? null;
    }

    /**
     * Returns the path and query string of a URL.
     *
     * One leading slash is removed from the path. When the fragment itself contains a `?`, the part
     * after it is used as the query (replacing any regular query) and the part before it as the fragment.
     * With `$preserveAnchor` the fragment is emitted before the query.
     *
     * @param string $url The URL.
     * @param array $additionalParamsToAdd If not empty the given parameters will be added to the query.
     * @param bool $preserveAnchor If true then do not remove any #anchor from the url, default false
     * @return string eg, `test/index.php?module=CoreHome` if `$url` is `https://piwik.org/test/index.php?module=CoreHome`.
     * @api
     */
    public static function getPathAndQueryFromUrl($url, array $additionalParamsToAdd = [], bool $preserveAnchor = false)
    {
        $parsedUrl = parse_url($url);
        if (!is_array($parsedUrl)) {
            // seriously malformed URL: treat as having no components
            $parsedUrl = [];
        }

        // If an anchor is included in the URL parse_url() will not split the anchor and query, so we do that here
        if (isset($parsedUrl['fragment'])) {
            $fragment = $parsedUrl['fragment'];
            $queryStart = strpos($fragment, '?');
            if ($queryStart !== false) {
                $parsedUrl['query'] = (string) substr($fragment, $queryStart + 1);
                $parsedUrl['fragment'] = (string) substr($fragment, 0, $queryStart);
            }
        }

        $result = '';

        if (isset($parsedUrl['path'])) {
            $path = $parsedUrl['path'];
            if ($path !== '' && $path[0] === '/') {
                $path = (string) substr($path, 1);
            }
            $result .= $path;
        }

        if ($preserveAnchor && isset($parsedUrl['fragment'])) {
            $result .= '#' . $parsedUrl['fragment'];
        }

        if (isset($parsedUrl['query']) || count($additionalParamsToAdd) > 0) {
            $query = $parsedUrl['query'] ?? '';
            $result .= '?' . self::addAdditionalParameters($query, $additionalParamsToAdd);
        }

        return $result;
    }

    /**
     * Returns the query part from any valid url and adds additional parameters to the query part if needed.
     *
     * @param string $url Any url eg `"https://example.com/piwik/?foo=bar"`
     * @param array $additionalParamsToAdd If not empty the given parameters will be added to the query.
     *
     * @return string eg. `"foo=bar&foo2=bar2"`; empty string if the URL has no query (or cannot be parsed)
     *                and no additional parameters are given.
     * @api
     */
    public static function getQueryFromUrl($url, array $additionalParamsToAdd = [])
    {
        // parse_url() yields null when there is no query and false for malformed URLs; both become ''.
        $query = (string) parse_url($url, \PHP_URL_QUERY);

        return self::addAdditionalParameters($query, $additionalParamsToAdd);
    }

    /**
     * Add an array of additional parameters to a query string
     *
     * @param string $query Existing query string without leading `?`, may be empty.
     * @param array $additionalParamsToAdd Parameters to append; when empty `$query` is returned unchanged.
     * @return string
     */
    private static function addAdditionalParameters(string $query, array $additionalParamsToAdd): string
    {
        if (empty($additionalParamsToAdd)) {
            return $query;
        }

        // Compare against '' rather than using empty(), so that a query of "0" still gets its separator.
        if ($query !== '') {
            $query .= '&';
        }

        return $query . \Piwik\Url::getQueryStringFromParameters($additionalParamsToAdd);
    }

    /**
     * Returns the host of the given URL. If the value does not look like a URL
     * (see {@see UrlHelper::isLookLikeUrl()}), `http://` is prepended before parsing.
     *
     * @param string $url
     * @return string|false|null The host, null if the URL has no host, false if the URL cannot be parsed.
     */
    public static function getHostFromUrl($url)
    {
        if (!self::isLookLikeUrl($url)) {
            $url = 'http://' . $url;
        }

        return parse_url($url, \PHP_URL_HOST);
    }
}
| 327 |