ArrayUtil.php
6 months ago
BlocksUtil.php
4 months ago
COTMigrationUtil.php
1 year ago
DatabaseUtil.php
1 year ago
FilesystemUtil.php
6 months ago
HtmlSanitizer.php
2 years ago
LegacyRestApiStub.php
1 year ago
PluginInstaller.php
1 year ago
ProductUtil.php
8 months ago
Types.php
1 year ago
URL.php
1 year ago
URLException.php
4 years ago
Users.php
3 months ago
WebhookUtil.php
1 year ago
URL.php
381 lines
| 1 | <?php |
| 2 | |
| 3 | namespace Automattic\WooCommerce\Internal\Utilities; |
| 4 | |
/**
 * Provides an easy method of assessing URLs, including filepaths (which will be silently
 * converted to a file:// URL if provided).
 *
 * Processing happens once, in the constructor: slashes are normalized, a scheme is assumed where
 * missing, and the path is simplified by resolving directory traversals without touching the
 * filesystem.
 */
class URL {
	/**
	 * Components of the URL being assessed.
	 *
	 * The keys match those potentially returned by the parse_url() function, except
	 * that they are always defined and 'drive' (Windows drive letter) has been added.
	 * All values are strings or null, except 'port' which parse_url() reports as an integer.
	 *
	 * @var array<string, string|int|null>
	 */
	private $components = array(
		'drive'    => null,
		'fragment' => null,
		'host'     => null,
		'pass'     => null,
		'path'     => null,
		'port'     => null,
		'query'    => null,
		'scheme'   => null,
		'user'     => null,
	);

	/**
	 * If the URL (or filepath) is absolute.
	 *
	 * @var bool
	 */
	private $is_absolute;

	/**
	 * If the URL (or filepath) represents a directory other than the root directory.
	 *
	 * This is useful at different points in the process, when deciding whether to re-apply
	 * a trailing slash at the end of processing or when we need to calculate how many
	 * directory traversals are needed to form a (grand-)parent URL.
	 *
	 * @var bool
	 */
	private $is_non_root_directory;

	/**
	 * The components of the URL's path.
	 *
	 * For instance, in the case of "file:///srv/www/wp.site" (noting that a file URL has
	 * no host component) this would contain:
	 *
	 *     [ "srv", "www", "wp.site" ]
	 *
	 * In the case of a non-file URL such as "https://example.com/foo/bar/baz" (noting the
	 * host is not part of the path) it would contain:
	 *
	 *     [ "foo", "bar", "baz" ]
	 *
	 * Directory traversals that could not be resolved (leading traversals in a relative path)
	 * are always stored in their literal '..' form, even if they were percent-encoded on input.
	 *
	 * @var string[]
	 */
	private $path_parts = array();

	/**
	 * The URL.
	 *
	 * @var string
	 */
	private $url;

	/**
	 * Creates and processes the provided URL (or filepath).
	 *
	 * @throws URLException If the URL (or filepath) is seriously malformed.
	 *
	 * @param string $url The URL (or filepath).
	 */
	public function __construct( string $url ) {
		$this->url = $url;
		$this->preprocess();
		$this->process_path();
	}

	/**
	 * Makes all slashes forward slashes, converts filepaths to file:// URLs, and
	 * other processing to help with comprehension of filepaths.
	 *
	 * @throws URLException If the URL is seriously malformed.
	 */
	private function preprocess() {
		// For consistency, all slashes should be forward slashes.
		$this->url = str_replace( '\\', '/', $this->url );

		// Windows: capture the drive letter if provided.
		if ( preg_match( '#^(file://)?([a-z]):/(?!/).*#i', $this->url, $matches ) ) {
			$this->components['drive'] = $matches[2];
		}

		/*
		 * If there is no scheme, assume and prepend "file://". An exception is made for cases where the URL simply
		 * starts with exactly two forward slashes, which indicates 'any scheme' (most commonly, that is used when
		 * there is freedom to switch between 'http' and 'https').
		 */
		if ( ! preg_match( '#^[a-z]+://#i', $this->url ) && ! preg_match( '#^//(?!/)#', $this->url ) ) {
			$this->url = 'file://' . $this->url;
		}

		$parsed_components = wp_parse_url( $this->url );

		// If we received a really badly formed URL, let's go no further.
		if ( false === $parsed_components ) {
			throw new URLException(
				sprintf(
					/* translators: %s is the URL. */
					__( '%s is not a valid URL.', 'woocommerce' ),
					$this->url
				)
			);
		}

		$this->components = array_merge( $this->components, $parsed_components );

		/*
		 * File URLs cannot have a host. However, the initial path segment *or* the Windows drive letter
		 * (if present) may incorrectly be interpreted as the host name.
		 *
		 * The host is compared against null/'' explicitly (rather than with empty()) so that a leading path
		 * segment of "0", as in the relative filepath "0/foo", is still recognized and merged back in.
		 */
		$host     = $this->components['host'];
		$has_host = null !== $host && '' !== $host;

		if ( 'file' === $this->components['scheme'] && $has_host ) {
			// If we do not have a drive letter, then simply merge the host and the path together.
			if ( null === $this->components['drive'] ) {
				$this->components['path'] = $host . ( $this->components['path'] ?? '' );
			}

			// Restore the host to null in this situation.
			$this->components['host'] = null;
		}
	}

	/**
	 * Simplifies the path if possible, by resolving directory traversals to the extent possible
	 * without touching the filesystem.
	 *
	 * Populates $this->path_parts, $this->is_absolute and $this->is_non_root_directory, then
	 * rebuilds $this->components['path'] from the cleaned segments.
	 */
	private function process_path() {
		// The path is null when the URL has none (ex: "https://example.com"): normalize to a string
		// so we never hand null to the string functions below (deprecated since PHP 8.1).
		$path = (string) ( $this->components['path'] ?? '' );
		$host = (string) ( $this->components['host'] ?? '' );

		$this->is_absolute           = '/' === substr( $path, 0, 1 ) || '' !== $host;
		$this->is_non_root_directory = '/' === substr( $path, -1, 1 ) && strlen( $path ) > 1;

		// Clean the path.
		foreach ( explode( '/', $path ) as $part ) {
			// Drop empty segments and references to the current directory.
			if ( '' === $part || '.' === $part ) {
				continue;
			}

			// Directory traversals created with percent-encoding syntax should also be detected.
			$is_traversal = '..' === str_ireplace( '%2e', '.', $part );

			if ( ! $is_traversal ) {
				// An ordinary segment: retain it.
				$this->path_parts[] = $part;
				continue;
			}

			/*
			 * Resolve the traversal (ie, the process that converts 'foo/bar/../baz' to 'foo/baz') whenever the
			 * previously retained segment is an ordinary one. If it is itself a retained traversal, we are still
			 * within the leading '../../' run of a relative path and there is nothing that can be removed.
			 */
			$previous = end( $this->path_parts );

			if ( false !== $previous && '..' !== $previous ) {
				array_pop( $this->path_parts );
				continue;
			}

			// It is not possible to traverse above the root of an absolute path: discard the traversal.
			if ( $this->is_absolute ) {
				continue;
			}

			// Relative path: retain the unresolvable traversal, always in its literal form so that
			// later comparisons against '..' also hold for percent-encoded input.
			$this->path_parts[] = '..';
		}

		// Protect against empty relative paths.
		if ( count( $this->path_parts ) === 0 && ! $this->is_absolute ) {
			$this->path_parts            = array( '.' );
			$this->is_non_root_directory = true;
		}

		// Reform the path from the processed segments, prepending a leading slash if it is absolute and
		// re-applying the trailing slash for directories. (The Windows drive letter, if any, is restored
		// separately by get_path().)
		$this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_non_root_directory ? '/' : '' );
	}

	/**
	 * Returns the processed URL as a string.
	 *
	 * @return string
	 */
	public function __toString(): string {
		return $this->get_url();
	}

	/**
	 * Returns all possible parent URLs for the current URL.
	 *
	 * The list is ordered from the immediate parent outwards (parent, grand-parent, and so on).
	 *
	 * @return string[]
	 */
	public function get_all_parent_urls(): array {
		$max_parent = count( $this->path_parts );
		$parents    = array();

		/*
		 * If we are looking at a relative path that begins with at least one traversal (example: "../../foo")
		 * then we should only return one parent URL (otherwise, we'd potentially have to return an infinite
		 * number of parent URLs since we can't know how far the tree extends).
		 */
		if ( $max_parent > 0 && ! $this->is_absolute && '..' === $this->path_parts[0] ) {
			$max_parent = 1;
		}

		for ( $level = 1; $level <= $max_parent; $level++ ) {
			$parents[] = $this->get_parent_url( $level );
		}

		return $parents;
	}

	/**
	 * Outputs the parent URL.
	 *
	 * For example, if $this->get_url() returns "https://example.com/foo/bar/baz" then
	 * this method will return "https://example.com/foo/bar/".
	 *
	 * When a grand-parent is needed, the optional $level parameter can be used. By default
	 * this is set to 1 (parent). 2 will yield the grand-parent, 3 will yield the great
	 * grand-parent, etc. Values lower than 1 are treated as 1.
	 *
	 * False is returned when there is no such parent: for non-file URLs, when the level exceeds
	 * the number of path segments; for absolute filepaths, when the path is the root directory.
	 *
	 * @param int $level Used to indicate the level of parent.
	 *
	 * @return string|false
	 */
	public function get_parent_url( int $level = 1 ) {
		if ( $level < 1 ) {
			$level = 1;
		}

		$parts_count               = count( $this->path_parts );
		$parent_path_parts_to_keep = $parts_count - $level;

		/*
		 * With the exception of file URLs, we do not allow obtaining (grand-)parent directories that require
		 * us to describe them using directory traversals. For example, given "http://hostname/foo/bar/baz.png" we do
		 * not permit determining anything more than 2 levels up (we cannot go beyond "http://hostname/").
		 */
		if ( 'file' !== $this->components['scheme'] && $parent_path_parts_to_keep < 0 ) {
			return false;
		}

		// In the specific case of an absolute filepath describing the root directory, there can be no parent.
		if ( 'file' === $this->components['scheme'] && $this->is_absolute && empty( $this->path_parts ) ) {
			return false;
		}

		// Handle cases where the path starts with one or more 'dot segments'. Since the path has already been
		// processed, we can be confident that any such segments are at the start of the path.
		if ( $parts_count > 0 && ( '.' === $this->path_parts[0] || '..' === $this->path_parts[0] ) ) {
			// Determine the index of the last dot segment (ex: given the path '../../foo' it would be 1).
			$single_dots   = array_keys( $this->path_parts, '.', true );
			$double_dots   = array_keys( $this->path_parts, '..', true );
			$max_dot_index = max( array_merge( $single_dots, $double_dots ) );

			// Prepend the required number of traversals and discard unnecessary trailing segments.
			$last_traversal = $max_dot_index + ( $this->is_non_root_directory ? 1 : 0 );
			$parent_path    = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) );
		} elseif ( $parent_path_parts_to_keep < 0 ) {
			// For relative filepaths only, we use traversals to describe the requested parent.
			$parent_path = untrailingslashit( str_repeat( '../', $parent_path_parts_to_keep * -1 ) );
		} else {
			// Otherwise, in a very simple case, we just remove existing parts.
			$parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) );
		}

		// An empty relative parent path refers to the current working directory.
		if ( $this->is_relative() && '' === $parent_path ) {
			$parent_path = '.';
		}

		// Append a trailing slash, since a parent is always a directory.
		$parent_path .= '/';

		// For absolute paths, apply a leading slash (does not apply if we have a root path).
		if ( $this->is_absolute && 0 !== strpos( $parent_path, '/' ) ) {
			$parent_path = '/' . $parent_path;
		}

		// Form the parent URL (ditching the query and fragment, if set).
		$parent_url = $this->get_url(
			array(
				'path'     => $parent_path,
				'query'    => null,
				'fragment' => null,
			)
		);

		// We process the parent URL through a fresh instance of this class, for consistency.
		return ( new self( $parent_url ) )->get_url();
	}

	/**
	 * Outputs the processed URL.
	 *
	 * Borrows from https://www.php.net/manual/en/function.parse-url.php#106731
	 *
	 * @param array $component_overrides If provided, these will override values set in $this->components.
	 *
	 * @return string
	 */
	public function get_url( array $component_overrides = array() ): string {
		$components = array_merge( $this->components, $component_overrides );

		// A missing scheme is rendered as a scheme-relative ('//') URL.
		$scheme = null !== $components['scheme'] ? $components['scheme'] . '://' : '//';
		$host   = null !== $components['host'] ? $components['host'] : '';
		$port   = null !== $components['port'] ? ':' . $components['port'] : '';
		$path   = $this->get_path( $components['path'] );

		// Special handling for hostless URLs (typically, filepaths) referencing the current working directory.
		if ( '' === $host && ( '' === $path || '.' === $path ) ) {
			$path = './';
		}

		// Compared against '' (rather than with empty()) so that a user named "0" is not discarded.
		$user      = null !== $components['user'] ? (string) $components['user'] : '';
		$pass      = null !== $components['pass'] ? ':' . $components['pass'] : '';
		$user_pass = ( '' !== $user || '' !== $pass ) ? $user . $pass . '@' : '';

		$query    = null !== $components['query'] ? '?' . $components['query'] : '';
		$fragment = null !== $components['fragment'] ? '#' . $components['fragment'] : '';

		return $scheme . $user_pass . $host . $port . $path . $query . $fragment;
	}

	/**
	 * Outputs the path. Especially useful if it was a regular filepath that was passed in originally.
	 *
	 * If a Windows drive letter was captured, it is prepended (ex: "C:/foo/bar").
	 *
	 * @param string|null $path_override If provided this will be used as the URL path. Does not impact drive letter.
	 *
	 * @return string
	 */
	public function get_path( ?string $path_override = null ): string {
		$drive = null !== $this->components['drive'] ? $this->components['drive'] . ':' : '';

		return $drive . ( $path_override ?? $this->components['path'] );
	}

	/**
	 * Indicates if the URL or filepath was absolute.
	 *
	 * @return bool True if absolute, else false.
	 */
	public function is_absolute(): bool {
		return $this->is_absolute;
	}

	/**
	 * Indicates if the URL or filepath was relative.
	 *
	 * @return bool True if relative, else false.
	 */
	public function is_relative(): bool {
		return ! $this->is_absolute;
	}
}
| 381 |