Filters
6 months ago
Scanning
5 years ago
AbstractFileObject.php
1 year ago
AbstractFilesystemScanner.php
2 months ago
DebugLogReader.php
2 years ago
DirectoryListing.php
5 months ago
DiskWriteCheck.php
5 months ago
FileObject.php
1 year ago
Filesystem.php
6 months ago
FilesystemExceptions.php
5 years ago
FilesystemScanner.php
2 months ago
FilesystemScannerDto.php
1 year ago
FilterableDirectoryIterator.php
1 year ago
LogCleanup.php
5 months ago
LogFiles.php
1 year ago
MissingFileException.php
3 years ago
OPcache.php
5 months ago
PartIdentifier.php
8 months ago
PathChecker.php
2 years ago
PathIdentifier.php
6 months ago
Permissions.php
5 months ago
WpUploadsFolderSymlinker.php
2 months ago
FilesystemScanner.php
520 lines
| 1 | <?php |
| 2 | |
| 3 | namespace WPStaging\Framework\Filesystem; |
| 4 | |
| 5 | use OutOfBoundsException; |
| 6 | use RuntimeException; |
| 7 | use SplFileInfo; |
| 8 | use Throwable; |
| 9 | use WPStaging\Core\WPStaging; |
| 10 | use WPStaging\Framework\Adapter\Directory; |
| 11 | use WPStaging\Framework\Job\Exception\DiskNotWritableException; |
| 12 | use WPStaging\Framework\Queue\FinishedQueueException; |
| 13 | use WPStaging\Framework\Queue\SeekableQueueInterface; |
| 14 | use WPStaging\Framework\SiteInfo; |
| 15 | use WPStaging\Framework\Utils\PluginInfo; |
| 16 | use WPStaging\Vendor\Psr\Log\LoggerInterface; |
| 17 | |
/**
 * Scans the filesystem step by step and records every file that should be processed.
 *
 * Directories waiting to be scanned are kept in the task queue; files that passed all
 * exclusion filters are written to the filesystem queue. Counters and the accumulated
 * size of the discovered files are tracked on the injected FilesystemScannerDto.
 *
 * Before the scanner is used, inject() must have been called, since the logger, the task
 * queue and the DTO are not provided by the constructor.
 */
class FilesystemScanner extends AbstractFilesystemScanner
{
    /** @var SeekableQueueInterface Queue receiving the relative paths of the accepted files. */
    protected $filesystemQueue;

    /** @var SeekableQueueInterface Queue of directories that still have to be scanned. */
    protected $taskQueue;

    /** @var LoggerInterface */
    protected $logger;

    /** @var FilesystemScannerDto */
    protected $scannerDto;

    /** @var string Prefix used in every log message written by the scanner. */
    protected $logTitle = '';

    /** @var string Base name of the filesystem queue; the currently scanned part is appended to it. */
    protected $queueCacheName = '';

    /**
     * @var int Maximum size of a file that has no extension specific limit.
     *          With the default of 0 every non-empty file without such a limit is skipped.
     */
    protected $ignoreFileBiggerThan = 0;

    /** @var array Extensions to skip. Looked up by key, i.e. the extension is the array key. */
    protected $ignoreFileExtensions = [];

    /** @var array Maximum file size per extension. The extension is the array key. */
    protected $ignoreFileExtensionFilesBiggerThan = [];

    /** @var bool */
    protected $isSiteHostedOnWordPressCom = false;

    /** @var array Rules in the form "<rule_type> <value>", see ruleMatch(). */
    protected $folderNameRules = [];

    /** @var array Rules in the form "<rule_type> <value>", see ruleMatch(). */
    protected $fileNameRules = [];

    /**
     * @param Directory $directory
     * @param PathIdentifier $pathIdentifier
     * @param Filesystem $filesystem
     * @param PluginInfo $pluginInfo
     * @param SiteInfo $siteInfo
     * @param SeekableQueueInterface $filesystemQueue
     */
    public function __construct(
        Directory $directory,
        PathIdentifier $pathIdentifier,
        Filesystem $filesystem,
        PluginInfo $pluginInfo,
        SiteInfo $siteInfo,
        SeekableQueueInterface $filesystemQueue
    ) {
        parent::__construct($directory, $pathIdentifier, $filesystem, $pluginInfo);
        $this->isSiteHostedOnWordPressCom = $siteInfo->isHostedOnWordPressCom();
        $this->filesystemQueue            = $filesystemQueue;
    }

    /**
     * Set the size and extension based exclusion filters used by processFile().
     *
     * @param int $ignoreFileBiggerThan Size limit for files without an extension specific limit.
     * @param array $ignoreFileExtensions Extensions to skip, keyed by extension.
     * @param array $ignoreFileExtensionFilesBiggerThan Size limit per extension, keyed by extension.
     * @return void
     */
    public function setFilters(int $ignoreFileBiggerThan, array $ignoreFileExtensions, array $ignoreFileExtensionFilesBiggerThan)
    {
        $this->ignoreFileBiggerThan               = $ignoreFileBiggerThan;
        $this->ignoreFileExtensions               = $ignoreFileExtensions;
        $this->ignoreFileExtensionFilesBiggerThan = $ignoreFileExtensionFilesBiggerThan;
    }

    /**
     * Set the rules to exclude folders and files according to their names.
     *
     * Each rule is a string "<rule_type> <value>", see ruleMatch() for the supported types.
     *
     * @param array $folderNameRules
     * @param array $fileNameRules
     * @return void
     */
    public function setNameExcludeRules(array $folderNameRules, array $fileNameRules)
    {
        $this->folderNameRules = $folderNameRules;
        $this->fileNameRules   = $fileNameRules;
    }

    /**
     * Open the filesystem queue of the part that is currently scanned in write mode.
     *
     * The queue name is "<queueCacheName>_<currentPathScanning>".
     *
     * @return void
     */
    public function setupFilesystemQueue()
    {
        $fileBackupQueueCacheName = $this->queueCacheName . '_' . $this->currentPathScanning;
        $this->filesystemQueue->setup($fileBackupQueueCacheName, SeekableQueueInterface::MODE_WRITE);
    }

    /**
     * @param string $logTitle
     * @return void
     */
    public function setLogTitle(string $logTitle)
    {
        $this->logTitle = $logTitle;
    }

    /**
     * @param string $queueCacheName
     * @return void
     */
    public function setQueueCacheName(string $queueCacheName)
    {
        $this->queueCacheName = $queueCacheName;
    }

    /**
     * Provide the collaborators that are not passed to the constructor.
     *
     * @param LoggerInterface $logger
     * @param SeekableQueueInterface $taskQueue
     * @param FilesystemScannerDto $scannerDto
     * @return void
     */
    public function inject(LoggerInterface $logger, SeekableQueueInterface $taskQueue, FilesystemScannerDto $scannerDto)
    {
        $this->logger     = $logger;
        $this->taskQueue  = $taskQueue;
        $this->scannerDto = $scannerDto;
    }

    /**
     * @return FilesystemScannerDto
     */
    public function getFilesystemScannerDto(): FilesystemScannerDto
    {
        return $this->scannerDto;
    }

    /**
     * Shut down the filesystem queue.
     *
     * @return void
     */
    public function unlockQueue()
    {
        $this->filesystemQueue->shutdown();
    }

    /**
     * Take the next directory from the task queue and scan it.
     *
     * When the task queue is empty, the disk write check is run for the accumulated size of
     * all discovered files and the FinishedQueueException is passed on to the caller.
     * Every other error raised while processing the path is only logged, so that a single
     * failing path does not abort the scan.
     *
     * @return void
     * @throws FinishedQueueException When there is nothing left to scan.
     * @throws DiskNotWritableException When the disk write check reports a non-writable disk.
     */
    public function processQueue()
    {
        try {
            $path = $this->taskQueue->dequeue();
            if ($path === null) {
                throw new FinishedQueueException('Directory Scanner Queue is Finished');
            }

            $this->processPath($path);
        } catch (FinishedQueueException $ex) {
            try {
                WPStaging::make(DiskWriteCheck::class)->checkPathCanStoreEnoughBytes(
                    $this->directory->getPluginUploadsDirectory(),
                    $this->scannerDto->getFilesystemSize()
                );
            } catch (DiskNotWritableException $e) {
                // Must be caught before RuntimeException so that it is not downgraded to a soft error
                throw $e;
            } catch (RuntimeException $e) {
                // soft error, no action needed, but log
                $this->logger->debug($e->getMessage());
            }

            throw $ex;
        } catch (OutOfBoundsException $e) {
            $this->logger->debug($e->getMessage());
        } catch (Throwable $e) {
            $this->logger->warning($e->getMessage());
        }
    }

    /**
     * @return void
     */
    protected function preRecursivePathScanningStep()
    {
        $this->setupFilesystemQueue();
    }

    /**
     * Apply all file exclusion filters and enqueue the file if it passes them.
     *
     * Filters are evaluated in this order: file name rules, wp-content/debug.log, excluded
     * extensions, extension specific size limit, general size limit. The file is only stat'ed
     * for its size once the name and extension based filters have let it through.
     *
     * The enqueued entry is the path relative to the root path. For a file reached through
     * a link the entry is "<relative path><PATH_SEPARATOR><link path>".
     *
     * @param SplFileInfo $file
     * @param string $linkPath
     * @return void
     * @throws FinishedQueueException
     */
    protected function processFile(SplFileInfo $file, string $linkPath = '')
    {
        // Cheapest check first: needs neither path normalization nor a stat call
        if ($this->isExcludeByFileNameRule($file->getFilename())) {
            return;
        }

        $normalizedPath = $this->filesystem->normalizePath($file->getPathname(), !$file->isFile());
        $fileExtension  = $file->getExtension();
        $relativePath   = $this->getPathRelativeToRoot($normalizedPath);

        // Exclude wp-content/debug.log to prevent checksum failures caused by new log entries during backup
        $normalizedDebugPath = $this->filesystem->normalizePath($this->contentPath . '/debug.log');
        if ($normalizedPath === $normalizedDebugPath) {
            $this->logger->notice(sprintf(
                '%s: Skipped file "%s". Excluded by rule.',
                esc_html($this->logTitle),
                esc_html($relativePath)
            ));

            return;
        }

        if ($this->canExcludeLogFile($fileExtension) || $this->canExcludeCacheFile($fileExtension) || isset($this->ignoreFileExtensions[$fileExtension])) {
            // Early bail: File has an ignored extension
            $this->logger->notice(sprintf(
                '%s: Skipped file: "%s". Extension: "%s" is excluded by rule.',
                esc_html($this->logTitle),
                esc_html($relativePath),
                esc_html($fileExtension)
            ));

            return;
        }

        $fileSize = $file->getSize();

        // An extension specific limit replaces the general limit, it is not applied on top of it
        if (isset($this->ignoreFileExtensionFilesBiggerThan[$fileExtension])) {
            if ($fileSize > $this->ignoreFileExtensionFilesBiggerThan[$fileExtension]) {
                // Early bail: File bigger than expected for given extension
                $this->logger->notice(sprintf(
                    '%s: Skipped file "%s" (%s). It exceeds the maximum allowed file size for files with the extension "%s" (%s).',
                    esc_html($this->logTitle),
                    esc_html($relativePath),
                    size_format($fileSize),
                    esc_html($fileExtension),
                    size_format($this->ignoreFileExtensionFilesBiggerThan[$fileExtension])
                ));

                return;
            }
        } elseif ($fileSize > $this->ignoreFileBiggerThan) {
            // Early bail: File is larger than max allowed size.
            $this->logger->notice(sprintf(
                '%s: Skipped file "%s" (%s). It exceeds the maximum file size (%s).',
                esc_html($this->logTitle),
                esc_html($relativePath),
                size_format($fileSize),
                size_format($this->ignoreFileBiggerThan)
            ));

            return;
        }

        $this->scannerDto->incrementDiscoveredFiles();
        $this->scannerDto->incrementDiscoveredFilesByCategory($this->currentPathScanning);
        $this->scannerDto->addFilesystemSize($fileSize);

        $queueEntry = rtrim($this->replaceEOLsWithPlaceholders($relativePath), '/');
        if (!empty($linkPath)) {
            $linkPath    = $this->filesystem->normalizePath($linkPath, true);
            $queueEntry .= self::PATH_SEPARATOR . rtrim($linkPath, '/');
        }

        $this->filesystemQueue->enqueue($queueEntry);
    }

    /**
     * Queue a directory for a later scan unless it is excluded.
     *
     * The task queue entry is "<current part><PATH_SEPARATOR><directory>", followed by
     * "<PATH_SEPARATOR><link path>" when the directory was reached through a link.
     *
     * @param SplFileInfo $dir
     * @param SplFileInfo|null $link
     * @return void
     */
    protected function processDirectory(SplFileInfo $dir, $link = null)
    {
        // Year folders of the uploads directory are handed to preScanPath() instead of being queued as a whole
        if ($this->isUploadsYearMonthDirectory($dir)) {
            $this->preScanPath($dir->getPathname());
            return;
        }

        if ($this->isExcludedDirectory($dir->getPathname()) || $this->canExcludeCacheDir($dir)) {
            return;
        }

        if ($link !== null && $this->isExcludedDirectory($link->getPathname())) {
            return;
        }

        $normalizedPath = $this->filesystem->normalizePath($dir->getPathname(), true);
        $queueEntry     = $this->currentPathScanning . self::PATH_SEPARATOR . $normalizedPath;

        if ($link !== null) {
            $queueEntry .= self::PATH_SEPARATOR . $this->filesystem->normalizePath($link->getPathname(), true);
        }

        // The directory is scanned by a later processQueue() call, which dequeues this entry
        $this->taskQueue->enqueue($queueEntry);
    }

    /**
     * Whether the given directory is in the list of excluded directories of the DTO.
     *
     * A notice is logged when the directory is excluded.
     *
     * @param string $path
     * @return bool
     */
    protected function isExcludedDirectory(string $path): bool
    {
        $normalizedPath = $this->filesystem->normalizePath($path, true);

        // Strict comparison: paths are strings and must never be matched by loose numeric comparison
        if (!in_array($normalizedPath, $this->scannerDto->getExcludedDirectories(), true)) {
            return false;
        }

        $relativePathForLogging = str_replace($this->filesystem->normalizePath($this->contentPath, true), '', $normalizedPath);

        $this->logger->notice(sprintf(
            '%s: Skipped directory "%s". Excluded by rule',
            esc_html($this->logTitle),
            esc_html($relativePathForLogging)
        ));

        return true;
    }

    /**
     * RecursivePathScanning method extended to include exclude filter and directory increment
     * @inheritdoc
     */
    protected function recursivePathScanning(string $path, string $link = '')
    {
        if ($this->isExcludedDirectory($path)) {
            return;
        }

        $this->scannerDto->incrementTotalDirectories();

        parent::recursivePathScanning($path, $link);
    }

    /**
     * Whether the directory is a year named folder located directly in the uploads directory.
     *
     * Only evaluated while the uploads part is scanned. The folder name must be numeric and
     * between 1970 and 2100 (both exclusive).
     *
     * @param \SplFileInfo $dir
     * @return bool
     */
    protected function isUploadsYearMonthDirectory(SplFileInfo $dir): bool
    {
        if ($this->currentPathScanning !== PartIdentifier::UPLOAD_PART_IDENTIFIER) {
            return false;
        }

        // getPathInfo() yields null, not false, when no parent can be determined
        $parentDir = $dir->getPathInfo();
        if (!($parentDir instanceof SplFileInfo)) {
            return false;
        }

        if ($this->filesystem->normalizePath($parentDir->getPathname(), true) !== $this->directory->getUploadsDirectory()) {
            return false;
        }

        /**
         * This is a default WordPress year-month uploads folder.
         *
         * Here we break down the uploads folder by months, considering it's often the largest folder in a website,
         * and we need to be able to scan each folder in one request.
         */
        $folderName = $dir->getBasename();

        return is_numeric($folderName) && $folderName > 1970 && $folderName < 2100;
    }

    /**
     * Whether the file name matches one of the file name rules. A match is logged.
     *
     * @param string $fileName
     * @return bool
     */
    protected function isExcludeByFileNameRule(string $fileName): bool
    {
        $matchedRule = $this->findMatchingRule($this->fileNameRules, $fileName);
        if ($matchedRule === null) {
            return false;
        }

        $this->logger->info(sprintf(
            '%s: Skipped file "%s". Excluded by file name rule: "%s".',
            esc_html($this->logTitle),
            esc_html($fileName),
            esc_html($matchedRule)
        ));

        return true;
    }

    /**
     * Whether the folder name matches one of the folder name rules. A match is logged.
     *
     * @param string $folderName
     * @return bool
     */
    protected function isExcludeByFolderNameRule(string $folderName): bool
    {
        $matchedRule = $this->findMatchingRule($this->folderNameRules, $folderName);
        if ($matchedRule === null) {
            return false;
        }

        $this->logger->info(sprintf(
            '%s: Skipped directory "%s". Excluded by folder name rule: "%s".',
            esc_html($this->logTitle),
            esc_html($folderName),
            esc_html($matchedRule)
        ));

        return true;
    }

    /**
     * Whether the extension is "log" and the DTO is set to exclude logs.
     *
     * @param string $fileExtension
     * @return bool
     */
    private function canExcludeLogFile(string $fileExtension): bool
    {
        return $fileExtension === 'log' && $this->scannerDto->getIsExcludingLogs();
    }

    /**
     * Whether the extension is "cache" and the DTO is set to exclude caches.
     *
     * @param string $fileExtension
     * @return bool
     */
    private function canExcludeCacheFile(string $fileExtension): bool
    {
        return $fileExtension === 'cache' && $this->scannerDto->getIsExcludingCaches();
    }

    /**
     * Whether the directory can be skipped because caches are excluded and one of the
     * directory names of its real path is "cache". A notice is logged when it is skipped.
     *
     * @param SplFileInfo $dir
     * @return bool
     */
    private function canExcludeCacheDir(SplFileInfo $dir): bool
    {
        if (!$dir->isDir()) {
            return false;
        }

        if (!$this->scannerDto->getIsExcludingCaches()) {
            return false;
        }

        // getRealPath() returns false when the path cannot be resolved; such a directory is never excluded here
        $realPath = $dir->getRealPath();
        if ($realPath === false || !$this->isPathContainsCache($realPath)) {
            return false;
        }

        $this->logger->notice(sprintf(
            '%s: Skipped directory "%s". Excluded by smart exclusion rule: Excluding cache folder.',
            esc_html($this->logTitle),
            esc_html($realPath)
        ));

        return true;
    }

    /**
     * Check if "cache" is one of the directory names.
     *
     * The path is split by DIRECTORY_SEPARATOR, so it is expected to use the separator of the platform.
     *
     * @param string $path
     * @return bool
     */
    private function isPathContainsCache(string $path): bool
    {
        return in_array('cache', explode(DIRECTORY_SEPARATOR, $path), true);
    }

    /**
     * Remove the normalized root path from the beginning of an already normalized path.
     *
     * Only a leading occurrence is removed. A path that does not start with the root path
     * is returned unchanged.
     *
     * @param string $normalizedPath
     * @return string
     */
    private function getPathRelativeToRoot(string $normalizedPath): string
    {
        $normalizedRootPath = $this->filesystem->normalizePath($this->rootPath, true);

        if ($normalizedRootPath === '' || strpos($normalizedPath, $normalizedRootPath) !== 0) {
            return $normalizedPath;
        }

        return (string)substr($normalizedPath, strlen($normalizedRootPath));
    }

    /**
     * Find the first rule that matches the given name.
     *
     * @param array $rules
     * @param string $name
     * @return string|null The matching rule as it was configured, null when no rule matches.
     */
    private function findMatchingRule(array $rules, string $name)
    {
        foreach ($rules as $rule) {
            if ($this->ruleMatch($rule, $name)) {
                return $rule;
            }
        }

        return null;
    }

    /**
     * Evaluate a single name rule against a file or folder name.
     *
     * A rule has the form "<rule_type> <value>". Supported rule types are name_contains,
     * name_begins_with, name_ends_with and name_exact_matches. The comparison is case
     * sensitive. Malformed rules and unknown rule types never match.
     *
     * @param string $rule
     * @param string $name
     * @return bool
     */
    private function ruleMatch(string $rule, string $name): bool
    {
        $rule = trim($rule);
        if (strpos($rule, ' ') === false) {
            // Malformed rule, treat as no match
            return false;
        }

        // Only the first space separates type and value, so the value itself may contain spaces
        list($ruleType, $ruleValue) = explode(' ', $rule, 2);
        switch ($ruleType) {
            case 'name_contains':
                return strpos($name, $ruleValue) !== false;
            case 'name_begins_with':
                return strpos($name, $ruleValue) === 0;
            case 'name_ends_with':
                return substr($name, -strlen($ruleValue)) === $ruleValue;
            case 'name_exact_matches':
                return $name === $ruleValue;
            default:
                // Unknown rule type, treat as no match
                return false;
        }
    }
}
| 520 |