PluginProbe ʕ •ᴥ•ʔ
WP STAGING – WordPress Backup, Restore, Migration & Clone / trunk
WP STAGING – WordPress Backup, Restore, Migration & Clone vtrunk
4.9.1 4.9.0 4.8.1 trunk 3.0.0 3.0.1 3.0.2 3.0.3 3.0.4 3.0.5 3.0.6 3.1.0 3.1.1 3.1.2 3.1.3 3.1.4 3.10.0 3.2.0 3.3.1 3.3.2 3.3.3 3.4.1 3.4.3 3.5.0 3.6.0 3.7.1 3.8.0 3.8.1 3.8.2 3.8.3 3.8.4 3.8.5 3.8.6 3.8.7 3.9.0 3.9.1 3.9.2 3.9.3 3.9.4 4.0.0 4.1.0 4.1.1 4.1.2 4.1.3 4.1.4 4.2.0 4.2.1 4.3.0 4.3.1 4.3.2 4.4.0 4.5.0 4.6.0 4.7.0 4.7.1 4.7.2 4.7.3 4.8.0
wp-staging / Backup / Service / Extractor.php
wp-staging / Backup / Service Last commit date
Compression 4 months ago Database 1 week ago AbstractBackupsFinder.php 1 year ago AbstractExtractor.php 1 week ago AbstractServiceProvider.php 2 years ago Archiver.php 1 week ago BackupAssets.php 1 month ago BackupContent.php 1 year ago BackupMetadataEditor.php 1 year ago BackupMetadataReader.php 5 months ago BackupSigner.php 6 months ago BackupsDirectoryResolver.php 1 week ago BackupsFinder.php 1 week ago Extractor.php 1 week ago FileBackupService.php 1 week ago FileBackupServiceProvider.php 2 years ago ServiceInterface.php 2 years ago TmpBackupCleaner.php 1 week ago ZlibCompressor.php 11 months ago
Extractor.php
607 lines
1 <?php
2
3 /**
4 * Extracts files from WP Staging backup archives to restore sites
5 *
6 * Handles the extraction process for both compressed and uncompressed backups,
7 * including validation, disk space checks, and file restoration with proper permissions.
8 */
9
10 namespace WPStaging\Backup\Service;
11
12 use Exception;
13 use OutOfRangeException;
14 use RuntimeException;
15 use WPStaging\Backup\BackupFileIndex;
16 use WPStaging\Backup\BackupHeader;
17 use WPStaging\Backup\BackupValidator;
18 use WPStaging\Backup\Exceptions\EmptyChunkException;
19 use WPStaging\Backup\FileHeader;
20 use WPStaging\Backup\Interfaces\ExtractorTaskInterface;
21 use WPStaging\Core\WPStaging;
22 use WPStaging\Framework\Adapter\Directory;
23 use WPStaging\Framework\Job\Exception\DiskNotWritableException;
24 use WPStaging\Framework\Job\Exception\FileValidationException;
25 use WPStaging\Framework\Facades\Hooks;
26 use WPStaging\Framework\Filesystem\FileObject;
27 use WPStaging\Framework\Filesystem\DiskWriteCheck;
28 use WPStaging\Framework\Filesystem\MissingFileException;
29 use WPStaging\Framework\Filesystem\PathIdentifier;
30 use WPStaging\Framework\Filesystem\Permissions;
31 use WPStaging\Framework\Job\Dto\JobDataDto;
32 use WPStaging\Framework\Queue\FinishedQueueException;
33 use WPStaging\Framework\Traits\ResourceTrait;
34 use WPStaging\Framework\Traits\RestoreFileExclusionTrait;
35 use WPStaging\Vendor\Psr\Log\LoggerInterface;
36
37 class Extractor extends AbstractExtractor
38 {
39 use ResourceTrait;
40 use RestoreFileExclusionTrait;
41
/**
 * Logger used for all debug/info/warning output; set through inject().
 *
 * @var LoggerInterface
 */
protected $logger;

/**
 * Probes whether the disk is writable when opening the destination file
 * or reading a chunk fails.
 *
 * @var DiskWriteCheck
 */
protected $diskWriteCheck;

/**
 * Validates the first line of the backup file index, see
 * maybeRemoveLastAccidentalCharFromLastExtractedFile().
 *
 * @var BackupValidator
 */
protected $backupValidator;

/**
 * Provides the service that reads chunks out of the backup file.
 *
 * @var ZlibCompressor
 */
protected $zlibCompressor;

/**
 * Task through which the extractor DTO is persisted; set through inject().
 *
 * @var ExtractorTaskInterface
 */
protected $extractorTask;

/**
 * When true, every chunk read is passed through maybeRepairMultipleHeadersIssue().
 *
 * @var bool
 */
protected $isRepairMultipleHeadersIssue = false;

/**
 * When true, the extractor DTO is NOT persisted after each chunk / file.
 *
 * @var bool
 */
protected $isFastPerformanceMode = true;

/**
 * Whether the previous request shut down gracefully. When false (and fast
 * performance mode is off) extraction of uncompressed files resumes from the
 * bytes already present on disk.
 *
 * @var bool
 */
protected $isLastRequestGracefulShutdown = true;
65
/**
 * Wires up the extractor's collaborators.
 *
 * Path identifier, directory adapter, backup header and permissions are handed
 * to the parent extractor; the remaining services are kept on this instance.
 *
 * @param PathIdentifier  $pathIdentifier
 * @param Directory       $directory
 * @param DiskWriteCheck  $diskWriteCheck
 * @param ZlibCompressor  $zlibCompressor
 * @param BackupValidator $backupValidator
 * @param BackupHeader    $backupHeader
 * @param Permissions     $permissions
 */
public function __construct(
    PathIdentifier $pathIdentifier,
    Directory $directory,
    DiskWriteCheck $diskWriteCheck,
    ZlibCompressor $zlibCompressor,
    BackupValidator $backupValidator,
    BackupHeader $backupHeader,
    Permissions $permissions
) {
    parent::__construct($pathIdentifier, $directory, $backupHeader, $permissions);

    $this->diskWriteCheck  = $diskWriteCheck;
    $this->backupValidator = $backupValidator;
    $this->zlibCompressor  = $zlibCompressor;
}
80
/**
 * Records the backup format and picks the matching index-line DTO:
 * a BackupFileIndex for v1 backups, otherwise a FileHeader resolved
 * through WPStaging::make().
 *
 * @param bool $isBackupFormatV1
 * @return void
 */
public function setIsBackupFormatV1(bool $isBackupFormatV1)
{
    $this->isBackupFormatV1 = $isBackupFormatV1;
    $this->indexLineDto     = $isBackupFormatV1
        ? new BackupFileIndex()
        : WPStaging::make(FileHeader::class);
}
94
/**
 * Toggles whether chunks read from the backup are passed through
 * maybeRepairMultipleHeadersIssue() before being used.
 *
 * @param bool $isRepairMultipleHeadersIssue
 * @return void
 */
public function setIsRepairMultipleHeadersIssue(bool $isRepairMultipleHeadersIssue)
{
    $this->isRepairMultipleHeadersIssue = $isRepairMultipleHeadersIssue;
}
103
/**
 * Toggles fast performance mode. While it is on, the extractor DTO is not
 * persisted after every chunk / file.
 *
 * @param bool $isFastPerformanceMode
 * @return void
 */
public function setIsFastPerformanceMode(bool $isFastPerformanceMode)
{
    $this->isFastPerformanceMode = $isFastPerformanceMode;
}
108
/**
 * Tells the extractor whether the previous request ended gracefully.
 * fileBatchWrite() consults this flag to decide whether it has to resume
 * from the bytes already written to disk.
 *
 * @param bool $isLastRequestGracefulShutdown
 * @return void
 */
public function setIsLastRequestGracefulShutdown(bool $isLastRequestGracefulShutdown)
{
    $this->isLastRequestGracefulShutdown = $isLastRequestGracefulShutdown;
}
113
/**
 * Supplies the runtime collaborators that are not available at construction
 * time: the task used to persist the DTO and the logger.
 *
 * @param ExtractorTaskInterface $extractorTask
 * @param LoggerInterface        $logger
 * @return void
 */
public function inject(ExtractorTaskInterface $extractorTask, LoggerInterface $logger)
{
    $this->logger        = $logger;
    $this->extractorTask = $extractorTask;
}
124
/**
 * Switches validate-only mode on or off.
 *
 * Enabling it also forces validation failures to be thrown; disabling it
 * leaves the throw-on-failure flag untouched.
 *
 * @param bool $isValidateOnly
 * @return void
 */
public function setIsValidateOnly(bool $isValidateOnly)
{
    $this->isValidateOnly = $isValidateOnly;

    if (!$isValidateOnly) {
        return;
    }

    $this->throwExceptionOnValidationFailure = true;
}
136
/**
 * Main extraction loop: keeps locating and processing files until the
 * resource threshold is hit, the index is exhausted, or an unrecoverable
 * error bubbles up.
 *
 * @return void
 * @throws DiskNotWritableException
 * @throws FileValidationException When validation fails and failures must be thrown.
 * @throws FinishedQueueException  When the lookup signals that the queue is finished.
 * @throws Exception               Any other unexpected lookup failure.
 */
public function execute()
{
    while (!$this->isThreshold()) {
        try {
            $this->findFileToExtract();
        } catch (OutOfRangeException $outOfRange) {
            // Done processing, or failed
            $this->logger->warning('OutOfRangeException. Error: ' . $outOfRange->getMessage());

            return;
        } catch (RuntimeException $runtimeError) {
            $this->logger->warning($runtimeError->getMessage());

            continue;
        } catch (MissingFileException $missingFile) {
            // NOTE(review): if MissingFileException extends RuntimeException this branch
            // can never be reached because of the catch above — confirm the hierarchy.
            $this->logger->warning('MissingFileException. Error: ' . $missingFile->getMessage());

            continue;
        } catch (Exception $unexpected) {
            // The exception code tells us how to react to the remaining cases.
            $code = $unexpected->getCode();

            if ($code === self::FILE_FILTERED_EXCEPTION_CODE) {
                continue;
            }

            if ($code === self::FINISHED_QUEUE_EXCEPTION_CODE) {
                throw new FinishedQueueException();
            }

            if ($code === self::ITEM_SKIP_EXCEPTION_CODE) {
                continue;
            }

            throw $unexpected;
        }

        try {
            $this->processCurrentFile();
        } catch (FileValidationException $validationFailure) {
            $mustPropagate = $this->isValidateOnly || $this->throwExceptionOnValidationFailure;
            if ($mustPropagate) {
                throw $validationFailure;
            }

            $this->logger->warning('Unable to validate file. Error: ' . $validationFailure->getMessage());
        }
    }
}
183
/**
 * Always throws: wraps the given exception in a MissingFileException that
 * names the missing backup part.
 *
 * @param Exception $ex       Original failure, kept as the previous exception.
 * @param string    $filePath Path of the backup part that could not be found.
 * @return void
 * @throws MissingFileException
 */
protected function throwMissingFileException(Exception $ex, string $filePath)
{
    $message = sprintf("Following backup part missing: %s", $filePath);

    throw new MissingFileException($message, 0, $ex);
}
193
/**
 * Tells whether the file being extracted counts as "big".
 *
 * The limit defaults to 10 MB and can be overridden through the
 * `wpstg.tests.restore.bigFileSize` filter.
 *
 * @return bool
 */
protected function isBigFile(): bool
{
    $bigFileThreshold = Hooks::applyFilters('wpstg.tests.restore.bigFileSize', 10 * MB_IN_BYTES);
    $totalBytes       = $this->extractingFile->getTotalBytes();

    return $totalBytes > $bigFileThreshold;
}
200
/**
 * Removes an already existing uploads file before it is extracted from scratch.
 *
 * Nothing happens in validate-only mode, for identifiers other than uploads,
 * for files that were already partially written, or for continuation segments
 * of a multipart-split file.
 *
 * @param string $identifier Path identifier of the file being extracted.
 * @return void
 * @throws RuntimeException When the existing file cannot be deleted.
 */
protected function cleanExistingFile(string $identifier)
{
    if ($this->isValidateOnly) {
        return;
    }

    $isFreshUploadsFile = $identifier === PathIdentifier::IDENTIFIER_UPLOADS
        && !($this->extractingFile->getWrittenBytes() > 0);
    if (!$isFreshUploadsFile) {
        return;
    }

    // Continuation segment of a multipart-split upload file — the bytes already on disk
    // belong to the previous parts and must be kept so the file can be stitched together.
    $isContinuationSegment = $this->indexLineDto instanceof FileHeader
        && $this->indexLineDto->getIsPreviousPartRequired();
    if ($isContinuationSegment) {
        return;
    }

    $existingFilePath = $this->extractingFile->getBackupPath();
    if (!file_exists($existingFilePath)) {
        return;
    }

    // Delete the original upload file
    if (unlink($existingFilePath)) {
        return;
    }

    throw new RuntimeException(sprintf(__('Could not delete original media library file %s. Skipping restore of it...', 'wp-staging'), $this->extractingFile->getRelativePath()));
}
224
/**
 * Once every file of the backup has been extracted, strips the trailing
 * character from the last extracted file if the first line of the file index
 * does not validate.
 *
 * Fixes issue https://github.com/wp-staging/wp-staging-pro/issues/2861
 *
 * @return void
 */
protected function maybeRemoveLastAccidentalCharFromLastExtractedFile()
{
    $allFilesExtracted = $this->backupMetadata->getTotalFiles() === $this->extractorDto->getTotalFilesExtracted();
    if (!$allFilesExtracted) {
        return;
    }

    $isIndexIntact = $this->backupValidator->validateFileIndexFirstLine($this->wpstgFile, $this->backupMetadata);
    if (!$isIndexIntact) {
        $this->removeLastCharInExtractedFile();
    }
}
241
/**
 * Resolves the folder a file with the given identifier is extracted into.
 *
 * Validate-only runs use the validation directory below the restore directory,
 * uploads go to the uploads directory, and everything else goes to the restore
 * directory suffixed with the identifier.
 *
 * @param string $identifier
 * @return string
 */
protected function getExtractFolder(string $identifier): string
{
    if ($this->isValidateOnly) {
        return trailingslashit($this->dirRestore . self::VALIDATE_DIRECTORY);
    }

    return $identifier === PathIdentifier::IDENTIFIER_UPLOADS
        ? $this->directory->getUploadsDirectory()
        : $this->dirRestore . $identifier;
}
254
/**
 * Extracts (or, in validate-only mode, validates) the file currently selected
 * for extraction.
 *
 * Excluded upload files are counted as skipped and the index offset is advanced
 * without touching the disk. Small files of non-v1, non-segmented backups are
 * validated fully in memory during validate-only runs; everything else is
 * written to disk.
 *
 * Validation failures are propagated to the caller instead of being swallowed
 * by the generic error handler, so that execute() can decide whether to abort
 * or merely log them. The file is still marked as written and the DTO is still
 * persisted (safe performance mode) before the failure is rethrown.
 *
 * @return void
 * @throws DiskNotWritableException
 * @throws FileValidationException
 */
private function processCurrentFile()
{
    $destinationFilePath = $this->extractingFile->getBackupPath();
    if ($this->currentIdentifier === PathIdentifier::IDENTIFIER_UPLOADS && $this->isExcludedFile($destinationFilePath)) {
        $this->extractorDto->incrementTotalFilesSkipped();
        $this->extractorDto->setCurrentIndexOffset($this->wpstgIndexOffsetForNextFile);
        $this->debugLog('Skipping excluded upload file: ' . rtrim($destinationFilePath, "\n"));
        return;
    }

    if ($this->extractingFile->getWrittenBytes() > 0) {
        $this->logger->debug(sprintf('Resuming extraction of file %s from byte %d. Total size: %d...', $this->extractingFile->getRelativePath(), $this->extractingFile->getWrittenBytes(), $this->extractingFile->getTotalBytes()));
    }

    $uncompressedSize = $this->indexLineDto->getUncompressedSize();

    // In-memory validation is only attempted for a file that has not been touched yet.
    $shouldExtractToMemory = $this->isValidateOnly
        && !$this->isBackupFormatV1
        && !$this->isCurrentSegmentedFileHeader()
        && $this->extractingFile->getWrittenBytes() === 0
        && $this->extractingFile->getReadBytes() === 0
        && $this->isWithinMemoryExtractionLimit($uncompressedSize)
        && Hooks::applyFilters(JobDataDto::FILTER_BACKUP_USE_INMEMORY_EXTRACTION, true);

    /** @var FileValidationException|null $validationFailure */
    $validationFailure = null;

    try {
        if ($this->isThreshold()) {
            // Prevent considering a file as big just because we start extracting at the threshold
            return;
        }

        if ($shouldExtractToMemory) {
            $this->extractAndValidateInMemory();
            return;
        }

        $this->extractFileToDisk();
    } catch (DiskNotWritableException $e) {
        // Re-throw
        throw $e;
    } catch (FileValidationException $e) {
        // Must be caught before the generic handler below, otherwise the failure is
        // silently discarded. Mark the file as "written" so it is skipped, and rethrow
        // once the bookkeeping below is done.
        $this->extractingFile->setWrittenBytes($this->extractingFile->getTotalBytes());
        $validationFailure = $e;
    } catch (OutOfRangeException $e) {
        // Backup header, should be ignored silently
        $this->extractingFile->setWrittenBytes($this->extractingFile->getTotalBytes());
    } catch (Exception $e) {
        // Set this file as "written", so that we can skip to the next file.
        $this->extractingFile->setWrittenBytes($this->extractingFile->getTotalBytes());

        if (defined('WPSTG_DEBUG') && WPSTG_DEBUG) {
            $this->logger->warning(sprintf('Skipped file %s. Reason: %s', $this->extractingFile->getRelativePath(), $e->getMessage()));
        }
    }

    // Safe performance mode persists the DTO after every processed file.
    if (!$this->isFastPerformanceMode) {
        $this->extractorTask->persistDto($this->extractorDto);
    }

    if ($validationFailure !== null) {
        throw $validationFailure;
    }
}
314
/**
 * Writes as many chunks of the current file as the resource threshold allows
 * to its destination on disk.
 *
 * The destination handle is always released, even when reading or writing a
 * chunk throws part-way through.
 *
 * @return void
 * @throws DiskNotWritableException
 * @throws \WPStaging\Framework\Filesystem\FilesystemExceptions
 * @throws Exception When the destination cannot be opened or a chunk cannot be read.
 */
private function fileBatchWrite()
{
    $destinationFilePath = $this->extractingFile->getBackupPath();

    if (strpos($destinationFilePath, '.sql') !== false) {
        $this->logger->debug(sprintf('DEBUG: Extracting SQL file %s', $destinationFilePath));
    }

    $this->maybeResetFilePointerAfterInMemoryFallback();
    wp_mkdir_p(dirname($destinationFilePath));

    /**
     * On some servers, it is required to create empty file first, so we will create empty files.
     * On some servers, touch doesn't work consistently, so we will use fwrite, see the reason below.
     * On sites hosted on SiteGround, creating files using file_puts_contents uses a lot of memory,
     * so by default we will use fwrite to create the empty file.
     * If creating the empty file using fwrite fails, let try creating it using file_put_contents
     * @see https://github.com/wp-staging/wp-staging-pro/issues/3272 why it was needed.
     */
    if (!$this->createEmptyFile($destinationFilePath)) {
        file_put_contents($destinationFilePath, '');
    }

    $destinationFileResource = @fopen($destinationFilePath, FileObject::MODE_APPEND);
    if (!$destinationFileResource) {
        $this->diskWriteCheck->testDiskIsWriteable();
        throw new Exception("Can not extract file $destinationFilePath");
    }

    try {
        /**
         * When last request is not graceful shutdown and it is not fast performance mode (i.e. safe performance mode),
         * we need to set the file pointer to the correct position in the backup file to continue extraction from where it left off.
         * But this solution only works for non-compressed backups
         */
        if (!$this->isLastRequestGracefulShutdown && !$this->isFastPerformanceMode && !$this->extractingFile->getIsCompressed()) {
            $fileSize = $this->getRecoverableCurrentSegmentBytes($destinationFilePath);
            $this->wpstgFile->fseek($this->extractingFile->getStart() + $fileSize);
            $this->extractingFile->setReadBytes($fileSize);
            $this->extractingFile->setWrittenBytes($fileSize);
            $this->logger->debug(sprintf('DEBUG: Seeking to byte %d in backup file to continue extraction of %s...', $this->extractingFile->getStart() + $fileSize, $this->extractingFile->getRelativePath()));
        }

        $lastDebugMessage = '';
        $processedChunks  = 0;
        while (!$this->extractingFile->isFinished() && !$this->isThreshold()) {
            $readBytesBefore = $this->wpstgFile->ftell();
            try {
                $chunk = $this->readAndPrepareChunk();
            } catch (DiskNotWritableException $ex) {
                $this->diskWriteCheck->testDiskIsWriteable();
                // Keep the original failure attached for debugging.
                throw new Exception("Unable to extract file to $destinationFilePath. Please check if there is enough disk space available.", 0, $ex);
            }

            if ($chunk === null) {
                continue;
            }

            $processedChunks++;
            $this->updateProgressTracking($processedChunks, $lastDebugMessage);
            $writtenBytes = $this->writeChunkToFile($destinationFileResource, $chunk);

            $this->trackChunkProgress($readBytesBefore, $writtenBytes);
            $this->persistDto();
        }

        if (!empty($lastDebugMessage)) {
            $this->logger->debug($lastDebugMessage);
        }
    } finally {
        // writeChunkToFile() closes the handle itself before throwing, hence the guard:
        // is_resource() is false for an already closed stream.
        if (is_resource($destinationFileResource)) {
            fclose($destinationFileResource);
        }

        $destinationFileResource = null;
    }
}
392
/**
 * Refreshes and persists the extractor DTO, unless fast performance mode is
 * enabled, in which case nothing is persisted.
 *
 * @return void
 */
protected function persistDto()
{
    if (!$this->isFastPerformanceMode) {
        $this->updateExtractorDto();
        $this->extractorTask->persistDto($this->extractorDto);
    }
}
402
/**
 * Writes the current file to disk and, once it is reported as fully
 * extracted, validates it and advances to the next file.
 *
 * @return void
 * @throws Exception
 */
private function extractFileToDisk()
{
    $this->fileBatchWrite();

    // Messages produced while checking the extraction state go to the info log.
    $infoLogger = function ($message) {
        $this->logger->info($message);
    };

    if ($this->isExtractingFileExtracted($infoLogger)) {
        $this->validateExtractedFileAndMoveNext();
    }
}
420
/**
 * Reads the next chunk of the current file from the backup and, when the
 * repair flag is set, runs it through the multiple-headers repair.
 *
 * @return string|null The chunk, or null when the reader reported an empty chunk.
 * @throws DiskNotWritableException
 * @throws Exception
 */
private function readAndPrepareChunk()
{
    try {
        $rawChunk = $this->zlibCompressor->getService()->readChunk($this->wpstgFile, $this->extractingFile);
    } catch (EmptyChunkException $ex) {
        return null;
    }

    return $this->isRepairMultipleHeadersIssue
        ? $this->maybeRepairMultipleHeadersIssue($rawChunk)
        : $rawChunk;
}
440
/**
 * Refreshes the pending progress message on every 200th chunk and when the
 * number of processed chunks equals the DTO's total chunk count.
 *
 * @param int    $processedChunks  Number of chunks processed so far.
 * @param string $lastDebugMessage Overwritten (by reference) with the new message.
 * @return void
 */
private function updateProgressTracking(int $processedChunks, string &$lastDebugMessage)
{
    $totalChunks   = $this->extractorDto->getTotalChunks();
    $isMilestone   = $processedChunks % 200 === 0;
    $isFinalChunk  = $processedChunks === $totalChunks;

    if (!$isMilestone && !$isFinalChunk) {
        return;
    }

    $lastDebugMessage = sprintf('DEBUG: Extracting chunk %d/%d', $processedChunks, $totalChunks);
}
450
/**
 * Appends a chunk to the open destination file.
 *
 * The write length is capped at the script memory limit. If nothing could be
 * written the handle is closed and a disk-not-writable error is raised.
 *
 * @param resource $fileResource Open handle of the destination file.
 * @param string   $chunk        Data to write.
 * @return int Number of bytes actually written (always greater than zero).
 * @throws DiskNotWritableException
 */
private function writeChunkToFile($fileResource, string $chunk): int
{
    $maxLength = (int)$this->getScriptMemoryLimit();
    $bytes     = fwrite($fileResource, $chunk, $maxLength);

    if ($bytes !== false && $bytes > 0) {
        return $bytes;
    }

    fclose($fileResource);

    throw DiskNotWritableException::diskNotWritable();
}
468
/**
 * Works out how many bytes of the current segment are already on disk.
 *
 * For a continuation segment the DTO's base bytes are subtracted from the
 * on-disk size. The result is clamped to the range
 * [0, total bytes of the file being extracted].
 *
 * @param string $destinationFilePath
 * @return int 0 when the file size cannot be determined.
 */
private function getRecoverableCurrentSegmentBytes(string $destinationFilePath): int
{
    // Make sure filesize() does not answer from a stale stat cache.
    clearstatcache();

    $sizeOnDisk = filesize($destinationFilePath);
    if ($sizeOnDisk === false) {
        return 0;
    }

    $recoverableBytes      = (int) $sizeOnDisk;
    $isContinuationSegment = $this->indexLineDto instanceof FileHeader
        && $this->indexLineDto->getIsPreviousPartRequired();

    if ($isContinuationSegment) {
        $recoverableBytes -= $this->extractorDto->getExtractorFileBaseBytes();
    }

    $cappedAtTotal = min($this->extractingFile->getTotalBytes(), $recoverableBytes);

    return max(0, $cappedAtTotal);
}
484
/**
 * Books the progress of one processed chunk on the file being extracted.
 *
 * Read bytes are derived from how far the backup file pointer has moved since
 * $readBytesBefore; written bytes are taken from $chunkSize.
 *
 * @param int $readBytesBefore Backup file pointer position before the chunk was read.
 * @param int $chunkSize       Number of bytes written (or held in memory) for the chunk.
 * @return void
 */
private function trackChunkProgress(int $readBytesBefore, int $chunkSize)
{
    $bytesConsumed = $this->wpstgFile->ftell() - $readBytesBefore;

    $this->extractingFile->addReadBytes($bytesConsumed);
    $this->extractingFile->addWrittenBytes($chunkSize);
}
494
/**
 * Validates in-memory file content against the current index entry.
 *
 * The size is always compared with the expected uncompressed size. The CRC32
 * checksum is compared as well, unless duplicate file headers were removed
 * from the content, in which case the checksum check is skipped and logged.
 *
 * @param string $fileContent         Complete content of the extracted file.
 * @param string $pathForErrorLogging Path used in error and debug messages.
 * @return void
 * @throws FileValidationException On size or checksum mismatch.
 */
private function validateFileContent(string $fileContent, string $pathForErrorLogging)
{
    $actualSize   = strlen($fileContent);
    $expectedSize = $this->indexLineDto->getUncompressedSize();

    if ($expectedSize !== $actualSize) {
        $sizeError = sprintf(
            'Filesize validation failed for file %s. Expected: %s. Actual: %s',
            $pathForErrorLogging,
            $this->formatSize($expectedSize, 2),
            $this->formatSize($actualSize, 2)
        );

        throw new FileValidationException($sizeError);
    }

    if ($this->extractingFile->areHeaderBytesRemoved()) {
        $this->debugLog('Skipping validation for file because duplicate file headers were removed: ' . $pathForErrorLogging);

        return;
    }

    $actualChecksum = hash(FileHeader::CRC32_CHECKSUM_ALGO, $fileContent);

    /** @var FileHeader $fileHeader */
    $fileHeader       = $this->indexLineDto;
    $expectedChecksum = $fileHeader->getCrc32Checksum();

    if ($expectedChecksum === $actualChecksum) {
        return;
    }

    $checksumError = sprintf(
        'CRC32 Checksum validation failed for file %s. Expected: %s. Actual: %s',
        $pathForErrorLogging,
        $expectedChecksum,
        $actualChecksum
    );

    throw new FileValidationException($checksumError);
}
533
/**
 * Abandons an unfinished in-memory extraction: logs the switch and resets the
 * written-bytes counter of the current file to zero. The read-bytes counter is
 * left as is.
 *
 * @param string $pathForErrorLogging Path used in the log message.
 * @return void
 */
private function switchFromInMemoryToDiskExtraction(string $pathForErrorLogging)
{
    $notice = sprintf(
        'Threshold reached during in-memory extraction of %s. Switching to disk-based extraction on next request.',
        $pathForErrorLogging
    );
    $this->logger->debug($notice);

    $this->extractingFile->setWrittenBytes(0);
}
546
/**
 * Reads the current file chunk by chunk into memory and validates the
 * assembled content without writing anything to disk.
 *
 * If the resource threshold is reached before the file is complete, the
 * in-memory attempt is abandoned and the state is persisted so extraction can
 * fall back to disk.
 *
 * @return void
 * @throws FileValidationException
 * @throws Exception
 */
private function extractAndValidateInMemory()
{
    $pathForErrorLogging = $this->pathIdentifier->transformIdentifiableToPath($this->indexLineDto->getIdentifiablePath());

    $fileContent = '';
    while (!$this->extractingFile->isFinished() && !$this->isThreshold()) {
        $pointerBeforeRead = $this->wpstgFile->ftell();
        $chunk             = $this->readAndPrepareChunk();

        // Empty chunks carry no data; just try the next one.
        if ($chunk === null) {
            continue;
        }

        $fileContent .= $chunk;
        $this->trackChunkProgress($pointerBeforeRead, strlen($chunk));
    }

    if ($this->extractingFile->isFinished()) {
        $this->validateFileContent($fileContent, $pathForErrorLogging);
        $this->moveToNextFile();

        return;
    }

    $this->switchFromInMemoryToDiskExtraction($pathForErrorLogging);
    $this->persistDto();
}
577
/**
 * Rewinds the backup file to the start of the current file when bytes were
 * read but none were written, i.e. the state left behind by an abandoned
 * in-memory extraction. Does nothing in every other state.
 *
 * @return void
 * @throws RuntimeException When seeking the backup file fails.
 */
private function maybeResetFilePointerAfterInMemoryFallback()
{
    $hasWrittenBytes = $this->extractingFile->getWrittenBytes() !== 0;
    if ($hasWrittenBytes || $this->extractingFile->getReadBytes() === 0) {
        return;
    }

    $this->logger->debug(sprintf(
        'Starting disk extraction for %s after in-memory fallback (resetting state)',
        $this->extractingFile->getRelativePath()
    ));

    $this->extractingFile->setReadBytes(0);

    // fseek() reports success with 0.
    if ($this->wpstgFile->fseek($this->extractingFile->getStart()) === 0) {
        return;
    }

    $failure = sprintf(
        'Failed to seek backup file to start offset %d for %s during disk extraction fallback.',
        $this->extractingFile->getStart(),
        $this->extractingFile->getRelativePath()
    );
    $this->logger->warning($failure);

    throw new RuntimeException($failure);
}
606 }
607