PluginProbe ʕ •ᴥ•ʔ
WP STAGING – WordPress Backup, Restore, Migration & Clone / 4.8.1
WP STAGING – WordPress Backup, Restore, Migration & Clone v4.8.1
4.9.1 4.9.0 4.8.1 trunk 3.0.0 3.0.1 3.0.2 3.0.3 3.0.4 3.0.5 3.0.6 3.1.0 3.1.1 3.1.2 3.1.3 3.1.4 3.10.0 3.2.0 3.3.1 3.3.2 3.3.3 3.4.1 3.4.3 3.5.0 3.6.0 3.7.1 3.8.0 3.8.1 3.8.2 3.8.3 3.8.4 3.8.5 3.8.6 3.8.7 3.9.0 3.9.1 3.9.2 3.9.3 3.9.4 4.0.0 4.1.0 4.1.1 4.1.2 4.1.3 4.1.4 4.2.0 4.2.1 4.3.0 4.3.1 4.3.2 4.4.0 4.5.0 4.6.0 4.7.0 4.7.1 4.7.2 4.7.3 4.8.0
wp-staging / Backup / Service / Extractor.php
wp-staging / Backup / Service Last commit date
Compression 4 months ago Database 1 month ago AbstractBackupsFinder.php 1 year ago AbstractExtractor.php 3 months ago AbstractServiceProvider.php 2 years ago Archiver.php 1 month ago BackupAssets.php 1 month ago BackupContent.php 1 year ago BackupMetadataEditor.php 1 year ago BackupMetadataReader.php 5 months ago BackupSigner.php 6 months ago BackupsFinder.php 4 months ago Extractor.php 3 months ago FileBackupService.php 6 months ago FileBackupServiceProvider.php 2 years ago ServiceInterface.php 2 years ago ZlibCompressor.php 11 months ago
Extractor.php
584 lines
1 <?php
2
3 /**
4 * Extracts files from WP Staging backup archives to restore sites
5 *
6 * Handles the extraction process for both compressed and uncompressed backups,
7 * including validation, disk space checks, and file restoration with proper permissions.
8 */
9
10 namespace WPStaging\Backup\Service;
11
12 use Exception;
13 use OutOfRangeException;
14 use RuntimeException;
15 use WPStaging\Backup\BackupFileIndex;
16 use WPStaging\Backup\BackupHeader;
17 use WPStaging\Backup\BackupValidator;
18 use WPStaging\Backup\Exceptions\EmptyChunkException;
19 use WPStaging\Backup\FileHeader;
20 use WPStaging\Backup\Interfaces\ExtractorTaskInterface;
21 use WPStaging\Core\WPStaging;
22 use WPStaging\Framework\Adapter\Directory;
23 use WPStaging\Framework\Job\Exception\DiskNotWritableException;
24 use WPStaging\Framework\Job\Exception\FileValidationException;
25 use WPStaging\Framework\Facades\Hooks;
26 use WPStaging\Framework\Filesystem\FileObject;
27 use WPStaging\Framework\Filesystem\DiskWriteCheck;
28 use WPStaging\Framework\Filesystem\MissingFileException;
29 use WPStaging\Framework\Filesystem\PathIdentifier;
30 use WPStaging\Framework\Filesystem\Permissions;
31 use WPStaging\Framework\Job\Dto\JobDataDto;
32 use WPStaging\Framework\Queue\FinishedQueueException;
33 use WPStaging\Framework\Traits\ResourceTrait;
34 use WPStaging\Framework\Traits\RestoreFileExclusionTrait;
35 use WPStaging\Vendor\Psr\Log\LoggerInterface;
36
37 class Extractor extends AbstractExtractor
38 {
39 use ResourceTrait;
40 use RestoreFileExclusionTrait;
41
42 /** @var LoggerInterface */
43 protected $logger;
44
45 /** @var DiskWriteCheck */
46 protected $diskWriteCheck;
47
48 /** @var BackupValidator */
49 protected $backupValidator;
50
51 /** @var ZlibCompressor */
52 protected $zlibCompressor;
53
54 /** @var ExtractorTaskInterface */
55 protected $extractorTask;
56
57 /** @var bool */
58 protected $isRepairMultipleHeadersIssue = false;
59
60 /** @var bool */
61 protected $isFastPerformanceMode = true;
62
63 /** @var bool */
64 protected $isLastRequestGracefulShutdown = true;
65
/**
 * Wires up the collaborators used during extraction.
 *
 * The path identifier, directory adapter, backup header and permissions helper are
 * forwarded to the parent constructor; the disk write check, zlib compressor and backup
 * validator are stored on this instance.
 *
 * @param PathIdentifier  $pathIdentifier
 * @param Directory       $directory
 * @param DiskWriteCheck  $diskWriteCheck
 * @param ZlibCompressor  $zlibCompressor
 * @param BackupValidator $backupValidator
 * @param BackupHeader    $backupHeader
 * @param Permissions     $permissions
 */
public function __construct(
    PathIdentifier $pathIdentifier,
    Directory $directory,
    DiskWriteCheck $diskWriteCheck,
    ZlibCompressor $zlibCompressor,
    BackupValidator $backupValidator,
    BackupHeader $backupHeader,
    Permissions $permissions
) {
    parent::__construct($pathIdentifier, $directory, $backupHeader, $permissions);

    $this->diskWriteCheck  = $diskWriteCheck;
    $this->backupValidator = $backupValidator;
    $this->zlibCompressor  = $zlibCompressor;
}
80
/**
 * Selects the backup format and installs the matching index-line DTO.
 *
 * V1 backups use a fresh BackupFileIndex; any other format uses a FileHeader
 * resolved through WPStaging::make().
 *
 * @param bool $isBackupFormatV1
 * @return void
 */
public function setIsBackupFormatV1(bool $isBackupFormatV1)
{
    $this->isBackupFormatV1 = $isBackupFormatV1;
    $this->indexLineDto     = $isBackupFormatV1
        ? new BackupFileIndex()
        : WPStaging::make(FileHeader::class);
}
94
/**
 * Toggles the repair pass that readAndPrepareChunk() applies to every chunk
 * (via maybeRepairMultipleHeadersIssue()) when enabled.
 *
 * @param bool $isRepairMultipleHeadersIssue
 * @return void
 */
public function setIsRepairMultipleHeadersIssue(bool $isRepairMultipleHeadersIssue)
{
    $this->isRepairMultipleHeadersIssue = $isRepairMultipleHeadersIssue;
}
103
/**
 * Enables or disables fast performance mode.
 *
 * While enabled, this class skips persisting the extractor DTO after each chunk and
 * after each processed file.
 *
 * @param bool $isFastPerformanceMode
 * @return void
 */
public function setIsFastPerformanceMode(bool $isFastPerformanceMode)
{
    $this->isFastPerformanceMode = $isFastPerformanceMode;
}
108
/**
 * Records whether the previous request ended gracefully.
 *
 * fileBatchWrite() consults this flag to decide whether it has to re-seek the backup
 * file based on the size of the partially written destination file.
 *
 * @param bool $isLastRequestGracefulShutdown
 * @return void
 */
public function setIsLastRequestGracefulShutdown(bool $isLastRequestGracefulShutdown)
{
    $this->isLastRequestGracefulShutdown = $isLastRequestGracefulShutdown;
}
113
/**
 * Supplies the per-job collaborators: the task used to persist the extractor DTO
 * and the logger that receives all messages emitted by this class.
 *
 * @param ExtractorTaskInterface $extractorTask
 * @param LoggerInterface $logger
 * @return void
 */
public function inject(ExtractorTaskInterface $extractorTask, LoggerInterface $logger)
{
    $this->logger        = $logger;
    $this->extractorTask = $extractorTask;
}
124
/**
 * Switches validate-only mode on or off.
 *
 * Turning the mode on also forces validation failures to be thrown. Turning it off
 * leaves the throw-on-failure flag untouched.
 *
 * @param bool $isValidateOnly
 * @return void
 */
public function setIsValidateOnly(bool $isValidateOnly)
{
    $this->isValidateOnly = $isValidateOnly;

    if (!$isValidateOnly) {
        return;
    }

    $this->throwExceptionOnValidationFailure = true;
}
136
/**
 * Extracts files from the backup until the resource threshold is reached.
 *
 * Each iteration first locates the next file to extract and then processes it.
 * Lookup failures are handled as follows:
 *  - OutOfRangeException: logged, and extraction stops for this request.
 *  - MissingFileException / RuntimeException: logged, and the loop moves on.
 *  - Any other Exception: dispatched on its code (filtered file / skipped item -> continue,
 *    finished queue -> FinishedQueueException); everything else is rethrown.
 *
 * MissingFileException is matched before RuntimeException because PHP uses the first
 * matching catch block: should MissingFileException derive from RuntimeException (its
 * hierarchy is not visible from this file), its dedicated log prefix would otherwise never
 * be emitted.
 *
 * @return void
 * @throws DiskNotWritableException
 * @throws FinishedQueueException When the lookup signals that the queue is finished.
 * @throws FileValidationException When validation fails and failures are configured to throw.
 * @throws Exception Any unrecognised exception raised while locating the next file.
 */
public function execute()
{
    while (!$this->isThreshold()) {
        try {
            $this->findFileToExtract();
        } catch (OutOfRangeException $e) {
            // Done processing, or failed
            $this->logger->warning('OutOfRangeException. Error: ' . $e->getMessage());
            return;
        } catch (MissingFileException $e) {
            // Must come before the broader RuntimeException handler (see method docblock).
            $this->logger->warning('MissingFileException. Error: ' . $e->getMessage());
            continue;
        } catch (RuntimeException $e) {
            $this->logger->warning($e->getMessage());
            continue;
        } catch (Exception $e) {
            if ($e->getCode() === self::FILE_FILTERED_EXCEPTION_CODE) {
                continue;
            }

            if ($e->getCode() === self::FINISHED_QUEUE_EXCEPTION_CODE) {
                throw new FinishedQueueException();
            }

            if ($e->getCode() === self::ITEM_SKIP_EXCEPTION_CODE) {
                continue;
            }

            throw $e;
        }

        try {
            $this->processCurrentFile();
        } catch (FileValidationException $e) {
            if ($this->isValidateOnly || $this->throwExceptionOnValidationFailure) {
                throw $e;
            }

            $this->logger->warning('Unable to validate file. Error: ' . $e->getMessage());
        }
    }
}
183
/**
 * Raises a MissingFileException for a backup part that could not be found,
 * keeping the triggering exception as its previous exception.
 *
 * @param Exception $ex Exception that revealed the missing part.
 * @param string $filePath Path of the missing backup part.
 * @return void
 * @throws MissingFileException Always.
 */
protected function throwMissingFileException(Exception $ex, string $filePath)
{
    $message = sprintf("Following backup part missing: %s", $filePath);

    throw new MissingFileException($message, 0, $ex);
}
193
/**
 * Tells whether the file currently being extracted exceeds the "big file" size.
 *
 * The limit defaults to 10 MB and can be overridden through the
 * 'wpstg.tests.restore.bigFileSize' filter.
 *
 * @return bool
 */
protected function isBigFile(): bool
{
    $bigFileLimit = Hooks::applyFilters('wpstg.tests.restore.bigFileSize', 10 * MB_IN_BYTES);
    $totalBytes   = $this->extractingFile->getTotalBytes();

    return $totalBytes > $bigFileLimit;
}
200
/**
 * Deletes an already existing uploads file before it gets restored.
 *
 * Nothing happens in validate-only mode, for identifiers other than uploads, or when
 * part of the file has already been written.
 *
 * @param string $identifier Path identifier of the file being extracted.
 * @return void
 * @throws \RuntimeException When the existing file cannot be deleted.
 */
protected function cleanExistingFile(string $identifier)
{
    if ($this->isValidateOnly) {
        return;
    }

    $isUntouchedUpload = $identifier === PathIdentifier::IDENTIFIER_UPLOADS
        && !($this->extractingFile->getWrittenBytes() > 0);
    if (!$isUntouchedUpload) {
        return;
    }

    $existingFile = $this->extractingFile->getBackupPath();

    // Delete the original upload file, if there is one
    if (!file_exists($existingFile) || unlink($existingFile)) {
        return;
    }

    throw new \RuntimeException(sprintf(__('Could not delete original media library file %s. Skipping restore of it...', 'wp-staging'), $this->extractingFile->getRelativePath()));
}
218
/**
 * Strips the last character of the extracted file once every file of the backup has
 * been extracted and the first line of the file index does not validate.
 *
 * Fixes issue https://github.com/wp-staging/wp-staging-pro/issues/2861
 *
 * @return void
 */
protected function maybeRemoveLastAccidentalCharFromLastExtractedFile()
{
    $isLastFileExtracted = $this->backupMetadata->getTotalFiles() === $this->extractorDto->getTotalFilesExtracted();

    if ($isLastFileExtracted && !$this->backupValidator->validateFileIndexFirstLine($this->wpstgFile, $this->backupMetadata)) {
        $this->removeLastCharInExtractedFile();
    }
}
235
/**
 * Resolves the folder a file with the given identifier is extracted into.
 *
 * Validate-only runs always use the validation directory below the restore directory;
 * uploads go to the uploads directory; everything else goes to the restore directory
 * followed by the identifier.
 *
 * @param string $identifier
 * @return string
 */
protected function getExtractFolder(string $identifier): string
{
    if ($this->isValidateOnly) {
        $folder = trailingslashit($this->dirRestore . self::VALIDATE_DIRECTORY);
    } elseif ($identifier === PathIdentifier::IDENTIFIER_UPLOADS) {
        $folder = $this->directory->getUploadsDirectory();
    } else {
        $folder = $this->dirRestore . $identifier;
    }

    return $folder;
}
248
/**
 * Extracts (or, in validate-only mode, validates) the file selected by the last lookup.
 *
 * Excluded upload files are counted as skipped and the index offset is advanced without
 * touching the disk. Otherwise the file is either extracted and validated in memory
 * (validate-only mode, non-V1 format, nothing read or written yet, within the memory
 * extraction limit, and not disabled through the in-memory extraction filter) or
 * extracted to disk.
 *
 * Failures while extracting are handled as follows:
 *  - DiskNotWritableException is rethrown.
 *  - OutOfRangeException marks the file as fully written and is ignored silently.
 *  - FileValidationException is rethrown when validation failures are configured to
 *    throw (validate-only mode or throwExceptionOnValidationFailure), so that the
 *    handler in execute() can act on it instead of the failure being silently swallowed
 *    by the generic handler.
 *  - Any other Exception marks the file as fully written so the next file can be
 *    processed; a warning is logged only when WPSTG_DEBUG is enabled.
 *
 * Outside fast performance mode, the extractor DTO is persisted at the end.
 *
 * @return void
 * @throws DiskNotWritableException
 * @throws FileValidationException When validation fails and failures are configured to throw.
 */
private function processCurrentFile()
{
    $destinationFilePath = $this->extractingFile->getBackupPath();
    if ($this->currentIdentifier === PathIdentifier::IDENTIFIER_UPLOADS && $this->isExcludedFile($destinationFilePath)) {
        $this->extractorDto->incrementTotalFilesSkipped();
        $this->extractorDto->setCurrentIndexOffset($this->wpstgIndexOffsetForNextFile);
        $this->debugLog('Skipping excluded upload file: ' . rtrim($destinationFilePath, "\n"));
        return;
    }

    if ($this->extractingFile->getWrittenBytes() > 0) {
        $this->logger->debug(sprintf('Resuming extraction of file %s from byte %d. Total size: %d...', $this->extractingFile->getRelativePath(), $this->extractingFile->getWrittenBytes(), $this->extractingFile->getTotalBytes()));
    }

    $uncompressedSize      = $this->indexLineDto->getUncompressedSize();
    $shouldExtractToMemory = $this->isValidateOnly
        && !$this->isBackupFormatV1
        && $this->extractingFile->getWrittenBytes() === 0
        && $this->extractingFile->getReadBytes() === 0
        && $this->isWithinMemoryExtractionLimit($uncompressedSize)
        && Hooks::applyFilters(JobDataDto::FILTER_BACKUP_USE_INMEMORY_EXTRACTION, true);

    try {
        if ($this->isThreshold()) {
            // Prevent considering a file as big just because we start extracting at the threshold
            return;
        }

        if ($shouldExtractToMemory) {
            $this->extractAndValidateInMemory();
            return;
        }

        $this->extractFileToDisk();
    } catch (DiskNotWritableException $e) {
        // Re-throw
        throw $e;
    } catch (OutOfRangeException $e) {
        // Backup header, should be ignored silently
        $this->extractingFile->setWrittenBytes($this->extractingFile->getTotalBytes());
    } catch (Exception $e) {
        // A validation failure must not be downgraded to a skipped file when the caller
        // asked for failures to be thrown; let execute() handle it.
        if ($e instanceof FileValidationException && ($this->isValidateOnly || $this->throwExceptionOnValidationFailure)) {
            throw $e;
        }

        // Set this file as "written", so that we can skip to the next file.
        $this->extractingFile->setWrittenBytes($this->extractingFile->getTotalBytes());

        if (defined('WPSTG_DEBUG') && WPSTG_DEBUG) {
            $this->logger->warning(sprintf('Skipped file %s. Reason: %s', $this->extractingFile->getRelativePath(), $e->getMessage()));
        }
    }

    if ($this->isFastPerformanceMode) {
        return;
    }

    $this->extractorTask->persistDto($this->extractorDto);
}
307
/**
 * Streams the current file from the backup into its destination, chunk by chunk, until
 * the file is finished or the resource threshold is reached.
 *
 * The destination handle is always released, including when reading a chunk throws,
 * so that an aborted extraction does not leak an open file handle.
 *
 * @return void
 * @throws DiskNotWritableException
 * @throws \WPStaging\Framework\Filesystem\FilesystemExceptions
 * @throws Exception When the destination cannot be opened or a chunk cannot be extracted.
 */
private function fileBatchWrite()
{
    $destinationFilePath = $this->extractingFile->getBackupPath();

    if (strpos($destinationFilePath, '.sql') !== false) {
        $this->logger->debug(sprintf('DEBUG: Extracting SQL file %s', $destinationFilePath));
    }

    $this->maybeResetFilePointerAfterInMemoryFallback();
    wp_mkdir_p(dirname($destinationFilePath));

    /**
     * On some servers, it is required to create empty file first, so we will create empty files.
     * On some servers, touch doesn't work consistently, so we will use fwrite, see the reason below.
     * On sites hosted on SiteGround, creating files using file_put_contents uses a lot of memory,
     * so by default we will use fwrite to create the empty file.
     * If creating the empty file using fwrite fails, let's try creating it using file_put_contents
     * @see https://github.com/wp-staging/wp-staging-pro/issues/3272 why it was needed.
     */
    if (!$this->createEmptyFile($destinationFilePath)) {
        file_put_contents($destinationFilePath, '');
    }

    $destinationFileResource = @fopen($destinationFilePath, FileObject::MODE_APPEND);
    if (!$destinationFileResource) {
        $this->diskWriteCheck->testDiskIsWriteable();
        throw new Exception("Can not extract file $destinationFilePath");
    }

    try {
        /**
         * When last request is not graceful shutdown and it is not fast performance mode (i.e. safe performance mode),
         * we need to set the file pointer to the correct position in the backup file to continue extraction from where it left off.
         * But this solution only works for non-compressed backups
         */
        if (!$this->isLastRequestGracefulShutdown && !$this->isFastPerformanceMode && !$this->extractingFile->getIsCompressed()) {
            // filesize() results are cached per path; make sure we see the real on-disk size.
            clearstatcache(true, $destinationFilePath);
            $fileSize     = filesize($destinationFilePath);
            $resumeOffset = $this->extractingFile->getStart() + $fileSize;

            $this->wpstgFile->fseek($resumeOffset);
            $this->extractingFile->setReadBytes($fileSize);
            $this->extractingFile->setWrittenBytes($fileSize);
            $this->logger->debug(sprintf('DEBUG: Seeking to byte %d in backup file to continue extraction of %s...', $resumeOffset, $this->extractingFile->getRelativePath()));
        }

        $lastDebugMessage = '';
        $processedChunks  = 0;
        while (!$this->extractingFile->isFinished() && !$this->isThreshold()) {
            $readBytesBefore = $this->wpstgFile->ftell();
            try {
                $chunk = $this->readAndPrepareChunk();
            } catch (DiskNotWritableException $ex) {
                $this->diskWriteCheck->testDiskIsWriteable();
                // Keep the original failure attached as the previous exception.
                throw new Exception("Unable to extract file to $destinationFilePath. Please check if there is enough disk space available.", 0, $ex);
            }

            if ($chunk === null) {
                continue;
            }

            $processedChunks++;
            $this->updateProgressTracking($processedChunks, $lastDebugMessage);
            $writtenBytes = $this->writeChunkToFile($destinationFileResource, $chunk);

            $this->trackChunkProgress($readBytesBefore, $writtenBytes);
            $this->persistDto();
        }

        if (!empty($lastDebugMessage)) {
            $this->logger->debug($lastDebugMessage);
        }
    } finally {
        // writeChunkToFile() closes the handle itself before throwing, hence the is_resource() guard.
        if (is_resource($destinationFileResource)) {
            fclose($destinationFileResource);
        }
    }
}
385
/**
 * Refreshes the extractor DTO and hands it to the extractor task for persistence.
 *
 * Does nothing in fast performance mode.
 *
 * @return void
 */
protected function persistDto()
{
    if (!$this->isFastPerformanceMode) {
        $this->updateExtractorDto();
        $this->extractorTask->persistDto($this->extractorDto);
    }
}
395
/**
 * Writes the current file to disk and, once it is reported as extracted, validates it
 * and advances to the next file.
 *
 * Messages produced by the extraction check are logged at info level.
 *
 * @return void
 * @throws Exception
 */
private function extractFileToDisk()
{
    $this->fileBatchWrite();

    $infoLogger = function ($message) {
        $this->logger->info($message);
    };

    if ($this->isExtractingFileExtracted($infoLogger)) {
        $this->validateExtractedFileAndMoveNext();
    }
}
413
/**
 * Reads the next chunk of the current file from the backup.
 *
 * When the multiple-headers repair is enabled, the chunk is passed through
 * maybeRepairMultipleHeadersIssue() before being returned.
 *
 * @return string|null The chunk, or null when the compressor service reported an empty chunk.
 * @throws DiskNotWritableException
 * @throws Exception
 */
private function readAndPrepareChunk()
{
    try {
        $chunk = $this->zlibCompressor->getService()->readChunk($this->wpstgFile, $this->extractingFile);
    } catch (EmptyChunkException $ex) {
        return null;
    }

    return $this->isRepairMultipleHeadersIssue
        ? $this->maybeRepairMultipleHeadersIssue($chunk)
        : $chunk;
}
433
/**
 * Refreshes the pending progress message on every 200th chunk and on the chunk whose
 * number equals the DTO's total chunk count.
 *
 * @param int $processedChunks Number of chunks processed so far for the current file.
 * @param string $lastDebugMessage Receives the new progress message (passed by reference).
 * @return void
 */
private function updateProgressTracking(int $processedChunks, string &$lastDebugMessage)
{
    $isReportingPoint = ($processedChunks % 200 === 0)
        || ($processedChunks === $this->extractorDto->getTotalChunks());

    if (!$isReportingPoint) {
        return;
    }

    $lastDebugMessage = sprintf('DEBUG: Extracting chunk %d/%d', $processedChunks, $this->extractorDto->getTotalChunks());
}
443
/**
 * Appends a chunk to the destination file, writing at most as many bytes as the
 * script memory limit.
 *
 * On failure the handle is closed before the exception is thrown.
 *
 * @param resource $fileResource Open handle of the destination file.
 * @param string $chunk Data to write.
 * @return int Number of bytes written (always greater than zero).
 * @throws DiskNotWritableException When nothing could be written.
 */
private function writeChunkToFile($fileResource, string $chunk): int
{
    $maxLength = (int)$this->getScriptMemoryLimit();
    $bytes     = fwrite($fileResource, $chunk, $maxLength);

    if ($bytes !== false && $bytes > 0) {
        return $bytes;
    }

    fclose($fileResource);
    throw DiskNotWritableException::diskNotWritable();
}
461
/**
 * Books the progress of one chunk on the file being extracted.
 *
 * The read counter grows by the distance the backup file pointer moved since
 * $readBytesBefore; the written counter grows by $chunkSize.
 *
 * @param int $readBytesBefore Backup file pointer position before the chunk was read.
 * @param int $chunkSize Number of bytes to add to the written counter.
 * @return void
 */
private function trackChunkProgress(int $readBytesBefore, int $chunkSize)
{
    $consumedBytes = $this->wpstgFile->ftell() - $readBytesBefore;

    $this->extractingFile->addReadBytes($consumedBytes);
    $this->extractingFile->addWrittenBytes($chunkSize);
}
471
/**
 * Validates in-memory file content against the current index line.
 *
 * The byte length must match the recorded uncompressed size. The CRC32 checksum is
 * compared as well, unless duplicate file headers were removed from the file, in which
 * case the checksum comparison is skipped and a debug message is logged.
 *
 * @param string $fileContent Complete content of the extracted file.
 * @param string $pathForErrorLogging Path used in error and debug messages.
 * @return void
 * @throws FileValidationException When the size or the checksum does not match.
 */
private function validateFileContent(string $fileContent, string $pathForErrorLogging)
{
    $actualSize   = strlen($fileContent);
    $expectedSize = $this->indexLineDto->getUncompressedSize();
    if ($expectedSize !== $actualSize) {
        $sizeError = sprintf(
            'Filesize validation failed for file %s. Expected: %s. Actual: %s',
            $pathForErrorLogging,
            $this->formatSize($expectedSize, 2),
            $this->formatSize($actualSize, 2)
        );

        throw new FileValidationException($sizeError);
    }

    if ($this->extractingFile->areHeaderBytesRemoved()) {
        $this->debugLog('Skipping validation for file because duplicate file headers were removed: ' . $pathForErrorLogging);
        return;
    }

    $actualChecksum = hash(FileHeader::CRC32_CHECKSUM_ALGO, $fileContent);
    /** @var FileHeader $fileHeader */
    $fileHeader       = $this->indexLineDto;
    $expectedChecksum = $fileHeader->getCrc32Checksum();
    if ($expectedChecksum === $actualChecksum) {
        return;
    }

    throw new FileValidationException(
        sprintf(
            'CRC32 Checksum validation failed for file %s. Expected: %s. Actual: %s',
            $pathForErrorLogging,
            $expectedChecksum,
            $actualChecksum
        )
    );
}
510
/**
 * Abandons an unfinished in-memory extraction: logs the switch and resets the written
 * byte counter of the current file to zero.
 *
 * @param string $pathForErrorLogging Path used in the debug message.
 * @return void
 */
private function switchFromInMemoryToDiskExtraction(string $pathForErrorLogging)
{
    $notice = sprintf(
        'Threshold reached during in-memory extraction of %s. Switching to disk-based extraction on next request.',
        $pathForErrorLogging
    );
    $this->logger->debug($notice);

    $this->extractingFile->setWrittenBytes(0);
}
523
/**
 * Reads the whole current file into memory and validates it without writing to disk.
 *
 * If the resource threshold is hit before the file is complete, the in-memory attempt is
 * abandoned (written bytes reset, DTO persisted) and the method returns. Otherwise the
 * assembled content is validated and the extractor advances to the next file.
 *
 * @return void
 * @throws FileValidationException
 * @throws Exception
 */
private function extractAndValidateInMemory()
{
    $pathForErrorLogging = $this->pathIdentifier->transformIdentifiableToPath($this->indexLineDto->getIdentifiablePath());

    $fileContent = '';
    while (!$this->extractingFile->isFinished() && !$this->isThreshold()) {
        $offsetBeforeRead = $this->wpstgFile->ftell();
        $chunk            = $this->readAndPrepareChunk();

        // A null chunk means there was nothing to read this round.
        if ($chunk !== null) {
            $fileContent .= $chunk;
            $this->trackChunkProgress($offsetBeforeRead, strlen($chunk));
        }
    }

    if ($this->extractingFile->isFinished()) {
        $this->validateFileContent($fileContent, $pathForErrorLogging);
        $this->moveToNextFile();
        return;
    }

    $this->switchFromInMemoryToDiskExtraction($pathForErrorLogging);
    $this->persistDto();
}
554
/**
 * Rewinds the extraction state when bytes were read for the current file but none were
 * written, which is the state an abandoned in-memory extraction leaves behind.
 *
 * The read counter is reset to zero and the backup file is positioned at the start
 * offset of the current file.
 *
 * @return void
 * @throws RuntimeException When seeking the backup file fails.
 */
private function maybeResetFilePointerAfterInMemoryFallback()
{
    if ($this->extractingFile->getWrittenBytes() !== 0 || $this->extractingFile->getReadBytes() === 0) {
        return;
    }

    $resetNotice = sprintf(
        'Starting disk extraction for %s after in-memory fallback (resetting state)',
        $this->extractingFile->getRelativePath()
    );
    $this->logger->debug($resetNotice);

    $this->extractingFile->setReadBytes(0);

    if ($this->wpstgFile->fseek($this->extractingFile->getStart()) === 0) {
        return;
    }

    $message = sprintf(
        'Failed to seek backup file to start offset %d for %s during disk extraction fallback.',
        $this->extractingFile->getStart(),
        $this->extractingFile->getRelativePath()
    );

    $this->logger->warning($message);
    throw new RuntimeException($message);
}
583 }
584