PluginProbe ʕ •ᴥ•ʔ
Media Cleaner: Clean your WordPress! / 7.0.1
Media Cleaner: Clean your WordPress! v7.0.1
7.1.1 7.1.0 7.0.9 7.0.8 trunk 3.6.8 3.6.9 3.7.0 3.8.0 3.9.0 4.0.0 4.0.2 4.0.4 4.0.6 4.0.7 4.1.0 4.2.0 4.2.2 4.2.3 4.2.4 4.2.5 4.4.0 4.4.2 4.4.4 4.4.6 4.4.7 4.4.8 4.5.0 4.5.4 4.5.6 4.5.7 4.5.8 4.6.2 4.6.3 4.8.0 4.8.4 5.0.0 5.0.1 5.1.0 5.1.1 5.1.3 5.2.0 5.2.1 5.2.4 5.4.0 5.4.1 5.4.2 5.4.3 5.4.4 5.4.5 5.4.6 5.4.9 5.5.0 5.5.1 5.5.2 5.5.3 5.5.4 5.5.7 5.5.8 5.6.1 5.6.2 5.6.3 5.6.4 6.0.1 6.0.2 6.0.3 6.0.4 6.0.5 6.0.6 6.0.7 6.0.8 6.0.9 6.1.2 6.1.3 6.1.4 6.1.5 6.1.6 6.1.7 6.1.8 6.1.9 6.2.0 6.2.1 6.2.3 6.2.4 6.2.5 6.2.6 6.2.7 6.2.8 6.3.0 6.3.1 6.3.2 6.3.4 6.3.5 6.3.7 6.3.8 6.3.9 6.4.0 6.4.1 6.4.2 6.4.3 6.4.4 6.4.5 6.4.6 6.4.7 6.4.8 6.4.9 6.5.0 6.5.1 6.5.2 6.5.3 6.5.4 6.5.5 6.5.6 6.5.7 6.5.8 6.5.9 6.6.1 6.6.2 6.6.3 6.6.4 6.6.5 6.6.6 6.6.7 6.6.8 6.6.9 6.7.0 6.7.1 6.7.2 6.7.3 6.7.4 6.7.5 6.7.6 6.7.7 6.7.8 6.7.9 6.8.0 6.8.1 6.8.2 6.8.3 6.8.4 6.8.5 6.8.6 6.8.7 6.8.8 6.8.9 6.9.0 6.9.1 6.9.2 6.9.3 6.9.4 6.9.5 6.9.6 6.9.7 6.9.8 6.9.9 7.0.0 7.0.1 7.0.2 7.0.3 7.0.4 7.0.5 7.0.6 7.0.7
media-cleaner / classes / core.php
media-cleaner / classes Last commit date
parsers 9 months ago admin.php 7 months ago core.php 6 months ago engine.php 6 months ago init.php 7 months ago parsers.php 10 months ago rest.php 6 months ago support.php 6 months ago ui.php 3 years ago
core.php
2473 lines
1 <?php
2
3 class Meow_WPMC_Core {
4
5
// Collaborators and runtime flags. Most of them are filled in by plugins_loaded().
public $admin = null; // Meow_WPMC_Admin instance.
public $is_rest = false; // Result of MeowKit_WPMC_Helpers::is_rest().
public $is_cli = false; // True when the WP_CLI constant is defined and truthy.
public $is_pro = false;
public $engine = null; // Meow_WPMC_Engine instance (only created for admin, REST or CLI).
public $catch_timeout = true; // This will halt the plugin before reaching the PHP timeout.
// Pipe-separated extensions injected into $regex_file in place of MIMETYPES.
public $types = "jpg|jpeg|jpe|gif|png|tiff|bmp|csv|svg|pdf|xls|xlsx|doc|docx|odt|wpd|rtf|tiff|mp3|mp4|mov|wav|lua|webp|avif|ico";
public $current_method = 'media'; // Overwritten with the 'method' option.
public $servername = null; // meowapps.com (site URL without http/https)
public $site_url = null; // https://meowapps.com
public $upload_path = null; // /www/wp-content/uploads (path to uploads)
public $upload_url = null; // wp-content/uploads (uploads without domain)
private $option_name = 'wpmc_options';
private $nonce = null; // Nonce for the REST API

// Template for detecting file names; MIMETYPES is replaced by $types in plugins_loaded().
private $regex_file = '/[A-Za-z0-9-_,.\(\)\s]+[.]{1}(MIMETYPES)/';

private $refcache = array();
private $use_cached_references = false; // Overwritten with the 'use_cached_references' option.
private $progress_key = 'wpmc_progress';
private $cached_ids_key = 'wpmc_cached_ids';
private $cached_urls_key = 'wpmc_cached_urls';

private $cached_ids_cli = array();
private $cached_urls_cli = array();

private $check_content = null; // Value of the 'content' option.
private $debug_logs = null; // Value of the 'debuglogs' option; log() writes nothing when falsy unless forced.
private $multilingual = false; // Result of is_multilingual().
private $languages = array(); // Language codes returned by get_languages().
private $shortcode_analysis = false; // Negation of the 'shortcodes_disabled' option.
37
/**
 * Tells whether shortcodes are analyzed (the flag is the negation of the
 * 'shortcodes_disabled' option, see plugins_loaded()).
 *
 * @return bool Current value of the shortcode analysis flag.
 */
public function get_shortcode_analysis() {
	$is_enabled = $this->shortcode_analysis;
	return $is_enabled;
}
41
/**
 * Registers the WordPress hooks this class relies on.
 */
public function __construct() {
	// Bootstrapping hooks.
	add_action( 'plugins_loaded', array( $this, 'plugins_loaded' ) );
	add_action( 'init', array( $this, 'init' ) );

	// Both events remove the scan rows attached to the post.
	$cleanup = array( $this, 'delete_attachment_related_data' );
	foreach ( array( 'delete_attachment', 'trashed_post' ) as $hook ) {
		add_action( $hook, $cleanup, 10, 1 );
	}
}
48
/**
 * Callback for the 'plugins_loaded' action.
 *
 * Always creates the UI (dashboard only), the admin object and, when the
 * class exists, the Pro core. Everything else (instance variables, engine,
 * REST controller) is only set up for WP-CLI, the Media Cleaner screens or
 * Media Cleaner REST requests.
 */
function plugins_loaded() {
	if ( is_admin() ) {
		new Meow_WPMC_UI( $this );
	}

	// Admin
	$this->admin = new Meow_WPMC_Admin( $this );

	// Advanced core
	if ( class_exists( 'MeowPro_WPMC_Core' ) ) {
		new MeowPro_WPMC_Core( $this );
	}

	// Is the current dashboard page one of ours?
	$requested_page = null;
	if ( isset( $_GET["page"] ) ) {
		$requested_page = sanitize_text_field( $_GET["page"] );
	}
	$on_wpmc_screen = in_array( $requested_page, array( 'wpmc_dashboard', 'wpmc_settings' ) );

	// Is this a REST request aimed at the Media Cleaner namespace?
	$on_wpmc_rest = false;
	if ( MeowKit_WPMC_Helpers::is_rest() ) {
		$request_uri = '';
		if ( isset( $_SERVER['REQUEST_URI'] ) ) {
			$request_uri = $_SERVER['REQUEST_URI'];
		}
		$on_wpmc_rest = strpos( $request_uri, '/media-cleaner/v1' ) !== false;
	}

	$running_in_cli = defined( 'WP_CLI' ) && WP_CLI;

	// Nothing more to do on unrelated requests.
	if ( !$running_in_cli && !$on_wpmc_screen && !$on_wpmc_rest ) {
		return;
	}

	// Variables
	$this->site_url = get_site_url();
	$this->multilingual = $this->is_multilingual();
	$this->languages = $this->get_languages();
	$this->current_method = $this->get_option( 'method' );
	$this->regex_file = str_replace( "MIMETYPES", $this->types, $this->regex_file );
	// Strip the scheme: 'https://' first, then 'http://'.
	$this->servername = str_replace( array( 'https://', 'http://' ), '', $this->site_url );
	$upload_info = wp_upload_dir();
	$this->upload_path = $upload_info['basedir'];
	$this->upload_url = substr( $upload_info['baseurl'], strlen( $this->site_url ) );
	$this->check_content = $this->get_option( 'content' );
	$this->debug_logs = $this->get_option( 'debuglogs' );
	$this->is_rest = MeowKit_WPMC_Helpers::is_rest();
	$this->is_cli = defined( 'WP_CLI' ) && WP_CLI;
	$this->shortcode_analysis = !$this->get_option( 'shortcodes_disabled' );
	$this->use_cached_references = $this->get_option( 'use_cached_references' );

	// Expose this instance globally.
	global $wpmc;
	$wpmc = $this;

	// Language
	load_plugin_textdomain( WPMC_DOMAIN, false, basename( WPMC_PATH ) . '/languages' );

	// Install hooks and engine only if they might be used
	$needs_engine = is_admin() || $this->is_rest || $this->is_cli;
	if ( $needs_engine ) {
		add_action( 'wpmc_initialize_parsers', array( $this, 'initialize_parsers' ), 10, 0 );
		add_filter( 'wp_unique_filename', array( $this, 'wp_unique_filename' ), 10, 3 );
		$this->engine = new Meow_WPMC_Engine( $this, $this->admin );
	}

	// Only for REST
	if ( $this->is_rest ) {
		new Meow_WPMC_Rest( $this, $this->admin );
	}
}
120
/**
 * Callback for the 'init' action: unhooks the 'wp_scheduled_delete'
 * callback from the action of the same name.
 */
function init() {
	$hook = 'wp_scheduled_delete';
	remove_action( $hook, $hook );
}
124
/**
 * Returns the 'wp_rest' nonce, creating and memoizing it on first use.
 *
 * @param bool $force When true, a nonce is returned even if no user is logged in.
 * @return string|null The nonce, or null for anonymous visitors (unless forced).
 */
public function get_nonce( $force = false ) {
	$allowed = $force || is_user_logged_in();
	if ( !$allowed ) {
		return null;
	}

	if ( !isset( $this->nonce ) ) {
		$this->nonce = wp_create_nonce( 'wp_rest' );
	}
	return $this->nonce;
}
136
/**
 * Callback for the 'wpmc_initialize_parsers' action: loads the parsers file
 * and instantiates Meow_WPMC_Parsers.
 *
 * The file is resolved against this file's directory. A bare relative
 * include is looked up through include_path and the working directory
 * first, which could pick up an unrelated parsers.php.
 */
function initialize_parsers() {
	include_once( __DIR__ . '/parsers.php' );
	new Meow_WPMC_Parsers();
}
141
/**
 * Keeps the request busy for more than $seconds seconds by calling
 * get_post() in a loop, then returns.
 *
 * @param int $seconds Duration to exceed, compared against time().
 * @return bool Always false.
 */
function deepsleep( $seconds ) {
	$began_at = time();
	while ( ( time() - $began_at ) <= $seconds ) {
		get_post( array( 'posts_per_page' => 50 ) );
	}
	return false;
}
151
// State used by the timeout_* methods. Times come from time(), i.e. whole seconds.
private $start_time; // Set by timeout_check_start().
private $time_elapsed = 0; // time() - $start_time, refreshed by timeout_check() and timeout_check_additem().
private $time_remaining = 0; // max_execution_time - wordpress_init_time - time_elapsed.
private $item_scan_avg_time = 0; // Average time per checked item, rounded up to one decimal.
private $wordpress_init_time = 0.5; // Subtracted from the time budget in timeout_check().
private $max_execution_time; // Memoized by get_max_execution_time().
private $items_checked = 0; // Incremented by timeout_check_additem().
private $items_count = 0; // Count passed to timeout_check_start().
160
/**
 * Returns PHP's max_execution_time, memoized on first call.
 * An empty value or a value below 5 is replaced by 30.
 *
 * @return int|string The ini value as returned by ini_get(), or 30.
 */
function get_max_execution_time() {
	if ( !isset( $this->max_execution_time ) ) {
		$configured = ini_get( "max_execution_time" );
		$too_small = empty( $configured ) || $configured < 5;
		$this->max_execution_time = $too_small ? 30 : $configured;
	}
	return $this->max_execution_time;
}
171
/**
 * Starts the timeout clock for a batch of items.
 *
 * @param int $count Number of items in the batch (stored in $items_count).
 */
function timeout_check_start( $count ) {
	$this->items_count = $count;
	$this->start_time = time();
	// Make sure the execution limit is resolved before timeout_check() needs it.
	$this->get_max_execution_time();
}
177
/**
 * Returns the elapsed time as a string suffixed with 'ms'.
 *
 * $time_elapsed is a difference of time() values, hence whole seconds. It is
 * converted to milliseconds so the number matches the suffix (it used to be
 * printed in seconds with an 'ms' label).
 *
 * @return string For example "3000ms".
 */
function timeout_get_elapsed() {
	$elapsed_ms = $this->time_elapsed * 1000;
	return $elapsed_ms . 'ms';
}
181
/**
 * Aborts the request with a 408 status when there is not enough time left
 * to process one more item before PHP's max_execution_time.
 *
 * Does nothing beyond refreshing the timers when $catch_timeout is false.
 * The execution limit is read through get_max_execution_time() and the clock
 * is started lazily, so calling this without timeout_check_start() no longer
 * looks like an immediate timeout.
 */
function timeout_check() {
	if ( !isset( $this->start_time ) ) {
		$this->start_time = time();
	}

	$this->time_elapsed = time() - $this->start_time;
	$this->time_remaining = $this->get_max_execution_time() - $this->wordpress_init_time - $this->time_elapsed;

	if ( !$this->catch_timeout ) {
		return;
	}

	// Still enough room for an average item: keep going.
	if ( $this->time_remaining - $this->item_scan_avg_time >= 0 ) {
		return;
	}

	error_log("Media Cleaner Timeout! Check the Media Cleaner logs for more info.");
	$this->log( "😵 Timeout! Some info for debug:" );
	$this->log( "🍀 Elapsed time: $this->time_elapsed" );
	$this->log( "🍀 WP init time: $this->wordpress_init_time" );
	$this->log( "🍀 Remaining time: $this->time_remaining" );
	$this->log( "🍀 Scan time per item: $this->item_scan_avg_time" );
	$this->log( "🍀 PHP max_execution_time: $this->max_execution_time" );

	// header() emits a warning once output has started.
	if ( !headers_sent() ) {
		header("HTTP/1.0 408 Request Timeout");
	}
	exit;
}
199
/**
 * Deletes the rows of the {prefix}mclean_scan table that reference a post.
 * Hooked on 'delete_attachment' and 'trashed_post' by the constructor.
 *
 * @param int $post_id ID matched against the postId column.
 */
function delete_attachment_related_data( $post_id ) {
	global $wpdb;
	$table_name = $wpdb->prefix . "mclean_scan";
	$statement = $wpdb->prepare( "DELETE FROM $table_name WHERE postId = %d", $post_id );
	$wpdb->query( $statement );
}
205
/**
 * Records that one more item was checked and refreshes the average time
 * per item (rounded up to one decimal).
 */
function timeout_check_additem() {
	$this->items_checked += 1;
	$this->time_elapsed = time() - $this->start_time;
	$per_item = $this->time_elapsed / $this->items_checked;
	$this->item_scan_avg_time = ceil( $per_item * 10 ) / 10;
}
211
/**
 * Filter for 'wp_unique_filename'.
 *
 * Makes sure a newly uploaded file does not get the same name as a file
 * currently sitting in the Media Cleaner trash (that would cause issues).
 * When it does, a timestamp is inserted before the extension.
 *
 * @param string $filename Proposed file name.
 * @param string $ext      Extension passed by the filter (unused).
 * @param string $dir      Directory the file is uploaded to.
 * @return string The file name to use.
 */
function wp_unique_filename( $filename, $ext, $dir ) {
	$fullpath = trailingslashit( $dir ) . $filename;
	$relativepath = $this->clean_uploaded_filename( $fullpath );
	$trashfilepath = trailingslashit( $this->get_trashdir() ) . $relativepath;
	if ( !file_exists( $trashfilepath ) ) {
		return $filename;
	}

	$path_parts = pathinfo( $fullpath );
	// pathinfo() omits 'extension' for names without a dot.
	$suffix = isset( $path_parts['extension'] ) ? '.' . $path_parts['extension'] : '';
	return $path_parts['filename'] . '-' . date( 'Ymd-His', time() ) . $suffix;
}
228
/**
 * Walks an array and collects what looks like media IDs or file URLs.
 *
 * - Numeric values greater than 0 are pushed to $ids.
 * - Strings made of digits, spaces and commas (with at least one comma) are
 *   split and each numeric piece is pushed to $ids (as int, without duplicates).
 * - Other strings matching the file regex are cleaned and pushed to $urls.
 * - Nested arrays are only visited when $recursive is true.
 *
 * Non-string, non-numeric scalars and objects are ignored; objects used to
 * be handed to preg_match(), which throws a TypeError on PHP 8.
 *
 * @param array|object $meta      Data to walk. Anything else is ignored.
 * @param array        $ids       Filled with the IDs found.
 * @param array        $urls      Filled with the URLs found.
 * @param bool         $recursive Whether nested arrays are visited.
 * @param array        $filters   When not empty, only these keys are considered.
 */
function array_to_ids_or_urls( $meta, &$ids, &$urls, $recursive = false, $filters = array() ) {
	if ( !is_array( $meta ) && !is_object( $meta ) ) {
		return;
	}

	$has_filters = !empty( $filters ) && is_array( $filters );

	foreach ( $meta as $k => $m ) {

		if ( is_numeric( $m ) ) {
			if ( $has_filters && !in_array( $k, $filters ) ) {
				continue;
			}
			// Probably a Media ID
			if ( $m > 0 ) {
				array_push( $ids, $m );
			}
			continue;
		}

		if ( is_array( $m ) ) {
			if ( $recursive ) {
				// If it's an array, we need to go deeper
				$this->array_to_ids_or_urls( $m, $ids, $urls, true, $filters );
			}
			continue;
		}

		if ( empty( $m ) ) {
			continue;
		}

		if ( $has_filters && !in_array( $k, $filters ) ) {
			continue;
		}

		// Only strings can be ID lists or file names.
		if ( !is_string( $m ) ) {
			continue;
		}

		// Only digits, spaces and commas, with at least one comma:
		// it is probably a list of IDs.
		if ( preg_match( '/^[\d\s,]+$/', $m ) && strpos( $m, ',' ) !== false ) {
			$pieces = explode( ',', str_replace( ' ', '', $m ) );
			foreach ( $pieces as $piece ) {
				if ( is_numeric( $piece ) && !in_array( (int)$piece, $ids ) ) {
					array_push( $ids, (int)$piece );
				}
			}
			continue;
		}

		// Maybe it's a file (with an extension)
		if ( preg_match( $this->regex_file, $m ) ) {
			array_push( $urls, $this->clean_url( $m ) );
		}
	}
}
286
/**
 * Returns the cleaned 'company_logo' URL stored in the Yoast SEO
 * 'wpseo_titles' option.
 *
 * @return mixed The result of clean_url(), or null when the option has no
 *               usable URL.
 */
function get_favicon() {
	// Yoast SEO plugin
	$yoast_titles = get_option( 'wpseo_titles' );
	if ( empty( $yoast_titles ) || !isset( $yoast_titles['company_logo'] ) ) {
		return;
	}

	$logo = $yoast_titles['company_logo'];
	if ( !$this->is_url( $logo ) ) {
		return;
	}
	return $this->clean_url( $logo );
}
296
/**
 * Extracts media IDs and URLs from the attributes of every shortcode found
 * in $html.
 *
 * Attributes named in $ids_attr are read as a single ID or a comma-separated
 * list of IDs; attributes named in $urls_attr are read as URLs (they must
 * contain 'http').
 *
 * Attribute values that parse_str() turns into arrays (e.g. `ids[]=1`) are
 * skipped instead of being passed to string functions, and both result lists
 * are re-indexed after de-duplication.
 *
 * @param string $html      Content to scan.
 * @param array  $ids_attr  Attribute names holding IDs.
 * @param array  $urls_attr Attribute names holding URLs.
 * @return array { 'urls' => string[], 'ids' => int[] }
 */
function get_all_shortcodes_attributes( $html, $ids_attr = array(), $urls_attr = array() ) {
	$urls_values = array();
	$ids_values = array();

	$pattern = get_shortcode_regex();
	if ( preg_match_all( '/'. $pattern .'/s', $html, $matches ) ) {
		// $matches[3] holds the attributes of each shortcode as a string.
		foreach ( $matches[3] as $raw_attributes ) {
			// Replace spaces with '&' so parse_str() can split the attributes.
			$query = str_replace( " ", "&", trim( $raw_attributes ) );
			$query = str_replace( '"', '', $query );
			parse_str( $query, $attributes );

			foreach ( $attributes as $attr_key => $attr_value ) {
				// parse_str() yields arrays for keys like `ids[]`.
				if ( !is_string( $attr_value ) ) {
					continue;
				}

				if ( in_array( $attr_key, $ids_attr ) ) {
					if ( is_numeric( $attr_value ) ) {
						if ( !in_array( (int)$attr_value, $ids_values ) ) {
							array_push( $ids_values, (int)$attr_value );
						}
					}
					// In case of separated by commas
					else if ( strpos( $attr_value, ',' ) !== false ) {
						$pieces = explode( ',', str_replace( ' ', '', $attr_value ) );
						foreach ( $pieces as $pval ) {
							if ( is_numeric( $pval ) && !in_array( (int)$pval, $ids_values ) ) {
								array_push( $ids_values, (int)$pval );
							}
						}
					}
				}
				else if ( in_array( $attr_key, $urls_attr ) ) {
					$candidate = trim( $attr_value );
					$looks_like_url = !empty( $candidate )
						&& !in_array( $candidate, $urls_values )
						&& !is_numeric( $candidate )
						&& strpos( $candidate, 'http' ) !== false;
					if ( $looks_like_url ) {
						array_push( $urls_values, trim( $this->clean_url( $attr_value ) ) );
					}
				}
			}
		}
	}

	// Remove duplicates (array_values() closes the gaps left by array_unique()).
	return array(
		'urls' => array_values( array_unique( $urls_values ) ),
		'ids' => array_values( array_unique( $ids_values ) )
	);
}
353
354
355
/**
 * Turns a string containing (possibly nested) shortcodes into a tree.
 *
 * Each node is either
 *   [ 'type' => 'text', 'content' => string ] or
 *   [ 'type' => 'shortcode', 'tag' => string, 'attributes' => array, 'children' => array (optional) ].
 * Text made only of whitespace produces no node.
 *
 * @param string $content The string containing the shortcodes.
 * @return array The list of top-level nodes.
 */
function nested_shortcodes_to_array(string $content): array
{
	$pattern = '/\\[' . '(\\[?)' . '([\w-]+)' . '(?![\\w-])' . '(' . '[^\\]\\/]*' . '(?:' . '\\/(?!\\])' . '[^\\]\\/]*' . ')*?' . ')' . '(?:' . '(\\/)' . '\\]' . '|' . '\\]' . '(?:' . '(' . '[^\\[]*+' . '(?:' . '\\[(?!\\/\\2\\])' . '[^\\[]*+' . ')*+' . ')' . '\\[\\/\\2\\]' . ')?' . ')' . '(\\]?)/s';

	$tree = [];
	$cursor = 0;

	// Appends a text node unless the text is blank.
	$append_text = function ( $text ) use ( &$tree ) {
		if ( trim( $text ) !== '' ) {
			$tree[] = [
				'type' => 'text',
				'content' => $text
			];
		}
	};

	// PREG_OFFSET_CAPTURE gives the position of every match in $content.
	$found = preg_match_all( $pattern, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );
	if ( $found ) {
		foreach ( $matches as $match ) {
			list( $full_match, $offset ) = $match[0];

			// Text sitting between the previous shortcode and this one.
			if ( $offset > $cursor ) {
				$append_text( substr( $content, $cursor, $offset - $cursor ) );
			}

			// Quoted attributes: name="value" or name='value'.
			$attributes = [];
			if ( preg_match_all( '/([\w-]+)\s*=\s*(["\'])([^"\']*?)\2/', $match[3][0], $pairs, PREG_SET_ORDER ) ) {
				foreach ( $pairs as $pair ) {
					$attributes[ $pair[1] ] = $pair[3];
				}
			}

			$node = [
				'type' => 'shortcode',
				'tag' => $match[2][0],
				'attributes' => $attributes,
			];

			// Group 5 is the content enclosed by the shortcode; parse it the same way.
			$inner = isset( $match[5] ) ? $match[5][0] : null;
			if ( $inner !== null ) {
				$children = $this->nested_shortcodes_to_array( $inner );
				if ( !empty( $children ) ) {
					$node['children'] = $children;
				}
			}

			$tree[] = $node;
			$cursor = $offset + strlen( $full_match );
		}
	}

	// Whatever follows the last shortcode.
	if ( $cursor < strlen( $content ) ) {
		$append_text( substr( $content, $cursor ) );
	}

	return $tree;
}
439
440
441
442
/**
 * Returns the attributes of every occurrence of a shortcode in a post.
 *
 * Every returned row has the same keys (the union of all attribute names
 * found), sorted by key; attributes missing from an occurrence are null.
 *
 * The fill-in step now checks the attribute itself. It used to test
 * isset( $output[$key] ) twice and then read an undefined index.
 *
 * @param string $shortcode_tag Shortcode name.
 * @param object $post          Post whose post_content is scanned.
 * @return array|false One attribute array per occurrence, or false when the
 *                     post does not contain the shortcode.
 */
function get_shortcode_attributes( $shortcode_tag, $post ) {
	if ( !has_shortcode( $post->post_content, $shortcode_tag ) ) {
		return false;
	}

	$output = array();
	$pattern = get_shortcode_regex( [ $shortcode_tag ] );
	if ( preg_match_all( '/'. $pattern .'/s', $post->post_content, $matches ) ) {
		$keys = array();

		// $matches[3] holds the attributes of each occurrence as a string.
		foreach ( $matches[3] as $raw_attributes ) {
			// Replace spaces with '&' so parse_str() can split the attributes.
			$query = str_replace( " ", "&", trim( $raw_attributes ) );
			$query = str_replace( '"', '', $query );
			parse_str( $query, $sub_output );

			$keys = array_unique( array_merge( $keys, array_keys( $sub_output ) ) );
			$output[] = $sub_output;
		}

		if ( $keys && $output ) {
			foreach ( $output as $index => $attributes ) {
				// Add the attributes this occurrence does not have.
				foreach ( $keys as $attr_key ) {
					if ( !isset( $output[$index][$attr_key] ) ) {
						$output[$index][$attr_key] = null;
					}
				}
				ksort( $output[$index] );
			}
		}
	}
	return $output;
}
481
/**
 * Finds http(s) URLs in a string with a regular expression.
 *
 * @param string $string Text to scan.
 * @return array The URLs found, each passed through clean_url().
 */
function get_urls_from_string( $string ) {
	$found = array();
	if ( !preg_match_all( '/https?:\/\/[^\s"\']+/i', $string, $matches ) ) {
		return $found;
	}

	foreach ( $matches[0] as $raw_url ) {
		$found[] = $this->clean_url( $raw_url );
	}
	return $found;
}
494
/**
 * Extracts the media URLs referenced by a piece of HTML.
 *
 * Looks at: og:image / twitter:image <meta> tags, local <iframe> documents
 * (recursively), <img> src and srcset, <video> src and poster, <audio> and
 * <source> src, <a> and <link> href, plus PDF links and CSS url() images
 * found with regular expressions.
 *
 * Changes compared to the previous implementation:
 * - A root-relative iframe src ("/page") is resolved against the site origin
 *   instead of being given to loadHTMLFile() as a local filesystem path.
 *   Protocol-relative sources ("//host/page") get the site scheme and are
 *   only loaded when they point to this server.
 * - The libxml error mode is restored to its previous value after parsing.
 * - Iframe recursion is capped so a page embedding itself cannot loop forever.
 *
 * @param string $html  HTML to analyze.
 * @param int    $depth Internal: current iframe nesting level.
 * @return array List of URLs/paths (may contain duplicates).
 * @throws Error When the DOM extension is missing.
 */
function get_urls_from_html( $html, $depth = 0 ) {
	if ( empty( $html ) ) {
		return array();
	}

	// Proposal/fix by @copytrans
	// Discussion: https://wordpress.org/support/topic/bug-in-core-php/#post-11647775
	// Modified by Jordy again in 2021 for those who don't have MB enabled
	if ( function_exists( 'mb_encode_numericentity' ) ) {
		$convmap = [0x80, 0xffff, 0, 0xffff];
		$html = mb_encode_numericentity( $html, $convmap, 'UTF-8' );
	} else {
		$html = preg_replace_callback(
			'/[\x80-\xFF]/',
			function( $match ) {
				return '&#' . ord( $match[0] ) . ';';
			},
			$html
		);
	}

	// Resolve src-set and shortcodes
	if ( $this->get_shortcode_analysis() ) {
		$html = do_shortcode( $html );
	}

	// wp_filter_content_tags() replaces wp_make_content_images_responsive() since WP 5.5.
	$html = function_exists( 'wp_filter_content_tags' ) ? wp_filter_content_tags( $html ) :
		wp_make_content_images_responsive( $html );

	// Create the DOM Document
	if ( !class_exists("DOMDocument") ) {
		error_log( 'Media Cleaner: The DOM extension for PHP is not installed.' );
		throw new Error( 'The DOM extension for PHP is not installed.' );
	}

	if ( empty( $html ) ) {
		return array();
	}

	// Silence markup errors while parsing, then put libxml back as it was.
	$previous_libxml_mode = libxml_use_internal_errors( true );
	$dom = new DOMDocument();
	@$dom->loadHTML( $html );
	libxml_clear_errors();
	libxml_use_internal_errors( $previous_libxml_mode );
	$results = array();

	// <meta> tags in <head> area
	$image_properties = array( 'og:image', 'og:image:secure_url', 'twitter:image' );
	foreach ( $dom->getElementsByTagName( 'meta' ) as $meta ) {
		if ( !in_array( $meta->getAttribute( 'property' ), $image_properties, true ) ) {
			continue;
		}
		$url = $meta->getAttribute( 'content' );
		if ( $this->is_url( $url ) ) {
			$src = $this->clean_url( $url );
			if ( !empty( $src ) ) {
				array_push( $results, $src );
			}
		}
	}

	// IFrames (by Mike Meinz)
	$max_iframe_depth = 3;
	$site_parts = parse_url( (string)$this->site_url );
	$site_scheme = !empty( $site_parts['scheme'] ) ? $site_parts['scheme'] : 'http';
	$site_origin = '';
	if ( !empty( $site_parts['host'] ) ) {
		$site_origin = $site_scheme . '://' . $site_parts['host'];
		if ( !empty( $site_parts['port'] ) ) {
			$site_origin .= ':' . $site_parts['port'];
		}
	}
	foreach ( $dom->getElementsByTagName( 'iframe' ) as $iframe ) {
		$iframe_src = $iframe->getAttribute( 'src' );
		$on_this_server = !empty( $this->servername )
			&& strpos( $iframe_src, $this->servername ) !== false;

		// Build the URL to load; ignore the iframe if it is not on this server.
		if ( substr( $iframe_src, 0, 2 ) === '//' ) {
			if ( !$on_this_server ) {
				continue;
			}
			$iframe_url = $site_scheme . ':' . $iframe_src;
		}
		else if ( substr( $iframe_src, 0, 1 ) === '/' ) {
			if ( $site_origin === '' ) {
				continue;
			}
			$iframe_url = $site_origin . $iframe_src;
		}
		else if ( $on_this_server ) {
			$iframe_url = $iframe_src;
		}
		else {
			continue;
		}

		if ( $depth >= $max_iframe_depth ) {
			$this->log( '🚫 Failed to load iframe: ' . $iframe_src );
			continue;
		}

		// Load the iframe document, ignoring html formatting problems
		$iframe_doc = new DOMDocument();
		$previous_iframe_mode = libxml_use_internal_errors( true );
		$loaded = @$iframe_doc->loadHTMLFile( $iframe_url );
		libxml_clear_errors();
		libxml_use_internal_errors( $previous_iframe_mode );
		if ( !$loaded ) {
			$this->log( '🚫 Failed to load iframe: ' . $iframe_src );
			continue;
		}

		$iframe_html = $iframe_doc->saveHTML();
		if ( $iframe_html !== false ) {
			// Scan for links in the iframe (recursion)
			$iframe_urls = $this->get_urls_from_html( $iframe_html, $depth + 1 );
			if ( !empty( $iframe_urls ) ) {
				$results = array_merge( $results, $iframe_urls );
			}
		}
	}

	// Images: src, srcset
	foreach ( $dom->getElementsByTagName( 'img' ) as $img ) {
		array_push( $results, $this->clean_url( $img->getAttribute('src') ) );
		$srcset = $img->getAttribute('srcset');
		if ( !empty( $srcset ) ) {
			foreach ( explode( ',', trim( $srcset ) ) as $candidate ) {
				// Each candidate is "url descriptor"; keep the URL.
				$candidate_parts = explode( ' ', trim( $candidate ) );
				array_push( $results, $this->clean_url( $candidate_parts[0] ) );
			}
		}
	}

	// Videos: attached file of the src and of the poster
	foreach ( $dom->getElementsByTagName( 'video' ) as $video ) {
		foreach ( array( 'src', 'poster' ) as $attribute ) {
			$raw_url = $video->getAttribute( $attribute );
			$cleaned = $this->clean_url( $raw_url );
			if ( empty( $cleaned ) ) {
				continue;
			}
			$attachment_id = $this->custom_attachment_url_to_postid( $raw_url );
			$attached_file = get_post_meta( $attachment_id, '_wp_attached_file', true );
			if ( !empty( $attached_file ) ) {
				array_push( $results, $attached_file );
			}
		}
	}

	// Audios and sources: src
	foreach ( array( 'audio', 'source' ) as $tag ) {
		foreach ( $dom->getElementsByTagName( $tag ) as $element ) {
			array_push( $results, $this->clean_url( $element->getAttribute('src') ) );
		}
	}

	// Links (<a>) and <link> tags in <head> area: href
	foreach ( array( 'a', 'link' ) as $tag ) {
		foreach ( $dom->getElementsByTagName( $tag ) as $element ) {
			$href = $element->getAttribute( 'href' );
			if ( $this->is_url( $href ) ) {
				$src = $this->clean_url( $href );
				if ( !empty( $src ) ) {
					array_push( $results, $src );
				}
			}
		}
	}

	// PDF
	preg_match_all( "/((https?:\/\/)?[^\\&\#\[\] \"\?]+\.pdf)/", $html, $res );
	if ( !empty( $res ) && isset( $res[1] ) && count( $res[1] ) > 0 ) {
		foreach ( $res[1] as $url ) {
			if ( $this->is_url( $url ) )
				array_push( $results, $this->clean_url( $url ) );
		}
	}

	// Background images
	preg_match_all( "/url\(\'?\"?((https?:\/\/)?[^\\&\#\[\] \"\?]+\.(jpe?g|gif|png))\'?\"?/", $html, $res );
	if ( !empty( $res ) && isset( $res[1] ) && count( $res[1] ) > 0 ) {
		foreach ( $res[1] as $url ) {
			if ( $this->is_url( $url ) )
				array_push( $results, $this->clean_url( $url ) );
		}
	}

	return $results;
}
697
/**
 *
 * Get the IDs and URLs from the blocks of a post.
 *
 * Blocks whose name contains $prefix are handed (with their inner blocks) to
 * array_to_ids_or_urls(). Blocks that do not match are no longer dropped as
 * a whole: their inner blocks are visited, so a matching block nested in
 * another container is still found.
 *
 * @param string $html The HTML content of the post.
 * @param string $prefix The prefix of the blocks to look for.
 * @param array $keys The keys to look for in the blocks.
 * @param array $urls The array to fill with the URLs.
 * @param array $ids The array to fill with the IDs.
 *
 */
function get_from_blocks( $html, $prefix, $keys, &$urls, &$ids ) {
	$blocks = parse_blocks( $html );

	if ( ! is_array( $blocks ) || ! isset( $blocks[0] ) ) {
		return;
	}

	$this->collect_from_parsed_blocks( $blocks, $prefix, $keys, $urls, $ids );
}

/**
 * Visits a list of parsed blocks for get_from_blocks(), descending into the
 * inner blocks of the blocks that do not match $prefix.
 */
private function collect_from_parsed_blocks( $blocks, $prefix, $keys, &$urls, &$ids ) {
	foreach ( $blocks as $block ) {
		// blockName can be null (e.g. markup outside of any block).
		$block_name = isset( $block['blockName'] ) ? (string)$block['blockName'] : '';

		if ( strpos( $block_name, $prefix ) !== false ) {
			// The recursive walk also covers this block's inner blocks.
			$this->array_to_ids_or_urls( $block, $ids, $urls, true, $keys );
			continue;
		}

		if ( !empty( $block['innerBlocks'] ) && is_array( $block['innerBlocks'] ) ) {
			$this->collect_from_parsed_blocks( $block['innerBlocks'], $prefix, $keys, $urls, $ids );
		}
	}
}
/**
 * Parses a meta, visits all the nested arrays/objects, looks for the given
 * attributes and fills the $ids and $urls arrays.
 *
 * Keys are compared to $lookFor as strings. With the previous loose
 * comparison, PHP 7 considered the integer key 0 equal to any attribute
 * name. Only strings are split as ID lists, and the pieces are trimmed.
 *
 * @param array|object $meta    Data to visit. Anything else is ignored.
 * @param array        $lookFor Attribute names to look for.
 * @param array        $ids     Filled with the IDs found.
 * @param array        $urls    Filled with the URLs found.
 * @param bool         $rawMode When true, values are pushed to $urls as they
 *                              are, without checking for an ID or a URL.
 */
function get_from_meta( $meta, $lookFor, &$ids, &$urls, $rawMode = false ) {
	if ( !is_array( $meta ) && !is_object( $meta ) ) {
		return;
	}

	$wanted_keys = array_map( 'strval', (array)$lookFor );

	foreach ( $meta as $key => $value ) {
		if ( is_object( $value ) || is_array( $value ) ) {
			$this->get_from_meta( $value, $lookFor, $ids, $urls, $rawMode );
			continue;
		}

		if ( !in_array( (string)$key, $wanted_keys, true ) || empty( $value ) ) {
			continue;
		}

		if ( $rawMode ) {
			array_push( $urls, $value );
		}
		else if ( is_numeric( $value ) ) {
			// Is this an ID?
			array_push( $ids, $value );
		}
		else if ( $this->is_url( $value ) ) {
			// Is this an URL?
			array_push( $urls, $this->clean_url( $value ) );
		}
		else if ( is_string( $value ) ) {
			// Is this an array of IDs, encoded as a string? (like "20,13")
			foreach ( explode( ',', $value ) as $piece ) {
				$piece = trim( $piece );
				if ( is_numeric( $piece ) ) {
					array_push( $ids, $piece );
				}
			}
		}
	}
}
768
/**
 * Collects the media referenced by the current theme through the WordPress
 * API: custom header, custom logo, site icon, background image, two theme
 * mods and the uploaded header images.
 *
 * @param array $ids  Filled with attachment IDs.
 * @param array $urls Filled with cleaned URLs.
 */
function get_images_from_themes( &$ids, &$urls ) {
	// Custom header: URL, thumbnail and attachment.
	$header = get_custom_header();
	$has_header = !empty( $header );
	if ( $has_header && !empty( $header->url ) ) {
		$urls[] = $this->clean_url( $header->url );
	}
	if ( $this->is_url( $header->thumbnail_url ) ) {
		$urls[] = $this->clean_url( $header->thumbnail_url );
	}
	if ( $has_header && !empty( $header->attachment_id ) ) {
		$ids[] = $header->attachment_id;
	}

	// Custom logo: markup returned by WordPress, then the stored ID.
	$logo_markup = get_custom_logo();
	if ( $this->is_url( $logo_markup ) ) {
		$urls = array_merge( $this->get_urls_from_html( $logo_markup ), $urls );
	}
	$logo_id = get_theme_mod( 'custom_logo' );
	if ( !empty( $logo_id ) && is_numeric( $logo_id ) ) {
		$ids[] = (int)$logo_id;
	}

	// Site icon: URL, then the stored ID.
	$icon_url = get_site_icon_url();
	if ( $this->is_url( $icon_url ) ) {
		$urls[] = $this->clean_url( $icon_url );
	}
	$icon_id = get_option( 'site_icon' );
	if ( !empty( $icon_id ) && is_numeric( $icon_id ) ) {
		$ids[] = (int)$icon_id;
	}

	// Background image.
	$background = get_background_image();
	if ( $this->is_url( $background ) ) {
		$urls[] = $this->clean_url( $background );
	}

	// Theme mods whose value is pushed as an ID when not empty.
	foreach ( array( 'photography_hero_image', 'author_profile_picture' ) as $mod_name ) {
		$mod_value = get_theme_mod( $mod_name );
		if ( !empty( $mod_value ) ) {
			$ids[] = $mod_value;
		}
	}

	// Uploaded header images.
	if ( !function_exists ( 'get_uploaded_header_images' ) ) {
		return;
	}
	$uploaded_headers = get_uploaded_header_images();
	if ( empty( $uploaded_headers ) ) {
		return;
	}
	foreach ( $uploaded_headers as $uploaded_header ) {
		if ( !empty ( $uploaded_header['attachment_id'] ) ) {
			$ids[] = $uploaded_header['attachment_id'];
		}
	}
}
820
821 #region LOGS
822
/**
 * Appends a line to the Media Cleaner log file.
 *
 * Nothing is written when debug logs are off, unless $force is true. When
 * the 'php_error_logs' option is set, the message is also sent to
 * error_log().
 *
 * @param mixed $data  Message to write; null writes an empty line.
 * @param bool  $force Write even if debug logs are disabled.
 * @return bool|null True when written, false if the file could not be
 *                   opened, null when logging is disabled.
 */
function log( $data = null, $force = false ) {
	$enabled = $this->debug_logs || $force;
	if ( !$enabled ) {
		return;
	}

	$mirror_to_php = $this->get_option( 'php_error_logs' );
	$handle = @fopen( $this->get_logs_path(), 'a' );
	if ( !$handle ) {
		return false;
	}

	$date = date( "Y-m-d H:i:s" );
	if ( !is_null( $data ) ) {
		fwrite( $handle, "$date: {$data}\n" );
		if ( $mirror_to_php ) {
			error_log( "[MEDIA CLEANER] " . $data );
		}
	}
	else {
		fwrite( $handle, "\n" );
	}

	fclose( $handle );
	return true;
}
845
846 //WPMC_PREFIX
847
/**
 * Returns the path of the log file, creating a new one when needed.
 *
 * The path stored in the 'logs_path' option is only used when it is legal:
 * inside the uploads directory, containing WPMC_PREFIX, ending with '.log'
 * and free of '..' segments. The check is now applied whether or not the
 * file exists. It used to be skipped for missing files, so an arbitrary
 * stored path was returned and later opened for append by log().
 *
 * When no legal path is stored, a new randomly named file is created in the
 * uploads directory and saved in the options.
 *
 * @return string Path of the log file.
 */
function get_logs_path() {
	$uploads_dir = wp_upload_dir();
	$uploads_dir_path = trailingslashit( $uploads_dir['basedir'] );

	$path = $this->get_option( 'logs_path' );

	if ( $path ) {
		$is_legal = is_string( $path )
			&& strpos( $path, $uploads_dir_path ) === 0
			&& strpos( $path, WPMC_PREFIX ) !== false
			&& substr( $path, -4 ) === '.log'
			&& strpos( $path, '..' ) === false;
		if ( $is_legal ) {
			return $path;
		}
	}

	// No usable path stored: generate a new one and remember it.
	$path = $uploads_dir_path . WPMC_PREFIX . "_" . $this->random_ascii_chars() . ".log";
	if ( !file_exists( $path ) ) {
		touch( $path );
	}

	$options = $this->get_all_options();
	$options['logs_path'] = $path;
	$this->update_options( $options );

	return $path;
}
876
877
/**
 * Returns the content of the log file, most recent line first and without
 * empty lines.
 *
 * @return string The log lines, or "No logs found." when there is no file.
 */
function get_logs() {
	$file = $this->get_logs_path();
	if ( !file_exists( $file ) ) {
		return "No logs found.";
	}

	$entries = array_filter( explode( "\n", file_get_contents( $file ) ) );
	return implode( "\n", array_reverse( $entries ) );
}
892
/**
 * Deletes the log file and resets the 'logs_path' option to null.
 */
function clear_logs() {
	$log_file = $this->get_logs_path();
	$exists = file_exists( $log_file );
	if ( $exists ) {
		unlink( $log_file );
	}

	// Forget the path so that a new file gets generated next time.
	$settings = $this->get_all_options();
	$settings['logs_path'] = null;
	$this->update_options( $settings );
}
903
904 #endregion
905
906 /**
907 *
908 * HELPERS
909 *
910 */
911
/**
 * Builds a random string of ASCII letters and digits.
 *
 * The result names the log file stored in the uploads directory (see
 * get_logs_path()), so it is drawn from random_int() rather than rand().
 * mt_rand() is only used if no source of randomness is available.
 *
 * @param int $length Number of characters.
 * @return string Random string of [A-Za-z0-9].
 */
private function random_ascii_chars($length = 8)
{
	$alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
	$last_index = strlen( $alphabet ) - 1;
	$random_string = '';

	for ( $i = 0; $i < $length; $i++ ) {
		try {
			$index = random_int( 0, $last_index );
		}
		catch ( Exception $e ) {
			$index = mt_rand( 0, $last_index );
		}
		$random_string .= $alphabet[$index];
	}

	return $random_string;
}
924
/**
 * @return string Path of the 'wpmc-trash' directory inside the uploads path.
 */
function get_trashdir() {
	$uploads = trailingslashit( $this->upload_path );
	return $uploads . 'wpmc-trash';
}
928
/**
 * @return string URL of the 'wpmc-trash' directory, built from $upload_url.
 */
function get_trashurl() {
	$uploads = trailingslashit( $this->upload_url );
	return $uploads . 'wpmc-trash';
}
932
/**
 * Discards the output buffer when it contains something other than
 * whitespace, unless the 'output_buffer_cleaning_disabled' option is set
 * (in that case a hint is logged and the buffer is left alone).
 */
function clean_ob(){
	$cleaning_disabled = $this->get_option( 'output_buffer_cleaning_disabled' );
	$buffered = ob_get_contents();

	// Nothing meaningful in the buffer.
	if ( empty( trim( $buffered ) ) ) {
		return;
	}

	if ( $cleaning_disabled ) {
		$this->log( "🚨 If the server's response was broken, try to let Output Buffer Cleaning enabled." );
		return;
	}

	$this->log( "🧹 The response is broken due to output buffering, it will be cleaned." );
	$this->log( "📄 Output buffer content: " . $buffered );
	ob_end_clean();
}
949
950 /**
951 *
952 * I18N RELATED HELPERS
953 *
954 */
955
/**
 * @return bool True when the icl_get_languages() function is available.
 */
function is_multilingual() {
	$has_languages_api = function_exists( 'icl_get_languages' );
	return $has_languages_api;
}
959
/**
 * Lists the language codes reported by icl_get_languages().
 *
 * @return array Language codes ('code', or 'language_code' as a fallback);
 *               empty when the site is not multilingual.
 */
function get_languages() {
	if ( !$this->is_multilingual() ) {
		return array();
	}

	$codes = array();
	foreach ( icl_get_languages() as $language ) {
		// The first field present wins.
		foreach ( array( 'code', 'language_code' ) as $field ) {
			if ( isset( $language[$field] ) ) {
				$codes[] = $language[$field];
				break;
			}
		}
	}
	return $codes;
}
975
/**
 * Asks the 'wpml_object_id' filter for the counterpart of an attachment in
 * every known language.
 *
 * @param int $mediaId Attachment ID.
 * @return array The non-empty IDs returned by the filter, one per language at most.
 */
function get_translated_media_ids( $mediaId ) {
	$counterparts = array();
	foreach ( $this->languages as $language_code ) {
		$translated = apply_filters( 'wpml_object_id', $mediaId, 'attachment', false, $language_code );
		if ( empty( $translated ) ) {
			continue;
		}
		$counterparts[] = $translated;
	}
	return $counterparts;
}
986
987 /**
988 *
989 * DELETE / SCANNING / RESET
990 *
991 */
992
/**
 * Moves a file from the plugin trash back to its place in the uploads folder.
 *
 * A file that is not in the trash is logged and treated as a success. Note that a
 * failure to create the destination folder or to move the file terminates the request
 * through die().
 *
 * @param string $path File path relative to the uploads folder.
 * @return bool Always true when the function returns.
 */
function recover_file( $path ) {
  $uploads_target = trailingslashit( $this->upload_path ) . $path;
  $trashed_source = trailingslashit( $this->get_trashdir() ) . $path;

  if ( !file_exists( $trashed_source ) ) {
    $this->log( "🚫 The file $uploads_target actually does not exist in the trash." );
    return true;
  }

  // Make sure the destination folder exists before moving the file back.
  $target_dir = pathinfo( $uploads_target, PATHINFO_DIRNAME );
  $dir_ready = file_exists( $target_dir ) || wp_mkdir_p( $target_dir );
  if ( !$dir_ready ) {
    die( 'Failed to create folder.' );
  }

  $moved = rename( $trashed_source, $uploads_target );
  if ( !$moved ) {
    die( 'Failed to move the file.' );
  }
  return true;
}
1009
/**
 * Recovers a trashed issue (a file or a Media entry) and clears its 'deleted' flag.
 *
 * @param int $id ID of the issue in the mclean_scan table.
 * @return bool True when the issue was recovered, false otherwise (unknown issue,
 *              missing attached file, failed post update or unsupported issue type).
 */
function recover( $id ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $issue = $this->get_issue( $id );

  if ( empty( $issue ) ) {
    $this->log( "🚫 Issue #{$id} does not exist. Cannot recover this." );
    return false;
  }

  // Files
  if ( $issue->type === 0 ) {
    // Only flag the issue as recovered when the file was actually moved back.
    if ( !$this->recover_file( $issue->path ) ) {
      $this->log( "🚫 Could not recover {$issue->path}." );
      return false;
    }
    $wpdb->query( $wpdb->prepare( "UPDATE $table_name SET deleted = 0 WHERE id = %d", $id ) );
    $this->log( "✅ Recovered {$issue->path}." );
    return true;
  }
  // Media
  else if ( $issue->type === 1 ) {

    // If there is no file attached, doesn't handle the files
    $fullpath = get_attached_file( $issue->postId );
    if ( empty( $fullpath ) ) {
      $this->log( "🚫 Media #{$issue->postId} does not have attached file anymore." );
      error_log( "Media #{$issue->postId} does not have attached file anymore." );
      return false;
    }

    $paths = $this->get_paths_from_attachment( $issue->postId );
    foreach ( $paths as $path ) {
      if ( !$this->recover_file( $path ) ) {
        $this->log( "🚫 Could not recover $path." );
        error_log( "Media Cleaner: Could not recover $path." );
      }
    }
    // Trashed Media are stored under another post type; switch it back to 'attachment'.
    if ( !wp_update_post( array( 'ID' => $issue->postId, 'post_type' => 'attachment' ) ) ) {
      $this->log( "🚫 Failed to Untrash Post {$issue->postId} (but deleted it from Cleaner DB)." );
      error_log( "Media Cleaner: Failed to Untrash Post {$issue->postId} (but deleted it from Cleaner DB)." );
      return false;
    }
    $wpdb->query( $wpdb->prepare( "UPDATE $table_name SET deleted = 0 WHERE id = %d", $id ) );
    $this->log( "✅ Recovered Media #{$issue->postId}." );
    return true;
  }

  // Neither a file (0) nor a Media (1): nothing was recovered.
  return false;
}
1057
/**
 * Moves a file from the uploads folder into the plugin trash folder.
 *
 * A file that no longer exists is logged and treated as a success. After a successful
 * move, the now possibly empty source folder is cleaned with clean_dir().
 *
 * @param string $fileIssuePath File path relative to the uploads folder.
 * @return bool True when the file is in the trash (or was already gone), false on failure.
 */
function trash_file( $fileIssuePath ) {
  $originalPath = trailingslashit( $this->upload_path ) . $fileIssuePath;
  $trashPath = trailingslashit( $this->get_trashdir() ) . $fileIssuePath;
  $path_parts = pathinfo( $trashPath );

  try {
    if ( !file_exists( $path_parts['dirname'] ) && !wp_mkdir_p( $path_parts['dirname'] ) ) {
      $this->log( "🚫 Could not create the trash directory for Media Cleaner." );
      error_log( "Media Cleaner: Could not create the trash directory." );
      return false;
    }
    // Rename the file (move). 'is_dir' is just there for security (no way we should move a whole directory)
    if ( is_dir( $originalPath ) ) {
      $this->log( "🚫 Attempted to delete a directory instead of a file ($originalPath). Can't do that." );
      error_log( "Media Cleaner: Attempted to delete a directory instead of a file ($originalPath). Can't do that." );
      return false;
    }
    if ( !file_exists( $originalPath ) ) {
      $this->log( "🚫 The file $originalPath actually does not exist." );
      error_log( "Media Cleaner: The file $originalPath actually does not exist." );
      return true;
    }
    if ( !@rename( $originalPath, $trashPath ) ) {
      // Report the failure in the plugin log too, like every other failure above.
      $this->log( "🚫 Unknown error occured while trying to delete a file ($originalPath)." );
      error_log( "Media Cleaner: Unknown error occured while trying to delete a file ($originalPath)." );
      return false;
    }
  }
  catch ( Exception $e ) {
    // Do not swallow the exception silently: keep a trace of what went wrong.
    $this->log( "🚫 Exception while trashing $originalPath: " . $e->getMessage() );
    error_log( "Media Cleaner: Exception while trashing $originalPath: " . $e->getMessage() );
    return false;
  }
  $this->clean_dir( dirname( $originalPath ) );
  return true;
}
1091
/**
 * Repairs an issue by deleting its child issues and registering its file as a new
 * Media entry (attachment), then removes the related rows from the mclean_scan table.
 *
 * @param int $id ID of the parent issue in the mclean_scan table.
 * @return bool True on success; false when the repair entry does not exist, a child
 *              could not be deleted or the attachment could not be created.
 */
function repair( $id ) {
  $repair = $this->get_repair( $id );
  if ( empty( $repair ) ) {
    $this->log( "🚫 Repair #{$id} does not exist. Cannot repair this." );
    return false;
  }
  foreach ( $repair->child_ids as $child_id ) {
    if ( !$this->delete( $child_id ) ) {
      $this->log( "🚫 Failed to repair the file." );
      return false;
    }
  }
  $full_path = $this->get_full_upload_path( $repair->path );
  $filetype = wp_check_filetype( basename( $full_path ), null );
  $wp_upload_dir = wp_upload_dir();
  // NOTE(review): $wp_upload_dir['url'] is the URL of the current upload sub-folder, which
  // may differ from the folder the repaired file lives in - confirm the guid is as intended.
  $attachment = array(
    'guid' => $wp_upload_dir['url'] . '/' . basename( $full_path ),
    'post_mime_type' => $filetype['type'],
    'post_title' => preg_replace( '/\.[^.]+$/', '', basename( $full_path ) ),
    'post_content' => '',
    'post_status' => 'inherit'
  );

  $attach_id = wp_insert_attachment( $attachment, $full_path );

  // Without a valid attachment there is nothing to generate metadata for, and the scan
  // entries must be kept so the repair can be attempted again.
  if ( empty( $attach_id ) || is_wp_error( $attach_id ) ) {
    $this->log( "🚫 Failed to create the Media entry for {$repair->path}." );
    return false;
  }

  require_once( ABSPATH . 'wp-admin/includes/image.php' );
  $attach_data = wp_generate_attachment_metadata( $attach_id, $full_path );
  wp_update_attachment_metadata( $attach_id, $attach_data );

  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $wpdb->query( $wpdb->prepare( "DELETE FROM $table_name WHERE id = %d OR parentId = %d", $id, $id ) );
  $this->log( "✅ Repaired {$repair->path}." );
  return true;
}
1128
/**
 * Sets or clears the 'ignored' flag of an issue.
 *
 * When an issue currently in the trash is being ignored, it is recovered first.
 *
 * @param int  $id     ID of the issue in the mclean_scan table.
 * @param bool $ignore True to ignore the issue, false to stop ignoring it.
 * @return bool False when the issue does not exist, true otherwise.
 */
function ignore( $id, $ignore ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $issue = $this->get_issue( $id );

  if ( empty( $issue ) ) {
    $this->log( "🚫 Issue #{$id} does not exist. Cannot ignore this." );
    return false;
  }

  if ( $ignore ) {
    // If it is in trash, recover it
    if ( $issue->deleted ) {
      $this->recover( $id );
    }
    $wpdb->query( $wpdb->prepare( "UPDATE $table_name SET ignored = 1 WHERE id = %d", $id ) );
    return true;
  }

  $wpdb->query( $wpdb->prepare( "UPDATE $table_name SET ignored = 0 WHERE id = %d", $id ) );
  return true;
}
1151
/**
 * Tells whether a string ends with the given suffix.
 *
 * @param string $haystack The string to inspect.
 * @param string $needle   The expected suffix; an empty suffix always matches.
 * @return bool True when $haystack ends with $needle.
 */
function endsWith( $haystack, $needle )
{
  $suffix_size = strlen( $needle );
  return $suffix_size == 0 || substr( $haystack, -$suffix_size ) === $needle;
}
1159
/**
 * Removes a directory when it is empty, then does the same for its parents.
 *
 * The walk up stops at a path ending with 'uploads' and at the configured uploads
 * path, so the uploads root itself (whatever its name) is never removed.
 *
 * @param string $dir Path of the directory to clean.
 * @return void
 */
function clean_dir( $dir ) {
  if ( !file_exists( $dir ) || !is_dir( $dir ) )
    return;
  else if ( $this->endsWith( $dir, 'uploads' ) )
    return;

  // The uploads folder is not always named 'uploads': also stop at the configured root.
  $uploads_root = rtrim( (string)$this->upload_path, '/\\' );
  if ( $uploads_root !== '' && rtrim( $dir, '/\\' ) === $uploads_root )
    return;

  // scandir() returns false when the directory cannot be read; leave it alone then.
  $entries = scandir( $dir );
  if ( $entries === false )
    return;

  $found = array_diff( $entries, array( '.', '..' ) );
  if ( count( $found ) < 1 ) {
    if ( rmdir( $dir ) ) {
      $this->clean_dir( dirname( $dir ) );
    }
  }
}
1172
/**
 * Loads one issue from the mclean_scan table and normalizes its fields.
 *
 * The id, postId, type, deleted and ignored fields are cast to integers and the
 * slashes are stripped from the path.
 *
 * @param int $id ID of the issue.
 * @return object|false The issue row, or false when it does not exist.
 */
function get_issue( $id ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $sql = $wpdb->prepare( "SELECT * FROM $table_name WHERE id = %d", $id );
  $row = $wpdb->get_row( $sql, OBJECT );
  if ( empty( $row ) ) {
    return false;
  }

  foreach ( array( 'id', 'postId', 'type', 'deleted', 'ignored' ) as $numeric_field ) {
    $row->$numeric_field = (int)$row->$numeric_field;
  }
  $row->path = stripslashes( $row->path );
  return $row;
}
1188
/**
 * Loads an issue together with the IDs of its child issues, for a repair.
 *
 * The trailing " (+ N files)"-like suffix is removed from the path and child_ids is
 * turned into an array.
 *
 * @param int $id ID of the parent issue in the mclean_scan table.
 * @return object|false Object with id, path and child_ids, or false when the row is
 *                      missing or has no path.
 */
function get_repair( $id ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $sql = $wpdb->prepare( "SELECT
    main.id AS id,
    main.path AS path,
    GROUP_CONCAT(child.id) AS child_ids
    FROM
    $table_name AS main
    LEFT JOIN
    $table_name AS child ON main.id = child.parentId
    WHERE main.id = %d", $id
  );
  $row = $wpdb->get_row( $sql, OBJECT );
  if ( empty( $row ) ) {
    return false;
  }

  // Without a path there is no file to repair.
  if ( empty( $row->path ) ) {
    $this->log( "🚫 Repair #{$id} does not have a path. Cannot repair this." );
    return false;
  }

  $row->id = (int)$row->id;
  $suffix_pattern = '/' . "^(.*)(\\s\\(\\+.*)$" . '/i';
  $row->path = preg_replace( $suffix_pattern, '$1', stripslashes( $row->path ) );
  if ( $row->child_ids ) {
    $row->child_ids = explode( ',', $row->child_ids );
  }
  else {
    $row->child_ids = [];
  }
  return $row;
}
1219
/**
 * Returns a page of the file issues that can be repaired, with their children.
 *
 * Only parent rows (no parentId) with a path, of type 0, neither deleted nor ignored,
 * are returned.
 *
 * @param string $order_by Sort column: 'path', 'issue' or 'size'; anything else sorts by id (ascending).
 * @param string $order    'asc' for ascending, anything else for descending.
 * @param string $search   Optional text that the path must contain (matched literally).
 * @param int    $skip     Number of rows to skip.
 * @param int    $limit    Maximum number of rows to return.
 * @return array Rows as returned by $wpdb->get_results().
 */
function get_issues_to_repair( $order_by = 'id', $order = 'asc', $search = '', $skip = 0, $limit = 10 ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";

  $search_clause = '';
  if ( !empty( $search ) ) {
    // esc_like() keeps '%' and '_' typed by the user from acting as LIKE wildcards.
    $search_clause = $wpdb->prepare( "AND main.path LIKE %s", '%' . $wpdb->esc_like( $search ) . '%' );
  }

  // The column and direction come from fixed lists, never from the raw input.
  $order_clause = 'ORDER BY main.id ASC';
  if ( $order_by === 'path' ) {
    $order_clause = 'ORDER BY main.path ' . ( $order === 'asc' ? 'ASC' : 'DESC' );
  }
  else if ( $order_by === 'issue' ) {
    $order_clause = 'ORDER BY main.issue ' . ( $order === 'asc' ? 'ASC' : 'DESC' );
  }
  else if ( $order_by === 'size' ) {
    $order_clause = 'ORDER BY main.size ' . ( $order === 'asc' ? 'ASC' : 'DESC' );
  }

  $result = $wpdb->get_results( $wpdb->prepare( "SELECT
    main.id AS id,
    main.path AS path,
    GROUP_CONCAT(child.id) AS child_ids,
    GROUP_CONCAT(child.path) AS child_paths,
    main.type AS type,
    main.postId AS postId,
    main.size AS size,
    main.ignored AS ignored,
    main.deleted AS deleted,
    main.issue AS issue
    FROM
    $table_name AS main
    LEFT JOIN
    $table_name AS child ON main.id = child.parentId
    WHERE
    main.path IS NOT NULL AND main.parentId IS NULL
    AND main.deleted = 0 AND main.ignored = 0
    AND main.type = 0
    $search_clause
    GROUP BY main.id
    $order_clause
    LIMIT %d, %d;
    ", $skip, $limit ) );

  return $result;
}
1267
/**
 * Returns the IDs of all parent issues (rows with a path and no parentId).
 *
 * @param string $search Optional text that the path must contain (matched literally).
 * @return array List of issue IDs as returned by $wpdb->get_col().
 */
function get_repair_ids ( $search = '' ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";

  $search_clause = '';
  if ( !empty( $search ) ) {
    // esc_like() keeps '%' and '_' typed by the user from acting as LIKE wildcards.
    $search_clause = $wpdb->prepare( "AND main.path LIKE %s", '%' . $wpdb->esc_like( $search ) . '%' );
  }

  return $wpdb->get_col( "SELECT DISTINCT main.id
    FROM
    $table_name AS main
    LEFT JOIN $table_name AS child ON main.id = child.parentId
    WHERE
    main.path IS NOT NULL
    AND main.parentId IS NULL
    $search_clause
    GROUP BY
    main.id
    ;"
  );
}
1290
/**
 * Returns the number of repairable parent issues and the sum of their sizes.
 *
 * NOTE(review): unlike get_issues_to_repair(), this query does not filter on
 * main.type = 0 - confirm whether the two are meant to match.
 *
 * @param string $search Optional text that the path must contain (matched literally).
 * @return object|null Row with 'entries' and 'size', as returned by $wpdb->get_row().
 */
function get_stats_of_issues_to_repair( $search = '' ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";

  $search_clause = '';
  if ( !empty( $search ) ) {
    // esc_like() keeps '%' and '_' typed by the user from acting as LIKE wildcards.
    $search_clause = $wpdb->prepare( "AND main.path LIKE %s", '%' . $wpdb->esc_like( $search ) . '%' );
  }

  return $wpdb->get_row( "SELECT
    COUNT(id) AS entries,
    SUM(size) AS size
    FROM (
    SELECT
    COUNT(DISTINCT main.id) as id,
    main.size as size
    FROM
    $table_name AS main
    LEFT JOIN
    $table_name AS child ON main.id = child.parentId
    WHERE
    main.path IS NOT NULL AND main.parentId IS NULL AND main.deleted = 0 AND main.ignored = 0
    $search_clause
    GROUP BY main.id
    ) t;
    " );
}
1318
/**
 * Returns the number of issues that can be repaired.
 *
 * @param string $search Optional text that the path must contain.
 * @return int|string The 'entries' value of the stats row, or 0 when the stats query
 *                    returned nothing (for example after a database error).
 */
function get_count_of_issues_to_repair( $search ) {
  $stats = $this->get_stats_of_issues_to_repair( $search );
  if ( empty( $stats ) || !isset( $stats->entries ) ) {
    return 0;
  }
  return $stats->entries;
}
1323
/**
 * Deletes or trashes an issue (a file or a Media entry).
 *
 * - Issue already in the trash: it is removed permanently.
 * - 'skip_trash' option enabled: it is removed permanently right away.
 * - Otherwise: it is moved to the trash and flagged as deleted.
 *
 * A file that is already missing from the disk is considered deleted, the same way
 * trash_file() treats a missing file as a success; this keeps stale entries removable.
 *
 * @param int $id ID of the issue in the mclean_scan table.
 * @return bool True when the issue was deleted or trashed, false otherwise.
 */
function delete( $id ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $issue = $this->get_issue( $id );

  if ( empty( $issue ) ) {
    $this->log( "🚫 Issue #{$id} does not exist. Cannot delete this." );
    return false;
  }

  $regex = "^(.*)(\\s\\(\\+.*)$";
  $issue->path = preg_replace( '/' . $regex . '/i', '$1', $issue->path ); // remove " (+ 6 files)" from path
  $skip_trash = $this->get_option( 'skip_trash' );

  if ( $issue->type === 0 ) {

    // Delete file from the trash
    if ( $issue->deleted === 1 ) {
      $trashPath = trailingslashit( $this->get_trashdir() ) . $issue->path;
      // file_exists() first: unlink() on a missing file warns and returns false.
      if ( !file_exists( $trashPath ) || unlink( $trashPath ) ) {
        $wpdb->query( $wpdb->prepare( "DELETE FROM $table_name WHERE id = %d", $id ) );
        $this->clean_dir( dirname( $trashPath ) );
        return true;
      }
    }
    // Delete file without using trash
    else if ( $skip_trash ) {
      $originalPath = trailingslashit( $this->upload_path ) . $issue->path;
      if ( !file_exists( $originalPath ) || unlink( $originalPath ) ) {
        $wpdb->query( $wpdb->prepare( "DELETE FROM $table_name WHERE id = %d", $id ) );
        $this->clean_dir( dirname( $originalPath ) );
        return true;
      }
    }
    // Move file to the trash
    else if ( $this->trash_file( $issue->path ) ) {
      $wpdb->query( $wpdb->prepare( "UPDATE $table_name SET deleted = 1, ignored = 0, time = NOW() WHERE id = %d", $id ) );
      return true;
    }

    $this->log( "🚫 Failed to delete/trash the file." );
    error_log( "Media Cleaner: Failed to delete/trash the file." );
  }

  if ( $issue->type === 1 ) {

    // Trash Media definitely by recovering it (to be like a normal Media) and remove it through the
    // standard WordPress workflow
    if ( $issue->deleted === 1 || $skip_trash ) {
      if ( $issue->deleted === 1 ) {
        $this->recover( $id );
      }
      wp_update_post( array( 'ID' => $issue->postId, 'post_type' => 'attachment' ) );
      wp_delete_attachment( $issue->postId, true );
      $wpdb->query( $wpdb->prepare( "DELETE FROM $table_name WHERE id = %d", $id ) );
      return true;
    }
    else {
      // Move Media to trash
      // Let's copy the images to the trash so that it can be recovered.
      $paths = $this->get_paths_from_attachment( $issue->postId );
      foreach ( $paths as $path ) {
        if ( !$this->trash_file( $path ) ) {
          $this->log( "🚫 Could not trash $path." );
          error_log( "Media Cleaner: Could not trash $path." );
          return false;
        }
      }
      // The post type is switched so the Media is no longer handled as an attachment.
      wp_update_post( array( 'ID' => $issue->postId, 'post_type' => 'wmpc-trash' ) );
      $wpdb->query( $wpdb->prepare( "UPDATE $table_name SET deleted = 1, ignored = 0, time = NOW() WHERE id = %d", $id ) );
      return true;
    }
  }
  return false;
}
1399
/**
 * Deletes a directory with everything it contains.
 *
 * Symbolic links found inside are removed as links and never followed, so nothing
 * outside of the given directory gets deleted.
 *
 * @param string $dir Path of the directory to delete.
 * @return void
 */
function delete_directory_recurcively( $dir ) {
  if ( !is_dir( $dir ) ) {
    return;
  }
  // scandir() returns false when the directory cannot be read.
  $entries = scandir( $dir );
  if ( $entries === false ) {
    return;
  }
  $files = array_diff( $entries, array( '.', '..' ) );
  foreach ( $files as $file ) {
    $path = "$dir/$file";
    // is_dir() follows symlinks: check for a link first and only remove the link itself.
    if ( is_link( $path ) ) {
      unlink( $path );
    }
    else if ( is_dir( $path ) ) {
      $this->delete_directory_recurcively( $path );
    }
    else {
      unlink( $path );
    }
  }
  rmdir( $dir );
}
1415
/**
 * Empties the plugin trash: removes the trash folder from the disk and deletes the
 * rows flagged as deleted from the mclean_scan table.
 *
 * @return array Array with a 'message' string and a 'success' boolean.
 */
function force_trash() {

  $res = [
    'message' => 'The trash folder has been emptied.',
    'success' => true
  ];

  // Delete all the files in the trash folder.
  $trashDirPath = trailingslashit( $this->get_trashdir() );
  if ( file_exists( $trashDirPath ) && is_dir( $trashDirPath ) ) {
    $this->delete_directory_recurcively( $trashDirPath );
  }

  // Clean the Database: DELETE FROM wp_mclean_scan WHERE deleted = 1
  // wpdb::prepare() requires at least one placeholder, hence the %d.
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $wpdb->query( $wpdb->prepare( "DELETE FROM $table_name WHERE deleted = %d", 1 ) );

  return $res;
}
1437
1438 /**
1439 *
1440 * SCANNING / RESET
1441 *
1442 */
1443
/**
 * Registers one or several URLs as references.
 *
 * Each URL is registered twice: as given, and with its resolution suffix removed.
 * With files, both are needed to make sure the original file is not deleted if a size
 * exists for it. With media, the URL as given is kept as well because users sometimes
 * name an original file with a resolution in it. Both scan methods are therefore
 * handled the same way.
 *
 * @param string|array $urlOrUrls A URL or a list of URLs.
 * @param string       $type      Type of the reference.
 * @param mixed        $origin    Optional origin of the reference.
 * @param array|null   $extra     Optional extra settings passed on to add_reference().
 * @return void
 */
function add_reference_url( $urlOrUrls, $type, $origin = null, $extra = null ) {
  $urls = is_array( $urlOrUrls ) ? $urlOrUrls : array( $urlOrUrls );
  foreach ( $urls as $url ) {
    $this->add_reference( null, $url, $type, $origin, $extra );
    $without_resolution = $this->clean_url_from_resolution( $url );
    $this->add_reference( 0, $without_resolution, $type, $origin, $extra );
  }
}
1463
/**
 * Registers one or several Media IDs as references.
 *
 * When $this->multilingual is set, the translated Media IDs of each ID are registered too.
 *
 * NOTE(review): $extra is accepted but not passed on to add_reference() - confirm
 * whether this is intended.
 *
 * @param int|array  $idOrIds A Media ID or a list of Media IDs.
 * @param string     $type    Type of the reference.
 * @param mixed      $origin  Optional origin of the reference.
 * @param array|null $extra   Currently unused.
 * @return void
 */
function add_reference_id( $idOrIds, $type, $origin = null, $extra = null ) {
  $ids = is_array( $idOrIds ) ? $idOrIds : array( $idOrIds );
  foreach ( $ids as $id ) {
    $this->add_reference( $id, "", $type, $origin );
    if ( !$this->multilingual ) {
      continue;
    }

    $translatedIds = $this->get_translated_media_ids( (int)$id );
    if ( empty( $translatedIds ) ) {
      continue;
    }
    foreach ( $translatedIds as $translatedId ) {
      $this->add_reference( $translatedId, "", $type, $origin );
    }
  }
}
1486
1487
/**
 * Returns the first reference found for a Media ID.
 *
 * originType is kept as text: references are stored with a textual type (optionally
 * followed by " [origin]"), so casting it to an integer would always yield 0.
 *
 * @param int $id The Media ID.
 * @return object|false The reference row (id, mediaId, originType, origin, parentId),
 *                      or false when the Media is not referenced.
 */
public function get_reference_for_media_id( $id ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_refs";
  $refs = $wpdb->get_results( $wpdb->prepare( "SELECT * FROM $table_name WHERE mediaId = %d", $id ), OBJECT );
  if ( empty( $refs ) ) {
    return false;
  }
  $ref = $refs[0];
  $ref->id = (int)$ref->id;
  $ref->mediaId = (int)$ref->mediaId;
  $ref->originType = (string)$ref->originType;
  // The row may not carry an 'origin' value; fall back to an empty string in that case.
  $ref->origin = isset( $ref->origin ) ? stripslashes( $ref->origin ) : '';
  $ref->parentId = empty( $ref->parentId ) ? null : (int)$ref->parentId;
  return $ref;
}
1504
/**
 * Returns the references whose originType ends with "[$id]", i.e. related to a Post ID.
 *
 * References stored by URL only are resolved to a Media ID through
 * find_media_id_from_file(); those that cannot be resolved are left out.
 *
 * @param int $id The Post ID.
 * @return array List of arrays with id, mediaId, mediaUrl, originType and parentId.
 */
public function get_references_for_post_id( $id ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_refs";
  $sql = $wpdb->prepare( "SELECT * FROM $table_name WHERE originType LIKE %s", "%[$id]" );
  $rows = $wpdb->get_results( $sql, OBJECT );
  if ( empty( $rows ) ) {
    return [];
  }

  $resolved = array();
  foreach ( $rows as $row ) {
    $mediaId = null;
    if ( (int)$row->mediaId > 0 ) {
      $mediaId = (int)$row->mediaId;
    }
    else if ( !empty( $row->mediaUrl ) ) {
      // No Media ID stored: try to find the Media owning this file.
      $lookup = $this->find_media_id_from_file( $row->mediaUrl, false );
      $mediaId = !empty( $lookup ) ? (int)$lookup : null;
    }
    if ( !$mediaId ) {
      continue;
    }

    $resolved[] = [
      'id' => (int)$row->id,
      'mediaId' => $mediaId,
      'mediaUrl' => $row->mediaUrl,
      'originType' => $row->originType,
      'parentId' => empty( $row->parentId ) ? null : (int)$row->parentId,
    ];
  }
  return $resolved;
}
1533
/**
 * Queues a reference (by Media ID and/or by URL).
 *
 * The references are not written to the DB directly, they are pushed into a cache
 * ($this->refcache). When $this->use_cached_references is on, an ID or URL is only
 * queued the first time it is seen (add_cached_id() / add_cached_url() tell whether it
 * is new); otherwise it is always queued.
 *
 * @param int|null    $id     Media ID, or an empty value when referencing by URL.
 * @param string|null $url    Cleaned URL, or an empty value when referencing by ID.
 * @param string      $type   Type of the reference; " [$origin]" is appended when an origin is given.
 * @param mixed       $origin Optional origin of the reference.
 * @param array|null  $extra  Optional 'force_no_cache' / 'force_cache' flags; they change
 *                            $this->use_cached_references, which is not restored afterwards.
 * @return void
 */
private function add_reference( $id, $url, $type, $origin = null, $extra = null ) {

  if ( $extra && isset( $extra['force_no_cache'] ) && $extra['force_no_cache'] ) {
    $this->use_cached_references = false;
  }
  if ( $extra && isset( $extra['force_cache'] ) && $extra['force_cache'] ) {
    $this->use_cached_references = true;
  }

  $label = empty( $origin ) ? $type : $type . " [$origin]";

  if ( !empty( $id ) ) {
    // Without the cache everything is queued; with it, only IDs seen for the first time.
    if ( !$this->use_cached_references || $this->add_cached_id( $id ) ) {
      array_push( $this->refcache, array( 'id' => $id, 'url' => null, 'type' => $label, 'origin' => $origin ) );
    }
  }

  if ( empty( $url ) ) {
    return;
  }

  // The URL shouldn't contain http, https, javascript at the beginning (and there are probably many more cases)
  // The URL must be cleaned before being passed as a reference.
  foreach ( array( "http:", "https:", "javascript:" ) as $scheme ) {
    if ( substr( $url, 0, strlen( $scheme ) ) === $scheme ) {
      return;
    }
  }

  if ( !$this->use_cached_references || $this->add_cached_url( $url ) ) {
    array_push( $this->refcache, array( 'id' => null, 'url' => $url, 'type' => $label, 'origin' => $origin ) );
  }
}
1592
/**
 * Returns the Media IDs stored in the mclean_cache table under $this->cached_ids_key.
 *
 * @return array The cached values; empty when there are none.
 */
private function get_cached_ids() {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_cache";
  $sql = $wpdb->prepare(
    "SELECT cache_value FROM $table_name WHERE cache_key = %s AND cache_type = %s",
    $this->cached_ids_key,
    'id'
  );
  $values = $wpdb->get_col( $sql );
  if ( !$values ) {
    return array();
  }
  return $values;
}
1603
/**
 * Returns the URLs stored in the mclean_cache table under $this->cached_urls_key.
 *
 * @return array The cached values; empty when there are none.
 */
private function get_cached_urls() {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_cache";
  $sql = $wpdb->prepare(
    "SELECT cache_value FROM $table_name WHERE cache_key = %s AND cache_type = %s",
    $this->cached_urls_key,
    'url'
  );
  $values = $wpdb->get_col( $sql );
  if ( !$values ) {
    return array();
  }
  return $values;
}
1614
/**
 * Records a Media ID in the mclean_cache table.
 *
 * The INSERT IGNORE leaves an already recorded ID untouched, which is how duplicates
 * are detected.
 *
 * @param int $id The Media ID.
 * @return bool True when a row was inserted (the ID was not recorded yet).
 */
private function add_cached_id($id) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_cache";

  $sql = $wpdb->prepare(
    "INSERT IGNORE INTO $table_name (cache_key, cache_value, cache_type) VALUES (%s, %s, %s)",
    $this->cached_ids_key,
    $id,
    'id'
  );
  $inserted_rows = $wpdb->query( $sql );

  return $inserted_rows > 0;
}
1630
/**
 * Records a URL in the mclean_cache table.
 *
 * The INSERT IGNORE leaves an already recorded URL untouched, which is how duplicates
 * are detected.
 *
 * @param string $url The URL.
 * @return bool True when a row was inserted (the URL was not recorded yet).
 */
private function add_cached_url($url) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_cache";

  $sql = $wpdb->prepare(
    "INSERT IGNORE INTO $table_name (cache_key, cache_value, cache_type) VALUES (%s, %s, %s)",
    $this->cached_urls_key,
    $url,
    'url'
  );
  $inserted_rows = $wpdb->query( $sql );

  return $inserted_rows > 0;
}
1646
/**
 * Forgets every cached reference: empties the mclean_cache table and the in-memory
 * lists ($this->cached_ids_cli and $this->cached_urls_cli).
 *
 * @return void
 */
function reset_cached_references() {
  global $wpdb;
  $cache_table = $wpdb->prefix . "mclean_cache";

  $wpdb->query( "TRUNCATE TABLE $cache_table" );

  $this->cached_ids_cli = array();
  $this->cached_urls_cli = array();
}
1657
/**
 * Inserts a list of references into the mclean_refs table with a single query.
 *
 * An entry with an 'id' is stored as a Media reference; otherwise an entry with a
 * 'url' is stored as a file reference, with its 'parentId' when it has one. Entries
 * with neither are skipped.
 *
 * @param array $entries List of arrays with the keys 'id', 'url', 'type' and optionally 'parentId'.
 * @return void
 */
function insert_references($entries)
{
  global $wpdb;
  $table = $wpdb->prefix . "mclean_refs";
  $args = array();
  $tuples = array();

  foreach ( $entries as $entry ) {
    if ( !is_null( $entry['id'] ) ) {
      // Media Reference
      $args[] = $entry['id'];
      $args[] = $entry['type'];
      $tuples[] = "('%d', NULL, '%s', NULL)";
      if ( $this->debug_logs ) {
        $this->log("+ Media #{$entry['id']} (as ID)");
      }
      continue;
    }
    if ( is_null( $entry['url'] ) ) {
      continue;
    }

    // File Reference
    $args[] = $entry['url'];
    $args[] = $entry['type'];
    if ( isset( $entry['parentId'] ) ) {
      $args[] = $entry['parentId'];
      $tuples[] = "(NULL, '%s', '%s', '%d')";
      if ( $this->debug_logs ) {
        $this->log( "{$entry['url']} (as URL) (ParentID: {$entry['parentId']})" );
      }
    }
    else {
      $tuples[] = "(NULL, '%s', '%s', NULL)";
      if ( $this->debug_logs ) {
        $this->log("{$entry['url']} (as URL)");
      }
    }
  }

  if ( empty( $args ) ) {
    return;
  }

  $query = "INSERT INTO $table (mediaId, mediaUrl, originType, parentId) VALUES " . implode( ', ', $tuples );
  $wpdb->query( $wpdb->prepare( "$query ", $args ) );
}
1700
/**
 * Resets the scan progress by deleting the transient stored under $this->progress_key.
 *
 * @return void
 */
function reset_progress() {
  $transient_name = $this->progress_key;
  delete_transient( $transient_name );
}
1705
/**
 * Clears the step progress once scanning completes, by deleting the transient stored
 * under $this->progress_key.
 *
 * @return void
 */
function clear_step_progress() {
  $transient_name = $this->progress_key;
  delete_transient( $transient_name );
}
1710
/**
 * Saves the scan progress in a transient that does not expire.
 *
 * @param mixed $step The current step.
 * @param array $data Optional progress information (type, limit, limitSize or anything else).
 * @return void
 */
function save_progress( $step, $data = array() ) {
  $snapshot = array();
  $snapshot['step'] = $step;
  $snapshot['time'] = time();
  $snapshot['data'] = $data;

  set_transient( $this->progress_key, $snapshot, 0 );
}
1722
/**
 * Returns the progress saved by save_progress().
 *
 * @return mixed The value of the transient stored under $this->progress_key.
 */
function get_progress() {
  $saved = get_transient( $this->progress_key );
  return $saved;
}
1726
/**
 * Returns the 'step_progress' entry of the plugin options.
 *
 * @return mixed The stored value, or null when the option is not set.
 */
function get_step_progress() {
  $options = $this->get_all_options();
  if ( isset( $options['step_progress'] ) ) {
    return $options['step_progress'];
  }
  return null;
}
1731
/**
 * Writes the cached references ($this->refcache) to the DB, then empties the cache.
 *
 * References whose URL carries no resolution suffix (and references by ID) are written
 * first as potential parents. The others are then written with the ID of the row
 * holding their resolution-less URL as parentId, when such a row exists.
 *
 * @return void
 */
function write_references() {
  global $wpdb;
  $table = $wpdb->prefix . "mclean_refs";

  $potential_parents = array();
  $potential_children = array();

  foreach ( $this->refcache as $value ) {
    $potentialParentPath = !is_null( $value['url'] ) ? $this->clean_url_from_resolution( $value['url'] ) : null;
    if ( $potentialParentPath === $value['url'] ) {
      $potential_parents[] = $value;
    }
    else {
      $potential_children[] = $value;
    }
  }

  $this->insert_references( $potential_parents );

  // Resolve parentId for potential children. Nothing is inserted during this loop, so
  // the result of each lookup can be reused for every child sharing the same parent path.
  $parent_ids = array();
  foreach ( $potential_children as &$child ) {
    $potentialParentPath = $this->clean_url_from_resolution( $child['url'] );
    if ( !array_key_exists( $potentialParentPath, $parent_ids ) ) {
      $parent_ids[$potentialParentPath] = $wpdb->get_var( $wpdb->prepare( "SELECT id FROM $table WHERE mediaUrl = %s", $potentialParentPath ) );
    }
    $parentId = $parent_ids[$potentialParentPath];
    if ( !empty( $parentId ) ) {
      $child['parentId'] = (int)$parentId;
    }
  }
  // Break the reference so that a later use of $child cannot alter the last element.
  unset( $child );

  // Insert potential children with resolved parentIds
  $this->insert_references( $potential_children );
  $this->refcache = array();
}
1765
/**
 * Tells whether a file matches an issue flagged as ignored.
 *
 * The match is made on ignored issues whose path contains $file; a message is written
 * to the plugin log when there is one.
 *
 * @param string $file The file path to look for.
 * @return bool True when at least one ignored issue matches.
 */
function check_is_ignore( $file ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  // Prepared statement; esc_like() keeps '%' and '_' in the path from acting as wildcards.
  $count = $wpdb->get_var( $wpdb->prepare( "SELECT COUNT(*)
    FROM $table_name
    WHERE ignored = 1
    AND path LIKE %s", '%' . $wpdb->esc_like( $file ) . '%' ) );
  if ( $count > 0 ) {
    $this->log( "🚫 Could not trash $file." );
  }
  return ($count > 0);
}
1778
/**
 * Finds the Media whose '_wp_attached_file' meta equals the given file.
 *
 * The file is first shortened with clean_uploaded_filename().
 *
 * @param string $file  The file path.
 * @param bool   $doLog Whether the outcome should be written to the plugin log.
 * @return string|null The post ID as returned by $wpdb->get_var(), or null when not found.
 */
function find_media_id_from_file( $file, $doLog ) {
  global $wpdb;
  $postmeta_table_name = $wpdb->prefix . 'postmeta';
  $file = $this->clean_uploaded_filename( $file );
  $sql = $wpdb->prepare( "SELECT post_id
    FROM {$postmeta_table_name}
    WHERE meta_key = '_wp_attached_file'
    AND meta_value = %s", $file
  );
  $ret = $wpdb->get_var( $sql );
  if ( $doLog ) {
    if ( empty( $ret ) )
      $this->log( "🚫 File $file not found as _wp_attached_file (Library)." );
    else {
      $this->log( "✅ File $file found as Media $ret." );
    }
  }

  return $ret;
}
1800
/**
 * Returns the cleaned URL of a Media for each intermediate image size.
 *
 * @param int  $id            The attachment ID.
 * @param bool $sizes_as_key  True to get the URLs keyed by size name, false for a plain list.
 * @return array The URLs, as cleaned by clean_url().
 */
function get_thumbnails_urls( $id, $sizes_as_key = false ) {
  $by_size = array();
  foreach ( get_intermediate_image_sizes() as $size_name ) {
    $image = wp_get_attachment_image_src( $id, $size_name );
    if ( !$image ) {
      continue;
    }
    // The first element of the returned array is the URL.
    $by_size[$size_name] = $this->clean_url( $image[0] );
  }

  if ( $sizes_as_key ) {
    return $by_size;
  }
  return array_values( $by_size );
}
1814
1815
/**
 * Returns the cleaned URLs found in the srcset of a Media.
 *
 * The srcset is requested for every registered image size, then for the size returned
 * by get_attachment_size_by_id(); the same URL may therefore appear more than once.
 *
 * @param int          $id   The attachment ID.
 * @param string|array $size Default size handed to get_attachment_size_by_id().
 * @return array The URLs, as cleaned by clean_url().
 */
function get_thumbnails_urls_from_srcset( $id, $size = 'full' ) {

  $own_size = $this->get_attachment_size_by_id( $id, $size );
  $candidate_sizes = array_keys( $this->get_image_sizes() );
  $candidate_sizes[] = $own_size;

  $collected = array();
  foreach ( $candidate_sizes as $candidate ) {
    $srcset = wp_get_attachment_image_srcset( $id, $candidate );
    if ( empty( $srcset ) ) {
      continue;
    }

    // Each srcset entry is "URL descriptor": only the URL part is kept.
    foreach ( explode( ', ', $srcset ) as $descriptor ) {
      $pieces = explode( ' ', $descriptor );
      $candidate_url = trim( $pieces[0] );
      if ( !empty( $candidate_url ) ) {
        $collected[] = $this->clean_url( $candidate_url );
      }
    }
  }

  return $collected;

}
1843
/**
 * Works out the size (width, height) of an attachment.
 *
 * The dimensions come from the attachment metadata when the attachment URL ends with
 * the metadata file, otherwise from a "-WIDTHxHEIGHT" suffix in the URL.
 *
 * @param int          $attachment_id The attachment ID.
 * @param string|array $default_size  Value returned when the size cannot be determined.
 * @return string|array Array of width and height, or $default_size.
 */
function get_attachment_size_by_id( $attachment_id, $default_size = 'full' ) {

  if ( ! $attachment_id ) {
    return $default_size;
  }

  $url = wp_get_attachment_url( $attachment_id );
  if ( ! $url ) {
    return $default_size;
  }

  $metadata = wp_get_attachment_metadata( $attachment_id );
  if ( ! is_array( $metadata ) ) {
    return $default_size;
  }

  if ( isset( $metadata['file'] ) ) {
    // The URL ends with the file when the file is found exactly at that offset.
    $expected_offset = strlen( $url ) - strlen( $metadata['file'] );
    if ( strpos( $url, $metadata['file'] ) === $expected_offset ) {
      return array( $metadata['width'], $metadata['height'] );
    }
  }

  // Get the image width and height.
  // Example: https://regex101.com/r/7JwGz7/1.
  $dimensions = array();
  if ( preg_match( '/-(\d+)x(\d+)\.(jpg|jpeg|gif|png|svg|webp)$/', $url, $dimensions ) ) {
    return array( $dimensions[1], $dimensions[2] );
  }

  return $default_size;
}
1873
/**
 * Returns the width, height and crop setting of every intermediate image size.
 *
 * Sizes found in $_wp_additional_image_sizes are read from there; the others are read
 * from the '{size}_size_w', '{size}_size_h' and '{size}_crop' options.
 *
 * @return array Arrays with 'width', 'height' and 'crop', keyed by size name.
 */
function get_image_sizes() {
  global $_wp_additional_image_sizes;
  $result = array();
  foreach ( get_intermediate_image_sizes() as $name ) {
    if ( isset( $_wp_additional_image_sizes[$name] ) ) {
      $custom = $_wp_additional_image_sizes[$name];
      $result[$name] = array(
        'width' => intval( $custom['width'] ),
        'height' => intval( $custom['height'] ),
        'crop' => $custom['crop']
      );
      continue;
    }
    $result[$name] = array(
      'width' => get_option( $name.'_size_w' ),
      'height' => get_option( $name.'_size_h' ),
      'crop' => get_option( $name.'_crop' )
    );
  }
  return $result;
}
1892
/**
 * Removes the resolution suffix ("-150x150" or "_150x150") found right before the
 * file extension.
 *
 * @param string|null $url The URL or path; null is returned untouched.
 * @return string|null The URL without its resolution suffix.
 */
function clean_url_from_resolution( $url ) {
  if ( $url === null ) {
    return $url;
  }

  // Only a suffix directly followed by a 3-4 lowercase letters extension is removed.
  return preg_replace( '/[_-]\d+x\d+(?=\.[a-z]{3,4}$)/', '', $url );
}
1900
/**
 * Tells whether a value looks like a URL: a string longer than 4 characters that
 * starts with "http" (any case) or with a slash.
 *
 * @param mixed $url The value to check.
 * @return bool True when the value looks like a URL.
 */
function is_url( $url ) {
  if ( empty( $url ) || !is_string( $url ) ) {
    return false;
  }
  if ( strlen( $url ) <= 4 ) {
    return false;
  }
  return strtolower( substr( $url, 0, 4 ) ) == 'http' || $url[0] == '/';
}
1910
/**
 * In-place variant of clean_url_from_resolution(): the given variable is overwritten
 * with the URL stripped from its resolution suffix.
 *
 * @param string|null $url The URL, passed by reference.
 * @return void
 */
function clean_url_from_resolution_ref( &$url ) {
  $without_resolution = $this->clean_url_from_resolution( $url );
  $url = $without_resolution;
}
1914
/**
 * From a URL to the shortened and cleaned URL (for example '2013/02/file.png').
 *
 * Everything up to and including $this->upload_url, plus the character following it,
 * is removed and the rest is URL-decoded.
 *
 * @param string $url The URL to clean.
 * @return string|null The cleaned URL, or null when $url is empty, is not a string or
 *                     does not contain the upload URL.
 */
function clean_url( $url ) {
  // Checked before strpos(): it does not accept an array and null is deprecated.
  if ( empty( $url ) || !is_string( $url ) ) {
    return null;
  }
  $dirIndex = strpos( $url, $this->upload_url );
  if ( $dirIndex === false ) {
    return null;
  }
  // The "1 +" skips the slash that follows the upload URL.
  return urldecode( substr( $url, 1 + strlen( $this->upload_url ) + $dirIndex ) );
}
1929
/**
 * Finds the attachment ID matching a URL.
 *
 * The URL (without its query string) is first matched against the end of the guid of
 * the attachments, then, relative to the uploads base URL, against the end of their
 * '_wp_attached_file' meta.
 *
 * @param string $url The URL of the attachment.
 * @return int The attachment ID, or 0 when none is found or the URL is empty.
 */
function custom_attachment_url_to_postid( $url ) {
  global $wpdb;

  // Remove the query string
  $url = preg_replace( '/\?.*/', '', (string)$url );

  // An empty URL would turn the LIKE pattern into '%', which matches every attachment.
  if ( $url === '' ) {
    return 0;
  }

  // Try to find the attachment ID by matching the URL with the guid
  $attachment = $wpdb->get_col( $wpdb->prepare( "SELECT ID FROM $wpdb->posts WHERE guid LIKE %s AND post_type = 'attachment';", '%' . $wpdb->esc_like( $url ) ) );

  // If found, return the first attachment ID
  if ( !empty( $attachment ) ) {
    return ( int )$attachment[0];
  }

  // If not found, try to match the URL without the upload directory path
  $upload_dir = wp_upload_dir();
  $url_relative = str_replace( $upload_dir['baseurl'] . '/', '', $url );

  // Same protection as above when nothing is left of the URL.
  if ( $url_relative === '' ) {
    return 0;
  }

  $attachment = $wpdb->get_col( $wpdb->prepare( "SELECT post_id FROM $wpdb->postmeta WHERE meta_key = '_wp_attached_file' AND meta_value LIKE %s;", '%' . $wpdb->esc_like( $url_relative ) ) );

  // If found, return the first attachment ID
  if ( !empty( $attachment ) ) {
    return ( int )$attachment[0];
  }

  // If still not found, return 0
  return 0;
}
1958
1959 // From a fullpath to the shortened and cleaned path (for example '2013/02/file.png')
1960 // Original version by Jordy
1961 // function clean_uploaded_filename( $fullpath ) {
1962 // $basedir = $this->upload_path;
1963 // $file = str_replace( $basedir, '', $fullpath );
1964 // $file = str_replace( "./", "", $file );
1965 // $file = trim( $file, "/" );
1966 // return $file;
1967 // }
1968
/**
 * From a fullpath to the shortened and cleaned path (for example '2013/02/file.png').
 * Faster version, more difficult to read, by Mike Meinz.
 *
 * Everything up to and including $this->upload_url (plus the character that
 * follows it) is removed; a leading "./" and then a leading "/" are stripped
 * from the result. When the upload URL is not found, the path is only
 * stripped of those leading markers.
 *
 * @param mixed $fullpath Path of the file.
 * @return mixed The shortened path.
 */
function clean_uploaded_filename( $fullpath ) {
  $file = $fullpath;
  if ( is_string( $fullpath ) && !empty( $this->upload_url ) ) {
    $dirIndex = strpos( $fullpath, $this->upload_url );
    // Strict comparison: a match at offset 0 is a valid position, not "not found".
    if ( $dirIndex !== false ) {
      // Remove first part of the path leaving yyyy/mm/filename.ext
      $file = substr( $fullpath, 1 + strlen( $this->upload_url ) + $dirIndex );
    }
  }
  if ( substr( $file, 0, 2 ) == './' ) {
    $file = substr( $file, 2 );
  }
  if ( substr( $file, 0, 1 ) == '/' ) {
    $file = substr( $file, 1 );
  }
  return $file;
}
1988
/**
 * Check if the file or the Media ID is used in the install.
 * That file or ID will be checked against the database of references created by the plugin
 * by the parsers.
 *
 * The Media ID is looked up first (mediaId column), then the file (mediaUrl
 * column). Each lookup is skipped when its argument is empty.
 *
 * @param mixed $file    Value compared with the mediaUrl column.
 * @param mixed $mediaId Value compared with the mediaId column.
 * @return mixed The originType of the matching reference, or false when none is found.
 */
function reference_exists( $file, $mediaId ) {
  global $wpdb;

  $table = $wpdb->prefix . "mclean_refs";

  if ( !empty( $mediaId ) ) {
    $row = $wpdb->get_row( $wpdb->prepare( "SELECT originType FROM $table WHERE mediaId = %d", $mediaId ) );
    if ( !empty( $row ) ) {
      $origin = $row->originType === 'MEDIA LIBRARY' ? 'Media Library' : 'content';
      // Log message kept on a single line with a valid UTF-8 marker.
      $this->log( "✅ Media #{$mediaId} used by {$origin}" );
      return $row->originType;
    }
  }

  if ( !empty( $file ) ) {
    $row = $wpdb->get_row( $wpdb->prepare( "SELECT originType FROM $table WHERE mediaUrl = %s", $file ) );
    if ( !empty( $row ) ) {
      $origin = $row->originType === 'MEDIA LIBRARY' ? 'Media Library' : 'content';
      $this->log( "✅ File {$file} used by {$origin}" );
      return $row->originType;
    }
  }

  return false;
}
2020
/**
 * Build a path inside the uploads base directory.
 *
 * @param string $relative_path Path appended to the uploads 'basedir'.
 * @return string The 'basedir' (with a trailing slash) followed by $relative_path.
 */
function get_full_upload_path( $relative_path ) {
  $uploads = wp_upload_dir();
  return trailingslashit( $uploads['basedir'] ) . $relative_path;
}
2026
/**
 * List the cleaned paths of every file belonging to an attachment.
 *
 * The list starts with the main file, followed by the 'original_image' (when
 * the metadata has one) and by the file of each entry in the metadata 'sizes'
 * (only when the metadata has both a width and a height).
 *
 * @param int $attachmentId Media ID.
 * @return array Cleaned paths; empty when the attachment has no attached file.
 */
function get_paths_from_attachment( $attachmentId ) {
  $fullpath = get_attached_file( $attachmentId );
  if ( empty( $fullpath ) ) {
    $this->log( 'Could not find attached file for Media ID ' . $attachmentId );
    return array();
  }

  $mainfile = $this->clean_uploaded_filename( $fullpath );
  $paths = array( $mainfile );

  // Sibling files live in the same directory as the main file.
  $info = pathinfo( $mainfile );
  $folder = trailingslashit( $this->upload_path ) . trailingslashit( $info['dirname'] );

  $meta = wp_get_attachment_metadata( $attachmentId );
  if ( isset( $meta['original_image'] ) ) {
    $paths[] = $this->clean_uploaded_filename( $folder . $meta['original_image'] );
  }

  $isImage = isset( $meta, $meta['width'], $meta['height'] );
  // NOTE(review): the returned sizes are not used in this method; confirm
  // whether this call is still needed before removing it.
  $this->get_image_sizes();
  if ( !$isImage || !isset( $meta['sizes'] ) ) {
    return $paths;
  }

  foreach ( $meta['sizes'] as $attr ) {
    if ( isset( $attr['file'] ) ) {
      $paths[] = $this->clean_uploaded_filename( $folder . $attr['file'] );
    }
  }
  return $paths;
}
2055
/**
 * Tell whether the scan table has a row for this Media ID flagged as ignored.
 *
 * @param int $attachmentId Media ID (matched against the postId column).
 * @return bool True when a row exists and its 'ignored' field is truthy.
 */
function is_media_ignored( $attachmentId ) {
  global $wpdb;
  $scanTable = $wpdb->prefix . "mclean_scan";
  $query = $wpdb->prepare( "SELECT * FROM $scanTable WHERE postId = %d", $attachmentId );
  $row = $wpdb->get_row( $query, OBJECT );
  return ( $row && $row->ignored ) ? true : false;
}
2066
/**
 * Decide whether a Media entry is used, and record an issue when it is not.
 *
 * @param int  $attachmentId Media ID to check.
 * @param bool $checkOnly    When true, nothing is deleted from or inserted
 *                           into the database; only the verdict is returned.
 * @return mixed True (or the filtered value of 'wpmc_check_media') when the
 *               media is considered used, a falsy value otherwise.
 */
function check_media( $attachmentId, $checkOnly = false ) {

  // An ignored Media ID is always considered as used.
  if ( $this->is_media_ignored( $attachmentId ) ) {
    return true;
  }

  // Remove everything related to this media from the database.
  if ( !$checkOnly ) {
    $this->delete_attachment_related_data( $attachmentId );
  }

  $totalSize = 0;
  $fileCount = 0;
  $brokenOnlyScan = !$this->check_content;
  $fullpath = get_attached_file( $attachmentId );
  $is_broken = apply_filters( 'wpmc_is_file_broken', !file_exists( $fullpath ), $attachmentId );

  // Broken-only scan: a media whose file exists is reported as used (filterable).
  if ( $brokenOnlyScan && !$is_broken ) {
    return apply_filters( 'wpmc_check_media', true, $attachmentId, false );
  }

  // Analyze the usage of each path (thumbnails included) for this Media ID.
  $issue = 'NO_CONTENT';
  foreach ( $this->get_paths_from_attachment( $attachmentId ) as $path ) {

    // Found in the content: stop right away, unless the filter disagrees.
    if ( $this->check_content && $this->reference_exists( $path, $attachmentId ) ) {
      if ( apply_filters( 'wpmc_check_media', true, $attachmentId, false ) ) {
        return true;
      }
    }

    // Accumulate the size of the existing files; every path is counted.
    $filepath = trailingslashit( $this->upload_path ) . $path;
    if ( file_exists( $filepath ) ) {
      $totalSize += filesize( $filepath );
    }
    $fileCount++;
  }

  // The media seems unused (or broken): let the filter have the final word.
  $is_considered_used = apply_filters( 'wpmc_check_media', false, $attachmentId, $is_broken );
  if ( $is_considered_used ) {
    return $is_considered_used;
  }

  if ( $is_broken ) {
    $this->log( "🚫 File {$fullpath} does not exist." );
    $issue = 'ORPHAN_MEDIA';
  }

  if ( $checkOnly ) {
    return $is_considered_used;
  }

  // Record the issue in the scan table.
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $mainfile = $this->clean_uploaded_filename( $fullpath );
  $label = $mainfile . ( $fileCount > 0 ? ( " (+ " . $fileCount . " files)" ) : "" );
  $wpdb->insert( $table_name,
    array(
      'time' => current_time('mysql'),
      'type' => 1,
      'size' => $totalSize,
      'path' => $label,
      'postId' => $attachmentId,
      'issue' => $issue
    )
  );
  return $is_considered_used;
}
2137
/**
 * Delete the issues that are not flagged as deleted, then empty the log file.
 *
 * @param bool $includingIgnored When false (default), rows flagged as ignored
 *                               are kept.
 */
function reset_issues( $includingIgnored = false ) {
  global $wpdb;
  $table_name = $wpdb->prefix . "mclean_scan";
  $sql = $includingIgnored
    ? "DELETE FROM $table_name WHERE deleted = 0"
    : "DELETE FROM $table_name WHERE ignored = 0 AND deleted = 0";
  $wpdb->query( $sql );

  // Truncate the log file, only if it already exists.
  $logFile = WPMC_PATH . '/logs/media-cleaner.log';
  if ( file_exists( $logFile ) ) {
    file_put_contents( $logFile, '' );
  }
}
2152
/**
 * Tell whether a file extension is one of the image extensions.
 *
 * The list of extensions goes through the 'wpmc_valid_image_extensions'
 * filter; the comparison is made on the lowercased extension.
 *
 * @param string $ext Extension, without the dot.
 * @return bool True when the extension is in the (filtered) list.
 */
function is_image_extension( $ext ) {
  $needle = strtolower( $ext );
  $defaults = array( 'jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'ico', 'webp', 'avif' );
  $allowed = apply_filters( 'wpmc_valid_image_extensions', $defaults );
  return in_array( $needle, $allowed );
}
2160
2161
/**
 * Empty the references table, then reset the cached references.
 */
function reset_references() {
  global $wpdb;
  $refsTable = $wpdb->prefix . "mclean_refs";
  $wpdb->query( "TRUNCATE $refsTable" );
  $this->reset_cached_references();
}
2168
/**
 * Fetch the scan-table row recorded for a post ID.
 *
 * @param int $postId Value matched against the postId column.
 * @return mixed The result of $wpdb->get_row() for that post ID.
 */
function get_issue_for_postId( $postId ) {
  global $wpdb;
  $scanTable = $wpdb->prefix . "mclean_scan";
  $query = $wpdb->prepare( "SELECT * FROM $scanTable WHERE postId = %d", $postId );
  return $wpdb->get_row( $query, OBJECT );
}
2175
/**
 * Print the translated label of an issue code.
 *
 * Codes without a dedicated label are printed as they are.
 *
 * @param mixed $issue Issue code (for example 'NO_CONTENT').
 */
function echo_issue( $issue ) {
  // switch uses the same loose comparison as the == operator.
  switch ( $issue ) {
    case 'NO_CONTENT':
      _e( "Not found in content", 'media-cleaner' );
      break;
    case 'ORPHAN_FILE':
      _e( "Not in Library", 'media-cleaner' );
      break;
    case 'ORPHAN_RETINA':
      _e( "Orphan Retina", 'media-cleaner' );
      break;
    case 'ORPHAN_WEBP':
      _e( "Orphan WebP", 'media-cleaner' );
      break;
    case 'ORPHAN_MEDIA':
      _e( "No attached file", 'media-cleaner' );
      break;
    default:
      echo $issue;
  }
}
2196
/**
 * List every sub-directory of the uploads directory.
 *
 * Each entry is the directory path relative to the uploads base directory,
 * prefixed with '/' and the base directory name (for example
 * '/uploads/2013/02').
 *
 * @return string JSON-encoded array of directories; an empty array when the
 *                uploads directory does not exist.
 */
function get_uploads_directory_hierarchy() {
  $uploads_dir = wp_upload_dir();
  $base_dir = wp_normalize_path( $uploads_dir['basedir'] );
  $root = '/' . wp_basename( $base_dir );
  $directories = array();

  // RecursiveDirectoryIterator throws when the directory cannot be opened.
  if ( !is_dir( $base_dir ) ) {
    return json_encode( $directories );
  }

  // Get all subdirectories of the base directory.
  // CATCH_GET_CHILD makes the traversal skip children that cannot be opened
  // instead of aborting with an exception.
  $dir_iterator = new RecursiveDirectoryIterator( $base_dir, FilesystemIterator::KEY_AS_PATHNAME | FilesystemIterator::CURRENT_AS_FILEINFO | FilesystemIterator::SKIP_DOTS );
  $iterator = new RecursiveIteratorIterator( $dir_iterator, RecursiveIteratorIterator::SELF_FIRST, RecursiveIteratorIterator::CATCH_GET_CHILD );

  $base_length = strlen( $base_dir );
  foreach ( $iterator as $file ) {
    if ( !$file->isDir() ) {
      continue;
    }
    // Normalize path for consistency
    $file_path = wp_normalize_path( $file->getPathname() );
    // Remove base_dir from the start of the path only, so that the same
    // string appearing deeper in the path is left untouched.
    if ( strpos( $file_path, $base_dir ) === 0 ) {
      $directory = substr( $file_path, $base_length );
    }
    else {
      $directory = str_replace( $base_dir, '', $file_path );
    }
    if ( $directory ) {
      $directories[] = $root . $directory;
    }
  }

  // Return the hierarchy as a JSON string
  return json_encode( $directories );
}
2222
2223 /**
2224 *
2225 * Roles & Access Rights
2226 *
2227 */
/**
 * Tell whether the current user may access the settings.
 *
 * @return mixed The 'manage_options' capability check, passed through the
 *               'wpmc_allow_setup' filter.
 */
public function can_access_settings() {
  $allowed = current_user_can( 'manage_options' );
  return apply_filters( 'wpmc_allow_setup', $allowed );
}
2231
/**
 * Tell whether the current user may use the features.
 *
 * @return mixed The current_user_can( 'administrator' ) check, passed through
 *               the 'wpmc_allow_usage' filter.
 */
public function can_access_features() {
  $allowed = current_user_can( 'administrator' );
  return apply_filters( 'wpmc_allow_usage', $allowed );
}
2235
2236 #region Options
2237
/**
 * List every option handled by the plugin, with its default value.
 *
 * @return array Default values indexed by option name.
 */
function list_options() {
  $defaults = [
    'method' => 'media',
    'content' => true,
    'filesystem_content' => false,
    'media_library' => true,
    'live_content' => false,
    'debuglogs' => false,
    'images_only' => false,
    'attach_is_use' => false,
    'thumbnails_only' => false,
    'dirs_filter' => '',
    'files_filter' => '',
    'hide_thumbnails' => false,
    'hide_warning' => false,
    'skip_trash' => false,
    'medias_buffer' => 100,
    'posts_buffer' => 5,
    'analysis_buffer' => 100,
    'file_op_buffer' => 20,
    'delay' => 100,
    'shortcodes_disabled' => false,
    'use_cached_references' => true,
    'output_buffer_cleaning_disabled' => false,
    'php_error_logs' => false,
    'posts_per_page' => 10,
    'clean_uninstall' => false,
    'repair_mode' => false,
    'expert_mode' => false,
    'logs_path' => null,
  ];
  return $defaults;
}
2270
/**
 * Delete the stored options (the WordPress option named by $this->option_name).
 */
function reset_options() {
  $name = $this->option_name;
  delete_option( $name );
}
2274
/**
 * Get the value of a single option.
 *
 * @param string $option Option name.
 * @return mixed The option value, or null when the option is unknown or its
 *               value is null.
 */
function get_option( $option ) {
  $options = $this->get_all_options();
  // Explicit fallback: reading an unknown key directly raises an
  // undefined-index notice/warning.
  return isset( $options[$option] ) ? $options[$option] : null;
}
2279
/**
 * Load the stored options and pass them through check_options().
 *
 * @return array The options returned by check_options().
 */
function get_all_options() {
  $stored = get_option( $this->option_name, null );
  return $this->check_options( $stored );
}
2285
2286 // Let's work on this function if we need it.
2287 // Right now, it looks like the options are all updated at the same time.
2288
2289 // function update_option( $option, $value ) {
2290 // if ( !array_key_exists( $name, $options ) ) {
2291 // return new WP_REST_Response([ 'success' => false, 'message' => 'This option does not exist.' ], 200 );
2292 // }
2293 // $value = is_bool( $params['value'] ) ? ( $params['value'] ? '1' : '' ) : $params['value'];
2294 // }
2295
/**
 * Store the options, then return them sanitized.
 *
 * @param array $options Options to store.
 * @return array|false The result of sanitize_options(), or false when
 *                     update_option() returned a falsy value.
 */
function update_options( $options ) {
  $saved = update_option( $this->option_name, $options, false );
  if ( !$saved ) {
    return false;
  }
  return $this->sanitize_options();
}
2303
/**
 * Upgrade from the old way of storing options to the new way.
 *
 * Every option listed by list_options() that is missing from $options is
 * filled with the value of the legacy 'wpmc_<name>' option (or the default
 * value), and that legacy option is deleted. The result is saved when
 * something was added.
 *
 * @param mixed $options Stored options; anything empty or not an array is
 *                       treated as an empty set.
 * @return array The complete options, plus the dynamic 'scan_progress' entry.
 */
function check_options( $options = [] ) {
  $plugin_options = $this->list_options();
  $options = ( empty( $options ) || !is_array( $options ) ) ? [] : $options;
  $hasChanges = false;
  foreach ( $plugin_options as $option => $default ) {
    // The option already exists
    if ( isset( $options[$option] ) ) {
      continue;
    }
    // A stored null is legitimate when the default itself is null (for
    // example 'logs_path'). isset() alone reports it as missing on every
    // call, which would redo the migration and the save each time.
    if ( $default === null && array_key_exists( $option, $options ) ) {
      continue;
    }
    // The option does not exist, so we need to add it.
    // Let's use the old value if any, or the default value.
    $options[$option] = get_option( 'wpmc_' . $option, $default );
    delete_option( 'wpmc_' . $option );
    $hasChanges = true;
  }
  if ( $hasChanges ) {
    update_option( $this->option_name , $options );
  }

  // Dynamically added options
  //TODO: we should have a rest route to fetch this instead of using the options directly. This is temporary.
  $options['scan_progress'] = get_transient( $this->progress_key );

  return $options;
}
2330
/**
 * Validate and keep the options clean and logical.
 *
 * The buffer and delay options are reset to their fallback value when they
 * hold an empty string; the options are saved only when something changed.
 *
 * @return array The options, after sanitization.
 */
function sanitize_options() {
  $options = $this->get_all_options();

  // Option name => value used when the stored value is an empty string.
  $fallbacks = array(
    'medias_buffer' => 100,
    'posts_buffer' => 5,
    'analysis_buffer' => 100,
    'file_op_buffer' => 20,
    'delay' => 100,
  );

  $dirty = false;
  foreach ( $fallbacks as $key => $fallback ) {
    if ( $options[$key] === '' ) {
      $options[$key] = $fallback;
      $dirty = true;
    }
  }

  if ( $dirty ) {
    update_option( $this->option_name, $options, false );
  }
  return $options;
}
2365
2366 #endregion
2367 }
2368
/**
 * Check the database: delegates to wpmc_create_database().
 */
function wpmc_check_database() {
  wpmc_create_database();
}
2373
/**
 * Create (or update, through dbDelta) the three tables of the plugin:
 * mclean_scan, mclean_refs and mclean_cache.
 */
function wpmc_create_database() {
  global $wpdb;
  require_once( ABSPATH . 'wp-admin/includes/upgrade.php' );
  $charset_collate = $wpdb->get_charset_collate();

  // Issues found by the scan.
  $scan_table = $wpdb->prefix . "mclean_scan";
  $scan_sql = "CREATE TABLE $scan_table (
    id BIGINT(20) NOT NULL AUTO_INCREMENT,
    time DATETIME DEFAULT '0000-00-00 00:00:00' NOT NULL,
    type TINYINT(1) NOT NULL,
    postId BIGINT(20) NULL,
    path TINYTEXT NULL,
    size INT(9) NULL,
    ignored TINYINT(1) NOT NULL DEFAULT 0,
    deleted TINYINT(1) NOT NULL DEFAULT 0,
    issue TINYTEXT NOT NULL,
    parentId BIGINT(20) NULL,
    PRIMARY KEY (id),
    KEY PostIdIndex (postId),
    KEY IgnoredIndex (ignored)
  ) " . $charset_collate . ";";
  dbDelta( $scan_sql );

  // References to the media.
  // This key doesn't work on too many installs because of the 'Specified key was too long' issue
  // KEY mediaLookUp (mediaId, mediaUrl)
  $refs_table = $wpdb->prefix . "mclean_refs";
  $refs_sql = "CREATE TABLE $refs_table (
    id BIGINT(20) NOT NULL AUTO_INCREMENT,
    mediaId BIGINT(20) NULL,
    mediaUrl TINYTEXT NULL,
    originType TINYTEXT NOT NULL,
    parentId BIGINT(20) NULL,
    PRIMARY KEY (id),
    KEY mediaId_index (mediaId)
  ) " . $charset_collate . ";";
  dbDelta( $refs_sql );

  // Cache table for cached IDs and URLs
  $cache_table = $wpdb->prefix . "mclean_cache";
  $cache_sql = "CREATE TABLE $cache_table (
    id BIGINT(20) NOT NULL AUTO_INCREMENT,
    cache_key VARCHAR(50) NOT NULL,
    cache_value VARCHAR(255) NOT NULL,
    cache_type VARCHAR(20) NOT NULL,
    PRIMARY KEY (id),
    UNIQUE KEY cache_lookup (cache_key, cache_value, cache_type)
  ) " . $charset_collate . ";";
  dbDelta( $cache_sql );
}
2424
/**
 * Drop the tables of the plugin (mclean_scan, mclean_refs, wpmcleaner and
 * mclean_cache), when they exist.
 */
function wpmc_remove_database() {
  global $wpdb;
  $tables = array(
    $wpdb->prefix . "mclean_scan",
    $wpdb->prefix . "mclean_refs",
    $wpdb->prefix . "wpmcleaner",
    $wpdb->prefix . "mclean_cache",
  );
  $wpdb->query( "DROP TABLE IF EXISTS " . implode( ', ', $tables ) . ";" );
}
2434
2435 #region Install / Uninstall
2436
2437 /*
2438 INSTALL / UNINSTALL
2439 */
2440
/**
 * Register the uninstall hook of the plugin.
 *
 * The activation and deactivation hooks are intentionally left disabled:
 *   register_activation_hook( $mainfile, 'wpmc_install' );
 *   register_deactivation_hook( $mainfile, 'wpmc_uninstall' );
 *
 * @param string $mainfile Value passed as the first argument of
 *                         register_uninstall_hook().
 */
function wpmc_init( $mainfile ) {
  $callback = 'wpmc_uninstall';
  register_uninstall_hook( $mainfile, $callback );
}
2446
/**
 * Install step: delegates to wpmc_create_database().
 */
function wpmc_install() {
  wpmc_create_database();
}
2450
/**
 * Reset the database: calls wpmc_remove_database(), then
 * wpmc_create_database().
 */
function wpmc_reset() {
  wpmc_remove_database();
  wpmc_create_database();
}
2455
/**
 * Delete every WordPress option whose name starts with 'wpmc_'.
 */
function wpmc_remove_options() {
  global $wpdb;
  // In a LIKE pattern "_" matches any single character. It is escaped so that
  // only names really starting with "wpmc_" are selected, and not, for
  // example, "wpmcx_something" owned by another plugin.
  $pattern = $wpdb->esc_like( 'wpmc_' ) . '%';
  $names = $wpdb->get_col( $wpdb->prepare( "SELECT option_name FROM $wpdb->options WHERE option_name LIKE %s", $pattern ) );
  foreach ( $names as $name ) {
    delete_option( $name );
  }
}
2463
/**
 * Uninstall callback: removes the options and the tables of the plugin, but
 * only when the 'clean_uninstall' option is enabled.
 */
function wpmc_uninstall () {
  $options = get_option( 'wpmc_options', [] );
  // The option may be missing or incomplete; reading the key directly would
  // raise an undefined-index notice/warning.
  $cleanUninstall = is_array( $options ) && !empty( $options['clean_uninstall'] );
  if ( $cleanUninstall ) {
    wpmc_remove_options();
    wpmc_remove_database();
  }
}
2472
2473 #endregion