PluginProbe ʕ •ᴥ•ʔ
LiteSpeed Cache / 7.8.1
LiteSpeed Cache v7.8.1
trunk 1.0.15 1.9.1.1 2.9.9.2 3.6.4 4.6 5.7.0.1 6.5.4 7.0.0.1 7.0.1 7.1 7.2 7.3 7.3.0.1 7.4 7.5 7.5.0.1 7.6 7.6.1 7.6.2 7.7 7.8 7.8.0.1 7.8.1
litespeed-cache / src / crawler-map.cls.php
litespeed-cache / src Last commit date
cdn 2 months ago data_structure 2 months ago activation.cls.php 2 months ago admin-display.cls.php 2 months ago admin-settings.cls.php 2 months ago admin.cls.php 2 months ago api.cls.php 2 months ago avatar.cls.php 2 months ago base.cls.php 2 months ago cdn.cls.php 2 months ago cloud-auth-callback.trait.php 2 months ago cloud-auth-ip.trait.php 2 months ago cloud-auth.trait.php 2 months ago cloud-misc.trait.php 2 months ago cloud-node.trait.php 2 months ago cloud-request.trait.php 2 months ago cloud.cls.php 2 months ago conf.cls.php 2 months ago control.cls.php 2 months ago core.cls.php 2 months ago crawler-map.cls.php 2 months ago crawler.cls.php 2 months ago css.cls.php 2 months ago data.cls.php 2 months ago data.upgrade.func.php 2 months ago db-optm.cls.php 2 months ago debug2.cls.php 2 months ago doc.cls.php 2 months ago error.cls.php 2 months ago esi.cls.php 2 months ago file.cls.php 2 months ago guest.cls.php 2 months ago gui.cls.php 2 months ago health.cls.php 2 months ago htaccess.cls.php 2 months ago img-optm-manage.trait.php 2 months ago img-optm-pull.trait.php 2 months ago img-optm-send.trait.php 2 months ago img-optm.cls.php 2 months ago import.cls.php 2 months ago import.preset.cls.php 2 months ago lang.cls.php 2 months ago localization.cls.php 2 months ago media.cls.php 2 months ago metabox.cls.php 2 months ago object-cache-wp.cls.php 2 months ago object-cache.cls.php 2 months ago object.lib.php 2 months ago optimize.cls.php 2 months ago optimizer.cls.php 2 months ago placeholder.cls.php 2 months ago purge.cls.php 2 months ago report.cls.php 2 months ago rest.cls.php 2 months ago root.cls.php 2 months ago router.cls.php 2 months ago str.cls.php 2 months ago tag.cls.php 2 months ago task.cls.php 2 months ago tool.cls.php 2 months ago ucss.cls.php 2 months ago utility.cls.php 2 months ago vary.cls.php 2 months ago vpi.cls.php 2 months ago
crawler-map.cls.php
668 lines
1 <?php
2 /**
3 * The Crawler Sitemap Class.
4 *
5 * @package LiteSpeed
6 * @since 1.1.0
7 */
8
9 namespace LiteSpeed;
10
11 defined( 'WPINC' ) || exit();
12
13 /**
14 * Class Crawler_Map
15 *
16 * Maintains and persists crawler sitemap/blacklist state, parses custom sitemaps,
17 * and exposes helpers to query & mutate crawler results.
18 */
19 class Crawler_Map extends Root {
20
21 const LOG_TAG = '🐞🗺️';
22
23 const BM_MISS = 1;
24 const BM_HIT = 2;
25 const BM_BLACKLIST = 4;
26
27 /**
28 * Site URL used to simplify URLs.
29 *
30 * @var string
31 */
32 private $_site_url;
33
34 /**
35 * Main crawler table name.
36 *
37 * @var string
38 */
39 private $_tb;
40
41 /**
42 * Crawler blacklist table name.
43 *
44 * @var string
45 */
46 private $_tb_blacklist;
47
48 /**
49 * Data service instance.
50 *
51 * @var \LiteSpeed\Data
52 */
53 private $__data;
54
55 /**
56 * Timeout (seconds) when fetching sitemaps.
57 *
58 * @var int
59 */
60 private $_conf_map_timeout;
61
62 /**
63 * Collected URLs from parsed sitemaps.
64 *
65 * @var array<int,string>
66 */
67 private $_urls = [];
68
/**
 * Set up the site URL, the data service, both crawler table names and the
 * sitemap fetch timeout.
 *
 * @since 1.1.0
 */
public function __construct() {
	$this->_site_url = get_site_url();

	$data_service        = Data::cls();
	$this->__data        = $data_service;
	$this->_tb           = $data_service->tb( 'crawler' );
	$this->_tb_blacklist = $data_service->tb( 'crawler_blacklist' );

	// Sitemap fetch timeout: 180 seconds unless LITESPEED_CRAWLER_MAP_TIMEOUT is defined.
	if ( defined( 'LITESPEED_CRAWLER_MAP_TIMEOUT' ) ) {
		$this->_conf_map_timeout = constant( 'LITESPEED_CRAWLER_MAP_TIMEOUT' );
	} else {
		$this->_conf_map_timeout = 180;
	}
}
82
/**
 * Save URLs crawl status into DB.
 *
 * For every non-empty status bucket in `$items` this:
 *  - writes the status character into the current crawler's slot of the `res` column,
 *  - mirrors blacklist / no-cache buckets into the blacklist table
 *    (updating rows that already exist, inserting the rest),
 *  - writes the HTTP code into the current crawler's slot of the comma-separated
 *    `reason` column (and of the blacklist `reason` for blacklist / no-cache buckets),
 *  - resets the bucket to an empty array.
 *
 * @since 3.0
 * @access public
 *
 * @param array<int,array<int,array{url:string,code:int}>> $items        Map of bit => [ id => [url, code] ].
 * @param int                                              $curr_crawler Current crawler index (0-based).
 * @return array<int,array> The `$items` map with every processed bucket emptied.
 */
public function save_map_status( $items, $curr_crawler ) {
	global $wpdb;
	Utility::compatibility();

	$total_crawler     = count( Crawler::cls()->list_crawlers() );
	$total_crawler_pos = $total_crawler - 1;

	// Replace current crawler's position.
	$curr_crawler = (int) $curr_crawler;

	// Number of crawler slots to the right of the current one; same for every bucket.
	$right_pos = $total_crawler_pos - $curr_crawler;

	foreach ( $items as $bit => $ids ) {
		// $ids = [ id => [ url, code ], ... ].
		if ( ! $ids ) {
			continue;
		}
		self::debug( 'Update map [crawler] ' . $curr_crawler . ' [bit] ' . $bit . ' [count] ' . count( $ids ) );

		$is_blacklist_bit = Crawler::STATUS_BLACKLIST === $bit || Crawler::STATUS_NOCACHE === $bit;

		// Update res first, then reason. IDs are forced to integers before being inlined in SQL.
		$id_all = implode( ',', array_map( 'intval', array_keys( $ids ) ) );

		// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared
		$wpdb->query("UPDATE `$this->_tb` SET res = CONCAT( LEFT( res, $curr_crawler ), '$bit', RIGHT( res, $right_pos ) ) WHERE id IN ( $id_all )");

		// Add blacklist
		if ( $is_blacklist_bit ) {
			$q = "SELECT a.id, a.url FROM `$this->_tb_blacklist` a LEFT JOIN `$this->_tb` b ON b.url=a.url WHERE b.id IN ( $id_all )";
			// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
			$existing = $wpdb->get_results( $q, ARRAY_A );

			// Update current crawler status tag in existing blacklist
			if ( $existing ) {
				// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.PreparedSQL.NotPrepared
				$count = $wpdb->query("UPDATE `$this->_tb_blacklist` SET res = CONCAT( LEFT( res, $curr_crawler ), '$bit', RIGHT( res, $right_pos ) ) WHERE id IN ( " . implode( ',', array_column( $existing, 'id' ) ) . ' )');
				self::debug( 'Update blacklist [count] ' . $count );
			}

			// Append new blacklist
			if ( count( $ids ) > count( $existing ) ) {
				$new_urls = array_diff( array_column( $ids, 'url' ), array_column( $existing, 'url' ) );

				self::debug( 'Insert into blacklist [count] ' . count( $new_urls ) );

				// The count comparison above does not guarantee that any URL is actually new;
				// an INSERT with an empty VALUES list would be a malformed query, so skip it.
				if ( $new_urls ) {
					$q = "INSERT INTO `$this->_tb_blacklist` ( url, res, reason ) VALUES " . implode( ',', array_fill( 0, count( $new_urls ), '( %s, %s, %s )' ) );

					// New rows start with '-' in every slot except the current crawler's.
					$res                  = array_fill( 0, $total_crawler, '-' );
					$res[ $curr_crawler ] = $bit;
					$res                  = implode( '', $res );

					// Pre-populate default reason value first, update later
					$default_reason = $total_crawler > 1 ? str_repeat( ',', $total_crawler - 1 ) : '';

					$data = [];
					foreach ( $new_urls as $url ) {
						$data[] = $url;
						$data[] = $res;
						$data[] = $default_reason;
					}
					// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
					$wpdb->query( $wpdb->prepare( $q, $data ) );
				}
			}
		}

		// Update sitemap reason w/ HTTP code: group row IDs by code so each code needs one query.
		$reason_array = [];
		foreach ( $ids as $row_id => $row ) {
			$code = (int) $row['code'];
			if ( empty( $reason_array[ $code ] ) ) {
				$reason_array[ $code ] = [];
			}
			$reason_array[ $code ][] = (int) $row_id;
		}

		foreach ( $reason_array as $code => $v2 ) {
			// Complement comma: re-add the separators that SUBSTRING_INDEX drops on either side.
			if ( $curr_crawler ) {
				$code = ',' . $code;
			}
			if ( $curr_crawler < $total_crawler_pos ) {
				$code .= ',';
			}

			// phpcs:ignore WordPress.DB
			$count = $wpdb->query( "UPDATE `$this->_tb` SET reason=CONCAT(SUBSTRING_INDEX(reason, ',', $curr_crawler), '$code', SUBSTRING_INDEX(reason, ',', -$right_pos)) WHERE id IN (" . implode( ',', $v2 ) . ')' );

			self::debug( "Update map reason [code] $code [pos] left $curr_crawler right -$right_pos [count] $count" );

			// Update blacklist reason
			if ( $is_blacklist_bit ) {
				// phpcs:ignore WordPress.DB
				$count = $wpdb->query( "UPDATE `$this->_tb_blacklist` a LEFT JOIN `$this->_tb` b ON b.url = a.url SET a.reason=CONCAT(SUBSTRING_INDEX(a.reason, ',', $curr_crawler), '$code', SUBSTRING_INDEX(a.reason, ',', -$right_pos)) WHERE b.id IN (" . implode( ',', $v2 ) . ')' );

				self::debug( "Update blacklist [code] $code [pos] left $curr_crawler right -$right_pos [count] $count" );
			}
		}

		// Reset list.
		$items[ $bit ] = [];
	}

	return $items;
}
189
/**
 * Manually blacklist one sitemap entry for every crawler.
 * NOTE: $id is the sitemap table ID, not the blacklist table ID.
 *
 * The sitemap row is updated in place; the matching blacklist row (joined by URL)
 * is updated when it exists and inserted otherwise.
 *
 * @since 3.0
 * @access public
 *
 * @param int $id Sitemap row ID.
 * @return void
 */
public function blacklist_add( $id ) {
	global $wpdb;

	$id = (int) $id;

	// Build res&reason: one blacklist flag and one 'Man' reason per crawler.
	$crawler_count = count( Crawler::cls()->list_crawlers() );
	$res           = str_repeat( Crawler::STATUS_BLACKLIST, $crawler_count );
	$reason        = implode( ',', array_fill( 0, $crawler_count, 'Man' ) );

	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared
	$row = $wpdb->get_row( "SELECT a.url, b.id FROM `{$this->_tb}` a LEFT JOIN `{$this->_tb_blacklist}` b ON b.url = a.url WHERE a.id = '{$id}'", ARRAY_A );
	if ( ! $row ) {
		self::debug( 'blacklist failed to add [id] ' . $id );
		return;
	}

	self::debug( 'Add to blacklist [url] ' . $row['url'] );

	$map_sql = "UPDATE `{$this->_tb}` SET res = %s, reason = %s WHERE id = %d";
	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
	$wpdb->query( $wpdb->prepare( $map_sql, [ $res, $reason, $id ] ) );

	// b.id is only set when a blacklist row already exists for this URL.
	if ( $row['id'] ) {
		$blacklist_sql  = "UPDATE `{$this->_tb_blacklist}` SET res = %s, reason = %s WHERE id = %d";
		$blacklist_args = [ $res, $reason, $row['id'] ];
	} else {
		$blacklist_sql  = "INSERT INTO `{$this->_tb_blacklist}` (url, res, reason) VALUES (%s, %s, %s)";
		$blacklist_args = [ $row['url'], $res, $reason ];
	}
	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
	$wpdb->query( $wpdb->prepare( $blacklist_sql, $blacklist_args ) );
}
233
/**
 * Remove one entry from the blacklist.
 *
 * Before the blacklist row is deleted, every no-cache / blacklist flag in the
 * `res` column of the sitemap row with the same URL is replaced by '-'.
 * Does nothing when the blacklist table does not exist.
 *
 * @since 3.0
 * @access public
 *
 * @param int $id Blacklist row ID.
 * @return void
 */
public function blacklist_del( $id ) {
	global $wpdb;

	$has_blacklist_table = $this->__data->tb_exist( 'crawler_blacklist' );
	if ( ! $has_blacklist_table ) {
		return;
	}

	$id = (int) $id;
	self::debug( 'blacklist delete [id] ' . $id );

	$nocache_flag   = Crawler::STATUS_NOCACHE;
	$blacklist_flag = Crawler::STATUS_BLACKLIST;

	// Clear the flags on the sitemap row first: the sub-select needs the blacklist row to still exist.
	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared
	$wpdb->query( "UPDATE `{$this->_tb}` SET res=REPLACE(REPLACE(res, '{$nocache_flag}', '-'), '{$blacklist_flag}', '-') WHERE url=(SELECT url FROM `{$this->_tb_blacklist}` WHERE id={$id})" );

	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared
	$wpdb->query( "DELETE FROM `{$this->_tb_blacklist}` WHERE id='{$id}'" );
}
265
/**
 * Clear the whole blacklist.
 *
 * Replaces every no-cache / blacklist flag in the sitemap table's `res` column
 * with '-' and then truncates the blacklist table. Does nothing when the
 * blacklist table does not exist.
 *
 * @since 3.0
 * @access public
 * @return void
 */
public function blacklist_empty() {
	global $wpdb;

	$has_blacklist_table = $this->__data->tb_exist( 'crawler_blacklist' );
	if ( ! $has_blacklist_table ) {
		return;
	}

	self::debug( 'Truncate blacklist' );

	$nocache_flag   = Crawler::STATUS_NOCACHE;
	$blacklist_flag = Crawler::STATUS_BLACKLIST;

	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared
	$wpdb->query( "UPDATE `{$this->_tb}` SET res=REPLACE(REPLACE(res, '{$nocache_flag}', '-'), '{$blacklist_flag}', '-')" );

	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.InterpolatedNotPrepared
	$wpdb->query( "TRUNCATE `{$this->_tb_blacklist}`" );
}
287
/**
 * Fetch blacklist rows, newest ID first.
 *
 * Without a limit every row is returned. With a limit, one page is returned;
 * when no offset is given it is derived from the total row count via
 * Utility::pagination().
 *
 * @since 3.0
 * @access public
 *
 * @param int|false $limit  Number of rows to fetch, or false for all.
 * @param int|false $offset Offset for pagination, or false to auto-calc.
 * @return array<int,array<string,mixed>> Rows as associative arrays; empty when the table does not exist.
 */
public function list_blacklist( $limit = false, $offset = false ) {
	global $wpdb;

	if ( ! $this->__data->tb_exist( 'crawler_blacklist' ) ) {
		return [];
	}

	$sql = "SELECT * FROM `{$this->_tb_blacklist}` ORDER BY id DESC";

	// No paging requested: return the full list.
	if ( false === $limit ) {
		// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
		return $wpdb->get_results( $sql, ARRAY_A );
	}

	if ( false === $offset ) {
		$offset = Utility::pagination( $this->count_blacklist(), $limit, true );
	}

	// phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
	$paged_sql = $wpdb->prepare( $sql . ' LIMIT %d, %d', $offset, $limit );

	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
	return $wpdb->get_results( $paged_sql, ARRAY_A );
}
319
/**
 * Count the rows in the blacklist table.
 *
 * NOTE(review): the value is passed through from `$wpdb->get_var()` unchanged, so
 * callers presumably receive a numeric string (or null on query failure) rather
 * than a real int — confirm before relying on strict comparisons.
 *
 * @return int|false Row count, or false when the blacklist table does not exist.
 */
public function count_blacklist() {
	global $wpdb;

	$has_blacklist_table = $this->__data->tb_exist( 'crawler_blacklist' );
	if ( ! $has_blacklist_table ) {
		return false;
	}

	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared, WordPress.DB.PreparedSQL.InterpolatedNotPrepared
	return $wpdb->get_var( "SELECT COUNT(*) FROM `{$this->_tb_blacklist}`" );
}
336
/**
 * Delete the crawler sitemap table and show a success notice.
 *
 * @since 3.0
 * @access public
 * @return void
 */
public function empty_map() {
	Data::cls()->tb_del( 'crawler' );

	Admin_Display::success( __( 'Sitemap cleaned successfully', 'litespeed-cache' ) );
}
350
/**
 * Fetch one page of the generated sitemap, ordered by ID.
 *
 * Two optional filters are applied:
 *  - a posted `kw` keyword, matched as a substring of the URL;
 *  - the request type from Router::verify_type() ('hit', 'miss' or 'blacklisted'),
 *    matched as a status character anywhere in the `res` column.
 *
 * @since 3.0
 * @access public
 *
 * @param int      $limit  Number of rows per page.
 * @param int|bool $offset Offset for pagination, or false to auto-calc.
 * @return array<int,array<string,mixed>> Rows as associative arrays; empty when the table does not exist.
 */
public function list_map( $limit, $offset = false ) {
	global $wpdb;

	if ( ! $this->__data->tb_exist( 'crawler' ) ) {
		return [];
	}

	if ( false === $offset ) {
		$offset = Utility::pagination( $this->count_map(), $limit, true );
	}

	$type = Router::verify_type();

	// Request type => status character to look for in `res`.
	$status_by_type = [
		'hit'         => Crawler::STATUS_HIT,
		'miss'        => Crawler::STATUS_MISS,
		'blacklisted' => Crawler::STATUS_BLACKLIST,
	];

	$conditions = [];
	$args       = [];

	// phpcs:ignore WordPress.Security.NonceVerification.Missing
	if ( ! empty( $_POST['kw'] ) ) {
		// phpcs:ignore WordPress.Security.NonceVerification.Missing
		$kw           = sanitize_text_field( wp_unslash( $_POST['kw'] ) );
		$conditions[] = 'url LIKE %s';
		$args[]       = '%' . $wpdb->esc_like( $kw ) . '%';
	}

	// Only exact string matches select a status filter.
	if ( is_string( $type ) && isset( $status_by_type[ $type ] ) ) {
		$conditions[] = "res LIKE '%" . $status_by_type[ $type ] . "%'";
	}

	$sql = "SELECT * FROM `{$this->_tb}`";
	if ( $conditions ) {
		$sql .= ' WHERE ' . implode( ' AND ', $conditions );
	}
	$sql .= ' ORDER BY id LIMIT %d, %d';

	$args[] = $offset;
	$args[] = $limit;

	return $wpdb->get_results( $wpdb->prepare( $sql, $args ), ARRAY_A ); // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
}
409
/**
 * Count sitemap rows, optionally restricted by the request type.
 *
 * When Router::verify_type() yields 'hit', 'miss' or 'blacklisted', only rows whose
 * `res` column contains the matching status character are counted.
 *
 * NOTE(review): the value is passed through from `$wpdb->get_var()` unchanged, so
 * it is presumably a numeric string rather than a real int — confirm.
 *
 * @return int|false Row count, or false when the sitemap table does not exist.
 */
public function count_map() {
	global $wpdb;

	if ( ! $this->__data->tb_exist( 'crawler' ) ) {
		return false;
	}

	$sql = "SELECT COUNT(*) FROM `{$this->_tb}`";

	// Request type => status character to look for in `res`.
	$status_by_type = [
		'hit'         => Crawler::STATUS_HIT,
		'miss'        => Crawler::STATUS_MISS,
		'blacklisted' => Crawler::STATUS_BLACKLIST,
	];

	$type = Router::verify_type();
	if ( is_string( $type ) && isset( $status_by_type[ $type ] ) ) {
		$sql .= " WHERE res LIKE '%" . $status_by_type[ $type ] . "%'";
	}

	return $wpdb->get_var( $sql ); // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
}
437
/**
 * Regenerate the sitemap and report the outcome to the admin.
 *
 * An error notice is raised when no URL was generated. A success notice with
 * the item count is shown only for manual runs outside of cron.
 *
 * @since 1.1.0
 * @access public
 *
 * @param bool $manual Whether triggered manually from UI.
 * @return void
 */
public function gen( $manual = false ) {
	$count = $this->_gen();

	if ( ! $count ) {
		Admin_Display::error( __( 'No valid sitemap parsed for crawler.', 'litespeed-cache' ) );
		return;
	}

	// Stay silent for cron runs and for non-manual triggers.
	if ( wp_doing_cron() || ! $manual ) {
		return;
	}

	/* translators: %d: number of URLs stored in the sitemap */
	$template = __( 'Sitemap created successfully: %d items', 'litespeed-cache' );
	Admin_Display::success( sprintf( $template, $count ) );
}
460
/**
 * Generate the sitemap.
 *
 * Steps:
 *  1. Make sure the sitemap and blacklist tables exist.
 *  2. Parse every configured custom sitemap and collect its URLs.
 *  3. Optionally drop the site domain from the URLs, then de-duplicate them.
 *  4. Truncate the sitemap table and refill it, skipping URLs that are blacklisted
 *     for every crawler and carrying over `res` / `reason` for partially blacklisted ones.
 *  5. Reset the crawler position.
 *
 * @since 1.1.0
 * @access private
 * @return int|false Number of URLs generated or false when no sitemap is configured.
 */
private function _gen() {
	global $wpdb;

	foreach ( [ 'crawler', 'crawler_blacklist' ] as $tb_name ) {
		if ( ! $this->__data->tb_exist( $tb_name ) ) {
			$this->__data->tb_create( $tb_name );
		}
	}

	// Use custom sitemap.
	$sitemap = $this->conf( Base::O_CRAWLER_SITEMAP );
	if ( ! $sitemap ) {
		return false;
	}

	$offset  = strlen( $this->_site_url );
	$sitemap = Utility::sanitize_lines( $sitemap );

	// Start from a clean slate so a repeated call on the same instance
	// does not carry over URLs collected by an earlier run.
	$this->_urls = [];

	// Catch failures per sitemap: one unreachable or broken sitemap must not
	// prevent the remaining configured sitemaps from being parsed.
	foreach ( $sitemap as $this_map ) {
		try {
			$this->_parse( $this_map );
		} catch ( \Exception $e ) {
			self::debug( '❌ failed to parse custom sitemap: ' . $e->getMessage() );
		}
	}

	if ( is_array( $this->_urls ) && ! empty( $this->_urls ) ) {
		if ( defined( 'LITESPEED_CRAWLER_DROP_DOMAIN' ) && constant( 'LITESPEED_CRAWLER_DROP_DOMAIN' ) ) {
			foreach ( $this->_urls as $k => $v ) {
				// URLs that do not start with the site URL are discarded.
				if ( 0 !== stripos( $v, $this->_site_url ) ) {
					unset( $this->_urls[ $k ] );
					continue;
				}
				$this->_urls[ $k ] = substr( $v, $offset );
			}
		}

		$this->_urls = array_values( array_unique( $this->_urls ) );
	}

	self::debug( 'Truncate sitemap' );
	$wpdb->query( "TRUNCATE `$this->_tb`" ); // phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared, WordPress.DB.DirectDatabaseQuery.DirectQuery

	self::debug( 'Generate sitemap' );

	// Filter URLs in blacklist.
	$blacklist = $this->list_blacklist();

	$full_blacklisted    = [];
	$partial_blacklisted = [];
	foreach ( $blacklist as $v ) {
		if ( false === strpos( $v['res'], '-' ) ) {
			// Full blacklisted: no crawler slot is left unflagged.
			$full_blacklisted[] = $v['url'];
		} else {
			// Replace existing reason: every non-empty slot becomes 'Existed'.
			$v['reason'] = explode( ',', $v['reason'] );
			$v['reason'] = array_map(
				function ( $element ) {
					return $element ? 'Existed' : '';
				},
				$v['reason']
			);
			$v['reason'] = implode( ',', $v['reason'] );

			$partial_blacklisted[ $v['url'] ] = [
				'res'    => $v['res'],
				'reason' => $v['reason'],
			];
		}
	}

	// Drop all blacklisted URLs.
	$this->_urls = array_diff( $this->_urls, $full_blacklisted );

	// Default res & reason.
	$crawler_count  = count( Crawler::cls()->list_crawlers() );
	$default_res    = str_repeat( '-', $crawler_count );
	$default_reason = $crawler_count > 1 ? str_repeat( ',', $crawler_count - 1 ) : '';

	// Flat list of (url, res, reason) triples.
	$data = [];
	foreach ( $this->_urls as $url ) {
		$data[] = $url;
		$data[] = array_key_exists( $url, $partial_blacklisted ) ? $partial_blacklisted[ $url ]['res'] : $default_res;
		$data[] = array_key_exists( $url, $partial_blacklisted ) ? $partial_blacklisted[ $url ]['reason'] : $default_reason;
	}

	// 300 is a multiple of 3, so a chunk never splits a triple (100 rows per insert).
	foreach ( array_chunk( $data, 300 ) as $data2 ) {
		$this->_save( $data2 );
	}

	// Reset crawler.
	Crawler::cls()->reset_pos();

	return count( $this->_urls );
}
565
/**
 * Insert a batch of rows into the sitemap table with a single query.
 *
 * The placeholder list is produced by Utility::chunk_placeholder() from the
 * data and the field list; the values are bound through `$wpdb->prepare()`.
 *
 * @since 3.0
 * @access private
 *
 * @param array<int,string> $data   Flat array (url,res,reason, url,res,reason, ...).
 * @param string            $fields Fields list for insert (default url,res,reason).
 * @return void
 */
private function _save( $data, $fields = 'url,res,reason' ) {
	global $wpdb;

	// Nothing to insert.
	if ( empty( $data ) ) {
		return;
	}

	$placeholders = Utility::chunk_placeholder( $data, $fields );
	$insert_sql   = "INSERT INTO `{$this->_tb}` ( {$fields} ) VALUES " . $placeholders;

	$wpdb->query( $wpdb->prepare( $insert_sql, $data ) ); // phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.PreparedSQL.NotPrepared
}
591
/**
 * Parse custom sitemap and collect urls.
 *
 * A sitemap index (`<sitemap>` entries) is followed recursively; a URL set
 * (`<url>` entries) has each `<loc>` appended to `$this->_urls`.
 *
 * @since 1.1.1
 * @access private
 *
 * @param string $sitemap Absolute sitemap URL.
 * @return void
 * @throws \Exception If the remote read fails, or if the XML cannot be parsed
 *                    and no URL has been collected so far.
 */
private function _parse( $sitemap ) {
	/**
	 * Read via wp func to avoid allow_url_fopen = off
	 *
	 * @since 2.2.7
	 */
	$response = wp_safe_remote_get(
		$sitemap,
		[
			'timeout'   => $this->_conf_map_timeout,
			// Certificate verification is disabled for this request.
			'sslverify' => false,
		]
	);
	if ( is_wp_error( $response ) ) {
		$error_message = $response->get_error_message();
		self::debug( 'failed to read sitemap: ' . $error_message );
		throw new \Exception( 'Failed to remote read ' . esc_url( $sitemap ) );
	}

	$xml_object = simplexml_load_string( $response['body'], null, LIBXML_NOCDATA );

	// Compare strictly against false: a valid but empty document (e.g. `<urlset/>`)
	// yields a SimpleXMLElement that casts to boolean false and must not be
	// mistaken for a parse failure.
	if ( false === $xml_object ) {
		// Tolerate a broken sitemap once some URLs have already been collected.
		if ( $this->_urls ) {
			return;
		}
		throw new \Exception( 'Failed to parse xml ' . esc_url( $sitemap ) );
	}

	// start parsing.
	$xml_array = (array) $xml_object;
	if ( ! empty( $xml_array['sitemap'] ) ) {
		// parse sitemap set.
		if ( is_object( $xml_array['sitemap'] ) ) {
			$xml_array['sitemap'] = (array) $xml_array['sitemap'];
		}

		if ( ! empty( $xml_array['sitemap']['loc'] ) ) {
			// is single sitemap.
			$this->_parse( (string) $xml_array['sitemap']['loc'] );
		} else {
			// parse multiple sitemaps.
			foreach ( (array) $xml_array['sitemap'] as $val ) {
				$val = (array) $val;
				if ( ! empty( $val['loc'] ) ) {
					$this->_parse( (string) $val['loc'] ); // recursive parse sitemap.
				}
			}
		}
	} elseif ( ! empty( $xml_array['url'] ) ) {
		// parse url set.
		if ( is_object( $xml_array['url'] ) ) {
			$xml_array['url'] = (array) $xml_array['url'];
		}
		// if only 1 element.
		if ( ! empty( $xml_array['url']['loc'] ) ) {
			$this->_urls[] = (string) $xml_array['url']['loc'];
		} else {
			foreach ( (array) $xml_array['url'] as $val ) {
				$val = (array) $val;
				if ( ! empty( $val['loc'] ) ) {
					$this->_urls[] = (string) $val['loc'];
				}
			}
		}
	}
}
667 }
668