PluginProbe ʕ •ᴥ•ʔ
Broken Link Checker / 0.7.1
Broken Link Checker v0.7.1
1.5.4 1.5.5 1.6 1.6.1 1.6.2 1.7 1.7.1 1.8 1.8.1 1.8.2 1.8.3 1.9 1.9.1 1.9.2 1.9.3 1.9.4 1.9.4.1 1.9.4.2 1.9.5 2.0.0 2.1.0 2.2.0 2.2.1 2.2.2 2.2.3 2.2.4 2.3.0 2.3.1 2.4.0 2.4.1 2.4.2 2.4.3 2.4.4 2.4.5 2.4.6 2.4.7 2.4.8 0.9.4 0.9.4.1 0.9.4.2 0.9.4.3 0.9.4.4 0.9.4.4-last-non-modular 0.9.5 0.9.6 0.9.7 0.9.7.1 0.9.7.2 1.10 1.10.1 1.10.10 1.10.11 1.10.2 1.10.3 1.10.4 1.10.5 1.10.6 1.10.7 1.10.8 1.10.9 1.11.1 1.11.10 1.11.11 1.11.12 1.11.13 1.11.14 1.11.15 1.11.17 1.11.18 1.11.19 1.11.2 1.11.20 1.11.21 1.11.3 1.11.4 1.11.5 1.11.8 1.11.9 1.2.2 1.2.3 1.2.4 1.2.5 1.3 1.3.1 1.4 1.5 1.5.1 1.5.2 1.5.3 trunk 0.1 0.2 0.2.2 0.2.2.1 0.2.3 0.2.4 0.2.5 0.3 0.3.1 0.3.2 0.3.3 0.3.4 0.3.5 0.3.6 0.3.7 0.3.8 0.3.9 0.4 0.4-i8n 0.4.1 0.4.10 0.4.11 0.4.12 0.4.13 0.4.14 0.4.2 0.4.3 0.4.4 0.4.5 0.4.6 0.4.7 0.4.8 0.4.9 0.5 0.5.1 0.5.10 0.5.10.1 0.5.11 0.5.12 0.5.13 0.5.14 0.5.15 0.5.16 0.5.16.1 0.5.17 0.5.18 0.5.2 0.5.3 0.5.4 0.5.5 0.5.6 0.5.7 0.5.8 0.5.8.1 0.5.9 0.6 0.6.1 0.6.2 0.6.3 0.6.4 0.6.5 0.7 0.7.1 0.7.2 0.7.3 0.7.4 0.8 0.8.1 0.9 0.9.1 0.9.2 0.9.3
broken-link-checker / link-classes.php
broken-link-checker Last commit date
images 17 years ago languages 16 years ago JSON.php 17 years ago broken-link-checker.php 16 years ago config-manager.php 16 years ago core.php 16 years ago highlighter-class.php 16 years ago instance-classes.php 16 years ago link-classes.php 16 years ago readme.txt 16 years ago uninstall.php 16 years ago utility-class.php 16 years ago
link-classes.php
598 lines
1 <?php
2
3 /**
4 * @author W-Shadow
5 * @copyright 2009
6 */
7
8 if (!class_exists('blcLink')){
9 class blcLink {
10
//Object state (kept on the object only; save() does not write these fields)
var $is_new = false; //True while the link has no DB record yet; save() then INSERTs and clears the flag.
var $last_headers = ''; //Raw response headers accumulated by read_header() during a cURL request.
var $meets_check_threshold = false; //currently unused

//DB fields - the columns of the {prefix}blc_links table that save() writes.
var $link_id = 0; //Record ID; empty (0) until the link has been inserted or loaded.
var $url = ''; //The link's URL; this is what check() requests.
var $last_check='0000-00-00 00:00:00'; //'Y-m-d H:i:s' timestamp set by check().
var $check_count = 0; //Incremented by every check(), reset to 0 when a check finds the link valid.
var $final_url = ''; //URL reported by cURL/Snoopy after the request (i.e. after any redirects).
var $log = ''; //Human-readable log of the most recent check.
var $http_code = 0; //HTTP status code of the last check, or the BLC_CHECKING / BLC_TIMEOUT constants.
var $request_duration = 0; //Duration of the last request (cURL's total_time, or measured around the Snoopy fetch).
var $timeout = false; //Set when the last check is treated as a timeout; stored as an integer.
var $redirect_count = 0; //Number of redirects reported for the last check.
/**
 * blcLink::__construct()
 * Build a link object. What happens depends on the type of $arg :
 *  - array  : the values are copied onto the object; the link counts as new when no link_id was given.
 *  - int    : the link with that link_id is loaded from the DB. If it isn't found the object is
 *             left in its default (invalid) state - no error is raised.
 *  - string : the link with that URL is loaded from the DB, or a new, unsaved link is set up for it.
 *  - anything else (including null) : an empty new link.
 *
 * @param mixed $arg Link ID, URL, associative array of field values, or null.
 */
function __construct($arg = null){
	global $wpdb;

	if ( is_array($arg) ){
		$this->set_values($arg);
		//No ID among the supplied values means the link hasn't been stored yet.
		$this->is_new = empty($this->link_id);
		return;
	}

	if ( is_int($arg) ){
		//Look the link up by its ID.
		$query = $wpdb->prepare("SELECT * FROM {$wpdb->prefix}blc_links WHERE link_id=%d LIMIT 1", $arg);
		$row = $wpdb->get_row( $query, ARRAY_A );
		if ( is_array($row) ){
			$this->set_values($row);
		}
		//When the row is missing the object simply stays invalid.
		//(Throwing an error here wouldn't be PHP 4 compatible.)
		return;
	}

	if ( is_string($arg) ){
		//Look the link up by its URL.
		$query = $wpdb->prepare("SELECT * FROM {$wpdb->prefix}blc_links WHERE url=%s LIMIT 1", $arg);
		$row = $wpdb->get_row( $query, ARRAY_A );
		if ( is_array($row) ){
			$this->set_values($row);
		} else {
			//Unknown URL - start a fresh, unsaved link for it.
			$this->url = $arg;
			$this->is_new = true;
		}
		return;
	}

	//No usable argument : an empty new link.
	$this->is_new = true;
}
63
//Old-style (PHP 4) constructor : a method named after the class. It only forwards
//to __construct() so that both construction styles initialise the object identically.
function blcLink($arg = null){
	$this->__construct($arg);
}
67
/**
 * blcLink::set_values()
 * Copy every entry of an array onto this object, using the array keys as property names.
 * Nothing is validated or sanitized, and keys that don't match a declared field are
 * assigned as well.
 *
 * @param array $arr Associative array : property name => value.
 * @return void
 */
function set_values($arr){
	foreach ( $arr as $field => $field_value ){
		$this->{$field} = $field_value;
	}
}
80
/**
 * blcLink::valid()
 * Tell whether this object describes a usable link : it must have a URL, and it must
 * either already have a DB record (non-empty link_id) or be flagged as new.
 *
 * @return bool
 */
function valid(){
	//A link without a URL is never valid.
	if ( empty( $this->url ) ){
		return false;
	}

	$has_record = !empty( $this->link_id );
	return $has_record || $this->is_new;
}
90
/**
 * blcLink::check()
 * Check if the link is working.
 *
 * The stored results of the previous check are reset and the record is saved with the
 * BLC_CHECKING status before the request is made. The URL is then requested with cURL
 * when it is available, otherwise with Snoopy, and the status code, final URL, request
 * duration, redirect count and a textual log are stored on the object. The results of
 * the request itself are NOT saved by this method - the caller is expected to save().
 *
 * URLs whose scheme is not HTTP or HTTPS (including URLs with no scheme and URLs that
 * parse_url() can't parse) are not requested at all; they get the code 200 and count as OK.
 *
 * @param int $timeout Request timeout in seconds.
 * @return bool True if the link is considered okay, false if it is broken, timed out or invalid.
 */
function check( $timeout = 40 ){
	if ( !$this->valid() ) return false;

	//General note : there is usually no need to save() the result of the check
	//in this method because it will be typically called from wsBrokenLinkChecker::work()
	//that will call the save() method for us.

	/*
	Check for problematic (though not necessarily "broken") links.
	If a link has been checked multiple times and still hasn't been marked as
	timed-out or broken then probably the checking algorithm is having problems with
	that link. Mark it as timed-out and hope the user sorts it out.
	*/
	if ( ($this->check_count >= 3) && ( !$this->timeout ) && ( $this->http_code == BLC_CHECKING ) ) {
		$this->timeout = true;
		$this->http_code = BLC_TIMEOUT;
		$this->last_check = date('Y-m-d H:i:s');
		$this->log .= "\r\n[A weird error was detected. This should never happen.]";
		return false;
	}

	//Update the DB record before actually performing the check.
	//Useful if something goes terribly wrong while checking this particular URL.
	//Note : might be unnecessary.
	$this->check_count++;
	$this->last_check = date('Y-m-d H:i:s');
	$this->log = '';
	$this->final_url = '';
	$this->http_code = BLC_CHECKING;
	$this->request_duration = 0;
	$this->timeout = false;
	$this->redirect_count = 0;
	$this->save();

	//Empty some variables before running the check
	$this->last_headers = '';

	//Save the URL into a local var; we'll need it later.
	$url = $this->url;

	//parse_url() returns false for seriously malformed URLs and leaves out the 'scheme'
	//key for scheme-less URLs, so the result must not be indexed blindly.
	$parts = parse_url($url);
	$scheme = ( is_array($parts) && isset($parts['scheme']) ) ? $parts['scheme'] : '';
	//URI schemes are case-insensitive ("HTTP://..." is still HTTP), so compare a lowercase copy.
	$scheme_lc = strtolower($scheme);

	//Only HTTP links are checked. All others are automatically considered okay.
	if ( ($scheme_lc != 'http') && ($scheme_lc != 'https') ) {
		$this->log .= "URL protocol ($scheme) is not HTTP(S). This link won't be checked.\n";
		$this->http_code = 200;
		return true;
	}

	//Kill the #anchor if it's present
	$anchor_start = strpos($url, '#');
	if ( $anchor_start !== false ){
		$url = substr($url, 0, $anchor_start);
	}

	//******* Use CURL if available ***********
	if ( function_exists('curl_init') ) {
		$ch = curl_init();
		curl_setopt($ch, CURLOPT_URL, blcUtility::urlencodefix($url));
		//Masquerade as Internet explorer
		curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
		//Add a semi-plausible referer header to avoid tripping up some bot traps
		curl_setopt($ch, CURLOPT_REFERER, get_option('home'));

		//Redirects don't work when safe mode or open_basedir is enabled.
		if ( !blcUtility::is_safe_mode() && !blcUtility::is_open_basedir() ) {
			curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
		}
		curl_setopt($ch, CURLOPT_MAXREDIRS, 10);

		//Set the timeout
		curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

		//Set the proxy configuration. The user can provide this in wp-config.php
		if (defined('WP_PROXY_HOST')) {
			curl_setopt($ch, CURLOPT_PROXY, WP_PROXY_HOST);
		}

		if (defined('WP_PROXY_PORT')) {
			curl_setopt($ch, CURLOPT_PROXYPORT, WP_PROXY_PORT);
		}

		if (defined('WP_PROXY_USERNAME')){
			$auth = WP_PROXY_USERNAME;
			if (defined('WP_PROXY_PASSWORD')){
				$auth .= ':' . WP_PROXY_PASSWORD;
			}
			curl_setopt($ch, CURLOPT_PROXYUSERPWD, $auth);
		}

		//Is this even necessary?
		curl_setopt($ch, CURLOPT_FAILONERROR, false);

		//$nobody remembers whether the first attempt was a HEAD request, because only
		//HEAD requests are retried with GET further below.
		$nobody = false;
		if( $scheme_lc == 'https' ){
			//TODO: Redirects don't work with HTTPS
			//Certificate verification is switched off for HTTPS links.
			curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
		} else {
			$nobody = true;
			curl_setopt($ch, CURLOPT_NOBODY, true); //Use the HEAD method for non-https URLs
		}

		//Register a callback function which will process the HTTP header(s).
		//It can be called multiple times if the remote server performs a redirect.
		curl_setopt($ch, CURLOPT_HEADERFUNCTION, array(&$this,'read_header'));

		//Execute the request
		curl_exec($ch);

		$info = curl_getinfo($ch);
		$code = intval( $info['http_code'] );

		$this->log .= '=== ';

		if ( $code ){
			$this->log .= sprintf( __('First try : %d', 'broken-link-checker'), $code);
		} else {
			$this->log .= __('First try : 0 (No response)', 'broken-link-checker');
		}

		$this->log .= " ===\n\n";

		$this->log .= $this->last_headers."\n";

		//A failed HEAD request is repeated as a (size-limited) GET request.
		if ( (($code<200) || ($code>=400)) && $nobody) {
			$this->log .= __("Trying a second time with different settings...", 'broken-link-checker') . "\n";
			$this->last_headers = '';

			curl_setopt($ch, CURLOPT_NOBODY, false); //Don't send a HEAD request this time
			curl_setopt($ch, CURLOPT_HTTPGET, true); //Switch back to GET instead.
			curl_setopt($ch, CURLOPT_RANGE, '0-2047');//But limit the desired response size,
			                                          //we don't want to eat the user's bandwidth.
			//Run it again
			curl_exec($ch);

			$info = curl_getinfo($ch);
			$code = intval( $info['http_code'] );

			$this->log .= '=== ';
			if ( $code ){
				$this->log .= sprintf( __('Second try : %d', 'broken-link-checker'), $code);
			} else {
				$this->log .= __('Second try : 0 (No response)', 'broken-link-checker');
			}
			$this->log .= " ===\n\n";

			$this->log .= $this->last_headers."\n";
		}

		//A status code of 0 means that no response was received; record it as a timeout.
		$this->http_code = $code != 0 ? $code : BLC_TIMEOUT;
		$this->final_url = $info['url'];
		$this->request_duration = $info['total_time'];
		$this->redirect_count = $info['redirect_count'];

		//When safe_mode or open_basedir is enabled CURL will be forbidden from following redirects,
		//so redirect_count will be 0 for all URLs. As a workaround, set it to 1 when the HTTP
		//response codes indicates a redirect but redirect_count is zero.
		//Note to self : Extracting the Location header might also be helpful.
		if ( ($this->redirect_count == 0) && ( in_array( $this->http_code, array(301, 302, 307) ) ) ){
			$this->redirect_count = 1;
		}

		curl_close($ch);

	} elseif ( class_exists('Snoopy') ) {
		//******** Use Snoopy if CURL is not available *********
		//Note : Snoopy doesn't work too well with HTTPS URLs.
		$this->log .= "<em>(" . __('Using Snoopy', 'broken-link-checker') . ")</em>\n";

		$start_time = microtime_float(true);

		$snoopy = new Snoopy;
		$snoopy->read_timeout = $timeout; //read timeout in seconds
		$snoopy->maxlength = 1024*5; //load up to 5 kilobytes
		$snoopy->fetch($url);

		$this->request_duration = microtime_float(true) - $start_time;

		$this->http_code = $snoopy->status; //HTTP status code (note : Snoopy returns -100 on timeout)
		if ( $this->http_code == -100 ){
			$this->http_code = BLC_TIMEOUT;
			$this->timeout = true;
		}

		if ($snoopy->error)
			$this->log .= $snoopy->error."\n";
		if ($snoopy->timed_out)
			$this->log .= __("Request timed out.", 'broken-link-checker') . "\n";

		if ( is_array($snoopy->headers) )
			$this->log .= implode("", $snoopy->headers)."\n"; //those headers already contain newlines

		//Redirected?
		if ( $snoopy->lastredirectaddr ) {
			$this->final_url = $snoopy->lastredirectaddr;
			$this->redirect_count = $snoopy->_redirectdepth;
		} else {
			$this->final_url = $this->url;
		}
	}
	//NOTE(review): if neither cURL nor Snoopy is available no request is made, http_code
	//stays at BLC_CHECKING and the link is reported as broken by the code below.

	/*"Good" response codes are anything in the 2XX range (e.g "200 OK") and redirects - the 3XX range.
	  HTTP 401 Unauthorized is a special case that is considered OK as well. Other errors - the 4XX range -
	  are treated as "page doesn't exist'". */
	//TODO: Treat circular redirects as broken links.
	if ( (($this->http_code>=200) && ($this->http_code<400)) || ($this->http_code == 401) ) {
		$this->log .= __("Link is valid.", 'broken-link-checker');
		//Reset the check count for valid links.
		$this->check_count = 0;
		return true;
	} else {
		$this->log .= __("Link is broken.", 'broken-link-checker');
		if ( $this->http_code == BLC_TIMEOUT ){
			//This is probably a timeout
			$this->timeout = true;
			$this->log .= "\r\n(" . __("Most likely the connection timed out or the domain doesn't exist.", 'broken-link-checker') . ')';
		}
		return false;
	}
}
318
/**
 * blcLink::read_header()
 * Header callback registered with cURL (CURLOPT_HEADERFUNCTION) by check().
 * Appends the received header data to $this->last_headers.
 *
 * @param resource $ch The cURL handle (not used).
 * @param string $header The header data passed in by cURL.
 * @return int The length of $header, as returned by strlen().
 */
function read_header($ch, $header){
	$received = strlen($header);
	$this->last_headers = $this->last_headers . $header;
	return $received;
}
323
/**
 * blcLink::save()
 * Save link data to DB.
 *
 * A new link (is_new) is INSERTed into {prefix}blc_links; on success its link_id is
 * filled in from the insert ID and the is_new flag is cleared. An existing link is
 * UPDATEd by link_id. When a query fails, an error message is printed to the output.
 *
 * @return bool True if saved successfully, false otherwise (including when the link is not valid()).
 */
function save(){
	global $wpdb;

	if ( !$this->valid() ) return false;

	if ( $this->is_new ){

		//Insert a new row
		$q = "
			INSERT INTO {$wpdb->prefix}blc_links
			  ( url, last_check, check_count, final_url, redirect_count, log, http_code, request_duration, timeout )
			VALUES( %s, %s, %d, %s, %d, %s, %d, %f, %d )";
		$q = $wpdb->prepare($q, $this->url, $this->last_check, $this->check_count, $this->final_url,
			$this->redirect_count, $this->log, $this->http_code, $this->request_duration, (integer)$this->timeout );
		$rez = $wpdb->query($q);

		//query() returns false on error; anything else counts as success.
		$rez = $rez !== false;

		if ($rez){
			$this->link_id = $wpdb->insert_id;
			//If the link was successfully saved then it's no longer "new"
			$this->is_new = false;
		} else {
			//Report the URL of this link. (There is no local $url variable in this method.)
			printf( __('Error adding link %s : %s', 'broken-link-checker'), $this->url, $wpdb->last_error );
			echo "\r\n<br>";
		}

		return $rez;

	} else {

		//Update an existing DB record
		$q = "UPDATE {$wpdb->prefix}blc_links SET url=%s, last_check=%s, check_count=%d, final_url=%s,
			  redirect_count=%d, log=%s, http_code=%d, request_duration=%f, timeout=%d
			  WHERE link_id=%d";

		$q = $wpdb->prepare($q, $this->url, $this->last_check, $this->check_count, $this->final_url,
			$this->redirect_count, $this->log, $this->http_code, $this->request_duration, (integer)$this->timeout, $this->link_id );

		$rez = $wpdb->query($q);
		if ( $rez === false ){
			printf( __('Error updating link %d : %s', 'broken-link-checker'), $this->link_id, $wpdb->last_error );
			echo "\r\n<br>";
		}
		return $rez !== false;
	}
}
380
/**
 * blcLink::edit()
 * Change the URL of every instance of this link.
 *
 * How it works : a link record for $new_url is loaded (or created and saved). Each
 * instance is then asked to edit() itself; instances that succeed are re-pointed to the
 * new record and saved, the ones that fail keep pointing to the old record. If every
 * instance was edited, the old record is deleted and this object takes over the new
 * record's ID and data. If no instance could be edited and the new record was created
 * by this call, the new record is deleted again.
 *
 * @param string $new_url
 * @return array|bool False if the link is invalid, has no instances or the new link record couldn't
 *                    be created. Otherwise an associative array : 'new_link_id' (this object's
 *                    link_id after the operation), 'cnt_okay' (instances edited) and
 *                    'cnt_error' (instances that failed).
 */
function edit($new_url){
	if ( !$this->valid() ) return false;

	//Nothing to edit if the link has no instances.
	$instances = $this->get_instances();
	if ( empty($instances) ) return false;

	//Load or create the link record for the new URL.
	$target = new blcLink($new_url);
	$target_was_new = $target->is_new;
	if ( $target_was_new ){
		$target->save(); //so that the record gets a valid link_id
	}
	if ( empty($target->link_id) ) return false;

	//Try to edit each instance and move the successful ones to the new record.
	$succeeded = 0;
	$failed = 0;
	foreach ( $instances as $instance ){
		if ( !$instance->edit( $this->url, $new_url ) ){
			$failed++;
			continue;
		}
		$succeeded++;
		$instance->link_id = $target->link_id;
		$instance->save();
	}

	//Everything moved? Then the old record can go and this object becomes the new link.
	//The ID comparison guards against $new_url resolving to this very record
	//(which should never happen, but whatever).
	$everything_moved = ( $failed == 0 ) && ( $succeeded > 0 );
	if ( $everything_moved && ( $this->link_id != $target->link_id ) ){
		$this->forget( false );

		$this->link_id = $target->link_id;
		$this->url = $target->url;
		$this->final_url = $target->url;
		$this->log = $target->log;
		$this->http_code = $target->http_code;
		$this->redirect_count = $target->redirect_count;
		$this->timeout = $target->timeout;
	}

	//Nothing moved and the new record was created just for this edit? Remove it again.
	if ( $target_was_new && ( $succeeded == 0 ) ){
		$target->forget( false );
	}

	$summary = array(
		'new_link_id' => $this->link_id,
		'cnt_okay' => $succeeded,
		'cnt_error' => $failed,
	);
	return $summary;
}
459
/**
 * blcLink::unlink()
 * Unlink every instance of this link and then delete the link record itself.
 *
 * A link that has no instances is just deleted. If any instance fails to unlink,
 * the link record is kept and false is returned.
 *
 * @return bool True if the link record was deleted, false otherwise.
 */
function unlink(){
	if ( !$this->valid() ) return false;

	$instances = $this->get_instances();
	if ( empty($instances) ){
		//No instances - only the link record needs to be removed.
		return $this->forget( false ) !== false;
	}

	//Ask each instance to unlink itself and keep score.
	$removed = 0;
	$failed = 0;
	foreach ( $instances as $instance ){
		if ( $instance->unlink( $this->url ) ){
			$removed++;
		} else {
			$failed++;
		}
	}

	//The link record is only deleted when every single instance was unlinked.
	if ( ( $failed != 0 ) || ( $removed <= 0 ) ){
		return false;
	}
	return $this->forget() !== false;
}
497
/**
 * blcLink::forget()
 * Delete the link record - and, unless told otherwise, its instance records - from the DB.
 * Posts and other content are not touched. Afterwards the object's link_id is reset to 0.
 *
 * @param bool $remove_instances Also delete the link's rows in the instances table. Defaults to true.
 * @return mixed The result of the DELETE query on the links table (1 on success, 0 if link not found,
 *               false on error). Also false if the link is invalid or has no link_id.
 */
function forget($remove_instances = true){
	global $wpdb;

	//Only a valid link that actually has a DB record can be deleted.
	if ( !$this->valid() || empty($this->link_id) ){
		return false;
	}

	$id = $this->link_id;

	if ( $remove_instances ){
		//Remove instances, if any
		$wpdb->query( $wpdb->prepare("DELETE FROM {$wpdb->prefix}blc_instances WHERE link_id=%d", $id) );
	}

	//Remove the link itself
	$deleted = $wpdb->query( $wpdb->prepare("DELETE FROM {$wpdb->prefix}blc_links WHERE link_id=%d", $id) );
	$this->link_id = 0;

	return $deleted;
}
526
/**
 * blcLink::get_instances()
 * Get a list of the link's instances
 *
 * Each row of the instances table is wrapped in an object of the class
 * blcLinkInstance_{source_type}_{instance_type}. If no such class exists the generic
 * blcLinkInstance class is used instead, which is the same fallback add_instance() applies.
 *
 * @param integer $max_count The maximum number of instances to return. The default is -1 (no limit)
 * @return array An array of instance objects or FALSE on failure (invalid or unsaved link, no instances found).
 */
function get_instances($max_count = -1){
	global $wpdb;
	if ( !$this->valid() || empty($this->link_id) ) return false;

	//The limit is placed into the query text directly, so force it to be an integer first.
	$max_count = intval($max_count);
	$limit = $max_count > 0 ? "LIMIT $max_count" : '';

	//Get all instances of this link
	$q = $wpdb->prepare("SELECT * FROM {$wpdb->prefix}blc_instances WHERE link_id=%d $limit", $this->link_id);
	$results = $wpdb->get_results($q, ARRAY_A);

	if ( empty($results) ){
		return false;
	}

	//Create an object for each instance
	$instances = array();
	foreach ($results as $result){
		//Each source/link type combination has it's own subclass. E.g. _post_image or _blogroll_link.
		$classname = 'blcLinkInstance_' . $result['source_type'] . '_' . $result['instance_type'];
		if ( !class_exists($classname) ){
			//Unknown combination : use the generic class instead of failing with a fatal error.
			$classname = 'blcLinkInstance';
		}
		$instances[] = new $classname($result);
	}
	return $instances;
}
557
/**
 * blcLink::add_instance()
 * Record a new instance (occurrence) of this link and save it to the DB.
 *
 * An unsaved link is saved first, because the instance has to refer to its link_id.
 * The instance object is created from the class blcLinkInstance_{source_type}_{instance_type},
 * or from the generic blcLinkInstance class when no such class exists.
 *
 * @param int $source_id
 * @param string $source_type
 * @param string $link_text
 * @param string $instance_type
 * @return object The created instance or FALSE on error.
 */
function add_instance($source_id, $source_type, $link_text, $instance_type){

	//The link must be saved before an instance can be added
	if ( $this->is_new && !$this->save() ){
		return false;
	}

	//Pick the class for this source/instance type, falling back to the generic one.
	$specific_class = 'blcLinkInstance_' . $source_type . '_' . $instance_type;
	$classname = class_exists($specific_class) ? $specific_class : 'blcLinkInstance';

	//Create a new instance tied to this link
	$fields = array(
		'link_id' => $this->link_id,
		'source_id' => $source_id,
		'source_type' => $source_type,
		'link_text' => $link_text,
		'instance_type' => $instance_type,
	);
	$inst = new $classname( $fields );

	//Save the instance to the DB
	return $inst->save() ? $inst : false;
}
595 }
596 } //class_exists
597
598 ?>