PluginProbe ʕ •ᴥ•ʔ
Broken Link Checker / 0.5
Broken Link Checker v0.5
1.5.4 1.5.5 1.6 1.6.1 1.6.2 1.7 1.7.1 1.8 1.8.1 1.8.2 1.8.3 1.9 1.9.1 1.9.2 1.9.3 1.9.4 1.9.4.1 1.9.4.2 1.9.5 2.0.0 2.1.0 2.2.0 2.2.1 2.2.2 2.2.3 2.2.4 2.3.0 2.3.1 2.4.0 2.4.1 2.4.2 2.4.3 2.4.4 2.4.5 2.4.6 2.4.7 2.4.8 0.9.4 0.9.4.1 0.9.4.2 0.9.4.3 0.9.4.4 0.9.4.4-last-non-modular 0.9.5 0.9.6 0.9.7 0.9.7.1 0.9.7.2 1.10 1.10.1 1.10.10 1.10.11 1.10.2 1.10.3 1.10.4 1.10.5 1.10.6 1.10.7 1.10.8 1.10.9 1.11.1 1.11.10 1.11.11 1.11.12 1.11.13 1.11.14 1.11.15 1.11.17 1.11.18 1.11.19 1.11.2 1.11.20 1.11.21 1.11.3 1.11.4 1.11.5 1.11.8 1.11.9 1.2.2 1.2.3 1.2.4 1.2.5 1.3 1.3.1 1.4 1.5 1.5.1 1.5.2 1.5.3 trunk 0.1 0.2 0.2.2 0.2.2.1 0.2.3 0.2.4 0.2.5 0.3 0.3.1 0.3.2 0.3.3 0.3.4 0.3.5 0.3.6 0.3.7 0.3.8 0.3.9 0.4 0.4-i8n 0.4.1 0.4.10 0.4.11 0.4.12 0.4.13 0.4.14 0.4.2 0.4.3 0.4.4 0.4.5 0.4.6 0.4.7 0.4.8 0.4.9 0.5 0.5.1 0.5.10 0.5.10.1 0.5.11 0.5.12 0.5.13 0.5.14 0.5.15 0.5.16 0.5.16.1 0.5.17 0.5.18 0.5.2 0.5.3 0.5.4 0.5.5 0.5.6 0.5.7 0.5.8 0.5.8.1 0.5.9 0.6 0.6.1 0.6.2 0.6.3 0.6.4 0.6.5 0.7 0.7.1 0.7.2 0.7.3 0.7.4 0.8 0.8.1 0.9 0.9.1 0.9.2 0.9.3
broken-link-checker / link-classes.php
broken-link-checker Last commit date
images 17 years ago JSON.php 17 years ago broken-link-checker.php 17 years ago instance-classes.php 17 years ago link-classes.php 17 years ago readme.txt 17 years ago uninstall.php 17 years ago utility-class.php 17 years ago wsblc_ajax.php 17 years ago
link-classes.php
541 lines
1 <?php
2
3 /**
4 * @author W-Shadow
5 * @copyright 2009
6 */
7
if (!class_exists('blcLink')){
/**
 * Represents one unique URL tracked by the plugin (one row of the {prefix}blc_links table).
 *
 * The class knows how to load itself from the DB, check whether the URL works (via CURL or
 * Snoopy), persist the result, and edit/unlink/forget the link together with its instances.
 * Written in a PHP 4 compatible style (var properties, old-style constructor alias).
 */
class blcLink {

    //Object state
    var $is_new = false;                //True while the link has no row in the DB yet.
    var $last_headers = '';             //Raw response headers gathered by read_header() for the latest CURL request.
    var $meets_check_threshold = false; //currently unused

    //DB fields (the columns written by save())
    var $link_id = 0;
    var $url = '';
    var $last_check='0000-00-00 00:00:00';
    var $check_count = 0;
    var $final_url = '';
    var $log = '';
    var $http_code = 0;
    var $request_duration = 0;
    var $timeout = false;
    var $redirect_count = 0;

    /**
     * blcLink::__construct()
     * Build a link object. The behaviour depends on the type of $arg :
     *   - integer : load the link with this link_id from the DB. If it isn't found the object
     *               stays empty and valid() will return false.
     *   - string  : load the link with this URL from the DB, or start a new (unsaved) link.
     *   - array   : use the array as field values; the link is "new" if link_id is empty.
     *   - other   : create an empty new link.
     *
     * Note that the integer branch uses is_int(), so a numeric string is treated as a URL.
     *
     * @param mixed $arg
     * @return void
     */
    function __construct($arg = null){
        global $wpdb;

        if (is_int($arg)){
            //Load a link with ID = $arg from the DB.
            $q = $wpdb->prepare("SELECT * FROM {$wpdb->prefix}blc_links WHERE link_id=%d LIMIT 1", $arg);
            $arr = $wpdb->get_row( $q, ARRAY_A );

            if ( is_array($arr) ){ //Loaded successfully
                $this->set_values($arr);
            } else {
                //Link not found. The object is invalid.
                //I'd throw an error, but that wouldn't be PHP 4 compatible...
            }

        } else if (is_string($arg)){
            //Load a link with URL = $arg from the DB. Create a new one if the record isn't found.
            $q = $wpdb->prepare("SELECT * FROM {$wpdb->prefix}blc_links WHERE url=%s LIMIT 1", $arg);
            $arr = $wpdb->get_row( $q, ARRAY_A );

            if ( is_array($arr) ){ //Loaded successfully
                $this->set_values($arr);
            } else { //Link not found, treat as new
                $this->url = $arg;
                $this->is_new = true;
            }

        } else if (is_array($arg)){
            $this->set_values($arg);
            //Is this a new link?
            $this->is_new = empty($this->link_id);
        } else {
            $this->is_new = true;
        }
    }

    /**
     * PHP 4 style constructor; simply forwards to __construct().
     *
     * @param mixed $arg See __construct()
     * @return void
     */
    function blcLink($arg = null){
        $this->__construct($arg);
    }

    /**
     * blcLink::set_values()
     * Set the internal values to the ones provided in an array (doesn't sanitize).
     * Every key of the array becomes a property of the object, whether or not it is
     * one of the declared fields.
     *
     * @param array $arr An associative array of values
     * @return void
     */
    function set_values($arr){
        foreach( $arr as $key => $value ){
            $this->$key = $value;
        }
    }

    /**
     * blcLink::valid()
     * Verifies whether the object represents a valid link : it must have a URL and
     * either a DB id or the "new" flag.
     *
     * @return bool
     */
    function valid(){
        return !empty( $this->url ) && ( !empty($this->link_id) || $this->is_new );
    }

    /**
     * blcLink::check()
     * Check if the link is working.
     *
     * The result fields (http_code, final_url, log, timeout, etc) are updated on the object.
     * The record is saved once *before* the request is made; the results of the request
     * itself are not saved by this method.
     *
     * @return bool True if the link is considered okay, false if it is broken or invalid.
     */
    function check(){
        if ( !$this->valid() ) return false;
        /*
        Check for problematic (though not necessarily "broken") links.
        If a link has been checked multiple times and still hasn't been marked as
        timed-out or broken then probably the checking algorithm is having problems with
        that link. Mark it as timed-out and hope the user sorts it out.
        */
        if ( ($this->check_count >= 3) && ( !$this->timeout ) && ( !$this->http_code ) ) {
            $this->timeout = 1;
            $this->last_check = date('Y-m-d H:i:s');
            $this->log .= "\r\n[A weird error was detected. This should never happen.]";
            $this->save();
            return false;
        }

        //Update the DB record before actually performing the check.
        //Useful if something goes terribly wrong while checking this particular URL.
        //Note : might be unnecessary.
        $this->check_count++;
        $this->last_check = date('Y-m-d H:i:s');
        $this->log = '';
        $this->final_url = '';
        $this->http_code = 0;
        $this->request_duration = 0;
        $this->timeout = false;
        $this->redirect_count = 0;
        $this->save();

        //Empty some variables before running the check
        $this->last_headers = '';

        //Save the URL into a local var; we'll need it later.
        $url = $this->url;

        //parse_url() may return false or an array without a 'scheme' key (e.g. for relative
        //URLs), so read the scheme defensively. URL schemes are case-insensitive.
        $parts = parse_url($url);
        $raw_scheme = ( is_array($parts) && isset($parts['scheme']) ) ? $parts['scheme'] : '';
        $scheme = strtolower($raw_scheme);

        //Only HTTP links are checked. All others are automatically considered okay.
        if ( ($scheme != 'http') && ($scheme != 'https') ) {
            $this->log .= "URL protocol ($raw_scheme) is not HTTP. This link won't be checked.\n";
            //Reset the check count just like for valid links. Otherwise a link that is never
            //actually requested would eventually trip the "weird error" guard above.
            $this->check_count = 0;
            return true;
        }

        //Kill the #anchor if it's present
        $anchor_start = strpos($url, '#');
        if ( $anchor_start !== false ){
            $url = substr($url, 0, $anchor_start);
        }

        //******* Use CURL if available ***********
        if (function_exists('curl_init')) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            //Masquerade as Internet explorer
            curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
            //Add a semi-plausible referer header to avoid tripping up some bot traps
            curl_setopt($ch, CURLOPT_REFERER, get_option('home'));

            curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);

            //Setting this option can raise a warning on some configurations, hence the @.
            @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 10);

            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 20);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);

            curl_setopt($ch, CURLOPT_FAILONERROR, false);

            $nobody=false;
            if($scheme=='https'){
                //TODO: Redirects don't work with HTTPS
                curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
            } else {
                //Plain HTTP : try a body-less request first, it's cheaper.
                $nobody=true;
                curl_setopt($ch, CURLOPT_NOBODY, true);
                //curl_setopt($ch, CURLOPT_RANGE, '0-1023');
            }

            //We definitely want headers.
            curl_setopt($ch, CURLOPT_HEADER, true);
            //Register a callback that collects the response headers into $this->last_headers.
            //(&$this keeps this working on PHP 4, where objects are copied by value.)
            curl_setopt($ch, CURLOPT_HEADERFUNCTION, array(&$this,'read_header'));

            //Execute the request
            $response = curl_exec($ch);

            $info = curl_getinfo($ch);
            $code = intval( $info['http_code'] );

            $this->log .= "=== First try : $code ".(!$code?'(No response) ':'')."===\n\n";
            $this->log .= $this->last_headers."\n";

            //If the body-less request failed, retry as a normal GET limited to the first 2 KB.
            if ( (($code<200) || ($code>=400)) && $nobody) {
                $this->log .= "Trying a second time with different settings...\n";
                $this->last_headers = '';

                curl_setopt($ch, CURLOPT_NOBODY, false);
                curl_setopt($ch, CURLOPT_HTTPGET, true);
                curl_setopt($ch, CURLOPT_RANGE, '0-2047');
                $response = curl_exec($ch);

                $info = curl_getinfo($ch);
                $code = intval( $info['http_code'] );

                $this->log .= "=== Second try : $code ".(!$code?'(No response) ':'')."===\n\n";
                $this->log .= $this->last_headers."\n";
            }

            $this->http_code = $code;
            $this->final_url = $info['url'];
            $this->request_duration = $info['total_time'];
            $this->redirect_count = $info['redirect_count'];

            curl_close($ch);

        } elseif (class_exists('Snoopy')) {
            //******** Use Snoopy if CURL is not available *********
            //Note : Snoopy doesn't work too well with HTTPS URLs.
            $this->log .= "<em>(Using Snoopy)</em>\n";

            //microtime_float() is not defined in this file; it must be provided elsewhere in the plugin.
            $start_time = microtime_float(true);

            $snoopy = new Snoopy;
            $snoopy->read_timeout = 60; //read timeout in seconds
            $snoopy->fetch($url);

            //Elapsed time = end - start (the reverse would always be negative).
            $this->request_duration = microtime_float(true) - $start_time;

            //Normalize to an integer so the comparisons below behave the same as in the CURL branch.
            $this->http_code = intval($snoopy->status); //HTTP status code

            if ($snoopy->error)
                $this->log .= $snoopy->error."\n";
            if ($snoopy->timed_out)
                $this->log .= "Request timed out.\n";

            if ( is_array($snoopy->headers) )
                $this->log .= implode("", $snoopy->headers)."\n"; //those headers already contain newlines

            if ($snoopy->lastredirectaddr) {
                $this->final_url = $snoopy->lastredirectaddr;
                $this->redirect_count = $snoopy->_redirectdepth;
            }
        }
        //If neither CURL nor Snoopy is available no request is made, http_code stays 0
        //and the link is reported as a timeout below.

        /*"Good" response codes are anything in the 2XX range (e.g "200 OK") and redirects - the 3XX range.
        HTTP 401 Unauthorized is a special case that is considered OK as well. Other errors - the 4XX range -
        are treated as "page doesn't exist'". */
        //TODO: Treat circular redirects as broken links.
        if ( (($this->http_code>=200) && ($this->http_code<400)) || ($this->http_code == 401) ) {
            $this->log .= "Link is valid.";
            //Reset the check count for valid links.
            $this->check_count = 0;
            return true;
        } else {
            $this->log .= "Link is broken.";
            if ($this->http_code == 0){
                //This is probably a timeout
                $this->timeout = true;
                $this->log .= "\r\n(Most likely the connection timed out)";
            }
            return false;
        }
    }

    /**
     * blcLink::read_header()
     * CURLOPT_HEADERFUNCTION callback. Appends each received header line to $this->last_headers.
     *
     * @param resource $ch The CURL handle (unused).
     * @param string $header One raw header line.
     * @return int The number of bytes handled; this is what CURL expects the callback to return.
     */
    function read_header($ch, $header){
        $this->last_headers .= $header;
        return strlen($header);
    }

    /**
     * blcLink::save()
     * Save link data to DB. Inserts a new row if the link is new (and stores the generated
     * link_id), otherwise updates the existing row. On a DB error a message is echo'ed.
     *
     * @return bool True if saved successfully, false otherwise.
     */
    function save(){
        global $wpdb;

        if ( !$this->valid() ) return false;

        if ( $this->is_new ){

            //Insert a new row
            $q = "
                INSERT INTO {$wpdb->prefix}blc_links
                  ( url, last_check, check_count, final_url, redirect_count, log, http_code, request_duration, timeout )
                VALUES( %s, %s, %d, %s, %d, %s, %d, %f, %d )";
            $q = $wpdb->prepare($q, $this->url, $this->last_check, $this->check_count, $this->final_url,
                $this->redirect_count, $this->log, $this->http_code, $this->request_duration, (integer)$this->timeout );
            $rez = $wpdb->query($q);

            $rez = $rez !== false;

            if ($rez){
                $this->link_id = $wpdb->insert_id;
                //echo "Link added, ID : {$this->link_id}\r\n<br>";
                //If the link was successfully saved then it's no longer "new"
                $this->is_new = !$rez;
            } else {
                //Report the URL of this link (there is no local $url variable in this method).
                echo "Error adding link {$this->url} : {$wpdb->last_error}\r\n<br>";
            }

            return $rez;

        } else {

            //Update an existing DB record
            $q = "UPDATE {$wpdb->prefix}blc_links SET url=%s, last_check=%s, check_count=%d, final_url=%s,
                redirect_count=%d, log=%s, http_code=%d, request_duration=%f, timeout=%d
                WHERE link_id=%d";

            $q = $wpdb->prepare($q, $this->url, $this->last_check, $this->check_count, $this->final_url,
                $this->redirect_count, $this->log, $this->http_code, $this->request_duration, (integer)$this->timeout, $this->link_id );

            $rez = $wpdb->query($q);
            if ( $rez !== false ){
                //echo "Link updated, ID : {$this->link_id}\r\n<br>";
            } else {
                echo "Error updating link {$this->link_id} : {$wpdb->last_error}\r\n<br>";
            }
            return $rez !== false;
        }
    }

    /**
     * blcLink::edit()
     * Edit all instances of the link by changing the URL.
     *
     * Here's how this really works : create a new link with the new URL. Then edit()
     * all instances and point them to the new link record. If some instance can't be
     * edited they will still point to the old record. The old record is deleted
     * if all instances were edited successfully.
     *
     * @param string $new_url
     * @return mixed An associative array with the link ID ('new_link_id'), the number of successfully
     *   edited instances ('cnt_okay') and the number of failed edits ('cnt_error'). FALSE if the link
     *   is invalid, has no instances, or the new link record couldn't be created.
     */
    function edit($new_url){
        if ( !$this->valid() ){
            return false;
        }

        //FB::info('Changing link '.$this->link_id .' to URL "'.$new_url.'"');

        $instances = $this->get_instances();
        //Fail if there are no instances
        if (empty($instances)) return false;

        //Load or create a link with the URL = $new_url
        $new_link = new blcLink($new_url);
        $was_new = $new_link->is_new;
        if ($new_link->is_new) {
            //FB::log($new_link, 'Saving a new link');
            $new_link->save(); //so that we get a valid link_id
        }

        if ( empty($new_link->link_id) ){
            //FB::error("Failed to create a new link record");
            return false;
        }

        //Edit each instance.
        //FB::info('Editing ' . count($instances) . ' instances');
        $cnt_okay = $cnt_error = 0;
        foreach ( $instances as $instance ){
            if ( $instance->edit( $this->url, $new_url ) ){
                $cnt_okay++;
                $instance->link_id = $new_link->link_id;
                $instance->save();
                //FB::info($instance, 'Successfully edited instance ' . $instance->instance_id);
            } else {
                $cnt_error++;
                //FB::error($instance, 'Failed to edit instance ' . $instance->instance_id);
            }
        }

        //If all instances were edited successfully we can delete the old link record.
        //And copy the new link data into this object. UNLESS this link is equal to the new link
        //(which should never happen, but whatever)
        if ( ( $cnt_error == 0 ) && ( $cnt_okay > 0 ) && ( $this->link_id != $new_link->link_id ) ){
            //The instances now belong to the new link, so only the link row itself is removed.
            $this->forget( false );

            $this->link_id = $new_link->link_id;
            $this->url = $new_link->url;
            //NOTE(review): final_url is set to the new link's url, not its final_url - confirm this is intended.
            $this->final_url = $new_link->url;
            $this->log = $new_link->log;
            $this->http_code = $new_link->http_code;
            $this->redirect_count = $new_link->redirect_count;
            $this->timeout = $new_link->timeout;
        }

        //On the other hand, if no instances could be edited and the $new_link was really new,
        //then delete it.
        if ( ( $cnt_okay == 0 ) && $was_new ){
            $new_link->forget( false );
        }

        return array(
            'new_link_id' => $this->link_id,
            'cnt_okay' => $cnt_okay,
            'cnt_error' => $cnt_error,
        );
    }

    /**
     * blcLink::unlink()
     * Delete (unlink) all instances and the link itself.
     *
     * @return bool True if every instance was unlinked and the link record was removed
     *   (or the link had no instances and was removed), false otherwise.
     */
    function unlink(){
        if ( !$this->valid() ){
            return false;
        }

        //FB::info($this, 'Removing link');

        $instances = $this->get_instances();
        //A link without instances has nothing to unlink; just delete the link record.
        if (empty($instances)) {
            //FB::warn("This link has no instances. Deleting the link.");
            return $this->forget( false ) !== false;
        }

        //Unlink each instance.
        //FB::info('Unlinking ' . count($instances) . ' instances');
        $cnt_okay = $cnt_error = 0;
        foreach ( $instances as $instance ){
            if ( $instance->unlink( $this->url ) ){
                $cnt_okay++;
                //FB::info( $instance, 'Successfully unlinked instance' );
            } else {
                $cnt_error++;
                //FB::error( $instance, 'Failed to unlink instance' );
            }
        }

        //If all instances were unlinked successfully we can delete the link record.
        if ( ( $cnt_error == 0 ) && ( $cnt_okay > 0 ) ){
            //FB::log('Instances removed, deleting the link.');
            return $this->forget() !== false;
        } else {
            //FB::error("Something went wrong. Unlinked instances : $cnt_okay, errors : $cnt_error");
            return false;
        }
    }

    /**
     * blcLink::forget()
     * Remove the link and instance records from the DB. Doesn't alter posts/etc.
     * The object's link_id is reset to 0 afterwards.
     *
     * @param bool $remove_instances Whether to also delete the link's rows from blc_instances. Defaults to true.
     * @return mixed The result of the DELETE query on the links table (number of rows removed,
     *   or false on a DB error). FALSE if the link is invalid or has no link_id.
     */
    function forget($remove_instances = true){
        global $wpdb;
        if ( !$this->valid() ) return false;

        if ( !empty($this->link_id) ){
            //FB::info($this, 'Deleting link from DB');

            if ( $remove_instances ){
                //Remove instances, if any
                $wpdb->query( $wpdb->prepare("DELETE FROM {$wpdb->prefix}blc_instances WHERE link_id=%d", $this->link_id) );
            }

            //Remove the link itself
            $rez = $wpdb->query( $wpdb->prepare("DELETE FROM {$wpdb->prefix}blc_links WHERE link_id=%d", $this->link_id) );
            $this->link_id = 0;

            return $rez;
        } else {
            return false;
        }

    }

    /**
     * blcLink::get_instances()
     * Get a list of the link's instances
     *
     * @param integer $max_count The maximum number of instances to return. The default is -1 (no limit)
     * @return array An array of instance objects or FALSE on failure (including when there are no instances).
     */
    function get_instances($max_count = -1){
        global $wpdb;
        if ( !$this->valid() || empty($this->link_id) ) return false;

        //$max_count ends up directly in the SQL, so force it to be an integer.
        $max_count = intval($max_count);
        $limit = $max_count > 0 ? "LIMIT $max_count":'';

        //Get all instances of this link
        $q = $wpdb->prepare("SELECT * FROM {$wpdb->prefix}blc_instances WHERE link_id=%d $limit", $this->link_id);
        $results = $wpdb->get_results($q, ARRAY_A);

        if ( !empty($results) ) {
            //Create an object for each instance
            $instances = array();
            foreach ($results as $result){
                //Each source/link type combination has its own subclass. E.g. _post_image or _blogroll_link.
                $classname = 'blcLinkInstance_' . $result['source_type'] . '_' . $result['instance_type'];
                //Fall back to the generic class for unknown combinations, same as add_instance() does.
                if ( !class_exists($classname) ){
                    $classname = 'blcLinkInstance';
                }
                $instances[] = new $classname($result);
            }
            return $instances;
        } else {
            return false;
        }
    }

    /**
     * blcLink::add_instance()
     * Record a new instance of the link. A new link is saved first so that it has a link_id.
     *
     * @param int $source_id
     * @param string $source_type
     * @param string $link_text
     * @param string $instance_type
     * @return object The created instance or FALSE on error.
     */
    function add_instance($source_id, $source_type, $link_text, $instance_type){

        //The link must be saved before an instance can be added
        if ($this->is_new) {
            if ( !$this->save()) return false;
        }

        //Create a new instance tied to this link
        $classname = 'blcLinkInstance_' . $source_type . '_' . $instance_type;
        if ( !class_exists($classname) ){
            $classname = 'blcLinkInstance';
        }
        $inst = new $classname( array(
            'link_id' => $this->link_id,
            'source_id' => $source_id,
            'source_type' => $source_type,
            'link_text' => $link_text,
            'instance_type' => $instance_type,
        ) );

        //Save the instance to the DB
        if ( $inst->save() ){
            return $inst;
        } else {
            return false;
        }
    }
}
} //class_exists
540
541 ?>