PluginProbe ʕ •ᴥ•ʔ
Broken Link Checker / 0.2.4
Broken Link Checker v0.2.4
1.5.4 1.5.5 1.6 1.6.1 1.6.2 1.7 1.7.1 1.8 1.8.1 1.8.2 1.8.3 1.9 1.9.1 1.9.2 1.9.3 1.9.4 1.9.4.1 1.9.4.2 1.9.5 2.0.0 2.1.0 2.2.0 2.2.1 2.2.2 2.2.3 2.2.4 2.3.0 2.3.1 2.4.0 2.4.1 2.4.2 2.4.3 2.4.4 2.4.5 2.4.6 2.4.7 2.4.8 0.9.4 0.9.4.1 0.9.4.2 0.9.4.3 0.9.4.4 0.9.4.4-last-non-modular 0.9.5 0.9.6 0.9.7 0.9.7.1 0.9.7.2 1.10 1.10.1 1.10.10 1.10.11 1.10.2 1.10.3 1.10.4 1.10.5 1.10.6 1.10.7 1.10.8 1.10.9 1.11.1 1.11.10 1.11.11 1.11.12 1.11.13 1.11.14 1.11.15 1.11.17 1.11.18 1.11.19 1.11.2 1.11.20 1.11.21 1.11.3 1.11.4 1.11.5 1.11.8 1.11.9 1.2.2 1.2.3 1.2.4 1.2.5 1.3 1.3.1 1.4 1.5 1.5.1 1.5.2 1.5.3 trunk 0.1 0.2 0.2.2 0.2.2.1 0.2.3 0.2.4 0.2.5 0.3 0.3.1 0.3.2 0.3.3 0.3.4 0.3.5 0.3.6 0.3.7 0.3.8 0.3.9 0.4 0.4-i8n 0.4.1 0.4.10 0.4.11 0.4.12 0.4.13 0.4.14 0.4.2 0.4.3 0.4.4 0.4.5 0.4.6 0.4.7 0.4.8 0.4.9 0.5 0.5.1 0.5.10 0.5.10.1 0.5.11 0.5.12 0.5.13 0.5.14 0.5.15 0.5.16 0.5.16.1 0.5.17 0.5.18 0.5.2 0.5.3 0.5.4 0.5.5 0.5.6 0.5.7 0.5.8 0.5.8.1 0.5.9 0.6 0.6.1 0.6.2 0.6.3 0.6.4 0.6.5 0.7 0.7.1 0.7.2 0.7.3 0.7.4 0.8 0.8.1 0.9 0.9.1 0.9.2 0.9.3
broken-link-checker / wsblc_ajax.php
broken-link-checker Last commit date
broken-link-checker.php 18 years ago readme.txt 18 years ago wsblc_ajax.php 18 years ago
wsblc_ajax.php
324 lines
1 <?php
2 /*
3 The AJAX-y part of the link checker.
4 */
5 require_once("../../../wp-config.php");
6 require_once("../../../wp-includes/wp-db.php");
7
8 //error_reporting(E_ALL);
9
// Reference point for execution_time(): the moment this script started running.
$execution_start_time = microtime(true);

/*
Returns how many seconds (as a float) have passed since
$execution_start_time was recorded at the top of this script.
*/
function execution_time() {
    global $execution_start_time;

    $now = microtime(true);
    return $now - $execution_start_time;
}
16
17
// The plugin's main object is not created in this file; without it there
// is nothing useful this script can do.
if (!is_object($ws_link_checker)) {
    die('Fatal error : undefined object; plugin may not be active.');
}

$url_pattern='/(<a[\s]+[^>]*href\s*=\s*[\"\']?)([^\'\" >]+)([\'\"]+[^<>]*>)((?sU).*)(<\/a>)/i';

// Names of the plugin's two tables, built from the WordPress table prefix.
$postdata_name = "{$wpdb->prefix}blc_postdata";
$linkdata_name = "{$wpdb->prefix}blc_linkdata";

$options = $ws_link_checker->options; //get_option('wsblc_options');
$siteurl = get_option('siteurl');

// Upper bound, in seconds, for one work session (compared against
// execution_time() by the run_check action). Falls back to 27.
$max_execution_time = 27;
if (isset($options['max_work_session'])) {
    $max_execution_time = intval($options['max_work_session']);
}

if (!ini_get('safe_mode')) {
    // Regular way: lift PHP's own time limit.
    @set_time_limit(0);
} else {
    // Safe mode way: the limit cannot be lifted, so stay just under it.
    $t = ini_get('max_execution_time');
    if ($t && ($t < $max_execution_time)) {
        $max_execution_time = $t - 1;
    }
}
// Keep working even if the browser drops the connection.
@ignore_user_abort(true);

// Anything last checked before $check_treshold is due for a check;
// broken links are retried once they are older than $recheck_treshold.
$check_treshold = date('Y-m-d H:i:s', strtotime("-{$options['check_treshold']} hours"));
$recheck_treshold = date('Y-m-d H:i:s', strtotime('-20 minutes'));

// Requested action; defaults to running a check session.
$action = 'run_check';
if (isset($_GET['action'])) {
    $action = $_GET['action'];
}
47
// Dispatch on the requested action. Each case prints an HTML fragment
// (or HTML comments) as the response body.
switch ($action) {

    case 'dashboard_status':
        /* displays a notification if broken links have been found */
        $sql = "SELECT count(*) FROM $linkdata_name WHERE broken=1";
        $broken_links = $wpdb->get_var($sql);
        if ($broken_links > 0) {
            echo "<div>\n<h3>Broken Links</h3>\n<p><a href='" . get_option('siteurl') .
                "/wp-admin/edit.php?page=" . $ws_link_checker->mybasename .
                "' title='View broken links'>Found $broken_links broken links</a></p>\n</div>";
        }
        break;

    case 'full_status':
        /* give some stats about the current situation */
        $sql = "SELECT count(*) FROM $postdata_name WHERE last_check<'$check_treshold'";
        $posts_unchecked = $wpdb->get_var($sql);

        $sql = "SELECT count(*) FROM $linkdata_name WHERE last_check<'$check_treshold'";
        $links_unchecked = $wpdb->get_var($sql);

        $sql = "SELECT count(*) FROM $linkdata_name WHERE broken=1";
        $broken_links = $wpdb->get_var($sql);

        if ($broken_links > 0) {
            echo "<a href='" . get_option('siteurl') . "/wp-admin/edit.php?page=" .
                $ws_link_checker->mybasename .
                "' title='View broken links'><strong>Found $broken_links broken links</strong></a>";
        } else {
            echo "No broken links found.";
        }

        echo "<br/>";

        if ($posts_unchecked || $links_unchecked) {
            echo "$posts_unchecked posts and $links_unchecked links in the work queue.";
        } else {
            echo "The work queue is empty.";
        }
        break;

    case 'run_check':
        /* Step 1: re-parse posts that have not been checked since the
           threshold and put the links found in them into the queue. */
        echo "<!-- run_check -->";

        $sql = "SELECT b.* FROM $postdata_name a, $wpdb->posts b\n" .
            "WHERE a.last_check<'$check_treshold' AND a.post_id=b.id ORDER BY a.last_check ASC LIMIT 20";

        $rows = $wpdb->get_results($sql, OBJECT);
        if (!empty($rows)) {
            echo "<!-- parsing pages (rand : " . rand(1, 1000) . ") -->";
            foreach ($rows as $post) {
                // Drop the post's old links, re-collect them, then stamp the post as checked.
                $wpdb->query("DELETE FROM $linkdata_name WHERE post_id=$post->ID");
                gather_and_save_links($post->post_content, $post->ID);
                $wpdb->query("UPDATE $postdata_name SET last_check=NOW() WHERE post_id=$post->ID");
            }
        }

        if (execution_time() > $max_execution_time) {
            die('<!-- general timeout -->');
        }

        /* Step 2: check queued links that are due, plus broken links that
           have been tried fewer than 5 times and are due for a retry. */
        $sql = "SELECT * FROM $linkdata_name WHERE  ((last_check<'$check_treshold') OR " .
            " (broken=1 AND check_count<5 AND last_check<'$recheck_treshold'))  LIMIT 100";

        $links = $wpdb->get_results($sql, OBJECT);
        if (!empty($links)) {
            echo "<!-- checking links (rand : " . rand(1, 1000) . ") -->";
            foreach ($links as $link) {
                if (!page_exists_simple($link->url)) {
                    // Unreachable: flag as broken and count the attempt.
                    $wpdb->query("UPDATE $linkdata_name SET broken=1,  last_check=NOW(), check_count=check_count+1 WHERE id=$link->id");
                } else {
                    // Link OK, remove from queue.
                    $wpdb->query("DELETE FROM $linkdata_name WHERE id=$link->id");
                }

                // Stop as soon as the session's time budget is used up.
                if (execution_time() > $max_execution_time) {
                    die('<!-- url loop timeout -->');
                }
            }
        }

        die('<!-- /run_check -->');

    case 'discard_link':
        // NOTE(review): nothing in this file checks who is making the
        // request before deleting the row - confirm access is restricted elsewhere.
        $id = intval($_GET['id']);
        $wpdb->query("DELETE FROM $linkdata_name WHERE id=$id LIMIT 1");
        break;
}
141
142
/*
Handles one <a> match produced by gather_and_save_links().

$matches[2] is the raw href and $matches[4] the HTML between the tags.
The href is cleaned up, made absolute against $siteurl and, when the
result is longer than 5 characters, stored in the link table together
with the tag-stripped link text.

Returns false when the href is unparseable or empty after clean-up,
true otherwise (even when nothing was stored).
*/
function parse_link($matches, $post_id){
    global $wpdb, $siteurl, $linkdata_name;

    $href = $matches[2];

    if (!@parse_url($href)) {
        return false;
    }

    // Clean-up rules, applied one after another in this order.
    $cleanup = array(
        '/([\?&]PHPSESSID=\w+)$/i' => '',   // trailing PHP session id
        '/(#[^\/]*)$/i'            => '',   // trailing #fragment
        '/&amp;/'                  => '&',  // HTML-encoded ampersand
        '/^(javascript:.*)/i'      => '',   // javascript: pseudo-links
        '/([\?&]sid=\w+)$/i'       => '',   // trailing sid parameter
    );
    foreach ($cleanup as $pattern => $replacement) {
        $href = preg_replace($pattern, $replacement, $href);
    }

    $href = trim($href);
    if ($href == '') {
        return false;
    }

    // turn relative URLs into absolute URLs
    $href = relative2absolute($siteurl, $href);

    if (strlen($href) > 5) {
        $safe_url  = $wpdb->escape($href);
        $safe_text = $wpdb->escape(strip_tags($matches[4]));
        $wpdb->query(
            "INSERT INTO $linkdata_name(post_id, url, link_text)\n" .
            "VALUES($post_id, '$safe_url', '$safe_text')"
        );
    }

    return true;
}
172
/*
Handles one <img> match produced by gather_and_save_links().

$matches[2] is the raw src value. It is cleaned up, made absolute
against $siteurl and, when the result is longer than 3 characters,
stored in the link table with the fixed link text '[image]'.

Returns false when the src is unparseable or empty after clean-up,
true otherwise (even when nothing was stored).
*/
function parse_image($matches, $post_id){
    global $wpdb, $siteurl, $linkdata_name;

    $src = $matches[2];

    if (!@parse_url($src)) {
        return false;
    }

    // Clean-up rules, applied one after another in this order.
    $cleanup = array(
        '/([\?&]PHPSESSID=\w+)$/i' => '',   // trailing PHP session id
        '/(#[^\/]*)$/i'            => '',   // trailing #fragment
        '/&amp;/'                  => '&',  // HTML-encoded ampersand
        '/^(javascript:.*)/i'      => '',   // javascript: pseudo-URLs
        '/([\?&]sid=\w+)$/i'       => '',   // trailing sid parameter
    );
    foreach ($cleanup as $pattern => $replacement) {
        $src = preg_replace($pattern, $replacement, $src);
    }

    $src = trim($src);
    if ($src == '') {
        return false;
    }

    // turn relative URLs into absolute URLs
    $src = relative2absolute($siteurl, $src);

    if (strlen($src) > 3) {
        $safe_url = $wpdb->escape($src);
        $wpdb->query(
            "INSERT INTO $linkdata_name(post_id, url, link_text)\n" .
            "VALUES($post_id, '$safe_url', '[image]')"
        );
    }

    return true;
}
202
/*
Scans a post's HTML for <a href=...> and <img src=...> tags and hands
every match to parse_link() / parse_image() together with $post_id.
Links are processed first, then images.

Returns $content unchanged.
*/
function gather_and_save_links($content, $post_id){
    // handler function => pattern whose matches it receives
    $extractors = array(
        'parse_link'  => '/(<a[\s]+[^>]*href\s*=\s*[\"\']?)([^\'\" >]+)([\'\"]+[^<>]*>)((?sU).*)(<\/a>)/i',
        'parse_image' => '/(<img[\s]+[^>]*src\s*=\s*[\"\']?)([^\'\" >]+)([\'\"]+[^<>]*>)/i',
    );

    foreach ($extractors as $handler => $pattern) {
        if (!preg_match_all($pattern, $content, $found, PREG_SET_ORDER)) {
            continue; // nothing matched (or the regex engine failed)
        }
        foreach ($found as $match) {
            $handler($match, $post_id);
        }
    }

    return $content;
}
224
/*
Checks whether $url answers with an HTTP status in the 200-399 range.

Non-https URLs are first requested with a HEAD-style request (no body);
if that yields a status outside 200-399 the request is repeated as a
normal GET. https URLs are fetched with GET directly, with certificate
and host verification switched off. Up to 10 redirects are followed.

Returns true for a 2xx/3xx final status, false otherwise (including
unparseable URLs and cURL failures, which report status 0).
*/
function page_exists_simple($url){
    $parts = @parse_url($url);
    if (!$parts) return false;

    // A URL without a scheme is treated as plain http. The scheme is kept
    // in its own variable so a missing 'scheme' key is never read, and is
    // lower-cased so "HTTPS://..." takes the https branch as well.
    if (isset($parts['scheme'])) {
        $scheme = strtolower($parts['scheme']);
    } else {
        $scheme = 'http';
        $url = 'http://' . $url;
    }
    // NOTE(review): schemes cURL cannot fetch (e.g. mailto:) end up with
    // status 0 and are therefore reported as non-existent - confirm intended.

    $ch = curl_init();
    if (!$ch) return false; // cURL could not create a handle

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
    curl_setopt($ch, CURLOPT_TIMEOUT, 25);

    // Error statuses must not abort the transfer; the code is inspected below.
    curl_setopt($ch, CURLOPT_FAILONERROR, false);

    $nobody = false;
    if ($scheme == 'https') {
        // Only reachability matters here, not the certificate's validity.
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    } else {
        $nobody = true;
        curl_setopt($ch, CURLOPT_NOBODY, true);
    }
    curl_setopt($ch, CURLOPT_HEADER, true);

    curl_exec($ch);
    $code = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));

    // The body-less request failed: retry once as a regular GET before
    // declaring the link broken.
    if ((($code < 200) || ($code >= 400)) && $nobody) {
        curl_setopt($ch, CURLOPT_NOBODY, false);
        curl_setopt($ch, CURLOPT_HTTPGET, true);
        curl_exec($ch);
        $code = intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
    }

    curl_close($ch);

    return (($code >= 200) && ($code < 400));
}
268
/*
Resolves $relative against the base URL $absolute.

- Returns false when $relative cannot be parsed at all.
- Returns $relative untouched when it already has a scheme.
- "//host/path" (protocol-relative) gets the base URL's scheme
  (http when the base has none).
- "/path" replaces the base URL's whole path.
- Anything else is appended to the base URL's directory, with "." and
  ".." segments resolved.

The result is built from the base's scheme, user[:pass], host[:port]
and the resolved path; the base's query string and fragment are dropped.
*/
function relative2absolute($absolute, $relative) {
    $p = @parse_url($relative);
    if(!$p) {
        // $relative is a seriously malformed URL
        return false;
    }
    if(isset($p["scheme"])) return $relative;

    $parts = @parse_url($absolute);
    if(!is_array($parts)) $parts = array();

    // Protocol-relative URL: it names its own host, so only the scheme
    // comes from the base.
    if(substr($relative,0,2)=='//') {
        $scheme = !empty($parts['scheme']) ? $parts['scheme'] : 'http';
        return $scheme.':'.$relative;
    }

    if(substr($relative,0,1)=='/') {
        // Root-relative: use the path as given, minus the leading slash.
        $cparts = explode("/", $relative);
        array_shift($cparts);
    } else {
        if(isset($parts['path'])){
            // Directory of the base URL: drop the last segment (file name,
            // or the empty string after a trailing slash) and empty pieces.
            $aparts = explode('/',$parts['path']);
            array_pop($aparts);
            $aparts = array_filter($aparts);
        } else {
            $aparts = array();
        }

        // Resolve dot segments with a stack so that every ".." removes the
        // nearest segment still present; runs like "../../" climb two levels.
        $cparts = array();
        foreach(array_merge($aparts, explode("/", $relative)) as $part) {
            if($part == '.') {
                continue;
            }
            if($part == '..') {
                array_pop($cparts); // no-op when already at the top
                continue;
            }
            $cparts[] = $part;
        }
    }
    $path = implode("/", $cparts);

    $url = '';
    if(!empty($parts['scheme'])) {
        $url = $parts['scheme']."://";
    }
    if(isset($parts['user'])) {
        $url .= $parts['user'];
        if(isset($parts['pass'])) {
            $url .= ":".$parts['pass'];
        }
        $url .= "@";
    }
    if(isset($parts['host'])) {
        $url .= $parts['host'];
        // Keep a non-default port, otherwise the link points at the wrong server.
        if(isset($parts['port'])) {
            $url .= ":".$parts['port'];
        }
        $url .= "/";
    }
    $url .= $path;

    return $url;
}
323
324 ?>