PluginProbe ʕ •ᴥ•ʔ
Responsive Lightbox & Gallery / 2.3.1
Responsive Lightbox & Gallery v2.3.1
2.7.8 trunk 1.0.0 1.0.1 1.0.1.1 1.0.2 1.0.3 1.0.4 1.1.0 1.1.1 1.1.2 1.2.0 1.2.1 1.2.2 1.2.3 1.3.0 1.3.1 1.3.2 1.3.3 1.3.4 1.3.5 1.3.6 1.4.0 1.4.0.1 1.4.1 1.4.11 1.4.12 1.4.13 1.4.14 1.4.2 1.4.3 1.4.4 1.4.5 1.4.6 1.4.7 1.4.8 1.4.9 1.5.0 1.5.1 1.5.2 1.5.3 1.5.4 1.5.5 1.5.6 1.5.7 1.6.0 1.6.1 1.6.10 1.6.11 1.6.12 1.6.2 1.6.3 1.6.4 1.6.5 1.6.6 1.6.7 1.6.8 1.6.9 1.7.0 1.7.1 1.7.2 2.0 2.0.1 2.0.2 2.0.3 2.0.4 2.0.5 2.1 2.2.0 2.2.1 2.2.2 2.2.3 2.2.3.1 2.3.0 2.3.1 2.3.2 2.3.3 2.3.4 2.3.5 2.4.0 2.4.1 2.4.2 2.4.3 2.4.4 2.4.5 2.4.6 2.4.7 2.4.8 2.4.9 2.5.0 2.5.1 2.5.2 2.5.3 2.5.4 2.5.5 2.6.0 2.6.1 2.7.0 2.7.1 2.7.2 2.7.3 2.7.4 2.7.5 2.7.6 2.7.7
responsive-lightbox / library / simplehtmldom / simple_html_dom.php
responsive-lightbox / library / simplehtmldom Last commit date
simple_html_dom.php 5 years ago simplehtmldom.php 5 years ago
simple_html_dom.php
2356 lines
1 <?php
2 /**
3 * Website: http://sourceforge.net/projects/simplehtmldom/
4 * Additional projects: http://sourceforge.net/projects/debugobject/
5 * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6 *
7 * Licensed under The MIT License
8 * See the LICENSE file in the project root for more information.
9 *
10 * Authors:
11 * S.C. Chen
12 * John Schlick
13 * Rus Carroll
14 * logmanoriginal
15 *
16 * Contributors:
17 * Yousuke Kumakura
18 * Vadim Voituk
19 * Antcs
20 *
21 * Version Rev. 1.9.1 (291)
22 */
23
// Node type identifiers stored in simple_html_dom_node::$nodetype.
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);
// Attribute quoting styles; makeup() reads them from $_[HDOM_INFO_QUOTE] to
// re-emit each attribute with its quote character (anything that is not
// DOUBLE or SINGLE is written without quotes).
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);
// Keys of the per-node bookkeeping array simple_html_dom_node::$_.
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE', 7);

// Library defaults. Each one is only defined here when the host application
// has not already defined a constant of the same name.
defined('DEFAULT_TARGET_CHARSET') || define('DEFAULT_TARGET_CHARSET', 'UTF-8');
defined('DEFAULT_BR_TEXT') || define('DEFAULT_BR_TEXT', "\r\n");
defined('DEFAULT_SPAN_TEXT') || define('DEFAULT_SPAN_TEXT', ' ');
defined('MAX_FILE_SIZE') || define('MAX_FILE_SIZE', 600000);
// NOTE(review): not referenced in the visible part of this file; presumably a
// parser option flag - confirm against the simple_html_dom class.
define('HDOM_SMARTY_AS_TEXT', 1);
47
/**
 * Read a file or URL and parse it into a simple_html_dom document.
 *
 * The first five parameters are forwarded to file_get_contents(); the
 * remaining ones configure the simple_html_dom instance.
 *
 * @param string   $url              File path or URL to read.
 * @param bool     $use_include_path Forwarded to file_get_contents().
 * @param resource $context          Stream context, or null.
 * @param int      $offset           Read offset.
 * @param int      $maxLen           Maximum number of bytes to read; values
 *                                   <= 0 select MAX_FILE_SIZE.
 * @param bool     $lowercase        Passed to the DOM constructor and load().
 * @param bool     $forceTagsClosed  Passed to the DOM constructor.
 * @param string   $target_charset   Passed to the DOM constructor.
 * @param bool     $stripRN          Passed to the DOM constructor and load().
 * @param string   $defaultBRText    Passed to the DOM constructor.
 * @param string   $defaultSpanText  Passed to the DOM constructor.
 *
 * @return mixed Whatever simple_html_dom::load() returns, or false when
 *               nothing usable was read.
 */
function file_get_html(
    $url,
    $use_include_path = false,
    $context = null,
    $offset = 0,
    $maxLen = -1,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    // A non-positive limit means "use the library-wide maximum".
    $limit = ($maxLen <= 0) ? MAX_FILE_SIZE : $maxLen;

    $document = new simple_html_dom(
        null,
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText
    );

    $raw = file_get_contents($url, $use_include_path, $context, $offset, $limit);

    // NOTE(review): the read above is already capped at $limit bytes, so the
    // length half of this check cannot trigger; oversized input is truncated
    // rather than rejected.
    $usable = !empty($raw) && strlen($raw) <= $limit;

    if (!$usable) {
        $document->clear();
        return false;
    }

    return $document->load($raw, $lowercase, $stripRN);
}
93
/**
 * Parse an HTML string into a simple_html_dom document.
 *
 * @param string $str             Markup to parse.
 * @param bool   $lowercase       Passed to the DOM constructor and load().
 * @param bool   $forceTagsClosed Passed to the DOM constructor.
 * @param string $target_charset  Passed to the DOM constructor.
 * @param bool   $stripRN         Passed to the DOM constructor and load().
 * @param string $defaultBRText   Passed to the DOM constructor.
 * @param string $defaultSpanText Passed to the DOM constructor.
 *
 * @return mixed Whatever simple_html_dom::load() returns, or false when $str
 *               is empty or longer than MAX_FILE_SIZE bytes.
 */
function str_get_html(
    $str,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $document = new simple_html_dom(
        null,
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText
    );

    $acceptable = !empty($str) && strlen($str) <= MAX_FILE_SIZE;

    if ($acceptable) {
        return $document->load($str, $lowercase, $stripRN);
    }

    // Release the unused document before reporting failure.
    $document->clear();
    return false;
}
120
/**
 * Print the tree below $node by delegating to its dump() method.
 *
 * @param object $node      Node exposing dump($show_attr, $depth).
 * @param bool   $show_attr Whether attributes are printed next to each tag.
 * @param int    $deep      Indentation depth of the first printed line.
 *
 * @return void
 */
function dump_html_tree($node, $show_attr = true, $deep = 0)
{
    // The node itself used to be passed as the first argument, which discarded
    // both $show_attr and $deep.
    $node->dump($show_attr, $deep);
}
125
126 class simple_html_dom_node
127 {
128 public $nodetype = HDOM_TYPE_TEXT;
129 public $tag = 'text';
130 public $attr = array();
131 public $children = array();
132 public $nodes = array();
133 public $parent = null;
134 public $_ = array();
135 public $tag_start = 0;
136 private $dom = null;
137
/**
 * Create a node and register it with its owning document.
 *
 * @param object $dom Owning document; the new node is appended to $dom->nodes.
 */
function __construct($dom)
{
    $dom->nodes[] = $this;
    $this->dom = $dom;
}
143
/**
 * Drop all references held by this node when it is destroyed.
 */
function __destruct()
{
    // Same cleanup as an explicit clear() call.
    $this->clear();
}
148
/**
 * String conversion yields the node's outer text.
 *
 * @return string
 */
function __toString()
{
    $markup = $this->outertext();

    return $markup;
}
153
/**
 * Reset the references to the document, parent and descendants to null.
 *
 * @return void
 */
function clear()
{
    $this->children = null;
    $this->nodes = null;
    $this->parent = null;
    $this->dom = null;
}
161
/**
 * Echo this node's tag (optionally with attributes) and recurse into
 * $this->nodes, indenting one tab per level.
 *
 * @param bool $show_attr Print the attribute list after the tag name.
 * @param int  $depth     Number of leading tabs for this node.
 *
 * @return void
 */
function dump($show_attr = true, $depth = 0)
{
    $line = str_repeat("\t", $depth) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        $line .= '(';
        foreach ($this->attr as $name => $value) {
            $line .= "[$name]=>\"$value\", ";
        }
        $line .= ')';
    }

    echo $line . "\n";

    if (!$this->nodes) {
        return;
    }

    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $depth + 1);
    }
}
182
/**
 * Build a one-line debug description of this node: tag, attributes, the
 * bookkeeping array $_, text, inner text, child/node counts and tag_start.
 *
 * @param bool $echo When true the line is echoed and nothing is returned;
 *                   when false the line is returned instead.
 *
 * @return string|void
 */
function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"$v\", ";
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"$v2\", ";
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"$v\", ";
            }
        }
        $string .= ')';
    }

    // "text" is not a declared property, so this goes through __isset/__get
    // and reports a "text" attribute if one exists.
    if (isset($this->text)) {
        $string .= " text: ({$this->text})";
    }

    $string .= ' HDOM_INNER_INFO: ';

    // This used to read the undefined variable $node, so the inner text was
    // always reported as NULL.
    if (isset($this->_[HDOM_INFO_INNER])) {
        $string .= "'" . $this->_[HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= ' tag_start: ' . $this->tag_start;
    $string .= "\n";

    if ($echo) {
        echo $string;
        return;
    }

    return $string;
}
235
/**
 * Get the parent node, or attach this node to a new parent.
 *
 * Known limitation (kept from the original): the node is not removed from the
 * nodes/children lists of its previous parent before being appended to the
 * new one.
 *
 * @param object|null $parent New parent, or null to only read the current one.
 *
 * @return object|null The (possibly new) parent.
 */
function parent($parent = null)
{
    if ($parent === null) {
        return $this->parent;
    }

    $this->parent = $parent;
    $parent->nodes[] = $this;
    $parent->children[] = $this;

    return $parent;
}
249
/**
 * Tell whether this node has at least one child element.
 *
 * @return bool
 */
function has_child()
{
    $is_empty = empty($this->children);

    return !$is_empty;
}
254
/**
 * Return all child elements, or a single one by position.
 *
 * @param int $idx Position of the wanted child; -1 returns the whole list.
 *
 * @return mixed The children array, one child, or null if $idx is not set.
 */
function children($idx = -1)
{
    if ($idx === -1) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
267
/**
 * Return the first child element, or null when there are no children.
 *
 * @return mixed
 */
function first_child()
{
    return (count($this->children) > 0) ? $this->children[0] : null;
}
275
/**
 * Return the last child element, or null when there are no children.
 *
 * @return mixed
 */
function last_child()
{
    // end() also moves the internal pointer of $this->children, as before.
    return (count($this->children) > 0) ? end($this->children) : null;
}
283
/**
 * Return the element that follows this node in its parent's children list.
 *
 * @return mixed The next sibling, or null when there is no parent, the node
 *               is not among the parent's children, or it is the last child.
 */
function next_sibling()
{
    $owner = $this->parent;

    if ($owner === null) {
        return null;
    }

    $position = array_search($this, $owner->children, true);

    if ($position === false || !isset($owner->children[$position + 1])) {
        return null;
    }

    return $owner->children[$position + 1];
}
298
/**
 * Return the element that precedes this node in its parent's children list.
 *
 * @return mixed The previous sibling, or null when there is no parent, the
 *               node is not among the parent's children, or it is the first.
 */
function prev_sibling()
{
    $owner = $this->parent;

    if ($owner === null) {
        return null;
    }

    $position = array_search($this, $owner->children, true);

    if ($position === false || $position <= 0) {
        return null;
    }

    return $owner->children[$position - 1];
}
313
/**
 * Walk up the parent chain and return the nearest ancestor with the given tag.
 *
 * @param string $tag Tag name, compared with ===.
 *
 * @return mixed The matching ancestor, or null if none is found.
 */
function find_ancestor_tag($tag)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    for ($candidate = $this->parent; !is_null($candidate); $candidate = $candidate->parent) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'Current tag is: ' . $candidate->tag);
        }

        if ($candidate->tag === $tag) {
            return $candidate;
        }
    }

    return null;
}
339
/**
 * Return the markup inside this node.
 *
 * Precedence: an explicitly stored inner text, then the stored text of a
 * text-like node (with noise restored by the document), then the
 * concatenated outer text of every entry in $this->nodes.
 *
 * @return string
 */
function innertext()
{
    $info = $this->_;

    if (isset($info[HDOM_INFO_INNER])) {
        return $info[HDOM_INFO_INNER];
    }

    if (isset($info[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($info[HDOM_INFO_TEXT]);
    }

    $markup = '';

    foreach ($this->nodes as $child) {
        $markup .= $child->outertext();
    }

    return $markup;
}
358
/**
 * Return the markup of this node including its own tag.
 *
 * The root node has no tag of its own and yields its inner text. For other
 * nodes the document callback (if any) is invoked first, then an explicitly
 * stored outer text or stored text wins; otherwise the result is assembled
 * from the opening tag, the content and, when recorded, the closing tag.
 *
 * @return string
 */
function outertext()
{
    global $debug_object;

    if (is_object($debug_object)) {
        $suffix = '';

        if ($this->tag === 'text' && !empty($this->text)) {
            $suffix = ' with text: ' . $this->text;
        }

        $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $suffix);
    }

    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func_array($this->dom->callback, array($this));
    }

    // The callback may have stored an outer text, so check after calling it.
    if (isset($this->_[HDOM_INFO_OUTER])) {
        return $this->_[HDOM_INFO_OUTER];
    }

    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $markup = '';

    // Opening tag, rendered by the node registered at this node's begin index.
    if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) {
        $markup = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
    }

    if (isset($this->_[HDOM_INFO_INNER])) {
        // todo: <br> should either never have HDOM_INFO_INNER or always
        if ($this->tag !== 'br') {
            $markup .= $this->_[HDOM_INFO_INNER];
        }
    } elseif ($this->nodes) {
        foreach ($this->nodes as $child) {
            $markup .= $this->convert_text($child->outertext());
        }
    }

    $has_end_tag = isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0;

    if ($has_end_tag) {
        $markup .= '</' . $this->tag . '>';
    }

    return $markup;
}
415
/**
 * Return the plain-text content of this node.
 *
 * An explicitly stored inner text is returned as is. Text nodes return their
 * stored text with noise restored; comment and unknown nodes, as well as
 * script and style elements, return an empty string. Other nodes concatenate
 * the text of the entries in $this->nodes, starting a new paragraph before
 * each <p> and appending the document's default span text after each <span>.
 *
 * @return string
 */
function text()
{
    if (isset($this->_[HDOM_INFO_INNER])) {
        return $this->_[HDOM_INFO_INNER];
    }

    $type = $this->nodetype;

    if ($type == HDOM_TYPE_TEXT) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    if ($type == HDOM_TYPE_COMMENT || $type == HDOM_TYPE_UNKNOWN) {
        return '';
    }

    if (strcasecmp($this->tag, 'script') === 0 || strcasecmp($this->tag, 'style') === 0) {
        return '';
    }

    $plain = '';

    // $this->nodes has been observed to be NULL for some element nodes (some
    // span and p tags) without clear() having been called; the cause is
    // unknown, so that case is simply skipped.
    if (is_null($this->nodes)) {
        return $plain;
    }

    foreach ($this->nodes as $child) {
        // Start paragraph after a blank line
        if ($child->tag === 'p') {
            $plain = trim($plain) . "\n\n";
        }

        $plain .= $this->convert_text($child->text());

        // Separate consecutive spans so their text does not run together.
        if ($child->tag === 'span') {
            $plain .= $this->dom->default_span_text;
        }
    }

    return $plain;
}
457
/**
 * Return the inner text with CDATA section markers removed.
 *
 * The opening marker is matched case-insensitively, the closing one exactly.
 *
 * @return string
 */
function xmltext()
{
    $without_open = str_ireplace('<![CDATA[', '', $this->innertext());

    return str_replace(']]>', '', $without_open);
}
465
/**
 * Render this node's opening tag (or its stored text for text-like nodes).
 *
 * Attributes are emitted in stored order with the whitespace and quote style
 * recorded in $_[HDOM_INFO_SPACE] and $_[HDOM_INFO_QUOTE]; attributes whose
 * value is null or false are skipped, and a value of true yields a bare
 * attribute name.
 *
 * @return string
 */
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $html = '<' . $this->tag;
    $position = -1;

    foreach ($this->attr as $name => $value) {
        // The position advances for skipped attributes too, so it stays
        // aligned with the recorded space/quote entries.
        $position++;

        if ($value === null || $value === false) {
            continue;
        }

        $spacing = $this->_[HDOM_INFO_SPACE][$position];
        $html .= $spacing[0];

        // no value attr: nowrap, checked selected...
        if ($value === true) {
            $html .= $name;
            continue;
        }

        $style = $this->_[HDOM_INFO_QUOTE][$position];

        if ($style == HDOM_QUOTE_DOUBLE) {
            $quote = '"';
        } elseif ($style == HDOM_QUOTE_SINGLE) {
            $quote = '\'';
        } else {
            $quote = '';
        }

        $html .= $name . $spacing[1] . '=' . $spacing[2] . $quote . $value . $quote;
    }

    $html = $this->dom->restore_noise($html);

    return $html . $this->_[HDOM_INFO_ENDSPACE] . '>';
}
508
/**
 * Find descendant nodes matching a CSS-like selector.
 *
 * @param string   $selector  Selector string; comma-separated lists are
 *                            supported and their results merged.
 * @param int|null $idx       null returns every match as an array; an integer
 *                            returns the match at that position (negative
 *                            values count from the end) or null.
 * @param bool     $lowercase Forwarded to seek() for case-insensitive
 *                            matching of classes and attribute values.
 *
 * @return mixed Array of nodes, a single node, or null.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $groups = $this->parse_selector($selector);
    $group_count = count($groups);

    if ($group_count === 0) {
        return array();
    }

    $matched = array();

    // One pass per entry of the selector list.
    for ($g = 0; $g < $group_count; ++$g) {
        $chain = $groups[$g];
        $depth = count($chain);

        if ($depth === 0) { return array(); }
        if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); }

        $frontier = array($this->_[HDOM_INFO_BEGIN] => 1);
        $combinator = ' ';

        // Iteratively narrow the frontier, one compound selector at a time.
        for ($level = 0; $level < $depth; ++$level) {
            $next = array();

            foreach ($frontier as $key => $unused) {
                $start = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
                $start->seek($chain[$level], $next, $combinator, $lowercase);
            }

            $frontier = $next;
            $combinator = $chain[$level][4]; // combinator leading to the next level
        }

        foreach ($frontier as $key => $unused) {
            $matched[$key] = 1;
        }
    }

    // Return matches in the order of their keys.
    ksort($matched);

    $found = array();
    foreach ($matched as $key => $unused) {
        $found[] = $this->dom->nodes[$key];
    }

    if (is_null($idx)) {
        return $found;
    }

    if ($idx < 0) {
        $idx = count($found) + $idx;
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
560
/**
 * Collect the nodes reachable from this node through one combinator that
 * match one compound selector.
 *
 * Changes from the previous implementation:
 *  - sibling lookups rely on a strict array_search() instead of a non-strict
 *    in_array(), which compared node objects property by property;
 *  - the subsequent-sibling combinator (~) no longer offers the reference
 *    element itself as a candidate;
 *  - the ancestor walk for void elements no longer dereferences a null parent;
 *  - class names are compared strictly.
 *
 * @param array  $selector   Parsed compound selector as produced by
 *                           parse_selector(): tag, id, classes, attributes,
 *                           next combinator.
 * @param array  $ret        Result set, passed by reference; matching nodes
 *                           are added as keys with the value 1.
 * @param string $parent_cmd Combinator leading to this selector: ' ', '>',
 *                           '+' or '~'.
 * @param bool   $lowercase  Compare classes and attribute values
 *                           case-insensitively.
 *
 * @return void
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    // The fifth element (next combinator) is consumed by find(), not here.
    list($tag, $id, $class, $attributes) = $selector;
    $nodes = array();

    if ($parent_cmd === ' ') { // Descendant Combinator
        // Find parent closing tag if the current element doesn't have a closing
        // tag (i.e. void element)
        $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
        if ($end == 0) {
            $parent = $this->parent;
            while ($parent !== null && !isset($parent->_[HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }
            if ($parent !== null) {
                $end += $parent->_[HDOM_INFO_END];
            }
        }

        // Get list of target nodes
        $nodes_start = $this->_[HDOM_INFO_BEGIN] + 1;
        $nodes_count = $end - $nodes_start;
        $nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true);
    } elseif ($parent_cmd === '>') { // Child Combinator
        $nodes = $this->children;
    } elseif ($parent_cmd === '+' && $this->parent) { // Next-Sibling Combinator
        $index = array_search($this, $this->parent->children, true);
        if ($index !== false && $index + 1 < count($this->parent->children)) {
            $nodes[] = $this->parent->children[$index + 1];
        }
    } elseif ($parent_cmd === '~' && $this->parent) { // Subsequent Sibling Combinator
        $index = array_search($this, $this->parent->children, true);
        if ($index !== false) {
            // Only siblings after the reference element are candidates.
            $nodes = array_slice($this->parent->children, $index + 1);
        }
    }

    // Go through each element starting at this element until the end tag
    // Note: If this element is a void tag, any previous void element is
    // skipped.
    foreach ($nodes as $node) {
        $pass = true;

        // Skip root nodes
        if (!$node->parent) {
            $pass = false;
        }

        // Handle 'text' selector
        if ($pass && $tag === 'text' && $node->tag === 'text') {
            $ret[array_search($node, $this->dom->nodes, true)] = 1;
            unset($node);
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if ($pass && !in_array($node, $node->parent->children, true)) {
            $pass = false;
        }

        // Skip if tag doesn't match
        if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') {
            $pass = false;
        }

        // Skip if ID doesn't exist
        if ($pass && $id !== '' && !isset($node->attr['id'])) {
            $pass = false;
        }

        // Check if ID matches
        if ($pass && $id !== '' && isset($node->attr['id'])) {
            // Note: Only consider the first ID (as browsers do)
            $node_id = explode(' ', trim($node->attr['id']));
            $node_id = $node_id[0];

            if ($id !== $node_id) { $pass = false; }
        }

        // Check if all class(es) exist
        if ($pass && $class !== '' && is_array($class) && !empty($class)) {
            if (isset($node->attr['class'])) {
                $node_classes = explode(' ', $node->attr['class']);

                if ($lowercase) {
                    $node_classes = array_map('strtolower', $node_classes);
                }

                foreach ($class as $c) {
                    // Strict, so numeric-looking names such as "1" and "01"
                    // are not treated as equal.
                    if (!in_array($c, $node_classes, true)) {
                        $pass = false;
                        break;
                    }
                }
            } else {
                $pass = false;
            }
        }

        // Check attributes
        if ($pass
            && $attributes !== ''
            && is_array($attributes)
            && !empty($attributes)) {
            foreach ($attributes as $a) {
                list (
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Handle indexing attributes (i.e. "[2]")
                /**
                 * Note: This is not supported by the CSS Standard but adds
                 * the ability to select items compatible to XPath (i.e.
                 * the 3rd element within its parent).
                 *
                 * Note: This doesn't conflict with the CSS Standard which
                 * doesn't work on numeric attributes anyway.
                 */
                if (is_numeric($att_name)
                    && $att_expr === ''
                    && $att_val === '') {
                    $count = 0;

                    // Find index of current element in parent
                    foreach ($node->parent->children as $c) {
                        if ($c->tag === $node->tag) ++$count;
                        if ($c === $node) break;
                    }

                    // If this is the correct node, continue with next
                    // attribute
                    if ($count === (int)$att_name) continue;
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if ($att_name !== 'plaintext'
                        && !isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ($att_expr === '') continue;

                // A "plaintext" pseudo attribute is matched against the
                // node's plain text instead of a real attribute.
                // todo "plaintext" is not a valid CSS selector!
                if ($att_name === 'plaintext') {
                    $nodeKeyValue = $node->text();
                } else {
                    $nodeKeyValue = $node->attr[$att_name];
                }

                if (is_object($debug_object)) {
                    $debug_object->debug_log(2,
                        'testing node: '
                        . $node->tag
                        . ' for attribute: '
                        . $att_name
                        . $att_expr
                        . $att_val
                        . ' where nodes value is: '
                        . $nodeKeyValue
                    );
                }

                // If lowercase is set, do a case insensitive test of
                // the value of the selector.
                if ($lowercase) {
                    $check = $this->match(
                        $att_expr,
                        strtolower($att_val),
                        strtolower($nodeKeyValue),
                        $att_case_sensitivity
                    );
                } else {
                    $check = $this->match(
                        $att_expr,
                        $att_val,
                        $nodeKeyValue,
                        $att_case_sensitivity
                    );
                }

                if (is_object($debug_object)) {
                    $debug_object->debug_log(2,
                        'after match: '
                        . ($check ? 'true' : 'false')
                    );
                }

                if (!$check) {
                    $pass = false;
                    break;
                }
            }
        }

        // Found a match. Add to list and clear node
        if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1;
        unset($node);
    }
    // It's passed by reference so this is actually what this function returns.
    if (is_object($debug_object)) {
        $debug_object->debug_log(1, 'EXIT - ret: ', $ret);
    }
}
784
/**
 * Test an attribute value against a selector value using a CSS attribute
 * operator.
 *
 * @param string $exp              Operator: '=', '!=', '^=', '$=', '*=',
 *                                 '|=' or '~='.
 * @param string $pattern          Value given in the selector.
 * @param string $value            Value found on the node.
 * @param string $case_sensitivity 'i' makes the comparison case-insensitive.
 *
 * @return bool|int Truthy when the value matches; false for unknown operators.
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    if ($case_sensitivity === 'i') {
        $pattern = strtolower($pattern);
        $value = strtolower($value);
    }

    switch ($exp) {
        case '=':
            return ($value === $pattern);
        case '!=':
            return ($value !== $pattern);
        case '^=':
            return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
        case '$=':
            return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
        case '*=':
            return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
        case '|=':
            /**
             * [att|=val]
             *
             * Represents an element with the att attribute, its value
             * either being exactly "val" or beginning with "val"
             * immediately followed by "-" (U+002D).
             *
             * A plain prefix test would also accept e.g. "english" for
             * [lang|=en], so both allowed forms are checked explicitly.
             */
            return $value === $pattern
                || strpos($value, $pattern . '-') === 0;
        case '~=':
            /**
             * [att~=val]
             *
             * Represents an element with the att attribute whose value is a
             * whitespace-separated list of words, one of which is exactly
             * "val". If "val" contains whitespace, it will never represent
             * anything (since the words are separated by spaces). Also if
             * "val" is the empty string, it will never represent anything.
             */
            if ($pattern === '') {
                return false;
            }

            // Split on any run of whitespace, not just a single space.
            $words = preg_split('/\s+/', trim($value), -1, PREG_SPLIT_NO_EMPTY);

            return in_array($pattern, $words, true);
    }
    return false;
}
829
/**
 * Split a selector string into a list of selector chains.
 *
 * The result is an array with one entry per comma-separated selector. Each
 * entry is an array of compound selectors, and each compound selector is an
 * array of five elements: [0] tag name, [1] id, [2] class names (array, or ''
 * when none), [3] attribute conditions (array, or '' when none) and [4] the
 * combinator leading to the next compound selector ('' at the end of a
 * selector-list entry).
 *
 * Every attribute condition is an array of: name, expression (operator),
 * value, inverted flag and case-sensitivity flag.
 *
 * @param string $selector_string Selector as given to find().
 *
 * @return array
 */
protected function parse_selector($selector_string)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    /**
     * Pattern of CSS selectors, modified from mootools (https://mootools.net/)
     *
     * Paperg: Add the colon to the attribute, so that it properly finds
     * <tag attr:ibute="something" > like google does.
     *
     * Note: if you try to look at this attribute, you MUST use getAttribute
     * since $dom->x:y will fail the php syntax check.
     *
     * Note the \[ that starts an attribute and the @? that follows it: an
     * attribute specifier may begin with an @ sign, which is matched but not
     * captured. Further study is required to determine whether this should
     * be documented or removed.
     *
     * Matches selectors in this order:
     *
     * [0] - full match
     *
     * [1] - tag name
     *     ([\w:\*-]*)
     *     Matches the tag name consisting of zero or more word characters,
     *     colons, asterisks and hyphens.
     *
     * [2] - id name
     *     (?:\#([\w-]+))?
     *     Optionally matches an id name, consisting of a "#" followed by
     *     the id name (one or more word characters and hyphens).
     *
     * [3] - class names (including dots)
     *     (?:|\.([\w\.-]+))?
     *     Optionally matches a list of classes, consisting of a "."
     *     followed by the class name (one or more word characters and
     *     hyphens) where multiple classes can be chained (i.e. ".foo.bar.baz")
     *
     * [4] - attributes
     *     ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?
     *     Optionally matches the attributes list
     *
     * [5] - separator
     *     ([\/, >+~]+)
     *     Matches the selector list separator
     */
    // phpcs:ignore Generic.Files.LineLength
    $pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is";

    preg_match_all(
        $pattern,
        trim($selector_string) . ' ', // Add final ' ' as pseudo separator
        $matches,
        PREG_SET_ORDER
    );

    if (is_object($debug_object)) {
        $debug_object->debug_log(2, 'Matches Array: ', $matches);
    }

    // $selectors collects finished chains; $result is the chain being built.
    $selectors = array();
    $result = array();

    foreach ($matches as $m) {
        $m[0] = trim($m[0]);

        // Skip NoOps
        if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; }

        // Convert the tag name to lowercase when the document is lowercased
        if ($this->dom->lowercase) {
            $m[1] = strtolower($m[1]);
        }

        // Extract classes
        if ($m[3] !== '') { $m[3] = explode('.', $m[3]); }

        /* Extract attributes (pattern based on the pattern above!)

         * [0] - full match
         * [1] - attribute name
         * [2] - attribute expression
         * [3] - attribute value
         * [4] - case sensitivity
         *
         * Note: Attributes can be negated with a "!" prefix to their name
         */
        if($m[4] !== '') {
            preg_match_all(
                "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
                trim($m[4]),
                $attributes,
                PREG_SET_ORDER
            );

            // Replace element by array
            $m[4] = array();

            foreach($attributes as $att) {
                // Skip empty matches
                if(trim($att[0]) === '') { continue; }

                // A leading "!" on the name marks a negated attribute test
                $inverted = (isset($att[1][0]) && $att[1][0] === '!');
                $m[4][] = array(
                    $inverted ? substr($att[1], 1) : $att[1], // Name
                    (isset($att[2])) ? $att[2] : '', // Expression
                    (isset($att[3])) ? $att[3] : '', // Value
                    $inverted, // Inverted Flag
                    (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
                );
            }
        }

        // Sanitize Separator
        if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator
            $m[5] = ' ';
        } else { // Other Separator
            $m[5] = trim($m[5]);
        }

        // Clear Separator if it's a Selector List
        if ($is_list = ($m[5] === ',')) { $m[5] = ''; }

        // Remove full match before adding to results
        array_shift($m);
        $result[] = $m;

        if ($is_list) { // Selector List
            $selectors[] = $result;
            $result = array();
        }
    }

    // Flush the last chain, which is not terminated by a comma
    if (count($result) > 0) { $selectors[] = $result; }
    return $selectors;
}
968
/**
 * Magic property reader.
 *
 * A set attribute of that name wins and is returned charset-converted.
 * Otherwise the virtual properties outertext, innertext, plaintext and
 * xmltext are served; any other name yields a boolean telling whether the
 * attribute key exists at all.
 *
 * @param string $name Property or attribute name.
 *
 * @return mixed
 */
function __get($name)
{
    if (isset($this->attr[$name])) {
        return $this->convert_text($this->attr[$name]);
    }

    if ($name == 'outertext') {
        return $this->outertext();
    }

    if ($name == 'innertext') {
        return $this->innertext();
    }

    if ($name == 'plaintext') {
        return $this->text();
    }

    if ($name == 'xmltext') {
        return $this->xmltext();
    }

    return array_key_exists($name, $this->attr);
}
982
/**
 * Magic property writer.
 *
 * "outertext" and "innertext" are stored in the bookkeeping array; every
 * other name is written as an attribute. For an attribute that is not yet
 * set, default spacing and double quotes are recorded for makeup().
 *
 * @param string $name  Property or attribute name.
 * @param mixed  $value New value.
 *
 * @return mixed The assigned value for outertext/innertext, otherwise nothing.
 */
function __set($name, $value)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    if ($name == 'outertext') {
        return $this->_[HDOM_INFO_OUTER] = $value;
    }

    if ($name == 'innertext') {
        // Text-like nodes keep their content in the text slot.
        $slot = isset($this->_[HDOM_INFO_TEXT]) ? HDOM_INFO_TEXT : HDOM_INFO_INNER;

        return $this->_[$slot] = $value;
    }

    $is_new = !isset($this->attr[$name]);

    if ($is_new) {
        $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
        $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
    }

    $this->attr[$name] = $value;
}
1004
/**
 * Magic isset() handler.
 *
 * The virtual properties outertext, innertext and plaintext always count as
 * set. An attribute counts as set when its key exists, even with a null
 * value (no-value attributes such as nowrap, checked, selected).
 *
 * @param string $name Property or attribute name.
 *
 * @return bool
 */
function __isset($name)
{
    if ($name == 'outertext' || $name == 'innertext' || $name == 'plaintext') {
        return true;
    }

    // isset() can only be true for an existing key, so the key test decides.
    return array_key_exists($name, $this->attr);
}
1015
/**
 * Magic unset() handler: remove an attribute that is set to a non-null value.
 *
 * @param string $name Attribute name.
 *
 * @return void
 */
function __unset($name)
{
    if (!isset($this->attr[$name])) {
        return;
    }

    unset($this->attr[$name]);
}
1020
/**
 * Convert text from the document's source charset to its target charset.
 *
 * No conversion happens when the node has no document, when either charset
 * is empty, when both are equal, or when the target is UTF-8 and the text
 * already validates as UTF-8. For a UTF-8 target a byte order mark at either
 * end of the result is removed.
 *
 * @param string $text Text to convert.
 *
 * @return mixed The converted text (whatever iconv() returns when it is used).
 */
function convert_text($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    $from = '';
    $to = '';

    if ($this->dom) {
        $from = strtoupper($this->dom->_charset);
        $to = strtoupper($this->dom->_target_charset);
    }

    if (is_object($debug_object)) {
        $debug_object->debug_log(3,
            'source charset: '
            . $from
            . ' target charaset: '
            . $to
        );
    }

    $result = $text;

    $charsets_differ = !empty($from)
        && !empty($to)
        && (strcasecmp($from, $to) != 0);

    if ($charsets_differ) {
        // The reported encoding may be wrong: text that is already valid
        // UTF-8 is left untouched when UTF-8 is the target.
        $already_utf8 = (strcasecmp($to, 'UTF-8') == 0) && ($this->is_utf8($text));

        if (!$already_utf8) {
            $result = iconv($from, $to, $text);
        }
    }

    // Strip a UTF-8 byte order mark from either end of the output.
    if ($to === 'UTF-8') {
        $bom = "\xef\xbb\xbf";

        if (substr($result, 0, 3) === $bom) {
            $result = substr($result, 3);
        }

        if (substr($result, -3) === $bom) {
            $result = substr($result, 0, -3);
        }
    }

    return $result;
}
1070
/**
 * Check whether a byte string is structurally valid UTF-8.
 *
 * Only the lead-byte / continuation-byte structure is checked (including the
 * historical 5- and 6-byte forms); overlong encodings and surrogate ranges
 * are not rejected.
 *
 * @param string $str Byte string to check.
 *
 * @return bool
 */
static function is_utf8($str)
{
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
        $c = ord($str[$i]);

        // 0x00-0x7F is plain ASCII. 0x80 itself is a continuation byte and
        // must not be accepted here (the previous test was "$c > 128").
        if ($c < 128) {
            continue;
        }

        if ($c >= 254) { return false; }
        elseif ($c >= 252) { $bits = 6; }
        elseif ($c >= 248) { $bits = 5; }
        elseif ($c >= 240) { $bits = 4; }
        elseif ($c >= 224) { $bits = 3; }
        elseif ($c >= 192) { $bits = 2; }
        else { return false; } // continuation byte without a lead byte

        // The sequence must fit into the remaining input.
        if (($i + $bits) > $len) { return false; }

        while ($bits > 1) {
            $i++;
            $b = ord($str[$i]);
            if ($b < 128 || $b > 191) { return false; }
            $bits--;
        }
    }

    return true;
}
1097
/**
 * Determine the display size of an <img> element.
 *
 * The width/height attributes take precedence. A dimension that is not given
 * as an attribute is read from an inline style declaration when that
 * declaration is an integer pixel value (e.g. "width: 120px").
 *
 * Changes from the previous implementation: "0px" is accepted (the old
 * truthiness test on filter_var() rejected it), and whitespace around a style
 * value no longer hides its "px" suffix.
 *
 * Possible future enhancements: evaluate class/id based rules from style
 * sheets on the page or in external CSS files.
 *
 * @return array|false array('height' => ..., 'width' => ...) with -1 for an
 *                     unknown dimension, or false if this is not an img tag.
 */
function get_display_size()
{
    if ($this->tag !== 'img') {
        return false;
    }

    // Key order matches the original return value.
    $size = array(
        'height' => -1,
        'width' => -1
    );

    // See if there is a height or width attribute in the tag itself.
    foreach (array('width', 'height') as $dimension) {
        if (isset($this->attr[$dimension])) {
            $size[$dimension] = $this->attr[$dimension];
        }
    }

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        $declarations = array();

        // Thanks to user gnarf from stackoverflow for this regular expression.
        preg_match_all(
            '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
            $this->attr['style'],
            $matches,
            PREG_SET_ORDER
        );

        foreach ($matches as $match) {
            // [^;]+ is greedy and may capture trailing whitespace.
            $declarations[$match[1]] = trim($match[2]);
        }

        foreach (array('width', 'height') as $dimension) {
            // Attributes win over styles.
            if ($size[$dimension] != -1 || !isset($declarations[$dimension])) {
                continue;
            }

            $declared = $declarations[$dimension];

            // Only pixel values are understood.
            if (strtolower(substr($declared, -2)) !== 'px') {
                continue;
            }

            $pixels = substr($declared, 0, -2);

            // FILTER_VALIDATE_INT returns int 0 for "0", so compare with false.
            if (filter_var($pixels, FILTER_VALIDATE_INT) !== false) {
                $size[$dimension] = $pixels;
            }
        }
    }

    return $size;
}
1182
/**
 * Render this node via outertext() and optionally write the result to a file.
 *
 * @param string $filepath Target file; an empty string means "do not write".
 * @return string The string returned by outertext().
 */
public function save($filepath = '')
{
    $markup = $this->outertext();

    // Nothing to persist: just hand the markup back.
    if ($filepath === '') {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
1193
/**
 * Add one or more class names to this element's "class" attribute.
 *
 * A string is split on single spaces; an array is used as given. Names
 * already present (per hasClass()) are skipped.
 *
 * @param string|array $class Class name(s) to add.
 * @return void
 */
public function addClass($class)
{
    // The logger lives in the global scope; without this declaration the
    // invalid-type branch below would read an undefined local variable.
    global $debug_object;

    if (is_string($class)) {
        $class = explode(' ', $class);
    }

    if (!is_array($class)) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'Invalid type: ', gettype($class));
        }

        return;
    }

    foreach ($class as $c) {
        if (!isset($this->class)) {
            // No class attribute yet: start it with this name.
            $this->class = $c;
        } elseif (!$this->hasClass($c)) {
            $this->class .= ' ' . $c;
        }
    }
}
1218
/**
 * Tell whether the "class" attribute contains the given class name.
 *
 * The attribute value is split on single spaces and compared strictly.
 *
 * @param string $class Class name to look for.
 * @return bool True when present; false when absent, when there is no class
 *              attribute, or when $class is not a string.
 */
public function hasClass($class)
{
    // The logger lives in the global scope; without this declaration the
    // invalid-type branch below would read an undefined local variable.
    global $debug_object;

    if (!is_string($class)) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'Invalid type: ', gettype($class));
        }

        return false;
    }

    if (!isset($this->class)) {
        return false;
    }

    return in_array($class, explode(' ', $this->class), true);
}
1233
/**
 * Remove class names from this element's "class" attribute.
 *
 * @param string|array|null $class Name(s) to remove; a string is split on
 *                                 single spaces. null removes the whole
 *                                 attribute.
 * @return void
 */
public function removeClass($class = null)
{
    // No class attribute at all: nothing to do.
    if (!isset($this->class)) {
        return;
    }

    if ($class === null) {
        $this->removeAttribute('class');
        return;
    }

    $unwanted = is_string($class) ? explode(' ', $class) : $class;

    // Any other type is silently ignored.
    if (!is_array($unwanted)) {
        return;
    }

    $remaining = array_diff(explode(' ', $this->class), $unwanted);

    if (empty($remaining)) {
        // Every name was removed, so drop the attribute itself.
        $this->removeAttribute('class');
        return;
    }

    $this->class = implode(' ', $remaining);
}
1258
/**
 * Return this node's attribute array as stored in $attr.
 *
 * @return array
 */
public function getAllAttributes()
{
    return $this->attr;
}
1263
/**
 * DOM-style alias: read an attribute by delegating to __get().
 *
 * @param string $name Attribute name.
 * @return mixed Whatever __get() returns for $name.
 */
public function getAttribute($name)
{
    return $this->__get($name);
}
1268
/**
 * DOM-style alias: write an attribute by delegating to __set().
 *
 * @param string $name  Attribute name.
 * @param mixed  $value New value.
 * @return void
 */
public function setAttribute($name, $value)
{
    $this->__set($name, $value);
}
1273
/**
 * DOM-style alias: test for an attribute by delegating to __isset().
 *
 * @param string $name Attribute name.
 * @return mixed Whatever __isset() returns for $name.
 */
public function hasAttribute($name)
{
    return $this->__isset($name);
}
1278
/**
 * DOM-style alias: remove an attribute by assigning null through __set().
 *
 * @param string $name Attribute name.
 * @return void
 */
public function removeAttribute($name)
{
    $this->__set($name, null);
}
1283
/**
 * Detach this node from its parent by asking the parent to removeChild() it.
 * Does nothing when the node has no parent.
 *
 * @return void
 */
public function remove()
{
    if (!$this->parent) {
        return;
    }

    $this->parent->removeChild($this);
}
1290
/**
 * Remove a child element (and everything below it) from this node.
 *
 * The node is only removed when it is found in this node's $nodes list, in
 * this node's $children list and in the owning document's $nodes list;
 * otherwise the call is a no-op.
 *
 * @param simple_html_dom_node $node Child to remove.
 * @return void
 */
public function removeChild($node)
{
    $nidx = array_search($node, $this->nodes, true);
    $cidx = array_search($node, $this->children, true);
    $didx = array_search($node, $this->dom->nodes, true);

    if ($nidx === false || $cidx === false || $didx === false) {
        return;
    }

    // Depth first: remove the element children of $node.
    foreach ($node->children as $child) {
        $node->removeChild($child);
    }

    // Whatever is left in $node->nodes (entries that are not element
    // children) is unregistered from $node and from the document.
    foreach ($node->nodes as $entity) {
        $enidx = array_search($entity, $node->nodes, true);
        $edidx = array_search($entity, $node->dom->nodes, true);

        if ($enidx !== false && $edidx !== false) {
            unset($node->nodes[$enidx]);
            unset($node->dom->nodes[$edidx]);
        }
    }

    unset($this->nodes[$nidx]);
    unset($this->children[$cidx]);
    unset($this->dom->nodes[$didx]);

    // unset() leaves holes in the keys. These two lists are otherwise built
    // by appending, so re-index them to keep them contiguous for positional
    // access. The document-wide list is left as is.
    // NOTE(review): confirm nothing relies on stable keys in dom->nodes.
    $this->nodes = array_values($this->nodes);
    $this->children = array_values($this->children);

    $node->clear();
}
1321
/**
 * DOM-style alias: first match of the selector "#<id>" below this node.
 *
 * @param string $id Element id.
 * @return mixed Result of find() with index 0.
 */
public function getElementById($id)
{
    return $this->find('#' . $id, 0);
}
1326
/**
 * DOM-style alias: run find() with the selector "#<id>".
 *
 * @param string   $id  Element id.
 * @param int|null $idx Index forwarded to find(); null by default.
 * @return mixed Result of find().
 */
public function getElementsById($id, $idx = null)
{
    return $this->find('#' . $id, $idx);
}
1331
/**
 * DOM-style alias: first match of a tag-name selector below this node.
 *
 * @param string $name Tag name (passed to find() as the selector).
 * @return mixed Result of find() with index 0.
 */
public function getElementByTagName($name)
{
    return $this->find($name, 0);
}
1336
/**
 * DOM-style alias: run find() with a tag-name selector.
 *
 * @param string   $name Tag name (passed to find() as the selector).
 * @param int|null $idx  Index forwarded to find(); null by default.
 * @return mixed Result of find().
 */
public function getElementsByTagName($name, $idx = null)
{
    return $this->find($name, $idx);
}
1341
/**
 * DOM-style alias of parent().
 *
 * @return mixed Result of parent().
 */
public function parentNode()
{
    return $this->parent();
}
1346
/**
 * DOM-style alias of children().
 *
 * @param int $idx Index forwarded to children(); -1 by default.
 * @return mixed Result of children().
 */
public function childNodes($idx = -1)
{
    return $this->children($idx);
}
1351
/**
 * DOM-style alias of first_child().
 *
 * @return mixed Result of first_child().
 */
public function firstChild()
{
    return $this->first_child();
}
1356
/**
 * DOM-style alias of last_child().
 *
 * @return mixed Result of last_child().
 */
public function lastChild()
{
    return $this->last_child();
}
1361
/**
 * DOM-style alias of next_sibling().
 *
 * @return mixed Result of next_sibling().
 */
public function nextSibling()
{
    return $this->next_sibling();
}
1366
/**
 * DOM-style alias of prev_sibling().
 *
 * @return mixed Result of prev_sibling().
 */
public function previousSibling()
{
    return $this->prev_sibling();
}
1371
/**
 * DOM-style alias of has_child().
 *
 * @return mixed Result of has_child().
 */
public function hasChildNodes()
{
    return $this->has_child();
}
1376
/**
 * DOM-style accessor for this node's tag name.
 *
 * @return string The value of $tag.
 */
public function nodeName()
{
    return $this->tag;
}
1381
/**
 * Attach $node below this node by calling $node->parent($this).
 *
 * @param simple_html_dom_node $node Node to attach.
 * @return simple_html_dom_node The same $node, for chaining.
 */
public function appendChild($node)
{
    $node->parent($this);

    return $node;
}
1387
1388 }
1389
1390 class simple_html_dom
1391 {
/** Root node of the parsed document; created in prepare(). */
public $root = null;

/** Flat list of nodes belonging to this document; walked by clear(). */
public $nodes = array();

/** Callback stored by set_callback(); its call site is not in this part of the file. */
public $callback = null;

/** Whether tag and attribute names are lower-cased while parsing. */
public $lowercase = false;

/** Byte length of the trimmed input as measured in prepare(). */
public $original_size;

/** Current byte length of $doc (updated whenever $doc is rewritten). */
public $size;

/** Byte offset of the parser in $doc. */
protected $pos;

/** Working copy of the markup being parsed. */
protected $doc;

/** Character at $pos, or null once the end of $doc is reached. */
protected $char;

/** Running counter stored into HDOM_INFO_BEGIN / HDOM_INFO_END of nodes. */
protected $cursor;

/** Node that newly parsed nodes are linked to (see link_nodes()). */
protected $parent;

/** Map of placeholder key => text taken out of $doc by remove_noise(). */
protected $noise = array();

/** Characters treated as blank space by the tokenizer. */
protected $token_blank = " \t\r\n";

/** Characters that terminate an attribute name. */
protected $token_equal = ' =/>';

/** Characters that terminate a tag name. */
protected $token_slash = " />\r\n\t";

/** Characters that terminate an unquoted attribute value. */
protected $token_attr = ' >';

/** Charset determined by parse_charset(). */
public $_charset = '';

/** Target charset given to the constructor. */
public $_target_charset = '';

/** Text stored as HDOM_INFO_INNER of every <br> element. */
protected $default_br_text = '';

/** Default span text as passed to prepare(). */
public $default_span_text = '';

/** Tags that never become the current parent when opened (see read_tag()). */
protected $self_closing_tags = array(
    'area' => 1, 'base' => 1, 'br' => 1, 'col' => 1, 'embed' => 1,
    'hr' => 1, 'img' => 1, 'input' => 1, 'link' => 1, 'meta' => 1,
    'param' => 1, 'source' => 1, 'track' => 1, 'wbr' => 1
);

/** End tags that are allowed to close an ancestor instead of the direct parent. */
protected $block_tags = array(
    'body' => 1, 'div' => 1, 'form' => 1,
    'root' => 1, 'span' => 1, 'table' => 1
);

/**
 * For each opening tag (outer key): the currently open parent tags (inner
 * keys) that are closed implicitly when that tag starts.
 */
protected $optional_closing_tags = array(
    // Not optional, see
    // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
    'b' => array('b' => 1),
    'dd' => array('dd' => 1, 'dt' => 1),
    // Not optional, see
    // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
    'dl' => array('dd' => 1, 'dt' => 1),
    'dt' => array('dd' => 1, 'dt' => 1),
    'li' => array('li' => 1),
    'optgroup' => array('optgroup' => 1, 'option' => 1),
    'option' => array('optgroup' => 1, 'option' => 1),
    'p' => array('p' => 1),
    'rp' => array('rp' => 1, 'rt' => 1),
    'rt' => array('rp' => 1, 'rt' => 1),
    'td' => array('td' => 1, 'th' => 1),
    'th' => array('td' => 1, 'th' => 1),
    'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
);
1461
/**
 * Create a document and, when $str is given, parse it immediately.
 *
 * @param string|null $str             Markup, or an "http://" URL / existing
 *                                     file path that is read via load_file().
 * @param bool        $lowercase       Lower-case tag and attribute names.
 * @param bool        $forceTagsClosed When false, the table of implicitly
 *                                     closed tags is emptied before parsing.
 * @param string      $target_charset  Stored in $_target_charset.
 * @param bool        $stripRN         Replace \r and \n with spaces.
 * @param string      $defaultBRText   Inner text used for <br> elements.
 * @param string      $defaultSpanText Default span text.
 * @param int         $options         Bit flags forwarded to load().
 */
public function __construct(
    $str = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    // Forcing tags to be closed implies that we don't trust the html, but
    // it can lead to parsing errors if we SHOULD trust the html.
    // This has to happen before the document is parsed and has to target
    // the table the parser actually reads ($optional_closing_tags);
    // otherwise the flag has no effect on the markup passed in here.
    if (!$forceTagsClosed) {
        $this->optional_closing_tags = array();
    }

    if ($str) {
        if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
            $this->load_file($str);
        } else {
            $this->load(
                $str,
                $lowercase,
                $stripRN,
                $defaultBRText,
                $defaultSpanText,
                $options
            );
        }
    }

    $this->_target_charset = $target_charset;
}
1494
/**
 * Release everything held by the document (delegates to clear()).
 */
public function __destruct()
{
    $this->clear();
}
1499
/**
 * Parse a markup string into this document.
 *
 * Steps: reset state (prepare), replace "noise" such as scripts, comments
 * and styles with placeholders, parse the remaining markup, then determine
 * the charset.
 *
 * @param string $str             Markup to parse.
 * @param bool   $lowercase       Lower-case tag and attribute names.
 * @param bool   $stripRN         Replace \r and \n with spaces.
 * @param string $defaultBRText   Inner text used for <br> elements.
 * @param string $defaultSpanText Default span text.
 * @param int    $options         Bit flags; HDOM_SMARTY_AS_TEXT is honoured.
 * @return simple_html_dom $this, so calls can be chained.
 */
public function load(
    $str,
    $lowercase = true,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    global $debug_object;

    $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);

    // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
    // <script> blocks are taken out before anything else (in particular
    // before <style> blocks).
    $script_patterns = array(
        "'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is",
        "'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is",
    );

    foreach ($script_patterns as $pattern) {
        $this->remove_noise($pattern);
    }

    if ($stripRN) {
        // Line breaks become spaces; the length is refreshed afterwards.
        $this->doc = str_replace(array("\r", "\n"), ' ', $this->doc);
        $this->size = strlen($this->doc);
    }

    // Each entry: pattern, and whether the whole match (true) or only the
    // first sub-match (false) is replaced by a placeholder. Order matters.
    $noise_patterns = array(
        // cdata
        array("'<!\[CDATA\[(.*?)\]\]>'is", true),
        // comments
        array("'<!--(.*?)-->'is", false),
        // <style> tags
        array("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is", false),
        array("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is", false),
        // preformatted tags
        array("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is", false),
        // server side scripts
        array("'(<\?)(.*?)(\?>)'s", true),
    );

    foreach ($noise_patterns as $entry) {
        $this->remove_noise($entry[0], $entry[1]);
    }

    if ($options & HDOM_SMARTY_AS_TEXT) { // Strip Smarty scripts
        $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
    }

    $this->parse();

    // The root node ends where the cursor stands after parsing.
    $this->root->_[HDOM_INFO_END] = $this->cursor;
    $this->parse_charset();

    return $this;
}
1553
/**
 * Read a file or URL with file_get_contents() and parse its content.
 *
 * All arguments are forwarded unchanged to file_get_contents().
 *
 * @return false|null false when reading failed; nothing (null) otherwise.
 */
public function load_file()
{
    $contents = call_user_func_array('file_get_contents', func_get_args());

    if ($contents === false) {
        return false;
    }

    $this->load($contents, true);
}
1564
/**
 * Store a callback in $callback.
 *
 * @param callable $function_name Callback to store.
 * @return void
 */
public function set_callback($function_name)
{
    $this->callback = $function_name;
}
1569
/**
 * Forget the callback stored by set_callback().
 *
 * @return void
 */
public function remove_callback()
{
    $this->callback = null;
}
1574
/**
 * Render the document (inner text of the root node) and optionally write
 * it to a file.
 *
 * @param string $filepath Target file; an empty string means "do not write".
 * @return string The rendered markup.
 */
public function save($filepath = '')
{
    $markup = $this->root->innertext();

    if ($filepath === '') {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
1581
/**
 * Run a selector query against the root node.
 *
 * @param string   $selector  Selector string.
 * @param int|null $idx       Index forwarded to the root node's find().
 * @param bool     $lowercase Forwarded to the root node's find().
 * @return mixed Result of the root node's find().
 */
public function find($selector, $idx = null, $lowercase = false)
{
    $root = $this->root;

    return $root->find($selector, $idx, $lowercase);
}
1586
/**
 * Break the references held by this document so memory can be reclaimed.
 *
 * Every node in $nodes (and in $children, when such a property is set) is
 * cleared, then the current parent and the root node are cleared and
 * unset, and finally the working document and the noise table are dropped.
 *
 * @return void
 */
public function clear()
{
    // Clearing "children" as well is documented in the sourceforge
    // repository (2977248) as a fix for ongoing memory leaks that occur
    // even with the use of clear.
    foreach (array('nodes', 'children') as $list_name) {
        if (!isset($this->$list_name)) {
            continue;
        }

        foreach ($this->$list_name as $member) {
            $member->clear();
        }
    }

    foreach (array('parent', 'root') as $ref_name) {
        if (isset($this->$ref_name)) {
            $this->$ref_name->clear();
            unset($this->$ref_name);
        }
    }

    unset($this->doc, $this->noise);
}
1619
/**
 * Debug helper: delegate to the root node's dump().
 *
 * @param bool $show_attr Forwarded to the root node's dump().
 * @return void
 */
public function dump($show_attr = true)
{
    $root = $this->root;
    $root->dump($show_attr);
}
1624
/**
 * Reset the parser state and set up a fresh root node for a new document.
 *
 * @param string $str             Markup to parse (trimmed before use).
 * @param bool   $lowercase       Lower-case tag and attribute names.
 * @param string $defaultBRText   Inner text used for <br> elements.
 * @param string $defaultSpanText Default span text.
 * @return void
 */
protected function prepare(
    $str, $lowercase = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $this->clear();

    $markup = trim($str);
    $length = strlen($markup);

    $this->doc = $markup;
    $this->size = $length;
    $this->original_size = $length; // original size of the html
    $this->pos = 0;
    $this->cursor = 1;
    $this->noise = array();
    $this->nodes = array();
    $this->lowercase = $lowercase;
    $this->default_br_text = $defaultBRText;
    $this->default_span_text = $defaultSpanText;

    // The root is created only after the node list has been reset above.
    $this->root = new simple_html_dom_node($this);
    $this->root->tag = 'root';
    $this->root->_[HDOM_INFO_BEGIN] = -1;
    $this->root->nodetype = HDOM_TYPE_ROOT;

    // New nodes are attached to the root until a tag is opened.
    $this->parent = $this->root;

    if ($length > 0) {
        $this->char = $this->doc[0];
    }
}
1649
/**
 * Main parse loop: alternate between text runs and tags until read_tag()
 * reports that no further tag follows.
 *
 * @return bool Always true (returned once parsing is finished).
 */
protected function parse()
{
    for (;;) {
        $text = $this->copy_until_char('<');

        if ($text === '') {
            // No text before the next "<": try to read a tag. A false
            // result means there is nothing left to parse.
            if (!$this->read_tag()) {
                return true;
            }

            continue;
        }

        // Text between tags becomes a node of its own (not an element child).
        $text_node = new simple_html_dom_node($this);
        ++$this->cursor;
        $text_node->_[HDOM_INFO_TEXT] = $text;
        $this->link_nodes($text_node, false);
    }
}
1670
/**
 * Determine the document charset and store it in $_charset.
 *
 * Sources are tried in this order, the first non-empty one wins:
 *  1. the content-type reported by
 *     get_last_retrieve_url_contents_content_type(), if that function exists
 *  2. <meta http-equiv="Content-Type" content="...charset=...">
 *  3. <meta charset="...">
 *  4. mb_detect_encoding(), if available
 *  5. fallback "UTF-8"
 * ISO-8859-1 / latin1 / latin-1 are finally mapped to CP1252.
 *
 * @return string The detected charset.
 */
protected function parse_charset()
{
    global $debug_object;

    $charset = null;

    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();
        $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
        if ($success) {
            $charset = $matches[1];
            if (is_object($debug_object)) {
                $debug_object->debug_log(2,
                    'header content-type found charset of: '
                    . $charset
                );
            }
        }
    }

    if (empty($charset)) {
        // https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);

        if (!empty($el)) {
            $fullvalue = $el->content;
            if (is_object($debug_object)) {
                $debug_object->debug_log(2,
                    'meta content-type tag found'
                    . $fullvalue
                );
            }

            if (!empty($fullvalue)) {
                $success = preg_match(
                    '/charset=(.+)/i',
                    $fullvalue,
                    $matches
                );

                if ($success) {
                    $charset = $matches[1];
                } else {
                    // If there is a meta tag, and they don't specify the
                    // character set, research says that it's typically
                    // ISO-8859-1
                    if (is_object($debug_object)) {
                        $debug_object->debug_log(2,
                            'meta content-type tag couldn\'t be parsed. using iso-8859 default.'
                        );
                    }

                    $charset = 'ISO-8859-1';
                }
            }
        }
    }

    if (empty($charset)) {
        // https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
        if ($meta = $this->root->find('meta[charset]', 0)) {
            $charset = $meta->charset;
            if (is_object($debug_object)) {
                $debug_object->debug_log(2, 'meta charset: ' . $charset);
            }
        }
    }

    if (empty($charset)) {
        // Try to guess the charset based on the content
        // Requires Multibyte String (mbstring) support (optional)
        if (function_exists('mb_detect_encoding')) {
            /**
             * mb_detect_encoding() is not intended to distinguish between
             * charsets, especially single-byte charsets. Its primary
             * purpose is to detect which multibyte encoding is in use,
             * i.e. UTF-8, UTF-16, shift-JIS, etc.
             *
             * -- https://bugs.php.net/bug.php?id=38138
             *
             * Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will
             * always result in CP1251/ISO-8859-5 and vice versa.
             *
             * Thus, only detect if it's either UTF-8 or CP1252/ISO-8859-1
             * to stay compatible.
             */
            $encoding = mb_detect_encoding(
                $this->doc,
                array( 'UTF-8', 'CP1252', 'ISO-8859-1' )
            );

            if ($encoding === 'CP1252' || $encoding === 'ISO-8859-1') {
                // Due to a limitation of mb_detect_encoding
                // 'CP1251'/'ISO-8859-5' will be detected as
                // 'CP1252'/'ISO-8859-1'. This will cause iconv to fail, in
                // which case we can simply assume it is the other charset.
                if (!@iconv('CP1252', 'UTF-8', $this->doc)) {
                    $encoding = 'CP1251';
                }
            }

            if ($encoding !== false) {
                $charset = $encoding;
                if (is_object($debug_object)) {
                    $debug_object->debug_log(2, 'mb_detect: ' . $charset);
                }
            }
        }
    }

    if (empty($charset)) {
        // Assume it's UTF-8 as it is the most likely charset to be used
        $charset = 'UTF-8';
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'No match found, assume ' . $charset);
        }
    }

    // Since CP1252 is a superset, if we get one of its subsets, we want
    // it instead.
    $charset_lower = strtolower($charset);

    if ($charset_lower == 'iso-8859-1'
        || $charset_lower == 'latin1'
        || $charset_lower == 'latin-1') {
        // Log first: once $charset is overwritten the message could no
        // longer name the charset that is being replaced.
        if (is_object($debug_object)) {
            $debug_object->debug_log(2,
                'replacing ' . $charset . ' with CP1252 as its a superset'
            );
        }

        $charset = 'CP1252';
    }

    if (is_object($debug_object)) {
        $debug_object->debug_log(1, 'EXIT - ' . $charset);
    }

    return $this->_charset = $charset;
}
1808
/**
 * Parse the tag that starts at the current position and link the resulting
 * node into the tree.
 *
 * Handles end tags (including end tags that do not match the current
 * parent), "<!...>" constructs (comments, doctype, CDATA), malformed tag
 * names (stored as text) and regular start tags with their attributes.
 *
 * Only the first occurrence of an attribute name is recorded; repeated
 * names are still consumed from the input but leave the node untouched.
 *
 * @return bool false when the current character is not "<" (nothing left
 *              to read as a tag); true in every other case.
 */
protected function read_tag()
{
    // Set end position if no further tags found
    if ($this->char !== '<') {
        $this->root->_[HDOM_INFO_END] = $this->cursor;
        return false;
    }

    $begin_tag_pos = $this->pos;
    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    // end tag
    if ($this->char === '/') {
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

        // Skip whitespace in end tags (i.e. in "</   html>")
        $this->skip($this->token_blank);
        $tag = $this->copy_until_char('>');

        // Skip attributes in end tags
        if (($pos = strpos($tag, ' ')) !== false) {
            $tag = substr($tag, 0, $pos);
        }

        $parent_lower = strtolower($this->parent->tag);
        $tag_lower = strtolower($tag);

        // The end tag is supposed to close the parent tag. Handle situations
        // when it doesn't
        if ($parent_lower !== $tag_lower) {
            // Parent tag does not have to be closed necessarily (optional closing tag)
            // Current tag is a block tag, so it may close an ancestor
            if (isset($this->optional_closing_tags[$parent_lower])
                && isset($this->block_tags[$tag_lower])) {

                $this->parent->_[HDOM_INFO_END] = 0;
                $org_parent = $this->parent;

                // Traverse ancestors to find a matching opening tag
                // Stop at root node
                while (($this->parent->parent)
                    && strtolower($this->parent->tag) !== $tag_lower
                ) {
                    $this->parent = $this->parent->parent;
                }

                // If we don't have a match add current tag as text node
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent; // restore original parent

                    if ($this->parent->parent) {
                        $this->parent = $this->parent->parent;
                    }

                    $this->parent->_[HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent)
                && isset($this->block_tags[$tag_lower])
            ) {
                // Grandparent exists and current tag is a block tag, so our
                // parent doesn't have an end tag
                $this->parent->_[HDOM_INFO_END] = 0; // No end tag
                $org_parent = $this->parent;

                // Traverse ancestors to find a matching opening tag
                // Stop at root node
                while (($this->parent->parent)
                    && strtolower($this->parent->tag) !== $tag_lower
                ) {
                    $this->parent = $this->parent->parent;
                }

                // If we don't have a match add current tag as text node
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent; // restore original parent
                    $this->parent->_[HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent)
                && strtolower($this->parent->parent->tag) === $tag_lower
            ) { // Grandparent exists and current tag closes it
                $this->parent->_[HDOM_INFO_END] = 0;
                $this->parent = $this->parent->parent;
            } else { // Random tag, add as text node
                return $this->as_text_node($tag);
            }
        }

        // Set end position of parent tag to current cursor position
        $this->parent->_[HDOM_INFO_END] = $this->cursor;

        if ($this->parent->parent) {
            $this->parent = $this->parent->parent;
        }

        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // start tag
    $node = new simple_html_dom_node($this);
    $node->_[HDOM_INFO_BEGIN] = $this->cursor;
    ++$this->cursor;
    $tag = $this->copy_until($this->token_slash); // Get tag name
    $node->tag_start = $begin_tag_pos;

    // doctype, cdata & comments...
    // <!DOCTYPE html>
    // <![CDATA[ ... ]]>
    // <!-- Comment -->
    if (isset($tag[0]) && $tag[0] === '!') {
        $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

        if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")
            $node->nodetype = HDOM_TYPE_COMMENT;
            $node->tag = 'comment';
        } else { // Could be doctype or CDATA but we don't care
            $node->nodetype = HDOM_TYPE_UNKNOWN;
            $node->tag = 'unknown';
        }

        if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }

        $this->link_nodes($node, true);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // The start tag cannot contain another start tag, if so add as text
    // i.e. "<<html>"
    // (Plain comparison: the former "$pos = strpos(...) !== false" assigned
    // a boolean to an unused variable because "!==" binds tighter than "=".)
    if (strpos($tag, '<') !== false) {
        $tag = '<' . substr($tag, 0, -1);
        $node->_[HDOM_INFO_TEXT] = $tag;
        $this->link_nodes($node, false);
        $this->char = $this->doc[--$this->pos]; // prev
        return true;
    }

    // Handle invalid tag names (i.e. "<html#doc>")
    if (!preg_match('/^\w[\w:-]*$/', $tag)) {
        $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');

        // Next char is the beginning of a new tag, don't touch it.
        if ($this->char === '<') {
            $this->link_nodes($node, false);
            return true;
        }

        // Next char closes current tag, add and be done with it.
        if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }
        $this->link_nodes($node, false);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // begin tag, add new node
    $node->nodetype = HDOM_TYPE_ELEMENT;
    $tag_lower = strtolower($tag);
    $node->tag = ($this->lowercase) ? $tag_lower : $tag;

    // handle optional closing tags
    if (isset($this->optional_closing_tags[$tag_lower])) {
        // Traverse ancestors to close all optional closing tags
        while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
            $this->parent->_[HDOM_INFO_END] = 0;
            $this->parent = $this->parent->parent;
        }
        $node->parent = $this->parent;
    }

    $guard = 0; // prevent infinity loop

    // [0] Space between tag and first attribute
    $space = array($this->copy_skip($this->token_blank), '', '');

    // attributes
    do {
        // Everything until the first equal sign should be the attribute name
        $name = $this->copy_until($this->token_equal);

        if ($name === '' && $this->char !== null && $space[0] === '') {
            break;
        }

        if ($guard === $this->pos) { // Escape infinite loop
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            continue;
        }

        $guard = $this->pos;

        // handle endless '<'
        // Out of bounds before the tag ended
        if ($this->pos >= $this->size - 1 && $this->char !== '>') {
            $node->nodetype = HDOM_TYPE_TEXT;
            $node->_[HDOM_INFO_END] = 0;
            $node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
            $node->tag = 'text';
            $this->link_nodes($node, false);
            return true;
        }

        // handle mismatch '<'
        // Attributes cannot start after opening tag
        if ($this->doc[$this->pos - 1] == '<') {
            $node->nodetype = HDOM_TYPE_TEXT;
            $node->tag = 'text';
            $node->attr = array();
            $node->_[HDOM_INFO_END] = 0;
            $node->_[HDOM_INFO_TEXT] = substr(
                $this->doc,
                $begin_tag_pos,
                $this->pos - $begin_tag_pos - 1
            );
            $this->pos -= 2;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $this->link_nodes($node, false);
            return true;
        }

        if ($name !== '/' && $name !== '') { // this is a attribute name
            // [1] Whitespace after attribute name
            $space[1] = $this->copy_skip($this->token_blank);

            $name = $this->restore_noise($name); // might be a noisy name

            if ($this->lowercase) { $name = strtolower($name); }

            // The HDOM_INFO_QUOTE and HDOM_INFO_SPACE lists get one entry
            // per attribute, in the same order as $node->attr. A repeated
            // attribute name adds nothing to $node->attr, so it must not
            // add to those lists either, and it must not replace the value
            // recorded for the first occurrence.
            $is_new = !isset($node->attr[$name]);

            if ($this->char === '=') { // attribute with value
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
                $this->parse_attr($node, $name, $space); // get attribute value
            } else {
                //no value attr: nowrap, checked selected...
                if ($is_new) {
                    $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
                    $node->attr[$name] = true;
                }
                if ($this->char != '>') { $this->char = $this->doc[--$this->pos]; } // prev
            }

            if ($is_new) {
                $node->_[HDOM_INFO_SPACE][] = $space;
            }

            // prepare for next attribute
            $space = array(
                $this->copy_skip($this->token_blank),
                '',
                ''
            );
        } else { // no more attributes
            break;
        }
    } while ($this->char !== '>' && $this->char !== '/'); // go until the tag ended

    $this->link_nodes($node, true);
    $node->_[HDOM_INFO_ENDSPACE] = $space[0];

    // handle empty tags (i.e. "<div/>")
    if ($this->copy_until_char('>') === '/') {
        $node->_[HDOM_INFO_ENDSPACE] .= '/';
        $node->_[HDOM_INFO_END] = 0;
    } else {
        // reset parent
        if (!isset($this->self_closing_tags[strtolower($node->tag)])) {
            $this->parent = $node;
        }
    }

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    // If it's a BR tag, we need to set its text to the default text.
    // This way when we see it in plaintext, we can generate formatting that the user wants.
    // since a br tag never has sub nodes, this works well.
    if ($node->tag === 'br') {
        $node->_[HDOM_INFO_INNER] = $this->default_br_text;
    }

    return true;
}
2086
/**
 * Read an attribute value at the current position and store it on $node.
 *
 * The value may be double-quoted, single-quoted or unquoted. If $node
 * already has an attribute called $name, the value is still consumed from
 * the input but is not stored (the first occurrence wins).
 *
 * @param simple_html_dom_node $node   Node that receives the attribute.
 * @param string               $name   Attribute name.
 * @param array                &$space Whitespace record of the attribute;
 *                                     index 2 receives the blank space
 *                                     between "=" and the value.
 * @return void
 */
protected function parse_attr($node, $name, &$space)
{
    $is_duplicate = isset($node->attr[$name]);

    // Whitespace between "=" and the value is always consumed so that the
    // switch below starts on the value itself. Skipping this for duplicates
    // left the parser on the blank, produced an empty value and let the
    // real value be picked up as an attribute name afterwards.
    $blank = $this->copy_skip($this->token_blank);

    if (!$is_duplicate) {
        $space[2] = $blank;
    }

    switch ($this->char) {
        case '"':
            $quote_type = HDOM_QUOTE_DOUBLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('"');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        case '\'':
            $quote_type = HDOM_QUOTE_SINGLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('\'');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        default:
            $quote_type = HDOM_QUOTE_NO;
            $value = $this->copy_until($this->token_attr);
    }

    $value = $this->restore_noise($value);

    // PaperG: Attributes should not have \r or \n in them, that counts as
    // html whitespace.
    $value = str_replace("\r", '', $value);
    $value = str_replace("\n", '', $value);

    // PaperG: If this is a "class" selector, lets get rid of the preceding
    // and trailing space since some people leave it in the multi class case.
    if ($name === 'class') {
        $value = trim($value);
    }

    if (!$is_duplicate) {
        $node->_[HDOM_INFO_QUOTE][] = $quote_type;
        $node->attr[$name] = $value;
    }
}
2130
/**
 * Attach $node to the node the parser is currently inside of.
 *
 * @param simple_html_dom_node &$node    Node to attach.
 * @param bool                 $is_child When true the node is also added
 *                                       to the parent's $children list.
 * @return void
 */
protected function link_nodes(&$node, $is_child)
{
    $owner = $this->parent;

    $node->parent = $owner;
    $owner->nodes[] = $node;

    if (!$is_child) {
        return;
    }

    $owner->children[] = $node;
}
2139
/**
 * Record an end tag that closes nothing as literal text ("</tag>") and
 * move the parser one character forward.
 *
 * @param string $tag Tag name of the stray end tag.
 * @return bool Always true.
 */
protected function as_text_node($tag)
{
    $text_node = new simple_html_dom_node($this);
    ++$this->cursor;
    $text_node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
    $this->link_nodes($text_node, false);

    // next
    ++$this->pos;
    $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null;

    return true;
}
2149
/**
 * Advance past every leading character that is contained in $chars.
 *
 * @param string $chars Set of characters to skip.
 * @return void
 */
protected function skip($chars)
{
    $span = strspn($this->doc, $chars, $this->pos);
    $this->pos += $span;

    // next
    $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null;
}
2155
/**
 * Like skip(), but also return the characters that were skipped.
 *
 * @param string $chars Set of characters to skip.
 * @return string The skipped run, or '' when nothing was skipped.
 */
protected function copy_skip($chars)
{
    $start = $this->pos;
    $span = strspn($this->doc, $chars, $start);

    $this->pos += $span;
    // next
    $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null;

    return ($span === 0) ? '' : substr($this->doc, $start, $span);
}
2165
/**
 * Consume and return everything up to (not including) the first character
 * that is contained in $chars.
 *
 * @param string $chars Set of terminating characters.
 * @return string The consumed run.
 */
protected function copy_until($chars)
{
    $start = $this->pos;
    $span = strcspn($this->doc, $chars, $start);

    $this->pos += $span;
    // next
    $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null;

    return substr($this->doc, $start, $span);
}
2174
/**
 * Consume and return everything up to (not including) the next occurrence
 * of $char. When $char does not occur any more, the rest of the document is
 * consumed and the parser is put into its end state.
 *
 * @param string $char Terminating character.
 * @return string The consumed text; '' at end of input or when the parser
 *                already stands on $char.
 */
protected function copy_until_char($char)
{
    if ($this->char === null) {
        return '';
    }

    $start = $this->pos;
    $hit = strpos($this->doc, $char, $start);

    if ($hit === false) {
        // Not found: take the remainder and mark the end of input.
        $rest = substr($this->doc, $start, $this->size - $start);
        $this->char = null;
        $this->pos = $this->size;
        return $rest;
    }

    if ($hit === $start) {
        return '';
    }

    $this->char = $this->doc[$hit];
    $this->pos = $hit;

    return substr($this->doc, $start, $hit - $start);
}
2193
/**
 * Replace every match of $pattern in the document with a placeholder key
 * and remember the removed text in $noise under that key.
 *
 * @param string $pattern    PCRE pattern; sub-match 1 is the "inner" part.
 * @param bool   $remove_tag true: replace the whole match; false: replace
 *                           only sub-match 1.
 * @return void
 */
protected function remove_noise($pattern, $remove_tag = false)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    $count = preg_match_all(
        $pattern,
        $this->doc,
        $matches,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );

    // 0 = entire match, 1 = submatch
    $group = $remove_tag ? 0 : 1;

    // Walk the matches back to front so the recorded offsets of earlier
    // matches stay valid while the document is being rewritten.
    for ($i = $count - 1; $i >= 0; $i--) {
        $key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);

        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'key is: ' . $key);
        }

        $removed = $matches[$i][$group][0];
        $offset = $matches[$i][$group][1];

        $this->noise[$key] = $removed;
        $this->doc = substr_replace($this->doc, $key, $offset, strlen($removed));
    }

    // reset the length of content
    $this->size = strlen($this->doc);

    if ($this->size > 0) {
        $this->char = $this->doc[0];
    }
}
2225
/**
 * Replace noise placeholders ("___noise___" followed by five characters)
 * in $text with the text stored for them in $noise.
 *
 * Restored text is scanned again because it may itself contain
 * placeholders. The scan never revisits text it inserted for an unknown or
 * truncated placeholder, and it restores each key at most once per call,
 * so it always terminates - also for input that contains hand-written
 * placeholder strings.
 *
 * @param string $text Text that may contain placeholders.
 * @return string Text with placeholders resolved.
 */
public function restore_noise($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    $offset = 0;         // where the next search starts
    $restored = array(); // keys already substituted during this call

    while (($pos = strpos($text, '___noise___', $offset)) !== false) {
        // Sometimes there is a broken piece of markup, and we don't GET the
        // pos+11 etc... token which indicates a problem outside of us...
        if (strlen($text) > $pos + 15) {
            // marker (11 characters) + 5 characters of key suffix
            $key = substr($text, $pos, 16);

            if (is_object($debug_object)) {
                $debug_object->debug_log(2, 'located key of: ' . $key);
            }

            if (isset($this->noise[$key]) && !isset($restored[$key])) {
                $restored[$key] = true;
                $text = substr($text, 0, $pos)
                    . $this->noise[$key]
                    . substr($text, $pos + 16);
                // Search again from the same spot: the restored text may
                // contain further placeholders.
                $offset = $pos;
            } elseif (isset($this->noise[$key])) {
                // This key was already expanded in this call. Expanding it
                // again could loop forever when a noise entry contains its
                // own key, so the placeholder is left as is and skipped.
                $offset = $pos + 16;
            } else {
                // The message repeats the key and therefore the marker;
                // continue after it, otherwise the same spot would be found
                // again and again.
                $replacement = 'UNDEFINED NOISE FOR KEY: ' . $key;
                $text = substr($text, 0, $pos)
                    . $replacement
                    . substr($text, $pos + 16);
                $offset = $pos + strlen($replacement);
            }
        } else {
            // There is no valid key being given back to us... We must get
            // rid of the ___noise___ or we will have a problem.
            $replacement = 'NO NUMERIC NOISE KEY';
            $text = substr($text, 0, $pos)
                . $replacement
                . substr($text, $pos + 11);
            $offset = $pos + strlen($replacement);
        }
    }

    return $text;
}
2271
/**
 * Find the first stored noise text that contains $text.
 *
 * @param string $text Substring to look for.
 * @return string|null The matching noise text, or null when none matches.
 */
public function search_noise($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    foreach ($this->noise as $candidate) {
        if (strpos($candidate, $text) === false) {
            continue;
        }

        return $candidate;
    }

    return null;
}
2283
/**
 * String conversion: the inner text of the root node.
 *
 * @return string
 */
public function __toString()
{
    $root = $this->root;

    return $root->innertext();
}
2288
/**
 * Virtual read-only properties of the document.
 *
 *  - outertext, innertext: inner text of the root node
 *  - plaintext:            text() of the root node
 *  - charset:              $_charset
 *  - target_charset:       $_target_charset
 *
 * @param string $name Property name.
 * @return mixed The value, or null for any other name.
 */
public function __get($name)
{
    if ($name === 'outertext' || $name === 'innertext') {
        return $this->root->innertext();
    }

    if ($name === 'plaintext') {
        return $this->root->text();
    }

    if ($name === 'charset') {
        return $this->_charset;
    }

    if ($name === 'target_charset') {
        return $this->_target_charset;
    }

    return null;
}
2304
/**
 * DOM-style alias: childNodes() of the root node.
 *
 * @param int $idx Index forwarded to the root node; -1 by default.
 * @return mixed Result of the root node's childNodes().
 */
public function childNodes($idx = -1)
{
    $root = $this->root;

    return $root->childNodes($idx);
}
2309
/**
 * DOM-style alias: first_child() of the root node.
 *
 * @return mixed Result of the root node's first_child().
 */
public function firstChild()
{
    $root = $this->root;

    return $root->first_child();
}
2314
/**
 * DOM-style alias: last_child() of the root node.
 *
 * @return mixed Result of the root node's last_child().
 */
public function lastChild()
{
    $root = $this->root;

    return $root->last_child();
}
2319
/**
 * Build a detached element by parsing "<name>value</name>" with
 * str_get_html() and returning the first child of that document.
 * Errors raised while doing so are suppressed.
 *
 * @param string      $name  Tag name.
 * @param string|null $value Inner markup.
 * @return mixed The first child of the temporary document.
 */
public function createElement($name, $value = null)
{
    // Kept as one expression so the error suppression covers all of it.
    return @str_get_html('<' . $name . '>' . $value . '</' . $name . '>')->firstChild();
}
2324
/**
 * Build a detached text node by parsing $value with str_get_html() and
 * returning the last entry of that document's node list.
 * Errors raised while doing so are suppressed.
 *
 * @param string $value Text content.
 * @return mixed The last node of the temporary document.
 */
public function createTextNode($value)
{
    // Kept as one expression so the error suppression covers all of it.
    return @end(str_get_html($value)->nodes);
}
2329
/**
 * DOM-style alias: first match of the selector "#<id>" in the document.
 *
 * @param string $id Element id.
 * @return mixed Result of find() with index 0.
 */
public function getElementById($id)
{
    return $this->find('#' . $id, 0);
}
2334
/**
 * DOM-style alias: run find() with the selector "#<id>".
 *
 * @param string   $id  Element id.
 * @param int|null $idx Index forwarded to find(); null by default.
 * @return mixed Result of find().
 */
public function getElementsById($id, $idx = null)
{
    return $this->find('#' . $id, $idx);
}
2339
/**
 * DOM-style alias: first match of a tag-name selector in the document.
 *
 * @param string $name Tag name (passed to find() as the selector).
 * @return mixed Result of find() with index 0.
 */
public function getElementByTagName($name)
{
    return $this->find($name, 0);
}
2344
/**
 * DOM-style alias: run find() with a tag-name selector.
 *
 * The default index is null, matching getElementsById() on this class and
 * getElementsByTagName() on nodes. The previous default of -1 made the
 * plural getter pass a concrete index to find() even when the caller asked
 * for no particular element.
 * NOTE(review): find() is defined outside this part of the file; a negative
 * index presumably selects a single element counted from the end - confirm.
 *
 * @param string   $name Tag name (passed to find() as the selector).
 * @param int|null $idx  Index forwarded to find(); null by default.
 * @return mixed Result of find().
 */
public function getElementsByTagName($name, $idx = null)
{
    return $this->find($name, $idx);
}
2349
/**
 * DOM-style alias of load_file().
 *
 * The arguments are forwarded one by one. Passing the collected argument
 * array as a single parameter handed file_get_contents() an array instead
 * of a file name.
 *
 * @return false|null Whatever load_file() returns (false when reading failed).
 */
public function loadFile()
{
    $args = func_get_args();

    return call_user_func_array(array($this, 'load_file'), $args);
}
2355 }
2356