PluginProbe ʕ •ᴥ•ʔ
MailPoet – Newsletters, Email Marketing, and Automation / 3.100.2
MailPoet – Newsletters, Email Marketing, and Automation v3.100.2
5.28.1 5.28.0 5.27.0 5.26.0 5.26.1 5.25.0 5.24.0 4.43.0 4.43.1 4.44.0 4.44.1 4.45.0 4.46.0 4.47.0 4.48.0 4.48.1 4.48.2 4.49.0 4.49.1 4.5.0 4.5.1 4.5.2 4.50.0 4.50.1 4.51.0 4.51.1 4.51.2 4.52.0 4.53.0 4.54.0 4.55.0 4.56.0 4.57.0 4.58.0 4.58.1 4.58.2 4.6.0 4.6.1 4.6.2 4.7.0 4.7.1 4.8.0 4.8.1 4.9.0 5.0.0 5.0.1 5.0.2 5.1.0 5.1.1 5.10.0 5.10.1 5.11.0 5.12.0 5.12.1 5.12.10 5.12.11 5.12.12 5.12.13 5.12.2 5.12.3 5.12.4 5.12.5 5.12.6 5.12.7 5.12.8 5.12.9 5.13.0 5.13.1 5.13.2 5.14.0 5.14.1 5.14.2 5.14.3 5.15.0 5.15.1 5.16.0 5.16.1 5.16.2 5.16.3 5.16.4 5.17.0 5.17.1 5.17.2 5.17.3 5.17.4 5.17.5 5.17.6 5.18.0 5.19.0 5.2.0 5.2.1 5.2.2 5.2.3 5.20.0 5.21.0 5.21.1 5.21.2 5.21.3 5.22.0 5.22.1 5.22.2 5.22.3 5.22.4 5.23.0 5.23.1 5.23.2 5.3.0 5.3.1 5.3.2 5.3.3 5.3.4 5.3.5 5.3.6 5.3.7 5.4.0 5.4.1 5.4.2 5.5.0 5.5.1 5.5.2 5.6.0 5.6.1 5.6.2 5.6.3 5.6.4 5.7.0 5.7.1 5.8.0 5.8.1 5.9.0 3.0.0-beta.15 3.7.1 3.0.0-beta.16 3.7.2 3.0.0-beta.17 3.7.3 3.0.0-beta.18 3.7.4 3.0.0-beta.19 3.7.5 3.0.0-beta.2 3.7.6 3.0.0-beta.20 3.7.8 3.0.0-beta.21 3.70.0 3.0.0-beta.22 3.71.0 3.0.0-beta.23 3.71.1 3.0.0-beta.23.1 3.71.2 3.0.0-beta.23.2 3.71.3 3.0.0-beta.24 3.72.0 3.0.0-beta.25 3.73.0 3.0.0-beta.26 3.73.1 3.0.0-beta.27 3.73.2 3.0.0-beta.28 3.74.0 3.0.0-beta.29 3.74.1 3.0.0-beta.3 3.74.2 3.0.0-beta.30 3.74.3 3.0.0-beta.31 3.75.0 3.0.0-beta.32 3.75.1 3.0.0-beta.33 3.76.0 3.0.0-beta.33.1 3.77.0 3.0.0-beta.34.0.0 3.77.1 3.0.0-beta.36.0.0 3.78.0 3.0.0-beta.36.0.1 3.79.0 3.0.0-beta.36.2.0 3.8 3.0.0-beta.36.3.0 3.8.1 3.0.0-beta.36.3.1 3.8.2 3.0.0-beta.37.0.0 3.8.3 3.0.0-beta.4 3.8.4 3.0.0-beta.5 3.8.5 3.0.0-beta.6 3.8.6 3.0.0-beta.7 3.80.0 3.0.0-beta.7.1 3.81.0 3.0.0-beta.8 3.82.0 3.0.0-beta.9 3.83.0 3.0.0-rc.1.0.0 3.84.0 3.0.0-rc.1.0.1 3.84.1 3.0.0-rc.1.0.2 3.85.0 3.0.0-rc.1.0.3 3.85.1 3.0.0-rc.1.0.4 3.86.0 3.0.0-rc.2.0.0 3.87.0 3.0.0-rc.2.0.1 3.87.1 3.0.0-rc.2.0.2 3.87.2 3.0.0-rc.2.0.3 3.88.0 3.0.1 3.88.1 3.0.2 3.88.2 3.0.3 3.89.0 3.0.4 3.89.1 3.0.5 3.89.2 3.0.6 3.89.3 3.0.7 3.89.4 3.0.8 3.9.0 3.0.9 3.9.1 3.1.0 
3.90.0 3.10 3.90.1 3.10.1 3.90.2 3.100.0 3.91.0 3.100.1 3.91.1 3.100.2 3.92.0 3.101.0 3.92.1 3.101.1 3.93.0 3.102.0 3.93.1 3.102.1 3.94.0 3.103.0 3.95.0 3.103.1 3.95.1 3.11.0 3.96.0 3.11.1 3.96.1 3.11.2 3.97.0 3.11.3 3.98.0 3.11.4 3.98.1 3.11.5 3.99.0 3.12.0 3.99.1 3.12.1 4.0.0 3.13.0 4.0.1 3.14.0 4.1.0 3.14.1 4.1.1 3.15.0 4.10.0 3.16.0 4.11.0 3.16.1 4.11.1 3.16.2 4.12.0 3.16.3 4.12.1 3.17.0 4.12.2 3.17.1 4.13.0 3.17.2 4.14.0 3.18.0 4.15.0 3.18.1 4.16.0 3.18.2 4.17.0 3.19.0 4.17.1 3.19.1 4.18.0 3.19.2 4.18.1 3.19.3 4.19.0 3.2.0 4.2.0 3.2.1 4.20.0 3.2.2 4.20.1 3.2.3 4.20.2 3.2.4 4.21.0 3.2.5 4.22.0 3.20.0 4.22.1 3.21.0 4.22.2 3.21.1 4.23.0 3.22.0 4.24.0 3.23.0 4.25.0 3.23.1 4.26.0 3.23.2 4.26.1 3.24.0 4.27.0 3.25.0 4.28.0 3.25.1 4.29.0 3.26.0 4.3.0 3.26.1 4.3.1 3.27.0 4.30.0 3.28.0 4.31.0 3.29.0 4.31.1 3.3.0 4.32.0 3.3.1 4.33.0 3.3.2 4.34.0 3.3.3 4.35.0 3.3.4 4.35.1 3.3.5 4.36.0 3.3.6 4.37.0 3.30.0 4.38.0 3.31.0 4.39.0 3.31.1 4.4.0 3.32.0 4.40.0 3.32.1 4.41.0 3.32.2 4.41.1 3.33.0 4.41.2 3.34.0 4.41.3 3.34.1 4.42.0 3.34.2 4.42.1 3.34.3 3.34.4 3.35.0 3.35.1 3.35.3 3.35.4 3.36.0 3.37.0 3.37.1 3.37.2 3.37.3 3.38.0 3.38.1 3.39.0 3.39.1 3.39.2 3.4.0 3.4.1 3.4.2 3.4.3 3.4.4 3.40.0 3.40.1 3.41.0 3.41.1 3.41.2 3.42.0 3.42.1 3.42.2 3.42.3 3.43.0 3.43.1 3.44.0 3.45.0 3.45.1 3.46.0 3.46.1 3.46.10 3.46.11 3.46.12 3.46.13 3.46.14 3.46.2 3.46.3 3.46.4 3.46.5 3.46.6 3.46.7 3.46.8 3.46.9 3.47.0 3.47.1 3.47.10 3.47.11 3.47.2 3.47.3 3.47.5 3.47.6 3.47.7 3.47.9 3.48.0 3.48.1 3.49.0 3.49.1 3.5.0 3.5.1 3.50.0 3.51.0 3.51.1 3.51.2 3.52.0 3.53.0 3.54.0 3.54.1 3.54.2 3.54.3 3.55.0 3.55.1 3.56.0 3.56.1 3.56.2 3.57.0 3.57.1 3.58.0 3.59.0 3.59.1 3.59.2 3.6.0 3.6.1 3.6.2 3.6.3 3.6.4 3.6.5 3.6.6 3.6.7 3.60.0 3.60.1 3.60.10 3.60.11 3.60.12 3.60.2 3.60.3 3.60.4 3.60.6 3.60.7 3.60.8 3.60.9 3.61.0 3.62.0 3.62.1 3.63.0 3.64.0 3.64.1 3.64.2 3.64.3 3.65.0 trunk 3.65.1 3.0.0 3.66.0 3.0.0-beta.1 3.67.0 3.0.0-beta.10 3.67.1 3.0.0-beta.11 3.68.0 3.0.0-beta.12 3.69.0 3.0.0-beta.13 3.69.1 3.0.0-beta.14 3.7.0
mailpoet / lib-3rd-party / pquery / gan_parser_html.php
mailpoet / lib-3rd-party / pquery Last commit date
third_party 4 years ago IQuery.php 4 years ago LICENSE 4 years ago gan_formatter.php 4 years ago gan_node_html.php 4 years ago gan_parser_html.php 4 years ago gan_selector_html.php 4 years ago gan_tokenizer.php 4 years ago gan_xml2array.php 4 years ago ganon.php 4 years ago index.php 4 years ago pQuery.php 4 years ago
gan_parser_html.php
841 lines
1 <?php
2 /**
3 * @author Niels A.D.
4 * @author Todd Burry <todd@vanillaforums.com>
5 * @copyright 2010 Niels A.D., 2014 Todd Burry
6 * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1
7 * @package pQuery
8 */
9
10 namespace MailPoetVendor\pQuery;
11
12 if (!defined('ABSPATH')) exit;
13
14
15 /**
16 * Parses a HTML document
17 *
18 * Functionality can be extended by overriding functions or adjusting the tag map.
19 * Document may contain small errors, the parser will try to recover and resume parsing.
20 */
21 class HtmlParserBase extends TokenizerBase {
22
/**
 * Tag open token, used for "<"
 */
const TOK_TAG_OPEN = 100;
/**
 * Tag close token, used for ">"
 */
const TOK_TAG_CLOSE = 101;
/**
 * Backslash token, used for "\"
 * @internal This constant used to share the value 104 with {@link TOK_STRING},
 *           so a backslash could not be told apart from a quoted string.
 *           102 is the free slot in this range.
 *           NOTE(review): confirm 102 does not clash with a token declared in TokenizerBase.
 */
const TOK_SLASH_BACKWARD = 102;
/**
 * Forward slash token, used for "/"
 */
const TOK_SLASH_FORWARD = 103;
/**
 * String token, used for attribute values (" and ')
 */
const TOK_STRING = 104;
/**
 * Equals token, used for "="
 */
const TOK_EQUALS = 105;
47
/**
 * Characters accepted inside a HTML identifier (tag names and attribute names)
 * @see TokenizerBase::$identifiers
 * @access private
 */
var $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890:-_!?%';

/**
 * Scratch state of the tag currently being parsed
 * @internal Keys written by this class include last_pos, text, comment, dtd, cdata,
 *           tag_condition, attributes, self_close, closing_tag and tag_name
 * @var array
 */
var $status = array();

/**
 * Character => token map; a string value names a method of this class
 * that is called to produce the token (see {@link parse_string()})
 * @see TokenizerBase::$custom_char_map
 * @access private
 */
var $custom_char_map = array(
    '<'  => self::TOK_TAG_OPEN,
    '>'  => self::TOK_TAG_CLOSE,
    "'"  => 'parse_string',
    '"'  => 'parse_string',
    '/'  => self::TOK_SLASH_FORWARD,
    '\\' => self::TOK_SLASH_BACKWARD,
    '='  => self::TOK_EQUALS
);
75
/**
 * Class constructor, parses the complete document right away
 * @param string $doc Document handed to the parent tokenizer
 * @param int $pos Start position handed to the parent tokenizer
 */
function __construct($doc = '', $pos = 0) {
    // The parent sets up the tokenizer state that parse_all() relies on
    parent::__construct($doc, $pos);

    $this->parse_all();
}
80
81 #php4 PHP4 class constructor compatibility
82 #function HtmlParserBase($doc = '', $pos = 0) {return $this->__construct($doc, $pos);}
83 #php4e
84
/**
 * Tags that are handled by a dedicated method instead of {@link parse_tag_default()}
 * @var array (TAG_NAME => METHOD_NAME)
 * @internal The method is looked up on this object by {@link parse_tag()}
 * @internal Tag names are lowercase and are everything after "<", e.g. "?php" or "!doctype"
 * @access private
 */
var $tag_map = array(
    '!doctype' => 'parse_doctype',
    '?'        => 'parse_php',
    '?php'     => 'parse_php',
    '%'        => 'parse_asp',
    'style'    => 'parse_style',
    'script'   => 'parse_script'
);
100
/**
 * Tokenize a quoted string (attribute value)
 * @internal Registered in {@link $custom_char_map} for ' and "
 * @return int Always {@link TOK_STRING}
 */
protected function parse_string() {
    // The character under the cursor is the opening quote; look for its twin
    $quote = $this->doc[$this->pos];
    $closed = ($this->next_pos($quote, false) === self::TOK_UNKNOWN);

    if (!$closed) {
        // Same step back as the original code takes when the search did not report a hit
        --$this->pos;
    }

    return self::TOK_STRING;
}
112
/**
 * Collect the text between the previous tag and the current position
 * @internal Reads {@link $status}[last_pos], writes {@link $status}[text]
 */
function parse_text() {
    // Text starts right after the last consumed character and stops before the current one
    $from = $this->status['last_pos'] + 1;
    $length = $this->pos - $from;

    if ($length > 0) {
        $this->status['text'] = substr($this->doc, $from, $length);
    } else {
        $this->status['text'] = '';
    }
}
122
/**
 * Parse a HTML comment
 * @internal Gets called with HTML comments ("<!--")
 * @internal Stores the comment text in {@link $status}[comment]
 * @return bool Always true
 */
function parse_comment() {
    $this->pos += 3;
    $terminated = ($this->next_pos('-->', false) === self::TOK_UNKNOWN);

    // The comment text is taken the same way whether or not "-->" was found
    $this->status['comment'] = $this->getTokenString(1, -1);

    if ($terminated) {
        // Move to the end of the "-->" marker
        $this->pos += 2;
    } else {
        --$this->pos;
    }

    $this->status['last_pos'] = $this->pos;
    return true;
}
142
/**
 * Parse a doctype declaration
 * @internal Gets called with doctype ("<!doctype")
 * @internal Stores the declaration in {@link $status}[dtd]
 * @return bool False (with an error added) if the declaration is malformed
 */
function parse_doctype() {
    $start = $this->pos;

    // Search for "[" or ">"
    $valid = ($this->next_search('[>', false) === self::TOK_UNKNOWN);

    if ($valid && ($this->doc[$this->pos] === '[')) {
        // Stopped on "[": both the closing "]" and the final ">" must be found
        $valid = ($this->next_pos(']', false) === self::TOK_UNKNOWN) && ($this->next_pos('>', false) === self::TOK_UNKNOWN);
    }

    if (!$valid) {
        $this->addError('Invalid doctype');
        return false;
    }

    // Rewind the token start so the token string covers the whole declaration
    $this->token_start = $start;
    $this->status['dtd'] = $this->getTokenString(2, -1);
    $this->status['last_pos'] = $this->pos;
    return true;
}
168
/**
 * Parse a cdata section
 * @internal Gets called with cdata ("<![cdata")
 * @internal Stores the content in {@link $status}[cdata]
 * @return bool False (with an error added) if no "]]>" was found
 */
function parse_cdata() {
    if ($this->next_pos(']]>', false) !== self::TOK_UNKNOWN) {
        $this->addError('Invalid cdata tag');
        return false;
    }

    // The first 9 characters of the token are skipped to get past the opening marker
    $this->status['cdata'] = $this->getTokenString(9, -1);
    // Skip the remaining two characters of "]]>"
    $this->status['last_pos'] = $this->pos + 2;
    return true;
}
185
/**
 * Parse a php block
 * @internal Gets called with php tags ("<?php")
 * @internal Stores the code in {@link $status}[text]
 * @return bool Always true
 */
function parse_php() {
    $code_start = $this->pos + 1;

    if ($this->next_pos('?>', false) !== self::TOK_UNKNOWN) {
        // No closing marker: end of file
        $this->pos -= 2;
    }

    $code_length = $this->pos - $code_start;
    if ($code_length > 0) {
        $this->status['text'] = substr($this->doc, $code_start, $code_length);
    } else {
        $this->status['text'] = '';
    }

    ++$this->pos;
    $this->status['last_pos'] = $this->pos;
    return true;
}
202
/**
 * Parse an asp block
 * @internal Gets called with asp tags ("<%")
 * @internal Stores the code in {@link $status}[text]
 * @return bool Always true
 */
function parse_asp() {
    $code_start = $this->pos + 1;

    if ($this->next_pos('%>', false) !== self::TOK_UNKNOWN) {
        // No closing marker: end of file
        $this->pos -= 2;
    }

    $code_length = $this->pos - $code_start;
    if ($code_length > 0) {
        $this->status['text'] = substr($this->doc, $code_start, $code_length);
    } else {
        $this->status['text'] = '';
    }

    ++$this->pos;
    $this->status['last_pos'] = $this->pos;
    return true;
}
219
/**
 * Parse a style element, its content is taken verbatim
 * @internal Gets called with style tags ("<style>")
 * @internal Stores the content in {@link $status}[text]
 * @return bool False (with an error added) if the opening tag is not closed or "</style>" is missing
 */
function parse_style() {
    $start = 0;
    $complete = $this->parse_attributes() && ($this->token === self::TOK_TAG_CLOSE);

    if ($complete) {
        // Position of the ">" that ends the opening tag
        $start = $this->pos;
        $complete = $start && ($this->next_pos('</style>', false) === self::TOK_UNKNOWN);
    }

    if (!$complete) {
        $this->addError('No end for style tag found');
        return false;
    }

    $content_start = $start + 1;
    $content_length = $this->pos - $content_start;
    $this->status['text'] = ($content_length > 0) ? substr($this->doc, $content_start, $content_length) : '';

    // Advance over the remainder of "</style>"
    $this->pos += 7;
    $this->status['last_pos'] = $this->pos;
    return true;
}
238
/**
 * Parse a script element, its content is taken verbatim
 * @internal Gets called with script tags ("<script>")
 * @internal Stores the content in {@link $status}[text]
 * @return bool False (with an error added) if the opening tag is not closed or "</script>" is missing
 */
function parse_script() {
    $start = 0;
    $complete = $this->parse_attributes() && ($this->token === self::TOK_TAG_CLOSE);

    if ($complete) {
        // Position of the ">" that ends the opening tag
        $start = $this->pos;
        $complete = $start && ($this->next_pos('</script>', false) === self::TOK_UNKNOWN);
    }

    if (!$complete) {
        $this->addError('No end for script tag found');
        return false;
    }

    $content_start = $start + 1;
    $content_length = $this->pos - $content_start;
    $this->status['text'] = ($content_length > 0) ? substr($this->doc, $content_start, $content_length) : '';

    // Advance over the remainder of "</script>"
    $this->pos += 8;
    $this->status['last_pos'] = $this->pos;
    return true;
}
257
/**
 * Parse a conditional tag (for the comment form, including everything up to "-->")
 * @internal Gets called with IE conditionals ("<![if]" and "<!--[if]") and with "<![endif]"
 * @internal Stores the condition in {@link $status}[tag_condition]
 * @internal For the comment form the enclosed text is stored in {@link $status}[text]
 * @return bool False (with an error added) if "]" or ">" is missing
 */
function parse_conditional() {
    $in_comment = $this->status['comment'];

    if ($this->status['closing_tag']) {
        $this->pos += 8;
    } else {
        // The comment form ("<!--[") has a longer prefix than the plain form ("<![")
        $this->pos += ($in_comment ? 5 : 3);

        if ($this->next_pos(']', false) !== self::TOK_UNKNOWN) {
            $this->addError('"]" not found in conditional tag');
            return false;
        }
        $this->status['tag_condition'] = $this->getTokenString(0, -1);
    }

    if ($this->next_no_whitespace() !== self::TOK_TAG_CLOSE) {
        $this->addError('No ">" tag found 2 for conditional tag');
        return false;
    }

    if ($in_comment) {
        $this->status['last_pos'] = $this->pos;
        $text_start = $this->pos + 1;

        if ($this->next_pos('-->', false) !== self::TOK_UNKNOWN) {
            // Unterminated: everything up to the end of the document becomes the text
            $this->addError('No ending tag found for conditional tag');
            $this->pos = $this->size - 1;
            $text_length = $this->pos - 1 - $this->status['last_pos'];
        } else {
            // The 10 presumably accounts for the closing marker that precedes "-->" -- TODO confirm
            $text_length = $this->pos - 10 - $this->status['last_pos'];
            $this->pos += 2;
        }

        $this->status['text'] = ($text_length > 0) ? substr($this->doc, $text_start, $text_length) : '';
    }

    $this->status['last_pos'] = $this->pos;
    return true;
}
299
/**
 * Parse the attributes of a tag (names + values)
 * @internal Stores the result in {@link $status}[attributes] (array(ATTR => VAL))
 * @internal An attribute without "=" gets its own name as value
 * @return bool False (with an error added) if an unquoted value is empty
 */
function parse_attributes() {
    $this->status['attributes'] = array();

    // Characters that end an unquoted value, built on first use
    $stop = null;

    while ($this->next_no_whitespace() === self::TOK_IDENTIFIER) {
        $name = $this->getTokenString();
        if (($name === '?') || ($name === '%')) {
            //Probably closing tags
            break;
        }

        if ($this->next_no_whitespace() !== self::TOK_EQUALS) {
            // No "=": use the name as value and step back so the token just read is seen again
            $value = $name;
            $this->pos = (($this->token_start) ? $this->token_start : $this->pos) - 1;
        } elseif ($this->next_no_whitespace() === self::TOK_STRING) {
            // Quoted value, drop the surrounding quotes
            $value = $this->getTokenString(1, -1);
        } else {
            // Unquoted value: runs until whitespace, "<" or ">"
            $this->token_start = $this->pos;
            if ($stop === null) {
                $stop = $this->whitespace;
                $stop['<'] = true;
                $stop['>'] = true;
            }

            do {
                ++$this->pos;
            } while (($this->pos < $this->size) && (!isset($stop[$this->doc[$this->pos]])));
            --$this->pos;

            $value = $this->getTokenString();

            if (trim($value) === '') {
                $this->addError('Invalid attribute value');
                return false;
            }
        }

        $this->status['attributes'][$name] = $value;
    }

    return true;
}
348
/**
 * Default handler for tags that have no entry in {@link $tag_map}
 * @internal Gets called after the tagname (<html*ENTERS_HERE* attribute="value">)
 * @internal Sets {@link $status}[self_close] for "/>", "?>" and "%>" endings
 * @return bool False if the attributes are invalid or no ">" could be found
 */
function parse_tag_default() {
    if ($this->status['closing_tag']) {
        // Closing tags carry no attributes
        $this->status['attributes'] = array();
        $this->next_no_whitespace();
    } elseif (!$this->parse_attributes()) {
        return false;
    }

    if ($this->token !== self::TOK_TAG_CLOSE) {
        if ($this->token === self::TOK_SLASH_FORWARD) {
            $this->status['self_close'] = true;
            $this->next();
        } else {
            // "<?xml ... ?>" / "<% ... %>": the tag ends with the character it started with
            $marker = $this->status['tag_name'][0];
            if ((($marker === '?') || ($marker === '%')) && ($this->doc[$this->pos] === $marker)) {
                $this->status['self_close'] = true;
                $this->pos++;

                // Re-tokenize the character after the marker by hand; string entries are callbacks, not tokens
                $char = $this->doc[$this->pos];
                if (isset($this->char_map[$char]) && (!is_string($this->char_map[$char]))) {
                    $this->token = $this->char_map[$char];
                } else {
                    $this->token = self::TOK_UNKNOWN;
                }
            }
        }
    }

    if ($this->token !== self::TOK_TAG_CLOSE) {
        $this->addError('Expected ">", but found "'.$this->getTokenString().'"');

        // Try to recover by skipping ahead to the next ">"
        if ($this->next_pos('>', false) !== self::TOK_UNKNOWN) {
            $this->addError('No ">" tag found for "'.$this->status['tag_name'].'" tag');
            return false;
        }
    }

    return true;
}
392
/**
 * Parse tag
 * @internal Gets called after opening tag (<*ENTERS_HERE*html attribute="value">)
 * @internal Stores information about the tag in {@link $status} (comment, closing_tag, tag_name)
 * @internal Comments, conditionals and cdata are dispatched directly, all other
 *           tags go through {@link $tag_map} or {@link parse_tag_default()}
 * @return bool False if the dispatched handler failed
 */
function parse_tag() {
    $start = $this->pos;
    $this->status['self_close'] = false;

    // Flush the text that precedes this tag
    $this->parse_text();

    $next = (($this->pos + 1) < $this->size) ? $this->doc[$this->pos + 1] : '';
    if ($next === '!') {
        $this->status['closing_tag'] = false;

        if (substr($this->doc, $this->pos + 2, 2) === '--') {
            $this->status['comment'] = true;

            // substr() rather than a direct string offset: the document may end right
            // after "<!--", in which case the offset would not exist
            if ((substr($this->doc, $this->pos + 4, 1) === '[') && (strcasecmp(substr($this->doc, $this->pos + 5, 2), 'if') === 0)) {
                return $this->parse_conditional();
            } else {
                return $this->parse_comment();
            }
        } else {
            $this->status['comment'] = false;

            // Same bounds-safe check for a document ending in "<!"
            if (substr($this->doc, $this->pos + 2, 1) === '[') {
                if (strcasecmp(substr($this->doc, $this->pos + 3, 2), 'if') === 0) {
                    return $this->parse_conditional();
                } elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), 'endif') === 0) {
                    $this->status['closing_tag'] = true;
                    return $this->parse_conditional();
                } elseif (strcasecmp(substr($this->doc, $this->pos + 3, 5), 'cdata') === 0) {
                    return $this->parse_cdata();
                }
            }
            // Anything else starting with "<!" (e.g. "<!doctype") is handled as a regular tag name below
        }
    } elseif ($next === '/') {
        $this->status['closing_tag'] = true;
        ++$this->pos;
    } else {
        $this->status['closing_tag'] = false;
    }

    if ($this->next() !== self::TOK_IDENTIFIER) {
        $this->addError('Tagname expected');
        // Not a tag after all: rewind last_pos so the "<" ends up in the next text run
        $this->status['last_pos'] = $start - 1;
        return true;
    }

    $tag = $this->getTokenString();
    // The original casing is kept in the status, the lookup is case-insensitive
    $this->status['tag_name'] = $tag;
    $tag = strtolower($tag);

    if (isset($this->tag_map[$tag])) {
        $res = $this->{$this->tag_map[$tag]}();
    } else {
        $res = $this->parse_tag_default();
    }

    $this->status['last_pos'] = $this->pos;
    return $res;
}
460
/**
 * Parse the full document, tag by tag
 * @internal Resets the error list and {@link $status}[last_pos] first
 * @return bool False as soon as a tag fails to parse
 */
function parse_all() {
    $this->errors = array();
    $this->status['last_pos'] = -1;

    // Either the cursor is already on a "<", or the first one has to be located
    $on_tag = ($this->token === self::TOK_TAG_OPEN) || ($this->next_pos('<', false) === self::TOK_UNKNOWN);

    while ($on_tag) {
        if (!$this->parse_tag()) {
            return false;
        }
        $on_tag = ($this->next_pos('<') !== self::TOK_NULL);
    }

    // Whatever follows the last tag is plain text
    $this->pos = $this->size;
    $this->parse_text();

    return true;
}
482 }
483
484 /**
485 * Parses a HTML document into a HTML DOM
486 */
487 class HtmlParser extends HtmlParserBase {
488
/**
 * Root node of the resulting DOM
 * @internal Holds a class name until the constructor replaces it with a node
 * @var DomNode
 */
var $root = 'MailPoetVendor\\pQuery\\DomNode';

/**
 * Stack of the nodes that are currently open
 * @internal Root is always at index 0, current tag is at the end of the array
 * @var array
 * @access private
 */
var $hierarchy = array();

/**
 * Tags that are treated as self closing even without "/>"
 * @internal NOTE(review): "ins" is not a void element in HTML; confirm it is listed on purpose
 * @var array
 * @access private
 */
var $tags_selfclose = array(
    'area'     => true,
    'base'     => true,
    'basefont' => true,
    'br'       => true,
    'col'      => true,
    'command'  => true,
    'embed'    => true,
    'frame'    => true,
    'hr'       => true,
    'img'      => true,
    'input'    => true,
    'ins'      => true,
    'keygen'   => true,
    'link'     => true,
    'meta'     => true,
    'param'    => true,
    'source'   => true,
    'track'    => true,
    'wbr'      => true
);
530
/**
 * Class constructor, builds the DOM below the root node
 * @param string $doc Document to be tokenized
 * @param int $pos Position to start parsing
 * @param DomNode $root Root node, null to create one from the class name in {@link $root}
 */
function __construct($doc = '', $pos = 0, $root = null) {
    if ($root === null) {
        // $this->root still holds the class name at this point
        $root_class = $this->root;
        $root = new $root_class('~root~', null);
    }
    $this->root =& $root;

    // The parent constructor starts parsing, so the root has to be in place first
    parent::__construct($doc, $pos);
}
545
546 #php4 PHP4 class constructor compatibility
547 #function HtmlParser($doc = '', $pos = 0, $root = null) {return $this->__construct($doc, $pos, $root);}
548 #php4e
549
/**
 * Class magic invoke method, shorthand for {@link select()} with its default options
 * @param string $query Selector passed on to select()
 * @return array
 * @access private
 */
function __invoke($query = '*') {
    $matches = $this->select($query);
    return $matches;
}
558
/**
 * Class magic toString method, returns the result of getInnerText() on the root node
 * @return string
 * @access private
 */
function __toString() {
    $root = $this->root;
    return $root->getInnerText();
}
567
/**
 * Run a css select query against the root node
 * @internal All arguments are passed on unchanged
 * @see DomNode::select()
 * @return array
 */
function select($query = '*', $index = false, $recursive = true, $check_self = false) {
    $root = $this->root;
    return $root->select($query, $index, $recursive, $check_self);
}
576
/**
 * Updates the current hierarchy status and checks for
 * correct opening/closing of tags
 * @param bool $self_close Is current tag self closing? Null to use {@link tags_selfclose}
 * @internal This is where most of the nodes get added
 * @internal A closing tag for a self closing element (e.g. "<br>...</br>") turns the
 *           siblings that follow the matching element into its children
 * @access private
 */
protected function parse_hierarchy($self_close = null) {
    if ($self_close === null) {
        $this->status['self_close'] = ($self_close = isset($this->tags_selfclose[strtolower($this->status['tag_name'])]));
    }

    if ($self_close) {
        if ($this->status['closing_tag']) {

            // Plain array assignment: $c is a snapshot and is not re-indexed while nodes are moved
            $c = $this->hierarchy[count($this->hierarchy) - 1]->children;
            $found = false;
            for ($count = count($c), $i = $count - 1; $i >= 0; $i--) {
                if (strcasecmp($c[$i]->tag, $this->status['tag_name']) === 0) {
                    // Move every sibling that follows the matching element into it.
                    // The index has to be $ii: with a fixed $i + 1 only the first
                    // sibling was moved, over and over again.
                    for ($ii = $i + 1; $ii < $count; $ii++) {
                        $index = null; //Needs to be passed by ref
                        $c[$ii]->changeParent($c[$i], $index);
                    }
                    $c[$i]->self_close = false;

                    $found = true;
                    break;
                }
            }

            if (!$found) {
                $this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open');
            }

        } elseif ($this->status['tag_name'][0] === '?') {
            $index = null; //Needs to be passed by ref
            $this->hierarchy[count($this->hierarchy) - 1]->addXML($this->status['tag_name'], '', $this->status['attributes'], $index);
        } elseif ($this->status['tag_name'][0] === '%') {
            $index = null; //Needs to be passed by ref
            $this->hierarchy[count($this->hierarchy) - 1]->addASP($this->status['tag_name'], '', $this->status['attributes'], $index);
        } else {
            $index = null; //Needs to be passed by ref
            $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
        }
    } elseif ($this->status['closing_tag']) {
        // Look for the innermost open element with this name
        $found = false;
        for ($count = count($this->hierarchy), $i = $count - 1; $i >= 0; $i--) {
            if (strcasecmp($this->hierarchy[$i]->tag, $this->status['tag_name']) === 0) {

                // Pop the element itself plus everything still open inside it;
                // only the latter are reported as errors
                for ($ii = ($count - $i - 1); $ii >= 0; $ii--) {
                    $e = array_pop($this->hierarchy);
                    if ($ii > 0) {
                        $this->addError('Closing tag "'.$this->status['tag_name'].'" while "'.$e->tag.'" is not closed yet');
                    }
                }

                $found = true;
                break;
            }
        }

        if (!$found) {
            $this->addError('Closing tag "'.$this->status['tag_name'].'" which is not open');
        }

    } else {
        // Opening tag: the new node becomes the current element
        $index = null; //Needs to be passed by ref
        $this->hierarchy[] = $this->hierarchy[count($this->hierarchy) - 1]->addChild($this->status, $index);
    }
}
652
/**
 * Parse a cdata section and add it to the current element
 * @return bool False if the section could not be parsed
 */
function parse_cdata() {
    $parsed = parent::parse_cdata();
    if (!$parsed) {
        return false;
    }

    $current = $this->hierarchy[count($this->hierarchy) - 1];
    $index = null; //Needs to be passed by ref
    $current->addCDATA($this->status['cdata'], $index);
    return true;
}
661
/**
 * Parse a comment and add it to the current element
 * @return bool False if the comment could not be parsed
 */
function parse_comment() {
    $parsed = parent::parse_comment();
    if (!$parsed) {
        return false;
    }

    $current = $this->hierarchy[count($this->hierarchy) - 1];
    $index = null; //Needs to be passed by ref
    $current->addComment($this->status['comment'], $index);
    return true;
}
670
/**
 * Parse a conditional tag and reflect it in the DOM
 * @internal The comment form becomes a single node holding its text; the plain form
 *           is opened like an element and closed through {@link parse_hierarchy()}
 * @return bool False if the tag could not be parsed
 */
function parse_conditional() {
    if (!parent::parse_conditional()) {
        return false;
    }

    if ($this->status['comment']) {
        $current = $this->hierarchy[count($this->hierarchy) - 1];
        $index = null; //Needs to be passed by ref
        $e = $current->addConditional($this->status['tag_condition'], true, $index);

        if ($this->status['text'] !== '') {
            $index = null; //Needs to be passed by ref
            $e->addText($this->status['text'], $index);
        }
    } elseif ($this->status['closing_tag']) {
        $this->parse_hierarchy(false);
    } else {
        $current = $this->hierarchy[count($this->hierarchy) - 1];
        $index = null; //Needs to be passed by ref
        $this->hierarchy[] = $current->addConditional($this->status['tag_condition'], false, $index);
    }

    return true;
}
694
/**
 * Parse a doctype and add it to the current element
 * @return bool False if the doctype could not be parsed
 */
function parse_doctype() {
    $parsed = parent::parse_doctype();
    if (!$parsed) {
        return false;
    }

    $current = $this->hierarchy[count($this->hierarchy) - 1];
    $index = null; //Needs to be passed by ref
    $current->addDoctype($this->status['dtd'], $index);
    return true;
}
703
/**
 * Parse a php block and add it to the current element as a "php" XML node
 * @internal The by-reference index is the fourth argument of addXML(), as in
 *           {@link parse_hierarchy()}; it used to be passed in the attributes slot.
 *           NOTE(review): assumes an empty array is the right "no attributes" value -- confirm against DomNode::addXML()
 * @return bool False if the block could not be parsed
 */
function parse_php() {
    if (!parent::parse_php()) {return false;}

    $index = null; //Needs to be passed by ref
    $this->hierarchy[count($this->hierarchy) - 1]->addXML('php', $this->status['text'], array(), $index);
    return true;
}
712
/**
 * Parse an asp block and add it to the current element
 * @internal The by-reference index is the fourth argument of addASP(), as in
 *           {@link parse_hierarchy()}; it used to be passed in the attributes slot.
 *           NOTE(review): assumes an empty array is the right "no attributes" value -- confirm against DomNode::addASP()
 * @return bool False if the block could not be parsed
 */
function parse_asp() {
    if (!parent::parse_asp()) {return false;}

    $index = null; //Needs to be passed by ref
    $this->hierarchy[count($this->hierarchy) - 1]->addASP('', $this->status['text'], array(), $index);
    return true;
}
721
/**
 * Parse a script element and add it (plus its content as text) to the current element
 * @return bool False if the element could not be parsed
 */
function parse_script() {
    $parsed = parent::parse_script();
    if (!$parsed) {
        return false;
    }

    $current = $this->hierarchy[count($this->hierarchy) - 1];
    $index = null; //Needs to be passed by ref
    $e = $current->addChild($this->status, $index);

    // Empty elements get no text node
    $content = $this->status['text'];
    if ($content !== '') {
        $index = null; //Needs to be passed by ref
        $e->addText($content, $index);
    }
    return true;
}
734
/**
 * Parse a style element and add it (plus its content as text) to the current element
 * @return bool False if the element could not be parsed
 */
function parse_style() {
    $parsed = parent::parse_style();
    if (!$parsed) {
        return false;
    }

    $current = $this->hierarchy[count($this->hierarchy) - 1];
    $index = null; //Needs to be passed by ref
    $e = $current->addChild($this->status, $index);

    // Empty elements get no text node
    $content = $this->status['text'];
    if ($content !== '') {
        $index = null; //Needs to be passed by ref
        $e->addText($content, $index);
    }
    return true;
}
747
/**
 * Parse a regular tag and update the hierarchy accordingly
 * @internal An explicitly self closed tag is passed on as such, otherwise
 *           {@link parse_hierarchy()} decides by tag name
 * @return bool False if the tag could not be parsed
 */
function parse_tag_default() {
    $parsed = parent::parse_tag_default();
    if (!$parsed) {
        return false;
    }

    if ($this->status['self_close']) {
        $this->parse_hierarchy(true);
    } else {
        $this->parse_hierarchy(null);
    }
    return true;
}
754
/**
 * Collect the text between tags and add it to the current element
 * @internal Empty text does not produce a node
 */
function parse_text() {
    parent::parse_text();

    $text = $this->status['text'];
    if ($text === '') {
        return;
    }

    $current = $this->hierarchy[count($this->hierarchy) - 1];
    $index = null; //Needs to be passed by ref
    $current->addText($text, $index);
}
763
/**
 * Parse the full document into the root node
 * @internal Resets the hierarchy to just the root before parsing
 * @return DomNode|bool The root node, or false if parsing failed
 */
function parse_all() {
    $this->hierarchy = array(&$this->root);

    if (parent::parse_all()) {
        return $this->root;
    }
    return false;
}
768 }
769
/**
 * HTML5 specific parser (adds support for omittable closing tags)
 */
class Html5Parser extends HtmlParser {

    /**
     * Tags with omittable closing tags
     * @var array array('tag2' => array('tag1' => true)) will close tag1 if following (not child) tag is tag2
     * @internal The inner arrays are tested with isset() on the previous tag name,
     *           so every previous tag has to be a key, not a value
     * @access private
     */
    var $tags_optional_close = array(
        //Current tag => Previous tag
        'li'         => array('li' => true),
        'dt'         => array('dt' => true, 'dd' => true),
        'dd'         => array('dt' => true, 'dd' => true),
        'address'    => array('p' => true),
        'article'    => array('p' => true),
        'aside'      => array('p' => true),
        'blockquote' => array('p' => true),
        'dir'        => array('p' => true),
        'div'        => array('p' => true),
        'dl'         => array('p' => true),
        'fieldset'   => array('p' => true),
        'footer'     => array('p' => true),
        'form'       => array('p' => true),
        'h1'         => array('p' => true),
        'h2'         => array('p' => true),
        'h3'         => array('p' => true),
        'h4'         => array('p' => true),
        'h5'         => array('p' => true),
        'h6'         => array('p' => true),
        'header'     => array('p' => true),
        'hgroup'     => array('p' => true),
        'hr'         => array('p' => true),
        'menu'       => array('p' => true),
        'nav'        => array('p' => true),
        'ol'         => array('p' => true),
        'p'          => array('p' => true),
        'pre'        => array('p' => true),
        'section'    => array('p' => true),
        'table'      => array('p' => true),
        'ul'         => array('p' => true),
        'rt'         => array('rt' => true, 'rp' => true),
        'rp'         => array('rt' => true, 'rp' => true),
        'optgroup'   => array('optgroup' => true, 'option' => true),
        // Was array('option'), which creates key 0 and therefore never matched
        'option'     => array('option' => true),
        // "thead" was misspelled as "thread" in the next two entries
        'tbody'      => array('thead' => true, 'tbody' => true, 'tfoot' => true),
        'tfoot'      => array('thead' => true, 'tbody' => true),
        'tr'         => array('tr' => true),
        'td'         => array('td' => true, 'th' => true),
        'th'         => array('td' => true, 'th' => true),
        'body'       => array('head' => true)
    );

    /**
     * Closes the current element implicitly where {@link $tags_optional_close} says so,
     * then continues with the regular hierarchy handling
     * @param bool $self_close Is current tag self closing? Null to use {@link tags_selfclose}
     * @access private
     */
    protected function parse_hierarchy($self_close = null) {
        $tag_curr = strtolower($this->status['tag_name']);
        if ($self_close === null) {
            $this->status['self_close'] = ($self_close = isset($this->tags_selfclose[$tag_curr]));
        }

        // Only an opening, non self closing tag can implicitly close its predecessor
        if (! ($self_close || $this->status['closing_tag'])) {
            $tag_prev = strtolower($this->hierarchy[count($this->hierarchy) - 1]->tag);
            if (isset($this->tags_optional_close[$tag_curr][$tag_prev])) {
                array_pop($this->hierarchy);
            }
        }

        return parent::parse_hierarchy($self_close);
    }
}
841