PluginProbe ʕ •ᴥ•ʔ
WP All Import – Drag & Drop Import for CSV, XML, Excel & Google Sheets / trunk
WP All Import – Drag & Drop Import for CSV, XML, Excel & Google Sheets vtrunk
3.9.5 3.9.6 4.0.0 4.0.1 4.1.0 trunk 2.12 2.13 2.14 3.0 3.0.1 3.0.2 3.0.3 3.0.4 3.1.0 3.1.1 3.1.2 3.1.3 3.1.4 3.1.5 3.2.0 3.2.1 3.2.2 3.2.3 3.2.4 3.2.5 3.2.6 3.2.7 3.2.8 3.2.9 3.3.0 3.3.1 3.3.2 3.3.3 3.3.4 3.3.5 3.3.6 3.3.7 3.3.8 3.3.9 3.4.0 3.4.1 3.4.2 3.4.3 3.4.4 3.4.5 3.4.6 3.4.7 3.4.8 3.4.9 3.5.0 3.5.1 3.5.2 3.5.3 3.5.4 3.5.5 3.5.6 3.5.7 3.5.8 3.5.9 3.6.0 3.6.1 3.6.2 3.6.3 3.6.4 3.6.5 3.6.6 3.6.7 3.6.8 3.6.9 3.7.0 3.7.1 3.7.2 3.7.3 3.7.3-beta-1.0 3.7.4 3.7.4-beta-1.0 3.7.5 3.7.6 3.7.7 3.7.8 3.7.9 3.8.0 3.9.0 3.9.1 3.9.2 3.9.3 3.9.4
wp-all-import / classes / chunk.php
wp-all-import / classes Last commit date
XmlStreamReader 3 weeks ago partner-discount-sdk 3 weeks ago api.php 3 weeks ago arraytoxml.php 3 weeks ago chunk.php 3 weeks ago config.php 2 years ago download.php 3 weeks ago error.php 3 weeks ago handler.php 3 weeks ago helper.php 3 weeks ago input.php 3 weeks ago nested.php 3 weeks ago rapidaddon.php 3 weeks ago render.php 3 weeks ago session.php 9 months ago upload.php 3 weeks ago zip.php 10 years ago
chunk.php
441 lines
1 <?php
2
3 if ( ! defined( 'ABSPATH' ) ) exit;
4 include __DIR__ . "/XmlStreamReader/autoload.php";
5
6 use Prewk\XmlStringStreamer;
7 use Prewk\XmlStringStreamer\Parser;
8 use Prewk\XmlStringStreamer\Stream;
9
/**
 * Chunk
 *
 * Reads a large XML file element by element so it never has to be loaded
 * into memory as a whole. Depending on the selected parser type the file is
 * walked either with PHP's XMLReader or with the bundled XmlStringStreamer.
 *
 * @package default
 * @author Max Tsiplyakov
 */
class PMXI_Chunk {
	/**
	 * Parsing options. Caller-supplied options are merged over these defaults
	 * in the constructor.
	 *
	 * - element:   name of the repeating XML element returned by read(); when
	 *              empty the constructor tries to detect it.
	 * - pointer:   1-based index of the matching element read() should return;
	 *              earlier matches are skipped.
	 * - chunkSize: multiplied by the plugin's 'chunk_size' option in the constructor.
	 * - filter:    whether get_file_path() may wrap the file in the
	 *              'preprocessxml' stream filter.
	 * - get_cloud: when truthy the element cloud is built even if 'element' is set.
	 *
	 * @var array
	 * @access public
	 */
	public $options = array(
		'path'      => './', // string The path to check for $file in
		'element'   => '', // string The XML element to return
		'type'      => 'upload',
		'encoding'  => 'UTF-8',
		'pointer'   => 1,
		'chunkSize' => 1024,
		'filter'    => true,
		'get_cloud' => false
	);

	/**
	 * The filename being read.
	 *
	 * @var string
	 * @access public
	 */
	public $file = '';

	/**
	 * The active reader: an XMLReader, an XmlStringStreamer, or null when the
	 * constructor failed.
	 *
	 * @var object|null
	 * @access public
	 */
	public $reader;

	/**
	 * Element "cloud": element name => number of occurrences found in the file.
	 *
	 * @var array
	 * @access public
	 */
	public $cloud = array();

	/**
	 * Counter of matching elements seen by read(); compared against
	 * $options['pointer'] to skip elements before the requested one.
	 *
	 * @var integer
	 * @access public
	 */
	public $loop = 1;

	/**
	 * True when the file starts with "<!DOCTYPE", i.e. it looks like an HTML
	 * page rather than an XML feed.
	 *
	 * @var boolean
	 * @access public
	 */
	public $is_404 = false;

	/**
	 * Parser in use: 'xmlreader' or 'xmlstreamer' (false after a constructor failure).
	 *
	 * @var string|false
	 * @access public
	 */
	public $parser_type = false;

	/**
	 * Not used by any method of this class.
	 *
	 * @var resource|null
	 * @access private
	 */
	private $handle = null;

	/**
	 * __construct
	 *
	 * Builds the Chunk object: merges options, detects HTML responses, picks
	 * the parser engine, optionally detects the root element and opens the reader.
	 *
	 * Any Throwable raised on the way is swallowed (and logged when WP_DEBUG
	 * is on); the object is then reset so that read() returns false.
	 *
	 * @param string       $file        The filename to work with
	 * @param array        $options     The options with which to parse the file
	 * @param string|false $parser_type Preferred parser ('xmlreader' / 'xmlstreamer'); empty selects the default
	 *
	 * @author Dom Hastings
	 * @access public
	 */
	public function __construct( $file, $options = array(), $parser_type = false ) {
		try {

			// merge the options together
			$this->options = array_merge( $this->options, ( is_array( $options ) ? $options : array() ) );

			$this->options['chunkSize'] *= PMXI_Plugin::getInstance()->getOption( 'chunk_size' );

			// set the filename
			$this->file = $file;

			$this->parser_type = empty( $parser_type ) ? 'xmlreader' : $parser_type;

			// Optional delay (microseconds) between shards, controlled by a filter.
			// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
			$sleep = apply_filters( 'wp_all_import_shard_delay', 0 );
			usleep( $sleep );

			// Sniff the first 1024 bytes: a file starting with "<!DOCTYPE" is treated as HTML.
			$is_html = false;
			$f       = @fopen( $file, "rb" ); // phpcs:ignore WordPress.WP.AlternativeFunctions.file_system_operations_fopen
			// The handle ($f), not the filename ($file), is what must be a resource;
			// testing $file skipped the sniff entirely and leaked the handle.
			if ( is_resource( $f ) ) {
				if ( ! @feof( $f ) ) {
					$chunk = @fread( $f, 1024 ); // phpcs:ignore WordPress.WP.AlternativeFunctions.file_system_operations_fread
					if ( is_string( $chunk ) && strpos( $chunk, "<!DOCTYPE" ) === 0 ) {
						$is_html = true;
					}
				}
				@fclose( $f ); // phpcs:ignore WordPress.WP.AlternativeFunctions.file_system_operations_fclose
			}

			if ( $is_html ) {
				$path = $this->get_file_path();

				$this->is_404 = true;

				$this->reader = new XMLReader();
				@$this->reader->open( $path );
				@$this->reader->setParserProperty( XMLReader::VALIDATE, false );

				return;
			}

			// The import id may arrive as either 'id' or 'import_id'.
			$input     = new PMXI_Input();
			$import_id = $input->get( 'id', 0 );
			if ( empty( $import_id ) ) {
				$import_id = $input->get( 'import_id', 0 );
			}

			// Engine selection: global override > per-import setting > caller / stored default.
			if ( PMXI_Plugin::getInstance()->getOption( 'force_stream_reader' ) ) {
				$this->parser_type = 'xmlstreamer';
			} else {
				if ( ! empty( $import_id ) ) {
					$this->parser_type = empty( $parser_type ) ? 'xmlreader' : $parser_type;
					$import            = new PMXI_Import_Record();
					$import->getById( $import_id );
					if ( ! $import->isEmpty() ) {
						$this->parser_type = empty( $import->options['xml_reader_engine'] ) ? 'xmlreader' : 'xmlstreamer';
					}
				} else {
					$this->parser_type = empty( $parser_type ) ? get_option( 'wpai_parser_type', 'xmlreader' ) : $parser_type;
				}
			}

			if ( empty( $this->options['element'] ) || $this->options['get_cloud'] ) {
				$path = $this->get_file_path();

				if ( $this->parser_type == 'xmlreader' ) {
					// Count every element name in the document.
					$reader = new XMLReader();
					$reader->open( $path );
					$reader->setParserProperty( XMLReader::VALIDATE, false );
					while ( @$reader->read() ) {
						if ( XMLReader::ELEMENT !== $reader->nodeType ) {
							continue;
						}
						// "_colon_" is the placeholder the stream filter uses for ":";
						// cloud keys use "_" in place of the namespace colon.
						$localName = str_replace( "_colon_", ":", $reader->localName );
						$cloud_key = str_replace( ":", "_", $localName );
						if ( array_key_exists( $cloud_key, $this->cloud ) ) {
							$this->cloud[ $cloud_key ] ++;
						} else {
							$this->cloud[ $cloud_key ] = 1;
						}
					}
					unset( $reader );
				} else {
					$CHUNK_SIZE     = 1024;
					$streamProvider = new Prewk\XmlStringStreamer\Stream\File( $path, $CHUNK_SIZE );
					$parseroptions  = array(
						"extractContainer" => false, // Required option
					);
					// Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting.
					$parser = new Parser\StringWalker( $parseroptions );
					// Create the streamer
					$streamer = new XmlStringStreamer( $parser, $streamProvider );
					// Drain the stream; the nodes themselves are not needed here.
					// NOTE(review): presumably the parser fills its ->cloud while walking — confirm in StringWalker.
					while ( $node = $streamer->getNode() ) {
						// intentionally empty
					}

					$this->cloud = $parser->cloud;

				}

				if ( ! empty( $this->cloud ) && empty( $this->options['element'] ) ) {

					// Most frequent element names first.
					arsort( $this->cloud );

					$main_elements = array(
						'node',
						'product',
						'job',
						'deal',
						'entry',
						'item',
						'property',
						'listing',
						'hotel',
						'record',
						'article',
						'post',
						'book',
						'item_0'
					);

					// Prefer a well-known record element name when one is present.
					foreach ( $this->cloud as $element_name => $value ) {
						if ( in_array( strtolower( $element_name ), $main_elements, true ) ) {
							$this->options['element'] = $element_name;
							break;
						}
					}

					// Otherwise fall back to the first (most frequent) element in the cloud.
					if ( empty( $this->options['element'] ) ) {
						foreach ( $this->cloud as $el => $count ) {
							$this->options['element'] = $el;
							break;
						}
					}

					// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
					$this->options['element'] = apply_filters( 'wp_all_import_root_element', $this->options['element'], $import_id, $this->cloud );
				}
			}

			// Open the reader that read() will consume.
			$path = $this->get_file_path();

			if ( $this->parser_type == 'xmlreader' ) {
				$this->reader = new XMLReader();
				@$this->reader->open( $path );
				@$this->reader->setParserProperty( XMLReader::VALIDATE, false );
			} else {
				$parseroptions  = array(
					"uniqueNode" => $this->options['element']
				);
				$CHUNK_SIZE     = 1024;
				$streamProvider = new Prewk\XmlStringStreamer\Stream\File( $path, $CHUNK_SIZE );
				$parser         = new Parser\UniqueNode( $parseroptions );
				$this->reader   = new XmlStringStreamer( $parser, $streamProvider );
			}
		} catch ( Throwable $e ) {

			if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
				error_log( 'Error in PMXI_Chunk constructor: ' . $e->getMessage() ); // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log
			}

			// Reset to an inert state; read() returns false while reader is null.
			$this->options     = array();
			$this->reader      = null;
			$this->file        = '';
			$this->cloud       = array();
			$this->loop        = 1;
			$this->is_404      = false;
			$this->parser_type = false;
		}
	}

	/**
	 * get_file_path
	 *
	 * Returns the path the reader should open. When stream filters are
	 * available, $options['filter'] is truthy, the
	 * 'wp_all_import_is_enabled_stream_filter' filter allows it and the parser
	 * is 'xmlreader', the 'preprocessxml' filter is registered and (except on
	 * HHVM) the file is wrapped in a php://filter URL. Otherwise the plain
	 * filename is returned.
	 *
	 * @return string
	 * @access public
	 */
	public function get_file_path() {
		// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
		$is_enabled_stream_filter = apply_filters( 'wp_all_import_is_enabled_stream_filter', true );

		// ! empty() instead of a bare index: options is an empty array after a constructor failure.
		$use_filter = function_exists( 'stream_filter_register' )
			&& ! empty( $this->options['filter'] )
			&& $is_enabled_stream_filter
			&& $this->parser_type == 'xmlreader';

		if ( ! $use_filter ) {
			return $this->file;
		}

		stream_filter_register( 'preprocessxml', 'preprocessXml_filter' );

		if ( defined( 'HHVM_VERSION' ) ) {
			return $this->file;
		}

		return 'php://filter/read=preprocessxml/resource=' . $this->file;
	}

	/**
	 * __destruct
	 *
	 * Cleans up
	 *
	 * @return void
	 * @author Dom Hastings
	 * @access public
	 */
	public function __destruct() {
		// release the reader (and with it the underlying file)
		unset( $this->reader );
	}

	/**
	 * read
	 *
	 * Reads the next available occurrence of the XML element
	 * $this->options['element'], skipping matches until $this->loop reaches
	 * $this->options['pointer'].
	 *
	 * @param bool $debug Not used by this method.
	 *
	 * @return string|false The element's XML with default xmlns declarations stripped
	 *                      and namespace colons replaced, or false when nothing is left
	 * @author Dom Hastings
	 * @access public
	 */
	public function read( $debug = false ) {

		// Constructor may have set reader to null on failure (e.g. empty path).
		if ( ! $this->reader ) {
			return false;
		}

		// trim it
		$element = trim( $this->options['element'] );

		$xml = '';

		if ( $this->parser_type == 'xmlreader' ) {
			try {
				while ( @$this->reader->read() ) {
					if ( XMLReader::ELEMENT !== $this->reader->nodeType ) {
						continue;
					}

					// Compare case-insensitively, with the namespace colon mapped to "_".
					$localName = str_replace( "_colon_", ":", $this->reader->localName );
					if ( strtolower( str_replace( ":", "_", $localName ) ) !== strtolower( $element ) ) {
						continue;
					}

					// Skip matches that come before the requested pointer.
					if ( $this->loop < $this->options['pointer'] ) {
						$this->loop ++;
						continue;
					}

					$xml = @$this->reader->readOuterXML();

					break;
				}
			} catch ( XmlImportException $e ) {
				$xml = false;
			}
		} else {
			// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
			$is_preprocess_enabled = apply_filters( 'is_xml_preprocess_enabled', true );

			while ( $xml = $this->reader->getNode() ) {

				if ( $this->loop < $this->options['pointer'] ) {
					$this->loop ++;
					continue;
				}

				if ( $is_preprocess_enabled ) {
					// the & symbol is not valid in XML, so replace it with temporary word _ampersand_
					$xml = str_replace( "&", "_ampersand_", $xml );
					// replace ":" with a placeholder and blank out characters that are not allowed in XML
					$xml = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', str_replace( ":", "_colon_", $xml ) );
				}

				break;
			}
		}

		if ( empty( $xml ) ) {
			return false;
		}

		// Drop default namespace declarations before normalising colons.
		return self::removeColonsFromRSS( preg_replace( '%xmlns\s*=\s*([\'"]).*\1%sU', '', $xml ) );

	}

	/**
	 * removeColonsFromRSS
	 *
	 * Restores the "_colon_" placeholders and then replaces the namespace
	 * colon in tag names (and, unless disabled by the
	 * 'wp_all_import_replace_colons_in_attribute_names' filter, in attribute
	 * names) with an underscore. "_ampersand_" placeholders are turned back
	 * into "&" when the 'is_xml_preprocess_enabled' filter is truthy.
	 *
	 * @param string $feed XML fragment
	 *
	 * @return string The normalised XML fragment
	 * @access public
	 */
	public static function removeColonsFromRSS( $feed ) {

		$feed = str_replace( "_colon_", ":", $feed );

		// pull out colons from start tags
		// (<\w+):(\w+>)
		$pattern     = '/(<\w+):([\w+|\.|-]+[ |>]{1})/i';
		$replacement = '$1_$2';
		$feed        = preg_replace( $pattern, $replacement, $feed );
		// pull out colons from end tags
		// (<\/\w+):(\w+>)
		$pattern     = '/(<\/\w+):([\w+|\.|-]+>)/i';
		$replacement = '$1_$2';
		$feed        = preg_replace( $pattern, $replacement, $feed );

		// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
		$is_replace_colons = apply_filters( 'wp_all_import_replace_colons_in_attribute_names', true );
		if ( $is_replace_colons ) {
			// pull out colons from attributes
			$pattern     = '/(\s+\w+):(\w+[=]{1})/i';
			$replacement = '$1_$2';
			$feed        = preg_replace( $pattern, $replacement, $feed );
		}
		// pull colons from single element
		// (<\w+):(\w+\/>)
		$pattern     = '/(<\w+):([\w+|\.|-]+\/>)/i';
		$replacement = '$1_$2';
		$feed        = preg_replace( $pattern, $replacement, $feed );

		// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
		$is_preprocess_enabled = apply_filters( 'is_xml_preprocess_enabled', true );
		if ( $is_preprocess_enabled ) {
			// replace temporary word _ampersand_ back to & symbol
			$feed = str_replace( "_ampersand_", "&", $feed );
		}

		// replace all standalone & symbols ( which is not in htmlentities e.q. &nbsp; and not wrapped in CDATA section ) to &amp;
		// NOTE(review): the return value is ignored, so this presumably modifies $feed by reference — confirm in PMXI_Import_Record.
		PMXI_Import_Record::preprocessXml( $feed );

		return $feed;

	}

}
406
// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedClassFound
/**
 * Stream filter that rewrites raw XML bytes before a parser sees them:
 * "&" becomes "_ampersand_", ":" becomes "_colon_" and runs of characters
 * outside the allowed XML ranges are collapsed into a single space.
 * The rewriting is skipped when the 'is_xml_preprocess_enabled' filter
 * returns a falsy value.
 */
class preprocessXml_filter extends php_user_filter {
	/**
	 * Processes every bucket of the incoming brigade and passes it on.
	 *
	 * @param resource $in       Incoming bucket brigade
	 * @param resource $out      Outgoing bucket brigade
	 * @param int      $consumed Incremented by the length of each bucket handled
	 * @param bool     $closing  Not used by this filter
	 *
	 * @return int Always PSFS_PASS_ON
	 */
	#[\ReturnTypeWillChange]
	public function filter( $in, $out, &$consumed, $closing ) {
		while ( true ) {
			$bucket = stream_bucket_make_writeable( $in );
			if ( ! $bucket ) {
				break;
			}

			// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
			if ( apply_filters( 'is_xml_preprocess_enabled', true ) ) {
				// the & symbol is not valid in XML, so replace it with temporary word _ampersand_
				$escaped   = str_replace( "&", "_ampersand_", $bucket->data );
				$colonless = $this->replace_colons( $escaped );

				// First attempt: UTF-8 aware clean-up.
				$cleanXML = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $colonless );

				// Second attempt: byte-wise clean-up when the data is not valid UTF-8.
				if ( $cleanXML == null && preg_last_error() == PREG_BAD_UTF8_ERROR ) {
					$cleanXML = preg_replace( '/[^\x09\x0a\x0d\x20-\xFF]+/', ' ', $colonless );
				}

				// Last resort: multibyte regex engine, when available.
				if ( $cleanXML == null && preg_last_error() == PREG_BAD_UTF8_ERROR && function_exists( 'mb_ereg_replace' ) ) {
					mb_regex_encoding( 'UTF-8' );
					$cleanXML = mb_ereg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $colonless );
				}

				// Fall back to the merely escaped data when no clean-up produced output.
				$bucket->data = empty( $cleanXML ) ? $colonless : $cleanXML;
			}

			$consumed += $bucket->datalen;
			stream_bucket_append( $out, $bucket );
		}

		return PSFS_PASS_ON;
	}

	/**
	 * Replaces every ":" with the "_colon_" placeholder.
	 *
	 * @param string $data
	 *
	 * @return string
	 */
	public function replace_colons( $data ) {
		$placeholder = "_colon_";

		return str_replace( ":", $placeholder, $data );
	}

}
441