PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / 1.3.1
Matomo Analytics – Powerful, Privacy-First Insights for WordPress v1.3.1
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / Segment / SegmentExpression.php
matomo / app / core / Segment Last commit date
SegmentExpression.php 6 years ago
SegmentExpression.php
497 lines
1 <?php
2 /**
3 * Piwik - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9
10 namespace Piwik\Segment;
11
12 use Exception;
13
14 /**
15 *
16 */
17 class SegmentExpression
18 {
19 const AND_DELIMITER = ';';
20 const OR_DELIMITER = ',';
21
22 const MATCH_EQUAL = '==';
23 const MATCH_NOT_EQUAL = '!=';
24 const MATCH_GREATER_OR_EQUAL = '>=';
25 const MATCH_LESS_OR_EQUAL = '<=';
26 const MATCH_GREATER = '>';
27 const MATCH_LESS = '<';
28 const MATCH_CONTAINS = '=@';
29 const MATCH_DOES_NOT_CONTAIN = '!@';
30 const MATCH_STARTS_WITH = '=^';
31 const MATCH_ENDS_WITH = '=$';
32
33 const BOOL_OPERATOR_OR = 'OR';
34 const BOOL_OPERATOR_AND = 'AND';
35 const BOOL_OPERATOR_END = '';
36
37 // Note: you can't write this in the API, but access this feature
38 // via field!= <- IS NOT NULL
39 // or via field== <- IS NULL / empty
40 const MATCH_IS_NOT_NULL_NOR_EMPTY = '::NOT_NULL';
41 const MATCH_IS_NULL_OR_EMPTY = '::NULL';
42
43 // Special case, since we look up Page URLs/Page titles in a sub SQL query
44 const MATCH_ACTIONS_CONTAINS = 'IN';
45
46 const INDEX_BOOL_OPERATOR = 0;
47 const INDEX_OPERAND = 1;
48
49 const INDEX_OPERAND_NAME = 0;
50 const INDEX_OPERAND_OPERATOR = 1;
51 const INDEX_OPERAND_VALUE = 2;
52
53 const SQL_WHERE_DO_NOT_MATCH_ANY_ROW = "(1 = 0)";
54 const SQL_WHERE_MATCHES_ALL_ROWS = "(1 = 1)";
55
56 public function __construct($string)
57 {
58 $this->string = $string;
59 $this->tree = $this->parseTree();
60 }
61
62 public function getSegmentDefinition()
63 {
64 return $this->string;
65 }
66
67 public function isEmpty()
68 {
69 return count($this->tree) == 0;
70 }
71
72 protected $joins = array();
73 protected $valuesBind = array();
74 protected $tree = array();
75 protected $parsedSubExpressions = array();
76
77 public function getSubExpressionCount()
78 {
79 $cleaned = array_filter($this->parsedSubExpressions, function ($part) {
80 $isExpressionColumnPresent = !empty($part[1][0]);
81 return $isExpressionColumnPresent;
82 });
83 return count($cleaned);
84 }
85
86 /**
87 * Given the array of parsed filters containing, for each filter,
88 * the boolean operator (AND/OR) and the operand,
89 * Will return the array where the filters are in SQL representation
90 *
91 * @throws Exception
92 * @return array
93 */
94 public function parseSubExpressions()
95 {
96 $parsedSubExpressions = array();
97 foreach ($this->tree as $leaf) {
98 $operand = $leaf[self::INDEX_OPERAND];
99
100 $operand = urldecode($operand);
101
102 $operator = $leaf[self::INDEX_BOOL_OPERATOR];
103 $pattern = '/^(.+?)(' . self::MATCH_EQUAL . '|'
104 . self::MATCH_NOT_EQUAL . '|'
105 . self::MATCH_GREATER_OR_EQUAL . '|'
106 . self::MATCH_GREATER . '|'
107 . self::MATCH_LESS_OR_EQUAL . '|'
108 . self::MATCH_LESS . '|'
109 . self::MATCH_CONTAINS . '|'
110 . self::MATCH_DOES_NOT_CONTAIN . '|'
111 . preg_quote(self::MATCH_STARTS_WITH) . '|'
112 . preg_quote(self::MATCH_ENDS_WITH)
113 . '){1}(.*)/';
114 $match = preg_match($pattern, $operand, $matches);
115 if ($match == 0) {
116 throw new Exception('The segment condition \'' . $operand . '\' is not valid.');
117 }
118
119 $leftMember = $matches[1];
120 $operation = $matches[2];
121 $valueRightMember = urldecode($matches[3]);
122
123 // is null / is not null
124 if ($valueRightMember === '') {
125 if ($operation == self::MATCH_NOT_EQUAL) {
126 $operation = self::MATCH_IS_NOT_NULL_NOR_EMPTY;
127 } elseif ($operation == self::MATCH_EQUAL) {
128 $operation = self::MATCH_IS_NULL_OR_EMPTY;
129 } else {
130 throw new Exception('The segment \'' . $operand . '\' has no value specified. You can leave this value empty ' .
131 'only when you use the operators: ' . self::MATCH_NOT_EQUAL . ' (is not) or ' . self::MATCH_EQUAL . ' (is)');
132 }
133 }
134
135 $parsedSubExpressions[] = array(
136 self::INDEX_BOOL_OPERATOR => $operator,
137 self::INDEX_OPERAND => array(
138 self::INDEX_OPERAND_NAME => $leftMember,
139 self::INDEX_OPERAND_OPERATOR => $operation,
140 self::INDEX_OPERAND_VALUE => $valueRightMember,
141 ));
142 }
143 $this->parsedSubExpressions = $parsedSubExpressions;
144 return $parsedSubExpressions;
145 }
146
147 /**
148 * Set the given expression
149 * @param $parsedSubExpressions
150 */
151 public function setSubExpressionsAfterCleanup($parsedSubExpressions)
152 {
153 $this->parsedSubExpressions = $parsedSubExpressions;
154 }
155
156 /**
157 * @param array $availableTables
158 */
159 public function parseSubExpressionsIntoSqlExpressions(&$availableTables = array())
160 {
161 $sqlSubExpressions = array();
162 $this->valuesBind = array();
163 $this->joins = array();
164
165 foreach ($this->parsedSubExpressions as $leaf) {
166 $operator = $leaf[self::INDEX_BOOL_OPERATOR];
167 $operandDefinition = $leaf[self::INDEX_OPERAND];
168
169 $operand = $this->getSqlMatchFromDefinition($operandDefinition, $availableTables);
170
171 if ($operand[self::INDEX_OPERAND_OPERATOR] !== null) {
172 if (is_array($operand[self::INDEX_OPERAND_OPERATOR])) {
173 $this->valuesBind = array_merge($this->valuesBind, $operand[self::INDEX_OPERAND_OPERATOR]);
174 } else {
175 $this->valuesBind[] = $operand[self::INDEX_OPERAND_OPERATOR];
176 }
177 }
178
179 $operand = $operand[self::INDEX_OPERAND_NAME];
180
181 $sqlSubExpressions[] = array(
182 self::INDEX_BOOL_OPERATOR => $operator,
183 self::INDEX_OPERAND => $operand,
184 );
185 }
186
187 $this->tree = $sqlSubExpressions;
188 }
189
190 /**
191 * Given an array representing one filter operand ( left member , operation , right member)
192 * Will return an array containing
193 * - the SQL substring,
194 * - the values to bind to this substring
195 *
196 * @param array $def
197 * @param array $availableTables
198 * @throws Exception
199 * @return array
200 */
201 protected function getSqlMatchFromDefinition($def, &$availableTables)
202 {
203 $field = $def[0];
204 $matchType = $def[1];
205 $value = $def[2];
206
207 // Segment::getCleanedExpression() may return array(null, $matchType, null)
208 $operandWillNotMatchAnyRow = empty($field) && is_null($value);
209 if($operandWillNotMatchAnyRow) {
210 if($matchType == self::MATCH_EQUAL) {
211 // eg. pageUrl==DoesNotExist
212 // Equal to NULL means it will match none
213 $sqlExpression = self::SQL_WHERE_DO_NOT_MATCH_ANY_ROW;
214 } elseif($matchType == self::MATCH_NOT_EQUAL) {
215 // eg. pageUrl!=DoesNotExist
216 // Not equal to NULL means it matches all rows
217 $sqlExpression = self::SQL_WHERE_MATCHES_ALL_ROWS;
218 } elseif($matchType == self::MATCH_CONTAINS
219 || $matchType == self::MATCH_DOES_NOT_CONTAIN
220 || $matchType == self::MATCH_STARTS_WITH
221 || $matchType == self::MATCH_ENDS_WITH) {
222 // no action was found for CONTAINS / DOES NOT CONTAIN
223 // eg. pageUrl=@DoesNotExist -> matches no row
224 // eg. pageUrl!@DoesNotExist -> matches no rows
225 $sqlExpression = self::SQL_WHERE_DO_NOT_MATCH_ANY_ROW;
226 } else {
227 // it is not expected to reach this code path
228 throw new Exception("Unexpected match type $matchType for your segment. " .
229 "Please report this issue to the Matomo team with the segment you are using.");
230 }
231
232 return array($sqlExpression, $value = null);
233 }
234
235 $alsoMatchNULLValues = false;
236 switch ($matchType) {
237 case self::MATCH_EQUAL:
238 $sqlMatch = '%s =';
239 break;
240 case self::MATCH_NOT_EQUAL:
241 $sqlMatch = '%s <>';
242 $alsoMatchNULLValues = true;
243 break;
244 case self::MATCH_GREATER:
245 $sqlMatch = '%s >';
246 break;
247 case self::MATCH_LESS:
248 $sqlMatch = '%s <';
249 break;
250 case self::MATCH_GREATER_OR_EQUAL:
251 $sqlMatch = '%s >=';
252 break;
253 case self::MATCH_LESS_OR_EQUAL:
254 $sqlMatch = '%s <=';
255 break;
256 case self::MATCH_CONTAINS:
257 $sqlMatch = '%s LIKE';
258 $value = '%' . $this->escapeLikeString($value) . '%';
259 break;
260 case self::MATCH_DOES_NOT_CONTAIN:
261 $sqlMatch = '%s NOT LIKE';
262 $value = '%' . $this->escapeLikeString($value) . '%';
263 $alsoMatchNULLValues = true;
264 break;
265 case self::MATCH_STARTS_WITH:
266 $sqlMatch = '%s LIKE';
267 $value = $this->escapeLikeString($value) . '%';
268 break;
269 case self::MATCH_ENDS_WITH:
270 $sqlMatch = '%s LIKE';
271 $value = '%' . $this->escapeLikeString($value);
272 break;
273
274 case self::MATCH_IS_NOT_NULL_NOR_EMPTY:
275 $sqlMatch = '%s IS NOT NULL AND %s <> \'\' AND %s <> \'0\'';
276 $value = null;
277 break;
278
279 case self::MATCH_IS_NULL_OR_EMPTY:
280 $sqlMatch = '%s IS NULL OR %s = \'\' OR %s = \'0\'';
281 $value = null;
282 break;
283
284 case self::MATCH_ACTIONS_CONTAINS:
285 // this match type is not accessible from the outside
286 // (it won't be matched in self::parseSubExpressions())
287 // it can be used internally to inject sub-expressions into the query.
288 // see Segment::getCleanedExpression()
289 $sqlMatch = '%s IN (' . $value['SQL'] . ')';
290 $value = $value['bind'];
291 break;
292 default:
293 throw new Exception("Filter contains the match type '" . $matchType . "' which is not supported");
294 break;
295 }
296
297 // We match NULL values when rows are excluded only when we are not doing a
298 $alsoMatchNULLValues = $alsoMatchNULLValues && !empty($value);
299 $sqlMatch = str_replace('%s', $field, $sqlMatch);
300
301 if ($matchType === self::MATCH_ACTIONS_CONTAINS
302 || is_null($value)
303 ) {
304 $sqlExpression = "( $sqlMatch )";
305 } else {
306 if ($alsoMatchNULLValues) {
307 $sqlExpression = "( $field IS NULL OR $sqlMatch ? )";
308 } else {
309 $sqlExpression = "$sqlMatch ?";
310 }
311 }
312
313 $columns = self::parseColumnsFromSqlExpr($field);
314 foreach ($columns as $column) {
315 $this->checkFieldIsAvailable($column, $availableTables);
316 }
317
318 return array($sqlExpression, $value);
319 }
320
321 /**
322 * @param string $field
323 * @return string[]
324 */
325 public static function parseColumnsFromSqlExpr($field)
326 {
327 preg_match_all('/[^@a-zA-Z0-9_]?`?([@a-zA-Z_][@a-zA-Z0-9_]*`?\.`?[a-zA-Z0-9_`]+)`?\b/', $field, $matches);
328 $result = isset($matches[1]) ? $matches[1] : [];
329 $result = array_filter($result, function ($value) { // remove uses of session vars
330 return strpos($value, '@') === false;
331 });
332 $result = array_map(function ($item) {
333 return str_replace('`', '', $item);
334 }, $result);
335 $result = array_unique($result);
336 $result = array_values($result);
337 return $result;
338 }
339
340 /**
341 * Check whether the field is available
342 * If not, add it to the available tables
343 *
344 * @param string $field
345 * @param array $availableTables
346 */
347 private function checkFieldIsAvailable($field, &$availableTables)
348 {
349 $fieldParts = explode('.', $field);
350
351 $table = count($fieldParts) == 2 ? $fieldParts[0] : false;
352
353 // remove sql functions from field name
354 // example: `HOUR(log_visit.visit_last_action_time)` gets `HOUR(log_visit` => remove `HOUR(`
355 $table = preg_replace('/^[A-Z_]+\(/', '', $table);
356 $tableExists = !$table || in_array($table, $availableTables);
357
358 if ($tableExists) {
359 return;
360 }
361
362 if (is_array($availableTables)) {
363 foreach ($availableTables as $availableTable) {
364 if (is_array($availableTable)) {
365 if (!isset($availableTable['tableAlias']) && $availableTable['table'] === $table) {
366 return;
367 } elseif (isset($availableTable['tableAlias']) && $availableTable['tableAlias'] === $table) {
368 return;
369 }
370 }
371 }
372 }
373
374 $availableTables[] = $table;
375 }
376
377 /**
378 * Escape the characters % and _ in the given string
379 * @param string $str
380 * @return string
381 */
382 private function escapeLikeString($str)
383 {
384 if (false !== strpos($str, '%')) {
385 $str = str_replace("%", "\%", $str);
386 }
387
388 if (false !== strpos($str, '_')) {
389 $str = str_replace("_", "\_", $str);
390 }
391
392 return $str;
393 }
394
395 /**
396 * Given a filter string,
397 * will parse it into an array where each row contains the boolean operator applied to it,
398 * and the operand
399 *
400 * @return array
401 */
402 protected function parseTree()
403 {
404 $string = $this->string;
405 if (empty($string)) {
406 return array();
407 }
408 $tree = array();
409 $i = 0;
410 $length = strlen($string);
411 $isBackslash = false;
412 $operand = '';
413 while ($i <= $length) {
414 $char = $string[$i];
415
416 $isAND = ($char == self::AND_DELIMITER);
417 $isOR = ($char == self::OR_DELIMITER);
418 $isEnd = ($length == $i + 1);
419
420 if ($isEnd) {
421 if ($isBackslash && ($isAND || $isOR)) {
422 $operand = substr($operand, 0, -1);
423 }
424 $operand .= $char;
425 $tree[] = array(self::INDEX_BOOL_OPERATOR => self::BOOL_OPERATOR_END, self::INDEX_OPERAND => $operand);
426 break;
427 }
428
429 if ($isAND && !$isBackslash) {
430 $tree[] = array(self::INDEX_BOOL_OPERATOR => self::BOOL_OPERATOR_AND, self::INDEX_OPERAND => $operand);
431 $operand = '';
432 } elseif ($isOR && !$isBackslash) {
433 $tree[] = array(self::INDEX_BOOL_OPERATOR => self::BOOL_OPERATOR_OR, self::INDEX_OPERAND => $operand);
434 $operand = '';
435 } else {
436 if ($isBackslash && ($isAND || $isOR)) {
437 $operand = substr($operand, 0, -1);
438 }
439 $operand .= $char;
440 }
441 $isBackslash = ($char == "\\");
442 $i++;
443 }
444 return $tree;
445 }
446
447 /**
448 * Given the array of parsed boolean logic, will return
449 * an array containing the full SQL string representing the filter,
450 * the needed joins and the values to bind to the query
451 *
452 * @throws Exception
453 * @return array SQL Query, Joins and Bind parameters
454 */
455 public function getSql()
456 {
457 if ($this->isEmpty()) {
458 throw new Exception("Invalid segment, please specify a valid segment.");
459 }
460 $sql = '';
461 $subExpression = false;
462 foreach ($this->tree as $expression) {
463 $operator = $expression[self::INDEX_BOOL_OPERATOR];
464 $operand = $expression[self::INDEX_OPERAND];
465
466 if ($operator == self::BOOL_OPERATOR_OR
467 && !$subExpression
468 ) {
469 $sql .= ' (';
470 $subExpression = true;
471 } else {
472 $sql .= ' ';
473 }
474
475 $sql .= $operand;
476
477 if ($operator == self::BOOL_OPERATOR_AND
478 && $subExpression
479 ) {
480 $sql .= ')';
481 $subExpression = false;
482 }
483
484 $sql .= " $operator";
485 }
486 if ($subExpression) {
487 $sql .= ')';
488 }
489 return array(
490 'where' => $sql,
491 'bind' => $this->valuesBind,
492 'join' => implode(' ', $this->joins)
493 );
494 }
495 }
496
497