SegmentExpression.php
497 lines
| 1 | <?php |
| 2 | /** |
| 3 | * Piwik - free/libre analytics platform |
| 4 | * |
| 5 | * @link https://matomo.org |
| 6 | * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 7 | * |
| 8 | */ |
| 9 | |
| 10 | namespace Piwik\Segment; |
| 11 | |
| 12 | use Exception; |
| 13 | |
| 14 | /** |
| 15 | * |
| 16 | */ |
| 17 | class SegmentExpression |
| 18 | { |
| 19 | const AND_DELIMITER = ';'; |
| 20 | const OR_DELIMITER = ','; |
| 21 | |
| 22 | const MATCH_EQUAL = '=='; |
| 23 | const MATCH_NOT_EQUAL = '!='; |
| 24 | const MATCH_GREATER_OR_EQUAL = '>='; |
| 25 | const MATCH_LESS_OR_EQUAL = '<='; |
| 26 | const MATCH_GREATER = '>'; |
| 27 | const MATCH_LESS = '<'; |
| 28 | const MATCH_CONTAINS = '=@'; |
| 29 | const MATCH_DOES_NOT_CONTAIN = '!@'; |
| 30 | const MATCH_STARTS_WITH = '=^'; |
| 31 | const MATCH_ENDS_WITH = '=$'; |
| 32 | |
| 33 | const BOOL_OPERATOR_OR = 'OR'; |
| 34 | const BOOL_OPERATOR_AND = 'AND'; |
| 35 | const BOOL_OPERATOR_END = ''; |
| 36 | |
| 37 | // Note: you can't write this in the API, but access this feature |
| 38 | // via field!= <- IS NOT NULL |
| 39 | // or via field== <- IS NULL / empty |
| 40 | const MATCH_IS_NOT_NULL_NOR_EMPTY = '::NOT_NULL'; |
| 41 | const MATCH_IS_NULL_OR_EMPTY = '::NULL'; |
| 42 | |
| 43 | // Special case, since we look up Page URLs/Page titles in a sub SQL query |
| 44 | const MATCH_ACTIONS_CONTAINS = 'IN'; |
| 45 | |
| 46 | const INDEX_BOOL_OPERATOR = 0; |
| 47 | const INDEX_OPERAND = 1; |
| 48 | |
| 49 | const INDEX_OPERAND_NAME = 0; |
| 50 | const INDEX_OPERAND_OPERATOR = 1; |
| 51 | const INDEX_OPERAND_VALUE = 2; |
| 52 | |
| 53 | const SQL_WHERE_DO_NOT_MATCH_ANY_ROW = "(1 = 0)"; |
| 54 | const SQL_WHERE_MATCHES_ALL_ROWS = "(1 = 1)"; |
| 55 | |
| 56 | public function __construct($string) |
| 57 | { |
| 58 | $this->string = $string; |
| 59 | $this->tree = $this->parseTree(); |
| 60 | } |
| 61 | |
| 62 | public function getSegmentDefinition() |
| 63 | { |
| 64 | return $this->string; |
| 65 | } |
| 66 | |
| 67 | public function isEmpty() |
| 68 | { |
| 69 | return count($this->tree) == 0; |
| 70 | } |
| 71 | |
| 72 | protected $joins = array(); |
| 73 | protected $valuesBind = array(); |
| 74 | protected $tree = array(); |
| 75 | protected $parsedSubExpressions = array(); |
| 76 | |
| 77 | public function getSubExpressionCount() |
| 78 | { |
| 79 | $cleaned = array_filter($this->parsedSubExpressions, function ($part) { |
| 80 | $isExpressionColumnPresent = !empty($part[1][0]); |
| 81 | return $isExpressionColumnPresent; |
| 82 | }); |
| 83 | return count($cleaned); |
| 84 | } |
| 85 | |
| 86 | /** |
| 87 | * Given the array of parsed filters containing, for each filter, |
| 88 | * the boolean operator (AND/OR) and the operand, |
| 89 | * Will return the array where the filters are in SQL representation |
| 90 | * |
| 91 | * @throws Exception |
| 92 | * @return array |
| 93 | */ |
| 94 | public function parseSubExpressions() |
| 95 | { |
| 96 | $parsedSubExpressions = array(); |
| 97 | foreach ($this->tree as $leaf) { |
| 98 | $operand = $leaf[self::INDEX_OPERAND]; |
| 99 | |
| 100 | $operand = urldecode($operand); |
| 101 | |
| 102 | $operator = $leaf[self::INDEX_BOOL_OPERATOR]; |
| 103 | $pattern = '/^(.+?)(' . self::MATCH_EQUAL . '|' |
| 104 | . self::MATCH_NOT_EQUAL . '|' |
| 105 | . self::MATCH_GREATER_OR_EQUAL . '|' |
| 106 | . self::MATCH_GREATER . '|' |
| 107 | . self::MATCH_LESS_OR_EQUAL . '|' |
| 108 | . self::MATCH_LESS . '|' |
| 109 | . self::MATCH_CONTAINS . '|' |
| 110 | . self::MATCH_DOES_NOT_CONTAIN . '|' |
| 111 | . preg_quote(self::MATCH_STARTS_WITH) . '|' |
| 112 | . preg_quote(self::MATCH_ENDS_WITH) |
| 113 | . '){1}(.*)/'; |
| 114 | $match = preg_match($pattern, $operand, $matches); |
| 115 | if ($match == 0) { |
| 116 | throw new Exception('The segment condition \'' . $operand . '\' is not valid.'); |
| 117 | } |
| 118 | |
| 119 | $leftMember = $matches[1]; |
| 120 | $operation = $matches[2]; |
| 121 | $valueRightMember = urldecode($matches[3]); |
| 122 | |
| 123 | // is null / is not null |
| 124 | if ($valueRightMember === '') { |
| 125 | if ($operation == self::MATCH_NOT_EQUAL) { |
| 126 | $operation = self::MATCH_IS_NOT_NULL_NOR_EMPTY; |
| 127 | } elseif ($operation == self::MATCH_EQUAL) { |
| 128 | $operation = self::MATCH_IS_NULL_OR_EMPTY; |
| 129 | } else { |
| 130 | throw new Exception('The segment \'' . $operand . '\' has no value specified. You can leave this value empty ' . |
| 131 | 'only when you use the operators: ' . self::MATCH_NOT_EQUAL . ' (is not) or ' . self::MATCH_EQUAL . ' (is)'); |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | $parsedSubExpressions[] = array( |
| 136 | self::INDEX_BOOL_OPERATOR => $operator, |
| 137 | self::INDEX_OPERAND => array( |
| 138 | self::INDEX_OPERAND_NAME => $leftMember, |
| 139 | self::INDEX_OPERAND_OPERATOR => $operation, |
| 140 | self::INDEX_OPERAND_VALUE => $valueRightMember, |
| 141 | )); |
| 142 | } |
| 143 | $this->parsedSubExpressions = $parsedSubExpressions; |
| 144 | return $parsedSubExpressions; |
| 145 | } |
| 146 | |
| 147 | /** |
| 148 | * Set the given expression |
| 149 | * @param $parsedSubExpressions |
| 150 | */ |
| 151 | public function setSubExpressionsAfterCleanup($parsedSubExpressions) |
| 152 | { |
| 153 | $this->parsedSubExpressions = $parsedSubExpressions; |
| 154 | } |
| 155 | |
| 156 | /** |
| 157 | * @param array $availableTables |
| 158 | */ |
| 159 | public function parseSubExpressionsIntoSqlExpressions(&$availableTables = array()) |
| 160 | { |
| 161 | $sqlSubExpressions = array(); |
| 162 | $this->valuesBind = array(); |
| 163 | $this->joins = array(); |
| 164 | |
| 165 | foreach ($this->parsedSubExpressions as $leaf) { |
| 166 | $operator = $leaf[self::INDEX_BOOL_OPERATOR]; |
| 167 | $operandDefinition = $leaf[self::INDEX_OPERAND]; |
| 168 | |
| 169 | $operand = $this->getSqlMatchFromDefinition($operandDefinition, $availableTables); |
| 170 | |
| 171 | if ($operand[self::INDEX_OPERAND_OPERATOR] !== null) { |
| 172 | if (is_array($operand[self::INDEX_OPERAND_OPERATOR])) { |
| 173 | $this->valuesBind = array_merge($this->valuesBind, $operand[self::INDEX_OPERAND_OPERATOR]); |
| 174 | } else { |
| 175 | $this->valuesBind[] = $operand[self::INDEX_OPERAND_OPERATOR]; |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | $operand = $operand[self::INDEX_OPERAND_NAME]; |
| 180 | |
| 181 | $sqlSubExpressions[] = array( |
| 182 | self::INDEX_BOOL_OPERATOR => $operator, |
| 183 | self::INDEX_OPERAND => $operand, |
| 184 | ); |
| 185 | } |
| 186 | |
| 187 | $this->tree = $sqlSubExpressions; |
| 188 | } |
| 189 | |
| 190 | /** |
| 191 | * Given an array representing one filter operand ( left member , operation , right member) |
| 192 | * Will return an array containing |
| 193 | * - the SQL substring, |
| 194 | * - the values to bind to this substring |
| 195 | * |
| 196 | * @param array $def |
| 197 | * @param array $availableTables |
| 198 | * @throws Exception |
| 199 | * @return array |
| 200 | */ |
| 201 | protected function getSqlMatchFromDefinition($def, &$availableTables) |
| 202 | { |
| 203 | $field = $def[0]; |
| 204 | $matchType = $def[1]; |
| 205 | $value = $def[2]; |
| 206 | |
| 207 | // Segment::getCleanedExpression() may return array(null, $matchType, null) |
| 208 | $operandWillNotMatchAnyRow = empty($field) && is_null($value); |
| 209 | if($operandWillNotMatchAnyRow) { |
| 210 | if($matchType == self::MATCH_EQUAL) { |
| 211 | // eg. pageUrl==DoesNotExist |
| 212 | // Equal to NULL means it will match none |
| 213 | $sqlExpression = self::SQL_WHERE_DO_NOT_MATCH_ANY_ROW; |
| 214 | } elseif($matchType == self::MATCH_NOT_EQUAL) { |
| 215 | // eg. pageUrl!=DoesNotExist |
| 216 | // Not equal to NULL means it matches all rows |
| 217 | $sqlExpression = self::SQL_WHERE_MATCHES_ALL_ROWS; |
| 218 | } elseif($matchType == self::MATCH_CONTAINS |
| 219 | || $matchType == self::MATCH_DOES_NOT_CONTAIN |
| 220 | || $matchType == self::MATCH_STARTS_WITH |
| 221 | || $matchType == self::MATCH_ENDS_WITH) { |
| 222 | // no action was found for CONTAINS / DOES NOT CONTAIN |
| 223 | // eg. pageUrl=@DoesNotExist -> matches no row |
| 224 | // eg. pageUrl!@DoesNotExist -> matches no rows |
| 225 | $sqlExpression = self::SQL_WHERE_DO_NOT_MATCH_ANY_ROW; |
| 226 | } else { |
| 227 | // it is not expected to reach this code path |
| 228 | throw new Exception("Unexpected match type $matchType for your segment. " . |
| 229 | "Please report this issue to the Matomo team with the segment you are using."); |
| 230 | } |
| 231 | |
| 232 | return array($sqlExpression, $value = null); |
| 233 | } |
| 234 | |
| 235 | $alsoMatchNULLValues = false; |
| 236 | switch ($matchType) { |
| 237 | case self::MATCH_EQUAL: |
| 238 | $sqlMatch = '%s ='; |
| 239 | break; |
| 240 | case self::MATCH_NOT_EQUAL: |
| 241 | $sqlMatch = '%s <>'; |
| 242 | $alsoMatchNULLValues = true; |
| 243 | break; |
| 244 | case self::MATCH_GREATER: |
| 245 | $sqlMatch = '%s >'; |
| 246 | break; |
| 247 | case self::MATCH_LESS: |
| 248 | $sqlMatch = '%s <'; |
| 249 | break; |
| 250 | case self::MATCH_GREATER_OR_EQUAL: |
| 251 | $sqlMatch = '%s >='; |
| 252 | break; |
| 253 | case self::MATCH_LESS_OR_EQUAL: |
| 254 | $sqlMatch = '%s <='; |
| 255 | break; |
| 256 | case self::MATCH_CONTAINS: |
| 257 | $sqlMatch = '%s LIKE'; |
| 258 | $value = '%' . $this->escapeLikeString($value) . '%'; |
| 259 | break; |
| 260 | case self::MATCH_DOES_NOT_CONTAIN: |
| 261 | $sqlMatch = '%s NOT LIKE'; |
| 262 | $value = '%' . $this->escapeLikeString($value) . '%'; |
| 263 | $alsoMatchNULLValues = true; |
| 264 | break; |
| 265 | case self::MATCH_STARTS_WITH: |
| 266 | $sqlMatch = '%s LIKE'; |
| 267 | $value = $this->escapeLikeString($value) . '%'; |
| 268 | break; |
| 269 | case self::MATCH_ENDS_WITH: |
| 270 | $sqlMatch = '%s LIKE'; |
| 271 | $value = '%' . $this->escapeLikeString($value); |
| 272 | break; |
| 273 | |
| 274 | case self::MATCH_IS_NOT_NULL_NOR_EMPTY: |
| 275 | $sqlMatch = '%s IS NOT NULL AND %s <> \'\' AND %s <> \'0\''; |
| 276 | $value = null; |
| 277 | break; |
| 278 | |
| 279 | case self::MATCH_IS_NULL_OR_EMPTY: |
| 280 | $sqlMatch = '%s IS NULL OR %s = \'\' OR %s = \'0\''; |
| 281 | $value = null; |
| 282 | break; |
| 283 | |
| 284 | case self::MATCH_ACTIONS_CONTAINS: |
| 285 | // this match type is not accessible from the outside |
| 286 | // (it won't be matched in self::parseSubExpressions()) |
| 287 | // it can be used internally to inject sub-expressions into the query. |
| 288 | // see Segment::getCleanedExpression() |
| 289 | $sqlMatch = '%s IN (' . $value['SQL'] . ')'; |
| 290 | $value = $value['bind']; |
| 291 | break; |
| 292 | default: |
| 293 | throw new Exception("Filter contains the match type '" . $matchType . "' which is not supported"); |
| 294 | break; |
| 295 | } |
| 296 | |
| 297 | // We match NULL values when rows are excluded only when we are not doing a |
| 298 | $alsoMatchNULLValues = $alsoMatchNULLValues && !empty($value); |
| 299 | $sqlMatch = str_replace('%s', $field, $sqlMatch); |
| 300 | |
| 301 | if ($matchType === self::MATCH_ACTIONS_CONTAINS |
| 302 | || is_null($value) |
| 303 | ) { |
| 304 | $sqlExpression = "( $sqlMatch )"; |
| 305 | } else { |
| 306 | if ($alsoMatchNULLValues) { |
| 307 | $sqlExpression = "( $field IS NULL OR $sqlMatch ? )"; |
| 308 | } else { |
| 309 | $sqlExpression = "$sqlMatch ?"; |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | $columns = self::parseColumnsFromSqlExpr($field); |
| 314 | foreach ($columns as $column) { |
| 315 | $this->checkFieldIsAvailable($column, $availableTables); |
| 316 | } |
| 317 | |
| 318 | return array($sqlExpression, $value); |
| 319 | } |
| 320 | |
| 321 | /** |
| 322 | * @param string $field |
| 323 | * @return string[] |
| 324 | */ |
| 325 | public static function parseColumnsFromSqlExpr($field) |
| 326 | { |
| 327 | preg_match_all('/[^@a-zA-Z0-9_]?`?([@a-zA-Z_][@a-zA-Z0-9_]*`?\.`?[a-zA-Z0-9_`]+)`?\b/', $field, $matches); |
| 328 | $result = isset($matches[1]) ? $matches[1] : []; |
| 329 | $result = array_filter($result, function ($value) { // remove uses of session vars |
| 330 | return strpos($value, '@') === false; |
| 331 | }); |
| 332 | $result = array_map(function ($item) { |
| 333 | return str_replace('`', '', $item); |
| 334 | }, $result); |
| 335 | $result = array_unique($result); |
| 336 | $result = array_values($result); |
| 337 | return $result; |
| 338 | } |
| 339 | |
| 340 | /** |
| 341 | * Check whether the field is available |
| 342 | * If not, add it to the available tables |
| 343 | * |
| 344 | * @param string $field |
| 345 | * @param array $availableTables |
| 346 | */ |
| 347 | private function checkFieldIsAvailable($field, &$availableTables) |
| 348 | { |
| 349 | $fieldParts = explode('.', $field); |
| 350 | |
| 351 | $table = count($fieldParts) == 2 ? $fieldParts[0] : false; |
| 352 | |
| 353 | // remove sql functions from field name |
| 354 | // example: `HOUR(log_visit.visit_last_action_time)` gets `HOUR(log_visit` => remove `HOUR(` |
| 355 | $table = preg_replace('/^[A-Z_]+\(/', '', $table); |
| 356 | $tableExists = !$table || in_array($table, $availableTables); |
| 357 | |
| 358 | if ($tableExists) { |
| 359 | return; |
| 360 | } |
| 361 | |
| 362 | if (is_array($availableTables)) { |
| 363 | foreach ($availableTables as $availableTable) { |
| 364 | if (is_array($availableTable)) { |
| 365 | if (!isset($availableTable['tableAlias']) && $availableTable['table'] === $table) { |
| 366 | return; |
| 367 | } elseif (isset($availableTable['tableAlias']) && $availableTable['tableAlias'] === $table) { |
| 368 | return; |
| 369 | } |
| 370 | } |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | $availableTables[] = $table; |
| 375 | } |
| 376 | |
| 377 | /** |
| 378 | * Escape the characters % and _ in the given string |
| 379 | * @param string $str |
| 380 | * @return string |
| 381 | */ |
| 382 | private function escapeLikeString($str) |
| 383 | { |
| 384 | if (false !== strpos($str, '%')) { |
| 385 | $str = str_replace("%", "\%", $str); |
| 386 | } |
| 387 | |
| 388 | if (false !== strpos($str, '_')) { |
| 389 | $str = str_replace("_", "\_", $str); |
| 390 | } |
| 391 | |
| 392 | return $str; |
| 393 | } |
| 394 | |
| 395 | /** |
| 396 | * Given a filter string, |
| 397 | * will parse it into an array where each row contains the boolean operator applied to it, |
| 398 | * and the operand |
| 399 | * |
| 400 | * @return array |
| 401 | */ |
| 402 | protected function parseTree() |
| 403 | { |
| 404 | $string = $this->string; |
| 405 | if (empty($string)) { |
| 406 | return array(); |
| 407 | } |
| 408 | $tree = array(); |
| 409 | $i = 0; |
| 410 | $length = strlen($string); |
| 411 | $isBackslash = false; |
| 412 | $operand = ''; |
| 413 | while ($i <= $length) { |
| 414 | $char = $string[$i]; |
| 415 | |
| 416 | $isAND = ($char == self::AND_DELIMITER); |
| 417 | $isOR = ($char == self::OR_DELIMITER); |
| 418 | $isEnd = ($length == $i + 1); |
| 419 | |
| 420 | if ($isEnd) { |
| 421 | if ($isBackslash && ($isAND || $isOR)) { |
| 422 | $operand = substr($operand, 0, -1); |
| 423 | } |
| 424 | $operand .= $char; |
| 425 | $tree[] = array(self::INDEX_BOOL_OPERATOR => self::BOOL_OPERATOR_END, self::INDEX_OPERAND => $operand); |
| 426 | break; |
| 427 | } |
| 428 | |
| 429 | if ($isAND && !$isBackslash) { |
| 430 | $tree[] = array(self::INDEX_BOOL_OPERATOR => self::BOOL_OPERATOR_AND, self::INDEX_OPERAND => $operand); |
| 431 | $operand = ''; |
| 432 | } elseif ($isOR && !$isBackslash) { |
| 433 | $tree[] = array(self::INDEX_BOOL_OPERATOR => self::BOOL_OPERATOR_OR, self::INDEX_OPERAND => $operand); |
| 434 | $operand = ''; |
| 435 | } else { |
| 436 | if ($isBackslash && ($isAND || $isOR)) { |
| 437 | $operand = substr($operand, 0, -1); |
| 438 | } |
| 439 | $operand .= $char; |
| 440 | } |
| 441 | $isBackslash = ($char == "\\"); |
| 442 | $i++; |
| 443 | } |
| 444 | return $tree; |
| 445 | } |
| 446 | |
| 447 | /** |
| 448 | * Given the array of parsed boolean logic, will return |
| 449 | * an array containing the full SQL string representing the filter, |
| 450 | * the needed joins and the values to bind to the query |
| 451 | * |
| 452 | * @throws Exception |
| 453 | * @return array SQL Query, Joins and Bind parameters |
| 454 | */ |
| 455 | public function getSql() |
| 456 | { |
| 457 | if ($this->isEmpty()) { |
| 458 | throw new Exception("Invalid segment, please specify a valid segment."); |
| 459 | } |
| 460 | $sql = ''; |
| 461 | $subExpression = false; |
| 462 | foreach ($this->tree as $expression) { |
| 463 | $operator = $expression[self::INDEX_BOOL_OPERATOR]; |
| 464 | $operand = $expression[self::INDEX_OPERAND]; |
| 465 | |
| 466 | if ($operator == self::BOOL_OPERATOR_OR |
| 467 | && !$subExpression |
| 468 | ) { |
| 469 | $sql .= ' ('; |
| 470 | $subExpression = true; |
| 471 | } else { |
| 472 | $sql .= ' '; |
| 473 | } |
| 474 | |
| 475 | $sql .= $operand; |
| 476 | |
| 477 | if ($operator == self::BOOL_OPERATOR_AND |
| 478 | && $subExpression |
| 479 | ) { |
| 480 | $sql .= ')'; |
| 481 | $subExpression = false; |
| 482 | } |
| 483 | |
| 484 | $sql .= " $operator"; |
| 485 | } |
| 486 | if ($subExpression) { |
| 487 | $sql .= ')'; |
| 488 | } |
| 489 | return array( |
| 490 | 'where' => $sql, |
| 491 | 'bind' => $this->valuesBind, |
| 492 | 'join' => implode(' ', $this->joins) |
| 493 | ); |
| 494 | } |
| 495 | } |
| 496 | |
| 497 |