JoinTables.php
365 lines
| 1 | <?php |
| 2 | /** |
| 3 | * Piwik - free/libre analytics platform |
| 4 | * |
| 5 | * @link https://matomo.org |
| 6 | * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 7 | * |
| 8 | */ |
| 9 | |
| 10 | namespace Piwik\DataAccess\LogQueryBuilder; |
| 11 | |
| 12 | use Exception; |
| 13 | use Piwik\DataAccess\LogAggregator; |
| 14 | use Piwik\Plugin\LogTablesProvider; |
| 15 | |
| 16 | class JoinTables extends \ArrayObject |
| 17 | { |
| 18 | /** |
| 19 | * @var LogTablesProvider |
| 20 | */ |
| 21 | private $logTableProvider; |
| 22 | |
| 23 | // NOTE: joins can be specified explicitly as arrays w/ 'joinOn' keys or implicitly as table names. when |
| 24 | // table names are used, the joins dependencies are assumed based on how we want to order those joins. |
| 25 | // the below table list the possible dependencies of each table, and is specifically designed to enforce |
| 26 | // the following order: |
| 27 | // log_link_visit_action, log_action, log_visit, log_conversion, log_conversion_item |
| 28 | // which means if an array is supplied where log_visit comes before log_link_visitAction, it will |
| 29 | // be moved to after it. |
| 30 | private $implicitTableDependencies = [ |
| 31 | 'log_link_visit_action' => [ |
| 32 | // empty |
| 33 | ], |
| 34 | 'log_action' => [ |
| 35 | 'log_link_visit_action', |
| 36 | 'log_conversion', |
| 37 | 'log_conversion_item', |
| 38 | 'log_visit', |
| 39 | ], |
| 40 | 'log_visit' => [ |
| 41 | 'log_link_visit_action', |
| 42 | 'log_action', |
| 43 | ], |
| 44 | 'log_conversion' => [ |
| 45 | 'log_link_visit_action', |
| 46 | 'log_action', |
| 47 | 'log_visit', |
| 48 | ], |
| 49 | 'log_conversion_item' => [ |
| 50 | 'log_link_visit_action', |
| 51 | 'log_action', |
| 52 | 'log_visit', |
| 53 | 'log_conversion', |
| 54 | ], |
| 55 | ]; |
| 56 | |
| 57 | /** |
| 58 | * Tables constructor. |
| 59 | * @param LogTablesProvider $logTablesProvider |
| 60 | * @param array $tables |
| 61 | */ |
| 62 | public function __construct(LogTablesProvider $logTablesProvider, $tables) |
| 63 | { |
| 64 | $this->logTableProvider = $logTablesProvider; |
| 65 | |
| 66 | foreach ($tables as $table) { |
| 67 | $this->checkTableCanBeUsedForSegmentation($table); |
| 68 | } |
| 69 | |
| 70 | $this->exchangeArray(array_values($tables)); |
| 71 | } |
| 72 | |
| 73 | public function getTables() |
| 74 | { |
| 75 | return $this->getArrayCopy(); |
| 76 | } |
| 77 | |
| 78 | public function addTableToJoin($tableName) |
| 79 | { |
| 80 | $this->checkTableCanBeUsedForSegmentation($tableName); |
| 81 | $this->append($tableName); |
| 82 | } |
| 83 | |
| 84 | public function hasJoinedTable($tableName) |
| 85 | { |
| 86 | $tables = in_array($tableName, $this->getTables()); |
| 87 | if ($tables) { |
| 88 | return true; |
| 89 | } |
| 90 | |
| 91 | foreach ($this as $table) { |
| 92 | if (is_array($table)) { |
| 93 | if (!isset($table['tableAlias']) && $table['table'] === $table) { |
| 94 | return true; |
| 95 | } elseif (isset($table['tableAlias']) && $table['tableAlias'] === $table) { |
| 96 | return true; |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | return false; |
| 102 | } |
| 103 | |
| 104 | public function hasJoinedTableManually($tableToFind, $joinToFind) |
| 105 | { |
| 106 | foreach ($this as $table) { |
| 107 | if (is_array($table) |
| 108 | && !empty($table['table']) |
| 109 | && $table['table'] === $tableToFind |
| 110 | && (!isset($table['tableAlias']) || $table['tableAlias'] === $tableToFind) |
| 111 | && (!isset($table['join']) || strtolower($table['join']) === 'left join') |
| 112 | && isset($table['joinOn']) && $table['joinOn'] === $joinToFind) { |
| 113 | return true; |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | return false; |
| 118 | } |
| 119 | |
| 120 | public function getLogTable($tableName) |
| 121 | { |
| 122 | return $this->logTableProvider->getLogTable($tableName); |
| 123 | } |
| 124 | |
| 125 | public function findIndexOfManuallyAddedTable($tableNameToFind) |
| 126 | { |
| 127 | foreach ($this as $index => $table) { |
| 128 | if (is_array($table) |
| 129 | && !empty($table['table']) |
| 130 | && $table['table'] === $tableNameToFind |
| 131 | && (!isset($table['join']) || strtolower($table['join']) === 'left join') |
| 132 | && (!isset($table['tableAlias']) || $table['tableAlias'] === $tableNameToFind)) { |
| 133 | return $index; |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | public function hasAddedTableManually($tableToFind) |
| 139 | { |
| 140 | $table = $this->findIndexOfManuallyAddedTable($tableToFind); |
| 141 | |
| 142 | return isset($table); |
| 143 | } |
| 144 | |
| 145 | public function sort() |
| 146 | { |
| 147 | // we do not use $this->uasort as we do not want to maintain keys |
| 148 | $tables = $this->getTables(); |
| 149 | |
| 150 | // the first entry is always the FROM table |
| 151 | $firstTable = array_shift($tables); |
| 152 | $sorted = [$firstTable]; |
| 153 | |
| 154 | if (strpos($firstTable, LogAggregator::LOG_TABLE_SEGMENT_TEMPORARY_PREFIX) === 0) { |
| 155 | // the first table might be a temporary segment table in which case we need to keep the next one as well |
| 156 | $sorted[] = array_shift($tables); |
| 157 | } |
| 158 | |
| 159 | $dependencies = $this->parseDependencies($tables); |
| 160 | |
| 161 | $this->visitTableListDfs($tables, $dependencies, function ($tableInfo) use (&$sorted) { |
| 162 | $sorted[] = $tableInfo; |
| 163 | }); |
| 164 | |
| 165 | $this->exchangeArray($sorted); |
| 166 | } |
| 167 | |
| 168 | public function isTableJoinableOnVisit($tableToCheck) |
| 169 | { |
| 170 | $table = $this->getLogTable($tableToCheck); |
| 171 | |
| 172 | if (empty($table)) { |
| 173 | return false; |
| 174 | } |
| 175 | |
| 176 | if ($table->getColumnToJoinOnIdVisit()) { |
| 177 | return true; |
| 178 | } |
| 179 | |
| 180 | if ($table->getLinkTableToBeAbleToJoinOnVisit()) { |
| 181 | return true; |
| 182 | } |
| 183 | |
| 184 | $otherWays = $table->getWaysToJoinToOtherLogTables(); |
| 185 | |
| 186 | if (empty($otherWays)) { |
| 187 | return false; |
| 188 | } |
| 189 | |
| 190 | foreach ($otherWays as $logTable => $column) { |
| 191 | if ($logTable == 'log_visit' || $this->isTableJoinableOnVisit($logTable)) { |
| 192 | return true; |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | return false; |
| 197 | } |
| 198 | |
| 199 | public function isTableJoinableOnAction($tableToCheck) |
| 200 | { |
| 201 | $table = $this->getLogTable($tableToCheck); |
| 202 | |
| 203 | if (empty($table)) { |
| 204 | return false; |
| 205 | } |
| 206 | |
| 207 | if ($table->getColumnToJoinOnIdAction()) { |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | $otherWays = $table->getWaysToJoinToOtherLogTables(); |
| 212 | |
| 213 | if (empty($otherWays)) { |
| 214 | return false; |
| 215 | } |
| 216 | |
| 217 | foreach ($otherWays as $logTable => $column) { |
| 218 | if ($logTable == 'log_action' || $this->isTableJoinableOnAction($logTable)) { |
| 219 | return true; |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | return false; |
| 224 | } |
| 225 | |
| 226 | public function addTableDependency($table, $dependentTable) |
| 227 | { |
| 228 | if (!empty($this->implicitTableDependencies[$table])) { |
| 229 | return; |
| 230 | } |
| 231 | |
| 232 | $this->implicitTableDependencies[$table] = [$dependentTable]; |
| 233 | } |
| 234 | |
| 235 | private function checkTableCanBeUsedForSegmentation($tableName) |
| 236 | { |
| 237 | if (!is_array($tableName) && !$this->getLogTable($tableName)) { |
| 238 | throw new Exception("Table '$tableName' can't be used for segmentation"); |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | private function parseDependencies(array $tables) |
| 243 | { |
| 244 | $dependencies = []; |
| 245 | foreach ($tables as $key => &$fromInfo) { |
| 246 | if (is_string($fromInfo)) { |
| 247 | $dependencies[$key] = $this->assumeImplicitJoinDependencies($tables, $fromInfo); |
| 248 | continue; |
| 249 | } |
| 250 | |
| 251 | if (empty($fromInfo['joinOn'])) { |
| 252 | continue; |
| 253 | } |
| 254 | |
| 255 | $table = isset($fromInfo['tableAlias']) ? $fromInfo['tableAlias'] : $fromInfo['table']; |
| 256 | $tablesInExpr = $this->parseSqlTables($fromInfo['joinOn'], $table); |
| 257 | $dependencies[$key] = $tablesInExpr; |
| 258 | } |
| 259 | return $dependencies; |
| 260 | } |
| 261 | |
| 262 | private function assumeImplicitJoinDependencies($allTablesToQuery, $table) |
| 263 | { |
| 264 | $implicitTableDependencies = $this->implicitTableDependencies; |
| 265 | |
| 266 | $result = []; |
| 267 | if (isset($implicitTableDependencies[$table])) { |
| 268 | $result = $implicitTableDependencies[$table]; |
| 269 | |
| 270 | // only include dependencies that are in the list of requested tables (ie, if we want to |
| 271 | // query from log_conversion joining on log_link_visit_action, we don't want to add log_visit |
| 272 | // to the sql statement) |
| 273 | $result = array_filter($result, function ($table) use ($allTablesToQuery) { |
| 274 | return $this->isInTableArray($allTablesToQuery, $table); |
| 275 | }); |
| 276 | } |
| 277 | return $result; |
| 278 | } |
| 279 | |
| 280 | private function isInTableArray($tables, $table) |
| 281 | { |
| 282 | foreach ($tables as $entry) { |
| 283 | if (is_string($entry) |
| 284 | && $entry == $table |
| 285 | ) { |
| 286 | return true; |
| 287 | } |
| 288 | |
| 289 | if (is_array($entry) |
| 290 | && $entry['table'] == $table |
| 291 | ) { |
| 292 | return true; |
| 293 | } |
| 294 | } |
| 295 | return false; |
| 296 | } |
| 297 | |
| 298 | private function parseSqlTables($joinOn, $self) |
| 299 | { |
| 300 | preg_match_all('/\b([a-zA-Z0-9_`]+)\.[a-zA-Z0-9_`]+\b/', $joinOn, $matches); |
| 301 | |
| 302 | $tables = []; |
| 303 | foreach ($matches[1] as $table) { |
| 304 | if ($table === $self) { |
| 305 | continue; |
| 306 | } |
| 307 | |
| 308 | $tables[] = $table; |
| 309 | } |
| 310 | return $tables; |
| 311 | } |
| 312 | |
| 313 | private function visitTableListDfs($tables, $dependencies, $visitor) |
| 314 | { |
| 315 | $visited = []; |
| 316 | foreach ($tables as $index => $tableInfo) { |
| 317 | if (empty($visited[$index])) { |
| 318 | $this->visitTableListDfsSingle($tables, $dependencies, $visitor, $index, $visited); |
| 319 | } |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | private function visitTableListDfsSingle($tables, $dependencies, $visitor, $tableToVisitIndex, &$visited) |
| 324 | { |
| 325 | $visited[$tableToVisitIndex] = true; |
| 326 | $tableToVisit = $tables[$tableToVisitIndex]; |
| 327 | |
| 328 | if (!empty($dependencies[$tableToVisitIndex])) { |
| 329 | foreach ($dependencies[$tableToVisitIndex] as $dependencyTableName) { |
| 330 | $dependentTableToVisit = $this->findTableIndex($tables, $dependencyTableName); |
| 331 | if ($dependentTableToVisit === null) { // sanity check, in case the dependent table is not in the list of tables to query |
| 332 | continue; |
| 333 | } |
| 334 | |
| 335 | if (!empty($visited[$dependentTableToVisit])) { // skip if already visited |
| 336 | continue; |
| 337 | } |
| 338 | |
| 339 | // visit dependent table... |
| 340 | $this->visitTableListDfsSingle($tables, $dependencies, $visitor, $dependentTableToVisit, $visited); |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | // ...then visit current table |
| 345 | $visitor($tableToVisit); |
| 346 | } |
| 347 | |
| 348 | private function findTableIndex($tables, $tableToSearchFor) |
| 349 | { |
| 350 | foreach ($tables as $key => $info) { |
| 351 | $tableName = null; |
| 352 | if (is_string($info)) { |
| 353 | $tableName = $info; |
| 354 | } else if (is_array($info)) { |
| 355 | $tableName = isset($info['tableAlias']) ? $info['tableAlias'] : $info['table']; |
| 356 | } |
| 357 | |
| 358 | if ($tableName == $tableToSearchFor) { |
| 359 | return $key; |
| 360 | } |
| 361 | } |
| 362 | return null; |
| 363 | } |
| 364 | } |
| 365 |