JoinGenerator.php
278 lines
| 1 | <?php |
| 2 | |
| 3 | /** |
| 4 | * Matomo - free/libre analytics platform |
| 5 | * |
| 6 | * @link https://matomo.org |
| 7 | * @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 8 | */ |
| 9 | namespace Piwik\DataAccess\LogQueryBuilder; |
| 10 | |
| 11 | use Exception; |
| 12 | use Piwik\Common; |
| 13 | use Piwik\DataAccess\LogAggregator; |
| 14 | use Piwik\Tracker\LogTable; |
| 15 | class JoinGenerator |
| 16 | { |
| 17 | /** |
| 18 | * @var JoinTables |
| 19 | */ |
| 20 | protected $tables; |
| 21 | /** |
| 22 | * @var bool |
| 23 | */ |
| 24 | private $joinWithSubSelect = \false; |
| 25 | /** |
| 26 | * @var string |
| 27 | */ |
| 28 | private $joinString = ''; |
| 29 | /** |
| 30 | * @var array |
| 31 | */ |
| 32 | private $nonVisitJoins = array(); |
| 33 | public function __construct(\Piwik\DataAccess\LogQueryBuilder\JoinTables $tables) |
| 34 | { |
| 35 | $this->tables = $tables; |
| 36 | $this->addMissingTablesNeededForJoins(); |
| 37 | } |
| 38 | private function addMissingTablesNeededForJoins() |
| 39 | { |
| 40 | foreach ($this->tables as $index => $table) { |
| 41 | if (is_array($table)) { |
| 42 | continue; |
| 43 | } |
| 44 | $logTable = $this->tables->getLogTable($table); |
| 45 | if (!$logTable->getColumnToJoinOnIdVisit()) { |
| 46 | $tableNameToJoin = $logTable->getLinkTableToBeAbleToJoinOnVisit(); |
| 47 | if (empty($tableNameToJoin) && $logTable->getWaysToJoinToOtherLogTables()) { |
| 48 | foreach ($logTable->getWaysToJoinToOtherLogTables() as $otherLogTable => $column) { |
| 49 | if ($this->tables->hasJoinedTable($otherLogTable)) { |
| 50 | $this->tables->addTableDependency($table, $otherLogTable); |
| 51 | continue; |
| 52 | } |
| 53 | if ($this->tables->isTableJoinableOnVisit($otherLogTable) || $this->tables->isTableJoinableOnAction($otherLogTable)) { |
| 54 | $this->addMissingTablesForOtherTableJoin($otherLogTable, $table); |
| 55 | } |
| 56 | } |
| 57 | continue; |
| 58 | } |
| 59 | if ($index > 0 && !$this->tables->hasJoinedTable($tableNameToJoin)) { |
| 60 | $this->tables->addTableToJoin($tableNameToJoin); |
| 61 | } |
| 62 | if ($this->tables->hasJoinedTable($tableNameToJoin)) { |
| 63 | $this->generateNonVisitJoins($table, $tableNameToJoin, $index); |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | foreach ($this->tables as $index => $table) { |
| 68 | if (is_array($table)) { |
| 69 | if (!isset($table['tableAlias'])) { |
| 70 | $tableName = $table['table']; |
| 71 | $numTables = count($this->tables); |
| 72 | for ($j = $index + 1; $j < $numTables; $j++) { |
| 73 | if (!isset($this->tables[$j])) { |
| 74 | continue; |
| 75 | } |
| 76 | $tableOther = $this->tables[$j]; |
| 77 | if (is_string($tableOther) && $tableOther === $tableName) { |
| 78 | unset($this->tables[$j]); |
| 79 | } |
| 80 | } |
| 81 | } |
| 82 | } elseif (is_string($table)) { |
| 83 | $numTables = count($this->tables); |
| 84 | for ($j = $index + 1; $j < $numTables; $j++) { |
| 85 | if (isset($this->tables[$j]) && is_array($this->tables[$j]) && !isset($this->tables[$j]['tableAlias'])) { |
| 86 | $tableOther = $this->tables[$j]; |
| 87 | if ($table === $tableOther['table']) { |
| 88 | $message = sprintf('Please reorganize the joined tables as the table %s in %s cannot be joined correctly. We recommend to join tables with arrays first. %s', $table, json_encode($this->tables), json_encode(debug_backtrace(\DEBUG_BACKTRACE_IGNORE_ARGS, 10))); |
| 89 | throw new Exception($message); |
| 90 | } |
| 91 | } |
| 92 | } |
| 93 | } |
| 94 | } |
| 95 | } |
| 96 | private function addMissingTablesForOtherTableJoin($tableName, $dependentTable) |
| 97 | { |
| 98 | $this->tables->addTableDependency($dependentTable, $tableName); |
| 99 | if ($this->tables->hasJoinedTable($tableName)) { |
| 100 | return; |
| 101 | } |
| 102 | $table = $this->tables->getLogTable($tableName); |
| 103 | if ($table->getColumnToJoinOnIdAction() || $table->getColumnToJoinOnIdVisit() || $table->getLinkTableToBeAbleToJoinOnVisit()) { |
| 104 | $this->tables->addTableToJoin($tableName); |
| 105 | return; |
| 106 | } |
| 107 | $otherTableJoins = $table->getWaysToJoinToOtherLogTables(); |
| 108 | foreach ($otherTableJoins as $logTable => $column) { |
| 109 | $this->addMissingTablesForOtherTableJoin($logTable, $tableName); |
| 110 | } |
| 111 | $this->tables->addTableToJoin($tableName); |
| 112 | } |
| 113 | /** |
| 114 | * Generate the join sql based on the needed tables |
| 115 | * @throws Exception if tables can't be joined |
| 116 | */ |
| 117 | public function generate() : void |
| 118 | { |
| 119 | /** @var LogTable[] $availableLogTables */ |
| 120 | $availableLogTables = array(); |
| 121 | $this->tables->sort(); |
| 122 | foreach ($this->tables as $i => $table) { |
| 123 | $useIndex = ''; |
| 124 | if ($i === 0 && is_array($table)) { |
| 125 | $useIndex = $table['useIndex'] ?? ''; |
| 126 | $table = $table['table']; |
| 127 | } |
| 128 | if (is_array($table)) { |
| 129 | // join condition provided |
| 130 | $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table']; |
| 131 | if (isset($table['join'])) { |
| 132 | $this->joinString .= ' ' . $table['join']; |
| 133 | } else { |
| 134 | $this->joinString .= ' LEFT JOIN'; |
| 135 | } |
| 136 | if (!isset($table['joinOn']) && $this->tables->getLogTable($table['table'])) { |
| 137 | $logTable = $this->tables->getLogTable($table['table']); |
| 138 | if (!empty($availableLogTables)) { |
| 139 | $table['joinOn'] = $this->findJoinCriteriasForTables($logTable, $availableLogTables); |
| 140 | } |
| 141 | if (!isset($table['tableAlias'])) { |
| 142 | // eg array('table' => 'log_link_visit_action', 'join' => 'RIGHT JOIN') |
| 143 | // we treat this like a regular string table which we can join automatically |
| 144 | $availableLogTables[$table['table']] = $logTable; |
| 145 | } |
| 146 | } |
| 147 | $this->joinString .= ' ' . Common::prefixTable($table['table']) . " AS " . $alias . " ON " . $table['joinOn']; |
| 148 | continue; |
| 149 | } |
| 150 | $tableSql = Common::prefixTable($table) . " AS {$table}"; |
| 151 | $logTable = $this->tables->getLogTable($table); |
| 152 | if ($i == 0) { |
| 153 | // first table |
| 154 | $this->joinString .= $tableSql; |
| 155 | // Force the use of the index if an index was provided |
| 156 | if (!empty($useIndex)) { |
| 157 | $this->joinString .= " USE INDEX ({$useIndex})"; |
| 158 | } |
| 159 | } else { |
| 160 | $join = $this->findJoinCriteriasForTables($logTable, $availableLogTables); |
| 161 | if ($join === null) { |
| 162 | $availableLogTables[$table] = $logTable; |
| 163 | continue; |
| 164 | } |
| 165 | $joinName = 'LEFT JOIN'; |
| 166 | if ($i > 0 && $this->tables[$i - 1] && is_string($this->tables[$i - 1]) && strpos($this->tables[$i - 1], LogAggregator::LOG_TABLE_SEGMENT_TEMPORARY_PREFIX) === 0) { |
| 167 | $joinName = 'INNER JOIN'; |
| 168 | // when we archive a segment there will be eg `logtmpsegment$HASH` as first table. |
| 169 | // then we join log_conversion for example... if we didn't use INNER JOIN we would as a result |
| 170 | // get rows for visits even when they didn't have a conversion. Instead we only want to find rows |
| 171 | // that have an entry in both tables when doing eg |
| 172 | // logtmpsegment57cd546b7203d68a41027547c4abe1a2.idvisit = log_conversion.idvisit |
| 173 | } |
| 174 | // the join sql the default way |
| 175 | $this->joinString .= " {$joinName} {$tableSql} ON " . $join; |
| 176 | } |
| 177 | $availableLogTables[$table] = $logTable; |
| 178 | } |
| 179 | } |
| 180 | public function getJoinString() |
| 181 | { |
| 182 | return $this->joinString; |
| 183 | } |
| 184 | public function shouldJoinWithSelect() |
| 185 | { |
| 186 | return $this->joinWithSubSelect; |
| 187 | } |
| 188 | /** |
| 189 | * @param LogTable[] $availableLogTables |
| 190 | * @return string|null returns null in case the table is already joined, or the join string if the table needs |
| 191 | * to be joined |
| 192 | * @throws Exception if table cannot be joined for segmentation |
| 193 | */ |
| 194 | public function findJoinCriteriasForTables(LogTable $logTable, $availableLogTables) |
| 195 | { |
| 196 | $join = null; |
| 197 | $alternativeJoin = null; |
| 198 | $table = $logTable->getName(); |
| 199 | foreach ($availableLogTables as $availableLogTable) { |
| 200 | if ($logTable->getColumnToJoinOnIdVisit() && $availableLogTable->getColumnToJoinOnIdVisit()) { |
| 201 | $join = sprintf("%s.%s = %s.%s", $table, $logTable->getColumnToJoinOnIdVisit(), $availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit()); |
| 202 | $alternativeJoin = sprintf("%s.%s = %s.%s", $availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit(), $table, $logTable->getColumnToJoinOnIdVisit()); |
| 203 | if ($availableLogTable->shouldJoinWithSubSelect()) { |
| 204 | $this->joinWithSubSelect = \true; |
| 205 | } |
| 206 | break; |
| 207 | } |
| 208 | if ($logTable->getColumnToJoinOnIdAction() && $availableLogTable->getColumnToJoinOnIdAction()) { |
| 209 | if (isset($this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()])) { |
| 210 | $join = $this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()]; |
| 211 | } |
| 212 | break; |
| 213 | } |
| 214 | $otherJoins = $logTable->getWaysToJoinToOtherLogTables(); |
| 215 | foreach ($otherJoins as $joinTable => $column) { |
| 216 | if ($availableLogTable->getName() == $joinTable) { |
| 217 | $join = sprintf("`%s`.`%s` = `%s`.`%s`", $table, $column, $availableLogTable->getName(), $column); |
| 218 | break; |
| 219 | } |
| 220 | } |
| 221 | $otherJoins = $availableLogTable->getWaysToJoinToOtherLogTables(); |
| 222 | foreach ($otherJoins as $joinTable => $column) { |
| 223 | if ($table == $joinTable) { |
| 224 | $join = sprintf("`%s`.`%s` = `%s`.`%s`", $table, $column, $availableLogTable->getName(), $column); |
| 225 | break; |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | if (!isset($join)) { |
| 230 | throw new Exception("Table '{$table}' can't be joined for segmentation"); |
| 231 | } |
| 232 | if ($this->tables->hasJoinedTableManually($table, $join) || $this->tables->hasJoinedTableManually($table, $alternativeJoin)) { |
| 233 | // already joined, no need to join it again |
| 234 | return null; |
| 235 | } |
| 236 | if ($table == 'log_conversion_item') { |
| 237 | // by default we don't want to consider deleted columns |
| 238 | $join .= sprintf(' AND `%s`.deleted = 0', $table); |
| 239 | } |
| 240 | return $join; |
| 241 | } |
| 242 | /** |
| 243 | * This code is a bit tricky. We have to execute this right at the beginning before actually iterating over all the |
| 244 | * tables and generating the join string as we may have to delete a table from the tables. If we did not delete |
| 245 | * this table upfront, we would have maybe already added a joinString for that table, even though it will be later |
| 246 | * removed by another table. This means if we wouldn't delete/unset that table upfront, we would need to alter |
| 247 | * an already generated join string which would not be really nice code as well. |
| 248 | * |
| 249 | * Next problem is, because we are deleting a table, we have to remember the "joinOn" string for that table in a |
| 250 | * property "nonVisitJoins". Otherwise we would not be able to generate the correct "joinOn" string when actually |
| 251 | * iterating over all the tables to generate that string. |
| 252 | * |
| 253 | * @param $tableName |
| 254 | * @param $tableNameToJoin |
| 255 | * @param $index |
| 256 | */ |
| 257 | protected function generateNonVisitJoins($tableName, $tableNameToJoin, $index) |
| 258 | { |
| 259 | $logTable = $this->tables->getLogTable($tableName); |
| 260 | $logTableToJoin = $this->tables->getLogTable($tableNameToJoin); |
| 261 | $nonVisitJoin = sprintf("%s.%s = %s.%s", $logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction(), $tableName, $logTable->getColumnToJoinOnIdAction()); |
| 262 | $altNonVisitJoin = sprintf("%s.%s = %s.%s", $tableName, $logTable->getColumnToJoinOnIdAction(), $logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction()); |
| 263 | if ($index > 0 && $this->tables->hasAddedTableManually($tableName) && !$this->tables->hasJoinedTableManually($tableName, $nonVisitJoin) && !$this->tables->hasJoinedTableManually($tableName, $altNonVisitJoin)) { |
| 264 | $tableIndex = $this->tables->findIndexOfManuallyAddedTable($tableName); |
| 265 | $nonVisitJoin = '(' . $this->tables[$tableIndex]['joinOn'] . ' AND ' . $nonVisitJoin . ')'; |
| 266 | unset($this->tables[$tableIndex]); |
| 267 | } |
| 268 | if (!isset($this->nonVisitJoins[$tableName])) { |
| 269 | $this->nonVisitJoins[$tableName] = array(); |
| 270 | } |
| 271 | if (!isset($this->nonVisitJoins[$tableNameToJoin])) { |
| 272 | $this->nonVisitJoins[$tableNameToJoin] = array(); |
| 273 | } |
| 274 | $this->nonVisitJoins[$tableName][$tableNameToJoin] = $nonVisitJoin; |
| 275 | $this->nonVisitJoins[$tableNameToJoin][$tableName] = $nonVisitJoin; |
| 276 | } |
| 277 | } |
| 278 |