LogQueryBuilder
6 years ago
Actions.php
6 years ago
ArchiveSelector.php
6 years ago
ArchiveTableCreator.php
6 years ago
ArchiveTableDao.php
6 years ago
ArchiveWriter.php
6 years ago
ArchivingDbAdapter.php
6 years ago
LogAggregator.php
5 years ago
LogQueryBuilder.php
6 years ago
LogTableTemporary.php
6 years ago
Model.php
6 years ago
RawLogDao.php
6 years ago
TableMetadata.php
6 years ago
Model.php
436 lines
| 1 | <?php |
| 2 | /** |
| 3 | * Piwik - free/libre analytics platform |
| 4 | * |
| 5 | * @link https://matomo.org |
| 6 | * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 7 | * |
| 8 | */ |
| 9 | namespace Piwik\DataAccess; |
| 10 | |
| 11 | use Exception; |
| 12 | use Piwik\ArchiveProcessor\Rules; |
| 13 | use Piwik\Common; |
| 14 | use Piwik\Container\StaticContainer; |
| 15 | use Piwik\Db; |
| 16 | use Piwik\DbHelper; |
| 17 | use Piwik\Period; |
| 18 | use Piwik\Segment; |
| 19 | use Piwik\Sequence; |
| 20 | use Psr\Log\LoggerInterface; |
| 21 | |
| 22 | /** |
| 23 | * Cleans up outdated archives |
| 24 | * |
| 25 | * @package Piwik\DataAccess |
| 26 | */ |
| 27 | class Model |
| 28 | { |
| 29 | /** |
| 30 | * @var LoggerInterface |
| 31 | */ |
| 32 | private $logger; |
| 33 | |
| 34 | public function __construct(LoggerInterface $logger = null) |
| 35 | { |
| 36 | $this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface'); |
| 37 | } |
| 38 | |
| 39 | /** |
| 40 | * Returns the archives IDs that have already been invalidated and have been since re-processed. |
| 41 | * |
| 42 | * These archives { archive name (includes segment hash) , idsite, date, period } will be deleted. |
| 43 | * |
| 44 | * @param string $archiveTable |
| 45 | * @param array $idSites |
| 46 | * @return array |
| 47 | * @throws Exception |
| 48 | */ |
| 49 | public function getInvalidatedArchiveIdsSafeToDelete($archiveTable) |
| 50 | { |
| 51 | try { |
| 52 | Db::get()->query('SET SESSION group_concat_max_len=' . (128 * 1024)); |
| 53 | } catch (\Exception $ex) { |
| 54 | $this->logger->info("Could not set group_concat_max_len MySQL session variable."); |
| 55 | } |
| 56 | |
| 57 | $sql = "SELECT idsite, date1, date2, period, name, |
| 58 | GROUP_CONCAT(idarchive, '.', value ORDER BY ts_archived DESC) as archives |
| 59 | FROM `$archiveTable` |
| 60 | WHERE name LIKE 'done%' |
| 61 | AND `value` NOT IN (" . ArchiveWriter::DONE_ERROR . ") |
| 62 | GROUP BY idsite, date1, date2, period, name HAVING count(*) > 1"; |
| 63 | |
| 64 | $archiveIds = array(); |
| 65 | |
| 66 | $rows = Db::fetchAll($sql); |
| 67 | foreach ($rows as $row) { |
| 68 | $duplicateArchives = explode(',', $row['archives']); |
| 69 | $countOfArchives = count($duplicateArchives); |
| 70 | |
| 71 | // if there is more than one archive, the older invalidated ones can be deleted |
| 72 | if ($countOfArchives > 1) { |
| 73 | array_shift($duplicateArchives); // we don't want to delete the latest archive if it is usable |
| 74 | |
| 75 | foreach ($duplicateArchives as $pair) { |
| 76 | if (strpos($pair, '.') === false) { |
| 77 | $this->logger->info("GROUP_CONCAT cut off the query result, you may have to purge archives again."); |
| 78 | break; |
| 79 | } |
| 80 | |
| 81 | list($idarchive, $value) = explode('.', $pair); |
| 82 | $archiveIds[] = $idarchive; |
| 83 | } |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | return $archiveIds; |
| 88 | } |
| 89 | |
| 90 | /** |
| 91 | * @param string $archiveTable Prefixed table name |
| 92 | * @param int[] $idSites |
| 93 | * @param string[][] $datesByPeriodType |
| 94 | * @param Segment $segment |
| 95 | * @return \Zend_Db_Statement |
| 96 | * @throws Exception |
| 97 | */ |
| 98 | public function updateArchiveAsInvalidated($archiveTable, $idSites, $datesByPeriodType, Segment $segment = null) |
| 99 | { |
| 100 | $idSites = array_map('intval', $idSites); |
| 101 | |
| 102 | $bind = array(); |
| 103 | |
| 104 | $periodConditions = array(); |
| 105 | foreach ($datesByPeriodType as $periodType => $dates) { |
| 106 | $dateConditions = array(); |
| 107 | |
| 108 | if ($periodType == Period\Range::PERIOD_ID) { |
| 109 | foreach ($dates as $date) { |
| 110 | // Ranges in the DB match if their date2 is after the start of the search range and date1 is before the end |
| 111 | // e.g. search range is 2019-01-01 to 2019-01-31 |
| 112 | // date2 >= startdate -> Ranges with date2 < 2019-01-01 (ended before 1 January) and are excluded |
| 113 | // date1 <= endate -> Ranges with date1 > 2019-01-31 (started after 31 January) and are excluded |
| 114 | $dateConditions[] = "(date2 >= ? AND date1 <= ?)"; |
| 115 | $bind = array_merge($bind, explode(',', $date)); |
| 116 | } |
| 117 | } else { |
| 118 | foreach ($dates as $date) { |
| 119 | $dateConditions[] = "(date1 <= ? AND ? <= date2)"; |
| 120 | $bind[] = $date; |
| 121 | $bind[] = $date; |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | $dateConditionsSql = implode(" OR ", $dateConditions); |
| 126 | if (empty($periodType) |
| 127 | || $periodType == Period\Day::PERIOD_ID |
| 128 | ) { |
| 129 | // invalidate all periods if no period supplied or period is day |
| 130 | $periodConditions[] = "($dateConditionsSql)"; |
| 131 | } else if ($periodType == Period\Range::PERIOD_ID) { |
| 132 | $periodConditions[] = "(period = " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))"; |
| 133 | } else { |
| 134 | // for non-day periods, invalidate greater periods, but not range periods |
| 135 | $periodConditions[] = "(period >= " . (int)$periodType . " AND period < " . Period\Range::PERIOD_ID . " AND ($dateConditionsSql))"; |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | if ($segment) { |
| 140 | $nameCondition = "name LIKE '" . Rules::getDoneFlagArchiveContainsAllPlugins($segment) . "%'"; |
| 141 | } else { |
| 142 | $nameCondition = "name LIKE 'done%'"; |
| 143 | } |
| 144 | |
| 145 | $sql = "UPDATE $archiveTable SET value = " . ArchiveWriter::DONE_INVALIDATED |
| 146 | . " WHERE $nameCondition |
| 147 | AND idsite IN (" . implode(", ", $idSites) . ") |
| 148 | AND (" . implode(" OR ", $periodConditions) . ")"; |
| 149 | |
| 150 | return Db::query($sql, $bind); |
| 151 | } |
| 152 | |
| 153 | public function getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan) |
| 154 | { |
| 155 | $query = "SELECT idarchive FROM " . $archiveTable . " |
| 156 | WHERE name LIKE 'done%' |
| 157 | AND (( value = " . ArchiveWriter::DONE_OK_TEMPORARY . " |
| 158 | AND ts_archived < ?) |
| 159 | OR value = " . ArchiveWriter::DONE_ERROR . ")"; |
| 160 | |
| 161 | return Db::fetchAll($query, array($purgeArchivesOlderThan)); |
| 162 | } |
| 163 | |
| 164 | public function deleteArchivesWithPeriod($numericTable, $blobTable, $period, $date) |
| 165 | { |
| 166 | $query = "DELETE FROM %s WHERE period = ? AND ts_archived < ?"; |
| 167 | $bind = array($period, $date); |
| 168 | |
| 169 | $queryObj = Db::query(sprintf($query, $numericTable), $bind); |
| 170 | $deletedRows = $queryObj->rowCount(); |
| 171 | |
| 172 | try { |
| 173 | $queryObj = Db::query(sprintf($query, $blobTable), $bind); |
| 174 | $deletedRows += $queryObj->rowCount(); |
| 175 | } catch (Exception $e) { |
| 176 | // Individual blob tables could be missing |
| 177 | $this->logger->debug("Unable to delete archives by period from {blobTable}.", array( |
| 178 | 'blobTable' => $blobTable, |
| 179 | 'exception' => $e, |
| 180 | )); |
| 181 | } |
| 182 | |
| 183 | return $deletedRows; |
| 184 | } |
| 185 | |
| 186 | public function deleteArchiveIds($numericTable, $blobTable, $idsToDelete) |
| 187 | { |
| 188 | $idsToDelete = array_values($idsToDelete); |
| 189 | |
| 190 | $idsToDelete = array_map('intval', $idsToDelete); |
| 191 | $query = "DELETE FROM %s WHERE idarchive IN (" . implode(',', $idsToDelete) . ")"; |
| 192 | |
| 193 | $queryObj = Db::query(sprintf($query, $numericTable), array()); |
| 194 | $deletedRows = $queryObj->rowCount(); |
| 195 | |
| 196 | try { |
| 197 | $queryObj = Db::query(sprintf($query, $blobTable), array()); |
| 198 | $deletedRows += $queryObj->rowCount(); |
| 199 | } catch (Exception $e) { |
| 200 | // Individual blob tables could be missing |
| 201 | $this->logger->debug("Unable to delete archive IDs from {blobTable}.", array( |
| 202 | 'blobTable' => $blobTable, |
| 203 | 'exception' => $e, |
| 204 | )); |
| 205 | } |
| 206 | |
| 207 | return $deletedRows; |
| 208 | } |
| 209 | |
| 210 | public function getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, |
| 211 | $doneFlags, $doneFlagValues = null) |
| 212 | { |
| 213 | $bindSQL = array($idSite, |
| 214 | $dateStartIso, |
| 215 | $dateEndIso, |
| 216 | $period, |
| 217 | ); |
| 218 | |
| 219 | $sqlWhereArchiveName = self::getNameCondition($doneFlags, $doneFlagValues); |
| 220 | |
| 221 | $timeStampWhere = ''; |
| 222 | if ($minDatetimeIsoArchiveProcessedUTC) { |
| 223 | $timeStampWhere = " AND ts_archived >= ? "; |
| 224 | $bindSQL[] = $minDatetimeIsoArchiveProcessedUTC; |
| 225 | } |
| 226 | |
| 227 | // NOTE: we can't predict how many segments there will be so there could be lots of nb_visits/nb_visits_converted rows... have to select everything. |
| 228 | $sqlQuery = "SELECT idarchive, value, name, ts_archived, date1 as startDate FROM $numericTable |
| 229 | WHERE idsite = ? |
| 230 | AND date1 = ? |
| 231 | AND date2 = ? |
| 232 | AND period = ? |
| 233 | AND ( ($sqlWhereArchiveName) |
| 234 | OR name = '" . ArchiveSelector::NB_VISITS_RECORD_LOOKED_UP . "' |
| 235 | OR name = '" . ArchiveSelector::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "') |
| 236 | $timeStampWhere |
| 237 | ORDER BY ts_archived DESC, idarchive DESC"; |
| 238 | $results = Db::fetchAll($sqlQuery, $bindSQL); |
| 239 | |
| 240 | return $results; |
| 241 | } |
| 242 | |
| 243 | public function createArchiveTable($tableName, $tableNamePrefix) |
| 244 | { |
| 245 | $db = Db::get(); |
| 246 | $sql = DbHelper::getTableCreateSql($tableNamePrefix); |
| 247 | |
| 248 | // replace table name template by real name |
| 249 | $tableNamePrefix = Common::prefixTable($tableNamePrefix); |
| 250 | $sql = str_replace($tableNamePrefix, $tableName, $sql); |
| 251 | |
| 252 | try { |
| 253 | $db->query($sql); |
| 254 | } catch (Exception $e) { |
| 255 | // accept mysql error 1050: table already exists, throw otherwise |
| 256 | if (!$db->isErrNo($e, '1050')) { |
| 257 | throw $e; |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | try { |
| 262 | if (ArchiveTableCreator::NUMERIC_TABLE === ArchiveTableCreator::getTypeFromTableName($tableName)) { |
| 263 | $sequence = new Sequence($tableName); |
| 264 | $sequence->create(); |
| 265 | } |
| 266 | } catch (Exception $e) { |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | public function allocateNewArchiveId($numericTable) |
| 271 | { |
| 272 | $sequence = new Sequence($numericTable); |
| 273 | |
| 274 | try { |
| 275 | $idarchive = $sequence->getNextId(); |
| 276 | } catch (Exception $e) { |
| 277 | // edge case: sequence was not found, create it now |
| 278 | $sequence->create(); |
| 279 | |
| 280 | $idarchive = $sequence->getNextId(); |
| 281 | } |
| 282 | |
| 283 | return $idarchive; |
| 284 | } |
| 285 | |
| 286 | public function updateArchiveStatus($numericTable, $archiveId, $doneFlag, $value) |
| 287 | { |
| 288 | Db::query("UPDATE $numericTable SET `value` = ? WHERE idarchive = ? and `name` = ?", |
| 289 | array($value, $archiveId, $doneFlag) |
| 290 | ); |
| 291 | } |
| 292 | |
| 293 | public function insertRecord($tableName, $fields, $record, $name, $value) |
| 294 | { |
| 295 | // duplicate idarchives are Ignored, see https://github.com/piwik/piwik/issues/987 |
| 296 | $query = "INSERT IGNORE INTO " . $tableName . " (" . implode(", ", $fields) . ") |
| 297 | VALUES (?,?,?,?,?,?,?,?) ON DUPLICATE KEY UPDATE " . end($fields) . " = ?"; |
| 298 | |
| 299 | $bindSql = $record; |
| 300 | $bindSql[] = $name; |
| 301 | $bindSql[] = $value; |
| 302 | $bindSql[] = $value; |
| 303 | |
| 304 | Db::query($query, $bindSql); |
| 305 | |
| 306 | return true; |
| 307 | } |
| 308 | |
| 309 | /** |
| 310 | * Returns the site IDs for invalidated archives in an archive table. |
| 311 | * |
| 312 | * @param string $numericTable The numeric table to search through. |
| 313 | * @return int[] |
| 314 | */ |
| 315 | public function getSitesWithInvalidatedArchive($numericTable) |
| 316 | { |
| 317 | $rows = Db::fetchAll("SELECT DISTINCT idsite FROM `$numericTable` WHERE name LIKE 'done%' AND value = " . ArchiveWriter::DONE_INVALIDATED); |
| 318 | |
| 319 | $result = array(); |
| 320 | foreach ($rows as $row) { |
| 321 | $result[] = $row['idsite']; |
| 322 | } |
| 323 | return $result; |
| 324 | } |
| 325 | |
| 326 | /** |
| 327 | * Get a list of IDs of archives that don't have any matching rows in the site table. Excludes temporary archives |
| 328 | * that may still be in use, as specified by the $oldestToKeep passed in. |
| 329 | * @param string $archiveTableName |
| 330 | * @param string $oldestToKeep Datetime string |
| 331 | * @return array of IDs |
| 332 | */ |
| 333 | public function getArchiveIdsForDeletedSites($archiveTableName) |
| 334 | { |
| 335 | $sql = "SELECT DISTINCT idsite FROM " . $archiveTableName; |
| 336 | $rows = Db::getReader()->fetchAll($sql, array()); |
| 337 | |
| 338 | if (empty($rows)) { |
| 339 | return array(); // nothing to delete |
| 340 | } |
| 341 | |
| 342 | $idSitesUsed = array_column($rows, 'idsite'); |
| 343 | |
| 344 | $model = new \Piwik\Plugins\SitesManager\Model(); |
| 345 | $idSitesExisting = $model->getSitesId(); |
| 346 | |
| 347 | $deletedSites = array_diff($idSitesUsed, $idSitesExisting); |
| 348 | |
| 349 | if (empty($deletedSites)) { |
| 350 | return array(); |
| 351 | } |
| 352 | $deletedSites = array_values($deletedSites); |
| 353 | $deletedSites = array_map('intval', $deletedSites); |
| 354 | |
| 355 | $sql = "SELECT DISTINCT idarchive FROM " . $archiveTableName . " WHERE idsite IN (".implode(',',$deletedSites).")"; |
| 356 | |
| 357 | $rows = Db::getReader()->fetchAll($sql, array()); |
| 358 | |
| 359 | return array_column($rows, 'idarchive'); |
| 360 | } |
| 361 | |
| 362 | /** |
| 363 | * Get a list of IDs of archives with segments that no longer exist in the DB. Excludes temporary archives that |
| 364 | * may still be in use, as specified by the $oldestToKeep passed in. |
| 365 | * @param string $archiveTableName |
| 366 | * @param array $segments List of segments to match against |
| 367 | * @param string $oldestToKeep Datetime string |
| 368 | * @return array With keys idarchive, name, idsite |
| 369 | */ |
| 370 | public function getArchiveIdsForSegments($archiveTableName, array $segments, $oldestToKeep) |
| 371 | { |
| 372 | $segmentClauses = []; |
| 373 | foreach ($segments as $segment) { |
| 374 | if (!empty($segment['definition'])) { |
| 375 | $segmentClauses[] = $this->getDeletedSegmentWhereClause($segment); |
| 376 | } |
| 377 | } |
| 378 | |
| 379 | if (empty($segmentClauses)) { |
| 380 | return array(); |
| 381 | } |
| 382 | |
| 383 | $segmentClauses = implode(' OR ', $segmentClauses); |
| 384 | |
| 385 | $sql = 'SELECT idarchive FROM ' . $archiveTableName |
| 386 | . ' WHERE ts_archived < ?' |
| 387 | . ' AND (' . $segmentClauses . ')'; |
| 388 | |
| 389 | $rows = Db::fetchAll($sql, array($oldestToKeep)); |
| 390 | |
| 391 | return array_column($rows, 'idarchive'); |
| 392 | } |
| 393 | |
| 394 | private function getDeletedSegmentWhereClause(array $segment) |
| 395 | { |
| 396 | $idSite = (int)$segment['enable_only_idsite']; |
| 397 | $segmentHash = Segment::getSegmentHash($segment['definition']); |
| 398 | // Valid segment hashes are md5 strings - just confirm that it is so it's safe for SQL injection |
| 399 | if (!ctype_xdigit($segmentHash)) { |
| 400 | throw new Exception($segment . ' expected to be an md5 hash'); |
| 401 | } |
| 402 | |
| 403 | $nameClause = 'name LIKE "done' . $segmentHash . '%"'; |
| 404 | $idSiteClause = ''; |
| 405 | if ($idSite > 0) { |
| 406 | $idSiteClause = ' AND idsite = ' . $idSite; |
| 407 | } elseif (! empty($segment['idsites_to_preserve'])) { |
| 408 | // A segment for all sites was deleted, but there are segments for a single site with the same definition |
| 409 | $idSitesToPreserve = array_map('intval', $segment['idsites_to_preserve']); |
| 410 | $idSiteClause = ' AND idsite NOT IN (' . implode(',', $idSitesToPreserve) . ')'; |
| 411 | } |
| 412 | |
| 413 | return "($nameClause $idSiteClause)"; |
| 414 | } |
| 415 | |
| 416 | /** |
| 417 | * Returns the SQL condition used to find successfully completed archives that |
| 418 | * this instance is querying for. |
| 419 | */ |
| 420 | private static function getNameCondition($doneFlags, $possibleValues) |
| 421 | { |
| 422 | $allDoneFlags = "'" . implode("','", $doneFlags) . "'"; |
| 423 | |
| 424 | // create the SQL to find archives that are DONE |
| 425 | $result = "((name IN ($allDoneFlags))"; |
| 426 | |
| 427 | if (!empty($possibleValues)) { |
| 428 | $result .= " AND (value IN (" . implode(',', $possibleValues) . ")))"; |
| 429 | } |
| 430 | $result .= ')'; |
| 431 | |
| 432 | return $result; |
| 433 | } |
| 434 | |
| 435 | } |
| 436 |