PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / trunk
Matomo Analytics – Powerful, Privacy-First Insights for WordPress vtrunk
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / DataAccess / Model.php
matomo / app / core / DataAccess Last commit date
LogQueryBuilder 3 months ago Actions.php 6 months ago ArchiveSelector.php 1 month ago ArchiveTableCreator.php 1 month ago ArchiveTableDao.php 1 month ago ArchiveWriter.php 1 month ago ArchivingDbAdapter.php 1 year ago LogAggregator.php 1 month ago LogQueryBuilder.php 6 months ago LogTableTemporary.php 2 years ago Model.php 1 month ago RawLogDao.php 6 months ago TableMetadata.php 1 year ago
Model.php
846 lines
1 <?php
2
3 /**
4 * Matomo - free/libre analytics platform
5 *
6 * @link https://matomo.org
7 * @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
8 */
9 namespace Piwik\DataAccess;
10
11 use Exception;
12 use Piwik\Archive\ArchiveInvalidator;
13 use Piwik\ArchiveProcessor\Parameters;
14 use Piwik\ArchiveProcessor\Rules;
15 use Piwik\Common;
16 use Piwik\Config\GeneralConfig;
17 use Piwik\Container\StaticContainer;
18 use Piwik\Date;
19 use Piwik\Db;
20 use Piwik\DbHelper;
21 use Piwik\Period;
22 use Piwik\Segment;
23 use Piwik\Sequence;
24 use Piwik\SettingsServer;
25 use Piwik\Site;
26 use Piwik\Log\LoggerInterface;
27 /**
28 * Cleans up outdated archives
29 */
30 class Model
31 {
32 /**
33 * @var LoggerInterface
34 */
35 private $logger;
36 public function __construct(?LoggerInterface $logger = null)
37 {
38 $this->logger = $logger ?: StaticContainer::get(LoggerInterface::class);
39 }
40 /**
41 * Returns the archives IDs that have already been invalidated and have been since re-processed.
42 *
43 * These archives { archive name (includes segment hash) , idsite, date, period } will be deleted.
44 *
45 * @param string $archiveTable
46 * @param bool $setGroupContentMaxLen for tests only
47 * @return array
48 * @throws Exception
49 */
50 public function getInvalidatedArchiveIdsSafeToDelete($archiveTable, $setGroupContentMaxLen = \true)
51 {
52 if ($setGroupContentMaxLen) {
53 try {
54 Db::get()->query('SET SESSION group_concat_max_len=' . 128 * 1024);
55 } catch (\Exception $ex) {
56 $this->logger->info("Could not set group_concat_max_len MySQL session variable.");
57 }
58 }
59 $sql = "SELECT idsite, date1, date2, period, name,\n GROUP_CONCAT(idarchive, '.', value ORDER BY ts_archived DESC, idarchive DESC) as archives\n FROM `{$archiveTable}`\n WHERE name LIKE 'done%'\n AND `value` NOT IN (" . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR . ", " . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR_INVALIDATED . ")\n GROUP BY idsite, date1, date2, period, name HAVING count(*) > 1";
60 $archiveIds = array();
61 $rows = Db::fetchAll($sql);
62 foreach ($rows as $row) {
63 $duplicateArchives = explode(',', $row['archives']);
64 // do not consider purging partial archives, if they are the latest archive,
65 // and we don't want to delete the latest archive if it is usable
66 while (!empty($duplicateArchives)) {
67 $pair = $duplicateArchives[0];
68 if ($this->isCutOffGroupConcatResult($pair)) {
69 // can occur if the GROUP_CONCAT value is cut off
70 break;
71 }
72 [$idarchive, $value] = explode('.', $pair);
73 array_shift($duplicateArchives);
74 if ($value != \Piwik\DataAccess\ArchiveWriter::DONE_PARTIAL) {
75 break;
76 }
77 }
78 // if there is more than one archive, the older invalidated ones can be deleted
79 if (!empty($duplicateArchives)) {
80 foreach ($duplicateArchives as $pair) {
81 if ($this->isCutOffGroupConcatResult($pair)) {
82 $this->logger->info("GROUP_CONCAT cut off the query result, you may have to purge archives again.");
83 break;
84 }
85 [$idarchive, $value] = explode('.', $pair);
86 $archiveIds[] = $idarchive;
87 // does not matter what the value is, the latest is usable so older archives can be purged
88 }
89 }
90 }
91 return $archiveIds;
92 }
93 public function updateArchiveAsInvalidated(?string $archiveTable, $idSites, $allPeriodsToInvalidate, ?Segment $segment = null, bool $forceInvalidateNonexistentRanges = \false, ?string $name = null, bool $doNotCreateInvalidations = \false)
94 {
95 if (empty($idSites)) {
96 return 0;
97 }
98 $periodCondition = '';
99 if (!empty($allPeriodsToInvalidate)) {
100 $periodCondition .= " AND (";
101 $isFirst = \true;
102 /** @var Period $period */
103 foreach ($allPeriodsToInvalidate as $period) {
104 if ($isFirst) {
105 $isFirst = \false;
106 } else {
107 $periodCondition .= " OR ";
108 }
109 if ($period->getLabel() == 'range') {
110 // for ranges, we delete all ranges that contain the given date(s)
111 $periodCondition .= "(period = " . (int) $period->getId() . " AND date2 >= '" . $period->getDateStart()->getDatetime() . "' AND date1 <= '" . $period->getDateEnd()->getDatetime() . "')";
112 } else {
113 $periodCondition .= "(period = " . (int) $period->getId() . " AND date1 = '" . $period->getDateStart()->getDatetime() . "'" . " AND date2 = '" . $period->getDateEnd()->getDatetime() . "')";
114 }
115 }
116 $periodCondition .= ")";
117 }
118 if (!empty($name)) {
119 if (strpos($name, '.') !== \false) {
120 [$plugin, $name] = explode('.', $name, 2);
121 } else {
122 $plugin = $name;
123 $name = null;
124 }
125 }
126 $doneFlag = Rules::getDoneFlagArchiveContainsAllPlugins($segment ?: new Segment('', []));
127 if (empty($plugin)) {
128 if (null === $segment) {
129 $nameCondition = "name LIKE '{$doneFlag}%'";
130 // invalidate all segments
131 } else {
132 $nameCondition = "(name = '{$doneFlag}' OR name LIKE '{$doneFlag}.%')";
133 // invalidate specific segment only
134 }
135 } else {
136 if (null === $segment) {
137 $nameCondition = "name LIKE '{$doneFlag}%.{$plugin}'";
138 // invalidate all segments for specific plugin
139 } else {
140 $nameCondition = "name = '{$doneFlag}.{$plugin}'";
141 // invalidate specific segment for specific plugin only
142 }
143 }
144 $idArchives = [];
145 $archivesToInvalidate = [];
146 // update each archive as invalidated (but only for full archives or plugin archives, not for partial archives.
147 // DONE_INVALIDATED also implies that an archive is whole and not partial, and we want to avoid that.)
148 if (!empty($archiveTable) && empty($name)) {
149 // select all idarchive/name pairs we want to invalidate
150 $sql = "SELECT idarchive, idsite, period, date1, date2, `name`, `value`\n FROM `{$archiveTable}`\n WHERE idsite IN (" . implode(',', $idSites) . ") AND value <> " . \Piwik\DataAccess\ArchiveWriter::DONE_PARTIAL . $periodCondition . " AND {$nameCondition}";
151 $archivesToInvalidate = Db::fetchAll($sql);
152 $idArchives = array_column($archivesToInvalidate, 'idarchive');
153 if (!empty($idArchives)) {
154 $idArchives = array_map('intval', $idArchives);
155 // set status to DONE_INVALIDATED for finished archives
156 $sql = "UPDATE `{$archiveTable}` SET `value` = " . \Piwik\DataAccess\ArchiveWriter::DONE_INVALIDATED . " WHERE idarchive IN (" . implode(',', $idArchives) . ") AND value NOT IN (" . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR . ", " . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR_INVALIDATED . ") AND {$nameCondition}";
157 Db::query($sql);
158 // set status to DONE_ERROR_INVALIDATED for currently processed archives
159 $sql = "UPDATE `{$archiveTable}` SET `value` = " . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR_INVALIDATED . " WHERE idarchive IN (" . implode(',', $idArchives) . ") AND value = " . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR . " AND {$nameCondition}";
160 Db::query($sql);
161 }
162 }
163 if ($doNotCreateInvalidations) {
164 return count($idArchives);
165 }
166 // we add every archive we need to invalidate + the archives that do not already exist to archive_invalidations.
167 // except for archives that are DONE_IN_PROGRESS.
168 $archivesToCreateInvalidationRowsFor = [];
169 foreach ($archivesToInvalidate as $row) {
170 $archivesToCreateInvalidationRowsFor[$row['idsite']][$row['period']][$row['date1']][$row['date2']][$row['name']] = $row['idarchive'];
171 }
172 $now = Date::now()->getDatetime();
173 $existingInvalidations = $this->getExistingInvalidations($idSites, $periodCondition, $nameCondition);
174 $hashesOfAllSegmentsToArchiveInCoreArchive = Rules::getSegmentsToProcess($idSites);
175 $hashesOfAllSegmentsToArchiveInCoreArchive = array_map(function ($definition) {
176 return Segment::getSegmentHash($definition);
177 }, $hashesOfAllSegmentsToArchiveInCoreArchive);
178 if (empty($plugin)) {
179 $doneFlag = Rules::getDoneFlagArchiveContainsAllPlugins($segment ?: new Segment('', []));
180 } else {
181 $doneFlag = Rules::getDoneFlagArchiveContainsOnePlugin($segment ?: new Segment('', []), $plugin);
182 }
183 $dummyArchives = [];
184 foreach ($idSites as $idSite) {
185 try {
186 $siteCreationTime = Site::getCreationDateFor($idSite);
187 } catch (\Exception $ex) {
188 continue;
189 }
190 $siteCreationTime = Date::factory($siteCreationTime);
191 foreach ($allPeriodsToInvalidate as $period) {
192 if ($period->getLabel() == 'range' && !$forceInvalidateNonexistentRanges) {
193 continue;
194 // range
195 }
196 if ($period->getDateEnd()->isEarlier($siteCreationTime)) {
197 continue;
198 // don't add entries if it is before the time the site was created
199 }
200 $date1 = $period->getDateStart()->toString();
201 $date2 = $period->getDateEnd()->toString();
202 // we insert rows for the doneFlag we want to invalidate + any others we invalidated when doing the LIKE above.
203 // if we invalidated something in the archive tables, we want to make sure it appears in the invalidation queue,
204 // so we'll eventually reprocess it.
205 $doneFlagsFound = $archivesToCreateInvalidationRowsFor[$idSite][$period->getId()][$date1][$date2] ?? [];
206 $doneFlagsFound = array_keys($doneFlagsFound);
207 $doneFlagsToCheck = array_merge([$doneFlag], $doneFlagsFound);
208 $doneFlagsToCheck = array_unique($doneFlagsToCheck);
209 foreach ($doneFlagsToCheck as $doneFlagToCheck) {
210 $key = $this->makeExistingInvalidationArrayKey($idSite, $date1, $date2, $period->getId(), $doneFlagToCheck, $name);
211 if (!empty($existingInvalidations[$key])) {
212 continue;
213 // avoid adding duplicates where possible
214 }
215 $hash = $this->getHashFromDoneFlag($doneFlagToCheck);
216 if ($doneFlagToCheck != $doneFlag && (empty($hash) || !in_array($hash, $hashesOfAllSegmentsToArchiveInCoreArchive) || strpos($doneFlagToCheck, '.') !== \false)) {
217 continue;
218 // the done flag is for a segment that is not auto archive or a plugin specific archive, so we don't want to process it.
219 }
220 $idArchive = $archivesToCreateInvalidationRowsFor[$idSite][$period->getId()][$date1][$date2][$doneFlagToCheck] ?? null;
221 $dummyArchives[] = ['idarchive' => $idArchive, 'name' => $doneFlagToCheck, 'report' => $name, 'idsite' => $idSite, 'date1' => $period->getDateStart()->getDatetime(), 'date2' => $period->getDateEnd()->getDatetime(), 'period' => $period->getId(), 'ts_invalidated' => $now];
222 }
223 }
224 }
225 if (!empty($dummyArchives)) {
226 $fields = ['idarchive', 'name', 'report', 'idsite', 'date1', 'date2', 'period', 'ts_invalidated'];
227 Db\BatchInsert::tableInsertBatch(Common::prefixTable('archive_invalidations'), $fields, $dummyArchives);
228 }
229 return count($idArchives);
230 }
231 private function getExistingInvalidations($idSites, $periodCondition, $nameCondition)
232 {
233 $table = Common::prefixTable('archive_invalidations');
234 $idSites = array_map('intval', $idSites);
235 $sql = "SELECT idsite, date1, date2, period, name, report, COUNT(*) as `count` FROM `{$table}`\n WHERE idsite IN (" . implode(',', $idSites) . ") AND status = " . ArchiveInvalidator::INVALIDATION_STATUS_QUEUED . "\n {$periodCondition} AND {$nameCondition}\n GROUP BY idsite, date1, date2, period, name";
236 $rows = Db::fetchAll($sql);
237 $invalidations = [];
238 foreach ($rows as $row) {
239 $key = $this->makeExistingInvalidationArrayKey($row['idsite'], $row['date1'], $row['date2'], $row['period'], $row['name'], $row['report']);
240 $invalidations[$key] = $row['count'];
241 }
242 return $invalidations;
243 }
244 private function makeExistingInvalidationArrayKey($idSite, $date1, $date2, $period, $name, $report)
245 {
246 return implode('.', [$idSite, $date1, $date2, $period, $name, $report]);
247 }
248 /**
249 * @param string $archiveTable Prefixed table name
250 * @param int[] $idSites
251 * @param Period[] $allPeriodsToInvalidate
252 * @throws Exception
253 */
254 public function updateRangeArchiveAsInvalidated($archiveTable, $idSites, $allPeriodsToInvalidate, ?Segment $segment = null) : void
255 {
256 if (empty($idSites)) {
257 return;
258 }
259 $bind = array();
260 $periodConditions = array();
261 if (!empty($allPeriodsToInvalidate)) {
262 foreach ($allPeriodsToInvalidate as $period) {
263 $dateConditions = array();
264 $dateConditions[] = "(date1 <= ? AND ? <= date2)";
265 $bind[] = $period->getDateStart()->getDatetime();
266 $bind[] = $period->getDateEnd()->getDatetime();
267 $dateConditionsSql = implode(" OR ", $dateConditions);
268 $periodConditions[] = "(period = 5 AND ({$dateConditionsSql}))";
269 }
270 }
271 if (null === $segment) {
272 $nameCondition = "name LIKE 'done%'";
273 } else {
274 $doneFlag = Rules::getDoneFlagArchiveContainsAllPlugins($segment);
275 $nameCondition = "(name = '{$doneFlag}' OR name LIKE '{$doneFlag}.%')";
276 }
277 $sql = "SELECT idarchive FROM `{$archiveTable}` " . " WHERE {$nameCondition}\n AND idsite IN (" . implode(", ", $idSites) . ")\n AND (" . implode(" OR ", $periodConditions) . ")";
278 $recordsToUpdate = Db::fetchAll($sql, $bind);
279 if (empty($recordsToUpdate)) {
280 return;
281 }
282 $idArchives = array_map('intval', array_column($recordsToUpdate, 'idarchive'));
283 $updateSql = "UPDATE `{$archiveTable}` SET value = " . \Piwik\DataAccess\ArchiveWriter::DONE_INVALIDATED . " WHERE idarchive IN (" . implode(', ', $idArchives) . ") AND {$nameCondition}" . " AND value NOT IN (" . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR . ", " . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR_INVALIDATED . ")";
284 Db::query($updateSql);
285 $updateSql = "UPDATE `{$archiveTable}` SET value = " . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR_INVALIDATED . " WHERE idarchive IN (" . implode(', ', $idArchives) . ") AND {$nameCondition} AND value = " . \Piwik\DataAccess\ArchiveWriter::DONE_ERROR;
286 Db::query($updateSql);
287 }
288 public function getTemporaryArchivesOlderThan($archiveTable, $purgeArchivesOlderThan)
289 {
290 $temporaryArchiveValues = [\Piwik\DataAccess\ArchiveWriter::DONE_OK_TEMPORARY, \Piwik\DataAccess\ArchiveWriter::DONE_ERROR, \Piwik\DataAccess\ArchiveWriter::DONE_ERROR_INVALIDATED];
291 $query = "SELECT idarchive FROM `{$archiveTable}`\n WHERE name LIKE 'done%'\n AND ts_archived < ?\n AND value IN (" . implode(', ', $temporaryArchiveValues) . ")";
292 return Db::fetchAll($query, array($purgeArchivesOlderThan));
293 }
294 public function getArchivesMissingDoneFlag(string $archiveTable) : array
295 {
296 $query = "SELECT DISTINCT idarchive\n FROM `{$archiveTable}`\n WHERE idarchive NOT IN (\n SELECT DISTINCT idarchive\n FROM `{$archiveTable}`\n WHERE name LIKE 'done%'\n )";
297 return Db::fetchAll($query);
298 }
299 public function deleteArchivesWithPeriod($numericTable, ?string $blobTable, $period, $date)
300 {
301 if (SettingsServer::isArchivePhpTriggered()) {
302 StaticContainer::get(LoggerInterface::class)->info('deleteArchivesWithPeriod: ' . $numericTable . ' with period = ' . $period . ' and date = ' . $date);
303 }
304 $query = "DELETE FROM `%s` WHERE period = ? AND ts_archived < ?";
305 $bind = array($period, $date);
306 $queryObj = Db::query(sprintf($query, $numericTable), $bind);
307 $deletedRows = $queryObj->rowCount();
308 if (!empty($blobTable)) {
309 try {
310 $queryObj = Db::query(sprintf($query, $blobTable), $bind);
311 $deletedRows += $queryObj->rowCount();
312 } catch (Exception $e) {
313 // Individual blob tables could be missing
314 $this->logger->debug("Unable to delete archives by period from {blobTable}.", array('blobTable' => $blobTable, 'exception' => $e));
315 }
316 }
317 return $deletedRows;
318 }
319 public function deleteArchiveIds($numericTable, ?string $blobTable, $idsToDelete)
320 {
321 $idsToDelete = array_values($idsToDelete);
322 $idsToDelete = array_map('intval', $idsToDelete);
323 $query = "DELETE FROM `%s` WHERE idarchive IN (" . implode(',', $idsToDelete) . ")";
324 $queryObj = Db::query(sprintf($query, $numericTable), array());
325 $deletedRows = $queryObj->rowCount();
326 if (!empty($blobTable)) {
327 try {
328 $queryObj = Db::query(sprintf($query, $blobTable), array());
329 $deletedRows += $queryObj->rowCount();
330 } catch (Exception $e) {
331 // Individual blob tables could be missing
332 $this->logger->debug("Unable to delete archive IDs from {blobTable}.", array('blobTable' => $blobTable, 'exception' => $e));
333 }
334 }
335 return $deletedRows;
336 }
337 public function deleteOlderArchives(Parameters $params, $name, $tsArchived, $idArchive)
338 {
339 $dateStart = $params->getPeriod()->getDateStart();
340 $dateEnd = $params->getPeriod()->getDateEnd();
341 $numericTable = \Piwik\DataAccess\ArchiveTableCreator::getNumericTable($dateStart, \false);
342 if (empty($numericTable)) {
343 return;
344 }
345 $blobTable = \Piwik\DataAccess\ArchiveTableCreator::getBlobTable($dateStart, \false);
346 $sql = "SELECT idarchive FROM `{$numericTable}` WHERE idsite = ? AND date1 = ? AND date2 = ? AND period = ? AND name = ? AND ts_archived <= ? AND idarchive < ?";
347 $idArchives = Db::fetchAll($sql, [$params->getSite()->getId(), $dateStart->getDatetime(), $dateEnd->getDatetime(), $params->getPeriod()->getId(), $name, $tsArchived, $idArchive]);
348 $idArchives = array_column($idArchives, 'idarchive');
349 if (empty($idArchives)) {
350 return;
351 }
352 if (SettingsServer::isArchivePhpTriggered()) {
353 StaticContainer::get(LoggerInterface::class)->info('deleteOlderArchives with ' . $params . ', name = ' . $name . ', ts_archived < ' . $tsArchived . ', idarchive < ' . $idArchive);
354 }
355 $this->deleteArchiveIds($numericTable, $blobTable, $idArchives);
356 }
357 public function getArchiveIdAndVisits($numericTable, $idSite, $period, $dateStartIso, $dateEndIso, $minDatetimeIsoArchiveProcessedUTC, $doneFlags, $doneFlagValues = null)
358 {
359 $bindSQL = array($idSite, $dateStartIso, $dateEndIso, $period);
360 $sqlWhereArchiveName = self::getNameCondition($doneFlags, $doneFlagValues);
361 $timeStampWhere = '';
362 if ($minDatetimeIsoArchiveProcessedUTC) {
363 $timeStampWhere = " AND arc1.ts_archived >= ? ";
364 $bindSQL[] = $minDatetimeIsoArchiveProcessedUTC;
365 }
366 // NOTE: we can't predict how many segments there will be so there could be lots of nb_visits/nb_visits_converted rows... have to select everything.
367 $sqlQuery = "SELECT arc1.idarchive, arc1.value, arc1.name, arc1.ts_archived, arc1.date1 as startDate, arc2.value as " . \Piwik\DataAccess\ArchiveSelector::NB_VISITS_RECORD_LOOKED_UP . ", arc3.value as " . \Piwik\DataAccess\ArchiveSelector::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "\n FROM `{$numericTable}` arc1\n LEFT JOIN `{$numericTable}` arc2 on arc2.idarchive = arc1.idarchive and (arc2.name = '" . \Piwik\DataAccess\ArchiveSelector::NB_VISITS_RECORD_LOOKED_UP . "')\n LEFT JOIN `{$numericTable}` arc3 on arc3.idarchive = arc1.idarchive and (arc3.name = '" . \Piwik\DataAccess\ArchiveSelector::NB_VISITS_CONVERTED_RECORD_LOOKED_UP . "')\n WHERE arc1.idsite = ?\n AND arc1.date1 = ?\n AND arc1.date2 = ?\n AND arc1.period = ?\n AND ({$sqlWhereArchiveName})\n {$timeStampWhere}\n ORDER BY arc1.ts_archived DESC, arc1.idarchive DESC";
368 $results = Db::fetchAll($sqlQuery, $bindSQL);
369 return $results;
370 }
371 public function createArchiveTable($tableName, $tableNamePrefix)
372 {
373 $db = Db::get();
374 $sql = DbHelper::getTableCreateSql($tableNamePrefix);
375 // replace table name template by real name
376 $tableNamePrefix = Common::prefixTable($tableNamePrefix);
377 $sql = str_replace($tableNamePrefix, $tableName, $sql);
378 try {
379 $db->query($sql);
380 } catch (Exception $e) {
381 // accept mysql error 1050: table already exists, throw otherwise
382 if (!$db->isErrNo($e, '1050')) {
383 throw $e;
384 }
385 }
386 try {
387 if (\Piwik\DataAccess\ArchiveTableCreator::NUMERIC_TABLE === \Piwik\DataAccess\ArchiveTableCreator::getTypeFromTableName($tableName)) {
388 $sequence = new Sequence($tableName);
389 $sequence->create();
390 }
391 } catch (Exception $e) {
392 }
393 }
394 public function getInstalledArchiveTables()
395 {
396 $allArchiveNumeric = Db::get()->fetchCol("SHOW TABLES LIKE '" . Common::prefixTable('archive_numeric%') . "'");
397 $allArchiveBlob = Db::get()->fetchCol("SHOW TABLES LIKE '" . Common::prefixTable('archive_blob%') . "'");
398 return array_merge($allArchiveBlob, $allArchiveNumeric);
399 }
400 public function allocateNewArchiveId($numericTable)
401 {
402 $sequence = new Sequence($numericTable);
403 try {
404 $idarchive = $sequence->getNextId();
405 } catch (Exception $e) {
406 // edge case: sequence was not found, create it now
407 try {
408 $sequence->create();
409 } catch (Exception $ex) {
410 // Ignore duplicate entry error, as that means another request might have already created the sequence
411 if (!Db::get()->isErrNo($ex, \Piwik\Updater\Migration\Db::ERROR_CODE_DUPLICATE_ENTRY)) {
412 throw $ex;
413 }
414 }
415 $idarchive = $sequence->getNextId();
416 }
417 return $idarchive;
418 }
419 public function updateArchiveStatus($numericTable, $archiveId, $doneFlag, $value)
420 {
421 Db::query("UPDATE {$numericTable} SET `value` = ? WHERE idarchive = ? and `name` = ?", array($value, $archiveId, $doneFlag));
422 }
423 public function getArchiveStatus($numericTable, $archiveId, $doneFlag) : int
424 {
425 return (int) Db::fetchOne("SELECT value FROM `{$numericTable}` WHERE idarchive = ? AND `name` = ?", [$archiveId, $doneFlag]);
426 }
427 public function insertRecord($tableName, $fields, $record, $name, $value)
428 {
429 // duplicate idarchives are Ignored, see https://github.com/piwik/piwik/issues/987
430 $query = "INSERT IGNORE INTO `{$tableName}` (" . implode(", ", $fields) . ")\n VALUES (?,?,?,?,?,?,?,?) ON DUPLICATE KEY UPDATE " . end($fields) . " = ?";
431 $bindSql = $record;
432 $bindSql[] = $name;
433 $bindSql[] = $value;
434 $bindSql[] = $value;
435 Db::query($query, $bindSql);
436 return \true;
437 }
438 /**
439 * Returns the site IDs for invalidated archives in an archive table.
440 *
441 * @param string $numericTable The numeric table to search through.
442 * @return int[]
443 */
444 public function getSitesWithInvalidatedArchive($numericTable)
445 {
446 $rows = Db::fetchAll("SELECT DISTINCT idsite FROM `{$numericTable}` WHERE `name` LIKE 'done%' AND `value` IN (" . \Piwik\DataAccess\ArchiveWriter::DONE_INVALIDATED . ")");
447 $result = array();
448 foreach ($rows as $row) {
449 $result[] = $row['idsite'];
450 }
451 return $result;
452 }
453 /**
454 * Get a list of IDs of archives that don't have any matching rows in the site table.
455 * @param string $archiveTableName
456 * @return array of IDs
457 */
458 public function getArchiveIdsForDeletedSites($archiveTableName)
459 {
460 $sql = "SELECT DISTINCT idsite FROM `{$archiveTableName}`";
461 $rows = Db::getReader()->fetchAll($sql, array());
462 if (empty($rows)) {
463 return array();
464 // nothing to delete
465 }
466 $idSitesUsed = array_column($rows, 'idsite');
467 $model = new \Piwik\Plugins\SitesManager\Model();
468 $idSitesExisting = $model->getSitesId();
469 $deletedSites = array_diff($idSitesUsed, $idSitesExisting);
470 if (empty($deletedSites)) {
471 return array();
472 }
473 $deletedSites = array_values($deletedSites);
474 $deletedSites = array_map('intval', $deletedSites);
475 $sql = "SELECT DISTINCT idarchive FROM `{$archiveTableName}` WHERE idsite IN (" . implode(',', $deletedSites) . ")";
476 $rows = Db::getReader()->fetchAll($sql, array());
477 return array_column($rows, 'idarchive');
478 }
479 /**
480 * Get a list of IDs of archives with segments that no longer exist in the DB. Excludes temporary archives that
481 * may still be in use, as specified by the $oldestToKeep passed in.
482 * @param string $archiveTableName
483 * @param array $segments List of segments to match against
484 * @param string $oldestToKeep Datetime string
485 * @return array With keys idarchive, name, idsite
486 */
487 public function getArchiveIdsForSegments($archiveTableName, array $segments, $oldestToKeep)
488 {
489 $segmentClauses = [];
490 foreach ($segments as $segment) {
491 if (!empty($segment['definition'])) {
492 $segmentClauses[] = $this->getDeletedSegmentWhereClause($segment);
493 }
494 }
495 if (empty($segmentClauses)) {
496 return array();
497 }
498 $segmentClauses = implode(' OR ', $segmentClauses);
499 $sql = 'SELECT idarchive FROM `' . $archiveTableName . '`' . ' WHERE ts_archived < ?' . ' AND (' . $segmentClauses . ')';
500 $rows = Db::fetchAll($sql, array($oldestToKeep));
501 return array_column($rows, 'idarchive');
502 }
503 private function getDeletedSegmentWhereClause(array $segment)
504 {
505 $idSite = (int) $segment['enable_only_idsite'];
506 $segmentHash = $segment['hash'] ?? '';
507 // Valid segment hashes are md5 strings - just confirm that it is so it's safe for SQL injection
508 if (!ctype_xdigit($segmentHash)) {
509 throw new Exception($segmentHash . ' expected to be an md5 hash');
510 }
511 $nameClause = 'name LIKE "done' . $segmentHash . '%"';
512 $idSiteClause = '';
513 if ($idSite > 0) {
514 $idSiteClause = ' AND idsite = ' . $idSite;
515 } elseif (!empty($segment['idsites_to_preserve'])) {
516 // A segment for all sites was deleted, but there are segments for a single site with the same definition
517 $idSitesToPreserve = array_map('intval', $segment['idsites_to_preserve']);
518 $idSiteClause = ' AND idsite NOT IN (' . implode(',', $idSitesToPreserve) . ')';
519 }
520 return "({$nameClause} {$idSiteClause})";
521 }
522 /**
523 * Returns the SQL condition used to find successfully completed archives that
524 * this instance is querying for.
525 */
526 private static function getNameCondition($doneFlags, $possibleValues)
527 {
528 $allDoneFlags = "'" . implode("','", $doneFlags) . "'";
529 // create the SQL to find archives that are DONE
530 $result = "((arc1.name IN ({$allDoneFlags}))";
531 if (!empty($possibleValues)) {
532 $result .= " AND (arc1.value IN (" . implode(',', $possibleValues) . ")))";
533 }
534 $result .= ')';
535 return $result;
536 }
537 /**
538 * Marks an archive as in progress if it has not been already. This method must be thread
539 * safe.
540 */
541 public function startArchive($invalidation)
542 {
543 $table = Common::prefixTable('archive_invalidations');
544 // set archive value to in progress if not set already
545 $statement = Db::query("UPDATE `{$table}` SET `status` = ?, `processing_host` = ?, `process_id` = ?, `ts_started` = NOW() WHERE `idinvalidation` = ? AND `status` = ?", [ArchiveInvalidator::INVALIDATION_STATUS_IN_PROGRESS, gethostname() ?: null, Common::getProcessId(), $invalidation['idinvalidation'], ArchiveInvalidator::INVALIDATION_STATUS_QUEUED]);
546 // if we updated, then we've marked the archive as started
547 return $statement->rowCount() > 0;
548 }
549 public function isSimilarArchiveInProgress($invalidation)
550 {
551 $table = Common::prefixTable('archive_invalidations');
552 $bind = [$invalidation['idsite'], $invalidation['period'], $invalidation['date1'], $invalidation['date2'], $invalidation['name'], ArchiveInvalidator::INVALIDATION_STATUS_IN_PROGRESS];
553 if (empty($invalidation['report'])) {
554 $reportClause = "(report IS NULL OR report = '')";
555 } else {
556 $reportClause = "report = ?";
557 $bind[] = $invalidation['report'];
558 }
559 $sql = "SELECT idinvalidation FROM `{$table}` WHERE idsite = ? AND `period` = ? AND date1 = ? AND date2 = ? AND `name` = ? AND `status` = ? AND ts_started IS NOT NULL AND {$reportClause} LIMIT 1";
560 $result = Db::fetchOne($sql, $bind);
561 return !empty($result);
562 }
563 public function getInvalidationsInProgress(array $idSites = [], array $processingHosts = [], ?Date $startTime = null, ?Date $endTime = null) : array
564 {
565 $table = Common::prefixTable('archive_invalidations');
566 $bind = [ArchiveInvalidator::INVALIDATION_STATUS_IN_PROGRESS];
567 $whereConditions = '';
568 if (!empty($processingHosts)) {
569 $whereConditions .= sprintf(' AND `processing_host` IN (%1$s)', Common::getSqlStringFieldsArray($processingHosts));
570 $bind = array_merge($bind, $processingHosts);
571 }
572 if (!empty($idSites)) {
573 $whereConditions .= sprintf(' AND `idsite` IN (' . implode(', ', $idSites) . ')');
574 }
575 if (!empty($startTime)) {
576 $whereConditions .= ' AND `ts_started` > ?';
577 $bind[] = $startTime->toString('Y-m-d H:i:s');
578 }
579 if (!empty($endTime)) {
580 $whereConditions .= ' AND `ts_started` < ?';
581 $bind[] = $endTime->toString('Y-m-d H:i:s');
582 }
583 $sql = "SELECT idinvalidation, idsite, period, date1, date2, name, report, ts_invalidated, ts_started, processing_host, process_id FROM `{$table}` WHERE `status` = ? {$whereConditions} AND ts_started IS NOT NULL ORDER BY ts_started ASC";
584 return Db::fetchAll($sql, $bind);
585 }
586 /**
587 * Gets the next invalidated archive that should be archived in a table.
588 *
589 * @param int $idSite
590 * @param string $archivingStartTime
591 * @param int[]|null $idInvalidationsToExclude
592 * @param bool $useLimit Whether to limit the result set to one result or not. Used in tests only.
593 */
594 public function getNextInvalidatedArchive($idSite, $archivingStartTime, $idInvalidationsToExclude = null, $useLimit = \true)
595 {
596 $table = Common::prefixTable('archive_invalidations');
597 $sql = "SELECT *\n FROM `{$table}`\n WHERE idsite = ? AND status != ? AND ts_invalidated <= ?";
598 $bind = [$idSite, ArchiveInvalidator::INVALIDATION_STATUS_IN_PROGRESS, $archivingStartTime];
599 if (!empty($idInvalidationsToExclude)) {
600 $idInvalidationsToExclude = array_map('intval', $idInvalidationsToExclude);
601 $sql .= " AND idinvalidation NOT IN (" . implode(',', $idInvalidationsToExclude) . ')';
602 }
603 // NOTE: order here is very important to ensure we process lower period archives first, and general 'all' archives before
604 // segment archives, and so we use the latest idinvalidation
605 $sql .= " ORDER BY date1 DESC, period ASC, CHAR_LENGTH(name) ASC, idinvalidation DESC";
606 if ($useLimit) {
607 $sql .= " LIMIT 1";
608 return Db::fetchRow($sql, $bind);
609 } else {
610 return Db::fetchAll($sql, $bind);
611 }
612 }
613 public function deleteInvalidations($archiveInvalidations)
614 {
615 $ids = array_column($archiveInvalidations, 'idinvalidation');
616 $ids = array_map('intval', $ids);
617 $table = Common::prefixTable('archive_invalidations');
618 $sql = "DELETE FROM `{$table}` WHERE idinvalidation IN (" . implode(', ', $ids) . ")";
619 Db::query($sql);
620 }
621 public function removeInvalidationsLike($idSite, $start)
622 {
623 $idSitesClause = $this->getRemoveInvalidationsIdSitesClause($idSite);
624 $table = Common::prefixTable('archive_invalidations');
625 $sql = "DELETE FROM `{$table}` WHERE {$idSitesClause} `name` LIKE ?";
626 Db::query($sql, ['done%.' . str_replace('_', "\\_", $start)]);
627 }
628 public function removeInvalidations($idSite, $plugin, $report)
629 {
630 $idSitesClause = $this->getRemoveInvalidationsIdSitesClause($idSite);
631 $table = Common::prefixTable('archive_invalidations');
632 $sql = "DELETE FROM `{$table}` WHERE {$idSitesClause} `name` LIKE ? AND report = ?";
633 Db::query($sql, ['done%.' . str_replace('_', "\\_", $plugin), $report]);
634 }
635 public function isArchiveAlreadyInProgress($invalidatedArchive)
636 {
637 $table = Common::prefixTable('archive_invalidations');
638 $bind = [$invalidatedArchive['idsite'], $invalidatedArchive['date1'], $invalidatedArchive['date2'], $invalidatedArchive['period'], $invalidatedArchive['name']];
639 $reportClause = "(report = '' OR report IS NULL)";
640 if (!empty($invalidatedArchive['report'])) {
641 $reportClause = "report = ?";
642 $bind[] = $invalidatedArchive['report'];
643 }
644 $sql = "SELECT MAX(idinvalidation) FROM `{$table}` WHERE idsite = ? AND date1 = ? AND date2 = ? AND `period` = ? AND `name` = ? AND status = 1 AND {$reportClause}";
645 $inProgressInvalidation = Db::fetchOne($sql, $bind);
646 return $inProgressInvalidation;
647 }
648 /**
649 * Returns true if there is an archive that exists that can be used when aggregating an archive for $period.
650 *
651 * @param $idSite
652 * @return bool
653 * @throws Exception
654 */
655 public function hasChildArchivesInPeriod($idSite, Period $period)
656 {
657 $date = $period->getDateStart();
658 while ($date->isEarlier($period->getDateEnd()->addPeriod(1, 'month'))) {
659 $archiveTable = \Piwik\DataAccess\ArchiveTableCreator::getNumericTable($date, \false);
660 if (empty($archiveTable)) {
661 $date = $date->addPeriod(1, 'month');
662 // move to next archive table
663 continue;
664 }
665 // we look for any archive that can be used to compute this one. this includes invalidated archives, since it is possible
666 // under certain circumstances for them to exist, when archiving a higher period that includes them. the main example being
667 // the GoogleAnalyticsImporter which disallows the recomputation of invalidated archives for imported data, since that would
668 // essentially get rid of the imported data.
669 $usableDoneFlags = [\Piwik\DataAccess\ArchiveWriter::DONE_OK, \Piwik\DataAccess\ArchiveWriter::DONE_INVALIDATED, \Piwik\DataAccess\ArchiveWriter::DONE_PARTIAL, \Piwik\DataAccess\ArchiveWriter::DONE_OK_TEMPORARY];
670 $sql = "SELECT idarchive\n FROM `{$archiveTable}`\n WHERE idsite = ? AND date1 >= ? AND date2 <= ? AND period < ? AND `name` LIKE 'done%' AND `value` IN (" . implode(', ', $usableDoneFlags) . ")\n LIMIT 1";
671 $bind = [$idSite, $period->getDateStart()->getDatetime(), $period->getDateEnd()->getDatetime(), $period->getId()];
672 $result = (bool) Db::fetchOne($sql, $bind);
673 if ($result) {
674 return \true;
675 }
676 $date = $date->addPeriod(1, 'month');
677 // move to next archive table
678 }
679 return \false;
680 }
681 /**
682 * Returns true if any invalidations exists for the given
683 * $idsite and $doneFlag (name column) for the $period.
684 *
685 * @param mixed $idSite
686 * @param mixed $doneFlag
687 * @param mixed $report
688 * @return bool
689 * @throws Exception
690 */
691 public function hasInvalidationForPeriodAndName($idSite, Period $period, $doneFlag, $report = null)
692 {
693 $table = Common::prefixTable('archive_invalidations');
694 $report = !empty($report) && !is_array($report) ? [$report] : $report;
695 if (empty($report)) {
696 $sql = "SELECT idinvalidation FROM `{$table}` WHERE idsite = ? AND date1 = ? AND date2 = ? AND `period` = ? AND `name` = ? AND `report` IS NULL LIMIT 1";
697 } else {
698 $sql = "SELECT idinvalidation FROM `{$table}` WHERE idsite = ? AND date1 = ? AND date2 = ? AND `period` = ? AND `name` = ? AND `report` IN (" . Common::getSqlStringFieldsArray($report) . ") LIMIT 1";
699 }
700 $bind = [$idSite, $period->getDateStart()->toString(), $period->getDateEnd()->toString(), $period->getId(), $doneFlag];
701 if (!empty($report)) {
702 $bind = array_merge($bind, $report);
703 }
704 $idInvalidation = Db::fetchOne($sql, $bind);
705 if (empty($idInvalidation)) {
706 return \false;
707 }
708 return \true;
709 }
710 public function deleteInvalidationsForSites(array $idSites)
711 {
712 $idSites = array_map('intval', $idSites);
713 $table = Common::prefixTable('archive_invalidations');
714 $sql = "DELETE FROM `{$table}` WHERE idsite IN (" . implode(',', $idSites) . ")";
715 Db::query($sql);
716 }
717 public function deleteInvalidationsForDeletedSites()
718 {
719 $siteTable = Common::prefixTable('site');
720 $table = Common::prefixTable('archive_invalidations');
721 $sql = "DELETE a FROM `{$table}` a LEFT JOIN `{$siteTable}` s ON a.idsite = s.idsite WHERE s.idsite IS NULL";
722 Db::query($sql);
723 }
724 private function getRemoveInvalidationsIdSitesClause($idSite)
725 {
726 if ($idSite === 'all') {
727 return '';
728 }
729 $idSites = is_array($idSite) ? $idSite : [$idSite];
730 $idSites = array_map('intval', $idSites);
731 $idSitesStr = implode(',', $idSites);
732 return "idsite IN ({$idSitesStr}) AND";
733 }
734 /**
735 * Releases in progress invalidations for the given ids
736 *
737 * To avoid duplicate invalidations in the database, the method is also meant to prevent having duplicates after a reset
738 * Therefor below code will check if any of the invalidations to be reset should be removed instead
739 * An invalidation can be safely removed
740 * - if there exists another queued invalidation with the same parameters
741 * - if there is another running invalidation, that had been started after the current one was invalidated
742 * Otherwise the invalidation will be reset
743 *
744 * @param array $idinvalidations
745 * @throws \Zend_Db_Statement_Exception
746 */
747 public function releaseInProgressInvalidations(array $idinvalidations) : int
748 {
749 $idinvalidations = array_map('intval', $idinvalidations);
750 $table = Common::prefixTable('archive_invalidations');
751 $changedCount = 0;
752 $sql = "SELECT * FROM `{$table}` WHERE idinvalidation IN (" . implode(',', $idinvalidations) . ")";
753 $invalidations = Db::fetchAll($sql);
754 // Check invalidations one by one, to ensure we safely remove invalidations in cases where two identical ones are requested to reset
755 foreach ($invalidations as $invalidation) {
756 // Look for other identical invalidations that are either not started or started after the current one had been invalidated
757 $query = "SELECT COUNT(*) FROM `{$table}` WHERE name = ? AND idsite = ? AND date1 = ? AND date2 = ? AND period = ? AND " . "(status = ? OR (status = ? AND ts_started > ?)) AND idinvalidation != ?";
758 $bind = [$invalidation['name'], $invalidation['idsite'], $invalidation['date1'], $invalidation['date2'], $invalidation['period'], ArchiveInvalidator::INVALIDATION_STATUS_QUEUED, ArchiveInvalidator::INVALIDATION_STATUS_IN_PROGRESS, $invalidation['ts_invalidated'], $invalidation['idinvalidation']];
759 if (empty($invalidation['report'])) {
760 $query .= " AND (report IS NULL OR report = '')";
761 } else {
762 $query .= " AND report = ?";
763 $bind[] = $invalidation['report'];
764 }
765 $count = Db::fetchOne($query, $bind);
766 if ($count > 0) {
767 $this->logger->info('Found duplicate invalidation for params (name = {name}, idsite = {idsite}, date1 = {date1}, date2 = {date2}, period = {period}, report = {report}). Removing invalidation {idinvalidation} instead of resetting it.', $invalidation);
768 $sql = "DELETE FROM `{$table}` WHERE status = ? AND idinvalidation = ?";
769 $bind = [ArchiveInvalidator::INVALIDATION_STATUS_IN_PROGRESS, $invalidation['idinvalidation']];
770 $query = Db::query($sql, $bind);
771 $changedCount += $query->rowCount();
772 } else {
773 $sql = "UPDATE `{$table}` SET status = ?, processing_host = NULL, process_id = NULL, ts_started = NULL WHERE status = ? AND idinvalidation = ?";
774 $bind = [ArchiveInvalidator::INVALIDATION_STATUS_QUEUED, ArchiveInvalidator::INVALIDATION_STATUS_IN_PROGRESS, $invalidation['idinvalidation']];
775 $query = Db::query($sql, $bind);
776 $changedCount += $query->rowCount();
777 }
778 }
779 return $changedCount;
780 }
781 public function resetFailedArchivingJobs()
782 {
783 $invalidationsInProgress = $this->getInvalidationsInProgress();
784 $idsToReset = [];
785 foreach ($invalidationsInProgress as $invalidation) {
786 $archiveFailureRecoveryTimeout = GeneralConfig::getConfigValue('archive_failure_recovery_timeout', $invalidation['idsite']);
787 if (empty($invalidation['ts_started']) || Date::factory($invalidation['ts_started'])->getTimestamp() < Date::now()->getTimestamp() - $archiveFailureRecoveryTimeout) {
788 $idsToReset[] = $invalidation['idinvalidation'];
789 }
790 }
791 if (empty($idsToReset)) {
792 return 0;
793 }
794 return $this->releaseInProgressInvalidations($idsToReset);
795 }
796 public function getRecordsContainedInArchives(Date $archiveStartDate, array $idArchives, $requestedRecords) : array
797 {
798 $idArchives = array_map('intval', $idArchives);
799 $idArchives = implode(',', $idArchives);
800 $requestedRecords = is_string($requestedRecords) ? [$requestedRecords] : $requestedRecords;
801 $placeholders = Common::getSqlStringFieldsArray($requestedRecords);
802 $countSql = "SELECT DISTINCT name FROM `%s` WHERE idarchive IN ({$idArchives}) AND name IN ({$placeholders}) LIMIT " . count($requestedRecords);
803 $numericTable = \Piwik\DataAccess\ArchiveTableCreator::getNumericTable($archiveStartDate, \false);
804 $blobTable = \Piwik\DataAccess\ArchiveTableCreator::getBlobTable($archiveStartDate, \false);
805 // if the requested metrics look numeric, prioritize the numeric table, otherwise the blob table. this way, if all the metrics are
806 // found in this table (which will be most of the time), we don't have to query the other table
807 if ($this->doRequestedRecordsLookNumeric($requestedRecords)) {
808 $tablesToSearch = [$numericTable, $blobTable];
809 } else {
810 $tablesToSearch = [$blobTable, $numericTable];
811 }
812 $existingRecords = [];
813 foreach ($tablesToSearch as $tableName) {
814 if (empty($tableName)) {
815 continue;
816 }
817 $sql = sprintf($countSql, $tableName);
818 $rows = Db::fetchAll($sql, $requestedRecords);
819 $existingRecords = array_merge($existingRecords, array_column($rows, 'name'));
820 if (count($existingRecords) == count($requestedRecords)) {
821 break;
822 }
823 }
824 return $existingRecords;
825 }
826 private function isCutOffGroupConcatResult($pair)
827 {
828 $position = strpos($pair, '.');
829 return $position === \false || $position === strlen($pair) - 1;
830 }
831 private function getHashFromDoneFlag($doneFlag)
832 {
833 preg_match('/^done([a-zA-Z0-9]+)/', $doneFlag, $matches);
834 return $matches[1] ?? '';
835 }
836 private function doRequestedRecordsLookNumeric(array $requestedRecords) : bool
837 {
838 foreach ($requestedRecords as $record) {
839 if (preg_match('/^nb_/', $record)) {
840 return \true;
841 }
842 }
843 return \false;
844 }
845 }
846