LogQueryBuilder
6 years ago
Actions.php
6 years ago
ArchiveSelector.php
6 years ago
ArchiveTableCreator.php
6 years ago
ArchiveTableDao.php
6 years ago
ArchiveWriter.php
6 years ago
ArchivingDbAdapter.php
6 years ago
LogAggregator.php
5 years ago
LogQueryBuilder.php
6 years ago
LogTableTemporary.php
6 years ago
Model.php
6 years ago
RawLogDao.php
6 years ago
TableMetadata.php
6 years ago
ArchiveSelector.php
420 lines
| 1 | <?php |
| 2 | /** |
| 3 | * Piwik - free/libre analytics platform |
| 4 | * |
| 5 | * @link https://matomo.org |
| 6 | * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 7 | * |
| 8 | */ |
| 9 | namespace Piwik\DataAccess; |
| 10 | |
| 11 | use Exception; |
| 12 | use Piwik\Archive; |
| 13 | use Piwik\Archive\Chunk; |
| 14 | use Piwik\ArchiveProcessor; |
| 15 | use Piwik\ArchiveProcessor\Rules; |
| 16 | use Piwik\Common; |
| 17 | use Piwik\Date; |
| 18 | use Piwik\Db; |
| 19 | use Piwik\Period; |
| 20 | use Piwik\Period\Range; |
| 21 | use Piwik\Segment; |
| 22 | |
| 23 | /** |
| 24 | * Data Access object used to query archives |
| 25 | * |
| 26 | * A record in the Database for a given report is defined by |
| 27 | * - idarchive = unique ID that is associated to all the data of this archive (idsite+period+date) |
| 28 | * - idsite = the ID of the website |
| 29 | * - date1 = starting day of the period |
| 30 | * - date2 = ending day of the period |
| 31 | * - period = integer that defines the period (day/week/etc.). @see period::getId() |
| 32 | * - ts_archived = timestamp when the archive was processed (UTC) |
| 33 | * - name = the name of the report (ex: uniq_visitors or search_keywords_by_search_engines) |
| 34 | * - value = the actual data (a numeric value, or a blob of compressed serialized data) |
| 35 | * |
| 36 | */ |
class ArchiveSelector
{
    const NB_VISITS_RECORD_LOOKED_UP = "nb_visits";

    const NB_VISITS_CONVERTED_RECORD_LOOKED_UP = "nb_visits_converted";

    /**
     * Creates the model used to run the archive lookup query.
     *
     * @return Model
     */
    private static function getModel()
    {
        return new Model();
    }

    /**
     * Looks up the latest archive for the site, period and segment described by $params and
     * returns its ID together with the visit metrics stored with it.
     *
     * @param ArchiveProcessor\Parameters $params
     * @param Date|string|bool $minDatetimeArchiveProcessedUTC deprecated. will be removed in Matomo 4.
     *                         When not empty, archives whose ts_archived is earlier than this date are
     *                         reported as not usable. Non-object values are converted with Date::factory().
     * @param bool $includeInvalidated forwarded to Rules::getSelectableDoneFlagValues() to decide which
     *                         done flag values make an archive usable for this request.
     * @return array An array with four values:
     *               - the latest archive ID or false if none
     *               - the latest visits value for the latest archive, regardless of whether the archive is invalidated or not
     *               - the latest visits converted value for the latest archive, regardless of whether the archive is invalidated or not
     *               - whether there is an archive that exists or not. if this is true and the latest archive is false, it means
     *                 the archive found was not usable (for example, it was invalidated and we are not looking for invalidated archives)
     * @throws Exception
     */
    public static function getArchiveIdAndVisits(ArchiveProcessor\Parameters $params, $minDatetimeArchiveProcessedUTC = false, $includeInvalidated = true)
    {
        $period = $params->getPeriod();
        $idSite = $params->getSite()->getId();
        $dateStart = $period->getDateStart();
        $dateStartIso = $dateStart->toString('Y-m-d');
        $dateEndIso = $period->getDateEnd()->toString('Y-m-d');

        $numericTable = ArchiveTableCreator::getNumericTable($dateStart);

        $requestedPlugin = $params->getRequestedPlugin();
        $segment = $params->getSegment();
        $plugins = array("VisitsSummary", $requestedPlugin);

        $doneFlags = Rules::getDoneFlags($plugins, $segment);
        $requestedPluginDoneFlags = Rules::getDoneFlags([$requestedPlugin], $segment);
        $doneFlagValues = Rules::getSelectableDoneFlagValues($includeInvalidated, $params);

        $results = self::getModel()->getArchiveIdAndVisits($numericTable, $idSite, $period->getId(), $dateStartIso, $dateEndIso, null, $doneFlags);
        if (empty($results)) { // no archive found
            return [false, false, false, false];
        }

        $result = self::findArchiveDataWithLatestTsArchived($results, $requestedPluginDoneFlags);

        $visits = isset($result['nb_visits']) ? $result['nb_visits'] : false;
        $visitsConverted = isset($result['nb_visits_converted']) ? $result['nb_visits_converted'] : false;

        // Loose comparison is intentional: the stored done flag value may be a numeric string.
        if (isset($result['value'])
            && !in_array($result['value'], $doneFlagValues)
        ) { // the archive cannot be considered valid for this request (has wrong done flag value)
            return [false, $visits, $visitsConverted, true];
        }

        if (!empty($minDatetimeArchiveProcessedUTC) && !is_object($minDatetimeArchiveProcessedUTC)) {
            $minDatetimeArchiveProcessedUTC = Date::factory($minDatetimeArchiveProcessedUTC);
        }

        // the archive is too old
        if ($minDatetimeArchiveProcessedUTC
            && isset($result['idarchive'])
            && Date::factory($result['ts_archived'])->isEarlier($minDatetimeArchiveProcessedUTC)
        ) {
            return [false, $visits, $visitsConverted, true];
        }

        $idArchive = isset($result['idarchive']) ? $result['idarchive'] : false;

        return array($idArchive, $visits, $visitsConverted, true);
    }

    /**
     * Queries and returns archive IDs for a set of sites, periods, and a segment.
     *
     * @param array $siteIds
     * @param array $periods
     * @param Segment $segment
     * @param array $plugins List of plugin names for which data is being requested.
     * @param bool $includeInvalidated true to include archives that are DONE_INVALIDATED, false if only DONE_OK.
     * @return array Archive IDs are grouped by archive name and period range ("date1,date2"), ie,
     *               array(
     *                   'VisitsSummary.done' => array(
     *                       '2010-01-01,2010-01-01' => array(1,2,3)
     *                   )
     *               )
     * @throws Exception if $siteIds is empty
     */
    public static function getArchiveIds($siteIds, $periods, $segment, $plugins, $includeInvalidated = true)
    {
        if (empty($siteIds)) {
            throw new \Exception("Website IDs could not be read from the request, ie. idSite=");
        }

        // site IDs are concatenated into the SQL below, so force them to integers
        $siteIds = array_map('intval', $siteIds);

        $getArchiveIdsSql = "SELECT idsite, name, date1, date2, MAX(idarchive) as idarchive
                               FROM %s
                              WHERE idsite IN (" . implode(',', $siteIds) . ")
                                AND " . self::getNameCondition($plugins, $segment, $includeInvalidated) . "
                                AND %s
                           GROUP BY idsite, date1, date2, name";

        // group the requested periods by the numeric archive table that holds their start date
        $monthToPeriods = array();
        foreach ($periods as $period) {
            /** @var Period $period */
            if ($period->getDateStart()->isLater(Date::now()->addDay(2))) {
                continue; // avoid creating any archive tables in the future
            }

            $table = ArchiveTableCreator::getNumericTable($period->getDateStart());
            $monthToPeriods[$table][] = $period;
        }

        $db = Db::get();

        // for every month within the archive query, select from numeric table
        $result = array();
        foreach ($monthToPeriods as $table => $periodsInTable) {
            $firstPeriod = reset($periodsInTable);

            $bind = array();

            if ($firstPeriod instanceof Range) {
                // only the first range period of the table is queried
                $dateCondition = "date1 = ? AND date2 = ?";
                $bind[] = $firstPeriod->getDateStart()->toString('Y-m-d');
                $bind[] = $firstPeriod->getDateEnd()->toString('Y-m-d');
            } else {
                // we assume there is no range date in $periodsInTable
                $periodConditions = array();
                foreach ($periodsInTable as $period) {
                    $periodConditions[] = "(period = ? AND date1 = ? AND date2 = ?)";
                    $bind[] = $period->getId();
                    $bind[] = $period->getDateStart()->toString('Y-m-d');
                    $bind[] = $period->getDateEnd()->toString('Y-m-d');
                }

                $dateCondition = '(' . implode(' OR ', $periodConditions) . ')';
            }

            $sql = sprintf($getArchiveIdsSql, $table, $dateCondition);

            $archiveIds = $db->fetchAll($sql, $bind);

            // get the archive IDs
            foreach ($archiveIds as $row) {
                //FIXMEA duplicate with Archive.php
                $dateStr = $row['date1'] . ',' . $row['date2'];

                $result[$row['name']][$dateStr][] = $row['idarchive'];
            }
        }

        return $result;
    }

    /**
     * Queries and returns archive data using a set of archive IDs.
     *
     * @param array $archiveIds The IDs of the archives to get data from, indexed by period range
     *                          string (eg, "2009-01-04,2009-01-04"). Each value is a non-empty list of IDs.
     * @param array $recordNames The names of the data to retrieve (ie, nb_visits, nb_actions, etc.).
     *                           Note: You CANNOT pass multiple recordnames if $loadAllSubtables=true.
     * @param string $archiveDataType The archive data type (either, 'blob' or 'numeric').
     * @param int|null|string $idSubtable null if the root blob should be loaded, an integer if a subtable should be
     *                                    loaded and 'all' if all subtables should be loaded.
     * @return array
     * @throws Exception if a period has no archive IDs
     */
    public static function getArchiveData($archiveIds, $recordNames, $archiveDataType, $idSubtable)
    {
        $chunk = new Chunk();

        $db = Db::get();

        // create the SQL to select archive data.
        // Strict comparison: with "==" an integer subtable ID of 0 compares equal to a non-numeric
        // string on PHP < 8 and would wrongly trigger loading all subtables.
        $loadAllSubtables = $idSubtable === Archive::ID_SUBTABLE_LOAD_ALL_SUBTABLES;
        if ($loadAllSubtables) {
            $name = reset($recordNames);

            // select blobs w/ name like "$name_[0-9]+" w/o using RLIKE
            $nameEnd = strlen($name) + 1;
            $nameEndAppendix = $nameEnd + 1;
            $appendix = $chunk->getAppendix();
            $lenAppendix = strlen($appendix);

            $checkForChunkBlob = "SUBSTRING(name, $nameEnd, $lenAppendix) = '$appendix'";
            $checkForSubtableId = "(SUBSTRING(name, $nameEndAppendix, 1) >= '0'
                                    AND SUBSTRING(name, $nameEndAppendix, 1) <= '9')";

            $whereNameIs = "(name = ? OR (name LIKE ? AND ( $checkForChunkBlob OR $checkForSubtableId ) ))";
            $bind = array($name, $name . '%');
        } else {
            if ($idSubtable === null) {
                // select root table or specific record names
                $bind = array_values($recordNames);
            } else {
                // select a subtable id
                $bind = array();
                foreach ($recordNames as $recordName) {
                    // to be backwards compatible we need to look for the exact idSubtable blob and for the chunk
                    // that stores the subtables (a chunk stores many blobs in one blob)
                    $bind[] = $chunk->getRecordNameForTableId($recordName, $idSubtable);
                    $bind[] = self::appendIdSubtable($recordName, $idSubtable);
                }
            }

            $inNames = Common::getSqlStringFieldsArray($bind);
            $whereNameIs = "name IN ($inNames)";
        }

        $getValuesSql = "SELECT value, name, idsite, date1, date2, ts_archived
                           FROM %s
                          WHERE idarchive IN (%s)
                            AND " . $whereNameIs;

        // the data type does not change between periods, so decide once
        $isNumeric = $archiveDataType == 'numeric';

        // get data from every table we're querying
        $rows = array();
        foreach ($archiveIds as $period => $ids) {
            if (empty($ids)) {
                throw new Exception("Unexpected: id archive not found for period '$period' '");
            }

            // $period = "2009-01-04,2009-01-04",
            $date = Date::factory(substr($period, 0, 10));

            if ($isNumeric) {
                $table = ArchiveTableCreator::getNumericTable($date);
            } else {
                $table = ArchiveTableCreator::getBlobTable($date);
            }

            // archive IDs are concatenated into the SQL, so force them to integers
            $ids = array_map('intval', $ids);

            $sql = sprintf($getValuesSql, $table, implode(',', $ids));
            $dataRows = $db->fetchAll($sql, $bind);

            foreach ($dataRows as $row) {
                if ($isNumeric) {
                    $rows[] = $row;
                    continue;
                }

                $row['value'] = self::uncompress($row['value']);

                if ($chunk->isRecordNameAChunk($row['name'])) {
                    self::moveChunkRowToRows($rows, $row, $chunk, $loadAllSubtables, $idSubtable);
                } else {
                    $rows[] = $row;
                }
            }
        }

        return $rows;
    }

    /**
     * Expands a chunk row (one blob holding many subtable blobs) into individual subtable rows
     * and appends them to $rows. Nothing is appended if the chunk value does not unserialize
     * to an array.
     *
     * @param array $rows list of rows to append to (modified in place)
     * @param array $row the chunk row; its 'value' must already be uncompressed
     * @param Chunk $chunk
     * @param bool $loadAllSubtables true to append every subtable in the chunk
     * @param int|null|string $idSubtable the single subtable to append when $loadAllSubtables is false
     */
    private static function moveChunkRowToRows(&$rows, $row, Chunk $chunk, $loadAllSubtables, $idSubtable)
    {
        // $blobs = array([subtableID] = [blob of subtableId])
        $blobs = Common::safe_unserialize($row['value']);

        if (!is_array($blobs)) {
            return;
        }

        // $rawName = eg 'PluginName_ArchiveName'
        $rawName = $chunk->getRecordNameWithoutChunkAppendix($row['name']);

        if ($loadAllSubtables) {
            foreach ($blobs as $subtableId => $blob) {
                $row['value'] = $blob;
                $row['name'] = self::appendIdSubtable($rawName, $subtableId);
                $rows[] = $row;
            }
        } elseif (array_key_exists($idSubtable, $blobs)) {
            $row['value'] = $blobs[$idSubtable];
            $row['name'] = self::appendIdSubtable($rawName, $idSubtable);
            $rows[] = $row;
        }
    }

    /**
     * Builds the record name of a subtable, ie "{$recordName}_{$id}".
     *
     * @param string $recordName
     * @param int|string $id
     * @return string
     */
    public static function appendIdSubtable($recordName, $id)
    {
        return $recordName . "_" . $id;
    }

    /**
     * Uncompresses a blob value with gzuncompress(). Warnings are suppressed on purpose, so
     * data that cannot be uncompressed yields false instead of raising an error.
     *
     * @param string $data
     * @return string|false
     */
    private static function uncompress($data)
    {
        return @gzuncompress($data);
    }

    /**
     * Returns the SQL condition used to find successfully completed archives that
     * this instance is querying for.
     *
     * @param array $plugins
     * @param Segment $segment
     * @param bool $includeInvalidated
     * @return string
     */
    private static function getNameCondition(array $plugins, Segment $segment, $includeInvalidated = true)
    {
        // the flags used to tell how the archiving process for a specific archive was completed,
        // if it was completed
        $doneFlags = Rules::getDoneFlags($plugins, $segment);
        $allDoneFlags = "'" . implode("','", $doneFlags) . "'";

        $possibleValues = Rules::getSelectableDoneFlagValues($includeInvalidated);

        // create the SQL to find archives that are DONE
        return "((name IN ($allDoneFlags)) AND (value IN (" . implode(',', $possibleValues) . ")))";
    }

    /**
     * This method takes the output of Model::getArchiveIdAndVisits() and selects data from the
     * latest archives.
     *
     * For every record name the first matching row wins, so $results is expected to be ordered
     * by ts_archived desc. The returned array may contain:
     * - 'nb_visits' / 'nb_visits_converted': the metric value from the first row belonging to one of
     *   the latest archives found per done flag; 0 if a done flag was found but the metric was not
     * - 'idarchive', 'ts_archived', 'value': taken from the first row whose name is one of
     *   $requestedPluginDoneFlags ('value' being the done flag value)
     *
     * @param array $results rows with the keys 'idarchive', 'name', 'value' and 'ts_archived'
     * @param array $requestedPluginDoneFlags done flag names of the archive being looked for
     * @return array
     */
    private static function findArchiveDataWithLatestTsArchived($results, $requestedPluginDoneFlags)
    {
        $metricNames = [self::NB_VISITS_RECORD_LOOKED_UP, self::NB_VISITS_CONVERTED_RECORD_LOOKED_UP];

        // find latest idarchive for each done flag
        $idArchives = [];
        foreach ($results as $row) {
            $doneFlag = $row['name'];
            if (strpos($doneFlag, 'done') === 0
                && !isset($idArchives[$doneFlag])
            ) {
                $idArchives[$doneFlag] = $row['idarchive'];
            }
        }

        $archiveData = [];

        // gather the latest visits/visits_converted metrics
        foreach ($results as $row) {
            $name = $row['name'];
            if (!isset($archiveData[$name])
                && in_array($name, $metricNames)
                && in_array($row['idarchive'], $idArchives)
            ) {
                $archiveData[$name] = $row['value'];
            }
        }

        // if an archive is found, but the metric data isn't found, we set the value to 0,
        // so it won't get returned as false. this is here because the code used to do this before this change
        // and we didn't want to introduce any side effects. it may be removable in the future.
        if (!empty($idArchives)) {
            foreach ($metricNames as $metric) {
                if (!isset($archiveData[$metric])) {
                    $archiveData[$metric] = 0;
                }
            }
        }

        // set the idarchive & ts_archived for the archive we're looking for
        foreach ($results as $row) {
            if (in_array($row['name'], $requestedPluginDoneFlags)) {
                $archiveData['idarchive'] = $row['idarchive'];
                $archiveData['ts_archived'] = $row['ts_archived'];
                $archiveData['value'] = $row['value'];
                break;
            }
        }

        return $archiveData;
    }
}
| 420 |