PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / trunk
Matomo Analytics – Powerful, Privacy-First Insights for WordPress vtrunk
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / ArchiveProcessor.php
matomo / app / core Last commit date
API 1 month ago Access 3 months ago Application 1 month ago Archive 1 month ago ArchiveProcessor 1 month ago Archiver 2 years ago AssetManager 1 month ago Auth 6 months ago Category 6 months ago Changes 1 month ago CliMulti 1 year ago Columns 1 month ago Concurrency 1 month ago Config 1 month ago Container 1 month ago CronArchive 3 months ago DataAccess 1 month ago DataFiles 2 years ago DataTable 2 weeks ago Db 2 weeks ago DeviceDetector 1 year ago Email 2 years ago Exception 4 months ago Http 4 months ago Intl 3 months ago Log 2 years ago Mail 1 year ago Measurable 6 months ago Menu 1 month ago Metrics 3 months ago Notification 6 months ago Period 1 month ago Plugin 2 weeks ago Policy 1 month ago ProfessionalServices 1 year ago Report 1 year ago ReportRenderer 3 months ago Request 3 months ago Scheduler 1 month ago Segment 1 month ago Session 2 weeks ago Settings 1 month ago Tracker 2 weeks ago Translation 1 month ago Twig 1 year ago UpdateCheck 3 months ago Updater 1 month ago Updates 2 days ago Validators 1 year ago View 1 month ago ViewDataTable 2 weeks ago Visualization 1 year ago Widget 1 month ago .htaccess 2 years ago Access.php 1 month ago Archive.php 1 month ago ArchiveProcessor.php 1 month ago AssetManager.php 1 month ago Auth.php 6 months ago AuthResult.php 6 months ago BaseFactory.php 2 years ago Cache.php 2 years ago CacheId.php 4 months ago CliMulti.php 1 month ago Common.php 2 weeks ago Config.php 1 month ago Console.php 3 months ago Context.php 2 years ago Cookie.php 1 year ago CronArchive.php 1 month ago DI.php 3 months ago DataArray.php 1 month ago DataTable.php 1 month ago Date.php 1 month ago Db.php 1 month ago DbHelper.php 1 month ago Development.php 1 year ago ErrorHandler.php 6 months ago EventDispatcher.php 1 month ago ExceptionHandler.php 4 months ago FileIntegrity.php 1 month ago Filechecks.php 1 year ago Filesystem.php 1 month ago FrontController.php 4 months ago Http.php 1 month ago IP.php 1 year ago Log.php 3 months ago LogDeleter.php 
1 year ago Mail.php 1 year ago Metrics.php 1 month ago NoAccessException.php 2 years ago Nonce.php 6 months ago Notification.php 1 month ago NumberFormatter.php 5 months ago Option.php 5 months ago Period.php 1 month ago Piwik.php 1 month ago Plugin.php 1 month ago Process.php 1 month ago Profiler.php 6 months ago ProxyHeaders.php 4 months ago ProxyHttp.php 5 months ago QuickForm2.php 3 months ago RankingQuery.php 1 month ago ReportRenderer.php 1 month ago Request.php 1 month ago Segment.php 1 month ago Sequence.php 6 months ago Session.php 2 weeks ago SettingsPiwik.php 1 month ago SettingsServer.php 1 year ago Singleton.php 2 years ago Site.php 1 month ago SiteContentDetector.php 1 month ago SupportedBrowser.php 2 years ago TCPDF.php 1 year ago Theme.php 1 year ago Timer.php 1 month ago Tracker.php 1 month ago Twig.php 1 month ago Unzip.php 1 year ago UpdateCheck.php 1 month ago Updater.php 1 month ago UpdaterErrorException.php 2 years ago Updates.php 3 months ago Url.php 3 months ago UrlHelper.php 1 month ago Version.php 2 days ago View.php 1 month ago bootstrap.php 1 year ago dispatch.php 2 years ago testMinimumPhpVersion.php 6 months ago
ArchiveProcessor.php
687 lines
1 <?php
2
3 /**
4 * Matomo - free/libre analytics platform
5 *
6 * @link https://matomo.org
7 * @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
8 */
9 namespace Piwik;
10
11 use Exception;
12 use Piwik\Archive\DataTableFactory;
13 use Piwik\ArchiveProcessor\Parameters;
14 use Piwik\ArchiveProcessor\Rules;
15 use Piwik\Container\StaticContainer;
16 use Piwik\DataAccess\ArchiveWriter;
17 use Piwik\DataAccess\LogAggregator;
18 use Piwik\DataTable\Manager;
19 use Piwik\DataTable\Map;
20 use Piwik\DataTable\Row;
21 use Piwik\Segment\SegmentExpression;
22 use Piwik\Log\LoggerInterface;
23 /**
24 * Used by {@link Piwik\Plugin\Archiver} instances to insert and aggregate archive data.
25 *
26 * ### See also
27 *
28 * - **{@link Piwik\Plugin\Archiver}** - to learn how plugins should implement their own analytics
29 * aggregation logic.
30 * - **{@link Piwik\DataAccess\LogAggregator}** - to learn how plugins can perform data aggregation
31 * across Piwik's log tables.
32 *
33 * ### Examples
34 *
35 * **Inserting numeric data**
36 *
37 * // function in an Archiver descendant
38 * public function aggregateDayReport()
39 * {
40 * $archiveProcessor = $this->getProcessor();
41 *
42 * $myFancyMetric = // ... calculate the metric value ...
43 * $archiveProcessor->insertNumericRecord('MyPlugin_myFancyMetric', $myFancyMetric);
44 * }
45 *
46 * **Inserting serialized DataTables**
47 *
48 * // function in an Archiver descendant
49 * public function aggregateDayReport()
50 * {
51 * $archiveProcessor = $this->getProcessor();
52 *
53 * $maxRowsInTable = Config::getInstance()->General['datatable_archiving_maximum_rows_standard'];
54 *
55 * $dataTable = // ... build by aggregating visits ...
56 * $serializedData = $dataTable->getSerialized($maxRowsInTable, $maxRowsInSubtable = $maxRowsInTable,
57 * $columnToSortBy = Metrics::INDEX_NB_VISITS);
58 *
59 * $archiveProcessor->insertBlobRecords('MyPlugin_myFancyReport', $serializedData);
60 * }
61 *
62 * **Aggregating archive data**
63 *
64 * // function in Archiver descendant
65 * public function aggregateMultipleReports()
66 * {
67 * $archiveProcessor = $this->getProcessor();
68 *
69 * // aggregate a metric
70 * $archiveProcessor->aggregateNumericMetrics('MyPlugin_myFancyMetric');
71 * $archiveProcessor->aggregateNumericMetrics('MyPlugin_mySuperFancyMetric', 'max');
72 *
73 * // aggregate a report
74 * $archiveProcessor->aggregateDataTableRecords('MyPlugin_myFancyReport');
75 * }
76 *
77 */
78 class ArchiveProcessor
79 {
80 /**
81 * @var bool
82 */
83 public static $isRootArchivingRequest = \true;
84 /**
85 * @var \Piwik\DataAccess\ArchiveWriter
86 */
87 private $archiveWriter;
88 /**
89 * @var \Piwik\DataAccess\LogAggregator
90 */
91 private $logAggregator;
92 /**
93 * @var Archive
94 */
95 public $archive = null;
96 /**
97 * @var Parameters
98 */
99 private $params;
100 /**
101 * @var int|false
102 */
103 private $numberOfVisits = \false;
104 private $numberOfVisitsConverted = \false;
105 private $processedDependentSegments = [];
/**
 * Creates a processor bound to one set of archiving parameters and its collaborators.
 *
 * @param Parameters $params The parameters returned later by {@link getParams()}.
 * @param ArchiveWriter $archiveWriter Writer that receives every numeric and blob record
 *                                     inserted through this processor.
 * @param LogAggregator $logAggregator Aggregator returned later by {@link getLogAggregator()}.
 */
public function __construct(Parameters $params, ArchiveWriter $archiveWriter, LogAggregator $logAggregator)
{
    $this->archiveWriter = $archiveWriter;
    $this->logAggregator = $logAggregator;
    $this->params = $params;
}
/**
 * Returns the Archive used to read the records of the sub-periods, creating it on first use.
 *
 * The instance is built from the segment, sub-periods and site IDs of the current parameters,
 * announced once through the `ArchiveProcessor.getArchive` event and then kept in
 * `$this->archive` for subsequent calls.
 *
 * @return Archive
 */
protected function getArchive()
{
    if (!empty($this->archive)) {
        return $this->archive;
    }

    $periods = $this->params->getSubPeriods();
    $siteIds = $this->params->getIdSites();
    $this->archive = \Piwik\Archive::factory($this->params->getSegment(), $periods, $siteIds);

    /**
     * @internal
     */
    \Piwik\Piwik::postEvent('ArchiveProcessor.getArchive', [$this->archive]);

    return $this->archive;
}
/**
 * Stores the visit totals that {@link getNumberOfVisits()} and
 * {@link getNumberOfVisitsConverted()} hand back later.
 *
 * @param int $visits Number of visits.
 * @param int $visitsConverted Number of converted visits (presumably an int as well — confirm
 *                             against callers).
 */
public function setNumberOfVisits($visits, $visitsConverted)
{
    $this->numberOfVisitsConverted = $visitsConverted;
    $this->numberOfVisits = $visits;
}
/**
 * Gives access to the {@link Parameters} object this processor was constructed with. It
 * describes the site, period and segment data is being archived for.
 *
 * @return Parameters
 * @api
 */
public function getParams()
{
    return $this->params;
}
/**
 * Gives access to the `{@link Piwik\DataAccess\LogAggregator}` this processor was constructed
 * with, i.e. the aggregator for the site, period and segment archive data is inserted for.
 *
 * @return LogAggregator
 * @api
 */
public function getLogAggregator()
{
    return $this->logAggregator;
}
152 /**
153 * Array of (column name before => column name renamed) of the columns for which sum operation is invalid.
154 * These columns will be renamed as per this mapping.
155 * @var array
156 */
157 protected static $columnsToRenameAfterAggregation = array(\Piwik\Metrics::INDEX_NB_UNIQ_VISITORS => \Piwik\Metrics::INDEX_SUM_DAILY_NB_UNIQ_VISITORS, \Piwik\Metrics::INDEX_NB_USERS => \Piwik\Metrics::INDEX_SUM_DAILY_NB_USERS);
/**
 * Aggregates one or more reports over all sub-periods of the current period and stores each
 * aggregated report as the blob record of this period.
 *
 * Every record is aggregated recursively (subtables included), counted, serialized with the
 * given row limits and inserted. Temporary DataTables created while handling a record are
 * released before the next record is processed.
 *
 * @param string|array $recordNames Name(s) of the report(s) to aggregate, eg, `'Referrers_type'`.
 * @param int $maximumRowsInDataTableLevelZero Maximum number of rows kept in the top level DataTable.
 * @param int $maximumRowsInSubDataTable Maximum number of rows kept in each subtable.
 * @param string|null $defaultColumnToSortByBeforeTruncation Column to sort by before truncating. When empty,
 *                                                         INDEX_NB_VISITS or `nb_visits` is used if the
 *                                                         aggregated table has such a column.
 * @param array $columnsAggregationOperation Operations for aggregating columns, see {@link Row::sumRow()}.
 *                                           Accepted by reference; this method only reads it.
 * @param array $columnsToRenameAfterAggregation Map of old => new names for columns that cannot simply be
 *                                               summed, eg,
 *                                               `array('nb_uniq_visitors' => 'sum_daily_nb_uniq_visitors')`.
 * @param string[]|bool $countRowsRecursive Record names that need a recursive row count, or true to
 *                                          count recursively for every record.
 * @param string[] $countLeafRows Record names that need a leaf row count.
 * @return array Row counts per record, taken before truncation, eg,
 *
 *                   array(
 *                       'report1' => array('level0' => ..., 'recursive' => ..., 'leafs' => ...),
 *                       'report2' => array('level0' => ..., 'recursive' => ...),
 *                       ...
 *                   )
 *
 *               `level0` is always present; `recursive` and `leafs` only when requested for that record.
 * @api
 */
public function aggregateDataTableRecords($recordNames, $maximumRowsInDataTableLevelZero = null, $maximumRowsInSubDataTable = null, $defaultColumnToSortByBeforeTruncation = null, &$columnsAggregationOperation = null, $columnsToRenameAfterAggregation = null, $countRowsRecursive = \true, array $countLeafRows = [])
{
    /** @var LoggerInterface $logger */
    $logger = StaticContainer::get(LoggerInterface::class);

    $recordNames = is_array($recordNames) ? $recordNames : [$recordNames];
    $archiveDescription = (string) $this->params;

    $rowCounts = [];
    foreach ($recordNames as $recordName) {
        // remember the newest table ID so everything created for this record can be released afterwards
        $tableIdBeforeAggregation = Manager::getInstance()->getMostRecentTableId();

        $logger->debug("aggregating record {record} [archive = {archive}]", ['record' => $recordName, 'archive' => $archiveDescription]);

        $table = $this->aggregateDataTableRecord($recordName, $columnsAggregationOperation, $columnsToRenameAfterAggregation);

        $rowCounts[$recordName]['level0'] = $table->getRowsCount();

        $wantsRecursiveCount = $countRowsRecursive === true
            || (is_array($countRowsRecursive) && in_array($recordName, $countRowsRecursive));
        if ($wantsRecursiveCount) {
            $rowCounts[$recordName]['recursive'] = $table->getRowsCountRecursive();
        }

        if (in_array($recordName, $countLeafRows)) {
            $rowCounts[$recordName]['leafs'] = $table->getLeafRowsCount();
        }

        $sortColumn = $defaultColumnToSortByBeforeTruncation;
        if (empty($sortColumn)) {
            $availableColumns = $table->getColumns();
            if (in_array(\Piwik\Metrics::INDEX_NB_VISITS, $availableColumns)) {
                $sortColumn = \Piwik\Metrics::INDEX_NB_VISITS;
            } elseif (in_array('nb_visits', $availableColumns)) {
                $sortColumn = 'nb_visits';
            }
        }

        $serialized = $table->getSerialized($maximumRowsInDataTableLevelZero, $maximumRowsInSubDataTable, $sortColumn);
        \Piwik\Common::destroy($table);

        $this->insertBlobRecord($recordName, $serialized);
        unset($serialized);

        Manager::getInstance()->deleteAll($tableIdBeforeAggregation);
    }

    return $rowCounts;
}
/**
 * Aggregates numeric metrics over all sub-periods of the current period and inserts each
 * aggregated value as a numeric record of this period.
 *
 * @param array|string $columns Metric name or array of metric names to aggregate.
 * @param string|string[]|false $operationsToApply The operation to apply to the metrics. Either `'sum'`,
 *                                                 `'max'` or `'min'`. Can also be an array mapping
 *                                                 record names to operations.
 * @return array|int The aggregated values keyed by metric name, eg,
 *
 *                       array(
 *                           'nb_visits' => 3040,
 *                           'nb_hits' => 405
 *                       )
 *
 *                   When exactly one value was aggregated (eg, `$columns` is `'nb_visits'` or
 *                   `array('nb_visits')`), that value itself (`3040`) is returned instead of an array.
 * @api
 */
public function aggregateNumericMetrics($columns, $operationsToApply = \false)
{
    $aggregated = $this->getAggregatedNumericMetrics($columns, $operationsToApply);

    foreach ($aggregated as $metricName => $metricValue) {
        $this->insertNumericRecord($metricName, $metricValue);
    }

    // a single aggregated value is returned bare, several as the name => value array
    return count($aggregated) === 1 ? reset($aggregated) : $aggregated;
}
/**
 * Returns the visit count stored through {@link setNumberOfVisits()}.
 *
 * @return int
 * @throws Exception If the count still has its initial `false` value, i.e. was never set.
 */
public function getNumberOfVisits()
{
    if ($this->numberOfVisits !== false) {
        return $this->numberOfVisits;
    }

    throw new Exception("visits should have been set here");
}
/**
 * Returns the converted-visit count stored through {@link setNumberOfVisits()}.
 *
 * Unlike {@link getNumberOfVisits()} this does not throw when nothing was set; the initial
 * value `false` is returned as is.
 *
 * @return int|false
 */
public function getNumberOfVisitsConverted()
{
    return $this->numberOfVisitsConverted;
}
/**
 * Inserts several numeric records into the archive of this processor's site, period and
 * segment by passing each entry to {@link insertNumericRecord()}.
 *
 * @param array $numericRecords Map of record name => numeric value, eg,
 *
 *                                  array('Referrers_distinctKeywords' => 23, 'Referrers_distinctCampaigns' => 234)
 * @api
 */
public function insertNumericRecords($numericRecords)
{
    foreach ($numericRecords as $recordName => $recordValue) {
        $this->insertNumericRecord($recordName, $recordValue);
    }
}
/**
 * Inserts a single numeric record into the archive of this processor's site, period and
 * segment.
 *
 * The value is rounded to two decimals (null counts as 0), normalised to use a dot as decimal
 * separator and handed to the archive writer.
 *
 * NOTE(review): earlier documentation states that values equal to `0` are not inserted. This
 * method forwards every value, so that filtering presumably happens in the ArchiveWriter — confirm.
 *
 * @param string $name The name of the numeric value, eg, `'Referrers_distinctKeywords'`.
 * @param float|null $value The numeric value.
 * @api
 */
public function insertNumericRecord($name, $value)
{
    $rounded = round($value ?? 0, 2);
    $normalized = \Piwik\Common::forceDotAsSeparatorForDecimalPoint($rounded);

    $this->archiveWriter->insertRecord($name, $normalized);
}
/**
 * Inserts one or more blob records into the archive of this processor's site, period and
 * segment. The call is delegated unchanged to the archive writer.
 *
 * @param string $name The name of the record, eg, 'Referrers_type'.
 * @param string|array $values A blob string or an array of blob strings. For an array, the first
 *                             element is stored under `$name`; every other element is stored
 *                             under `$name . '_' . $index`, where `$index` is the element's
 *                             index in `$values`.
 * @api
 */
public function insertBlobRecord($name, $values)
{
    $this->archiveWriter->insertBlobRecord($name, $values);
}
/**
 * Loads every blob stored under `$name` for the sub-periods and adds them together into one
 * DataTable.
 *
 * A fatal-error breadcrumb naming the record is pushed while the aggregation runs and is
 * always popped again, also when the aggregation throws.
 *
 * @param string $name
 * @param array $columnsAggregationOperation Operations for aggregating columns, @see Row::sumRow()
 * @param array $columnsToRenameAfterAggregation Map of old name => new name for columns the sum operation is
 *                                               not valid on (eg. nb_uniq_visitors->sum_daily_nb_uniq_visitors)
 * @return DataTable
 */
protected function aggregateDataTableRecord($name, $columnsAggregationOperation = null, $columnsToRenameAfterAggregation = null)
{
    try {
        \Piwik\ErrorHandler::pushFatalErrorBreadcrumb(__CLASS__, ['name' => $name]);

        $blobRows = $this->getArchive()->querySingleBlob($name);

        return $this->getAggregatedDataTableMapFromBlobs($blobRows, $columnsAggregationOperation, $columnsToRenameAfterAggregation, $name);
    } finally {
        \Piwik\ErrorHandler::popFatalErrorBreadcrumb();
    }
}
/**
 * Builds one aggregated DataTable out of a stream of archived blob rows.
 *
 * Each row is expected to offer the keys `date1`, `date2`, `name` and `value`. A row whose
 * name equals `$name` is a top level table and is added to the result. Any other row is
 * treated as a subtable: its ID is the numeric suffix of the blob name, and it is added to
 * the subtable of the result row that was registered for that period and ID by an earlier
 * blob. Subtable blobs without a registered parent row are logged and skipped.
 *
 * @param \Iterator $dataTableBlobs Blob rows to aggregate.
 * @param array|null $columnsAggregationOperation Operations for aggregating columns, stored as metadata
 *                                                on the tables created here.
 * @param array|null $columnsToRenameAfterAggregation Map of old => new column names, applied to blobs whose
 *                                                    columns were not renamed yet.
 * @param string $name Name of the record being aggregated.
 * @return DataTable
 */
protected function getAggregatedDataTableMapFromBlobs(\Iterator $dataTableBlobs, $columnsAggregationOperation, $columnsToRenameAfterAggregation, $name)
{
    // maps period & subtable ID in database to the Row instance in $aggregated that subtable should be
    // added to when encountered: [$row['date1'].','.$row['date2']][$tableId] = $row in $aggregated
    /** @var Row[][] $parentRowsBySubtableId */
    $parentRowsBySubtableId = [];

    $aggregated = new \Piwik\DataTable();
    if (!empty($columnsAggregationOperation)) {
        $aggregated->setMetadata(\Piwik\DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
    }

    foreach ($dataTableBlobs as $blobRow) {
        $periodKey = $blobRow['date1'] . ',' . $blobRow['date2'];

        $subtableId = null;
        if ($blobRow['name'] != $name) {
            $subtableId = $this->getSubtableIdFromBlobName($blobRow['name']);
        }

        $blobTable = \Piwik\DataTable::fromSerializedArray($blobRow['value']);

        // see https://github.com/piwik/piwik/issues/4377
        $blobTable->filter(function ($table) use ($columnsToRenameAfterAggregation) {
            if (!$this->areColumnsNotAlreadyRenamed($table)) {
                // Columns of week, month, ... archives were already renamed when those archives were
                // built from their own sub-periods, so renaming again would be wasted work. Skipping
                // it can save 10% or more when aggregating Month, Year and Range archives.
                return;
            }

            $this->renameColumnsAfterAggregation($table, $columnsToRenameAfterAggregation);
        });

        if ($subtableId === null) {
            $targetTable = $aggregated;
        } else {
            if (empty($parentRowsBySubtableId[$periodKey][$subtableId])) {
                // sanity check
                StaticContainer::get(LoggerInterface::class)->info('Unexpected state when aggregating DataTable, unknown period/table ID combination encountered: {period} - {tableId}.' . ' This either means the SQL to order blobs is behaving incorrectly or the blob data is corrupt in some way.', ['period' => $periodKey, 'tableId' => $subtableId]);
                continue;
            }

            $parentRow = $parentRowsBySubtableId[$periodKey][$subtableId];
            if (!$parentRow->getIdSubDataTable()) {
                $freshSubtable = new \Piwik\DataTable();
                $freshSubtable->setMetadata(\Piwik\DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
                $parentRow->setSubtable($freshSubtable);
            }

            $targetTable = $parentRow->getSubtable();
        }

        $targetTable->addDataTable($blobTable);

        // register the result rows that subtables of this blob have to be merged into later
        foreach ($blobTable->getRows() as $blobTableRow) {
            $label = $blobTableRow->getColumn('label');
            $childSubtableId = $blobTableRow->getIdSubDataTable();
            if (empty($childSubtableId)) {
                continue;
            }

            $parentRowsBySubtableId[$periodKey][$childSubtableId] = $targetTable->getRowFromLabel($label);
        }

        \Piwik\Common::destroy($blobTable);
        unset($blobTable);
    }

    return $aggregated;
}
/**
 * Extracts the subtable ID from a blob record name.
 *
 * The ID is the part after the last underscore of the name (or the whole name when it has no
 * underscore), provided that part is numeric.
 *
 * @param string $recordName Blob record name, eg, `'Referrers_type_5'`.
 * @return string|null The numeric suffix as a string, or null when the suffix is not numeric.
 */
private function getSubtableIdFromBlobName($recordName)
{
    $segments = explode('_', $recordName);
    $suffix = array_pop($segments);

    return is_numeric($suffix) ? $suffix : null;
}
/**
 * Tells whether the columns of the given table still have to be renamed after aggregation.
 *
 * That is the case when the table carries no period metadata or when its period is a day.
 *
 * Note: public only for use in closure in PHP 5.3.
 *
 * @param DataTable $table
 * @return bool
 */
public function areColumnsNotAlreadyRenamed($table)
{
    $period = $table->getMetadata(DataTableFactory::TABLE_METADATA_PERIOD_INDEX);
    if (!$period) {
        return true;
    }

    return $period->getLabel() === 'day';
}
/**
 * Resolves the aggregation operation to use for every given column.
 *
 * @param string[] $columns Column names.
 * @param string|string[]|false $defaultOperation Either one operation used for all columns or a map of
 *                                                column name => operation. Columns left without a
 *                                                (non-empty) operation get the one guessed by
 *                                                {@link guessOperationForColumn()}.
 * @return string[] Map of column name => operation.
 */
protected function getOperationForColumns($columns, $defaultOperation)
{
    $operations = [];

    foreach ($columns as $columnName) {
        if (is_array($defaultOperation)) {
            $operation = $defaultOperation[$columnName] ?? null;
        } else {
            $operation = $defaultOperation;
        }

        $operations[$columnName] = empty($operation)
            ? $this->guessOperationForColumn($columnName)
            : $operation;
    }

    return $operations;
}
/**
 * Replaces the `nb_uniq_visitors` and `nb_users` columns of an aggregated row with values
 * computed for the whole period, or removes them when they cannot be provided.
 *
 * Nothing happens when the row has neither column. The columns are removed when unique
 * visitors are disabled for the period, when the list of sites to query is empty, or - for
 * periods other than day - when several sites are involved and the calculation should be
 * skipped for multiple sites.
 *
 * @param Row $row The row to update in place.
 * @throws Exception When several sites have to be queried but fingerprints are not the same
 *                   across websites.
 */
protected function enrichWithUniqueVisitorsMetric(Row $row)
{
    if (!($row->getColumn('nb_uniq_visitors') !== false || $row->getColumn('nb_users') !== false)) {
        return;
    }

    $removeUniqueColumns = static function () use ($row) {
        $row->deleteColumn('nb_uniq_visitors');
        $row->deleteColumn('nb_users');
    };

    $periodLabel = $this->getParams()->getPeriod()->getLabel();
    if (!\Piwik\SettingsPiwik::isUniqueVisitorsEnabled($periodLabel)) {
        $removeUniqueColumns();
        return;
    }

    $sites = $this->getIdSitesToComputeNbUniques();
    $siteCount = count($sites);

    if ($siteCount > 1 && Rules::shouldSkipUniqueVisitorsCalculationForMultipleSites()) {
        // for day we still keep the aggregated metric but for other periods we remove it as it becomes
        // too inaccurate
        if ($periodLabel != 'day') {
            $removeUniqueColumns();
        }
        return;
    }

    if (empty($sites)) {
        // a plugin disabled running below query by removing all sites.
        $removeUniqueColumns();
        return;
    }

    if ($siteCount === 1) {
        $uniqueVisitorsMetric = \Piwik\Metrics::INDEX_NB_UNIQ_VISITORS;
    } else {
        if (!\Piwik\SettingsPiwik::isSameFingerprintAcrossWebsites()) {
            throw new Exception("Processing unique visitors across websites is enabled for this instance,\n but to process this metric you must first set enable_fingerprinting_across_websites=1\n in the config file, under the [Tracker] section.");
        }
        $uniqueVisitorsMetric = \Piwik\Metrics::INDEX_NB_UNIQ_FINGERPRINTS;
    }

    $uniques = $this->computeNbUniques([\Piwik\Metrics::INDEX_NB_USERS, $uniqueVisitorsMetric], $sites);

    // Edge case described in https://github.com/piwik/piwik/issues/9357: unique visitors are processed
    // after nb_visits, so a visit tracked in between can make the unique count higher than the visit
    // count, which is impossible by definition. The unique count is therefore capped at nb_visits.
    $visits = $row->getColumn('nb_visits');
    if ($visits !== false && $uniques[$uniqueVisitorsMetric] !== false) {
        $uniques[$uniqueVisitorsMetric] = min($uniques[$uniqueVisitorsMetric], $visits);
    }

    $row->setColumn('nb_uniq_visitors', $uniques[$uniqueVisitorsMetric]);
    $row->setColumn('nb_users', $uniques[\Piwik\Metrics::INDEX_NB_USERS]);
}
/**
 * Derives the aggregation operation from a column name: names starting with `max_` use
 * `'max'`, names starting with `min_` use `'min'`, everything else is summed.
 *
 * @param string $column Column name.
 * @return string `'max'`, `'min'` or `'sum'`.
 */
protected function guessOperationForColumn($column)
{
    foreach (['max', 'min'] as $operation) {
        if (strpos($column, $operation . '_') === 0) {
            return $operation;
        }
    }

    return 'sum';
}
/**
 * Determines the site IDs to look at when processing unique visitors and users.
 *
 * Starts with the ID of the site being archived and lets listeners of the
 * `ArchiveProcessor.ComputeNbUniques.getIdSites` event change the list.
 *
 * @return array The (possibly modified, possibly empty) list of site IDs.
 */
private function getIdSitesToComputeNbUniques()
{
    $params = $this->getParams();
    $idSites = [$params->getSite()->getId()];

    /**
     * Triggered to change which site ids should be looked at when processing unique visitors and users.
     *
     * @param array &$idSites An array with one idSite. This site is being archived currently. To cancel the query
     *                        you can change this value to an empty array. To include other sites in the query you
     *                        can add more idSites to this list of idSites.
     * @param Period $period The period that is being requested to be archived.
     * @param Segment $segment The segment that is requested to be archived.
     */
    \Piwik\Piwik::postEvent('ArchiveProcessor.ComputeNbUniques.getIdSites', [&$idSites, $params->getPeriod(), $params->getSegment()]);

    return $idSites;
}
/**
 * Processes the number of unique visitors for the given period.
 *
 * This is the only Period metric (ie. week/month/year/range) that is processed from the logs
 * directly, since unique visitors cannot be summed like other metrics.
 *
 * The log aggregator is pointed at `$sites` only for the duration of the query; its previous
 * site list is restored afterwards, also when the query throws.
 *
 * @param array $metrics Metric IDs for which to aggregate the count of values.
 * @param int[] $sites A list of idSites that should be included.
 * @return array|null The row fetched from the query; the metric ID is the key and the metric value the
 *                    value. NOTE(review): earlier documentation mentions null for a cancelled query, but
 *                    no such path is visible in this method — confirm what `fetch()` yields in that case.
 */
protected function computeNbUniques($metrics, $sites)
{
    $aggregator = $this->getLogAggregator();

    $previousSites = $aggregator->getSites();
    $aggregator->setSites($sites);
    try {
        $query = $aggregator->queryVisitsByDimension([], false, [], $metrics);
    } finally {
        $aggregator->setSites($previousSites);
    }

    return $query->fetch();
}
/**
 * Sums the given data into one new DataTable. For a Map, all DataTables in the map (nested
 * maps included) are summed; a plain DataTable is simply added to the new table.
 *
 * @param DataTable|DataTable\Map $data
 * @param array $columnsAggregationOperation Operations for aggregating columns; stored as metadata on the
 *                                           new table when not empty.
 * @return DataTable
 */
protected function getAggregatedDataTableMap($data, $columnsAggregationOperation)
{
    $aggregated = new \Piwik\DataTable();

    if (!empty($columnsAggregationOperation)) {
        $aggregated->setMetadata(\Piwik\DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
    }

    if (!($data instanceof \Piwik\DataTable\Map)) {
        $aggregated->addDataTable($data);
        return $aggregated;
    }

    // as $date => $tableToSum
    $this->aggregatedDataTableMapsAsOne($data, $aggregated);

    return $aggregated;
}
/**
 * Adds every DataTable contained in the map to `$aggregated`, descending into nested maps.
 *
 * @param Map $map The map whose tables are aggregated.
 * @param DataTable $aggregated The destination table, modified in place.
 */
protected function aggregatedDataTableMapsAsOne(Map $map, \Piwik\DataTable $aggregated)
{
    foreach ($map->getDataTables() as $entry) {
        if ($entry instanceof Map) {
            $this->aggregatedDataTableMapsAsOne($entry, $aggregated);
            continue;
        }

        $aggregated->addDataTable($entry);
    }
}
/**
 * Renames columns in every row of the table and, recursively, of all its subtables.
 *
 * Note: public only for use in closure in PHP 5.3.
 *
 * @param DataTable $table The table to modify in place.
 * @param array|null $columnsToRenameAfterAggregation Map of old column name => new column name. When null,
 *                                                    the class default mapping is used; when empty,
 *                                                    nothing is renamed.
 */
public function renameColumnsAfterAggregation(\Piwik\DataTable $table, $columnsToRenameAfterAggregation = null)
{
    $renames = $columnsToRenameAfterAggregation ?? self::$columnsToRenameAfterAggregation;
    if (empty($renames)) {
        return;
    }

    foreach ($table->getRows() as $row) {
        foreach ($renames as $currentName => $renamedTo) {
            $row->renameColumn($currentName, $renamedTo);
        }

        $subtable = $row->getSubtable();
        if ($subtable) {
            $this->renameColumnsAfterAggregation($subtable, $renames);
        }
    }
}
/**
 * Aggregates the given numeric metrics over the sub-periods without inserting them.
 *
 * The sub-period values are combined into a single row, the unique visitor metrics of that
 * row are recomputed or removed, and every requested column that has no value ends up as 0.
 * When the numeric data was built without any archive, the columns of an empty row are
 * returned instead.
 *
 * @param array|string $columns Metric name or array of metric names.
 * @param string|string[]|false $operationsToApply Operation for all metrics or a map of metric name =>
 *                                                 operation; missing ones are guessed from the name.
 * @return array Map of metric name => aggregated value.
 * @throws Exception When the aggregation produced more than one row.
 */
protected function getAggregatedNumericMetrics($columns, $operationsToApply)
{
    $columns = is_array($columns) ? $columns : [$columns];

    $operationForColumn = $this->getOperationForColumns($columns, $operationsToApply);

    $numericTable = $this->getArchive()->getDataTableFromNumeric($columns);
    if ($numericTable->wasBuiltWithoutArchives()) {
        return (new Row())->getColumns();
    }

    $results = $this->getAggregatedDataTableMap($numericTable, $operationForColumn);
    if ($results->getRowsCount() > 1) {
        throw new Exception("A DataTable is an unexpected state:" . var_export($results, true));
    }

    $rowMetrics = $results->getFirstRow();
    if ($rowMetrics === false) {
        $rowMetrics = new Row();
    }

    $this->enrichWithUniqueVisitorsMetric($rowMetrics);
    $this->renameColumnsAfterAggregation($results, self::$columnsToRenameAfterAggregation);

    $metrics = $rowMetrics->getColumns();
    foreach ($columns as $columnName) {
        // requested metrics without a value are reported as 0
        $metrics[$columnName] = $metrics[$columnName] ?? 0;
    }

    return $metrics;
}
/**
 * Initiate archiving for a plugin during an ongoing archiving. The plugin can be another
 * plugin or the same plugin.
 *
 * This method should be called during archiving when one plugin uses the report of another
 * plugin with a segment. It will ensure reports for that segment & plugin will be archived
 * without initiating archiving for every plugin with that segment (which would be a performance
 * killer).
 *
 * Nothing is done when this call is itself part of a dependent archiving run, when only the
 * requested reports are processed for the period, when the combined segment and plugin are the
 * ones being processed right now, when the combined segment is pre-processed anyway, or when
 * this processor already archived the same dependency.
 *
 * @param string $plugin
 * @param string $segment
 */
public function processDependentArchive($plugin, $segment)
{
    if (!self::$isRootArchivingRequest) {
        // prevent all recursion
        return;
    }

    $params = $this->getParams();

    // range archives are always processed on demand, so pre-processing dependent archives is not required
    // here
    if (Rules::shouldProcessOnlyReportsRequestedInArchiveQuery($params->getPeriod()->getLabel())) {
        return;
    }

    $idSites = [$params->getSite()->getId()];

    // important to use the original segment string when combining. As the API itself would combine the original string.
    // this prevents a bug where the API would use the segment
    // userId!@%2540matomo.org;userId!=hello%2540matomo.org;visitorType==new
    // vs here we would use
    // userId!@%40matomo.org;userId!=hello%40matomo.org;visitorType==new
    // thus these would result in different segment hashes and therefore the reports would either show 0 or archive the data twice
    $originSegmentString = $params->getSegment()->getOriginalString();
    $newSegment = \Piwik\Segment::combine($originSegmentString, SegmentExpression::AND_DELIMITER, $segment);
    if (!empty($originSegmentString) && $newSegment === $segment && $params->getRequestedPlugin() === $plugin) {
        // being processed now
        return;
    }

    $newSegment = new \Piwik\Segment($newSegment, $idSites, $params->getDateTimeStart(), $params->getDateTimeEnd());
    if (\Piwik\ArchiveProcessor\Rules::isSegmentPreProcessed($idSites, $newSegment)) {
        // will be processed anyway
        return;
    }

    // The below check is meant to avoid archiving the same dependency multiple times.
    // Keys are plain strings, so they are compared strictly.
    $processedSegmentKey = $params->getSite()->getId() . $params->getPeriod()->getDateStart() . $params->getPeriod()->getLabel() . $newSegment->getOriginalString();
    if (in_array($processedSegmentKey . $plugin, $this->processedDependentSegments, true)) {
        return;
    }

    self::$isRootArchivingRequest = false;
    try {
        $invalidator = StaticContainer::get('Piwik\\Archive\\ArchiveInvalidator');

        // Ensure to always invalidate VisitsSummary before any other plugin archive.
        // Otherwise those archives might get build with outdated VisitsSummary data
        if ($plugin !== 'VisitsSummary' && !in_array($processedSegmentKey . 'VisitsSummary', $this->processedDependentSegments, true)) {
            $this->archiveDependentPlugin($invalidator, $idSites, $newSegment, 'VisitsSummary');
            $this->processedDependentSegments[] = $processedSegmentKey . 'VisitsSummary';
        }

        $this->archiveDependentPlugin($invalidator, $idSites, $newSegment, $plugin);
        $this->processedDependentSegments[] = $processedSegmentKey . $plugin;
    } finally {
        self::$isRootArchivingRequest = true;
    }
}

/**
 * Invalidates and then re-archives a single plugin for the current site and period with the
 * given segment, archiving only that plugin.
 *
 * @param object $invalidator The archive invalidator service.
 * @param int[] $idSites IDs of the sites whose archives are invalidated.
 * @param \Piwik\Segment $segment The combined segment to archive for.
 * @param string $plugin Name of the plugin to archive.
 */
private function archiveDependentPlugin($invalidator, array $idSites, \Piwik\Segment $segment, $plugin)
{
    $params = $this->getParams();

    $invalidator->markArchivesAsInvalidated($idSites, [$params->getPeriod()->getDateStart()], $params->getPeriod()->getLabel(), $segment, false, false, $plugin, false, true);

    $parameters = new \Piwik\ArchiveProcessor\Parameters($params->getSite(), $params->getPeriod(), $segment);
    $parameters->onlyArchiveRequestedPlugin();

    $archiveLoader = new \Piwik\ArchiveProcessor\Loader($parameters);
    $archiveLoader->prepareArchive($plugin);
}
/**
 * Gives access to the archive writer this processor was constructed with, i.e. the writer
 * all numeric and blob records are inserted through.
 *
 * @return ArchiveWriter
 */
public function getArchiveWriter()
{
    return $this->archiveWriter;
}
686 }
687