PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / trunk
Matomo Analytics – Powerful, Privacy-First Insights for WordPress vtrunk
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / RankingQuery.php
matomo / app / core Last commit date
API 1 month ago Access 3 months ago Application 1 month ago Archive 1 month ago ArchiveProcessor 1 month ago Archiver 2 years ago AssetManager 1 month ago Auth 6 months ago Category 6 months ago Changes 1 month ago CliMulti 1 year ago Columns 1 month ago Concurrency 1 month ago Config 1 month ago Container 1 month ago CronArchive 3 months ago DataAccess 1 month ago DataFiles 2 years ago DataTable 2 weeks ago Db 2 weeks ago DeviceDetector 1 year ago Email 2 years ago Exception 4 months ago Http 4 months ago Intl 3 months ago Log 2 years ago Mail 1 year ago Measurable 6 months ago Menu 1 month ago Metrics 3 months ago Notification 6 months ago Period 1 month ago Plugin 2 weeks ago Policy 1 month ago ProfessionalServices 1 year ago Report 1 year ago ReportRenderer 3 months ago Request 3 months ago Scheduler 1 month ago Segment 1 month ago Session 2 weeks ago Settings 1 month ago Tracker 2 weeks ago Translation 1 month ago Twig 1 year ago UpdateCheck 3 months ago Updater 1 month ago Updates 2 days ago Validators 1 year ago View 1 month ago ViewDataTable 2 weeks ago Visualization 1 year ago Widget 1 month ago .htaccess 2 years ago Access.php 1 month ago Archive.php 1 month ago ArchiveProcessor.php 1 month ago AssetManager.php 1 month ago Auth.php 6 months ago AuthResult.php 6 months ago BaseFactory.php 2 years ago Cache.php 2 years ago CacheId.php 4 months ago CliMulti.php 1 month ago Common.php 2 weeks ago Config.php 1 month ago Console.php 3 months ago Context.php 2 years ago Cookie.php 1 year ago CronArchive.php 1 month ago DI.php 3 months ago DataArray.php 1 month ago DataTable.php 1 month ago Date.php 1 month ago Db.php 1 month ago DbHelper.php 1 month ago Development.php 1 year ago ErrorHandler.php 6 months ago EventDispatcher.php 1 month ago ExceptionHandler.php 4 months ago FileIntegrity.php 1 month ago Filechecks.php 1 year ago Filesystem.php 1 month ago FrontController.php 4 months ago Http.php 1 month ago IP.php 1 year ago Log.php 3 months ago LogDeleter.php 1 year ago Mail.php 1 year ago Metrics.php 1 month ago NoAccessException.php 2 years ago Nonce.php 6 months ago Notification.php 1 month ago NumberFormatter.php 5 months ago Option.php 5 months ago Period.php 1 month ago Piwik.php 1 month ago Plugin.php 1 month ago Process.php 1 month ago Profiler.php 6 months ago ProxyHeaders.php 4 months ago ProxyHttp.php 5 months ago QuickForm2.php 3 months ago RankingQuery.php 1 month ago ReportRenderer.php 1 month ago Request.php 1 month ago Segment.php 1 month ago Sequence.php 6 months ago Session.php 2 weeks ago SettingsPiwik.php 1 month ago SettingsServer.php 1 year ago Singleton.php 2 years ago Site.php 1 month ago SiteContentDetector.php 1 month ago SupportedBrowser.php 2 years ago TCPDF.php 1 year ago Theme.php 1 year ago Timer.php 1 month ago Tracker.php 1 month ago Twig.php 1 month ago Unzip.php 1 year ago UpdateCheck.php 1 month ago Updater.php 1 month ago UpdaterErrorException.php 2 years ago Updates.php 3 months ago Url.php 3 months ago UrlHelper.php 1 month ago Version.php 2 days ago View.php 1 month ago bootstrap.php 1 year ago dispatch.php 2 years ago testMinimumPhpVersion.php 6 months ago
RankingQuery.php
391 lines
1 <?php
2
3 /**
4 * Matomo - free/libre analytics platform
5 *
6 * @link https://matomo.org
7 * @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
8 */
9 namespace Piwik;
10
11 use Exception;
12 use Piwik\Db\Schema;
13 /**
14 * The ranking query class wraps an arbitrary SQL query with more SQL that limits
15 * the number of results while aggregating the rest in an a new "Others" row. It also
16 * allows for some more fancy things that can be configured via method calls of this
17 * class. The advanced use cases are explained in the doc comments of the methods.
18 *
19 * The general use case looks like this:
20 *
21 * // limit to 500 rows + "Others"
22 * $rankingQuery = new RankingQuery();
23 * $rankingQuery->setLimit(500);
24 *
25 * // idaction_url will be "Others" in the row that contains the aggregated rest
26 * $rankingQuery->addLabelColumn('idaction_url');
27 *
28 * // the actual query. it's important to sort it before the limit is applied
29 * $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits
30 * FROM log_link_visit_action
31 * GROUP BY idaction_url
32 * ORDER BY nb_hits DESC';
33 *
34 * // execute the query
35 * $rankingQuery->execute($sql);
36 *
37 * For more examples, see RankingQueryTest.php
38 *
39 * @api
40 */
41 class RankingQuery
42 {
43 // a special label used to mark the 'Others' row in a ranking query result set. this is mapped to the
44 // datatable summary row during archiving.
45 public const LABEL_SUMMARY_ROW = '__mtm_ranking_query_others__';
46 /**
47 * Contains the labels of the inner query.
48 * Format: "label" => true (to make sure labels don't appear twice)
49 * @var array
50 */
51 private $labelColumns = array();
52 /**
53 * The columns of the inner query that are not labels
54 * Format: "label" => "aggregation function" or false for no aggregation
55 * @var array
56 */
57 private $additionalColumns = array();
58 /**
59 * The limit for each group
60 * @var int
61 */
62 private $limit = 5;
63 /**
64 * The name of the columns that marks rows to be excluded from the limit
65 * @var string|false
66 */
67 private $columnToMarkExcludedRows = \false;
68 /**
69 * The column that is used to partition the result
70 * @var string|false
71 */
72 private $partitionColumn = \false;
73 /**
74 * The possible values for the column $this->partitionColumn
75 * @var array
76 */
77 private $partitionColumnValues = array();
78 /**
79 * The value to use in the label of the 'Others' row.
80 * @var string
81 */
82 private $othersLabelValue = self::LABEL_SUMMARY_ROW;
83 /**
84 * @param int|false $limit The result row limit. See {@link setLimit()}.
85 */
86 public function __construct($limit = \false)
87 {
88 if ($limit !== \false) {
89 $this->setLimit($limit);
90 }
91 }
92 /**
93 * Set the limit after which everything is grouped to "Others".
94 *
95 * @param int $limit
96 */
97 public function setLimit($limit)
98 {
99 $this->limit = $limit;
100 }
101 /**
102 * Set the value to use for the label in the 'Others' row.
103 *
104 * @param string $value
105 */
106 public function setOthersLabel($value)
107 {
108 $this->othersLabelValue = $value;
109 }
110 /**
111 * Add a label column.
112 * Labels are the columns that are replaced with "Others" after the limit.
113 *
114 * @param string|array $labelColumn
115 */
116 public function addLabelColumn($labelColumn)
117 {
118 if (is_array($labelColumn)) {
119 foreach ($labelColumn as $label) {
120 $this->addLabelColumn($label);
121 }
122 return;
123 }
124 $this->labelColumns[$labelColumn] = \true;
125 }
126 /**
127 * @return array
128 */
129 public function getLabelColumns()
130 {
131 return $this->labelColumns;
132 }
133 /**
134 * Add a column that has be added to the outer queries.
135 *
136 * @param $column
137 * @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others",
138 * eg, `'min'`, `'max'` or `'sum'`.
139 */
140 public function addColumn($column, $aggregationFunction = \false)
141 {
142 if (is_array($column)) {
143 foreach ($column as $c) {
144 $this->addColumn($c, $aggregationFunction);
145 }
146 return;
147 }
148 $this->additionalColumns[$column] = $aggregationFunction;
149 }
150 /**
151 * Sets a column that will be used to filter the result into two categories.
152 * Rows where this column has a value > 0 will be removed from the result and put
153 * into another array. Both the result and the array of excluded rows are returned
154 * by {@link execute()}.
155 *
156 * @param string $column Name of the column.
157 * @throws Exception if method is used more than once.
158 */
159 public function setColumnToMarkExcludedRows($column)
160 {
161 if ($this->columnToMarkExcludedRows !== \false) {
162 throw new Exception("setColumnToMarkExcludedRows can only be used once");
163 }
164 $this->columnToMarkExcludedRows = $column;
165 $this->addColumn($this->columnToMarkExcludedRows);
166 }
167 /**
168 * This method can be used to partition the result based on the possible values of one
169 * table column. This means the query will split the result set into other sets of rows
170 * for each possible value you provide (where the rows of each set have a column value
171 * that equals a possible value). Each of these new sets of rows will be individually
172 * limited resulting in several limited result sets.
173 *
174 * For example, you can run a query aggregating some data on the log_action table and
175 * partition by log_action.type with the possible values of {@link Piwik\Tracker\Action::TYPE_PAGE_URL},
176 * {@link Piwik\Tracker\Action::TYPE_OUTLINK}, {@link Piwik\Tracker\Action::TYPE_DOWNLOAD}.
177 * The result will be three separate result sets that are aggregated the same ways, but for rows
178 * where `log_action.type = TYPE_OUTLINK`, for rows where `log_action.type = TYPE_ACTION_URL` and for
179 * rows `log_action.type = TYPE_DOWNLOAD`.
180 *
181 * @param string $partitionColumn The column name to partition by.
182 * @param array $possibleValues Array of possible column values.
183 * @throws Exception if method is used more than once.
184 */
185 public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues)
186 {
187 if ($this->partitionColumn !== \false) {
188 throw new Exception("partitionResultIntoMultipleGroups can only be used once");
189 }
190 $this->partitionColumn = $partitionColumn;
191 $this->partitionColumnValues = $possibleValues;
192 $this->addColumn($partitionColumn);
193 }
194 /**
195 * Executes the query.
196 * The object has to be configured first using the other methods.
197 *
198 * @param string $innerQuery The "payload" query that does the actual data aggregation. The ordering
199 * has to be specified in this query. {@link RankingQuery} cannot apply ordering
200 * itself.
201 * @param array $bind Bindings for the inner query.
202 * @param int $timeLimit Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimit > 0
203 * for more details see {@link DbHelper::addMaxExecutionTimeHintToQuery}
204 * @return array The format depends on which methods have been used
205 * to configure the ranking query.
206 */
207 public function execute($innerQuery, $bind = array(), $timeLimit = 0)
208 {
209 $query = $this->generateRankingQuery($innerQuery);
210 $query = \Piwik\DbHelper::addMaxExecutionTimeHintToQuery($query, $timeLimit);
211 $data = \Piwik\Db::getReader()->fetchAll($query, $bind);
212 if ($this->columnToMarkExcludedRows !== \false) {
213 // split the result into the regular result and the rows with special treatment
214 $excludedFromLimit = array();
215 $result = array();
216 foreach ($data as &$row) {
217 if ($row[$this->columnToMarkExcludedRows] != 0) {
218 $excludedFromLimit[] = $row;
219 } else {
220 $result[] = $row;
221 }
222 }
223 $data = array('result' => &$result, 'excludedFromLimit' => &$excludedFromLimit);
224 }
225 if ($this->partitionColumn !== \false) {
226 if ($this->columnToMarkExcludedRows !== \false) {
227 $data['result'] = $this->splitPartitions($data['result']);
228 } else {
229 $data = $this->splitPartitions($data);
230 }
231 }
232 return $data;
233 }
234 private function splitPartitions(&$data)
235 {
236 $result = array();
237 foreach ($data as &$row) {
238 $partition = $row[$this->partitionColumn];
239 if (!isset($result[$partition])) {
240 $result[$partition] = array();
241 }
242 $result[$partition][] =& $row;
243 }
244 return $result;
245 }
246 /**
247 * Generate the SQL code that does the magic.
248 * If you want to get the result, use execute() instead. If you want to run the query
249 * yourself, use this method.
250 *
251 * @param string $innerQuery The "payload" query that does the actual data aggregation. The ordering
252 * has to be specified in this query. {@link RankingQuery} cannot apply ordering
253 * itself.
254 * @param bool $withRollup A flag which determines whether to generate the SQL query using ROLLUP
255 * @return string The entire ranking query SQL.
256 */
257 public function generateRankingQuery($innerQuery, bool $withRollup = \false)
258 {
259 // +1 to include "Others"
260 $limit = $this->limit + 1;
261 $counterExpression = $this->getCounterExpression($limit, $withRollup);
262 // generate select clauses for label columns
263 $labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`';
264 $labelColumnsOthersSwitch = [];
265 $withRollupColumns = [];
266 $withRollupOthersGroupBy = [];
267 foreach (array_keys($this->labelColumns) as $column) {
268 if ($withRollup) {
269 if ([] === $withRollupColumns) {
270 // support "Others" row for first label column
271 $rollupWhen = "\n WHEN counterRollup = {$limit} THEN '" . $this->othersLabelValue . "'\n WHEN counterRollup > 0 THEN `{$column}`\n WHEN counter = {$limit} AND counterRollup = 0 THEN `{$column}`\n WHEN counter = {$limit} THEN '" . $this->othersLabelValue . "'\n ";
272 } else {
273 // support "Others" row for secondary label columns
274 $rollupWhen = "\n WHEN `{$column}` IS NULL THEN NULL\n WHEN counter = {$limit} AND counterRollup = 0 THEN '" . $this->othersLabelValue . "'\n ";
275 }
276 $switch = "\n CASE\n {$rollupWhen}\n ELSE `{$column}`\n END\n ";
277 $labelColumnsOthersSwitch[] = "{$switch} AS `{$column}`";
278 $withRollupColumns[] = $column;
279 $withRollupOthersGroupBy[] = $switch;
280 } else {
281 $labelColumnsOthersSwitch[] = "\n CASE\n WHEN counter = {$limit} THEN '" . $this->othersLabelValue . "'\n ELSE `{$column}`\n END AS `{$column}`\n ";
282 }
283 }
284 $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch);
285 // generate select clauses for additional columns
286 $additionalColumnsString = '';
287 $additionalColumnsAggregatedString = '';
288 foreach ($this->additionalColumns as $additionalColumn => $aggregation) {
289 $additionalColumnsString .= ', `' . $additionalColumn . '`';
290 if ($aggregation !== \false) {
291 $additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`';
292 } else {
293 $additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`';
294 }
295 }
296 // initialize the counters
297 if ($this->partitionColumn !== \false) {
298 $initCounter = '';
299 foreach ($this->partitionColumnValues as $value) {
300 $initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', ';
301 }
302 } else {
303 $initCounter = '( SELECT @counter:=0 ) initCounter,';
304 }
305 $counterRollupExpression = '';
306 if ($withRollup) {
307 $initCounter .= ' ( SELECT @counterRollup:=0 ) initCounterRollup,';
308 $counterRollupWhen = '';
309 if (count($withRollupColumns) >= 2) {
310 $counterRollupWhen = "\n WHEN `" . implode('` IS NULL AND `', $withRollupColumns) . "` IS NULL THEN -1\n ";
311 }
312 foreach ($withRollupColumns as $withRollupColumn) {
313 $counterRollupWhen .= "\n WHEN `{$withRollupColumn}` IS NULL AND @counterRollup = {$limit} THEN {$limit}\n WHEN `{$withRollupColumn}` IS NULL THEN @counterRollup := @counterRollup + 1\n ";
314 }
315 $counterRollupExpression = "\n , CASE\n {$counterRollupWhen}\n ELSE 0\n END AS counterRollup\n ";
316 }
317 if (\false === strpos($innerQuery, ' LIMIT ') && !Schema::getInstance()->supportsSortingInSubquery()) {
318 // Setting a limit for the inner query forces the optimizer to use a temporary table, which uses the sorting
319 $innerQuery .= ' LIMIT 18446744073709551615';
320 }
321 // add a counter to the query
322 // we rely on the sorting of the inner query
323 $withCounter = "\n\t\t\tSELECT\n\t\t\t\t{$labelColumnsString},\n\t\t\t\t{$counterExpression} AS counter\n\t\t\t\t{$counterRollupExpression}\n\t\t\t\t{$additionalColumnsString}\n\t\t\tFROM\n\t\t\t\t{$initCounter}\n\t\t\t\t( {$innerQuery} ) actualQuery\n\t\t";
324 if ($withRollup && !Schema::getInstance()->supportsRankingRollupWithoutExtraSorting()) {
325 // MariaDB requires an additional sorting layer to return
326 // the counter/counterRollup values we expect
327 $rollupColumnSorts = [];
328 foreach ($withRollupColumns as $withRollupColumn) {
329 $rollupColumnSorts[] = "`{$withRollupColumn}` IS NULL";
330 }
331 $withCounter .= ' ORDER BY ' . implode(', ', $rollupColumnSorts);
332 $innerQueryOrderBy = \Piwik\DbHelper::extractOrderByFromQuery($innerQuery);
333 if (null !== $innerQueryOrderBy) {
334 // copy ORDER BY from inner query to rollup sorting
335 $withCounter .= ', ' . $innerQueryOrderBy;
336 }
337 }
338 // group by the counter - this groups "Others" because the counter stops at $limit
339 $groupBy = 'counter';
340 if ($withRollup) {
341 // group rollups additionally by the rollup counter and the
342 // full "Others" switch to ensure correct secondary level "Others" calculation
343 $groupBy .= ', counterRollup, ' . implode(', ', $withRollupOthersGroupBy);
344 }
345 if ($this->partitionColumn !== \false) {
346 $groupBy .= ', `' . $this->partitionColumn . '`';
347 }
348 $groupOthers = "\n\t\t\tSELECT\n\t\t\t\t{$labelColumnsOthersSwitch}\n\t\t\t\t{$additionalColumnsAggregatedString}\n\t\t\tFROM ( {$withCounter} ) AS withCounter\n\t\t\tGROUP BY {$groupBy}\n\t\t";
349 if ($withRollup) {
350 // Sort the final result if a rollup was used
351 // to ensure rollup values are returned first, and "Others" last
352 $groupOthers .= " ORDER BY counter, counterRollup";
353 } elseif (!Schema::getInstance()->supportsSortingInSubquery()) {
354 // When subqueries aren't sorted, we need to sort the result manually again
355 $groupOthers .= " ORDER BY counter";
356 }
357 return $groupOthers;
358 }
359 private function getCounterExpression($limit, bool $withRollup = \false)
360 {
361 $whens = array();
362 if ($this->columnToMarkExcludedRows !== \false) {
363 // when a row has been specified that marks which records should be excluded
364 // from limiting, we don't give those rows the normal counter but -1 times the
365 // value they had before. this way, they have a separate number space (i.e. negative
366 // integers).
367 $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}";
368 }
369 if ($withRollup) {
370 foreach (array_keys($this->labelColumns) as $column) {
371 $whens[] = "WHEN `{$column}` IS NULL THEN -1";
372 }
373 }
374 if ($this->partitionColumn !== \false) {
375 // partition: one counter per possible value
376 foreach ($this->partitionColumnValues as $value) {
377 $isValue = '`' . $this->partitionColumn . '` = ' . intval($value);
378 $counter = '@counter' . intval($value);
379 $whens[] = "WHEN {$isValue} AND {$counter} = {$limit} THEN {$limit}";
380 $whens[] = "WHEN {$isValue} THEN {$counter}:={$counter}+1";
381 }
382 $whens[] = "ELSE 0";
383 } else {
384 // no partitioning: add a single counter
385 $whens[] = "WHEN @counter = {$limit} THEN {$limit}";
386 $whens[] = "ELSE @counter:=@counter+1";
387 }
388 return "\n\t\t\tCASE\n\t\t\t\t" . implode("\n\t\t\t\t", $whens) . "\n\t\t\tEND\n\t\t";
389 }
390 }
391