PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / 1.3.1
Matomo Analytics – Powerful, Privacy-First Insights for WordPress v1.3.1
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / RankingQuery.php
matomo / app / core Last commit date
API 6 years ago Access 6 years ago Application 6 years ago Archive 6 years ago ArchiveProcessor 6 years ago Archiver 6 years ago AssetManager 6 years ago Auth 6 years ago Category 6 years ago CliMulti 6 years ago Columns 6 years ago Composer 6 years ago Concurrency 6 years ago Config 6 years ago Container 6 years ago CronArchive 6 years ago DataAccess 5 years ago DataFiles 6 years ago DataTable 6 years ago Db 6 years ago DeviceDetector 5 years ago Email 6 years ago Exception 6 years ago Http 6 years ago Intl 6 years ago Mail 6 years ago Measurable 6 years ago Menu 6 years ago Metrics 6 years ago Notification 6 years ago Period 6 years ago Plugin 6 years ago ProfessionalServices 6 years ago Report 6 years ago ReportRenderer 6 years ago Scheduler 6 years ago Segment 6 years ago Session 6 years ago Settings 6 years ago Tracker 5 years ago Translation 6 years ago UpdateCheck 6 years ago Updater 6 years ago Updates 6 years ago Validators 6 years ago View 6 years ago ViewDataTable 6 years ago Visualization 6 years ago Widget 6 years ago .htaccess 6 years ago Access.php 6 years ago Archive.php 6 years ago ArchiveProcessor.php 6 years ago AssetManager.php 6 years ago Auth.php 6 years ago BaseFactory.php 6 years ago Cache.php 6 years ago CacheId.php 6 years ago CliMulti.php 6 years ago Common.php 6 years ago Config.php 6 years ago Console.php 6 years ago Context.php 6 years ago Cookie.php 5 years ago CronArchive.php 5 years ago DataArray.php 6 years ago DataTable.php 6 years ago Date.php 6 years ago Db.php 6 years ago DbHelper.php 6 years ago Development.php 6 years ago DeviceDetectorFactory.php 6 years ago ErrorHandler.php 6 years ago EventDispatcher.php 6 years ago ExceptionHandler.php 6 years ago FileIntegrity.php 6 years ago Filechecks.php 6 years ago Filesystem.php 6 years ago FrontController.php 6 years ago Http.php 6 years ago IP.php 6 years ago Log.php 6 years ago LogDeleter.php 6 years ago Mail.php 6 years ago Metrics.php 6 years ago MetricsFormatter.php 6 years ago Nonce.php 5 years ago Notification.php 6 years ago NumberFormatter.php 6 years ago Option.php 5 years ago Period.php 6 years ago Piwik.php 6 years ago Plugin.php 6 years ago Profiler.php 6 years ago ProxyHeaders.php 6 years ago ProxyHttp.php 6 years ago QuickForm2.php 6 years ago RankingQuery.php 6 years ago Registry.php 6 years ago ReportRenderer.php 6 years ago ScheduledTask.php 6 years ago Segment.php 6 years ago Sequence.php 6 years ago Session.php 6 years ago SettingsPiwik.php 6 years ago SettingsServer.php 6 years ago Singleton.php 6 years ago Site.php 6 years ago TCPDF.php 6 years ago TaskScheduler.php 6 years ago Theme.php 6 years ago Timer.php 6 years ago Tracker.php 6 years ago Translate.php 6 years ago Twig.php 6 years ago Unzip.php 6 years ago UpdateCheck.php 6 years ago Updater.php 6 years ago Updates.php 6 years ago Url.php 6 years ago UrlHelper.php 6 years ago Version.php 5 years ago View.php 6 years ago bootstrap.php 6 years ago dispatch.php 6 years ago testMinimumPhpVersion.php 6 years ago
RankingQuery.php
388 lines
1 <?php
2 /**
3 * Piwik - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9
10 namespace Piwik;
11
12 use Exception;
13
14 /**
15 * The ranking query class wraps an arbitrary SQL query with more SQL that limits
16 * the number of results while aggregating the rest in an a new "Others" row. It also
17 * allows for some more fancy things that can be configured via method calls of this
18 * class. The advanced use cases are explained in the doc comments of the methods.
19 *
20 * The general use case looks like this:
21 *
22 * // limit to 500 rows + "Others"
23 * $rankingQuery = new RankingQuery();
24 * $rankingQuery->setLimit(500);
25 *
26 * // idaction_url will be "Others" in the row that contains the aggregated rest
27 * $rankingQuery->addLabelColumn('idaction_url');
28 *
29 * // the actual query. it's important to sort it before the limit is applied
30 * $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits
31 * FROM log_link_visit_action
32 * GROUP BY idaction_url
33 * ORDER BY nb_hits DESC';
34 *
35 * // execute the query
36 * $rankingQuery->execute($sql);
37 *
38 * For more examples, see RankingQueryTest.php
39 *
40 * @api
41 */
42 class RankingQuery
43 {
44 // a special label used to mark the 'Others' row in a ranking query result set. this is mapped to the
45 // datatable summary row during archiving.
46 const LABEL_SUMMARY_ROW = '__mtm_ranking_query_others__';
47
48 /**
49 * Contains the labels of the inner query.
50 * Format: "label" => true (to make sure labels don't appear twice)
51 * @var array
52 */
53 private $labelColumns = array();
54
55 /**
56 * The columns of the inner query that are not labels
57 * Format: "label" => "aggregation function" or false for no aggregation
58 * @var array
59 */
60 private $additionalColumns = array();
61
62 /**
63 * The limit for each group
64 * @var int
65 */
66 private $limit = 5;
67
68 /**
69 * The name of the columns that marks rows to be excluded from the limit
70 * @var string
71 */
72 private $columnToMarkExcludedRows = false;
73
74 /**
75 * The column that is used to partition the result
76 * @var bool|string
77 */
78 private $partitionColumn = false;
79
80 /**
81 * The possible values for the column $this->partitionColumn
82 * @var array
83 */
84 private $partitionColumnValues = array();
85
86 /**
87 * The value to use in the label of the 'Others' row.
88 * @var string
89 */
90 private $othersLabelValue = self::LABEL_SUMMARY_ROW;
91
92 /**
93 * Constructor.
94 *
95 * @param int|false $limit The result row limit. See {@link setLimit()}.
96 */
97 public function __construct($limit = false)
98 {
99 if ($limit !== false) {
100 $this->setLimit($limit);
101 }
102 }
103
104 /**
105 * Set the limit after which everything is grouped to "Others".
106 *
107 * @param int $limit
108 */
109 public function setLimit($limit)
110 {
111 $this->limit = $limit;
112 }
113
114 /**
115 * Set the value to use for the label in the 'Others' row.
116 *
117 * @param string $value
118 */
119 public function setOthersLabel($value)
120 {
121 $this->othersLabelValue = $value;
122 }
123
124 /**
125 * Add a label column.
126 * Labels are the columns that are replaced with "Others" after the limit.
127 *
128 * @param string|array $labelColumn
129 */
130 public function addLabelColumn($labelColumn)
131 {
132 if (is_array($labelColumn)) {
133 foreach ($labelColumn as $label) {
134 $this->addLabelColumn($label);
135 }
136 return;
137 }
138 $this->labelColumns[$labelColumn] = true;
139 }
140
141 /**
142 * @return array
143 */
144 public function getLabelColumns()
145 {
146 return $this->labelColumns;
147 }
148
149 /**
150 * Add a column that has be added to the outer queries.
151 *
152 * @param $column
153 * @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others",
154 * eg, `'min'`, `'max'` or `'sum'`.
155 */
156 public function addColumn($column, $aggregationFunction = false)
157 {
158 if (is_array($column)) {
159 foreach ($column as $c) {
160 $this->addColumn($c, $aggregationFunction);
161 }
162 return;
163 }
164 $this->additionalColumns[$column] = $aggregationFunction;
165 }
166
167 /**
168 * Sets a column that will be used to filter the result into two categories.
169 * Rows where this column has a value > 0 will be removed from the result and put
170 * into another array. Both the result and the array of excluded rows are returned
171 * by {@link execute()}.
172 *
173 * @param $column string Name of the column.
174 * @throws Exception if method is used more than once.
175 */
176 public function setColumnToMarkExcludedRows($column)
177 {
178 if ($this->columnToMarkExcludedRows !== false) {
179 throw new Exception("setColumnToMarkExcludedRows can only be used once");
180 }
181
182 $this->columnToMarkExcludedRows = $column;
183 $this->addColumn($this->columnToMarkExcludedRows);
184 }
185
186 /**
187 * This method can be used to parition the result based on the possible values of one
188 * table column. This means the query will split the result set into other sets of rows
189 * for each possible value you provide (where the rows of each set have a column value
190 * that equals a possible value). Each of these new sets of rows will be individually
191 * limited resulting in several limited result sets.
192 *
193 * For example, you can run a query aggregating some data on the log_action table and
194 * partition by log_action.type with the possible values of {@link Piwik\Tracker\Action::TYPE_PAGE_URL},
195 * {@link Piwik\Tracker\Action::TYPE_OUTLINK}, {@link Piwik\Tracker\Action::TYPE_DOWNLOAD}.
196 * The result will be three separate result sets that are aggregated the same ways, but for rows
197 * where `log_action.type = TYPE_OUTLINK`, for rows where `log_action.type = TYPE_ACTION_URL` and for
198 * rows `log_action.type = TYPE_DOWNLOAD`.
199 *
200 * @param $partitionColumn string The column name to partion by.
201 * @param $possibleValues Array of possible column values.
202 * @throws Exception if method is used more than once.
203 */
204 public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues)
205 {
206 if ($this->partitionColumn !== false) {
207 throw new Exception("partitionResultIntoMultipleGroups can only be used once");
208 }
209
210 $this->partitionColumn = $partitionColumn;
211 $this->partitionColumnValues = $possibleValues;
212 $this->addColumn($partitionColumn);
213 }
214
215 /**
216 * Executes the query.
217 * The object has to be configured first using the other methods.
218 *
219 * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering
220 * has to be specified in this query. {@link RankingQuery} cannot apply ordering
221 * itself.
222 * @param $bind array Bindings for the inner query.
223 * @param int $timeLimitInMs Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimitInMs > 0
224 * @return array The format depends on which methods have been used
225 * to configure the ranking query.
226 */
227 public function execute($innerQuery, $bind = array(), $timeLimitInMs = 0)
228 {
229 $query = $this->generateRankingQuery($innerQuery);
230 $query = DbHelper::addMaxExecutionTimeHintToQuery($query, $timeLimitInMs);
231
232 $data = Db::getReader()->fetchAll($query, $bind);
233
234 if ($this->columnToMarkExcludedRows !== false) {
235 // split the result into the regular result and the rows with special treatment
236 $excludedFromLimit = array();
237 $result = array();
238 foreach ($data as &$row) {
239 if ($row[$this->columnToMarkExcludedRows] != 0) {
240 $excludedFromLimit[] = $row;
241 } else {
242 $result[] = $row;
243 }
244 }
245 $data = array(
246 'result' => &$result,
247 'excludedFromLimit' => &$excludedFromLimit
248 );
249 }
250
251 if ($this->partitionColumn !== false) {
252 if ($this->columnToMarkExcludedRows !== false) {
253 $data['result'] = $this->splitPartitions($data['result']);
254 } else {
255 $data = $this->splitPartitions($data);
256 }
257 }
258
259 return $data;
260 }
261
262 private function splitPartitions(&$data)
263 {
264 $result = array();
265 foreach ($data as &$row) {
266 $partition = $row[$this->partitionColumn];
267 if (!isset($result[$partition])) {
268 $result[$partition] = array();
269 }
270 $result[$partition][] = & $row;
271 }
272 return $result;
273 }
274
275 /**
276 * Generate the SQL code that does the magic.
277 * If you want to get the result, use execute() instead. If you want to run the query
278 * yourself, use this method.
279 *
280 * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering
281 * has to be specified in this query. {@link RankingQuery} cannot apply ordering
282 * itself.
283 * @return string The entire ranking query SQL.
284 */
285 public function generateRankingQuery($innerQuery)
286 {
287 // +1 to include "Others"
288 $limit = $this->limit + 1;
289 $counterExpression = $this->getCounterExpression($limit);
290
291 // generate select clauses for label columns
292 $labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`';
293 $labelColumnsOthersSwitch = array();
294 foreach ($this->labelColumns as $column => $true) {
295 $labelColumnsOthersSwitch[] = "
296 CASE
297 WHEN counter = $limit THEN '" . $this->othersLabelValue . "'
298 ELSE `$column`
299 END AS `$column`
300 ";
301 }
302 $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch);
303
304 // generate select clauses for additional columns
305 $additionalColumnsString = '';
306 $additionalColumnsAggregatedString = '';
307 foreach ($this->additionalColumns as $additionalColumn => $aggregation) {
308 $additionalColumnsString .= ', `' . $additionalColumn . '`';
309 if ($aggregation !== false) {
310 $additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`';
311 } else {
312 $additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`';
313 }
314 }
315
316 // initialize the counters
317 if ($this->partitionColumn !== false) {
318 $initCounter = '';
319 foreach ($this->partitionColumnValues as $value) {
320 $initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', ';
321 }
322 } else {
323 $initCounter = '( SELECT @counter:=0 ) initCounter,';
324 }
325
326 // add a counter to the query
327 // we rely on the sorting of the inner query
328 $withCounter = "
329 SELECT
330 $labelColumnsString,
331 $counterExpression AS counter
332 $additionalColumnsString
333 FROM
334 $initCounter
335 ( $innerQuery ) actualQuery
336 ";
337
338 // group by the counter - this groups "Others" because the counter stops at $limit
339 $groupBy = 'counter';
340 if ($this->partitionColumn !== false) {
341 $groupBy .= ', `' . $this->partitionColumn . '`';
342 }
343 $groupOthers = "
344 SELECT
345 $labelColumnsOthersSwitch
346 $additionalColumnsAggregatedString
347 FROM ( $withCounter ) AS withCounter
348 GROUP BY $groupBy
349 ";
350 return $groupOthers;
351 }
352
353 private function getCounterExpression($limit)
354 {
355 $whens = array();
356
357 if ($this->columnToMarkExcludedRows !== false) {
358 // when a row has been specified that marks which records should be excluded
359 // from limiting, we don't give those rows the normal counter but -1 times the
360 // value they had before. this way, they have a separate number space (i.e. negative
361 // integers).
362 $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}";
363 }
364
365 if ($this->partitionColumn !== false) {
366 // partition: one counter per possible value
367 foreach ($this->partitionColumnValues as $value) {
368 $isValue = '`' . $this->partitionColumn . '` = ' . intval($value);
369 $counter = '@counter' . intval($value);
370 $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit";
371 $whens[] = "WHEN $isValue THEN $counter:=$counter+1";
372 }
373 $whens[] = "ELSE 0";
374 } else {
375 // no partitioning: add a single counter
376 $whens[] = "WHEN @counter = $limit THEN $limit";
377 $whens[] = "ELSE @counter:=@counter+1";
378 }
379
380 return "
381 CASE
382 " . implode("
383 ", $whens) . "
384 END
385 ";
386 }
387 }
388