API
6 years ago
Access
6 years ago
Application
6 years ago
Archive
6 years ago
ArchiveProcessor
6 years ago
Archiver
6 years ago
AssetManager
6 years ago
Auth
6 years ago
Category
6 years ago
CliMulti
6 years ago
Columns
6 years ago
Composer
6 years ago
Concurrency
6 years ago
Config
6 years ago
Container
6 years ago
CronArchive
6 years ago
DataAccess
5 years ago
DataFiles
6 years ago
DataTable
6 years ago
Db
6 years ago
DeviceDetector
5 years ago
Email
6 years ago
Exception
6 years ago
Http
6 years ago
Intl
6 years ago
Mail
6 years ago
Measurable
6 years ago
Menu
6 years ago
Metrics
6 years ago
Notification
6 years ago
Period
6 years ago
Plugin
6 years ago
ProfessionalServices
6 years ago
Report
6 years ago
ReportRenderer
6 years ago
Scheduler
6 years ago
Segment
6 years ago
Session
6 years ago
Settings
6 years ago
Tracker
5 years ago
Translation
6 years ago
UpdateCheck
6 years ago
Updater
6 years ago
Updates
6 years ago
Validators
6 years ago
View
6 years ago
ViewDataTable
6 years ago
Visualization
6 years ago
Widget
6 years ago
.htaccess
6 years ago
Access.php
6 years ago
Archive.php
6 years ago
ArchiveProcessor.php
6 years ago
AssetManager.php
6 years ago
Auth.php
6 years ago
BaseFactory.php
6 years ago
Cache.php
6 years ago
CacheId.php
6 years ago
CliMulti.php
6 years ago
Common.php
6 years ago
Config.php
6 years ago
Console.php
6 years ago
Context.php
6 years ago
Cookie.php
5 years ago
CronArchive.php
5 years ago
DataArray.php
6 years ago
DataTable.php
6 years ago
Date.php
6 years ago
Db.php
6 years ago
DbHelper.php
6 years ago
Development.php
6 years ago
DeviceDetectorFactory.php
6 years ago
ErrorHandler.php
6 years ago
EventDispatcher.php
6 years ago
ExceptionHandler.php
6 years ago
FileIntegrity.php
6 years ago
Filechecks.php
6 years ago
Filesystem.php
6 years ago
FrontController.php
6 years ago
Http.php
6 years ago
IP.php
6 years ago
Log.php
6 years ago
LogDeleter.php
6 years ago
Mail.php
6 years ago
Metrics.php
6 years ago
MetricsFormatter.php
6 years ago
Nonce.php
5 years ago
Notification.php
6 years ago
NumberFormatter.php
6 years ago
Option.php
5 years ago
Period.php
6 years ago
Piwik.php
6 years ago
Plugin.php
6 years ago
Profiler.php
6 years ago
ProxyHeaders.php
6 years ago
ProxyHttp.php
6 years ago
QuickForm2.php
6 years ago
RankingQuery.php
6 years ago
Registry.php
6 years ago
ReportRenderer.php
6 years ago
ScheduledTask.php
6 years ago
Segment.php
6 years ago
Sequence.php
6 years ago
Session.php
6 years ago
SettingsPiwik.php
6 years ago
SettingsServer.php
6 years ago
Singleton.php
6 years ago
Site.php
6 years ago
TCPDF.php
6 years ago
TaskScheduler.php
6 years ago
Theme.php
6 years ago
Timer.php
6 years ago
Tracker.php
6 years ago
Translate.php
6 years ago
Twig.php
6 years ago
Unzip.php
6 years ago
UpdateCheck.php
6 years ago
Updater.php
6 years ago
Updates.php
6 years ago
Url.php
6 years ago
UrlHelper.php
6 years ago
Version.php
5 years ago
View.php
6 years ago
bootstrap.php
6 years ago
dispatch.php
6 years ago
testMinimumPhpVersion.php
6 years ago
RankingQuery.php
388 lines
| 1 | <?php |
| 2 | /** |
| 3 | * Piwik - free/libre analytics platform |
| 4 | * |
| 5 | * @link https://matomo.org |
| 6 | * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 7 | * |
| 8 | */ |
| 9 | |
| 10 | namespace Piwik; |
| 11 | |
| 12 | use Exception; |
| 13 | |
| 14 | /** |
| 15 | * The ranking query class wraps an arbitrary SQL query with more SQL that limits |
| 16 | * the number of results while aggregating the rest in an a new "Others" row. It also |
| 17 | * allows for some more fancy things that can be configured via method calls of this |
| 18 | * class. The advanced use cases are explained in the doc comments of the methods. |
| 19 | * |
| 20 | * The general use case looks like this: |
| 21 | * |
| 22 | * // limit to 500 rows + "Others" |
| 23 | * $rankingQuery = new RankingQuery(); |
| 24 | * $rankingQuery->setLimit(500); |
| 25 | * |
| 26 | * // idaction_url will be "Others" in the row that contains the aggregated rest |
| 27 | * $rankingQuery->addLabelColumn('idaction_url'); |
| 28 | * |
| 29 | * // the actual query. it's important to sort it before the limit is applied |
| 30 | * $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits |
| 31 | * FROM log_link_visit_action |
| 32 | * GROUP BY idaction_url |
| 33 | * ORDER BY nb_hits DESC'; |
| 34 | * |
| 35 | * // execute the query |
| 36 | * $rankingQuery->execute($sql); |
| 37 | * |
| 38 | * For more examples, see RankingQueryTest.php |
| 39 | * |
| 40 | * @api |
| 41 | */ |
| 42 | class RankingQuery |
| 43 | { |
| 44 | // a special label used to mark the 'Others' row in a ranking query result set. this is mapped to the |
| 45 | // datatable summary row during archiving. |
| 46 | const LABEL_SUMMARY_ROW = '__mtm_ranking_query_others__'; |
| 47 | |
| 48 | /** |
| 49 | * Contains the labels of the inner query. |
| 50 | * Format: "label" => true (to make sure labels don't appear twice) |
| 51 | * @var array |
| 52 | */ |
| 53 | private $labelColumns = array(); |
| 54 | |
| 55 | /** |
| 56 | * The columns of the inner query that are not labels |
| 57 | * Format: "label" => "aggregation function" or false for no aggregation |
| 58 | * @var array |
| 59 | */ |
| 60 | private $additionalColumns = array(); |
| 61 | |
| 62 | /** |
| 63 | * The limit for each group |
| 64 | * @var int |
| 65 | */ |
| 66 | private $limit = 5; |
| 67 | |
| 68 | /** |
| 69 | * The name of the columns that marks rows to be excluded from the limit |
| 70 | * @var string |
| 71 | */ |
| 72 | private $columnToMarkExcludedRows = false; |
| 73 | |
| 74 | /** |
| 75 | * The column that is used to partition the result |
| 76 | * @var bool|string |
| 77 | */ |
| 78 | private $partitionColumn = false; |
| 79 | |
| 80 | /** |
| 81 | * The possible values for the column $this->partitionColumn |
| 82 | * @var array |
| 83 | */ |
| 84 | private $partitionColumnValues = array(); |
| 85 | |
| 86 | /** |
| 87 | * The value to use in the label of the 'Others' row. |
| 88 | * @var string |
| 89 | */ |
| 90 | private $othersLabelValue = self::LABEL_SUMMARY_ROW; |
| 91 | |
| 92 | /** |
| 93 | * Constructor. |
| 94 | * |
| 95 | * @param int|false $limit The result row limit. See {@link setLimit()}. |
| 96 | */ |
| 97 | public function __construct($limit = false) |
| 98 | { |
| 99 | if ($limit !== false) { |
| 100 | $this->setLimit($limit); |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | /** |
| 105 | * Set the limit after which everything is grouped to "Others". |
| 106 | * |
| 107 | * @param int $limit |
| 108 | */ |
| 109 | public function setLimit($limit) |
| 110 | { |
| 111 | $this->limit = $limit; |
| 112 | } |
| 113 | |
| 114 | /** |
| 115 | * Set the value to use for the label in the 'Others' row. |
| 116 | * |
| 117 | * @param string $value |
| 118 | */ |
| 119 | public function setOthersLabel($value) |
| 120 | { |
| 121 | $this->othersLabelValue = $value; |
| 122 | } |
| 123 | |
| 124 | /** |
| 125 | * Add a label column. |
| 126 | * Labels are the columns that are replaced with "Others" after the limit. |
| 127 | * |
| 128 | * @param string|array $labelColumn |
| 129 | */ |
| 130 | public function addLabelColumn($labelColumn) |
| 131 | { |
| 132 | if (is_array($labelColumn)) { |
| 133 | foreach ($labelColumn as $label) { |
| 134 | $this->addLabelColumn($label); |
| 135 | } |
| 136 | return; |
| 137 | } |
| 138 | $this->labelColumns[$labelColumn] = true; |
| 139 | } |
| 140 | |
| 141 | /** |
| 142 | * @return array |
| 143 | */ |
| 144 | public function getLabelColumns() |
| 145 | { |
| 146 | return $this->labelColumns; |
| 147 | } |
| 148 | |
| 149 | /** |
| 150 | * Add a column that has be added to the outer queries. |
| 151 | * |
| 152 | * @param $column |
| 153 | * @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others", |
| 154 | * eg, `'min'`, `'max'` or `'sum'`. |
| 155 | */ |
| 156 | public function addColumn($column, $aggregationFunction = false) |
| 157 | { |
| 158 | if (is_array($column)) { |
| 159 | foreach ($column as $c) { |
| 160 | $this->addColumn($c, $aggregationFunction); |
| 161 | } |
| 162 | return; |
| 163 | } |
| 164 | $this->additionalColumns[$column] = $aggregationFunction; |
| 165 | } |
| 166 | |
| 167 | /** |
| 168 | * Sets a column that will be used to filter the result into two categories. |
| 169 | * Rows where this column has a value > 0 will be removed from the result and put |
| 170 | * into another array. Both the result and the array of excluded rows are returned |
| 171 | * by {@link execute()}. |
| 172 | * |
| 173 | * @param $column string Name of the column. |
| 174 | * @throws Exception if method is used more than once. |
| 175 | */ |
| 176 | public function setColumnToMarkExcludedRows($column) |
| 177 | { |
| 178 | if ($this->columnToMarkExcludedRows !== false) { |
| 179 | throw new Exception("setColumnToMarkExcludedRows can only be used once"); |
| 180 | } |
| 181 | |
| 182 | $this->columnToMarkExcludedRows = $column; |
| 183 | $this->addColumn($this->columnToMarkExcludedRows); |
| 184 | } |
| 185 | |
| 186 | /** |
| 187 | * This method can be used to parition the result based on the possible values of one |
| 188 | * table column. This means the query will split the result set into other sets of rows |
| 189 | * for each possible value you provide (where the rows of each set have a column value |
| 190 | * that equals a possible value). Each of these new sets of rows will be individually |
| 191 | * limited resulting in several limited result sets. |
| 192 | * |
| 193 | * For example, you can run a query aggregating some data on the log_action table and |
| 194 | * partition by log_action.type with the possible values of {@link Piwik\Tracker\Action::TYPE_PAGE_URL}, |
| 195 | * {@link Piwik\Tracker\Action::TYPE_OUTLINK}, {@link Piwik\Tracker\Action::TYPE_DOWNLOAD}. |
| 196 | * The result will be three separate result sets that are aggregated the same ways, but for rows |
| 197 | * where `log_action.type = TYPE_OUTLINK`, for rows where `log_action.type = TYPE_ACTION_URL` and for |
| 198 | * rows `log_action.type = TYPE_DOWNLOAD`. |
| 199 | * |
| 200 | * @param $partitionColumn string The column name to partion by. |
| 201 | * @param $possibleValues Array of possible column values. |
| 202 | * @throws Exception if method is used more than once. |
| 203 | */ |
| 204 | public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues) |
| 205 | { |
| 206 | if ($this->partitionColumn !== false) { |
| 207 | throw new Exception("partitionResultIntoMultipleGroups can only be used once"); |
| 208 | } |
| 209 | |
| 210 | $this->partitionColumn = $partitionColumn; |
| 211 | $this->partitionColumnValues = $possibleValues; |
| 212 | $this->addColumn($partitionColumn); |
| 213 | } |
| 214 | |
| 215 | /** |
| 216 | * Executes the query. |
| 217 | * The object has to be configured first using the other methods. |
| 218 | * |
| 219 | * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering |
| 220 | * has to be specified in this query. {@link RankingQuery} cannot apply ordering |
| 221 | * itself. |
| 222 | * @param $bind array Bindings for the inner query. |
| 223 | * @param int $timeLimitInMs Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimitInMs > 0 |
| 224 | * @return array The format depends on which methods have been used |
| 225 | * to configure the ranking query. |
| 226 | */ |
| 227 | public function execute($innerQuery, $bind = array(), $timeLimitInMs = 0) |
| 228 | { |
| 229 | $query = $this->generateRankingQuery($innerQuery); |
| 230 | $query = DbHelper::addMaxExecutionTimeHintToQuery($query, $timeLimitInMs); |
| 231 | |
| 232 | $data = Db::getReader()->fetchAll($query, $bind); |
| 233 | |
| 234 | if ($this->columnToMarkExcludedRows !== false) { |
| 235 | // split the result into the regular result and the rows with special treatment |
| 236 | $excludedFromLimit = array(); |
| 237 | $result = array(); |
| 238 | foreach ($data as &$row) { |
| 239 | if ($row[$this->columnToMarkExcludedRows] != 0) { |
| 240 | $excludedFromLimit[] = $row; |
| 241 | } else { |
| 242 | $result[] = $row; |
| 243 | } |
| 244 | } |
| 245 | $data = array( |
| 246 | 'result' => &$result, |
| 247 | 'excludedFromLimit' => &$excludedFromLimit |
| 248 | ); |
| 249 | } |
| 250 | |
| 251 | if ($this->partitionColumn !== false) { |
| 252 | if ($this->columnToMarkExcludedRows !== false) { |
| 253 | $data['result'] = $this->splitPartitions($data['result']); |
| 254 | } else { |
| 255 | $data = $this->splitPartitions($data); |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | return $data; |
| 260 | } |
| 261 | |
| 262 | private function splitPartitions(&$data) |
| 263 | { |
| 264 | $result = array(); |
| 265 | foreach ($data as &$row) { |
| 266 | $partition = $row[$this->partitionColumn]; |
| 267 | if (!isset($result[$partition])) { |
| 268 | $result[$partition] = array(); |
| 269 | } |
| 270 | $result[$partition][] = & $row; |
| 271 | } |
| 272 | return $result; |
| 273 | } |
| 274 | |
| 275 | /** |
| 276 | * Generate the SQL code that does the magic. |
| 277 | * If you want to get the result, use execute() instead. If you want to run the query |
| 278 | * yourself, use this method. |
| 279 | * |
| 280 | * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering |
| 281 | * has to be specified in this query. {@link RankingQuery} cannot apply ordering |
| 282 | * itself. |
| 283 | * @return string The entire ranking query SQL. |
| 284 | */ |
| 285 | public function generateRankingQuery($innerQuery) |
| 286 | { |
| 287 | // +1 to include "Others" |
| 288 | $limit = $this->limit + 1; |
| 289 | $counterExpression = $this->getCounterExpression($limit); |
| 290 | |
| 291 | // generate select clauses for label columns |
| 292 | $labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`'; |
| 293 | $labelColumnsOthersSwitch = array(); |
| 294 | foreach ($this->labelColumns as $column => $true) { |
| 295 | $labelColumnsOthersSwitch[] = " |
| 296 | CASE |
| 297 | WHEN counter = $limit THEN '" . $this->othersLabelValue . "' |
| 298 | ELSE `$column` |
| 299 | END AS `$column` |
| 300 | "; |
| 301 | } |
| 302 | $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch); |
| 303 | |
| 304 | // generate select clauses for additional columns |
| 305 | $additionalColumnsString = ''; |
| 306 | $additionalColumnsAggregatedString = ''; |
| 307 | foreach ($this->additionalColumns as $additionalColumn => $aggregation) { |
| 308 | $additionalColumnsString .= ', `' . $additionalColumn . '`'; |
| 309 | if ($aggregation !== false) { |
| 310 | $additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`'; |
| 311 | } else { |
| 312 | $additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`'; |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | // initialize the counters |
| 317 | if ($this->partitionColumn !== false) { |
| 318 | $initCounter = ''; |
| 319 | foreach ($this->partitionColumnValues as $value) { |
| 320 | $initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', '; |
| 321 | } |
| 322 | } else { |
| 323 | $initCounter = '( SELECT @counter:=0 ) initCounter,'; |
| 324 | } |
| 325 | |
| 326 | // add a counter to the query |
| 327 | // we rely on the sorting of the inner query |
| 328 | $withCounter = " |
| 329 | SELECT |
| 330 | $labelColumnsString, |
| 331 | $counterExpression AS counter |
| 332 | $additionalColumnsString |
| 333 | FROM |
| 334 | $initCounter |
| 335 | ( $innerQuery ) actualQuery |
| 336 | "; |
| 337 | |
| 338 | // group by the counter - this groups "Others" because the counter stops at $limit |
| 339 | $groupBy = 'counter'; |
| 340 | if ($this->partitionColumn !== false) { |
| 341 | $groupBy .= ', `' . $this->partitionColumn . '`'; |
| 342 | } |
| 343 | $groupOthers = " |
| 344 | SELECT |
| 345 | $labelColumnsOthersSwitch |
| 346 | $additionalColumnsAggregatedString |
| 347 | FROM ( $withCounter ) AS withCounter |
| 348 | GROUP BY $groupBy |
| 349 | "; |
| 350 | return $groupOthers; |
| 351 | } |
| 352 | |
| 353 | private function getCounterExpression($limit) |
| 354 | { |
| 355 | $whens = array(); |
| 356 | |
| 357 | if ($this->columnToMarkExcludedRows !== false) { |
| 358 | // when a row has been specified that marks which records should be excluded |
| 359 | // from limiting, we don't give those rows the normal counter but -1 times the |
| 360 | // value they had before. this way, they have a separate number space (i.e. negative |
| 361 | // integers). |
| 362 | $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}"; |
| 363 | } |
| 364 | |
| 365 | if ($this->partitionColumn !== false) { |
| 366 | // partition: one counter per possible value |
| 367 | foreach ($this->partitionColumnValues as $value) { |
| 368 | $isValue = '`' . $this->partitionColumn . '` = ' . intval($value); |
| 369 | $counter = '@counter' . intval($value); |
| 370 | $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit"; |
| 371 | $whens[] = "WHEN $isValue THEN $counter:=$counter+1"; |
| 372 | } |
| 373 | $whens[] = "ELSE 0"; |
| 374 | } else { |
| 375 | // no partitioning: add a single counter |
| 376 | $whens[] = "WHEN @counter = $limit THEN $limit"; |
| 377 | $whens[] = "ELSE @counter:=@counter+1"; |
| 378 | } |
| 379 | |
| 380 | return " |
| 381 | CASE |
| 382 | " . implode(" |
| 383 | ", $whens) . " |
| 384 | END |
| 385 | "; |
| 386 | } |
| 387 | } |
| 388 |