PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / 1.3.1
Matomo Analytics – Powerful, Privacy-First Insights for WordPress v1.3.1
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / DataAccess / RawLogDao.php
matomo / app / core / DataAccess Last commit date
LogQueryBuilder 6 years ago Actions.php 6 years ago ArchiveSelector.php 6 years ago ArchiveTableCreator.php 6 years ago ArchiveTableDao.php 6 years ago ArchiveWriter.php 6 years ago ArchivingDbAdapter.php 6 years ago LogAggregator.php 5 years ago LogQueryBuilder.php 6 years ago LogTableTemporary.php 6 years ago Model.php 6 years ago RawLogDao.php 6 years ago TableMetadata.php 6 years ago
RawLogDao.php
451 lines
1 <?php
2 /**
3 * Piwik - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9 namespace Piwik\DataAccess;
10
11 use Piwik\Common;
12 use Piwik\Container\StaticContainer;
13 use Piwik\Db;
14 use Piwik\Plugin\Dimension\DimensionMetadataProvider;
15 use Piwik\Plugin\LogTablesProvider;
16
17 /**
18 * DAO that queries log tables.
19 */
20 class RawLogDao
21 {
22 const DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME = 'tmp_log_actions_to_keep';
23
24 /**
25 * @var DimensionMetadataProvider
26 */
27 private $dimensionMetadataProvider;
28
29 /**
30 * @var LogTablesProvider
31 */
32 private $logTablesProvider;
33
34 public function __construct(DimensionMetadataProvider $provider = null, LogTablesProvider $logTablesProvider = null)
35 {
36 $this->dimensionMetadataProvider = $provider ?: StaticContainer::get('Piwik\Plugin\Dimension\DimensionMetadataProvider');
37 $this->logTablesProvider = $logTablesProvider ?: StaticContainer::get('Piwik\Plugin\LogTablesProvider');
38 }
39
40 /**
41 * @param array $values
42 * @param string $idVisit
43 */
44 public function updateVisits(array $values, $idVisit)
45 {
46 $sql = "UPDATE " . Common::prefixTable('log_visit')
47 . " SET " . $this->getColumnSetExpressions(array_keys($values))
48 . " WHERE idvisit = ?";
49
50 $this->update($sql, $values, $idVisit);
51 }
52
53 /**
54 * @param array $values
55 * @param string $idVisit
56 */
57 public function updateConversions(array $values, $idVisit)
58 {
59 $sql = "UPDATE " . Common::prefixTable('log_conversion')
60 . " SET " . $this->getColumnSetExpressions(array_keys($values))
61 . " WHERE idvisit = ?";
62
63 $this->update($sql, $values, $idVisit);
64 }
65
66 /**
67 * @param string $from
68 * @param string $to
69 * @return int
70 */
71 public function countVisitsWithDatesLimit($from, $to)
72 {
73 $sql = "SELECT COUNT(*) AS num_rows"
74 . " FROM " . Common::prefixTable('log_visit')
75 . " WHERE visit_last_action_time >= ? AND visit_last_action_time < ?";
76
77 $bind = array($from, $to);
78
79 return (int) Db::fetchOne($sql, $bind);
80 }
81
82 /**
83 * Iterates over logs in a log table in chunks. Parameters to this function are as backend agnostic
84 * as possible w/o dramatically increasing code complexity.
85 *
86 * @param string $logTable The log table name. Unprefixed, eg, `log_visit`.
87 * @param array[] $conditions An array describing the conditions logs must match in the query. Translates to
88 * the WHERE part of a SELECT statement. Each element must contain three elements:
89 *
90 * * the column name
91 * * the operator (ie, '=', '<>', '<', etc.)
92 * * the operand (ie, a value)
93 *
94 * The elements are AND-ed together.
95 *
96 * Example:
97 *
98 * ```
99 * array(
100 * array('visit_first_action_time', '>=', ...),
101 * array('visit_first_action_time', '<', ...)
102 * )
103 * ```
104 * @param int $iterationStep The number of rows to query at a time.
105 * @param callable $callback The callback that processes each chunk of rows.
106 * @param string $willDelete Set to true if you will make sure to delete all rows that were fetched. If you are in
107 * doubt and not sure if to set true or false, use "false". Setting it to true will
108 * enable an internal performance improvement but it can result in an endless loop if not
109 * used properly.
110 */
111 public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback, $willDelete)
112 {
113 $lastId = 0;
114
115 if ($willDelete) {
116 // we don't want to look at eg idvisit so the query will be mostly index covered as the
117 // "where idvisit > 0 ... ORDER BY idvisit ASC" will be gone... meaning we don't need to look at a huge range
118 // of visits...
119 $idField = null;
120 $bindFunction = function ($bind, $lastId) {
121 return $bind;
122 };
123 } else {
124 // when we are not deleting, we need to ensure to iterate over each visitor step by step... meaning we
125 // need to remember which visit we have already looked at and which one not. Therefore we need to apply
126 // "where idvisit > $lastId" in the query and "order by idvisit ASC"
127 $idField = $this->getIdFieldForLogTable($logTable);
128 $bindFunction = function ($bind, $lastId) {
129 return array_merge(array($lastId), $bind);
130 };
131 }
132
133 list($query, $bind) = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep);
134
135 do {
136 $rows = Db::fetchAll($query, call_user_func($bindFunction, $bind, $lastId));
137 if (!empty($rows)) {
138 if ($idField) {
139 $lastId = $rows[count($rows) - 1][$idField];
140 }
141 $callback($rows);
142 }
143 } while (count($rows) == $iterationStep);
144 }
145
146 /**
147 * Deletes conversions for the supplied visit IDs from log_conversion. This method does not cascade, so
148 * conversion items will not be deleted.
149 *
150 * @param int[] $visitIds
151 * @return int The number of deleted rows.
152 */
153 public function deleteFromLogTable($tableName, $visitIds)
154 {
155 $sql = "DELETE FROM `" . Common::prefixTable($tableName) . "` WHERE idvisit IN "
156 . $this->getInFieldExpressionWithInts($visitIds);
157
158 $statement = Db::query($sql);
159 return $statement->rowCount();
160 }
161
162 /**
163 * Deletes conversion items for the supplied visit IDs from log_conversion_item.
164 *
165 * @param int[] $visitIds
166 * @return int The number of deleted rows.
167 */
168 public function deleteConversionItems($visitIds)
169 {
170 $sql = "DELETE FROM `" . Common::prefixTable('log_conversion_item') . "` WHERE idvisit IN "
171 . $this->getInFieldExpressionWithInts($visitIds);
172
173 $statement = Db::query($sql);
174 return $statement->rowCount();
175 }
176
177 /**
178 * Deletes all unused entries from the log_action table. This method uses a temporary table to store used
179 * actions, and then deletes rows from log_action that are not in this temporary table.
180 *
181 * Table locking is required to avoid concurrency issues.
182 *
183 * @throws \Exception If table locking permission is not granted to the current MySQL user.
184 */
185 public function deleteUnusedLogActions()
186 {
187 if (!Db::isLockPrivilegeGranted()) {
188 throw new \Exception("RawLogDao.deleteUnusedLogActions() requires table locking permission in order to complete without error.");
189 }
190
191 // get current max ID in log tables w/ idaction references.
192 $maxIds = $this->getMaxIdsInLogTables();
193
194 $this->createTempTableForStoringUsedActions();
195
196 // do large insert (inserting everything before maxIds) w/o locking tables...
197 $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true);
198
199 // ... then do small insert w/ locked tables to minimize the amount of time tables are locked.
200 $this->lockLogTables();
201 $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false);
202
203 // delete before unlocking tables so there's no chance a new log row that references an
204 // unused action will be inserted.
205 $this->deleteUnusedActions();
206
207 Db::unlockAllTables();
208
209 $this->dropTempTableForStoringUsedActions();
210 }
211
212 /**
213 * Returns the list of the website IDs that received some visits between the specified timestamp.
214 *
215 * @param string $fromDateTime
216 * @param string $toDateTime
217 * @return bool true if there are visits for this site between the given timeframe, false if not
218 */
219 public function hasSiteVisitsBetweenTimeframe($fromDateTime, $toDateTime, $idSite)
220 {
221 $sites = Db::fetchOne("SELECT 1
222 FROM " . Common::prefixTable('log_visit') . "
223 WHERE idsite = ?
224 AND visit_last_action_time > ?
225 AND visit_last_action_time < ?
226 LIMIT 1", array($idSite, $fromDateTime, $toDateTime));
227
228 return (bool) $sites;
229 }
230
231 /**
232 * @param array $columnsToSet
233 * @return string
234 */
235 protected function getColumnSetExpressions(array $columnsToSet)
236 {
237 $columnsToSet = array_map(
238 function ($column) {
239 return $column . ' = ?';
240 },
241 $columnsToSet
242 );
243
244 return implode(', ', $columnsToSet);
245 }
246
247 /**
248 * @param array $values
249 * @param $idVisit
250 * @param $sql
251 * @return \Zend_Db_Statement
252 * @throws \Exception
253 */
254 protected function update($sql, array $values, $idVisit)
255 {
256 return Db::query($sql, array_merge(array_values($values), array($idVisit)));
257 }
258
259 protected function getIdFieldForLogTable($logTable)
260 {
261 $idColumns = $this->getTableIdColumns();
262
263 if (isset($idColumns[$logTable])) {
264 return $idColumns[$logTable];
265 }
266
267 throw new \InvalidArgumentException("Unknown log table '$logTable'.");
268 }
269
270 // TODO: instead of creating a log query like this, we should re-use segments. to do this, however, there must be a 1-1
271 // mapping for dimensions => segments, and each dimension should automatically have a segment.
272 private function createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep)
273 {
274 $bind = array();
275
276 $sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE ";
277
278 $parts = array();
279
280 if ($idField) {
281 $parts[] = "$idField > ?";
282 }
283
284 foreach ($conditions as $condition) {
285 list($column, $operator, $value) = $condition;
286
287 if (is_array($value)) {
288 $parts[] = "$column IN (" . Common::getSqlStringFieldsArray($value) . ")";
289
290 $bind = array_merge($bind, $value);
291 } else {
292 $parts[]= "$column $operator ?";
293
294 $bind[] = $value;
295 }
296 }
297 $sql .= implode(' AND ', $parts);
298
299 if ($idField) {
300 $sql .= " ORDER BY $idField ASC";
301 }
302
303 $sql .= " LIMIT " . (int)$iterationStep;
304
305 return array($sql, $bind);
306 }
307
308 private function getInFieldExpressionWithInts($idVisits)
309 {
310 $sql = "(";
311
312 $isFirst = true;
313 foreach ($idVisits as $idVisit) {
314 if ($isFirst) {
315 $isFirst = false;
316 } else {
317 $sql .= ', ';
318 }
319
320 $sql .= (int)$idVisit;
321 }
322
323 $sql .= ")";
324
325 return $sql;
326 }
327
328 protected function getMaxIdsInLogTables()
329 {
330 $idColumns = $this->getTableIdColumns();
331 $tables = array_keys($idColumns);
332
333 $result = array();
334 foreach ($tables as $table) {
335 $idCol = $idColumns[$table];
336 $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
337 }
338
339 return $result;
340 }
341
342 private function createTempTableForStoringUsedActions()
343 {
344 $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME) . " (
345 idaction INT(11),
346 PRIMARY KEY (idaction)
347 )";
348 Db::query($sql);
349 }
350
351 private function dropTempTableForStoringUsedActions()
352 {
353 $sql = "DROP TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
354 Db::query($sql);
355 }
356
357 // protected for testing purposes
358 protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000)
359 {
360 $tempTableName = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
361
362 $idColumns = $this->getTableIdColumns();
363 foreach ($this->dimensionMetadataProvider->getActionReferenceColumnsByTable() as $table => $columns) {
364 $idCol = $idColumns[$table];
365 // Create select query for requesting ALL needed fields at once
366 $sql = "SELECT " . implode(',' ,$columns) . " FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?";
367
368 if ($olderThan) {
369 // Why start on zero? When running for a couple of months, this will generate about 10000+ queries with zero result. Use the lowest value instead.... saves a LOT of waiting time!
370 $start = (int) Db::fetchOne("SELECT MIN($idCol) FROM " . Common::prefixTable($table));;
371 $finish = $maxIds[$table];
372 } else {
373 $start = $maxIds[$table];
374 $finish = (int) Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table));
375 }
376 // Borrowed from Db::segmentedFetchAll
377 // Request records per $insertIntoTempIterationStep amount
378 // Loop over the result set, mapping all numeric fields in a single insert query
379
380 // Insert query would be: INSERT IGNORE INTO [temp_table] VALUES (X),(Y),(Z) depending on the amount of fields requested per row
381 for ($i = $start; $i <= $finish; $i += $insertIntoTempIterationStep) {
382 $currentParams = array($i, $i + $insertIntoTempIterationStep);
383 $result = Db::fetchAll($sql, $currentParams);
384 // Now we loop over the result set of max $insertIntoTempIterationStep rows and create insert queries
385 $keepValues = [];
386 foreach ($result as $row) {
387 $keepValues = array_merge($keepValues, array_filter(array_values($row), "is_numeric"));
388 if (count($keepValues) >= 1000) {
389 $insert = 'INSERT IGNORE INTO ' . $tempTableName .' VALUES (';
390 $insert .= implode('),(', $keepValues);
391 $insert .= ')';
392
393 Db::exec($insert);
394 $keepValues = [];
395 }
396 }
397
398 $insert = 'INSERT IGNORE INTO ' . $tempTableName .' VALUES (';
399 $insert .= implode('),(', $keepValues);
400 $insert .= ')';
401
402 Db::exec($insert);
403 }
404 }
405 }
406
407 private function lockLogTables()
408 {
409 $tables = $this->getTableIdColumns();
410 unset($tables['log_action']); // we write lock it
411 $tableNames = array_keys($tables);
412
413 $readLocks = array();
414 foreach ($tableNames as $tableName) {
415 $readLocks[] = Common::prefixTable($tableName);
416 }
417
418 Db::lockTables(
419 $readLocks,
420 $writeLocks = Common::prefixTables('log_action')
421 );
422 }
423
424 private function deleteUnusedActions()
425 {
426 list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME);
427
428 $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable
429 FROM $logActionTable
430 LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction
431 WHERE tmp.idaction IS NULL";
432
433 Db::query($deleteSql);
434 }
435
436 protected function getTableIdColumns()
437 {
438 $columns = array();
439
440 foreach ($this->logTablesProvider->getAllLogTables() as $logTable) {
441 $idColumn = $logTable->getIdColumn();
442
443 if (!empty($idColumn)) {
444 $columns[$logTable->getName()] = $idColumn;
445 }
446 }
447
448 return $columns;
449 }
450 }
451