ArchiveInvalidator
6 years ago
ArchiveInvalidator.php
6 years ago
ArchivePurger.php
6 years ago
ArchiveQuery.php
6 years ago
ArchiveQueryFactory.php
6 years ago
Chunk.php
6 years ago
DataCollection.php
6 years ago
DataTableFactory.php
6 years ago
Parameters.php
6 years ago
Chunk.php
145 lines
| 1 | <?php |
| 2 | /** |
| 3 | * Piwik - free/libre analytics platform |
| 4 | * |
| 5 | * @link https://matomo.org |
| 6 | * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later |
| 7 | * |
| 8 | */ |
| 9 | |
| 10 | namespace Piwik\Archive; |
| 11 | |
| 12 | use Piwik\DataTable; |
| 13 | |
/**
 * Splits the blobs of DataTables into chunks. Each DataTable blob used to be stored as its own entry in the
 * archive table. For better efficiency multiple DataTables are now combined into one blob entry.
 *
 * Chunks are identified by a record name of the form $recordName_chunk_0_99, $recordName_chunk_100_199 (the latter
 * stores the subtables 100-199).
 */
class Chunk
{
    const ARCHIVE_APPENDIX_SUBTABLES = 'chunk';
    const NUM_TABLES_IN_CHUNK = 100;

    /**
     * Gets the record name to use for a given tableId/subtableId.
     *
     * @param string $recordName eg 'Actions_ActionsUrl'
     * @param int $tableId eg '5' for tableId '5'
     * @return string eg 'Actions_ActionsUrl_chunk_0_99' as the table should be stored under this blob id.
     */
    public function getRecordNameForTableId($recordName, $tableId)
    {
        // floor() returns a float; cast to int so the record name is always built from integers and never
        // from a float's string representation.
        $chunkIndex = (int) floor($tableId / self::NUM_TABLES_IN_CHUNK);
        $start      = $chunkIndex * self::NUM_TABLES_IN_CHUNK;
        $end        = $start + self::NUM_TABLES_IN_CHUNK - 1;

        return $recordName . $this->getAppendix() . $start . '_' . $end;
    }

    /**
     * Moves the given blobs into chunks and assigns a proper record name containing the chunk number.
     *
     * @param string $recordName The original archive record name, eg 'Actions_ActionsUrl'
     * @param array $blobs An array containing a mapping of tableIds to blobs. Eg array(0 => 'blob', 1 => 'subtableBlob', ...)
     * @return array An array where each blob is moved into a chunk, indexed by recordNames.
     *               eg array('Actions_ActionsUrl_chunk_0_99' => array(0 => 'blob', 1 => 'subtableBlob', ...),
     *                        'Actions_ActionsUrl_chunk_100_199' => array(...))
     */
    public function moveArchiveBlobsIntoChunks($recordName, $blobs)
    {
        $chunks = array();

        foreach ($blobs as $tableId => $blob) {
            $name = $this->getRecordNameForTableId($recordName, $tableId);

            // The nested array for a chunk is created implicitly on first assignment.
            $chunks[$name][$tableId] = $blob;
        }

        return $chunks;
    }

    /**
     * Detects whether a recordName like 'Actions_ActionUrls_chunk_0_99' or 'Actions_ActionUrls' belongs to a
     * chunk or not.
     *
     * To be a valid recordName that belongs to a chunk it must end with '_chunk_NUMERIC_NUMERIC'. Only the last
     * occurrence of the appendix is considered, so a record name whose base name itself contains '_chunk_'
     * (eg 'My_chunk_Report_chunk_0_99') is still recognized.
     *
     * @param string $recordName
     * @return bool
     */
    public function isRecordNameAChunk($recordName)
    {
        $posAppendix = $this->getEndPosOfChunkAppendix($recordName);

        if (false === $posAppendix) {
            return false;
        }

        // will contain "0_99" of "chunk_0_99"
        $blobId = substr($recordName, $posAppendix);

        return $this->isChunkRange($blobId);
    }

    /**
     * Checks whether the given string consists of exactly two numeric parts separated by an underscore,
     * eg "0_99".
     *
     * @param string $blobId
     * @return bool
     */
    private function isChunkRange($blobId)
    {
        $parts = explode('_', $blobId);

        return 2 === count($parts) && is_numeric($parts[0]) && is_numeric($parts[1]);
    }

    /**
     * When having a record like 'Actions_ActionUrls_chunk_0_99' it will return the raw recordName 'Actions_ActionUrls'.
     * Record names that are not a chunk are returned unchanged.
     *
     * @param string $recordName
     * @return string
     */
    public function getRecordNameWithoutChunkAppendix($recordName)
    {
        if (!$this->isRecordNameAChunk($recordName)) {
            return $recordName;
        }

        // isRecordNameAChunk() succeeded, so the appendix is guaranteed to be present here.
        return substr($recordName, 0, $this->getStartPosOfChunkAppendix($recordName));
    }

    /**
     * Returns the string that is appended to the original record name. This appendix identifies that a record
     * name is a chunk.
     * @return string
     */
    public function getAppendix()
    {
        return '_' . self::ARCHIVE_APPENDIX_SUBTABLES . '_';
    }

    /**
     * Returns the position at which the last chunk appendix starts, or false if the record name contains none.
     *
     * The last occurrence is used because a chunk name must end with the appendix followed by the range; an
     * earlier '_chunk_' may legitimately be part of the base record name.
     *
     * @param string $recordName
     * @return int|false
     */
    private function getStartPosOfChunkAppendix($recordName)
    {
        return strrpos($recordName, $this->getAppendix());
    }

    /**
     * Returns the position directly after the last chunk appendix, or false if the record name contains none.
     *
     * @param string $recordName
     * @return int|false
     */
    private function getEndPosOfChunkAppendix($recordName)
    {
        $pos = $this->getStartPosOfChunkAppendix($recordName);

        if ($pos === false) {
            return false;
        }

        return $pos + strlen($this->getAppendix());
    }
}
| 145 |