PluginProbe ʕ •ᴥ•ʔ
Matomo Analytics – Powerful, Privacy-First Insights for WordPress / trunk
Matomo Analytics – Powerful, Privacy-First Insights for WordPress vtrunk
5.11.1 5.11.0 5.10.2 5.10.1 trunk 1.0.2 1.0.3 1.0.4 1.0.5 1.0.6 1.1.0 1.1.1 1.1.2 1.1.3 1.2.0 1.3.0 1.3.1 1.3.2 4.0.0 4.0.1 4.0.2 4.0.3 4.0.4 4.1.0 4.1.1 4.1.2 4.1.3 4.10.0 4.11.0 4.12.0 4.13.0 4.13.2 4.13.3 4.13.4 4.13.5 4.14.0 4.14.1 4.14.2 4.15.0 4.15.1 4.15.2 4.15.3 4.2.0 4.3.0 4.3.1 4.4.1 4.4.2 4.5.0 4.6.0 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7 5.0.8 5.1.0 5.1.1 5.1.2 5.1.3 5.1.4 5.1.5 5.1.6 5.1.7 5.10.0 5.2.0 5.2.1 5.2.2 5.3.0 5.3.1 5.3.2 5.3.3 5.6.0 5.6.1 5.7.0 5.7.1 5.8.0 5.8.1 5.8.2
matomo / app / core / Segment.php
matomo / app / core Last commit date
API 1 month ago Access 3 months ago Application 1 month ago Archive 1 month ago ArchiveProcessor 1 month ago Archiver 2 years ago AssetManager 1 month ago Auth 6 months ago Category 6 months ago Changes 1 month ago CliMulti 1 year ago Columns 1 month ago Concurrency 1 month ago Config 1 month ago Container 1 month ago CronArchive 3 months ago DataAccess 1 month ago DataFiles 2 years ago DataTable 2 weeks ago Db 2 weeks ago DeviceDetector 1 year ago Email 2 years ago Exception 4 months ago Http 4 months ago Intl 3 months ago Log 2 years ago Mail 1 year ago Measurable 6 months ago Menu 1 month ago Metrics 3 months ago Notification 6 months ago Period 1 month ago Plugin 2 weeks ago Policy 1 month ago ProfessionalServices 1 year ago Report 1 year ago ReportRenderer 3 months ago Request 3 months ago Scheduler 1 month ago Segment 1 month ago Session 2 weeks ago Settings 1 month ago Tracker 2 weeks ago Translation 1 month ago Twig 1 year ago UpdateCheck 3 months ago Updater 1 month ago Updates 2 days ago Validators 1 year ago View 1 month ago ViewDataTable 2 weeks ago Visualization 1 year ago Widget 1 month ago .htaccess 2 years ago Access.php 1 month ago Archive.php 1 month ago ArchiveProcessor.php 1 month ago AssetManager.php 1 month ago Auth.php 6 months ago AuthResult.php 6 months ago BaseFactory.php 2 years ago Cache.php 2 years ago CacheId.php 4 months ago CliMulti.php 1 month ago Common.php 2 weeks ago Config.php 1 month ago Console.php 3 months ago Context.php 2 years ago Cookie.php 1 year ago CronArchive.php 1 month ago DI.php 3 months ago DataArray.php 1 month ago DataTable.php 1 month ago Date.php 1 month ago Db.php 1 month ago DbHelper.php 1 month ago Development.php 1 year ago ErrorHandler.php 6 months ago EventDispatcher.php 1 month ago ExceptionHandler.php 4 months ago FileIntegrity.php 1 month ago Filechecks.php 1 year ago Filesystem.php 1 month ago FrontController.php 4 months ago Http.php 1 month ago IP.php 1 year ago Log.php 3 months ago LogDeleter.php 
1 year ago Mail.php 1 year ago Metrics.php 1 month ago NoAccessException.php 2 years ago Nonce.php 6 months ago Notification.php 1 month ago NumberFormatter.php 5 months ago Option.php 5 months ago Period.php 1 month ago Piwik.php 1 month ago Plugin.php 1 month ago Process.php 1 month ago Profiler.php 6 months ago ProxyHeaders.php 4 months ago ProxyHttp.php 5 months ago QuickForm2.php 3 months ago RankingQuery.php 1 month ago ReportRenderer.php 1 month ago Request.php 1 month ago Segment.php 1 month ago Sequence.php 6 months ago Session.php 2 weeks ago SettingsPiwik.php 1 month ago SettingsServer.php 1 year ago Singleton.php 2 years ago Site.php 1 month ago SiteContentDetector.php 1 month ago SupportedBrowser.php 2 years ago TCPDF.php 1 year ago Theme.php 1 year ago Timer.php 1 month ago Tracker.php 1 month ago Twig.php 1 month ago Unzip.php 1 year ago UpdateCheck.php 1 month ago Updater.php 1 month ago UpdaterErrorException.php 2 years ago Updates.php 3 months ago Url.php 3 months ago UrlHelper.php 1 month ago Version.php 2 days ago View.php 1 month ago bootstrap.php 1 year ago dispatch.php 2 years ago testMinimumPhpVersion.php 6 months ago
Segment.php
638 lines
1 <?php
2
3 /**
4 * Matomo - free/libre analytics platform
5 *
6 * @link https://matomo.org
7 * @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
8 */
9 namespace Piwik;
10
11 use Exception;
12 use Piwik\API\Request;
13 use Piwik\ArchiveProcessor\Rules;
14 use Piwik\Cache as PiwikCache;
15 use Piwik\Container\StaticContainer;
16 use Piwik\DataAccess\LogQueryBuilder;
17 use Piwik\Plugins\SegmentEditor\SegmentEditor;
18 use Piwik\Segment\SegmentExpression;
19 use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel;
20 use Piwik\Segment\SegmentsList;
21 /**
22 * Limits the set of visits Piwik uses when aggregating analytics data.
23 *
24 * A segment is a condition used to filter visits. They can, for example,
25 * select visits that have a specific browser or come from a specific
26 * country, or both.
27 *
28 * Plugins that aggregate data stored in Piwik can support segments by
29 * using this class when generating aggregation SQL queries.
30 *
31 * ### Examples
32 *
33 * **Basic usage**
34 *
35 * $idSites = array(1,2,3);
36 * $segmentStr = "browserCode==ff;countryCode==CA";
37 * $segment = new Segment($segmentStr, $idSites);
38 *
39 * $query = $segment->getSelectQuery(
40 * $select = "table.col1, table2.col2",
41 * $from = array("table", "table2"),
42 * $where = "table.col3 = ?",
43 * $bind = array(5),
44 * $orderBy = "table.col1 DESC",
45 * $groupBy = "table2.col2"
46 * );
47 *
48 * Db::fetchAll($query['sql'], $query['bind']);
49 *
50 * **Creating a _null_ segment**
51 *
52 * $idSites = array(1,2,3);
53 * $segment = new Segment('', $idSites);
54 * // $segment->getSelectQuery will return a query that selects all visits
55 *
56 * @api
57 */
58 class Segment
59 {
/**
 * Parsed form of the condition that is currently in use.
 *
 * @var SegmentExpression
 */
protected $segmentExpression;
/**
 * The condition string the current expression was parsed from
 * (cut to SEGMENT_TRUNCATE_LIMIT bytes).
 *
 * @var string
 */
protected $string;
/**
 * The trimmed condition exactly as it was handed to the constructor.
 *
 * @var string
 */
protected $originalString;
/**
 * Sites the segment is used with, always normalised to an array.
 *
 * @var array
 */
protected $idSites;
/**
 * Optional lower date bound applied to NOT IN subqueries.
 *
 * @var Date
 */
protected $startDate;
/**
 * Optional upper date bound applied to NOT IN subqueries.
 *
 * @var Date
 */
protected $endDate;
/**
 * Query builder fetched from the container in the constructor.
 *
 * @var LogQueryBuilder
 */
private $segmentQueryBuilder;
/**
 * True when the URL-decoded form of the condition was the one that got parsed.
 *
 * @var bool
 */
private $isSegmentEncoded;
/**
 * Set (in development mode only) when a subquery had to be skipped because
 * neither a start nor an end date was available; its trace is logged later.
 *
 * @var Exception|null
 */
private $missingDatesException;
/**
 * Segment strings are truncated to 8k.
 */
public const SEGMENT_TRUNCATE_LIMIT = 8192;
public const CACHE_KEY = 'segmenthashes';
public const SEGMENT_HAS_BUILT_CACHE_KEY = 'segmenthashbuilt';
/**
 * Builds a segment from a condition string that may or may not be URL-encoded.
 *
 * Conditions that use `!=` or `!@` on a dimension that does not live on log_visit are resolved through an
 * `idvisit NOT IN (...)` subquery. Such a subquery is only generated when a start and/or end date is known
 * (it is additionally limited to the given sites). Without any date the tables are joined directly instead,
 * and in development mode a warning is logged once the query is built because results may be incorrect.
 *
 * @param string $segmentCondition The segment condition, eg, `'browserCode==ff;countryCode==CA'`.
 * @param array $idSites The list of sites the segment will be used with. Some segments are
 *                       dependent on the site, such as goal segments.
 * @param Date|null $startDate start date used to limit subqueries
 * @param Date|null $endDate end date used to limit subqueries
 * @throws Exception if segmentation is disabled while a condition is given, or the condition cannot be parsed
 */
public function __construct($segmentCondition, $idSites, ?\Piwik\Date $startDate = null, ?\Piwik\Date $endDate = null)
{
    $this->segmentQueryBuilder = StaticContainer::get('Piwik\\DataAccess\\LogQueryBuilder');

    $segmentCondition = trim($segmentCondition ?: '');
    if (!\Piwik\SettingsPiwik::isSegmentationEnabled() && !empty($segmentCondition)) {
        throw new Exception("The Super User has disabled the Segmentation feature.");
    }

    $this->originalString = $segmentCondition;
    // the nullable type hints already guarantee "Date or null"
    $this->startDate = $startDate;
    $this->endDate = $endDate;

    // Parses a candidate and reports how many subexpressions were recognised; 0 when parsing fails.
    $countParsedSubexpressions = function ($candidate) use ($idSites) {
        try {
            $this->initializeSegment($candidate, $idSites);
            return $this->segmentExpression->getSubExpressionCount();
        } catch (Exception $e) {
            return 0;
        }
    };

    // The condition can be urlencoded, and usually both the encoded and the decoded variant parse
    // successfully. Both are tried and the one yielding more parsed subexpressions is kept.
    $decodedCondition = urldecode($segmentCondition);
    $subexpressionsDecoded = 0;
    if ($decodedCondition !== $segmentCondition) {
        $subexpressionsDecoded = $countParsedSubexpressions($decodedCondition);
    }
    $subexpressionsRaw = $countParsedSubexpressions($segmentCondition);

    if ($subexpressionsRaw > $subexpressionsDecoded) {
        // the raw variant was parsed last, so the object state is already the one we want
        $this->isSegmentEncoded = false;
        return;
    }

    // re-parse the decoded variant; this is also where a parse error finally surfaces to the caller
    $this->initializeSegment($decodedCondition, $idSites);
    $this->isSegmentEncoded = true;
}
/**
 * Tells whether a segment can be constructed from the given condition for the given sites,
 * i.e. whether instantiating it does not raise an exception.
 *
 * @param string $segmentCondition
 * @param array $idSites
 * @return bool
 * @api since Matomo 5.3.0
 */
public static function isAvailable(string $segmentCondition, array $idSites) : bool
{
    $isAvailable = true;
    try {
        new self($segmentCondition, $idSites);
    } catch (Exception $e) {
        $isAvailable = false;
    }
    return $isAvailable;
}
/**
 * Gives access to the parsed expression backing this segment.
 *
 * @return SegmentExpression
 * @api since Piwik 3.2.0
 */
public function getSegmentExpression()
{
    $expression = $this->segmentExpression;
    return $expression;
}
/**
 * Returns the segment metadata for the current sites, indexed by segment name.
 *
 * Entries the current user may not use are kept as keys but mapped to null so callers can tell
 * "unknown segment" apart from "no permission". The result is kept in the transient cache.
 *
 * @throws Exception
 */
private function getAvailableSegments()
{
    $transientCache = PiwikCache::getTransientCache();
    $cacheId = 'API.getSegmentsMetadata.' . \Piwik\SettingsPiwik::getPiwikInstanceId() . '.' . implode(",", $this->idSites);

    $cachedSegments = $transientCache->fetch($cacheId);
    if (!empty($cachedSegments)) {
        return $cachedSegments;
    }

    $requestParameters = array(
        'idSites' => $this->idSites,
        '_hideImplementationData' => 0,
        'filter_limit' => -1,
        'filter_offset' => 0,
        '_showAllSegments' => 1,
    );
    $metadata = Request::processRequest('API.getSegmentsMetadata', $requestParameters, []);

    // index by segment name, then blank out every segment we don't have permission to use
    $availableSegments = array_map(function ($segmentInfo) {
        $isDenied = isset($segmentInfo['permission']) && $segmentInfo['permission'] != 1;
        return $isDenied ? null : $segmentInfo;
    }, array_column($metadata, null, 'segment'));

    $transientCache->save($cacheId, $availableSegments);
    return $availableSegments;
}
/**
 * Looks up the metadata of a single segment.
 *
 * @param string $name segment name as used in a segment condition
 * @return array the segment metadata
 * @throws \Piwik\NoAccessException if the segment exists but the user lacks permission for it
 * @throws Exception if no segment with that name is known
 */
private function getSegmentByName($name)
{
    $knownSegments = $this->getAvailableSegments();

    if (!array_key_exists($name, $knownSegments)) {
        throw new Exception("Segment '{$name}' is not a supported segment.");
    }

    $segmentInfo = $knownSegments[$name];
    if ($segmentInfo === null) {
        // a null entry marks a segment that was removed for lack of permission
        throw new \Piwik\NoAccessException("You do not have enough permission to access the segment " . $name);
    }

    return $segmentInfo;
}
/**
 * Parses the given condition and stores the resulting expression, string and site list on this object.
 *
 * @param string $string the segment condition to parse
 * @param array|int|string|null $idSites a single site id or a list of them; empty values become an empty list
 * @throws Exception when the condition references unknown or forbidden segments or cannot be parsed
 */
protected function initializeSegment($string, $idSites)
{
    // As a preventive measure, the condition is cut to a safe maximum size
    $this->string = $string = substr($string, 0, self::SEGMENT_TRUNCATE_LIMIT);

    if (empty($idSites)) {
        $this->idSites = [];
    } else {
        $this->idSites = is_array($idSites) ? $idSites : [$idSites];
    }

    $this->segmentExpression = $expression = new SegmentExpression($string);

    // parse the condition, expand union segments and fold NOT IN candidates together
    $parsed = $expression->parseSubExpressions();
    $parsed = $this->getExpressionsWithUnionsResolved($parsed);
    $parsed = $this->mergeSubqueryExpressionsInTree($parsed);

    // For every operand: translate the segment name into its SQL counterpart, verify the user may
    // use it, and run the value through the segment's filters (to map it to the DB field format).
    $cleanOperand = function (array $operand) {
        return $this->getCleanedExpression($operand);
    };
    $cleanOrGroup = function (array $orExpressions) use ($cleanOperand) {
        return array_map($cleanOperand, $orExpressions);
    };

    $expression->setSubExpressionsAfterCleanup(array_map($cleanOrGroup, $parsed));
}
/**
 * Replaces operands that reference a "union of segments" by one operand per member segment.
 *
 * Operands that will be turned into a NOT IN subquery are left alone, as are operands of
 * segments that are not unions.
 *
 * @param array $expressions list of OR groups, each being a list of operands
 * @return array the same structure with union operands expanded in place
 */
private function getExpressionsWithUnionsResolved(array $expressions) : array
{
    $resolved = [];

    foreach ($expressions as $groupIndex => $orExpressions) {
        $resolvedGroup = [];

        foreach ($orExpressions as $operand) {
            $segmentName = $operand[SegmentExpression::INDEX_OPERAND_NAME];
            $segmentInfo = $this->getSegmentByName($segmentName);

            // Segments using !@ or != on a table other than log_visit are built with a subquery later,
            // so they must stay untouched here.
            $needsSubquery = $this->doesSegmentNeedSubquery($operand[SegmentExpression::INDEX_OPERAND_OPERATOR], $segmentName);
            if ($needsSubquery || empty($segmentInfo['unionOfSegments'])) {
                $resolvedGroup[] = $operand;
                continue;
            }

            foreach ($segmentInfo['unionOfSegments'] as $memberName) {
                $memberOperand = $operand;
                $memberOperand[SegmentExpression::INDEX_OPERAND_NAME] = $memberName;
                $resolvedGroup[] = $memberOperand;
            }
        }

        $resolved[$groupIndex] = $resolvedGroup;
    }

    return $resolved;
}
/**
 * Tells whether a segment is backed by a log_visit column. For a union segment it is enough
 * that one of its member segments is.
 *
 * @param string $name segment name
 * @return bool
 * @throws Exception if the segment (or a member of the union) is unknown or not permitted
 */
private function isVisitSegment($name)
{
    $segmentInfo = $this->getSegmentByName($name);

    if (empty($segmentInfo['unionOfSegments'])) {
        return strpos($segmentInfo['sqlSegment'], 'log_visit.') === 0;
    }

    foreach ($segmentInfo['unionOfSegments'] as $memberName) {
        $memberInfo = $this->getSegmentByName($memberName);
        if (strpos($memberInfo['sqlSegment'], 'log_visit.') === 0) {
            return true;
        }
    }

    return false;
}
/**
 * Decides whether an operand has to be expressed as an `idvisit NOT IN (...)` subquery.
 *
 * That is the case for `!=` / `!@` on a segment that is not a visit segment, provided a start
 * or end date is available. Without any date the subquery is skipped; in development mode the
 * place where this happened is remembered so it can be logged when the query is built.
 *
 * @param string $operator the operand's match type
 * @param string $segmentName the operand's segment name
 * @return bool
 */
private function doesSegmentNeedSubquery($operator, $segmentName)
{
    $negatingOperators = [SegmentExpression::MATCH_DOES_NOT_CONTAIN, SegmentExpression::MATCH_NOT_EQUAL];

    // the visit-segment lookup is only performed for negating operators
    if (!in_array($operator, $negatingOperators) || $this->isVisitSegment($segmentName)) {
        return false;
    }

    $hasAnyDate = !empty($this->startDate) || !empty($this->endDate);
    if ($hasAnyDate) {
        return true;
    }

    if (\Piwik\Development::isEnabled()) {
        $this->missingDatesException = new Exception();
    }
    return false;
}
/**
 * Maps a negating operator to the positive operator used inside the NOT IN subquery
 * (`!=` becomes "equals", `!@` becomes "contains").
 *
 * @param string $operator
 * @return string
 * @throws Exception for every other operator
 */
private function getInvertedOperatorForSubQuery($operator)
{
    if ($operator === SegmentExpression::MATCH_NOT_EQUAL) {
        return SegmentExpression::MATCH_EQUAL;
    }

    if ($operator === SegmentExpression::MATCH_DOES_NOT_CONTAIN) {
        return SegmentExpression::MATCH_CONTAINS;
    }

    throw new Exception("Operator not support for subqueries");
}
/**
 * Whether the parsed segment expression is empty.
 *
 * @return bool `true` for an empty segment, `false` otherwise (as reported by the expression).
 */
public function isEmpty()
{
    $expression = $this->segmentExpression;
    return $expression->isEmpty();
}
/**
 * Detects whether this instance is configured so that the segment can be archived, either through
 * browser archiving or through cli archiving. It does not tell whether the segment has already been
 * archived; to find that out the actual report data needs to be requested.
 *
 * No date or period is taken into account: the instance might be able to archive the segment in
 * general, but not for a certain period, eg if the archiving of range dates is disabled.
 *
 * @return bool
 */
public function willBeArchived()
{
    // the empty segment ("all visits") is always archived
    if ($this->isEmpty()) {
        return true;
    }

    if (Rules::isRequestAuthorizedToArchive()) {
        return true;
    }

    if (Rules::isBrowserArchivingAvailableForSegments()) {
        return true;
    }

    return (bool) Rules::isSegmentPreProcessed($this->idSites, $this);
}
/**
 * Converts one parsed operand into the tuple stored on the SegmentExpression.
 *
 * The returned list is `[sqlName, matchType, value, joinTable, segmentMetadata]`:
 *  - for a MATCH_IDVISIT_NOT_IN operand it is
 *    `['log_visit.idvisit', MATCH_ACTIONS_NOT_CONTAINS, <subquery>, null, null]`;
 *  - when the segment's `sqlFilter` callback returns null it is
 *    `[null, matchType, null, null, segmentMetadata]`;
 *  - otherwise the value has been passed through the segment's `sqlFilterValue` / `sqlFilter`
 *    callbacks (skipped for the "is (not) null or empty" match types).
 *
 * @param array $expression operand indexed by the SegmentExpression::INDEX_OPERAND_* constants
 * @return array
 * @throws Exception if the segment is unknown or the user is not allowed to use it
 */
protected function getCleanedExpression(array $expression) : array
{
    $name = $expression[SegmentExpression::INDEX_OPERAND_NAME];
    $matchType = $expression[SegmentExpression::INDEX_OPERAND_OPERATOR];
    $value = $expression[SegmentExpression::INDEX_OPERAND_VALUE];

    if (empty($this->idSites)) {
        $segmentsList = SegmentsList::get();
    } else {
        $segmentsList = \Piwik\Context::changeIdSite(implode(',', $this->idSites), function () {
            return SegmentsList::get();
        });
    }
    $segmentObject = $segmentsList->getSegment($name);
    $sqlName = $segmentObject ? $segmentObject->getSqlSegment() : null;

    $joinTable = null;
    if ($segmentObject && $segmentObject->dimension && $segmentObject->dimension->getDbColumnJoin()) {
        $join = $segmentObject->dimension->getDbColumnJoin();
        $dbDiscriminator = $segmentObject->dimension->getDbDiscriminator();
        // The joined table gets an alias because an archive query may add the same table with a different
        // join. Deriving the alias from the joined-on column (instead of eg the segment name) means a table
        // is only joined again when it is joined on a different column.
        $tableAlias = $join->getTable() . '_segment_' . str_replace('.', '', $sqlName ?: '');
        $joinTable = [
            'table' => $join->getTable(),
            'tableAlias' => $tableAlias,
            'field' => $tableAlias . '.' . $join->getTargetColumn(),
            'joinOn' => $sqlName . ' = ' . $tableAlias . '.' . $join->getColumn(),
        ];
        if ($dbDiscriminator) {
            $joinTable['discriminator'] = $tableAlias . '.' . $dbDiscriminator->getColumn() . ' = \'' . $dbDiscriminator->getValue() . '\'';
        }
    }

    if ($matchType == SegmentExpression::MATCH_IDVISIT_NOT_IN) {
        return $this->getIdVisitNotInExpression($value);
    }

    if (empty($segmentObject)) {
        throw new Exception("Segment '{$name}' is not a supported segment.");
    }

    // also enforces the permission check for this segment
    $segment = $this->getSegmentByName($name);

    $isNullOrEmptyMatch = $matchType == SegmentExpression::MATCH_IS_NOT_NULL_NOR_EMPTY
        || $matchType == SegmentExpression::MATCH_IS_NULL_OR_EMPTY;
    if ($isNullOrEmptyMatch) {
        return array($sqlName, $matchType, $value, $joinTable, $segment);
    }

    if (isset($segment['sqlFilterValue'])) {
        $value = call_user_func($segment['sqlFilterValue'], $value, $segment['sqlSegment']);
    }

    // apply presentation filter
    if (isset($segment['sqlFilter'])) {
        $value = call_user_func($segment['sqlFilter'], $value, $segment['sqlSegment'], $matchType, $name);

        if (is_null($value)) {
            // null is returned in TableLogAction::getIdActionFromSegment()
            return array(null, $matchType, null, null, $segment);
        }

        // sqlFilter-callbacks might return arrays for more complex cases
        // e.g. see TableLogAction::getIdActionFromSegment()
        if (is_array($value) && isset($value['SQL'])) {
            // Special case: returned value is a sub sql expression!
            $matchType = SegmentExpression::MATCH_ACTIONS_CONTAINS;
            $joinTable = null;
        }

        if (is_array($value) && isset($value['value'])) {
            // The 'joinTable' flag lives on the array returned by the filter, so it has to be read
            // BEFORE $value is replaced by the plain value; reading it afterwards always yielded false.
            $joinTable = !empty($value['joinTable']);
            $value = $value['value'];
        }
    }

    return array($sqlName, $matchType, $value, $joinTable, $segment);
}

/**
 * Builds the operand for a merged `idvisit NOT IN (<subquery>)` condition.
 *
 * The subquery selects the idvisit of every visit matching the given (already inverted) condition,
 * limited to the sites and the start/end dates of this segment where those are set.
 *
 * @param string $segmentCondition condition the visits to exclude have to match
 * @return array operand tuple `['log_visit.idvisit', MATCH_ACTIONS_NOT_CONTAINS, <subquery>, null, null]`
 * @throws Exception if the nested segment cannot be created
 */
private function getIdVisitNotInExpression($segmentCondition) : array
{
    $segmentObj = new \Piwik\Segment($segmentCondition, $this->idSites, $this->startDate, $this->endDate);

    $select = 'log_visit.idvisit';
    $from = 'log_visit';
    $datetimeField = 'visit_last_action_time';
    $where = [];
    $bind = [];

    if (!empty($this->idSites)) {
        $where[] = "{$from}.idsite IN (" . \Piwik\Common::getSqlStringFieldsArray($this->idSites) . ")";
        $bind = $this->idSites;
    }
    if ($this->startDate instanceof \Piwik\Date) {
        $where[] = "{$from}.{$datetimeField} >= ?";
        $bind[] = $this->startDate->toString(\Piwik\Date::DATE_TIME_FORMAT);
    }
    if ($this->endDate instanceof \Piwik\Date) {
        $where[] = "{$from}.{$datetimeField} <= ?";
        $bind[] = $this->endDate->toString(\Piwik\Date::DATE_TIME_FORMAT);
    }

    $logQueryBuilder = StaticContainer::get('Piwik\\DataAccess\\LogQueryBuilder');
    $forceGroupByBackup = $logQueryBuilder->getForcedInnerGroupBySubselect();
    $logQueryBuilder->forceInnerGroupBySubselect(LogQueryBuilder::FORCE_INNER_GROUP_BY_NO_SUBSELECT);
    try {
        $query = $segmentObj->getSelectQuery($select, $from, implode(' AND ', $where), $bind);
    } finally {
        // the query builder is a shared service: always put the previous mode back, even on failure
        $logQueryBuilder->forceInnerGroupBySubselect($forceGroupByBackup);
    }

    return ['log_visit.idvisit', SegmentExpression::MATCH_ACTIONS_NOT_CONTAINS, $query, null, null];
}
/**
 * Returns the segment condition this segment was parsed from.
 *
 * @return string
 */
public function getString()
{
    $condition = $this->string;
    return $condition;
}
/**
 * Returns the hash identifying the segment condition. An empty condition has no hash
 * and yields the empty string.
 *
 * @return string
 */
public function getHash()
{
    return empty($this->string) ? '' : self::getSegmentHash($this->string);
}
/**
 * Returns the hash for a segment definition.
 *
 * Hashes of stored segments are looked up through the eager cache, which is filled once from the
 * SegmentEditor model (keyed by the raw, url-decoded and url-encoded definition). A definition that
 * is not stored falls back to the md5 of its url-decoded form.
 *
 * @param string $definition
 * @return string
 */
public static function getSegmentHash($definition)
{
    $eagerCache = \Piwik\Cache::getEagerCache();
    $lookupKey = self::CACHE_KEY . md5($definition);

    if ($eagerCache->contains($lookupKey)) {
        return $eagerCache->fetch($lookupKey);
    }

    $fallbackHash = md5(urldecode($definition));

    // The hash cache was built already and does not know this definition: it is not a stored segment
    // (eg it comes from the url, or is something like "visitorType==new"), so use the default hash.
    if ($eagerCache->contains(self::SEGMENT_HAS_BUILT_CACHE_KEY)) {
        return $fallbackHash;
    }

    // the hash cache has not been built yet, do it now
    $model = new SegmentEditorModel();
    foreach ($model->getAllSegmentsAndIgnoreVisibility() as $storedSegment) {
        $storedDefinition = $storedSegment['definition'];
        $definitionVariants = [$storedDefinition, urldecode($storedDefinition), urlencode($storedDefinition)];
        foreach ($definitionVariants as $variant) {
            $eagerCache->save(self::CACHE_KEY . md5($variant), $storedSegment['hash']);
        }
    }
    $eagerCache->save(self::SEGMENT_HAS_BUILT_CACHE_KEY, true);

    // a stored segment now has its hash cached; anything else keeps the default
    return $eagerCache->contains($lookupKey) ? $eagerCache->fetch($lookupKey) : $fallbackHash;
}
/**
 * Extend an SQL query that aggregates data over one of the 'log_' tables with segment expressions.
 *
 * @param string $select The select clause. Should NOT include the **SELECT** just the columns, eg,
 *                       `'t1.col1 as col1, t2.col2 as col2'`.
 * @param array|string $from Array of table names (without prefix), eg, `array('log_visit', 'log_conversion')`.
 * @param false|string $where (optional) Where clause, eg, `'t1.col1 = ? AND t2.col2 = ?'`.
 * @param array|string $bind (optional) Bind parameters, eg, `array($col1Value, $col2Value)`.
 * @param false|string $orderBy (optional) Order by clause, eg, `"t1.col1 ASC"`.
 * @param false|string $groupBy (optional) Group by clause, eg, `"t2.col2"`.
 * @param int $limit Limit number of result to $limit; values <= 0 mean no limit.
 * @param int $offset Specified the offset of the first row to return; only used together with $limit.
 * @param bool $forceGroupBy Force the group by and not using a subquery. Note: This may make the query slower see https://github.com/matomo-org/matomo/issues/9200#issuecomment-183641293
 *                           A $groupBy value needs to be set for this to work.
 * @param bool $withRollup Handed through unchanged to LogQueryBuilder::getSelectQueryString().
 *                         NOTE(review): presumably adds WITH ROLLUP to the group by - confirm in LogQueryBuilder.
 * @return array{sql: string, bind: array<scalar>} The entire select query.
 * @throws Exception whatever the query builder throws is passed on
 */
public function getSelectQuery($select, $from, $where = \false, $bind = array(), $orderBy = \false, $groupBy = \false, $limit = 0, $offset = 0, $forceGroupBy = \false, bool $withRollup = \false)
{
    if (\Piwik\Development::isEnabled() && !empty($this->missingDatesException)) {
        $e = new Exception();
        \Piwik\Log::warning('Avoiding segment subquery due to missing start date and/or an end date. ' . 'Please ensure a start date and/or end date is set when initializing segment: ' . "\n\nCreation stacktrace:\n" . $this->missingDatesException->getTraceAsString() . "\n\nUsage stacktrace:\n" . $e->getTraceAsString());
    }

    $limitAndOffset = null;
    if ($limit > 0) {
        $limitAndOffset = (int) $offset . ', ' . (int) $limit;
    }

    $forceNoSubselect = $forceGroupBy && $groupBy;

    try {
        if ($forceNoSubselect) {
            $this->segmentQueryBuilder->forceInnerGroupBySubselect(LogQueryBuilder::FORCE_INNER_GROUP_BY_NO_SUBSELECT);
        }

        return $this->segmentQueryBuilder->getSelectQueryString($this->segmentExpression, $select, $from, $where, $bind, $groupBy, $orderBy, $limitAndOffset, $withRollup);
    } finally {
        // The query builder is shared, so the forced mode must be cleared on every exit path -
        // including \Error throwables, which the former catch (Exception) did not cover.
        if ($forceNoSubselect) {
            $this->segmentQueryBuilder->forceInnerGroupBySubselect('');
        }
    }
}
/**
 * String representation of the segment: its condition, cast to a string.
 *
 * @return string
 */
public function __toString()
{
    $condition = $this->getString();
    return (string) $condition;
}
/**
 * Appends a segment condition to a segment string, unless the condition is already part of it.
 *
 * The combination is naive: it simply concatenates and does not take order of operations into account.
 *
 * @param string $segment The segment to extend. If empty, the condition itself is returned.
 * @param string $operator The operator to use. Should be either SegmentExpression::AND_DELIMITER
 *                         or SegmentExpression::OR_DELIMITER.
 * @param string $segmentCondition The segment condition to add. If empty, $segment is returned unchanged.
 * @return string
 */
public static function combine($segment, $operator, $segmentCondition)
{
    if (empty($segment)) {
        return $segmentCondition;
    }

    $nothingToAppend = empty($segmentCondition) || self::containsCondition($segment, $operator, $segmentCondition);

    return $nothingToAppend ? $segment : $segment . $operator . $segmentCondition;
}
/**
 * Checks whether a condition is already present in a segment string, either next to the operator
 * or as the whole segment. Raw, url-encoded and url-decoded spellings of the condition are tried.
 *
 * @param string $segment
 * @param string $operator
 * @param string $segmentCondition
 * @return bool
 */
private static function containsCondition($segment, $operator, $segmentCondition)
{
    $encodedCondition = urlencode($segmentCondition);
    $decodedCondition = urldecode($segmentCondition);

    $fragments = [
        // segment and condition share the same encoding
        $operator . $segmentCondition,
        $segmentCondition . $operator,
        // operator and condition encoded together
        urlencode($operator . $segmentCondition),
        urlencode($segmentCondition . $operator),
        // only the condition is encoded
        $operator . $encodedCondition,
        $encodedCondition . $operator,
        // only the condition is decoded
        $operator . $decodedCondition,
        $decodedCondition . $operator,
    ];

    foreach ($fragments as $fragment) {
        if (strpos($segment, $fragment) !== false) {
            return true;
        }
    }

    // the segment consists of nothing but the condition
    return $segment === $segmentCondition
        || $segment === $encodedCondition
        || $segment === $decodedCondition;
}
/**
 * Returns a display name for this segment.
 *
 * For the empty segment this is the translated "all visits" label. Otherwise the stored segments
 * of the site are searched for a definition equal to this segment's condition (current or original
 * string, raw, url-decoded or url-encoded); when several match, the last one wins. If none matches,
 * the condition itself is returned, url-decoded when the segment was detected as encoded.
 *
 * @param int|string $idSite site whose stored segments are looked at
 * @return string
 */
public function getStoredSegmentName($idSite)
{
    $condition = $this->getString();
    if (empty($condition)) {
        return \Piwik\Piwik::translate('SegmentEditor_DefaultAllVisits');
    }

    $acceptedDefinitions = [
        $condition,
        urldecode($condition),
        urlencode($condition),
        $this->originalString,
        urldecode($this->originalString),
        urlencode($this->originalString),
    ];

    $matchingSegment = null;
    foreach (SegmentEditor::getAllSegmentsForSite($idSite) as $storedSegment) {
        // deliberately a loose comparison, like the == checks it stands for
        if (in_array($storedSegment['definition'], $acceptedDefinitions)) {
            $matchingSegment = $storedSegment;
        }
    }

    if ($matchingSegment !== null) {
        // segment name is stored sanitized
        return \Piwik\Common::unsanitizeInputValues($matchingSegment['name']);
    }

    if ($this->isSegmentEncoded) {
        return urldecode($condition);
    }
    return $condition;
}
/**
 * Returns the trimmed segment condition exactly as it was passed to the constructor,
 * before any url-decoding or truncation.
 *
 * @return string
 */
public function getOriginalString()
{
    $original = $this->originalString;
    return $original;
}
/**
 * Prepares subqueries for segments that are not on the log_visit table but use !@ or != as operator.
 * This is required so that a segment like actionUrl!@value really excludes every visit having an
 * action containing `value`.
 *
 * Adjacent conditions that both require a subquery are merged into a single NOT IN sql subquery,
 * which improves performance: subquery conditions within the same chain of OR's are merged together,
 * and so are subquery conditions within the chain of AND's that stand alone, i.e. are not part of
 * an OR expression.
 *
 * In the parsed intermediate structure the merged conditions are represented by operands using the
 * special MATCH_IDVISIT_NOT_IN operator.
 *
 * @param array $tree list of OR groups that are AND-ed together
 * @return array
 */
private function mergeSubqueryExpressionsInTree(array $tree) : array
{
    // first pass: merge inside every OR group
    $mergedOrGroups = [];
    foreach ($tree as $position => $orExpressions) {
        $mergedOrGroups[$position] = $this->mergeSubqueryExpressionsInExpr($orExpressions, false);
    }

    // second pass: merge across the AND chain
    return $this->mergeSubqueryExpressionsInExpr($mergedOrGroups, true);
}
/**
 * Merges all operands of one chain that need a subquery into a single MATCH_IDVISIT_NOT_IN operand.
 *
 * Entries that do not need a subquery keep their relative order; the merged operand, if any, is
 * appended after them.
 *
 * @param array $expressions for an OR chain: list of operands; for the AND chain: list of OR groups
 * @param bool $isAndChain whether $expressions is the AND chain
 * @return array
 */
private function mergeSubqueryExpressionsInExpr(array $expressions, bool $isAndChain) : array
{
    // an OR group with at most one operand has nothing to merge
    if (!$isAndChain && count($expressions) <= 1) {
        return $expressions;
    }

    $keptEntries = [];
    $notInConditions = [];

    foreach ($expressions as $entry) {
        // In the AND chain an entry with more than one OR-ed operand cannot be touched;
        // only entries consisting of a single operand are candidates there.
        $isMultiOperandOrGroup = $isAndChain && count($entry) > 1;

        if (!$isMultiOperandOrGroup) {
            $operand = $isAndChain ? $entry[0] : $entry;
            $segmentName = $operand[SegmentExpression::INDEX_OPERAND_NAME];
            $matchType = $operand[SegmentExpression::INDEX_OPERAND_OPERATOR];
            $segmentValue = $operand[SegmentExpression::INDEX_OPERAND_VALUE];

            if ($this->doesSegmentNeedSubquery($matchType, $segmentName)) {
                // pageTitle!=def has to become idvisit NOT IN (... WHERE pageTitle == def),
                // so the operator is inverted before it goes into the merged condition
                $invertedOperator = $this->getInvertedOperatorForSubQuery($matchType);
                $notInConditions[] = $segmentName . $invertedOperator . $this->escapeSegmentValue($segmentValue);
                continue;
            }
        }

        $keptEntries[] = $entry;
    }

    if (empty($notInConditions)) {
        return $keptEntries;
    }

    // conditions taken from the AND chain are OR-ed inside the NOT IN, those from an OR chain are AND-ed
    $delimiter = $isAndChain ? SegmentExpression::OR_DELIMITER : SegmentExpression::AND_DELIMITER;
    $mergedOperand = [
        SegmentExpression::INDEX_OPERAND_NAME => null,
        SegmentExpression::INDEX_OPERAND_OPERATOR => SegmentExpression::MATCH_IDVISIT_NOT_IN,
        SegmentExpression::INDEX_OPERAND_VALUE => implode($delimiter, $notInConditions),
    ];
    $keptEntries[] = $isAndChain ? [$mergedOperand] : $mergedOperand;

    return $keptEntries;
}
/**
 * Escapes segment expression delimiters in a segment value with a backslash if not already done.
 *
 * A delimiter that is directly preceded by a backslash is considered escaped and left alone.
 *
 * @param string $value the raw operand value
 * @return string the value with every unescaped AND/OR delimiter prefixed by a backslash
 */
private function escapeSegmentValue(string $value) : string
{
    $delimiters = SegmentExpression::AND_DELIMITER . SegmentExpression::OR_DELIMITER;
    // quote for use inside a /.../ pattern, so a delimiter containing "/" could never break the regex
    $pattern = '/((?<!\\\\)[' . preg_quote($delimiters, '/') . '])/';
    return preg_replace($pattern, '\\\\$1', $value);
}
637 }
638