PluginProbe ʕ •ᴥ•ʔ
Jetpack – WP Security, Backup, Speed, & Growth / 15.9-a.7
Jetpack – WP Security, Backup, Speed, & Growth v15.9-a.7
16.0-a.7 16.0-a.5 15.9.1 16.0-a.3 16.0-a.1 15.9 15.9-beta 15.9-a.7 15.9-a.5 15.9-a.3 15.9-a.1 15.8 15.8-beta 15.8-a.7 15.8-a.5 5.2.5 5.3.4 5.4.4 5.5.5 5.6.5 5.7.5 5.8.4 5.9.4 6.0.4 6.1 6.1.1 6.1.2 6.1.3 6.1.4 6.1.5 6.2 6.2.1 6.2.2 6.2.3 6.2.4 6.2.5 6.3 6.3.1 6.3.2 6.3.3 6.3.4 6.3.5 6.3.6 6.3.7 6.4 6.4.1 6.4.2 6.4.3 6.4.4 6.4.5 6.4.6 6.5 6.5.1 6.5.2 6.5.3 6.5.4 6.6 6.6.1 6.6.2 6.6.3 6.6.4 6.6.5 6.7 6.7.1 6.7.2 6.7.3 6.7.4 6.8 6.8.1 6.8.2 6.8.3 6.8.4 6.8.5 6.9 6.9.1 6.9.2 6.9.3 6.9.4 7.0 7.0.1 7.0.2 7.0.3 7.0.4 7.0.5 7.1 7.1.1 7.1.2 7.1.3 7.1.4 7.1.5 7.2 7.2.1 7.2.1.1 7.2.2 7.2.3 7.2.4 7.2.5 7.3 7.3.0.1 7.3.1 7.3.1.1 7.3.2 7.3.3 7.3.4 7.3.5 7.4 7.4.1 7.4.2 7.4.3 7.4.4 7.4.5 7.5 7.5.0.1 7.5.1 7.5.2 7.5.3 7.5.4 7.5.5 7.5.6 7.5.7 7.6 7.6.1 7.6.2 7.6.3 7.6.4 7.7 7.7.1 7.7.2 7.7.3 7.7.4 7.7.5 7.7.6 7.8 7.8.1 7.8.2 7.8.3 7.8.4 7.9 7.9.1 7.9.2 7.9.3 7.9.4 8.0 8.0.1 8.0.2 8.0.3 8.1 8.1.1 8.1.2 8.1.3 8.1.4 8.2 8.2.0.1 8.2.1 8.2.2 8.2.3 8.2.4 8.2.5 8.2.6 8.3 8.3.1 8.3.2 8.3.3 8.4 8.4.1 8.4.2 8.4.3 8.4.4 8.4.5 8.5 8.5.1 8.5.2 8.5.3 8.6 8.6.1 8.6.2 8.6.3 8.6.4 8.7 8.7.0.1 8.7.1 8.7.2 8.7.3 8.7.4 8.8 8.8.1 8.8.2 8.8.3 8.8.4 8.8.5 8.9 8.9.1 8.9.2 8.9.3 8.9.4 9.0 9.0.1 9.0.2 9.0.3 9.0.4 9.0.5 9.1 9.1.1 9.1.2 9.1.3 9.2 9.2.1 9.2.2 9.2.3 9.2.4 9.3 9.3.1 9.3.2 9.3.3 9.3.4 9.3.5 9.4 9.4.1 9.4.2 9.4.3 9.4.4 9.5 9.5.1 9.5.2 9.5.3 9.5.4 9.5.5 9.6 9.6.1 9.6.2 9.6.3 9.6.4 9.7 9.7.1 9.7.2 15.7-beta.2 9.7.3 15.7.1 9.8 15.8-a.1 9.8.1 15.8-a.3 9.8.2 2.0.9 9.8.3 2.1.7 9.9 2.2.10 9.9.1 2.3.10 9.9.2 2.4.7 9.9.3 2.5.5 2.6.6 2.7.5 2.8.5 2.9.6 3.0.6 3.1.5 3.2.5 3.3.6 3.4.6 3.5.6 3.6.4 3.7.5 3.8.5 3.9.10 4.0.7 4.1.4 4.2.5 4.3.5 4.4.5 4.5.3 4.6.3 4.7.4 4.8.5 4.9.3 5.0.3 5.1.4 trunk 10.0 10.0.1 10.0.2 10.1 10.1.1 10.1.2 10.2 10.2.1 10.2.2 10.2.3 10.3 10.3.1 10.3.2 10.4 10.4.1 10.4.2 10.5 10.5.1 10.5.2 10.5.3 10.6 10.6.1 10.6.2 10.7 10.7.1 10.7.2 10.8 10.8.1 10.8.2 10.9 10.9.1 10.9.2 10.9.3 11.0 11.0.1 11.0.2 11.1 11.1.1 11.1.2 11.1.3 11.1.4 11.2 11.2.1 11.2.2 11.3 11.3.1 11.3.2 11.3.3 11.3.4 11.4 11.4.1 11.4.2 11.5 11.5.1 11.5.2 11.5.3 11.6 11.6.1 11.6.2 11.7 11.7.1 11.7.2 11.7.3 11.8 11.8.3 11.8.4 11.8.5 11.8.6 11.9 11.9.1 11.9.2 11.9.3 12.0 12.0.1 12.0.2 12.1 12.1.1 12.1.2 12.2 12.2.1 12.2.2 12.3 12.3.1 12.4 12.4.1 12.5 12.5.1 12.6 12.6.1 12.6.2 12.6.3 12.7 12.7.1 12.7.2 12.8 12.8.1 12.8.2 12.9 12.9.1 12.9.2 12.9.3 12.9.4 13.0 13.0.1 13.1 13.1.1 13.1.2 13.1.3 13.1.4 13.2 13.2.1 13.2.2 13.2.3 13.3 13.3.1 13.3.2 13.4 13.4.1 13.4.2 13.4.3 13.4.4 13.5 13.5.1 13.6 13.6.1 13.7 13.7.1 13.8 13.8.1 13.8.2 13.9 13.9.1 14.0 14.1 14.2 14.2.1 14.3 14.4 14.4.1 14.5 14.6 14.7 14.8 14.9 14.9.1 15.0 15.0.1 15.0.2 15.1 15.1.1 15.2 15.3 15.3.1 15.4 15.5 15.6 15.7 15.7-a.1 15.7-a.3 15.7-a.5 15.7-a.7 15.7-beta
jetpack / vendor / wikimedia / aho-corasick / src / MultiStringMatcher.php
jetpack / vendor / wikimedia / aho-corasick / src Last commit date
MultiStringMatcher.php 4 years ago MultiStringReplacer.php 4 years ago
MultiStringMatcher.php
231 lines
1 <?php
2 /**
3 * AhoCorasick PHP Library
4 *
5 * A PHP implementation of the Aho-Corasick string matching algorithm.
6 *
7 * Alfred V. Aho and Margaret J. Corasick, "Efficient string matching:
8 * an aid to bibliographic search", CACM, 18(6):333-340, June 1975.
9 *
10 * @link http://xlinux.nist.gov/dads//HTML/ahoCorasick.html
11 * @link https://en.wikipedia.org/wiki/Aho-Corasick_string_matching_algorithm
12 *
13 * Copyright (C) 2015 Ori Livneh <ori@wikimedia.org>
14 *
15 * Licensed under the Apache License, Version 2.0 (the "License");
16 * you may not use this file except in compliance with the License.
17 * You may obtain a copy of the License at
18 *
19 * http://www.apache.org/licenses/LICENSE-2.0
20 *
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an "AS IS" BASIS,
23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 * See the License for the specific language governing permissions and
25 * limitations under the License.
26 *
27 * @file
28 * @author Ori Livneh <ori@wikimedia.org>
29 */
30
31 namespace AhoCorasick;
32
33 /**
34 * Represents a finite state machine that can find all occurrences
35 * of a set of search keywords in a body of text.
36 *
37 * The time it takes to construct the finite state machine is
38 * proportional to the sum of the lengths of the search keywords.
39 * Once constructed, the machine can locate all occurences of all
40 * search keywords in a body of text in a single pass, making exactly
41 * one state transition per input character.
42 *
43 * This is an implementation of the Aho-Corasick string matching
44 * algorithm.
45 *
46 * Alfred V. Aho and Margaret J. Corasick, "Efficient string matching:
47 * an aid to bibliographic search", CACM, 18(6):333-340, June 1975.
48 *
49 * @link http://xlinux.nist.gov/dads//HTML/ahoCorasick.html
50 */
51 class MultiStringMatcher {
52
53 /** @var string[] The set of keywords to be searched for. **/
54 protected $searchKeywords = [];
55
56 /** @var int The number of possible states of the string-matching finite state machine. **/
57 protected $numStates = 1;
58
59 /** @var array Mapping of states to outputs. **/
60 protected $outputs = [];
61
62 /** @var array Mapping of failure transitions. **/
63 protected $noTransitions = [];
64
65 /** @var array Mapping of success transitions. **/
66 protected $yesTransitions = [];
67
68 /**
69 * Constructor.
70 *
71 * @param string[] $searchKeywords The set of keywords to be matched.
72 */
73 public function __construct( array $searchKeywords ) {
74 foreach ( $searchKeywords as $keyword ) {
75 if ( $keyword !== '' ) {
76 $this->searchKeywords[$keyword] = strlen( $keyword );
77 }
78 }
79
80 if ( !$this->searchKeywords ) {
81 trigger_error( __METHOD__ . ': The set of search keywords is empty.', E_USER_WARNING );
82 // Unreachable 'return' when PHPUnit detects trigger_error
83 return; // @codeCoverageIgnore
84 }
85
86 $this->computeYesTransitions();
87 $this->computeNoTransitions();
88 }
89
90 /**
91 * Accessor for the search keywords.
92 *
93 * @return string[] Search keywords.
94 */
95 public function getKeywords() {
96 return array_keys( $this->searchKeywords );
97 }
98
99 /**
100 * Map the current state and input character to the next state.
101 *
102 * @param int $currentState The current state of the string-matching
103 * automaton.
104 * @param string $inputChar The character the string-matching
105 * automaton is currently processing.
106 * @return int The state the automaton should transition to.
107 */
108 public function nextState( $currentState, $inputChar ) {
109 $initialState = $currentState;
110 while ( true ) {
111 $transitions =& $this->yesTransitions[$currentState];
112 if ( isset( $transitions[$inputChar] ) ) {
113 $nextState = $transitions[$inputChar];
114 // Avoid failure transitions next time.
115 if ( $currentState !== $initialState ) {
116 $this->yesTransitions[$initialState][$inputChar] = $nextState;
117 }
118 return $nextState;
119 }
120 if ( $currentState === 0 ) {
121 return 0;
122 }
123 $currentState = $this->noTransitions[$currentState];
124 }
125 // Unreachable outside 'while'
126 } // @codeCoverageIgnore
127
128 /**
129 * Locate the search keywords in some text.
130 *
131 * @param string $text The string to search in.
132 * @return array[] An array of matches. Each match is a vector
133 * containing an integer offset and the matched keyword.
134 *
135 * @par Example:
136 * @code
137 * $keywords = new MultiStringMatcher( array( 'ore', 'hell' ) );
138 * $keywords->searchIn( 'She sells sea shells by the sea shore.' );
139 * // result: array( array( 15, 'hell' ), array( 34, 'ore' ) )
140 * @endcode
141 */
142 public function searchIn( $text ) {
143 if ( !$this->searchKeywords || $text === '' ) {
144 return []; // fast path
145 }
146
147 $state = 0;
148 $results = [];
149 $length = strlen( $text );
150
151 for ( $i = 0; $i < $length; $i++ ) {
152 $ch = $text[$i];
153 $state = $this->nextState( $state, $ch );
154 foreach ( $this->outputs[$state] as $match ) {
155 $offset = $i - $this->searchKeywords[$match] + 1;
156 $results[] = [ $offset, $match ];
157 }
158 }
159
160 return $results;
161 }
162
163 /**
164 * Get the state transitions which the string-matching automaton
165 * shall make as it advances through input text.
166 *
167 * Constructs a directed tree with a root node which represents the
168 * initial state of the string-matching automaton and from which a
169 * path exists which spells out each search keyword.
170 */
171 protected function computeYesTransitions() {
172 $this->yesTransitions = [ [] ];
173 $this->outputs = [ [] ];
174 foreach ( $this->searchKeywords as $keyword => $length ) {
175 $state = 0;
176 for ( $i = 0; $i < $length; $i++ ) {
177 $ch = $keyword[$i];
178 if ( !empty( $this->yesTransitions[$state][$ch] ) ) {
179 $state = $this->yesTransitions[$state][$ch];
180 } else {
181 $this->yesTransitions[$state][$ch] = $this->numStates;
182 $this->yesTransitions[] = [];
183 $this->outputs[] = [];
184 $state = $this->numStates++;
185 }
186 }
187
188 $this->outputs[$state][] = $keyword;
189 }
190 }
191
192 /**
193 * Get the state transitions which the string-matching automaton
194 * shall make when a partial match proves false.
195 */
196 protected function computeNoTransitions() {
197 $queue = [];
198 $this->noTransitions = [];
199
200 foreach ( $this->yesTransitions[0] as $ch => $toState ) {
201 $queue[] = $toState;
202 $this->noTransitions[$toState] = 0;
203 }
204
205 while ( true ) {
206 $fromState = array_shift( $queue );
207 if ( $fromState === null ) {
208 break;
209 }
210 foreach ( $this->yesTransitions[$fromState] as $ch => $toState ) {
211 $queue[] = $toState;
212 $state = $this->noTransitions[$fromState];
213
214 while ( $state !== 0 && empty( $this->yesTransitions[$state][$ch] ) ) {
215 $state = $this->noTransitions[$state];
216 }
217
218 if ( isset( $this->yesTransitions[$state][$ch] ) ) {
219 $noState = $this->yesTransitions[$state][$ch];
220 } else {
221 $noState = 0;
222 }
223
224 $this->noTransitions[$toState] = $noState;
225 $this->outputs[$toState] = array_merge(
226 $this->outputs[$toState], $this->outputs[$noState] );
227 }
228 }
229 }
230 }
231