Normalizer.php
241 lines
| 1 | <?php |
namespace MailPoetVendor\Symfony\Polyfill\Intl\Normalizer;

// Direct-access guard: stop executing this file unless the ABSPATH constant
// has been defined by whatever bootstrapped the request.
if (!defined('ABSPATH')) {
    exit;
}
/**
 * Pure-PHP Unicode normalizer offering normalize() and isNormalized() for the
 * four normalization forms (NFC, NFD, NFKC, NFKD).
 *
 * The form constants are aliases of the constants declared on
 * \MailPoetVendor\Normalizer. The lookup tables are loaded lazily from
 * Resources/unidata/*.php (see getData()) and cached in static properties.
 *
 * All string handling in this class is byte-oriented and assumes UTF-8 input;
 * normalize() rejects strings that are not valid UTF-8.
 */
class Normalizer
{
    const FORM_D = \MailPoetVendor\Normalizer::FORM_D;
    const FORM_KD = \MailPoetVendor\Normalizer::FORM_KD;
    const FORM_C = \MailPoetVendor\Normalizer::FORM_C;
    const FORM_KC = \MailPoetVendor\Normalizer::FORM_KC;
    const NFD = \MailPoetVendor\Normalizer::NFD;
    const NFKD = \MailPoetVendor\Normalizer::NFKD;
    const NFC = \MailPoetVendor\Normalizer::NFC;
    const NFKC = \MailPoetVendor\Normalizer::NFKC;

    /** @var array|false|null Canonical composition table, keyed by the concatenated pair of characters. */
    private static $C;

    /** @var array|false|null Canonical decomposition table, keyed by character. */
    private static $D;

    /** @var array|false|null Compatibility decomposition table, keyed by character. */
    private static $KD;

    /** @var array|false|null Combining class table, keyed by character. */
    private static $cC;

    /** @var array Byte length of a multi-byte UTF-8 sequence, keyed by (lead byte & 0xF0). */
    private static $ulenMask = array("\xc0" => 2, "\xd0" => 2, "\xe0" => 3, "\xf0" => 4);

    /** @var string Byte set handed to strspn() by the ASCII fast paths. */
    private static $ASCII = " eiasntrolud][cmp'\ng|hv.fb,:=-q10C2*yx)(L9AS/P\"EjMIk3>5T<D4}B{8FwR67UGN;JzV#HOW_&!K?XQ%Y\\\tZ+~^\$@`\x00\x01\x02\x03\x04\x05\x06\x07\x08\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f";

    /**
     * Tells whether a string is already normalized in the given form.
     *
     * @param mixed $s    Value to check; cast to string.
     * @param int   $form One of the NFC / NFD / NFKC / NFKD constants.
     *
     * @return bool False for an unsupported form or when normalizing would
     *              change (or reject) the string, true otherwise.
     */
    public static function isNormalized($s, $form = self::NFC)
    {
        // Loose comparison on purpose: it matches the loose `switch` used by normalize().
        if (!\in_array($form, array(self::NFD, self::NFKD, self::NFC, self::NFKC))) {
            return \false;
        }
        $s = (string) $s;
        // Fast path: every byte belongs to the ASCII set, nothing to normalize.
        if (!isset($s[\strspn($s, self::$ASCII)])) {
            return \true;
        }
        // Fast path for NFC: valid UTF-8 with no code point above U+02FF.
        if (self::NFC == $form && \preg_match('//u', $s) && !\preg_match('/[^\\x00-\\x{2FF}]/u', $s)) {
            return \true;
        }
        // normalize() returns false for invalid UTF-8, which never equals $s.
        return self::normalize($s, $form) === $s;
    }

    /**
     * Normalizes a UTF-8 string to the requested form.
     *
     * @param mixed $s    Value to normalize; cast to string.
     * @param int   $form One of the NFC / NFD / NFKC / NFKD constants.
     *
     * @return string|false The normalized string; the input unchanged when
     *                      $form equals \MailPoetVendor\Normalizer::NONE
     *                      (where that constant exists); false when the input
     *                      is not valid UTF-8 or the form is not recognised.
     */
    public static function normalize($s, $form = self::NFC)
    {
        $s = (string) $s;
        if (!\preg_match('//u', $s)) {
            return \false;
        }
        // $C: recompose after decomposing; $K: also apply compatibility mappings.
        switch ($form) {
            case self::NFC:
                $C = \true;
                $K = \false;
                break;
            case self::NFD:
                $C = \false;
                $K = \false;
                break;
            case self::NFKC:
                $C = \true;
                $K = \true;
                break;
            case self::NFKD:
                $C = \false;
                $K = \true;
                break;
            default:
                if (\defined('\MailPoetVendor\Normalizer::NONE') && \MailPoetVendor\Normalizer::NONE == $form) {
                    return $s;
                }
                return \false;
        }
        if ('' === $s) {
            return '';
        }
        // Load each table on first use only.
        if ($K && null === self::$KD) {
            self::$KD = self::getData('compatibilityDecomposition');
        }
        if (null === self::$D) {
            self::$D = self::getData('canonicalDecomposition');
            self::$cC = self::getData('combiningClass');
        }
        // When bit 2 of mbstring.func_overload is set, force the "8bit" internal
        // encoding while working and restore the previous one afterwards
        // (decompose()/recompose() index and slice strings byte by byte).
        if (null !== ($mbEncoding = 2 & (int) \ini_get('mbstring.func_overload') ? \mb_internal_encoding() : null)) {
            \mb_internal_encoding('8bit');
        }
        $r = self::decompose($s, $K);
        if ($C) {
            if (null === self::$C) {
                self::$C = self::getData('canonicalComposition');
            }
            $r = self::recompose($r);
        }
        if (null !== $mbEncoding) {
            \mb_internal_encoding($mbEncoding);
        }
        return $r;
    }

    /**
     * Composes an already decomposed, non-empty UTF-8 string.
     *
     * Pairs found in the composition table are merged, and conjoining Hangul
     * jamo (L + V, optionally followed by T) are merged arithmetically.
     *
     * @param string $s Decomposed string; must not be empty ($s[0] is read).
     *
     * @return string
     */
    private static function recompose($s)
    {
        $ASCII = self::$ASCII;
        $compMap = self::$C;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;
        // $result: finished output; $lastUchr: pending base that may still absorb
        // following characters; $tail: marks that could not be merged into it.
        $result = $tail = '';
        $i = $s[0] < "\x80" ? 1 : $ulenMask[$s[0] & "\xf0"];
        $len = \strlen($s);
        $lastUchr = \substr($s, 0, $i);
        // Non-zero when the string starts with a combining character; 256 is
        // presumably larger than every class in the table, so nothing composes onto it.
        $lastUcls = isset($combClass[$lastUchr]) ? 256 : 0;
        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars
                if ($tail) {
                    $lastUchr .= $tail;
                    $tail = '';
                }
                // Swallow the whole ASCII run; its last byte becomes the new pending base.
                if ($j = \strspn($s, $ASCII, $i + 1)) {
                    $lastUchr .= \substr($s, $i, $j);
                    $i += $j;
                }
                $result .= $lastUchr;
                $lastUchr = $s[$i];
                $lastUcls = 0;
                ++$i;
                continue;
            }
            $ulen = $ulenMask[$s[$i] & "\xf0"];
            $uchr = \substr($s, $i, $ulen);
            // Hangul path only when the pending base is a leading consonant
            // (U+1100..U+1112), the current char is a vowel (U+1161..U+1175)
            // and no combining mark is blocking. Byte escapes are used because
            // raw jamo literals are easily corrupted (0x85 can be read as a line break).
            if ($lastUchr < "\xe1\x84\x80" || "\xe1\x84\x92" < $lastUchr || $uchr < "\xe1\x85\xa1" || "\xe1\x85\xb5" < $uchr || $lastUcls) {
                // Table lookup and combining chars composition
                $ucls = isset($combClass[$uchr]) ? $combClass[$uchr] : 0;
                if (isset($compMap[$lastUchr . $uchr]) && (!$lastUcls || $lastUcls < $ucls)) {
                    $lastUchr = $compMap[$lastUchr . $uchr];
                } elseif ($lastUcls = $ucls) {
                    // Assignment intended: remember the class of the unmerged mark.
                    $tail .= $uchr;
                } else {
                    // A new starter: flush the pending base together with its marks.
                    if ($tail) {
                        $lastUchr .= $tail;
                        $tail = '';
                    }
                    $result .= $lastUchr;
                    $lastUchr = $uchr;
                }
            } else {
                // Hangul chars
                $L = \ord($lastUchr[2]) - 0x80;
                $V = \ord($uchr[2]) - 0xa1;
                $T = 0;
                $uchr = \substr($s, $i + $ulen, 3);
                // Trailing consonants are U+11A8..U+11C2. U+11A7 is only the base
                // of the range (index 0 = "no trailing consonant"); accepting it
                // would consume the character and drop it from the output.
                if ("\xe1\x86\xa8" <= $uchr && $uchr <= "\xe1\x87\x82") {
                    $T = \ord($uchr[2]) - 0xa7;
                    // U+11C0..U+11C2 live in the next 64-byte block (E1 87 xx).
                    0 > $T && ($T += 0x40);
                    $ulen += 3;
                }
                // Syllable code point, then its 3-byte UTF-8 encoding.
                $L = 0xac00 + ($L * 21 + $V) * 28 + $T;
                $lastUchr = \chr(0xe0 | ($L >> 12)) . \chr(0x80 | (($L >> 6) & 0x3f)) . \chr(0x80 | ($L & 0x3f));
            }
            $i += $ulen;
        }
        return $result . $lastUchr . $tail;
    }

    /**
     * Decomposes a UTF-8 string and orders runs of combining characters by
     * combining class.
     *
     * @param string $s Valid UTF-8 string.
     * @param bool   $c Whether compatibility mappings are applied in addition
     *                  to canonical ones.
     *
     * @return string
     */
    private static function decompose($s, $c)
    {
        $result = '';
        $ASCII = self::$ASCII;
        $decompMap = self::$D;
        $combClass = self::$cC;
        $ulenMask = self::$ulenMask;
        // Compatibility mappings only take part when requested.
        $compatMap = $c ? self::$KD : array();
        // Combining characters waiting to be emitted, bucketed by combining class.
        $marks = array();
        $i = 0;
        $len = \strlen($s);
        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars
                if ($marks) {
                    \ksort($marks);
                    $result .= \implode('', $marks);
                    $marks = array();
                }
                $j = 1 + \strspn($s, $ASCII, $i + 1);
                $result .= \substr($s, $i, $j);
                $i += $j;
                continue;
            }
            $ulen = $ulenMask[$s[$i] & "\xf0"];
            $uchr = \substr($s, $i, $ulen);
            $i += $ulen;
            // Precomposed Hangul syllables (U+AC00..U+D7A3) are handled arithmetically.
            if ($uchr < "\xea\xb0\x80" || "\xed\x9e\xa3" < $uchr) {
                // Table lookup
                if ($uchr !== ($j = isset($compatMap[$uchr]) ? $compatMap[$uchr] : (isset($decompMap[$uchr]) ? $decompMap[$uchr] : $uchr))) {
                    $uchr = $j;
                    $j = \strlen($uchr);
                    $ulen = $uchr[0] < "\x80" ? 1 : $ulenMask[$uchr[0] & "\xf0"];
                    if ($ulen != $j) {
                        // Put trailing chars in $s
                        // Only the first character of the mapping is handled now;
                        // the rest is written over already-consumed bytes just
                        // before $i so the loop reads it next.
                        $j -= $ulen;
                        $i -= $j;
                        if (0 > $i) {
                            // Not enough consumed bytes to overwrite: grow $s at the front.
                            $s = \str_repeat(' ', -$i) . $s;
                            $len -= $i;
                            $i = 0;
                        }
                        while ($j--) {
                            $s[$i + $j] = $uchr[$ulen + $j];
                        }
                        $uchr = \substr($uchr, 0, $ulen);
                    }
                }
                if (isset($combClass[$uchr])) {
                    // Combining chars, for sorting
                    if (!isset($marks[$combClass[$uchr]])) {
                        $marks[$combClass[$uchr]] = '';
                    }
                    $marks[$combClass[$uchr]] .= $uchr;
                    continue;
                }
            } else {
                // Hangul chars
                $uchr = \unpack('C*', $uchr);
                // Index of the syllable relative to U+AC00, computed from its UTF-8 bytes.
                $j = ($uchr[1] - 224 << 12) + ($uchr[2] - 128 << 6) + $uchr[3] - 0xac80;
                // Leading consonant (588 syllables each) followed by vowel (28 each).
                $uchr = "\xe1\x84" . \chr(0x80 + (int) ($j / 588)) . "\xe1\x85" . \chr(0xa1 + (int) ($j % 588 / 28));
                if ($j %= 28) {
                    // Trailing consonant; indexes 25..27 fall in the E1 87 xx block.
                    $uchr .= $j < 25 ? "\xe1\x86" . \chr(0xa7 + $j) : "\xe1\x87" . \chr(0x67 + $j);
                }
            }
            // A non-combining character ends the current run of marks.
            if ($marks) {
                \ksort($marks);
                $result .= \implode('', $marks);
                $marks = array();
            }
            $result .= $uchr;
        }
        if ($marks) {
            \ksort($marks);
            $result .= \implode('', $marks);
        }
        return $result;
    }

    /**
     * Loads one lookup table from Resources/unidata/<name>.php.
     *
     * @param string $file Table name without directory or extension.
     *
     * @return mixed Whatever the data file returns, or false when the file
     *               does not exist.
     */
    private static function getData($file)
    {
        if (\file_exists($file = __DIR__ . '/Resources/unidata/' . $file . '.php')) {
            return require $file;
        }
        return \false;
    }
}
| 241 |