AbstractHtmlProcessor.php
4 years ago
CssToAttributeConverter.php
4 years ago
HtmlNormalizer.php
4 years ago
HtmlPruner.php
4 years ago
HtmlPruner.php
138 lines
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Pelago\Emogrifier\HtmlProcessor; |
| 6 | |
| 7 | use Pelago\Emogrifier\CssInliner; |
| 8 | use Pelago\Emogrifier\Utilities\ArrayIntersector; |
| 9 | |
/**
 * This class can remove things from HTML: elements that are hidden via an inline `display: none` style, and
 * class names in `class` attributes that are no longer needed.
 */
class HtmlPruner extends AbstractHtmlProcessor
{
    /**
     * XPath expression selecting all elements whose `style` attribute contains `display:none`, unless the element
     * carries the class `-emogrifier-keep`.
     *
     * We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only
     * supports XPath 1.0, lower-case() isn't available to us. We've thus far only set attributes to lowercase,
     * not attribute values. Consequently, we need to translate() the letters that would be in 'NONE' ("NOE")
     * to lowercase.
     *
     * Limits of this expression, as written: only the space character is stripped from the style value before
     * matching (tabs and line breaks are not), and only the letters N, O and E are case-folded, so the property
     * name `display` itself must already be lowercase in order to match.
     *
     * @var string
     */
    private const DISPLAY_NONE_MATCHER
        = '//*[@style and contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")'
        . ' and not(@class and contains(concat(" ", normalize-space(@class), " "), " -emogrifier-keep "))]';

    /**
     * PCRE pattern capturing (in group 1) each class name referenced in a CSS selector, i.e. the identifier
     * following a `.`.
     *
     * CSS identifiers may contain non-ASCII characters. The pattern is applied byte-wise (no `u` modifier), so the
     * bytes 0x80-0xFF are accepted explicitly; this makes multi-byte UTF-8 characters part of the captured name
     * instead of truncating the name at the first non-ASCII character. Matching too much here is harmless (a class
     * is merely kept), whereas matching too little would strip a class that the remaining CSS still relies on.
     *
     * @var string
     */
    private const CLASS_NAME_MATCHER = '/\\.(-?+[_a-zA-Z\\x80-\\xFF][\\w\\-\\x80-\\xFF]*+)/';

    /**
     * Removes elements that have a "display: none;" style.
     *
     * Elements with the class `-emogrifier-keep` are left in place even if they have such a style.
     *
     * @return self fluent interface
     */
    public function removeElementsWithDisplayNone(): self
    {
        $elementsWithStyleDisplayNone = $this->getXPath()->query(self::DISPLAY_NONE_MATCHER);

        foreach ($elementsWithStyleDisplayNone as $element) {
            $parentNode = $element->parentNode;
            // A node without a parent cannot be detached from anything, so it is skipped.
            if ($parentNode !== null) {
                $parentNode->removeChild($element);
            }
        }

        return $this;
    }

    /**
     * Removes classes that are no longer required (e.g. because there are no longer any CSS rules that reference them)
     * from `class` attributes.
     *
     * Note that this does not inspect the CSS, but expects to be provided with a list of classes that are still in use.
     * If that list is empty, the `class` attribute is removed from every element that has one.
     *
     * This method also has the (presumably beneficial) side-effect of minifying (removing superfluous whitespace from)
     * `class` attributes.
     *
     * @param array<array-key, string> $classesToKeep names of classes that should not be removed
     *
     * @return self fluent interface
     */
    public function removeRedundantClasses(array $classesToKeep = []): self
    {
        $elementsWithClassAttribute = $this->getXPath()->query('//*[@class]');

        if ($classesToKeep !== []) {
            $this->removeClassesFromElements($elementsWithClassAttribute, $classesToKeep);
        } else {
            // Avoid unnecessary processing if there are no classes to keep.
            $this->removeClassAttributeFromElements($elementsWithClassAttribute);
        }

        return $this;
    }

    /**
     * Removes classes from the `class` attribute of each element in `$elements`, except any in `$classesToKeep`,
     * removing the `class` attribute itself if the resultant list is empty.
     *
     * @param \DOMNodeList $elements
     * @param array<array-key, string> $classesToKeep
     */
    private function removeClassesFromElements(\DOMNodeList $elements, array $classesToKeep): void
    {
        // Built once and reused for every element.
        $classesToKeepIntersector = new ArrayIntersector($classesToKeep);

        /** @var \DOMElement $element */
        foreach ($elements as $element) {
            // Split the attribute value on runs of whitespace to obtain the individual class names.
            $elementClasses = \preg_split('/\\s++/', \trim($element->getAttribute('class')));
            $elementClassesToKeep = $classesToKeepIntersector->intersectWith($elementClasses);
            if ($elementClassesToKeep !== []) {
                // Re-joining with single spaces is what minifies the attribute value.
                $element->setAttribute('class', \implode(' ', $elementClassesToKeep));
            } else {
                $element->removeAttribute('class');
            }
        }
    }

    /**
     * Removes the `class` attribute from each element in `$elements`.
     *
     * @param \DOMNodeList $elements
     */
    private function removeClassAttributeFromElements(\DOMNodeList $elements): void
    {
        /** @var \DOMElement $element */
        foreach ($elements as $element) {
            $element->removeAttribute('class');
        }
    }

    /**
     * After CSS has been inlined, there will likely be some classes in `class` attributes that are no longer referenced
     * by any remaining (uninlinable) CSS. This method removes such classes.
     *
     * Note that it does not inspect the remaining CSS, but uses information readily available from the `CssInliner`
     * instance about the CSS rules that could not be inlined.
     *
     * Class names are recognized in the selectors by a leading `.`; names containing non-ASCII characters are
     * captured in full.
     *
     * @param CssInliner $cssInliner object instance that performed the CSS inlining
     *
     * @return self fluent interface
     *
     * @throws \BadMethodCallException if `inlineCss` has not first been called on `$cssInliner`
     */
    public function removeRedundantClassesAfterCssInlined(CssInliner $cssInliner): self
    {
        // Class names are collected as array keys so that duplicates across selectors collapse automatically.
        $classesToKeepAsKeys = [];
        foreach ($cssInliner->getMatchingUninlinableSelectors() as $selector) {
            \preg_match_all(self::CLASS_NAME_MATCHER, $selector, $matches);
            $classesToKeepAsKeys += \array_fill_keys($matches[1], true);
        }

        $this->removeRedundantClasses(\array_keys($classesToKeepAsKeys));

        return $this;
    }
}
| 138 |