Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added masterminds/html5 #831

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ code coverage of the fixed bugs and the new features.
To run the existing PHPUnit tests, run this command:

```shell
composer ci:tests:unit
composer ci:tests
```


Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,4 @@ jobs:
composer show;

- name: Run Tests
run: composer ci:tests:unit
run: composer ci:tests
9 changes: 8 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,14 @@
"symfony/css-selector": "^3.4.32 || ^4.3.5 || ^5.0"
},
"require-dev": {
"masterminds/html5": "^2.7",
"php-parallel-lint/php-parallel-lint": "^1.2.0",
"slevomat/coding-standard": "^4.0.0",
"squizlabs/php_codesniffer": "^3.5.1"
},
"suggest": {
"masterminds/html5": "Use instead of PHP's built-in DOMDocument for HTML5 support."
},
"autoload": {
"psr-4": {
"Pelago\\Emogrifier\\": "src/"
Expand All @@ -73,9 +77,12 @@
"ci:php:md": "\"./tools/phpmd.phar\" src text config/phpmd.xml",
"ci:php:psalm": "\"./tools/psalm.phar\" --show-info=false",
"ci:tests:unit": "\"./tools/phpunit.phar\"",
"ci:tests:html5:unit": "EMOGRIFIER_HTML5=true \"./tools/phpunit.phar\"",
"ci:tests:sof": "\"./tools/phpunit.phar\" --stop-on-failure",
"ci:tests:html5:sof": "EMOGRIFIER_HTML5=true \"./tools/phpunit.phar\" --stop-on-failure",
"ci:tests": [
"@ci:tests:unit"
"@ci:tests:unit",
"@ci:tests:html5:unit"
],
"ci:dynamic": [
"@ci:tests"
Expand Down
90 changes: 81 additions & 9 deletions src/HtmlProcessor/AbstractHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

namespace Pelago\Emogrifier\HtmlProcessor;

use DOMNode;
use Masterminds\HTML5;

/**
* Base class for HTML processor that e.g., can remove, add or modify nodes or attributes.
*
Expand Down Expand Up @@ -37,6 +40,11 @@ abstract class AbstractHtmlProcessor
*/
protected $domDocument = null;

/**
* @var HTML5|null
*/
protected $html5 = null;

/**
* @var \DOMXPath
*/
Expand All @@ -55,19 +63,20 @@ private function __construct()
* Builds a new instance from the given HTML.
*
* @param string $unprocessedHtml raw HTML, must be UTF-encoded, must not be empty
* @param bool|null $html5 whether to parse with masterminds/html5 instead of DOMDocument; null falls back to the EMOGRIFIER_HTML5 environment flag
*
* @return static
*
* @throws \InvalidArgumentException if $unprocessedHtml is anything other than a non-empty string
*/
public static function fromHtml(string $unprocessedHtml): self
public static function fromHtml(string $unprocessedHtml, ?bool $html5 = null): self
{
if ($unprocessedHtml === '') {
throw new \InvalidArgumentException('The provided HTML must not be empty.', 1515763647);
}

$instance = new static();
$instance->setHtml($unprocessedHtml);
$instance->setHtml($unprocessedHtml, $html5);

return $instance;
}
Expand All @@ -91,10 +100,14 @@ public static function fromDomDocument(\DOMDocument $document): self
* Sets the HTML to process.
*
* @param string $html the HTML to process, must be UTF-8-encoded
* @param bool|null $html5 whether to parse with masterminds/html5 instead of DOMDocument; null falls back to the EMOGRIFIER_HTML5 environment flag
*/
private function setHtml(string $html): void
private function setHtml(string $html, ?bool $html5): void
{
$this->createUnifiedDomDocument($html);
// If html5 is NULL, fallback to the environment flag.
$html5 = $html5 ?? $this->isHtml5Env();

$this->createUnifiedDomDocument($html, $html5);
}

/**
Expand Down Expand Up @@ -136,7 +149,7 @@ private function setDomDocument(\DOMDocument $domDocument): void
*/
public function render(): string
{
$htmlWithPossibleErroneousClosingTags = $this->getDomDocument()->saveHTML();
$htmlWithPossibleErroneousClosingTags = $this->saveHTML();

return $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
}
Expand All @@ -148,7 +161,7 @@ public function render(): string
*/
public function renderBodyContent(): string
{
$htmlWithPossibleErroneousClosingTags = $this->getDomDocument()->saveHTML($this->getBodyElement());
$htmlWithPossibleErroneousClosingTags = $this->saveHTML($this->getBodyElement());
$bodyNodeHtml = $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);

return \preg_replace('%</?+body(?:\\s[^>]*+)?+>%', '', $bodyNodeHtml);
Expand Down Expand Up @@ -184,13 +197,36 @@ private function getBodyElement(): \DOMElement
* The DOM document will always have a BODY element and a document type.
*
* @param string $html
* @param bool $html5
*/
private function createUnifiedDomDocument(string $html): void
private function createUnifiedDomDocument(string $html, bool $html5): void
{
$this->createRawDomDocument($html);
$html = $this->prepareHtmlForDomConversion($html);

$html5 ? $this->createHtml5Document($html) : $this->createRawDomDocument($html);

$this->ensureExistenceOfBodyElement();
}

/**
 * Parses the given HTML with the masterminds/html5 parser and stores the resulting document.
 *
 * The parser instance is kept in $this->html5 so that saveHTML() can later serialize the document
 * with the same library.
 *
 * @param string $html
 *
 * @throws \RuntimeException if the masterminds/html5 library is not installed
 */
private function createHtml5Document(string $html): void
{
    if (!\class_exists(HTML5::class)) {
        throw new \RuntimeException(
            'Class ' . HTML5::class . ' not found. Install the masterminds/html5 library.'
        );
    }

    // "disable_html_ns" stops the parser from assigning the HTML namespace to the parsed nodes
    // (presumably so that un-prefixed XPath queries keep matching - confirm against the library docs).
    $this->html5 = new HTML5(['disable_html_ns' => true]);

    $this->setDomDocument($this->html5->parse($html));
}

/**
* Creates a DOMDocument instance from the given HTML and stores it in $this->domDocument.
*
Expand All @@ -202,7 +238,7 @@ private function createRawDomDocument(string $html): void
$domDocument->strictErrorChecking = false;
$domDocument->formatOutput = true;
$libXmlState = \libxml_use_internal_errors(true);
$domDocument->loadHTML($this->prepareHtmlForDomConversion($html));
$domDocument->loadHTML($html);
\libxml_clear_errors();
\libxml_use_internal_errors($libXmlState);

Expand Down Expand Up @@ -334,4 +370,40 @@ private function ensureExistenceOfBodyElement(): void
}
$htmlElement->appendChild($this->getDomDocument()->createElement('body'));
}

/**
 * Serializes the internal document, or a single node of it, to an HTML string.
 *
 * If the document was parsed with the masterminds/html5 parser, that parser is also used for
 * serialization; otherwise DOMDocument's own serializer is used.
 *
 * @param DOMNode|null $dom the node to serialize, or null to serialize the whole document
 *
 * @return string the HTML
 */
private function saveHTML(?DOMNode $dom = null): string
{
    if ($this->html5 !== null) {
        // The HTML5 serializer needs an explicit node, so default to the whole document.
        return $this->html5->saveHTML($dom ?? $this->domDocument);
    }

    // Fall back to DOMDocument, which accepts null to mean "the whole document".
    return $this->getDomDocument()->saveHTML($dom);
}

/**
 * Checks whether HTML5 parsing has been enabled via the EMOGRIFIER_HTML5 environment variable.
 *
 * getenv() returns the value as a string (or false if the variable is not set), never as a boolean
 * true, so the string is interpreted with FILTER_VALIDATE_BOOLEAN: "1", "true", "on" and "yes"
 * (case-insensitive) enable HTML5 parsing; anything else, including an unset variable, disables it.
 *
 * @return bool
 */
private function isHtml5Env(): bool
{
    $value = \getenv('EMOGRIFIER_HTML5');
    if ($value === false) {
        // The variable is not set at all.
        return false;
    }

    return \filter_var($value, \FILTER_VALIDATE_BOOLEAN);
}
}