From ad852d3e81af660b0889814d8f4fe34f181c8fd5 Mon Sep 17 00:00:00 2001 From: Sigurdur Gudbrandsson Date: Tue, 24 Jan 2017 12:38:07 +0000 Subject: [PATCH 1/6] Badly formatted youtube URLs cause a parse error The DOM parser throws away invalid HTML. When you have http://youtube.com/embed/_FlV6pgwlrk&list=123 then the `IframeYouTubeTagTransformPass::getYouTubeCode()` will match the ID as `_FlV6pgwlrk&list=123` but since the ampersand isn't in a proper HTML format (`&amp;`), then DOM will throw this whole element away. I wrote a test to make sure no one breaks this in the future. --- src/Pass/IframeYouTubeTagTransformPass.php | 8 +++--- .../fragment-html/youtube-bad-fragment.html | 4 +++ .../youtube-bad-fragment.html.out | 27 +++++++++++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 tests/test-data/fragment-html/youtube-bad-fragment.html create mode 100644 tests/test-data/fragment-html/youtube-bad-fragment.html.out diff --git a/src/Pass/IframeYouTubeTagTransformPass.php b/src/Pass/IframeYouTubeTagTransformPass.php index 67017b38..e55b0ffa 100644 --- a/src/Pass/IframeYouTubeTagTransformPass.php +++ b/src/Pass/IframeYouTubeTagTransformPass.php @@ -125,25 +125,25 @@ protected function getYouTubeCode(DOMQuery $el) if (preg_match('&(*UTF8)/embed/([^/?\&]+)&i', $href, $matches)) { if (!empty($matches[1])) { $youtube_code = $matches[1]; - return $youtube_code; + return htmlspecialchars($youtube_code); } } if (preg_match('&(*UTF8)youtu\.be/([^/?\&]+)&i', $href, $matches)) { if (!empty($matches[1])) { $youtube_code = $matches[1]; - return $youtube_code; + return htmlspecialchars($youtube_code); } } if (preg_match('!(*UTF8)watch\?v=([^&]+)!i', $href, $matches)) { if (!empty($matches[1])) { $youtube_code = $matches[1]; - return $youtube_code; + return htmlspecialchars($youtube_code); } } - return $youtube_code; + return htmlspecialchars($youtube_code); } /** diff --git a/tests/test-data/fragment-html/youtube-bad-fragment.html 
b/tests/test-data/fragment-html/youtube-bad-fragment.html new file mode 100644 index 00000000..ff4f5921 --- /dev/null +++ b/tests/test-data/fragment-html/youtube-bad-fragment.html @@ -0,0 +1,4 @@ + diff --git a/tests/test-data/fragment-html/youtube-bad-fragment.html.out b/tests/test-data/fragment-html/youtube-bad-fragment.html.out new file mode 100644 index 00000000..4bd97cc4 --- /dev/null +++ b/tests/test-data/fragment-html/youtube-bad-fragment.html.out @@ -0,0 +1,27 @@ + + + +ORIGINAL HTML +--------------- +Line 1: +Line 5: + + +Transformations made from HTML tags to AMP custom tags +------------------------------------------------------- + + \ No newline at end of file + + + + + + + + + + + + + diff --git a/tests/test-data/fragment-html/youtube-fragment.html.out b/tests/test-data/fragment-html/youtube-fragment.html.out index d711b65a..21365f43 100644 --- a/tests/test-data/fragment-html/youtube-fragment.html.out +++ b/tests/test-data/fragment-html/youtube-fragment.html.out @@ -1,11 +1,55 @@ + + + + + + + + + + + + + ORIGINAL HTML --------------- -Line 1: +Line 1: +Line 5: +Line 6: +Line 10: +Line 11: +Line 15: +Line 16: +Line 20: +Line 21: +Line 25: +Line 26: +Line 30: +Line 31: +Line 35: Transformations made from HTML tags to AMP custom tags @@ -14,6 +58,24 @@ Transformations made from HTML tags to AMP custom tags