From 5e6e8b23ef013fc3655f6bdb15a81ab96b176089 Mon Sep 17 00:00:00 2001
From: Sebastian Thulin
Date: Tue, 21 May 2024 10:36:08 +0200
Subject: [PATCH 1/3] fix: add utf8 escape function

---
Review notes (text between the three-dash line and the first diff is
commentary only; it is not part of the commit message or of the change):

* Index::utf8ize() walks arrays and objects recursively and passes every
  string through mb_convert_encoding($data, 'UTF-8'); values of any other
  type are returned unchanged.
* The source-encoding argument of mb_convert_encoding() is omitted, so the
  conversion starts from whatever mbstring has configured as its internal
  encoding (or default_charset) rather than from an encoding fixed here.
* In the object branch the converted values are assigned to $data->$key, so
  the object handed in by the caller is modified as well as returned.
* NOTE(review): the test added below expects the invalid byte \x80 to be
  removed. mb_convert_encoding() appears to replace invalid input with the
  mbstring substitute character instead of dropping it - TODO confirm.
  PATCH 3/3 replaces this assertion.
* NOTE(review): line breaks, indentation and whitespace-only context lines
  in this series were rebuilt from a copy whose whitespace had been
  collapsed; compare with the repository before applying.

 source/php/Index.Test.php | 17 ++++++++++++++++-
 source/php/Index.php      | 17 +++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/source/php/Index.Test.php b/source/php/Index.Test.php
index 507fba4b..ee75a1e2 100644
--- a/source/php/Index.Test.php
+++ b/source/php/Index.Test.php
@@ -160,7 +160,22 @@ public function testThatABlockPostIsGeneratingAExcerpt() {
 
         $this->assertNotEmpty($truncatedExcerpt);
     }
-}
+    public function testThatMalformedUTF8ContentIsFixed() {
+
+        // Given
+        $post = [
+            "post_title" => "Test Post",
+            "post_content" => "This is a test post with a malformed UTF8 character: \x80"
+        ];
+        // When
+        $fixedContent = $this->invokeMethod(
+            $this->targetTestClass,
+            'utf8ize',
+            [$post]
+        );
 
+        $this->assertEquals($fixedContent["post_content"], "This is a test post with a malformed UTF8 character: ");
+    }
+}
 
 
diff --git a/source/php/Index.php b/source/php/Index.php
index 58c67e3f..eec2693c 100644
--- a/source/php/Index.php
+++ b/source/php/Index.php
@@ -132,6 +132,8 @@ public function index($post)
             }
         });
 
+        $post = self::utf8ize($post); // UTF-8 Escape
+
         //Index post
         if (self::recordToLarge($post)) {
             $splitRecord = self::splitRecord($post);
@@ -462,4 +464,19 @@ private static function getPostAndPostId($post)
 
         return [$post, $postId];
     }
+
+    public static function utf8ize($data) {
+        if (is_array($data)) {
+            foreach ($data as $key => $value) {
+                $data[$key] = self::utf8ize($value);
+            }
+        } else if (is_object($data)) {
+            foreach ($data as $key => $value) {
+                $data->$key = self::utf8ize($value);
+            }
+        } else if (is_string($data)) {
+            return mb_convert_encoding($data, 'UTF-8');
+        }
+        return $data;
+    }
 }

From 352814c5d1bd7ea88ec0df65bddfaa54e344b682 Mon Sep 17 00:00:00 2001
From: Sebastian Thulin
Date: Tue, 21 May 2024 10:37:32 +0200
Subject: [PATCH 2/3] fix: add docs

---
Review note: documentation-only change. It adds the docblock for
Index::utf8ize() and leaves the method body as introduced in PATCH 1/3.

 source/php/Index.php | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/source/php/Index.php b/source/php/Index.php
index eec2693c..a4ca7661 100644
--- a/source/php/Index.php
+++ b/source/php/Index.php
@@ -465,6 +465,12 @@ private static function getPostAndPostId($post)
         return [$post, $postId];
     }
 
+    /**
+     * Convert data to utf-8
+     *
+     * @param mixed $data
+     * @return mixed
+     */
     public static function utf8ize($data) {
         if (is_array($data)) {
             foreach ($data as $key => $value) {

From d19d9273b6cd56ccb500d1df59650e3421c16c05 Mon Sep 17 00:00:00 2001
From: Thor Brink
Date: Tue, 21 May 2024 09:06:09 +0000
Subject: [PATCH 3/3] test: Index::utf8ize

---
Review notes:

* Index::utf8ize() is now called directly instead of through
  invokeMethod(), and assertEquals() is given the expected value first.
* The new input is already valid UTF-8 (\xc3\xb8 is the UTF-8 byte sequence
  for the letter o-with-stroke that the expected string spells out), so the
  test checks that valid text passes through unchanged.
* The test name still says "Malformed"; after this patch no test shown in
  this series feeds invalid bytes to utf8ize().

 source/php/Index.Test.php | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/source/php/Index.Test.php b/source/php/Index.Test.php
index ee75a1e2..e068ea20 100644
--- a/source/php/Index.Test.php
+++ b/source/php/Index.Test.php
@@ -165,17 +165,14 @@ public function testThatMalformedUTF8ContentIsFixed() {
         // Given
         $post = [
             "post_title" => "Test Post",
-            "post_content" => "This is a test post with a malformed UTF8 character: \x80"
+            "post_content" => "R\xc3\xb8d P\xc3\xb8lse 🌭"
         ];
         // When
-        $fixedContent = $this->invokeMethod(
-            $this->targetTestClass,
-            'utf8ize',
-            [$post]
-        );
+        $post = Index::utf8ize($post);
 
-        $this->assertEquals($fixedContent["post_content"], "This is a test post with a malformed UTF8 character: ");
-    }
+        // Then
+        $this->assertEquals("Rød Pølse 🌭", $post['post_content']);
+    }
 }
 
 