diff --git a/source/php/Index.Test.php b/source/php/Index.Test.php
index 507fba4b..e068ea20 100644
--- a/source/php/Index.Test.php
+++ b/source/php/Index.Test.php
@@ -160,7 +160,20 @@ public function testThatABlockPostIsGeneratingAExcerpt() {
 
         $this->assertNotEmpty($truncatedExcerpt);
     }
-}
+    public function testThatMalformedUTF8ContentIsFixed() {
+
+        // Given: a title with a stray 0xFF byte (invalid UTF-8) and content that is already valid UTF-8
+        $post = [
+            "post_title" => "Test \xff Post",
+            "post_content" => "R\xc3\xb8d P\xc3\xb8lse 🌭"
+        ];
+        // When
+        $post = Index::utf8ize($post);
+        // Then: valid text is untouched and the malformed title is valid UTF-8 afterwards
+        $this->assertSame("Rød Pølse 🌭", $post['post_content']);
+        $this->assertTrue(mb_check_encoding($post['post_title'], 'UTF-8'));
+    }
+}
 
 
 
diff --git a/source/php/Index.php b/source/php/Index.php
index 58c67e3f..a4ca7661 100644
--- a/source/php/Index.php
+++ b/source/php/Index.php
@@ -132,6 +132,8 @@ public function index($post)
             }
         });
 
+        $post = self::utf8ize($post); // Replace invalid UTF-8 byte sequences
+
         //Index post
         if (self::recordToLarge($post)) {
             $splitRecord = self::splitRecord($post);
@@ -462,4 +464,32 @@ private static function getPostAndPostId($post)
 
         return [$post, $postId];
     }
+
+    /**
+     * Recursively sanitize data so that every string is valid UTF-8.
+     *
+     * Arrays and objects are traversed recursively. Strings are converted from
+     * UTF-8 to UTF-8, so mbstring replaces invalid byte sequences with its
+     * substitute character. Every other value is returned unchanged.
+     * Object properties reached by foreach are overwritten on the passed instance.
+     *
+     * @param mixed $data
+     * @return mixed
+     */
+    public static function utf8ize($data) {
+        if (is_array($data)) {
+            foreach ($data as $key => $value) {
+                $data[$key] = self::utf8ize($value);
+            }
+        } elseif (is_object($data)) {
+            foreach ($data as $key => $value) {
+                $data->$key = self::utf8ize($value);
+            }
+        } elseif (is_string($data)) {
+            // Name the source encoding explicitly; when omitted, mbstring falls
+            // back to mb_internal_encoding(), which depends on runtime config.
+            return mb_convert_encoding($data, 'UTF-8', 'UTF-8');
+        }
+        return $data;
+    }
 }