From 41bbfed03baa4a1cf7ed0b061a8195ca1fabcf97 Mon Sep 17 00:00:00 2001 From: PhilipMay Date: Sat, 9 Dec 2023 23:18:35 +0100 Subject: [PATCH] Add functions to check for invisible characters and special whitespaces --- mltb2/text.py | 28 ++++++++++++++++++++++++++++ tests/test_text.py | 26 ++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/mltb2/text.py b/mltb2/text.py index 90321b9..b4ef242 100644 --- a/mltb2/text.py +++ b/mltb2/text.py @@ -51,6 +51,20 @@ def remove_invisible_characters(text: str) -> str: return text.translate(INVISIBLE_CHARACTERS_TRANS) +def has_invisible_characters(text: str) -> bool: + """Check if text contains invisible characters. + + The invisible characters are defined in the constant `INVISIBLE_CHARACTERS`. + + Args: + text: The text to check. + + Returns: + ``True`` if the text contains invisible characters, ``False`` otherwise. + """ + return any(char in text for char in INVISIBLE_CHARACTERS) + + def replace_special_whitespaces(text: str) -> str: """Replace special whitespaces with normal whitespaces. @@ -63,3 +77,17 @@ def replace_special_whitespaces(text: str) -> str: The cleaned text. """ return text.translate(SPECIAL_WHITESPACES_TRANS) + + +def has_special_whitespaces(text: str) -> bool: + """Check if text contains special whitespaces. + + The special whitespaces are defined in the constant `SPECIAL_WHITESPACES`. + + Args: + text: The text to check. + + Returns: + ``True`` if the text contains special whitespaces, ``False`` otherwise. + """ + return any(char in text for char in SPECIAL_WHITESPACES) diff --git a/tests/test_text.py b/tests/test_text.py index 476a5cc..bd56674 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -7,6 +7,8 @@ from mltb2.text import ( INVISIBLE_CHARACTERS, SPECIAL_WHITESPACES, + has_invisible_characters, + has_special_whitespaces, remove_invisible_characters, replace_special_whitespaces, ) @@ -48,3 +50,27 @@ def test_replace_special_whitespaces_single_char(char: str): text = f">{char}<" result = replace_special_whitespaces(text) assert result == "> <" + + +def test_has_invisible_characters_true(): + text = "Hello\u200bWorld\u00ad!" + result = has_invisible_characters(text) + assert result + + +def test_has_invisible_characters_false(): + text = "Hello!" + result = has_invisible_characters(text) + assert not result + + +def test_has_special_whitespaces_true(): + text = "a\u00a0b\u2009c\u202fd\u2007e\u200af" + result = has_special_whitespaces(text) + assert result + + +def test_has_special_whitespaces_false(): + text = "Hello you!" + result = has_special_whitespaces(text) + assert not result