Skip to content

Commit

Permalink
Add functions to check for invisible characters and special whitespaces
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipMay committed Dec 9, 2023
1 parent 8c09329 commit 41bbfed
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
28 changes: 28 additions & 0 deletions mltb2/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,20 @@ def remove_invisible_characters(text: str) -> str:
return text.translate(INVISIBLE_CHARACTERS_TRANS)


def has_invisible_characters(text: str) -> bool:
    """Tell whether the text holds at least one invisible character.

    Every entry of the constant `INVISIBLE_CHARACTERS` is looked up in the text.

    Args:
        text: The text to inspect.

    Returns:
        ``True`` as soon as one entry is found in the text, ``False`` if none occurs.
    """
    for invisible_char in INVISIBLE_CHARACTERS:
        if invisible_char in text:
            return True
    return False


def replace_special_whitespaces(text: str) -> str:
"""Replace special whitespaces with normal whitespaces.
Expand All @@ -63,3 +77,17 @@ def replace_special_whitespaces(text: str) -> str:
The cleaned text.
"""
return text.translate(SPECIAL_WHITESPACES_TRANS)


def has_special_whitespaces(text: str) -> bool:
    """Tell whether the text holds at least one special whitespace.

    Every entry of the constant `SPECIAL_WHITESPACES` is looked up in the text.

    Args:
        text: The text to inspect.

    Returns:
        ``True`` as soon as one entry is found in the text, ``False`` if none occurs.
    """
    for special_whitespace in SPECIAL_WHITESPACES:
        if special_whitespace in text:
            return True
    return False
26 changes: 26 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from mltb2.text import (
INVISIBLE_CHARACTERS,
SPECIAL_WHITESPACES,
has_invisible_characters,
has_special_whitespaces,
remove_invisible_characters,
replace_special_whitespaces,
)
Expand Down Expand Up @@ -48,3 +50,27 @@ def test_replace_special_whitespaces_single_char(char: str):
text = f">{char}<"
result = replace_special_whitespaces(text)
assert result == "> <"


def test_has_invisible_characters_true():
    # The sample embeds U+200B (zero width space) and U+00AD (soft hyphen).
    assert has_invisible_characters("Hello\u200bWorld\u00ad!")


def test_has_invisible_characters_false():
    # A plain ASCII sample must not be reported as containing invisible characters.
    assert not has_invisible_characters("Hello!")


def test_has_special_whitespaces_true():
    # The sample embeds U+00A0 (no-break space), U+2009 (thin space),
    # U+202F (narrow no-break space), U+2007 (figure space) and U+200A (hair space).
    assert has_special_whitespaces("a\u00a0b\u2009c\u202fd\u2007e\u200af")


def test_has_special_whitespaces_false():
    # The only whitespace in the sample is a regular ASCII space, which must not be reported.
    assert not has_special_whitespaces("Hello you!")

0 comments on commit 41bbfed

Please sign in to comment.