Skip to content

Commit

Permalink
Add remove_multiple_whitespaces function to remove multiple whitespac…
Browse files Browse the repository at this point in the history
…es from text
  • Loading branch information
PhilipMay committed Dec 10, 2023
1 parent 924113c commit 436800f
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
18 changes: 17 additions & 1 deletion mltb2/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

"""Text specific module."""

from typing import Dict, Final, Tuple
import re
from typing import Dict, Final, Pattern, Tuple

INVISIBLE_CHARACTERS: Final[Tuple[str, ...]] = (
"\u200b", # Zero Width Space (ZWSP) https://www.compart.com/en/unicode/U+200b
Expand Down Expand Up @@ -37,6 +38,9 @@
# Translation table for ``str.translate`` that maps every character listed in
# ``SPECIAL_WHITESPACES`` to a plain space (" ").
SPECIAL_WHITESPACES_TRANS: Final[Dict[int, str]] = str.maketrans({char: " " for char in SPECIAL_WHITESPACES})


# Matches a run of two or more consecutive plain space characters (U+0020).
# Other whitespace such as tabs or newlines is intentionally not matched.
MULTI_SPACE_PATTERN: Final[Pattern[str]] = re.compile(r" {2,}")


def remove_invisible_characters(text: str) -> str:
"""Remove invisible characters from text.
Expand Down Expand Up @@ -91,3 +95,15 @@ def has_special_whitespaces(text: str) -> bool:
``True`` if the text contains special whitespaces, ``False`` otherwise.
"""
return any(char in text for char in SPECIAL_WHITESPACES)


def remove_multiple_whitespaces(text: str) -> str:
    """Collapse each run of consecutive spaces in the text into one space.

    The matching is delegated to ``MULTI_SPACE_PATTERN``; a single space
    standing on its own is left unchanged.

    Args:
        text: The text whose repeated spaces are to be collapsed.

    Returns:
        The text with every match of ``MULTI_SPACE_PATTERN`` replaced by a
        single space.
    """
    collapsed = MULTI_SPACE_PATTERN.sub(" ", text)
    return collapsed
25 changes: 25 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
has_invisible_characters,
has_special_whitespaces,
remove_invisible_characters,
remove_multiple_whitespaces,
replace_special_whitespaces,
)

Expand Down Expand Up @@ -74,3 +75,27 @@ def test_has_special_whitespaces_false():
text = "Hello you!"
result = has_special_whitespaces(text)
assert not result


def test_remove_multiple_whitespaces():
    """Runs of several spaces between tokens are collapsed to one space."""
    # The input must really contain consecutive spaces, otherwise the test
    # passes trivially without exercising the replacement.
    text = "Hello  World   !"
    result = remove_multiple_whitespaces(text)
    assert result == "Hello World !"


def test_remove_multiple_whitespaces_empty():
    """An empty string is returned unchanged."""
    assert remove_multiple_whitespaces("") == ""


def test_remove_multiple_whitespaces_empty_result():
    """A text consisting only of several spaces collapses to one space."""
    # Several spaces are used here so this case differs from the
    # single-space test and actually triggers the replacement.
    text = "     "
    result = remove_multiple_whitespaces(text)
    assert result == " "


def test_remove_multiple_whitespaces_one_space():
    """A lone space is not a run of spaces and must be left untouched."""
    single_space = " "
    assert remove_multiple_whitespaces(single_space) == " "

0 comments on commit 436800f

Please sign in to comment.