diff --git a/mltb2/text.py b/mltb2/text.py
index d998c4e..8eae3d4 100644
--- a/mltb2/text.py
+++ b/mltb2/text.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Philip May
+# Copyright (c) 2023-2024 Philip May
# This software is distributed under the terms of the MIT license
# which is available at https://opensource.org/licenses/MIT
@@ -54,6 +54,29 @@
MULTI_SPACE_PATTERN: Pattern = re.compile(r" {2,}")
+XML_TAG_PATTERN: Pattern = re.compile(r"<\/?[\w:]+( \/|\/|)>")
+
+
+def has_xml_tag(text: str) -> bool:
+    """Check if text contains XML tags (one or multiple).
+
+    These are some XML tags we detect:
+
+    - ``<ns:tag>``
+    - ``<tag>``
+    - ``</tag>``
+    - ``<tag/>``
+    - ``<tag />``
+
+    While we do not detect ``a < b but x > y``.
+
+    Args:
+        text: The text to check.
+    Returns:
+        ``True`` if the text contains XML tags, ``False`` otherwise.
+    """
+    return XML_TAG_PATTERN.search(text) is not None
+
def remove_invisible_characters(text: str) -> str:
"""Remove invisible characters from text.
diff --git a/tests/test_text.py b/tests/test_text.py
index 9c1d949..2d6551f 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -17,6 +17,7 @@
clean_all_invisible_chars_and_whitespaces,
has_invisible_characters,
has_special_whitespaces,
+ has_xml_tag,
remove_invisible_characters,
replace_multiple_whitespaces,
replace_special_whitespaces,
@@ -229,3 +230,38 @@ def test_normalize_counter_to_defaultdict_empty_counter():
assert isinstance(normalized_counter, defaultdict)
assert len(normalized_counter) == 0
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "Some text<ns:tag>more text",  # namespaced opening tag
+        "Some text<tag>more text",  # plain opening tag
+        "Some text</tag>more text",  # closing tag
+        "Some text<tag/>more text",  # self-closing tag
+        "Some text<tag />more text",  # self-closing tag with a space
+    ],
+)
+def test_has_xml_tag_with_tags(text: str):
+    assert has_xml_tag(text)  # every supported tag form must be detected
+
+
+@pytest.mark.parametrize("text", ["Some text", "", "a < b but x > y"])
+def test_has_xml_tag_without_tags(text: str):
+    """Tag-free inputs must be reported as containing no XML tag.
+
+    Covers plain text, the empty string and two comparison operators that
+    merely look like the delimiters of a tag.
+    """
+    tag_found = has_xml_tag(text)
+
+    assert not tag_found
+
+
+@settings(max_examples=1000)
+@given(text())
+def test_has_xml_tag_hypothesis(text: str):
+    if not has_xml_tag(text):
+        return  # nothing to verify when no tag was detected
+    assert "<" in text
+    assert ">" in text