From 5716a519c191fdb1e0ef2b8b85102fe4d1ed1b26 Mon Sep 17 00:00:00 2001
From: MichaelDecent <michaeldecent2@gmail.com>
Date: Fri, 10 Jan 2025 08:21:19 +0100
Subject: [PATCH 1/5] feat: add Swarmauri TextBlob Parser

---
 .../README.md                                 |  1 +
 .../pyproject.toml                            | 55 ++++++++++++++++++
 .../TextBlobNounParser.py                     | 57 +++++++++++++++++++
 .../TextBlobSentenceParser.py                 | 49 ++++++++++++++++
 .../__init__.py                               | 16 ++++++
 5 files changed, 178 insertions(+)
 create mode 100644 pkgs/community/swarmauri_parser_communitytextblob/README.md
 create mode 100644 pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
 create mode 100644 pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
 create mode 100644 pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py
 create mode 100644 pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/__init__.py

diff --git a/pkgs/community/swarmauri_parser_communitytextblob/README.md b/pkgs/community/swarmauri_parser_communitytextblob/README.md
new file mode 100644
index 000000000..cd26902a2
--- /dev/null
+++ b/pkgs/community/swarmauri_parser_communitytextblob/README.md
@@ -0,0 +1 @@
+# Swarmauri TextBlob Parser Community Package
\ No newline at end of file
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml b/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
new file mode 100644
index 000000000..e62e8ae26
--- /dev/null
+++ b/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
@@ -0,0 +1,55 @@
+[tool.poetry]
+name = "swarmauri_parser_communitytextblob"
+version = "0.6.0.dev1"
+description = "TextBlob Parser for Swarmauri."
+authors = ["Jacob Stewart <jacob@swarmauri.com>"]
+license = "Apache-2.0"
+readme = "README.md"
+repository = "http://github.com/swarmauri/swarmauri-sdk"
+classifiers = [
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12"
+]
+
+[tool.poetry.dependencies]
+python = ">=3.10,<3.13"
+
+# Swarmauri
+swarmauri_core = { path = "../../core" }
+swarmauri_base = { path = "../../base" }
+
+
+[tool.poetry.group.dev.dependencies]
+flake8 = "^7.0"
+pytest = "^8.0"
+pytest-asyncio = ">=0.24.0"
+pytest-xdist = "^3.6.1"
+pytest-json-report = "^1.5.0"
+python-dotenv = "*"
+requests = "^2.32.3"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+norecursedirs = ["combined", "scripts"]
+
+markers = [
+    "test: standard test",
+    "unit: Unit tests",
+    "integration: Integration tests",
+    "acceptance: Acceptance tests",
+    "experimental: Experimental tests"
+]
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+asyncio_default_fixture_loop_scope = "function"
+
+[tool.poetry.plugins."swarmauri.parsers"]
+TextBlobNounParser = "swarmauri_parser_communitytextblob:TextBlobNounParser"
+TextBlobSentenceParser = "swarmauri_parser_communitytextblob:TextBlobSentenceParser"
\ No newline at end of file
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
new file mode 100644
index 000000000..68023f1aa
--- /dev/null
+++ b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
@@ -0,0 +1,57 @@
+from textblob import TextBlob
+from typing import List, Union, Any, Literal
+from swarmauri.documents.concrete.Document import Document
+from swarmauri.parsers.base.ParserBase import ParserBase
+
+
+class TextBlobNounParser(ParserBase):
+    """
+    A ParserBase subclass that uses TextBlob to extract noun phrases from text.
+
+    Only noun phrase extraction is performed: parse() returns the input text in a
+    single Document whose metadata lists the phrases under "noun_phrases".
+    """
+
+    type: Literal["TextBlobNounParser"] = "TextBlobNounParser"
+
+    def __init__(self, **kwargs):
+        try:
+            import nltk
+            # Request the NLTK data packages on every instantiation. Any exception
+            # raised in this block, including one from super().__init__, becomes RuntimeError.
+            nltk.download("punkt")
+            nltk.download("averaged_perceptron_tagger")
+            nltk.download("brown")
+            nltk.download("wordnet")
+            super().__init__(**kwargs)
+        except Exception as e:
+            raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}")
+
+    def parse(self, data: Union[str, Any]) -> List[Document]:
+        """
+        Extracts noun phrases from the input text and wraps them in a Document.
+
+        Parameters:
+        - data (Union[str, Any]): The text to parse; must be a str.
+
+        Returns:
+        - List[Document]: One Document holding the input text, with the
+          extracted phrases under metadata["noun_phrases"].
+
+        Raises:
+        - ValueError: If data is not a str.
+        - RuntimeError: If any exception occurs while building the result.
+        """
+        if not isinstance(data, str):
+            raise ValueError("TextBlobParser expects a string as input data.")
+
+        try:
+            blob = TextBlob(data)
+            # noun_phrases is materialized into a plain list before being stored
+            noun_phrases = list(blob.noun_phrases)
+
+            document = Document(content=data, metadata={"noun_phrases": noun_phrases})
+
+            return [document]
+        except Exception as e:
+            raise RuntimeError(f"Error during text parsing: {str(e)}")
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py
new file mode 100644
index 000000000..b816b065c
--- /dev/null
+++ b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py
@@ -0,0 +1,49 @@
+from textblob import TextBlob
+from typing import List, Union, Any, Literal
+from swarmauri.documents.concrete.Document import Document
+from swarmauri.parsers.base.ParserBase import ParserBase
+
+
+class TextBlobSentenceParser(ParserBase):
+    """
+    A parser that leverages TextBlob to break text into sentences.
+
+    Each sentence found by TextBlob is returned as its own Document, tagged
+    with the parser name in its metadata.
+    """
+
+    type: Literal["TextBlobSentenceParser"] = "TextBlobSentenceParser"
+
+    def __init__(self, **kwargs):
+        """Request the NLTK "punkt_tab" download, then initialize the base parser."""
+        import nltk
+
+        nltk.download("punkt_tab")
+        super().__init__(**kwargs)
+
+    def parse(self, data: Union[str, Any]) -> List[Document]:
+        """
+        Parses the input text into sentence-based document chunks using TextBlob.
+
+        Args:
+            data (Union[str, Any]): The text to split; a non-string value is
+                converted with str() before parsing.
+
+        Returns:
+            List[Document]: One Document per sentence, in the order TextBlob yields them.
+        """
+        # Coerce instead of rejecting, so non-string input is parsed via its str() form
+        if not isinstance(data, str):
+            data = str(data)
+
+        # Utilize TextBlob for sentence tokenization
+        sentences = TextBlob(data).sentences
+
+        # Build one Document per sentence; the metadata records which parser
+        # produced it.
+        return [
+            Document(
+                content=str(sentence), metadata={"parser": "TextBlobSentenceParser"}
+            )
+            for sentence in sentences
+        ]
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/__init__.py b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/__init__.py
new file mode 100644
index 000000000..a83877682
--- /dev/null
+++ b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/__init__.py
@@ -0,0 +1,16 @@
+from .TextBlobNounParser import TextBlobNounParser
+from .TextBlobSentenceParser import TextBlobSentenceParser
+
+__version__ = "0.6.0.dev26"
+__long_desc__ = """
+
+# Swarmauri TextBlob Based Components
+
+Components Included:
+- TextBlobNounParser
+- TextBlobSentenceParser
+
+Follow us at: https://github.com/swarmauri
+Star us at: https://github.com/swarmauri/swarmauri-sdk
+
+"""

From dc2573f0d48760d838ad68f7efb59adf66c653aa Mon Sep 17 00:00:00 2001
From: MichaelDecent <michaeldecent2@gmail.com>
Date: Fri, 10 Jan 2025 08:34:26 +0100
Subject: [PATCH 2/5] add punkt_tab resource download for NLTK in
 TextBlobNounParser.

Co-authored-by: Lavesh-Akhadkar <laveshakhadkar@gmail.com>
---
 .../swarmauri_parser_communitytextblob/TextBlobNounParser.py     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
index 68023f1aa..c5f6cb31a 100644
--- a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
+++ b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
@@ -23,6 +23,7 @@ def __init__(self, **kwargs):
             nltk.download("averaged_perceptron_tagger")
             nltk.download("brown")
             nltk.download("wordnet")
+            nltk.download('punkt_tab')
             super().__init__(**kwargs)
         except Exception as e:
             raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}")

From ea5b6e59e91ae2dd26eadc45f771d4c23c6d08d2 Mon Sep 17 00:00:00 2001
From: MichaelDecent <michaeldecent2@gmail.com>
Date: Fri, 10 Jan 2025 08:41:27 +0100
Subject: [PATCH 3/5] feat: add TextBlob dependency and implement unit tests
 for NLTK parsers

---
 .../pyproject.toml                            |  3 ++
 .../TextBlobNounParser.py                     |  6 +--
 .../TextBlobSentenceParser.py                 |  4 +-
 .../unit/TextBlobNounParser_unit_test.py      | 43 +++++++++++++++++++
 .../unit/TextBlobSentenceParser_unit_test.py  | 23 ++++++++++
 5 files changed, 74 insertions(+), 5 deletions(-)
 create mode 100644 pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py
 create mode 100644 pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py

diff --git a/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml b/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
index e62e8ae26..f2b3b8ff9 100644
--- a/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
+++ b/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
@@ -20,6 +20,9 @@ python = ">=3.10,<3.13"
 swarmauri_core = { path = "../../core" }
 swarmauri_base = { path = "../../base" }
 
+# Dependencies
+textblob = "^0.18.0"
+
 
 [tool.poetry.group.dev.dependencies]
 flake8 = "^7.0"
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
index c5f6cb31a..36ad70218 100644
--- a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
+++ b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobNounParser.py
@@ -1,7 +1,7 @@
 from textblob import TextBlob
 from typing import List, Union, Any, Literal
-from swarmauri.documents.concrete.Document import Document
-from swarmauri.parsers.base.ParserBase import ParserBase
+from swarmauri_standard.documents.Document import Document
+from swarmauri_base.parsers.ParserBase import ParserBase
 
 
 class TextBlobNounParser(ParserBase):
@@ -23,7 +23,7 @@ def __init__(self, **kwargs):
             nltk.download("averaged_perceptron_tagger")
             nltk.download("brown")
             nltk.download("wordnet")
-            nltk.download('punkt_tab')
+            nltk.download("punkt_tab")
             super().__init__(**kwargs)
         except Exception as e:
             raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}")
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py
index b816b065c..0b94ba9c9 100644
--- a/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py
+++ b/pkgs/community/swarmauri_parser_communitytextblob/swarmauri_parser_communitytextblob/TextBlobSentenceParser.py
@@ -1,7 +1,7 @@
 from textblob import TextBlob
 from typing import List, Union, Any, Literal
-from swarmauri.documents.concrete.Document import Document
-from swarmauri.parsers.base.ParserBase import ParserBase
+from swarmauri_standard.documents.Document import Document
+from swarmauri_base.parsers.ParserBase import ParserBase
 
 
 class TextBlobSentenceParser(ParserBase):
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py
new file mode 100644
index 000000000..e5f8a550c
--- /dev/null
+++ b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py
@@ -0,0 +1,43 @@
+import pytest
+from swarmauri_community.parsers.concrete.TextBlobNounParser import TextBlobNounParser as Parser
+
+
+def setup_module(module):
+    """Setup any state specific to the execution of the given module."""
+    try:
+        # Initialize a parser to trigger NLTK downloads
+        Parser()
+    except Exception as e:
+        pytest.skip(f"Failed to initialize NLTK resources: {str(e)}")
+
+
+@pytest.fixture(scope="module")
+def parser():
+    """Fixture to provide a parser instance for tests."""
+    return Parser()
+
+
+@pytest.mark.unit
+def test_ubc_resource(parser):
+    assert parser.resource == "Parser"
+
+
+@pytest.mark.unit
+def test_ubc_type(parser):
+    assert parser.type == "TextBlobNounParser"
+
+
+@pytest.mark.unit
+def test_serialization(parser):
+    assert parser.id == Parser.model_validate_json(parser.model_dump_json()).id
+
+
+@pytest.mark.unit
+def test_parse(parser):
+    """Noun phrases of the input are stored under metadata["noun_phrases"]."""
+    # No try/except: letting exceptions propagate keeps pytest's assertion
+    # introspection and the original traceback in the failure report.
+    documents = parser.parse("One more large chapula please.")
+    assert documents[0].resource == "Document"
+    assert documents[0].content == "One more large chapula please."
+    assert documents[0].metadata["noun_phrases"] == ["large chapula"]
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py
new file mode 100644
index 000000000..36c347906
--- /dev/null
+++ b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py
@@ -0,0 +1,23 @@
+import pytest
+from swarmauri_community.parsers.concrete.TextBlobSentenceParser import TextBlobSentenceParser as Parser
+
+@pytest.mark.unit
+def test_ubc_resource():
+    parser = Parser()
+    assert parser.resource == 'Parser'
+
+@pytest.mark.unit
+def test_ubc_type():
+    parser = Parser()
+    assert parser.type == 'TextBlobSentenceParser'
+
+@pytest.mark.unit
+def test_serialization():
+    parser = Parser()
+    assert parser.id == Parser.model_validate_json(parser.model_dump_json()).id
+
+@pytest.mark.unit
+def test_parse():
+    documents = Parser().parse('One more large chapula please.')
+    assert documents[0].resource == 'Document'
+    assert documents[0].content == 'One more large chapula please.'

From ef0aab4363b280c4ae17345fa49b3315474a177e Mon Sep 17 00:00:00 2001
From: MichaelDecent <michaeldecent2@gmail.com>
Date: Fri, 10 Jan 2025 08:55:16 +0100
Subject: [PATCH 4/5] fix: update import paths for TextBlobNounParser and
 TextBlobSentenceParser in unit tests

---
 .../tests/unit/TextBlobNounParser_unit_test.py                  | 2 +-
 .../tests/unit/TextBlobSentenceParser_unit_test.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py
index e5f8a550c..df9748212 100644
--- a/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py
+++ b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobNounParser_unit_test.py
@@ -1,5 +1,5 @@
 import pytest
-from swarmauri_community.parsers.concrete.TextBlobNounParser import TextBlobNounParser as Parser
+from swarmauri_parser_communitytextblob.TextBlobNounParser import TextBlobNounParser as Parser
 
 
 def setup_module(module):
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py
index 36c347906..75375c5cb 100644
--- a/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py
+++ b/pkgs/community/swarmauri_parser_communitytextblob/tests/unit/TextBlobSentenceParser_unit_test.py
@@ -1,5 +1,5 @@
 import pytest
-from swarmauri_community.parsers.concrete.TextBlobSentenceParser import TextBlobSentenceParser as Parser
+from swarmauri_parser_communitytextblob.TextBlobSentenceParser import TextBlobSentenceParser as Parser
 
 @pytest.mark.unit
 def test_ubc_resource():

From b1076e145624df0740b51d9028ff3a7c89dc34df Mon Sep 17 00:00:00 2001
From: MichaelDecent <michaeldecent2@gmail.com>
Date: Fri, 10 Jan 2025 09:12:22 +0100
Subject: [PATCH 5/5] fix: update nltk and textblob dependencies in
 pyproject.toml files

---
 pkgs/community/swarmauri_community/pyproject.toml             | 4 ++--
 .../swarmauri_parser_communitytextblob/pyproject.toml         | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pkgs/community/swarmauri_community/pyproject.toml b/pkgs/community/swarmauri_community/pyproject.toml
index dfccb914c..f0023c02c 100644
--- a/pkgs/community/swarmauri_community/pyproject.toml
+++ b/pkgs/community/swarmauri_community/pyproject.toml
@@ -32,7 +32,7 @@ captcha = "^0.6.0"
 #gradio = { version = "^5.4.0", optional = true }
 leptonai = { version = "^0.22.0", optional = true }
 neo4j = { version = "^5.25.0", optional = true }
-nltk = { version = "^3.9.1", optional = true }
+#nltk = { version = "^3.9.1", optional = true }
 pandas = "^2.2.3"
 psutil = { version = "^6.1.0", optional = true }
 pygithub = { version = "^2.4.0", optional = true }
@@ -49,7 +49,7 @@ pinecone-client = { version = "^5.0.1", optional = true, extras = ["grpc"] }
 pypdf = { version = "^5.0.1", optional = true }
 pypdftk = { version = "^0.5", optional = true }
 weaviate-client = { version = "^4.9.2", optional = true }
-textblob = { version = "^0.18.0", optional = true }
+#textblob = { version = "^0.18.0", optional = true }
 torch = { version = "^2.4.1", optional = true}
 scikit-learn = { version = "^1.5.2", optional = true }
 #protobuf = { version = "^3.20.0", optional = true }
diff --git a/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml b/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
index f2b3b8ff9..fa0369057 100644
--- a/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
+++ b/pkgs/community/swarmauri_parser_communitytextblob/pyproject.toml
@@ -22,6 +22,7 @@ swarmauri_base = { path = "../../base" }
 
 # Dependencies
 textblob = "^0.18.0"
+nltk = "^3.9.1"
 
 
 [tool.poetry.group.dev.dependencies]