From f0a610fc7a2c4316585269c060b6268eccb71378 Mon Sep 17 00:00:00 2001
From: Kye Gomez <swarms_wd@Kyes-MacBook-Pro.local>
Date: Sat, 10 Aug 2024 11:25:04 -0400
Subject: [PATCH] [SWARMS][Memory]

---
 pyproject.toml                     |   2 +-
 server/Dockerfile                  |  32 ++++
 server/README.md                   |   0
 server/api.py                      | 259 +++++++++++++++++++++++++++++
 server/collab.ipynb                | 152 +++++++++++++++++
 server/requirements.txt            |   4 +
 swarms_memory/chroma_db_wrapper.py |  60 +++++--
 test.md                            |  32 ----
 8 files changed, 492 insertions(+), 49 deletions(-)
 create mode 100644 server/Dockerfile
 create mode 100644 server/README.md
 create mode 100644 server/api.py
 create mode 100644 server/collab.ipynb
 create mode 100644 server/requirements.txt
 delete mode 100644 test.md

diff --git a/pyproject.toml b/pyproject.toml
index 6546d82..77224a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms-memory"
-version = "0.0.3"
+version = "0.0.5"
 description = "Swarms Memory - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <kye@apac.ai>"]
diff --git a/server/Dockerfile b/server/Dockerfile
new file mode 100644
index 0000000..0b13119
--- /dev/null
+++ b/server/Dockerfile
@@ -0,0 +1,32 @@
+# Start with the official Python 3.11 slim image
+FROM python:3.11-slim
+
+# Skip .pyc files, keep stdout/stderr unbuffered, and set API host/port defaults
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+ENV API_HOST="0.0.0.0"
+ENV API_PORT=8000
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies and update package list
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    gcc \
+    libpq-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies, plus the gunicorn/uvicorn servers that CMD needs
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt gunicorn uvicorn
+
+# Copy the FastAPI application code
+COPY . .
+
+# Expose the port FastAPI will run on
+EXPOSE 8000
+
+# Serve api.py's `app` with one worker: ChromaDB state is in-process memory
+CMD ["gunicorn", "api:app", "-w", "1", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"]
diff --git a/server/README.md b/server/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/server/api.py b/server/api.py
new file mode 100644
index 0000000..f093fce
--- /dev/null
+++ b/server/api.py
@@ -0,0 +1,259 @@
+from fastapi import FastAPI, HTTPException, Path, Body
+from pydantic import BaseModel, Field
+from typing import List, Optional
+from loguru import logger
+import chromadb
+import uuid
+import os
+import uvicorn
+
+app = FastAPI()
+
+# Default ChromaDB client; NOTE(review): presumably in-memory and per-process — confirm
+chroma_client = chromadb.Client()
+
+# Logger configuration: also log to api_logs.log, rotating the file at 500 MB
+logger.add("api_logs.log", rotation="500 MB")
+
+
+# Pydantic models
+class CreateCollectionRequest(BaseModel):
+    name: str = Field(..., description="The name of the collection")
+
+
+class AddDocumentsRequest(BaseModel):
+    # Payload for POST /collections/{collection_id}/documents.
+    documents: List[str] = Field(
+        ..., description="List of documents to be added to the collection"
+    )
+    ids: Optional[List[str]] = Field(
+        None,
+        description="Optional list of document IDs. If not provided, IDs will be autogenerated",
+    )
+
+
+class QueryDocumentsRequest(BaseModel):
+    query_texts: List[str] = Field(
+        ..., description="List of query texts to search for similar documents"
+    )
+    # ge=1 rejects zero/negative counts during validation, before ChromaDB runs.
+    n_results: int = Field(
+        1, ge=1, description="Number of similar results to return"
+    )
+
+
+class UpdateDocumentRequest(BaseModel):
+    document: str = Field(
+        ..., description="The updated content of the document"
+    )
+
+
+# Routes
+
+
+@app.post("/collections", response_model=dict)
+def create_collection(request: CreateCollectionRequest):
+    """
+    Create a new collection named ``request.name``.
+
+    Returns a confirmation message. Any failure raised by the ChromaDB
+    client is logged and reported to the caller as HTTP 500.
+    """
+    try:
+        chroma_client.create_collection(name=request.name)
+    except Exception as e:
+        logger.error(f"Error creating collection: {e}")
+        raise HTTPException(
+            status_code=500, detail="Failed to create collection."
+        ) from e
+    logger.info(f"Created collection with name: {request.name}")
+    message = f"Collection '{request.name}' created successfully."
+    return {"message": message}
+
+
+# @app.get("/collections/{collection_id}/get_all_docs", response_model=dict)
+# def get_all_docs(
+#     collection_id: str = Path(
+#         ...,
+#         description="The ID of the collection"
+#     ),
+# ):
+#     collection = chroma_client.get_collection(collection_id)
+
+#     if not collection:
+#         raise HTTPException(
+#             status_code=404, detail="Collection not found."
+#         )
+
+#     return collection.get_all_docs()
+
+
+@app.post(
+    "/collections/{collection_id}/documents", response_model=dict
+)
+def add_documents(
+    collection_id: str = Path(
+        ..., description="The ID of the collection"
+    ),
+    request: AddDocumentsRequest = Body(...),
+):
+    """
+    Add documents to a collection, generating IDs when none are given.
+    Responds 404 for an unknown collection, 500 for any other failure.
+    """
+    try:
+        # get_collection raises on an unknown name; it never returns None.
+        collection = chroma_client.get_collection(collection_id)
+    except Exception as e:
+        raise HTTPException(
+            status_code=404, detail="Collection not found."
+        ) from e
+    try:
+        ids = request.ids or [
+            str(uuid.uuid4()) for _ in request.documents
+        ]
+        collection.add(documents=request.documents, ids=ids)
+        logger.info(
+            f"Added {len(request.documents)} documents to collection {collection_id}."
+        )
+        return {
+            "message": f"Documents added successfully to collection {collection_id}.",
+            "ids": ids,
+        }
+    except Exception as e:
+        logger.error(
+            f"Error adding documents to collection {collection_id}: {e}"
+        )
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to add documents to collection.",
+        ) from e
+
+
+@app.get(
+    "/collections/{collection_id}/documents", response_model=dict
+)
+def query_documents(
+    collection_id: str = Path(
+        ..., description="The ID of the collection"
+    ),
+    query: QueryDocumentsRequest = Body(...),
+):
+    """
+    Return the documents most similar to each of the given query texts.
+    Responds 404 for an unknown collection, 500 for any other failure.
+    """
+    try:
+        # get_collection raises on an unknown name; it never returns None.
+        collection = chroma_client.get_collection(collection_id)
+    except Exception as e:
+        raise HTTPException(
+            status_code=404, detail="Collection not found."
+        ) from e
+    try:
+        results = collection.query(
+            query_texts=query.query_texts, n_results=query.n_results
+        )
+        logger.info(
+            f"Queried collection {collection_id} with {query.query_texts}."
+        )
+        return results
+    except Exception as e:
+        logger.error(
+            f"Error querying documents in collection {collection_id}: {e}"
+        )
+        raise HTTPException(
+            status_code=500, detail="Failed to query documents."
+        ) from e
+
+
+@app.delete(
+    "/collections/{collection_id}/documents/{document_id}",
+    response_model=dict,
+)
+def delete_document(
+    collection_id: str = Path(
+        ..., description="The ID of the collection"
+    ),
+    document_id: str = Path(
+        ..., description="The ID of the document to delete"
+    ),
+):
+    """
+    Delete the document with the given ID from a collection.
+    Responds 404 for an unknown collection, 500 for any other failure.
+    """
+    try:
+        # get_collection raises on an unknown name; it never returns None.
+        collection = chroma_client.get_collection(collection_id)
+    except Exception as e:
+        raise HTTPException(
+            status_code=404, detail="Collection not found."
+        ) from e
+    try:
+        collection.delete(ids=[document_id])
+        logger.info(
+            f"Deleted document {document_id} from collection {collection_id}."
+        )
+        return {
+            "message": f"Document {document_id} deleted successfully from collection {collection_id}."
+        }
+    except Exception as e:
+        logger.error(
+            f"Error deleting document {document_id} from collection {collection_id}: {e}"
+        )
+        raise HTTPException(
+            status_code=500, detail="Failed to delete document."
+        ) from e
+
+
+@app.put(
+    "/collections/{collection_id}/documents/{document_id}",
+    response_model=dict,
+)
+def update_document(
+    collection_id: str = Path(
+        ..., description="The ID of the collection"
+    ),
+    document_id: str = Path(
+        ..., description="The ID of the document to update"
+    ),
+    request: UpdateDocumentRequest = Body(...),
+):
+    """
+    Replace the content of one document inside a collection.
+    Responds 404 for an unknown collection, 500 for any other failure.
+    """
+    try:
+        # get_collection raises on an unknown name; it never returns None.
+        collection = chroma_client.get_collection(collection_id)
+    except Exception as e:
+        raise HTTPException(
+            status_code=404, detail="Collection not found."
+        ) from e
+    try:
+        collection.update(
+            documents=[request.document], ids=[document_id]
+        )
+        logger.info(
+            f"Updated document {document_id} in collection {collection_id}."
+        )
+        return {
+            "message": f"Document {document_id} updated successfully in collection {collection_id}."
+        }
+    except Exception as e:
+        logger.error(
+            f"Error updating document {document_id} in collection {collection_id}: {e}"
+        )
+        raise HTTPException(
+            status_code=500, detail="Failed to update document."
+        ) from e
+
+
+if __name__ == "__main__":
+    # Environment values are strings, so the port is cast to int for uvicorn.
+    uvicorn.run(
+        app,
+        host=os.getenv("API_HOST", "0.0.0.0"),
+        port=int(os.getenv("API_PORT", "8000")),
+    )
diff --git a/server/collab.ipynb b/server/collab.ipynb
new file mode 100644
index 0000000..cb3d1e8
--- /dev/null
+++ b/server/collab.ipynb
@@ -0,0 +1,152 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Status Code: 200\n",
+      "Response JSON: {'message': \"Collection 'my_collection' created successfully.\"}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a collection\n",
+    "import requests\n",
+    "\n",
+    "url = \"http://127.0.0.1:8000/collections\"\n",
+    "\n",
+    "data = {\n",
+    "    \"name\": \"my_collection\"\n",
+    "}\n",
+    "\n",
+    "response = requests.post(url, json=data)\n",
+    "\n",
+    "print(\"Status Code:\", response.status_code)\n",
+    "print(\"Response JSON:\", response.json())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Status Code: 200\n",
+      "Response JSON: {'message': 'Documents added successfully to collection my_collection.', 'ids': ['512d881c-8362-4a55-97e4-7436167537fb', '38d0c12f-dbf9-4c68-81a5-c748dfdd3c0c']}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "\n",
+    "collection_id = \"my_collection\"  # Replace with the actual collection ID\n",
+    "\n",
+    "url = f\"http://127.0.0.1:8000/collections/{collection_id}/documents\"\n",
+    "data = {\n",
+    "    \"documents\": [\n",
+    "        \"This is a document about pineapples\",\n",
+    "        \"This is a document about oranges\"\n",
+    "    ],\n",
+    "}\n",
+    "\n",
+    "response = requests.post(url, json=data)\n",
+    "\n",
+    "print(\"Status Code:\", response.status_code)\n",
+    "print(\"Response JSON:\", response.json())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Status Code: 200\n",
+      "Response JSON: {'ids': [['512d881c-8362-4a55-97e4-7436167537fb', '38d0c12f-dbf9-4c68-81a5-c748dfdd3c0c']], 'distances': [[1.0590863227844238, 1.2430635690689087]], 'metadatas': [[None, None]], 'embeddings': None, 'documents': [['This is a document about pineapples', 'This is a document about oranges']], 'uris': None, 'data': None, 'included': ['metadatas', 'documents', 'distances']}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "\n",
+    "collection_id = \"my_collection\"  # Replace with the actual collection ID\n",
+    "\n",
+    "url = f\"http://127.0.0.1:8000/collections/{collection_id}/documents\"\n",
+    "data = {\n",
+    "    \"query_texts\": [\"This is a query document about Hawaii\"],\n",
+    "    \"n_results\": 2\n",
+    "}\n",
+    "\n",
+    "response = requests.get(url, json=data)\n",
+    "\n",
+    "print(\"Status Code:\", response.status_code)\n",
+    "print(\"Response JSON:\", response.json())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Status Code: 200\n",
+      "Response JSON: {'message': 'Document your_document_id_here deleted successfully from collection my_collection.'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "\n",
+    "collection_id = \"my_collection\"  # Replace with the actual collection ID\n",
+    "\n",
+    "document_id = \"your_document_id_here\"  # Replace with the actual document ID\n",
+    "url = f\"http://127.0.0.1:8000/collections/{collection_id}/documents/{document_id}\"\n",
+    "\n",
+    "response = requests.delete(url)\n",
+    "\n",
+    "print(\"Status Code:\", response.status_code)\n",
+    "print(\"Response JSON:\", response.json())\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/server/requirements.txt b/server/requirements.txt
new file mode 100644
index 0000000..e28c908
--- /dev/null
+++ b/server/requirements.txt
@@ -0,0 +1,6 @@
+chromadb
+fastapi
+pydantic
+loguru
+uvicorn
+gunicorn
\ No newline at end of file
diff --git a/swarms_memory/chroma_db_wrapper.py b/swarms_memory/chroma_db_wrapper.py
index c5b85bf..6645730 100644
--- a/swarms_memory/chroma_db_wrapper.py
+++ b/swarms_memory/chroma_db_wrapper.py
@@ -5,10 +5,10 @@
 
 import chromadb
 from dotenv import load_dotenv
-
+from loguru import logger
+from swarms.memory.base_vectordb import BaseVectorDatabase
 from swarms.utils.data_to_text import data_to_text
 from swarms.utils.markdown_message import display_markdown_message
-from swarms.memory.base_vectordb import BaseVectorDatabase
 
 # Load environment variables
 load_dotenv()
@@ -165,7 +165,9 @@ def query(
         except Exception as e:
             raise Exception(f"Failed to query documents: {str(e)}")
 
-    def traverse_directory(self):
+    def traverse_directory(
+        self, docs_folder: str = None, *args, **kwargs
+    ):
         """
         Traverse through every file in the given directory and its subdirectories,
         and return the paths of all files.
@@ -174,16 +176,42 @@ def traverse_directory(self):
         Returns:
         - list: A list of paths to each file in the directory and its subdirectories.
         """
-        added_to_db = False
-
-        for root, dirs, files in os.walk(self.docs_folder):
-            for file in files:
-                file_path = os.path.join(
-                    root, file
-                )  # Change this line
-                _, ext = os.path.splitext(file_path)
-                data = data_to_text(file_path)
-                added_to_db = self.add(str(data))
-                print(f"{file_path} added to Database")
-
-        return added_to_db
+        # Fall back to the instance-level folder when no override is given.
+        folder = docs_folder or self.docs_folder
+        # os.path.splitext keeps the leading dot, so the entries carry it too.
+        allowed_extensions = {
+            ".txt",
+            ".pdf",
+            ".docx",
+            ".doc",
+            ".md",
+            ".yaml",
+            ".json",
+            ".csv",
+            ".tsv",
+            ".xls",
+            ".xlsx",
+            ".xml",
+            ".yml",
+        }
+        added_to_db = False
+        try:
+            logger.info(f"Traversing directory: {folder}")
+            for root, _dirs, files in os.walk(folder):
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    _, ext = os.path.splitext(file_path)
+                    if ext.lower() not in allowed_extensions:
+                        print(
+                            f"Skipped {file_path} due to unsupported file extension"
+                        )
+                        continue
+                    data = data_to_text(file_path)
+                    added_to_db = self.add(str(data))
+                    print(f"{file_path} added to Database")
+            return added_to_db
+        except Exception as error:
+            logger.error(
+                f"Failed to traverse directory: {str(error)}"
+            )
+            raise
diff --git a/test.md b/test.md
deleted file mode 100644
index d1afa1d..0000000
--- a/test.md
+++ /dev/null
@@ -1,32 +0,0 @@
-<div align="center">
-  <a href="https://swarms.world">
-    <h1>Swarms Memory</h1>
-  </a>
-</div>
-<p align="center">
-  <em>Easy to use, reliable, and bleeding-edge RAG systems.</em>
-</p>
-
-<p align="center">
-    <a href="https://pypi.org/project/swarms/" target="_blank">
-        <img alt="Python" src="https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54" />
-        <img alt="Version" src="https://img.shields.io/pypi/v/swarms?style=for-the-badge&color=3670A0">
-    </a>
-</p>
-<p align="center">
-<a href="https://twitter.com/swarms_corp/">🐦 Twitter</a>
-<span>&nbsp;&nbsp;•&nbsp;&nbsp;</span>
-<a href="https://discord.gg/agora-999382051935506503">📢 Discord</a>
-<span>&nbsp;&nbsp;•&nbsp;&nbsp;</span>
-<a href="https://swarms.world/explorer">Swarms Platform</a>
-<span>&nbsp;&nbsp;•&nbsp;&nbsp;</span>
-<a href="https://docs.swarms.world">📙 Documentation</a>
-</p>
-
-
-[![GitHub issues](https://img.shields.io/github/issues/kyegomez/swarms)](https://github.com/kyegomez/swarms-memory/issues) [![GitHub forks](https://img.shields.io/github/forks/kyegomez/swarms)](https://github.com/kyegomez/swarms-memory/network) [![GitHub stars](https://img.shields.io/github/stars/kyegomez/swarms)](https://github.com/kyegomez/swarms-memory/stargazers) [![GitHub license](https://img.shields.io/github/license/kyegomez/swarms-memory)](https://github.com/kyegomez/swarms-memory/blob/main/LICENSE)[![GitHub star chart](https://img.shields.io/github/stars/kyegomez/swarms-memory?style=social)](https://star-history.com/#kyegomez/swarms)[![Dependency Status](https://img.shields.io/librariesio/github/kyegomez/swarms)](https://libraries.io/github/kyegomez/swarms) [![Downloads](https://static.pepy.tech/badge/swarms-memory/month)](https://pepy.tech/project/swarms-memory)
-
-[![Join the Agora discord](https://img.shields.io/discord/1110910277110743103?label=Discord&logo=discord&logoColor=white&style=plastic&color=d7b023)![Share on Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Share%20%40kyegomez/swarmsmemory)](https://twitter.com/intent/tweet?text=Check%20out%20this%20amazing%20AI%20project:%20&url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms) [![Share on Facebook](https://img.shields.io/badge/Share-%20facebook-blue)](https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms) [![Share on LinkedIn](https://img.shields.io/badge/Share-%20linkedin-blue)](https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&title=&summary=&source=)
-
-[![Share on Reddit](https://img.shields.io/badge/-Share%20on%20Reddit-orange)](https://www.reddit.com/submit?url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&title=Swarms%20-%20the%20future%20of%20AI) [![Share on Hacker News](https://img.shields.io/badge/-Share%20on%20Hacker%20News-orange)](https://news.ycombinator.com/submitlink?u=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&t=Swarms%20-%20the%20future%20of%20AI) [![Share on Pinterest](https://img.shields.io/badge/-Share%20on%20Pinterest-red)](https://pinterest.com/pin/create/button/?url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&media=https%3A%2F%2Fexample.com%2Fimage.jpg&description=Swarms%20-%20the%20future%20of%20AI) [![Share on WhatsApp](https://img.shields.io/badge/-Share%20on%20WhatsApp-green)](https://api.whatsapp.com/send?text=Check%20out%20Swarms%20-%20the%20future%20of%20AI%20%23swarms%20%23AI%0A%0Ahttps%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms)
-