From f0a610fc7a2c4316585269c060b6268eccb71378 Mon Sep 17 00:00:00 2001 From: Kye Gomez Date: Sat, 10 Aug 2024 11:25:04 -0400 Subject: [PATCH] [SWARMS]Memory] --- pyproject.toml | 2 +- server/Dockerfile | 32 ++++ server/README.md | 0 server/api.py | 259 +++++++++++++++++++++++++++++ server/collab.ipynb | 152 +++++++++++++++++ server/requirements.txt | 4 + swarms_memory/chroma_db_wrapper.py | 60 +++++-- test.md | 32 ---- 8 files changed, 492 insertions(+), 49 deletions(-) create mode 100644 server/Dockerfile create mode 100644 server/README.md create mode 100644 server/api.py create mode 100644 server/collab.ipynb create mode 100644 server/requirements.txt delete mode 100644 test.md diff --git a/pyproject.toml b/pyproject.toml index 6546d82..77224a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "swarms-memory" -version = "0.0.3" +version = "0.0.5" description = "Swarms Memory - Pytorch" license = "MIT" authors = ["Kye Gomez "] diff --git a/server/Dockerfile b/server/Dockerfile new file mode 100644 index 0000000..0b13119 --- /dev/null +++ b/server/Dockerfile @@ -0,0 +1,32 @@ +# Start with the official Python 3.11 slim image +FROM python:3.11-slim + +# Set environment variables to prevent Python from writing .pyc files to disk +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV API_HOST="0.0.0.0" +ENV API_PORT=8000 + +# Set working directory +WORKDIR /app + +# Install system dependencies and update package list +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + gcc \ + libpq-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +# Copy the FastAPI application code +COPY . . + +# Expose the port FastAPI will run on +EXPOSE 8000 + +# Command to run the application with Uvicorn and Gunicorn +CMD ["gunicorn", "main:api", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"] diff --git a/server/README.md b/server/README.md new file mode 100644 index 0000000..e69de29 diff --git a/server/api.py b/server/api.py new file mode 100644 index 0000000..f093fce --- /dev/null +++ b/server/api.py @@ -0,0 +1,259 @@ +from fastapi import FastAPI, HTTPException, Path, Body +from pydantic import BaseModel, Field +from typing import List, Optional +from loguru import logger +import chromadb +import uuid +import os +import uvicorn + +app = FastAPI() + +# Initialize the ChromaDB client +chroma_client = chromadb.Client() + +# Logger configuration +logger.add("api_logs.log", rotation="500 MB") + + +# Pydantic models +class CreateCollectionRequest(BaseModel): + name: str = Field(..., description="The name of the collection") + + +class AddDocumentsRequest(BaseModel): + documents: List[str] = Field( + ..., + description="List of documents to be added to the collection", + ) + ids: Optional[List[str]] = Field( + None, + description="Optional list of document IDs. If not provided, IDs will be autogenerated", + ) + + +class QueryDocumentsRequest(BaseModel): + query_texts: List[str] = Field( + ..., + description="List of query texts to search for similar documents", + ) + n_results: int = Field( + 1, description="Number of similar results to return" + ) + + +class UpdateDocumentRequest(BaseModel): + document: str = Field( + ..., description="The updated content of the document" + ) + + +# Routes + + +@app.post("/collections", response_model=dict) +def create_collection(request: CreateCollectionRequest): + """ + Creates a new collection with the specified name. + """ + try: + collection = chroma_client.create_collection( + name=request.name + ) + logger.info(f"Created collection with name: {request.name}") + return { + "message": f"Collection '{request.name}' created successfully." + } + except Exception as e: + logger.error(f"Error creating collection: {e}") + raise HTTPException( + status_code=500, detail="Failed to create collection." + ) + + +# @app.get("/collections/{collection_id}/get_all_docs", response_model=dict) +# def get_all_docs( +# collection_id: str = Path( +# ..., +# description="The ID of the collection" +# ), +# ): +# collection = chroma_client.get_collection(collection_id) + +# if not collection: +# raise HTTPException( +# status_code=404, detail="Collection not found." +# ) + +# return collection.get_all_docs() + + +@app.post( + "/collections/{collection_id}/documents", response_model=dict +) +def add_documents( + collection_id: str = Path( + ..., description="The ID of the collection" + ), + request: AddDocumentsRequest = Body(...), +): + """ + Adds one or more documents to the specified collection. + """ + try: + collection = chroma_client.get_collection(collection_id) + if not collection: + raise HTTPException( + status_code=404, detail="Collection not found." + ) + + ids = request.ids or [ + str(uuid.uuid4()) for _ in range(len(request.documents)) + ] + collection.add(documents=request.documents, ids=ids) + logger.info( + f"Added {len(request.documents)} documents to collection {collection_id}." + ) + return { + "message": f"Documents added successfully to collection {collection_id}.", + "ids": ids, + } + except HTTPException as e: + raise e + except Exception as e: + logger.error( + f"Error adding documents to collection {collection_id}: {e}" + ) + raise HTTPException( + status_code=500, + detail="Failed to add documents to collection.", + ) + + +@app.get( + "/collections/{collection_id}/documents", response_model=dict +) +def query_documents( + collection_id: str = Path( + ..., description="The ID of the collection" + ), + query: QueryDocumentsRequest = Body(...), +): + """ + Queries the collection for the most similar documents based on the provided query texts. + """ + try: + collection = chroma_client.get_collection(collection_id) + if not collection: + raise HTTPException( + status_code=404, detail="Collection not found." + ) + + results = collection.query( + query_texts=query.query_texts, n_results=query.n_results + ) + logger.info( + f"Queried collection {collection_id} with {query.query_texts}." + ) + return results + except HTTPException as e: + raise e + except Exception as e: + logger.error( + f"Error querying documents in collection {collection_id}: {e}" + ) + raise HTTPException( + status_code=500, detail="Failed to query documents." + ) + + +@app.delete( + "/collections/{collection_id}/documents/{document_id}", + response_model=dict, +) +def delete_document( + collection_id: str = Path( + ..., description="The ID of the collection" + ), + document_id: str = Path( + ..., description="The ID of the document to delete" + ), +): + """ + Deletes a specific document from the collection. + """ + try: + collection = chroma_client.get_collection(collection_id) + if not collection: + raise HTTPException( + status_code=404, detail="Collection not found." + ) + + collection.delete(ids=[document_id]) + logger.info( + f"Deleted document {document_id} from collection {collection_id}." + ) + return { + "message": f"Document {document_id} deleted successfully from collection {collection_id}." + } + except HTTPException as e: + raise e + except Exception as e: + logger.error( + f"Error deleting document {document_id} from collection {collection_id}: {e}" + ) + raise HTTPException( + status_code=500, detail="Failed to delete document." + ) + + +@app.put( + "/collections/{collection_id}/documents/{document_id}", + response_model=dict, +) +def update_document( + collection_id: str = Path( + ..., description="The ID of the collection" + ), + document_id: str = Path( + ..., description="The ID of the document to update" + ), + request: UpdateDocumentRequest = Body(...), +): + """ + Updates the content of a specific document within a collection. + """ + try: + collection = chroma_client.get_collection(collection_id) + if not collection: + raise HTTPException( + status_code=404, detail="Collection not found." + ) + + collection.update( + documents=[request.document], ids=[document_id] + ) + logger.info( + f"Updated document {document_id} in collection {collection_id}." + ) + return { + "message": f"Document {document_id} updated successfully in collection {collection_id}." + } + except HTTPException as e: + raise e + except Exception as e: + logger.error( + f"Error updating document {document_id} in collection {collection_id}: {e}" + ) + raise HTTPException( + status_code=500, detail="Failed to update document." + ) + + +if __name__ == "__main__": + + uvicorn.run( + app, + host=os.getenv("API_HOST"), + port=os.getenv("API_PORT", 8000), + ) diff --git a/server/collab.ipynb b/server/collab.ipynb new file mode 100644 index 0000000..cb3d1e8 --- /dev/null +++ b/server/collab.ipynb @@ -0,0 +1,152 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status Code: 200\n", + "Response JSON: {'message': \"Collection 'my_collection' created successfully.\"}\n" + ] + } + ], + "source": [ + "# Create a config\n", + "import requests\n", + "\n", + "url = \"http://127.0.0.1:8000/collections\"\n", + "\n", + "data = {\n", + " \"name\": \"my_collection\"\n", + "}\n", + "\n", + "response = requests.post(url, json=data)\n", + "\n", + "print(\"Status Code:\", response.status_code)\n", + "print(\"Response JSON:\", response.json())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status Code: 200\n", + "Response JSON: {'message': 'Documents added successfully to collection my_collection.', 'ids': ['512d881c-8362-4a55-97e4-7436167537fb', '38d0c12f-dbf9-4c68-81a5-c748dfdd3c0c']}\n" + ] + } + ], + "source": [ + "import requests\n", + "\n", + "collection_id = \"my_collection\" # Replace with the actual collection ID\n", + "\n", + "url = f\"http://127.0.0.1:8000/collections/{collection_id}/documents\"\n", + "data = {\n", + " \"documents\": [\n", + " \"This is a document about pineapples\",\n", + " \"This is a document about oranges\"\n", + " ],\n", + "}\n", + "\n", + "response = requests.post(url, json=data)\n", + "\n", + "print(\"Status Code:\", response.status_code)\n", + "print(\"Response JSON:\", response.json())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status Code: 200\n", + "Response JSON: {'ids': [['512d881c-8362-4a55-97e4-7436167537fb', '38d0c12f-dbf9-4c68-81a5-c748dfdd3c0c']], 'distances': [[1.0590863227844238, 1.2430635690689087]], 'metadatas': [[None, None]], 'embeddings': None, 'documents': [['This is a document about pineapples', 'This is a document about oranges']], 'uris': None, 'data': None, 'included': ['metadatas', 'documents', 'distances']}\n" + ] + } + ], + "source": [ + "import requests\n", + "\n", + "collection_id = \"my_collection\" # Replace with the actual collection ID\n", + "\n", + "url = f\"http://127.0.0.1:8000/collections/{collection_id}/documents\"\n", + "data = {\n", + " \"query_texts\": [\"This is a query document about Hawaii\"],\n", + " \"n_results\": 2\n", + "}\n", + "\n", + "response = requests.get(url, json=data)\n", + "\n", + "print(\"Status Code:\", response.status_code)\n", + "print(\"Response JSON:\", response.json())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status Code: 200\n", + "Response JSON: {'message': 'Document your_document_id_here deleted successfully from collection my_collection.'}\n" + ] + } + ], + "source": [ + "import requests\n", + "\n", + "collection_id = \"my_collection\" # Replace with the actual collection ID\n", + "\n", + "document_id = \"your_document_id_here\" # Replace with the actual document ID\n", + "url = f\"http://127.0.0.1:8000/collections/{collection_id}/documents/{document_id}\"\n", + "\n", + "response = requests.delete(url)\n", + "\n", + "print(\"Status Code:\", response.status_code)\n", + "print(\"Response JSON:\", response.json())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/server/requirements.txt b/server/requirements.txt new file mode 100644 index 0000000..e28c908 --- /dev/null +++ b/server/requirements.txt @@ -0,0 +1,4 @@ +chromadb +fastapi +pydantic +loguru \ No newline at end of file diff --git a/swarms_memory/chroma_db_wrapper.py b/swarms_memory/chroma_db_wrapper.py index c5b85bf..6645730 100644 --- a/swarms_memory/chroma_db_wrapper.py +++ b/swarms_memory/chroma_db_wrapper.py @@ -5,10 +5,10 @@ import chromadb from dotenv import load_dotenv - +from loguru import logger +from swarms.memory.base_vectordb import BaseVectorDatabase from swarms.utils.data_to_text import data_to_text from swarms.utils.markdown_message import display_markdown_message -from swarms.memory.base_vectordb import BaseVectorDatabase # Load environment variables load_dotenv() @@ -165,7 +165,9 @@ def query( except Exception as e: raise Exception(f"Failed to query documents: {str(e)}") - def traverse_directory(self): + def traverse_directory( + self, docs_folder: str = None, *args, **kwargs + ): """ Traverse through every file in the given directory and its subdirectories, and return the paths of all files. @@ -174,16 +176,42 @@ def traverse_directory(self): Returns: - list: A list of paths to each file in the directory and its subdirectories. """ - added_to_db = False - - for root, dirs, files in os.walk(self.docs_folder): - for file in files: - file_path = os.path.join( - root, file - ) # Change this line - _, ext = os.path.splitext(file_path) - data = data_to_text(file_path) - added_to_db = self.add(str(data)) - print(f"{file_path} added to Database") - - return added_to_db + try: + logger.info(f"Traversing directory: {self.docs_folder}") + added_to_db = False + allowed_extensions = [ + "txt", + "pdf", + "docx", + "doc", + "md", + "yaml", + "json", + "csv", + "tsv", + "xls", + "xlsx", + "xml", + "yml", + ] + + for root, dirs, files in os.walk(self.docs_folder): + for file in files: + file_path = os.path.join(root, file) + _, ext = os.path.splitext(file_path) + if ext.lower() in allowed_extensions: + data = data_to_text(file_path) + added_to_db = self.add(str(data)) + print(f"{file_path} added to Database") + else: + print( + f"Skipped {file_path} due to unsupported file extension" + ) + + return added_to_db + + except Exception as error: + logger.error( + f"Failed to traverse directory: {str(error)}" + ) + raise error diff --git a/test.md b/test.md deleted file mode 100644 index d1afa1d..0000000 --- a/test.md +++ /dev/null @@ -1,32 +0,0 @@ -
- -

Swarms Memory

-
-
-

- Easy to use, reliable, and bleeding-edge RAG systems. -

- -

- - Python - Version - -

-

-🐦 Twitter -  •   -📢 Discord -  •   -Swarms Platform -  •   -📙 Documentation -

- - -[![GitHub issues](https://img.shields.io/github/issues/kyegomez/swarms)](https://github.com/kyegomez/swarms-memory/issues) [![GitHub forks](https://img.shields.io/github/forks/kyegomez/swarms)](https://github.com/kyegomez/swarms-memory/network) [![GitHub stars](https://img.shields.io/github/stars/kyegomez/swarms)](https://github.com/kyegomez/swarms-memory/stargazers) [![GitHub license](https://img.shields.io/github/license/kyegomez/swarms-memory)](https://github.com/kyegomez/swarms-memory/blob/main/LICENSE)[![GitHub star chart](https://img.shields.io/github/stars/kyegomez/swarms-memory?style=social)](https://star-history.com/#kyegomez/swarms)[![Dependency Status](https://img.shields.io/librariesio/github/kyegomez/swarms)](https://libraries.io/github/kyegomez/swarms) [![Downloads](https://static.pepy.tech/badge/swarms-memory/month)](https://pepy.tech/project/swarms-memory) - -[![Join the Agora discord](https://img.shields.io/discord/1110910277110743103?label=Discord&logo=discord&logoColor=white&style=plastic&color=d7b023)![Share on Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Share%20%40kyegomez/swarmsmemory)](https://twitter.com/intent/tweet?text=Check%20out%20this%20amazing%20AI%20project:%20&url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms) [![Share on Facebook](https://img.shields.io/badge/Share-%20facebook-blue)](https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms) [![Share on LinkedIn](https://img.shields.io/badge/Share-%20linkedin-blue)](https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&title=&summary=&source=) - -[![Share on Reddit](https://img.shields.io/badge/-Share%20on%20Reddit-orange)](https://www.reddit.com/submit?url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&title=Swarms%20-%20the%20future%20of%20AI) [![Share on Hacker News](https://img.shields.io/badge/-Share%20on%20Hacker%20News-orange)](https://news.ycombinator.com/submitlink?u=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&t=Swarms%20-%20the%20future%20of%20AI) [![Share on Pinterest](https://img.shields.io/badge/-Share%20on%20Pinterest-red)](https://pinterest.com/pin/create/button/?url=https%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms&media=https%3A%2F%2Fexample.com%2Fimage.jpg&description=Swarms%20-%20the%20future%20of%20AI) [![Share on WhatsApp](https://img.shields.io/badge/-Share%20on%20WhatsApp-green)](https://api.whatsapp.com/send?text=Check%20out%20Swarms%20-%20the%20future%20of%20AI%20%23swarms%20%23AI%0A%0Ahttps%3A%2F%2Fgithub.com%2Fkyegomez%2Fswarms) -