diff --git a/.env b/.env
index 31b87d1..28ce062 100644
--- a/.env
+++ b/.env
@@ -1,2 +1,7 @@
 VOLUME="/source/local-machine/dir:target/multi-container/app/dir"
-# VOLUME="c:/Users/User/:/User" e.g.
\ No newline at end of file
+# VOLUME="c:/Users/User/:/User" e.g.
+MODELS_PATH="/path/to/gguf/models"
+# MODELS_PATH="c:/Users/User/.cache/llama.cpp/"
+MODEL="your_favorite_model.gguf"
+# MODEL="stories260K.gguf"
+MAX_TOKENS="512"
\ No newline at end of file
diff --git a/README.md b/README.md
index 95438b5..8782dc7 100644
--- a/README.md
+++ b/README.md
@@ -23,18 +23,26 @@ git clone https://github.com/AstraBert/everything-ai.git
 cd everything-ai
 ```
 ### 2. Set your `.env` file
-Modify the `VOLUME` variable in the .env file so that you can mount your local file system into Docker container.
+Modify:
+- the `VOLUME` variable in the `.env` file, so that you can mount your local file system into the Docker container.
+- the `MODELS_PATH` variable, so that you can tell llama.cpp where you stored the GGUF models you downloaded.
+- the `MODEL` variable, so that you can tell llama.cpp which model to use (use the actual name of the GGUF file, and do not forget the `.gguf` extension!).
+- the `MAX_TOKENS` variable, so that you can tell llama.cpp the maximum number of new tokens it can generate as output.
-An example could be:
+An example of a `.env` file could be:
 ```bash
 VOLUME="c:/Users/User/:/User/"
+MODELS_PATH="c:/Users/User/.cache/llama.cpp/"
+MODEL="stories260K.gguf"
+MAX_TOKENS="512"
 ```
-This means that now everything that is under "c:/Users/User/" on your local machine is under "/User/" in your Docker container.
+This means that everything under "c:/Users/User/" on your local machine is now under "/User/" in your Docker container, and that llama.cpp knows where to look for models, which model to load, and the maximum number of new tokens to generate.
 ### 3. Pull the necessary images
 ```bash
-docker pull astrabert/everything-ai
-docker pull qdrant/qdrant
+docker pull astrabert/everything-ai:latest
+docker pull qdrant/qdrant:latest
+docker pull ghcr.io/ggerganov/llama.cpp:server
 ```
 ### 4. Run the multi-container app
 ```bash
@@ -63,6 +71,7 @@ Choose the task among:
 - *protein-folding*: get the 3D structure of a protein from its amino-acid sequence, using the ESM-2 backbone model - **GPU ONLY**
 - *autotrain*: fine-tune a model on a specific downstream task with autotrain-advanced, just by specifying your HF username, HF writing token and the path to a yaml config file for the training
 - *spaces-api-supabase*: use HF Spaces API in combination with Supabase PostgreSQL databases in order to unleash more powerful LLMs and larger RAG-oriented vector databases - **MULTILINGUAL**
+- *llama.cpp-and-qdrant*: same as *retrieval-text-generation*, but uses **llama.cpp** as the inference engine, so you MUST NOT specify a model - **MULTILINGUAL**
 - *image-retrieval-search*: search an image database by uploading a folder as database input.
 The folder should have the following structure:
 ```
@@ -87,4 +96,3 @@ Once everything is ready, you can head over to `localhost:7860` and start using
-## Complete documentation is coming soon...🚀
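Before bringing the stack up, it can be worth verifying that the `MODEL` file actually exists under `MODELS_PATH`. A minimal sketch follows; the `check_env.py` helper is hypothetical and not part of this PR, and it assumes the simple `KEY="value"` format used in the `.env` file above:

```python
# check_env.py - hypothetical helper, NOT part of this PR: sanity-checks
# the .env values before running `docker compose up`.
import os

def load_env(path=".env"):
    """Parse simple KEY="value" lines, skipping comments and blanks."""
    env = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            env[key.strip()] = value.strip().strip('"')
    return env

env = load_env()
model_file = os.path.join(env["MODELS_PATH"], env["MODEL"])
if not os.path.isfile(model_file):
    raise SystemExit(f"Model not found: {model_file} (check MODELS_PATH and MODEL)")
print(f"OK: {model_file} exists; MAX_TOKENS={env['MAX_TOKENS']}")
```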
diff --git a/compose.yaml b/compose.yaml
index 7c81222..5c8e0d5 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -6,7 +6,7 @@ services:
   everything-ai:
     image: astrabert/everything-ai
     volumes:
-      - ${VOLUME}
+      - $VOLUME
     networks:
       - mynet
     ports:
@@ -19,4 +19,14 @@
     volumes:
       - "./qdrant_storage:/qdrant/storage"
     networks:
-      - mynet
\ No newline at end of file
+      - mynet
+  llama_server:
+    image: ghcr.io/ggerganov/llama.cpp:server
+    ports:
+      - "8000:8000"
+    volumes:
+      - "$MODELS_PATH:/models"
+    networks:
+      - mynet
+    command: "-m /models/$MODEL --port 8000 --host 0.0.0.0 -n $MAX_TOKENS"
+
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3e45b1d..98940ec 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -7,9 +7,6 @@ WORKDIR /app
 # Add the current directory contents into the container at /app
 ADD . /app
 
-#Upgrade gradio
-RUN pip install gradio_molecule3d
-
 # Expose the port that the application will run on
 EXPOSE 8760
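Once the containers are running, the new `llama_server` service can be smoke-tested from the host with the same `/completion` payload that `llama_cpp_int.py` (added below) sends. A minimal sketch, assuming the `8000:8000` port mapping from `compose.yaml`; the prompt is an arbitrary example:

```python
# Smoke test for the llama_server service added in compose.yaml;
# run on the host after `docker compose up`.
import requests

resp = requests.post(
    "http://localhost:8000/completion",  # endpoint also used by llama_cpp_int.py
    headers={"Content-Type": "application/json"},
    json={"prompt": "Once upon a time", "n_predict": 32},  # n_predict caps new tokens
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["content"])  # the generated continuation
```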
diff --git a/docker/llama_cpp_int.py b/docker/llama_cpp_int.py
new file mode 100644
index 0000000..c022646
--- /dev/null
+++ b/docker/llama_cpp_int.py
@@ -0,0 +1,131 @@
+from utils import Translation, PDFdatabase, NeuralSearcher
+import gradio as gr
+import requests
+from qdrant_client import QdrantClient
+from sentence_transformers import SentenceTransformer
+from argparse import ArgumentParser
+import os
+
+argparse = ArgumentParser()
+
+argparse.add_argument(
+    "-pf",
+    "--pdf_file",
+    help="Single pdf file or N pdfs reported like this: /path/to/file1.pdf,/path/to/file2.pdf,...,/path/to/fileN.pdf (there is no strict naming, you just need to provide them comma-separated)",
+    required=False,
+    default="No file"
+)
+
+argparse.add_argument(
+    "-d",
+    "--directory",
+    help="Directory where all your pdfs of interest are stored",
+    required=False,
+    default="No directory"
+)
+
+argparse.add_argument(
+    "-l",
+    "--language",
+    help="Language of the written content contained in the pdfs",
+    required=False,
+    default="Same as query"
+)
+
+args = argparse.parse_args()
+
+pdff = args.pdf_file
+dirs = args.directory
+lan = args.language
+
+# Build the pdf list either from the comma-separated file list or from a directory
+if pdff.replace("\\","").replace("'","") != "No file" and dirs.replace("\\","").replace("'","") == "No directory":
+    pdfs = pdff.replace("\\","/").replace("'","").split(",")
+else:
+    pdfs = [os.path.join(dirs.replace("\\","/").replace("'",""), f) for f in os.listdir(dirs.replace("\\","/").replace("'","")) if f.endswith(".pdf")]
+
+client = QdrantClient(host="host.docker.internal", port=6333)
+encoder = SentenceTransformer("all-MiniLM-L6-v2")
+
+pdfdb = PDFdatabase(pdfs, encoder, client)
+pdfdb.preprocess()
+pdfdb.collect_data()
+pdfdb.qdrant_collection_and_upload()
+
+
+def llama_cpp_respond(query, max_new_tokens):
+    """Send a completion request to the llama.cpp server."""
+    url = "http://host.docker.internal:8000/completion"
+    headers = {
+        "Content-Type": "application/json"
+    }
+    data = {
+        "prompt": query,
+        "n_predict": int(max_new_tokens)
+    }
+
+    response = requests.post(url, headers=headers, json=data)
+
+    a = response.json()
+    return a["content"]
+
+
+def reply(max_new_tokens, message):
+    global pdfdb
+    txt = Translation(message, "en")
+    if txt.original == "en" and lan.replace("\\","").replace("'","") == "None":
+        # English query, no document language specified
+        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
+        results = txt2txt.search(message)
+        response = llama_cpp_respond(results[0]["text"], max_new_tokens)
+        return response
+    elif txt.original == "en" and lan.replace("\\","").replace("'","") != "None":
+        # English query, document language specified
+        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
+        transl = Translation(message, lan.replace("\\","").replace("'",""))
+        message = transl.translatef()
+        results = txt2txt.search(message)
+        t = Translation(results[0]["text"], txt.original)
+        res = t.translatef()
+        response = llama_cpp_respond(res, max_new_tokens)
+        return response
+    elif txt.original != "en" and lan.replace("\\","").replace("'","") == "None":
+        # Non-English query, no document language specified
+        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
+        results = txt2txt.search(message)
+        transl = Translation(results[0]["text"], "en")
+        translation = transl.translatef()
+        response = llama_cpp_respond(translation, max_new_tokens)
+        t = Translation(response, txt.original)
+        res = t.translatef()
+        return res
+    else:
+        # Non-English query, document language specified
+        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
+        transl = Translation(message, lan.replace("\\","").replace("'",""))
+        message = transl.translatef()
+        results = txt2txt.search(message)
+        t = Translation(results[0]["text"], txt.original)
+        res = t.translatef()
+        response = llama_cpp_respond(res, max_new_tokens)
+        tr = Translation(response, txt.original)
+        ress = tr.translatef()
+        return ress
+
+demo = gr.Interface(
+    reply,
+    [
+        gr.Textbox(
+            label="Max new tokens",
+            info="The number reported should not be higher than the one specified within the .env file",
+            lines=3,
+            value="512",
+        ),
+        gr.Textbox(
+            label="Input query",
+            info="Write your input query here",
+            lines=3,
+            value="What are penguins?",
+        )
+    ],
+    title="everything-ai-llamacpp",
+    outputs="textbox"
+)
+demo.launch(server_name="0.0.0.0", share=False)
\ No newline at end of file
diff --git a/docker/retrieval_text_generation.py b/docker/retrieval_text_generation.py
index 251eb28..dd1e970 100644
--- a/docker/retrieval_text_generation.py
+++ b/docker/retrieval_text_generation.py
@@ -48,10 +48,10 @@
 lan = args.language
 
-if pdff.replace("\\","").replace("'","") != "None" and dirs.replace("\\","").replace("'","") == "None":
-    pdfs = pdff.replace("\\","").replace("'","").split(",")
+if pdff.replace("\\","").replace("'","") != "No file" and dirs.replace("\\","").replace("'","") == "No directory":
+    pdfs = pdff.replace("\\","/").replace("'","").split(",")
 else:
-    pdfs = [os.path.join(dirs.replace("\\","").replace("'",""), f) for f in os.listdir(dirs.replace("\\","").replace("'","")) if f.endswith(".pdf")]
+    pdfs = [os.path.join(dirs.replace("\\","/").replace("'",""), f) for f in os.listdir(dirs.replace("\\","/").replace("'","")) if f.endswith(".pdf")]
 
 client = QdrantClient(host="host.docker.internal", port="6333")
 encoder = SentenceTransformer("all-MiniLM-L6-v2")
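The motivation for the `replace("\\","/")` change above: stripping backslashes outright mangled Windows-style paths, while converting them to forward slashes keeps them usable. A small illustration (the paths are made-up examples):

```python
# Effect of the path normalization change on a Windows-style input.
pdff = "'C:\\Users\\User\\docs\\file1.pdf,C:\\Users\\User\\docs\\file2.pdf'"

old = pdff.replace("\\", "").replace("'", "").split(",")   # previous behavior
new = pdff.replace("\\", "/").replace("'", "").split(",")  # behavior after this PR

print(old)  # ['C:UsersUserdocsfile1.pdf', 'C:UsersUserdocsfile2.pdf'] - separators lost
print(new)  # ['C:/Users/User/docs/file1.pdf', 'C:/Users/User/docs/file2.pdf']
```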
"speech_recognition.py", "spaces-api-supabase": "spaces_api_supabase.py", "audio-classification": "audio_classification.py", "autotrain": "autotrain_interface.py"} +TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py", "retrieval-image-search": "retrieval_image_search.py", "protein-folding": "protein_folding_with_esm.py", "video-generation": "video_generation.py", "speech-recognition": "speech_recognition.py", "spaces-api-supabase": "spaces_api_supabase.py", "audio-classification": "audio_classification.py", "autotrain": "autotrain_interface.py", "llama.cpp-and-qdrant": "llama_cpp_int.py"} def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="512", gradioclient="None", supabaseurl="None", collectname="None", supenc="all-MiniLM-L6-v2", supdim="384"): - if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations" and tsk != "retrieval-image-search" and tsk != "autotrain" and tsk != "protein-folding" and tsk != "spaces-api-supabase": + if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations" and tsk != "retrieval-image-search" and tsk != "autotrain" and tsk != "protein-folding" and tsk != "spaces-api-supabase" and tsk != "llama.cpp-and-qdrant": sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}", shell=True) return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}" elif tsk == "retrieval-text-generation": sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'", shell=True) return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'" + elif tsk == "llama.cpp-and-qdrant": + sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -pf '{pdff}' -d '{dirs}' -l '{lan}'", shell=True) + return f"python3 {TASK_TO_SCRIPT[tsk]} -pf '{pdff}' -d '{dirs}' -l '{lan}'" elif tsk == "image-generation-pollinations" or tsk == "autotrain" or tsk == "protein-folding": sp.run(f"python3 {TASK_TO_SCRIPT[tsk]}", shell=True) return f"python3 {TASK_TO_SCRIPT[tsk]}" @@ -41,15 +44,15 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim=" ), gr.Textbox( label="PDF file(s)", - info="Single pdf file or N pdfs reported like this: /path/to/file1.pdf,/path/to/file2.pdf,...,/path/to/fileN.pdf (there is no strict naming, you just need to provide them comma-separated): only available with 'retrieval-text-generation'", + info="Single pdf file or N pdfs reported like this: /path/to/file1.pdf,/path/to/file2.pdf,...,/path/to/fileN.pdf (there is no strict naming, you just need to provide them comma-separated), please do not use '\\' as path separators: only available with 'retrieval-text-generation'", lines=3, - value="None", + value="No file", ), gr.Textbox( label="Directory", - info="Directory where all your pdfs or images (.jpg, .jpeg, .png) of interest are stored (only available with 'retrieval-text-generation' for pdfs and 'retrieval-image-search' for images)", + info="Directory where all your pdfs or images (.jpg, .jpeg, .png) of interest are stored (only available with 'retrieval-text-generation' for pdfs and 'retrieval-image-search' for images). Please do not use '\\' as path separators", lines=3, - value="None", + value="No directory", ), gr.Textbox( label="Language",