From 397f7b80bd86a1e7da78e94f7e0f75b21436ac56 Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:35:50 -0600 Subject: [PATCH 1/8] First commit of llamacpp Opea component Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- .../llms/text-generation/llamacpp/Dockerfile | 27 ++++++ comps/llms/text-generation/llamacpp/README.md | 84 +++++++++++++++++++ .../llms/text-generation/llamacpp/__init__.py | 2 + .../llamacpp/docker_compose_llm.yaml | 39 +++++++++ .../text-generation/llamacpp/entrypoint.sh | 8 ++ comps/llms/text-generation/llamacpp/llm.py | 65 ++++++++++++++ .../llamacpp/requirements-runtime.txt | 1 + .../text-generation/llamacpp/requirements.txt | 12 +++ 8 files changed, 238 insertions(+) create mode 100644 comps/llms/text-generation/llamacpp/Dockerfile create mode 100644 comps/llms/text-generation/llamacpp/README.md create mode 100644 comps/llms/text-generation/llamacpp/__init__.py create mode 100644 comps/llms/text-generation/llamacpp/docker_compose_llm.yaml create mode 100644 comps/llms/text-generation/llamacpp/entrypoint.sh create mode 100644 comps/llms/text-generation/llamacpp/llm.py create mode 100644 comps/llms/text-generation/llamacpp/requirements-runtime.txt create mode 100644 comps/llms/text-generation/llamacpp/requirements.txt diff --git a/comps/llms/text-generation/llamacpp/Dockerfile b/comps/llms/text-generation/llamacpp/Dockerfile new file mode 100644 index 0000000000..a362c3bf67 --- /dev/null +++ b/comps/llms/text-generation/llamacpp/Dockerfile @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +# Assumes we're building from the GenAIComps directory. +COPY ../../../comps /home/user/comps + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r /home/user/comps/llms/text-generation/llamacpp/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/llms/text-generation/llamacpp/ + +ENTRYPOINT ["bash", "entrypoint.sh"] diff --git a/comps/llms/text-generation/llamacpp/README.md b/comps/llms/text-generation/llamacpp/README.md new file mode 100644 index 0000000000..b8f64aac04 --- /dev/null +++ b/comps/llms/text-generation/llamacpp/README.md @@ -0,0 +1,84 @@ +# Introduction + +[llama.cpp](https://github.com/ggerganov/llama.cpp) provides inference in pure C/C++, and enables "LLM inference with minimal setup and state-of-the-art performance on a wide range of hardware - locally and in the cloud". + +This OPEA component wraps llama.cpp server so that it can interface with other OPEA components, or for creating OPEA Megaservices. + +## TLDR + +```bash +cd GenAIComps/ +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yml up +``` + +Please note it's instructive to run and validate each the llama.cpp server and OPEA component below. + +## 1. Run the llama.cpp server + +```bash +cd GenAIComps +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up llamacpp-server --force-recreate +``` + +Notes: + +i) If you prefer to run above in the background without screen output use `up -d` . The `--force-recreate` clears cache. 
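For example, to start the server detached and confirm it is ready (this sketch assumes the default `8080` port mapping from docker_compose_llm.yaml; the `/health` route is the llama.cpp server health endpoint and may behave differently on older builds):

```bash
cd GenAIComps
docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up -d llamacpp-server

# Watch the startup / model-download logs until the server reports it is ready.
docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml logs -f llamacpp-server

# Health check: returns 503 while the model is still loading, and an "ok" status once ready.
curl http://localhost:8080/health
```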
+ +ii) To tear down the llama.cpp server and remove the container: + +`docker compose -f comps/llms/text-generation/llamacpp/langchain/docker_compose_llm.yaml llamacpp-server down` + +iii) For [llama.cpp settings](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) please specify them in the docker_compose_llm.yaml file. + +#### Verify the llama.cpp Service: + +```bash +curl --request POST \ + --url http://localhost:8080/completion \ + --header "Content-Type: application/json" \ + --data '{"prompt": "Building a website can be done in 10 simple steps:","n_predict": 128}' +``` + +## 2. Run the llama.cpp OPEA Service + +This is essentially a wrapper component of Llama.cpp server. OPEA nicely standardizes and verifies LLM inputs with LLMParamsDoc class (see llm.py). + +### 2.1 Build the llama.cpp OPEA image: + +```bash +cd GenAIComps/ +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yml up llama-opea-llm +``` + +Equivalently, the above can be achieved with `build` and `run` from the Dockerfile. Build: + +```bash +cd GenAIComps/ +docker build --no-cache -t opea/llm-llamacpp:latest \ + --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy \ + -f comps/llms/text-generation/llamacpp/Dockerfile . +``` + +And run: + +```bash +docker run --network host -e http_proxy=$http_proxy -e https_proxy=$https_proxy \ + opea/llm-llamacpp:latest +``` + +### 2.3 Consume the llama.cpp Microservice: + +```bash +curl http://127.0.0.1:9000/v1/chat/completions -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":32,"top_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ + -H 'Content-Type: application/json' +``` + +### Notes + +Tearing down services and removing containers: + +```bash +cd GenAIComps/comps/llms/text-generation/llamacpp/ +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml down +``` diff --git a/comps/llms/text-generation/llamacpp/__init__.py b/comps/llms/text-generation/llamacpp/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/comps/llms/text-generation/llamacpp/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml b/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml new file mode 100644 index 0000000000..88937ff0d6 --- /dev/null +++ b/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml @@ -0,0 +1,39 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + llamacpp-server: + image: ghcr.io/ggerganov/llama.cpp:server + ports: + - 8080:8080 + environment: + # Refer to settings here: https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md + # Llama.cpp is based on .gguf format, and Hugging Face offers many .gguf format models. + LLAMA_ARG_MODEL_URL: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf + LLAMA_ARG_CTX_SIZE: 4096 + LLAMA_ARG_N_PARALLEL: 2 + LLAMA_ARG_ENDPOINT_METRICS: 1 + LLAMA_ARG_PORT: 8080 + + llamacpp-opea-llm: + image: opea/llm-llamacpp:latest + build: + # Set this to allow COPY comps in the Dockerfile. + # When using docker compose with -f, the comps context is 4 levels down from docker_compose_llm.yaml. 
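      # In other words, the path below points four directory levels up from this file,
      # at the GenAIComps repository root, which contains the `comps/` folder the Dockerfile copies.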
+ context: ../../../../ + dockerfile: ./comps/llms/text-generation/llamacpp/Dockerfile + depends_on: + - llamacpp-server + ports: + - "9000:9000" + network_mode: "host" # equivalent to: docker run --network host ... + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + # LLAMACPP_ENDPOINT: ${LLAMACPP_ENDPOINT} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/llms/text-generation/llamacpp/entrypoint.sh b/comps/llms/text-generation/llamacpp/entrypoint.sh new file mode 100644 index 0000000000..c9a5a3d07e --- /dev/null +++ b/comps/llms/text-generation/llamacpp/entrypoint.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# pip --no-cache-dir install -r requirements-runtime.txt + +python llm.py diff --git a/comps/llms/text-generation/llamacpp/llm.py b/comps/llms/text-generation/llamacpp/llm.py new file mode 100644 index 0000000000..5612199eb0 --- /dev/null +++ b/comps/llms/text-generation/llamacpp/llm.py @@ -0,0 +1,65 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import openai +from fastapi.responses import StreamingResponse + +from comps import CustomLogger, LLMParamsDoc, ServiceType, opea_microservices, register_microservice + +logger = CustomLogger("llm_llamacpp") +logflag = os.getenv("LOGFLAG", False) +llamacpp_endpoint = os.getenv("LLAMACPP_ENDPOINT", "http://localhost:8080/") + + +# OPEA microservice wrapper of llama.cpp +# llama.cpp server uses openai API format: https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md +@register_microservice( + name="opea_service@llm_llamacpp", + service_type=ServiceType.LLM, + endpoint="/v1/chat/completions", + host="0.0.0.0", + port=9000, +) +async def llm_generate(input: LLMParamsDoc): + if logflag: + logger.info(input) + logger.info(llamacpp_endpoint) + + client = openai.OpenAI( + base_url=llamacpp_endpoint, api_key="sk-no-key-required" # "http://:port" + ) + + # Llama.cpp works with openai API format + # The openai api doesn't have top_k parameter + # https://community.openai.com/t/which-openai-gpt-models-if-any-allow-specifying-top-k/777982/2 + chat_completion = client.chat.completions.create( + model=input.model, + messages=[{"role": "user", "content": input.query}], + max_tokens=input.max_tokens, + temperature=input.temperature, + top_p=input.top_p, + frequency_penalty=input.frequency_penalty, + presence_penalty=input.presence_penalty, + stream=input.streaming, + ) + + if input.streaming: + + def stream_generator(): + for c in chat_completion: + if logflag: + logger.info(c) + yield f"data: {c.model_dump_json()}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + if logflag: + logger.info(chat_completion) + return chat_completion + + +if __name__ == "__main__": + opea_microservices["opea_service@llm_llamacpp"].start() diff --git a/comps/llms/text-generation/llamacpp/requirements-runtime.txt b/comps/llms/text-generation/llamacpp/requirements-runtime.txt new file mode 100644 index 0000000000..225adde271 --- /dev/null +++ b/comps/llms/text-generation/llamacpp/requirements-runtime.txt @@ -0,0 +1 @@ +langserve diff --git a/comps/llms/text-generation/llamacpp/requirements.txt b/comps/llms/text-generation/llamacpp/requirements.txt new file mode 100644 index 0000000000..fdb5f5a016 --- /dev/null +++ b/comps/llms/text-generation/llamacpp/requirements.txt @@ -0,0 
+1,12 @@ +aiohttp +docarray[full] +fastapi +huggingface_hub +openai +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +prometheus-fastapi-instrumentator +shortuuid +transformers +uvicorn From cb4f5e59a53161ea893dc6fa38ee49266d7a3f69 Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:50:26 -0600 Subject: [PATCH 2/8] Removed unneeded requirements file Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- comps/llms/text-generation/llamacpp/requirements-runtime.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 comps/llms/text-generation/llamacpp/requirements-runtime.txt diff --git a/comps/llms/text-generation/llamacpp/requirements-runtime.txt b/comps/llms/text-generation/llamacpp/requirements-runtime.txt deleted file mode 100644 index 225adde271..0000000000 --- a/comps/llms/text-generation/llamacpp/requirements-runtime.txt +++ /dev/null @@ -1 +0,0 @@ -langserve From 2a48bae8e3231c82a370b18ae681968997ed36b7 Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Mon, 6 Jan 2025 15:38:25 -0600 Subject: [PATCH 3/8] Pin the llama.cpp server version, and fix small typo Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- comps/llms/text-generation/llamacpp/README.md | 2 +- comps/llms/text-generation/llamacpp/docker_compose_llm.yaml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/comps/llms/text-generation/llamacpp/README.md b/comps/llms/text-generation/llamacpp/README.md index b8f64aac04..7b7ffa7d5a 100644 --- a/comps/llms/text-generation/llamacpp/README.md +++ b/comps/llms/text-generation/llamacpp/README.md @@ -8,7 +8,7 @@ This OPEA component wraps llama.cpp server so that it can interface with other O ```bash cd GenAIComps/ -docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yml up +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up ``` Please note it's instructive to run and validate each the llama.cpp server and OPEA component below. diff --git a/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml b/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml index 88937ff0d6..9a718661bc 100644 --- a/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml +++ b/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml @@ -3,7 +3,8 @@ services: llamacpp-server: - image: ghcr.io/ggerganov/llama.cpp:server + # image: ghcr.io/ggerganov/llama.cpp:server + image: ghcr.io/ggerganov/llama.cpp:server-b4419 ports: - 8080:8080 environment: From 4e8215225a2afb528137dc0598731af59e42e1bb Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Mon, 6 Jan 2025 15:55:50 -0600 Subject: [PATCH 4/8] Update README.md to describe hardware support, and provide reference. Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- comps/llms/text-generation/llamacpp/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/comps/llms/text-generation/llamacpp/README.md b/comps/llms/text-generation/llamacpp/README.md index 7b7ffa7d5a..d1e5054a2e 100644 --- a/comps/llms/text-generation/llamacpp/README.md +++ b/comps/llms/text-generation/llamacpp/README.md @@ -4,6 +4,10 @@ This OPEA component wraps llama.cpp server so that it can interface with other OPEA components, or for creating OPEA Megaservices. 
+llama.cpp supports this [hardware](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#supported-backends), and has only been tested on CPU. + +To use a CUDA server please refer to [this llama.cpp reference](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md#docker) and modify docker_compose_llm.yaml accordingly. + ## TLDR ```bash @@ -47,7 +51,7 @@ This is essentially a wrapper component of Llama.cpp server. OPEA nicely standar ```bash cd GenAIComps/ -docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yml up llama-opea-llm +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up llama-opea-llm ``` Equivalently, the above can be achieved with `build` and `run` from the Dockerfile. Build: From baf381dca98ae237347db41fb0fcdd4b64943f86 Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:03:42 -0600 Subject: [PATCH 5/8] Updated docker_compose_llm.yaml so that the llamacpp-server so the pulled image has specific tag. Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- comps/llms/text-generation/llamacpp/docker_compose_llm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml b/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml index 9a718661bc..dd220b6f1a 100644 --- a/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml +++ b/comps/llms/text-generation/llamacpp/docker_compose_llm.yaml @@ -3,7 +3,6 @@ services: llamacpp-server: - # image: ghcr.io/ggerganov/llama.cpp:server image: ghcr.io/ggerganov/llama.cpp:server-b4419 ports: - 8080:8080 From 9d7539dd213b017879c607b94c4336520b8fc64e Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Tue, 7 Jan 2025 11:04:43 -0600 Subject: [PATCH 6/8] Small adjustments to README.md Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- comps/llms/text-generation/llamacpp/README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/comps/llms/text-generation/llamacpp/README.md b/comps/llms/text-generation/llamacpp/README.md index d1e5054a2e..15a96ca1fb 100644 --- a/comps/llms/text-generation/llamacpp/README.md +++ b/comps/llms/text-generation/llamacpp/README.md @@ -28,9 +28,9 @@ Notes: i) If you prefer to run above in the background without screen output use `up -d` . The `--force-recreate` clears cache. -ii) To tear down the llama.cpp server and remove the container: +ii) To stop the llama.cpp server: -`docker compose -f comps/llms/text-generation/llamacpp/langchain/docker_compose_llm.yaml llamacpp-server down` +`docker compose -f comps/llms/text-generation/llamacpp/langchain/docker_compose_llm.yaml llamacpp-server stop` iii) For [llama.cpp settings](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) please specify them in the docker_compose_llm.yaml file. @@ -80,9 +80,11 @@ curl http://127.0.0.1:9000/v1/chat/completions -X POST \ ### Notes -Tearing down services and removing containers: +Stopping services: ```bash cd GenAIComps/comps/llms/text-generation/llamacpp/ -docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml down +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml stop ``` + +`down` may be used instead of 'stop' if you'd like to stop and delete the containers. 
\ No newline at end of file From fd15ee7529e98ae81c8d4b04483e6f2f1209215c Mon Sep 17 00:00:00 2001 From: Ed Lee <16417837+edlee123@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:13:47 -0600 Subject: [PATCH 7/8] This removes unneeded dependencies in the Dockerfile, unneeded entrypoint.sh Signed-off-by: Ed Lee <16417837+edlee123@users.noreply.github.com> --- comps/llms/text-generation/llamacpp/Dockerfile | 9 ++------- comps/llms/text-generation/llamacpp/README.md | 8 ++++---- .../text-generation/llamacpp/docker_compose_llm.yaml | 3 +-- comps/llms/text-generation/llamacpp/entrypoint.sh | 8 -------- 4 files changed, 7 insertions(+), 21 deletions(-) delete mode 100644 comps/llms/text-generation/llamacpp/entrypoint.sh diff --git a/comps/llms/text-generation/llamacpp/Dockerfile b/comps/llms/text-generation/llamacpp/Dockerfile index a362c3bf67..70500e35d7 100644 --- a/comps/llms/text-generation/llamacpp/Dockerfile +++ b/comps/llms/text-generation/llamacpp/Dockerfile @@ -3,18 +3,13 @@ FROM python:3.11-slim -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - curl \ - libgl1-mesa-glx \ - libjemalloc-dev - RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ chown -R user /home/user/ USER user -# Assumes we're building from the GenAIComps directory. +# Assumes we're building from the GenAIComps directory, and docker file is in comps/llms/text-generation/llamacpp COPY ../../../comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ @@ -24,4 +19,4 @@ ENV PYTHONPATH=$PYTHONPATH:/home/user WORKDIR /home/user/comps/llms/text-generation/llamacpp/ -ENTRYPOINT ["bash", "entrypoint.sh"] +ENTRYPOINT ["python", "llm.py"] \ No newline at end of file diff --git a/comps/llms/text-generation/llamacpp/README.md b/comps/llms/text-generation/llamacpp/README.md index 15a96ca1fb..e03fd7c365 100644 --- a/comps/llms/text-generation/llamacpp/README.md +++ b/comps/llms/text-generation/llamacpp/README.md @@ -21,12 +21,12 @@ Please note it's instructive to run and validate each the llama.cpp server and O ```bash cd GenAIComps -docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up llamacpp-server --force-recreate +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up llamacpp-server ``` Notes: -i) If you prefer to run above in the background without screen output use `up -d` . The `--force-recreate` clears cache. +i) If you prefer to run above in the background without screen output use `up -d`. ii) To stop the llama.cpp server: @@ -51,7 +51,7 @@ This is essentially a wrapper component of Llama.cpp server. OPEA nicely standar ```bash cd GenAIComps/ -docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up llama-opea-llm +docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml up llamacpp-opea-llm --force-recreate ``` Equivalently, the above can be achieved with `build` and `run` from the Dockerfile. Build: @@ -87,4 +87,4 @@ cd GenAIComps/comps/llms/text-generation/llamacpp/ docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml stop ``` -`down` may be used instead of 'stop' if you'd like to stop and delete the containers. \ No newline at end of file +`down` may be used instead of 'stop' if you'd like to stop, and delete the containers and networks. 
\ No newline at end of file
From c931902d616cb692fc3ffb54af4ad165adcdde4d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 10 Jan 2025 19:18:51 +0000
Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/llms/text-generation/llamacpp/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comps/llms/text-generation/llamacpp/README.md b/comps/llms/text-generation/llamacpp/README.md
index e03fd7c365..00b8e0b778 100644
--- a/comps/llms/text-generation/llamacpp/README.md
+++ b/comps/llms/text-generation/llamacpp/README.md
@@ -87,4 +87,4 @@ cd GenAIComps/comps/llms/text-generation/llamacpp/
 docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml stop
 ```

-`down` may be used instead of 'stop' if you'd like to stop, and delete the containers and networks.
\ No newline at end of file
+`down` may be used instead of `stop` if you'd like to stop and delete the containers and networks.
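+
+For example (both commands assume they are run from the GenAIComps repository root, as in the TLDR section):
+
+```bash
+# Stop the containers but keep them; `docker compose ... start` resumes them later.
+docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml stop
+
+# Stop and remove the containers and the default network created by compose.
+docker compose -f comps/llms/text-generation/llamacpp/docker_compose_llm.yaml down
+```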