Adding ui for demo-question-answering (#7713)

Co-authored-by: berkecanrizai <[email protected]> GitOrigin-RevId: 57a508d9a63ffd15564e6a5fead3d3a2ff12731d
pathwaycom · Nov 21, 2024 · 60adb51 · 60adb51
1 parent f536ab9
commit 60adb51
Show file tree

Hide file tree

Showing 7 changed files with 242 additions and 13 deletions.
diff --git a/examples/pipelines/demo-question-answering/Dockerfile b/examples/pipelines/demo-question-answering/Dockerfile
@@ -11,6 +11,4 @@ RUN pip install -U --no-cache-dir -r requirements.txt
 
 COPY . .
 
-EXPOSE 8000
-
 CMD ["python", "app.py"]
diff --git a/examples/pipelines/demo-question-answering/README.md b/examples/pipelines/demo-question-answering/README.md
@@ -68,6 +68,7 @@ This folder contains several objects:
 - `Dockerfile`, the Docker configuration for running the pipeline in the container;
 - `.env`, a short environment variables configuration file where the OpenAI key must be stored;
 - `data/`, a folder with exemplary files that can be used for the test runs.
+- `ui/`, a simple UI written in Streamlit for asking questions.
 
 ## Pathway tooling
 - Prompts and helpers
@@ -206,20 +207,20 @@ Please note that the local run requires the dependencies to be installed. It can
 
 ### With Docker
 
-In order to let the pipeline get updated with each change in local files, you need to mount the folder onto the docker. The following commands show how to do that.
+Build the Docker images with:
 
-You can omit the ```-v `pwd`/data:/app/data``` part if you are not using local files as a source. 
 ```bash
-# Make sure you are in the right directory.
-cd examples/pipelines/demo-question-answering
+docker compose build
+```
 
-# Build the image in this folder
-docker build -t qa .
+Then start the containers with:
 
-# Run the image, mount the `data` folder into image and expose the port `8000`
-docker run -v `pwd`/data:/app/data -p 8000:8000 qa
+```bash
+docker compose up
 ```
 
+This will start the pipeline and the UI for asking questions.
+
 ### Query the documents
 You will see the logs for parsing & embedding documents in the Docker image logs. 
 Give it a few minutes to finish up on embeddings, you will see `0 entries (x minibatch(es)) have been...` message.
@@ -265,12 +266,12 @@ Search API gives you the ability to search in available inputs and get up-to-dat
 
 ```bash
 curl -X 'POST' \
-  'http://0.0.0.0:8000/v1/retrieve' \
+  'http://0.0.0.0:8006/v1/retrieve' \
   -H 'accept: */*' \
   -H 'Content-Type: application/json' \
   -d '{
-  "query": "What is the start date of the contract?",
-  "k": 2
+  "query": "Which articles of General Data Protection Regulation are relevant for clinical trials?",
+  "k": 6
 }'
 ```
 
@@ -341,3 +342,6 @@ To execute similar curl queries as above, you can visit [ai-pipelines page](http
 First, you can try adding your files and seeing changes in the index. To test index updates, simply add more files to the `data` folder.
 
 If you are using Google Drive or other sources, simply upload your files there.
+
+### Using the UI
+This pipeline includes a simple UI written in Streamlit. After you run the pipeline with `docker compose up`, you can access the UI at `http://localhost:8501`. This UI uses the `/v1/pw_ai_answer` endpoint to answer your questions.
diff --git a/examples/pipelines/demo-question-answering/docker-compose.yml b/examples/pipelines/demo-question-answering/docker-compose.yml
@@ -0,0 +1,27 @@
+services:
+  app:
+    build:
+      context: .
+    ports:
+      - "${PATHWAY_PORT:-8000}:${PATHWAY_PORT:-8000}"
+    networks:
+      - network
+    volumes:
+      - ./data:/app/data
+      - ./Cache:/app/Cache
+
+  ui:
+    build:
+      context: ui
+    networks:
+      - network
+    environment:
+      PATHWAY_HOST: "app"
+      PATHWAY_PORT: "${PATHWAY_PORT:-8000}"
+      UI_PORT: 8501
+    ports:
+      - "8501:8501"
+
+networks:
+  network:
+    driver: bridge
diff --git a/examples/pipelines/demo-question-answering/ui/Dockerfile b/examples/pipelines/demo-question-answering/ui/Dockerfile
@@ -0,0 +1,14 @@
+ARG PATHWAY_SRC_IMAGE=pathwaycom/pathway:latest
+
+FROM ${PATHWAY_SRC_IMAGE}
+
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /ui
+
+COPY requirements.txt .
+RUN pip install -U --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD exec streamlit run ui.py --server.port ${UI_PORT}
diff --git a/examples/pipelines/demo-question-answering/ui/favicon.ico b/examples/pipelines/demo-question-answering/ui/favicon.ico
diff --git a/examples/pipelines/demo-question-answering/ui/requirements.txt b/examples/pipelines/demo-question-answering/ui/requirements.txt
@@ -0,0 +1,6 @@
+streamlit==1.35.0
+load_dotenv==0.1.0
+nest_asyncio==1.6.0
+aiohttp==3.9.5
+beautifulsoup4==4.12.3
+openai==1.35.10
diff --git a/examples/pipelines/demo-question-answering/ui/ui.py b/examples/pipelines/demo-question-answering/ui/ui.py
@@ -0,0 +1,180 @@
+# Copyright © 2024 Pathway
+
+import logging
+import os
+
+import requests
+import streamlit as st
+from dotenv import load_dotenv
+from pathway.xpacks.llm.question_answering import RAGClient
+
+load_dotenv()
+
+PATHWAY_HOST = os.environ.get("PATHWAY_HOST", "app")
+PATHWAY_PORT = os.environ.get("PATHWAY_PORT", 8000)
+
+st.set_page_config(page_title="Pathway RAG App", page_icon="favicon.ico")
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(name)s %(levelname)s %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    force=True,
+)
+
+logger = logging.getLogger("streamlit")
+logger.setLevel(logging.INFO)
+
+conn = RAGClient(url=f"http://{PATHWAY_HOST}:{PATHWAY_PORT}")
+
+note = """
+<H4><b>Ask a question"""
+st.markdown(note, unsafe_allow_html=True)
+
+st.markdown(
+    """
+<style>
+div[data-baseweb="base-input"]{
+}
+input[class]{
+font-size:150%;
+color: black;}
+button[data-testid="baseButton-primary"], button[data-testid="baseButton-secondary"]{
+    border: none;
+    display: flex;
+    background-color: #E7E7E7;
+    color: #454545;
+    transition: color 0.3s;
+}
+button[data-testid="baseButton-primary"]:hover{
+    color: #1C1CF0;
+    background-color: rgba(28,28,240,0.3);
+}
+button[data-testid="baseButton-secondary"]:hover{
+    color: #DC280B;
+    background-color: rgba(220,40,11,0.3);
+}
+div[data-testid="stHorizontalBlock"]:has(button[data-testid="baseButton-primary"]){
+    display: flex;
+    flex-direction: column;
+    z-index: 0;
+    width: 3rem;
+
+    transform: translateY(-500px) translateX(672px);
+}
+</style>
+""",
+    unsafe_allow_html=True,
+)
+
+
+question = st.text_input(label="", placeholder="Ask your question?")
+
+
+def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
+    """Collect the distinct values stored under ``opt_key`` in the metadata entries.
+
+    Every dict in ``metadata_list`` is indexed with ``opt_key``, so an entry
+    without that key raises ``KeyError``. Duplicate values are collapsed; the
+    order of the returned list is unspecified because it comes from a set.
+    """
+    unique_values = {entry[opt_key] for entry in metadata_list}
+    return list(unique_values)
+
+
+logger.info("Requesting pw_list_documents...")
+document_meta_list = conn.pw_list_documents(keys=[])
+logger.info("Received response pw_list_documents")
+
+st.session_state["document_meta_list"] = document_meta_list
+
+available_files = get_options_list(st.session_state["document_meta_list"], "path")
+
+
+with st.sidebar:
+    # Link back to the source code of this example.
+    st.info(
+        body="See the source code [here](https://github.com/pathwaycom/llm-app/tree/main/examples/pipelines/demo-question-answering).",  # noqa: E501
+        icon=":material/code:",
+    )
+
+    # Keep only the last path component of every indexed file.
+    file_names = [path.rsplit("/", 1)[-1] for path in available_files]
+
+    # One-column markdown table: header, separator, then one row per file.
+    table_rows = "".join(f"| {file_name} |\n" for file_name in file_names)
+    markdown_table = "| Indexed files |\n| --- |\n" + table_rows
+    st.markdown(markdown_table, unsafe_allow_html=True)
+
+    # The button's return value is ignored here.
+    # NOTE(review): presumably the click alone re-runs the script and
+    # re-fetches the document list — confirm.
+    st.button("⟳ Refresh", use_container_width=True)
+
+css = """
+<style>
+.slider-container {
+    margin-top: 20px; /* Add some space between the main image and the slider */
+}
+
+.slider-item {
+    float: left;
+    margin: 10px;
+    width: 120px; /* Adjust the width to your liking */
+    // height: 50px; /* Adjust the height to your liking */
+    border: 1px solid #ccc;
+    border-radius: 5px;
+    cursor: pointer;
+}
+
+.slider-item img {
+    width: 100%;
+    height: 100%;
+    object-fit: cover;
+    border-radius: 5px;
+}
+
+.slider-wrapper {
+    display: flex;
+    justify-content: center;
+    flex-wrap: wrap;
+}
+
+.slider-item {
+    margin: 10px;
+}
+
+</style>"""
+
+
+st.markdown(css, unsafe_allow_html=True)
+
+
+def send_post_request(
+    url: str, data: dict, headers: dict | None = None, timeout: int | None = None
+):
+    """POST ``data`` as a JSON body to ``url`` and return the decoded JSON reply.
+
+    Args:
+        url: Full URL of the endpoint to call.
+        data: Payload, sent through the ``json=`` argument of ``requests.post``.
+        headers: Optional extra HTTP headers. ``None`` (the default) sends no
+            extra headers.
+        timeout: Value forwarded to ``requests.post`` as ``timeout``; ``None``
+            passes no limit.
+
+    Returns:
+        Whatever ``response.json()`` yields for the reply.
+
+    Raises:
+        requests.HTTPError: Raised by ``raise_for_status()`` when the server
+            answers with an error status.
+    """
+    # A fresh dict per call: a ``{}`` default would be one shared object that
+    # any mutation would leak into every later call.
+    if headers is None:
+        headers = {}
+    response = requests.post(url, json=data, headers=headers, timeout=timeout)
+    response.raise_for_status()
+    return response.json()
+
+
+if question:
+    # Record the incoming query as a structured log event.
+    logger.info(
+        {
+            "_type": "search_request_event",
+            "query": question,
+        }
+    )
+
+    api_url = f"http://{PATHWAY_HOST}:{PATHWAY_PORT}/v1/pw_ai_answer"
+    payload = {
+        "prompt": question,
+        "response_type": "long",
+    }
+    try:
+        with st.spinner("Retrieving response..."):
+            response = send_post_request(api_url, payload)
+    except requests.RequestException:
+        # Connection problems, HTTP error statuses and undecodable replies all
+        # derive from RequestException. Show a readable message instead of a
+        # raw traceback and end this script run.
+        logger.exception("Request to %s failed", api_url)
+        st.error("Could not retrieve an answer from the pipeline. Please try again.")
+        st.stop()
+
+    # Only the type of the reply is logged, not its content.
+    logger.info(
+        {
+            "_type": "search_response_event",
+            "query": question,
+            "response": type(response),
+        }
+    )
+
+    st.markdown(f"**Answering question:** {question}")
+    st.markdown(f"""{response}""")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,6 +11,4 @@ RUN pip install -U --no-cache-dir -r requirements.txt

		COPY . .

		EXPOSE 8000

		CMD ["python", "app.py"]