# pdf-rag-streamlit.py
import os
import logging

import streamlit as st
import ollama

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

# Configure logging
logging.basicConfig(level=logging.INFO)

# Constants
DOC_PATH = "./data/BOI.pdf"
MODEL_NAME = "llama3.2"
EMBEDDING_MODEL = "nomic-embed-text"
VECTOR_STORE_NAME = "simple-rag"
PERSIST_DIRECTORY = "./chroma_db"
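
# NOTE: this script assumes a local Ollama server is running and that the chat
# model has been pulled beforehand (e.g. `ollama pull llama3.2`); the embedding
# model is pulled programmatically in load_vector_db() below.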


def ingest_pdf(doc_path):
    """Load PDF documents."""
    if os.path.exists(doc_path):
        loader = UnstructuredPDFLoader(file_path=doc_path)
        data = loader.load()
        logging.info("PDF loaded successfully.")
        return data
    else:
        logging.error(f"PDF file not found at path: {doc_path}")
        st.error("PDF file not found.")
        return None
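
# UnstructuredPDFLoader parses the PDF via the `unstructured` library, which
# needs to be installed with its PDF extras (e.g. `pip install "unstructured[pdf]"`).
# In its default mode it returns the whole file as a single Document, which is
# why the text is split into chunks next.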


def split_documents(documents):
    """Split documents into smaller chunks."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
    chunks = text_splitter.split_documents(documents)
    logging.info("Documents split into chunks.")
    return chunks
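
# With chunk_size=1200 and chunk_overlap=300, consecutive chunks share roughly
# their trailing/leading 300 characters, so a sentence that straddles a boundary
# remains retrievable from at least one chunk. A rough sketch of the effect
# (offsets are illustrative, not exact, since the splitter prefers to break on
# separators like "\n\n" and "\n"):
#
#   chunk 1: chars    0 - 1200
#   chunk 2: chars  900 - 2100
#   chunk 3: chars 1800 - 3000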


@st.cache_resource
def load_vector_db():
    """Load or create the vector database."""
    # Pull the embedding model if not already available
    ollama.pull(EMBEDDING_MODEL)

    embedding = OllamaEmbeddings(model=EMBEDDING_MODEL)

    if os.path.exists(PERSIST_DIRECTORY):
        vector_db = Chroma(
            embedding_function=embedding,
            collection_name=VECTOR_STORE_NAME,
            persist_directory=PERSIST_DIRECTORY,
        )
        logging.info("Loaded existing vector database.")
    else:
        # Load and process the PDF document
        data = ingest_pdf(DOC_PATH)
        if data is None:
            return None

        # Split the documents into chunks
        chunks = split_documents(data)

        vector_db = Chroma.from_documents(
            documents=chunks,
            embedding=embedding,
            collection_name=VECTOR_STORE_NAME,
            persist_directory=PERSIST_DIRECTORY,
        )
        vector_db.persist()
        logging.info("Vector database created and persisted.")

    return vector_db
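
# @st.cache_resource keeps one vector-db instance alive across Streamlit
# reruns (every widget interaction re-executes this script top to bottom),
# so the embedding model is pulled and the PDF is indexed at most once per
# server process rather than on every question.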


def create_retriever(vector_db, llm):
    """Create a multi-query retriever."""
    QUERY_PROMPT = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from
a vector database. By generating multiple perspectives on the user question, your
goal is to help the user overcome some of the limitations of the distance-based
similarity search. Provide these alternative questions separated by newlines.
Original question: {question}""",
    )

    retriever = MultiQueryRetriever.from_llm(
        vector_db.as_retriever(), llm, prompt=QUERY_PROMPT
    )
    logging.info("Retriever created.")
    return retriever
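
# MultiQueryRetriever asks the LLM to rewrite the question (five variants, per
# the prompt above), runs a similarity search for each variant, and returns the
# de-duplicated union of the retrieved chunks. A hypothetical example of the
# kind of expansion this produces (actual output varies by model):
#
#   original: "What are the filing deadlines?"
#   variants: "When must the report be filed?"
#             "What is the due date for submission?"
#             ...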


def create_chain(retriever, llm):
    """Create the RAG chain."""
    # RAG prompt
    template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    logging.info("Chain created.")
    return chain
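
# Data flow through the LCEL pipe, sketched for one invocation:
#   chain.invoke("my question")
#     -> {"context": <retrieved docs>, "question": "my question"}
#     -> prompt formatted with context and question
#     -> llm (returns an AIMessage)
#     -> StrOutputParser() (returns a plain str)
# RunnablePassthrough() forwards the raw input string unchanged into the
# "question" slot while the retriever fills "context" with matching chunks.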


def main():
    st.title("Document Assistant")

    # User input
    user_input = st.text_input("Enter your question:", "")

    if user_input:
        with st.spinner("Generating response..."):
            try:
                # Initialize the language model
                llm = ChatOllama(model=MODEL_NAME)

                # Load the vector database
                vector_db = load_vector_db()
                if vector_db is None:
                    st.error("Failed to load or create the vector database.")
                    return

                # Create the retriever
                retriever = create_retriever(vector_db, llm)

                # Create the chain
                chain = create_chain(retriever, llm)

                # Get the response
                response = chain.invoke(input=user_input)

                st.markdown("**Assistant:**")
                st.write(response)
            except Exception as e:
                st.error(f"An error occurred: {str(e)}")
    else:
        st.info("Please enter a question to get started.")


if __name__ == "__main__":
    main()
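
# Usage: launch with `streamlit run pdf-rag-streamlit.py` (assumes the PDF
# exists at ./data/BOI.pdf and an Ollama server is reachable locally). The
# first question triggers indexing; later questions reuse the cached store.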