-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
125 additions
and
200 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,16 @@ | ||
from langchain.chains import ConversationalRetrievalChain | ||
from app.chat.models import ChatArgs | ||
|
||
from app.chat.vector_stores.pinecone import build_retriever | ||
from app.chat.llms.chatopenai import build_llm | ||
from app.chat.memories.sql_memory import build_memory | ||
|
||
def build_chat(chat_args: ChatArgs):
    """
    Assemble the conversational retrieval chain for a chat session.

    The retriever, LLM, and memory are each built from ``chat_args`` and
    passed to ``ConversationalRetrievalChain.from_llm``.

    :param chat_args: ChatArgs object containing
        conversation_id, pdf_id, metadata, and streaming flag.
    :return: A chain

    Example Usage:

        chain = build_chat(chat_args)
    """
    # Keyword arguments are evaluated left to right, so the builders run in
    # the same order as before: retriever, then llm, then memory.
    return ConversationalRetrievalChain.from_llm(
        retriever=build_retriever(chat_args),
        llm=build_llm(chat_args),
        memory=build_memory(chat_args),
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,27 @@ | ||
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from app.chat.vector_stores.pinecone import vector_store


def create_embeddings_for_pdf(pdf_id: str, pdf_path: str):
    """
    Generate and store embeddings for the given pdf

    1. Extract text from the specified PDF.
    2. Divide the extracted text into manageable chunks.
    3. Generate an embedding for each chunk.
    4. Persist the generated embeddings.

    :param pdf_id: The unique identifier for the PDF.
    :param pdf_path: The file path to the PDF.

    Example Usage:

        create_embeddings_for_pdf('123456', '/path/to/pdf')
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )

    loader = PyPDFLoader(file_path=pdf_path)
    documents = loader.load_and_split(text_splitter)

    # Replace each chunk's metadata with only the fields stored alongside the
    # vector. "pdf_id" is the key the retriever filters on.
    for doc in documents:
        doc.metadata = {
            "page": doc.metadata["page"],
            "text": doc.page_content,
            "pdf_id": pdf_id,
        }

    vector_store.add_documents(documents)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from langchain.embeddings import OpenAIEmbeddings | ||
|
||
# Module-level embeddings object, created once at import time and imported by
# the Pinecone vector store module.
embeddings = OpenAIEmbeddings()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# from langchain.chains import ConversationalRetrievalChain | ||
# from langchain_openai import ChatOpenAI | ||
# from app.chat.vector_stores.pinecone import build_retriever | ||
# from app.chat.llms.chatopenai import build_llm | ||
# from app.chat.memories.sql_memory import build_memory | ||
|
||
# def build_llm(chat_args): | ||
# retriever = build_retriever(chat_args) | ||
# llm = build_llm(chat_args) | ||
# memory = build_memory(chat_args) | ||
# return ConversationalRetrievalChain.from_llm( | ||
# llm=llm, | ||
# retriever=retriever, | ||
# memory=memory | ||
# ) | ||
|
||
from langchain.chat_models import ChatOpenAI | ||
|
||
def build_llm(chat_args):
    """
    Create the chat model used by the conversation chain.

    :param chat_args: Chat arguments; currently not read by this builder,
        so the model is constructed with its default settings.
    :return: A new ChatOpenAI instance.
    """
    chat_model = ChatOpenAI()
    return chat_model
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
from pydantic import BaseModel | ||
from langchain.memory import ConversationBufferMemory | ||
from langchain.schema import BaseChatMessageHistory | ||
|
||
from app.web.api import( | ||
get_messages_by_conversation_id, | ||
add_message_to_conversation | ||
) | ||
|
||
class SqlMessageHistory(BaseChatMessageHistory, BaseModel):
    """
    Chat message history for a single conversation.

    Reads and writes are delegated to the helpers imported from
    ``app.web.api``, keyed by ``conversation_id``.
    """

    # Identifier of the conversation whose messages this history exposes.
    conversation_id: str

    @property
    def messages(self):
        """Return the messages fetched for this conversation."""
        conversation_id = self.conversation_id
        return get_messages_by_conversation_id(conversation_id)

    def add_message(self, message):
        """
        Store ``message`` under this conversation.

        :param message: Object exposing ``type`` (stored as the role) and
            ``content``.
        :return: Whatever ``add_message_to_conversation`` returns.
        """
        return add_message_to_conversation(
            conversation_id=self.conversation_id,
            role=message.type,
            content=message.content,
        )

    def clear(self):
        """Do nothing; stored messages are left untouched."""
        return None
|
||
def build_memory(chat_args):
    """
    Create the conversation memory for a chat session.

    :param chat_args: Object whose ``conversation_id`` selects the
        conversation the history is bound to.
    :return: A ConversationBufferMemory that returns message objects, exposes
        them under the "chat_history" key, and records the "answer" output.
    """
    history = SqlMessageHistory(conversation_id=chat_args.conversation_id)
    return ConversationBufferMemory(
        chat_memory=history,
        return_messages=True,
        memory_key="chat_history",
        output_key="answer",
    )
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import os | ||
import pinecone | ||
from langchain.vectorstores.pinecone import Pinecone | ||
from app.chat.embeddings.openai import embeddings | ||
|
||
# NOTE(review): the object returned by pinecone.Pinecone(...) is not bound to
# a name, so nothing below uses it directly. Confirm whether this call is
# still required or whether the vector store picks up its credentials some
# other way.
pinecone.Pinecone(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV_NAME"),
)

# Shared vector store bound to the index named by PINECONE_INDEX_NAME; created
# once at import time.
vector_store = Pinecone.from_existing_index(
    index_name=os.getenv("PINECONE_INDEX_NAME"),
    embedding=embeddings,
)
|
||
def build_retriever(chat_args):
    """
    Create a retriever restricted to a single PDF's vectors.

    :param chat_args: Object whose ``pdf_id`` is used as the metadata filter.
    :return: The retriever produced by ``vector_store.as_retriever`` with a
        ``{"pdf_id": ...}`` filter in its search kwargs.
    """
    pdf_filter = {"pdf_id": chat_args.pdf_id}
    return vector_store.as_retriever(search_kwargs={"filter": pdf_filter})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.