Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Ketan-K17 committed Jul 7, 2024
1 parent 53cbe8e commit 4048d6f
Show file tree
Hide file tree
Showing 16 changed files with 125 additions and 200 deletions.
24 changes: 12 additions & 12 deletions app/chat/chat.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from langchain.chains import ConversationalRetrievalChain
from app.chat.models import ChatArgs

from app.chat.vector_stores.pinecone import build_retriever
from app.chat.llms.chatopenai import build_llm
from app.chat.memories.sql_memory import build_memory

def build_chat(chat_args: ChatArgs):
    """
    Assemble the conversational retrieval chain for a chat.

    A retriever, an LLM and a memory object are each built from
    ``chat_args`` and then combined with
    ``ConversationalRetrievalChain.from_llm``.

    :param chat_args: ChatArgs object containing
        conversation_id, pdf_id, metadata, and streaming flag.
    :return: A chain

    Example Usage:

        chain = build_chat(chat_args)
    """
    retriever = build_retriever(chat_args)
    llm = build_llm(chat_args)
    memory = build_memory(chat_args)

    # The leftover placeholder `pass` that used to precede this return has
    # been removed; the function now reads straight through to the result.
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        memory=memory,
        retriever=retriever,
    )
35 changes: 22 additions & 13 deletions app/chat/create_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from app.chat.vector_stores.pinecone import vector_store


def create_embeddings_for_pdf(pdf_id: str, pdf_path: str):
    """
    Generate and store embeddings for the given pdf

    1. Extract text from the specified PDF.
    2. Divide the extracted text into manageable chunks.
    3. Generate an embedding for each chunk.
    4. Persist the generated embeddings.

    :param pdf_id: The unique identifier for the PDF.
    :param pdf_path: The file path to the PDF.

    Example Usage:

        create_embeddings_for_pdf('123456', '/path/to/pdf')
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )

    # Load the PDF and split it into chunks in a single step.
    loader = PyPDFLoader(file_path=pdf_path)
    documents = loader.load_and_split(text_splitter)

    for doc in documents:
        # Replace the loader-supplied metadata with exactly the fields kept
        # alongside each chunk; "pdf_id" is the key build_retriever filters on.
        doc.metadata = {
            "page": doc.metadata["page"],
            "text": doc.page_content,
            "pdf_id": pdf_id,
        }

    vector_store.add_documents(documents)
Empty file added app/chat/embeddings/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions app/chat/embeddings/openai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from langchain.embeddings import OpenAIEmbeddings

# Module-level OpenAIEmbeddings instance, constructed with default arguments
# when this module is first imported.
embeddings = OpenAIEmbeddings()
20 changes: 20 additions & 0 deletions app/chat/llms/chatopenai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# from langchain.chains import ConversationalRetrievalChain
# from langchain_openai import ChatOpenAI
# from app.chat.vector_stores.pinecone import build_retriever
# from app.chat.llms.chatopenai import build_llm
# from app.chat.memories.sql_memory import build_memory

# def build_llm(chat_args):
# retriever = build_retriever(chat_args)
# llm = build_llm(chat_args)
# memory = build_memory(chat_args)
# return ConversationalRetrievalChain.from_llm(
# llm=llm,
# retriever=retriever,
# memory=memory
# )

from langchain.chat_models import ChatOpenAI

def build_llm(chat_args):
    """
    Create the chat model instance.

    :param chat_args: Not read by this function.
    :return: A ChatOpenAI object constructed with no arguments.
    """
    llm = ChatOpenAI()
    return llm
Empty file added app/chat/memories/__init__.py
Empty file.
34 changes: 34 additions & 0 deletions app/chat/memories/sql_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from pydantic import BaseModel
from langchain.memory import ConversationBufferMemory
from langchain.schema import BaseChatMessageHistory

from app.web.api import(
get_messages_by_conversation_id,
add_message_to_conversation
)

class SqlMessageHistory(BaseChatMessageHistory, BaseModel):
    """
    Chat message history that delegates reads and writes to the
    conversation helpers imported from ``app.web.api``.
    """

    # Identifier handed to the api helpers on every read and write.
    conversation_id: str

    @property
    def messages(self):
        """Return the result of get_messages_by_conversation_id for this conversation."""
        stored = get_messages_by_conversation_id(self.conversation_id)
        return stored

    def add_message(self, message):
        """
        Forward ``message`` to add_message_to_conversation.

        ``message.type`` is sent as the role and ``message.content`` as the
        content; the helper's return value is passed back to the caller.
        """
        conversation_id = self.conversation_id
        role = message.type
        content = message.content
        return add_message_to_conversation(
            conversation_id=conversation_id,
            role=role,
            content=content,
        )

    def clear(self):
        """Do nothing; no messages are removed."""
        pass

def build_memory(chat_args):
    """
    Build the conversation memory for a chat.

    :param chat_args: Object whose ``conversation_id`` selects the history.
    :return: A ConversationBufferMemory wrapping a SqlMessageHistory, with
        return_messages enabled, memory key "chat_history" and output key
        "answer".
    """
    history = SqlMessageHistory(conversation_id=chat_args.conversation_id)
    return ConversationBufferMemory(
        chat_memory=history,
        return_messages=True,
        memory_key="chat_history",
        output_key="answer",
    )
Empty file.
19 changes: 19 additions & 0 deletions app/chat/vector_stores/pinecone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import os
import pinecone
from langchain.vectorstores.pinecone import Pinecone
from app.chat.embeddings.openai import embeddings

# Construct a Pinecone client from the PINECONE_API_KEY and PINECONE_ENV_NAME
# environment variables (os.getenv yields None for any that are unset).
# NOTE(review): the constructed object is not assigned to a name here, so
# nothing below can reference it — confirm that construction alone provides
# the configuration the LangChain Pinecone store needs with the installed
# pinecone client version.
pinecone.Pinecone(
api_key=os.getenv("PINECONE_API_KEY"),
environment=os.getenv("PINECONE_ENV_NAME")
)

# Module-level vector store created at import time from the index named by
# PINECONE_INDEX_NAME, using the shared `embeddings` object.
vector_store = Pinecone.from_existing_index(
os.getenv("PINECONE_INDEX_NAME"), embeddings
)

def build_retriever(chat_args):
    """
    Build a retriever from the module-level vector store.

    :param chat_args: Object whose ``pdf_id`` is used as the metadata filter.
    :return: The result of ``vector_store.as_retriever`` with a search
        filter of ``{"pdf_id": chat_args.pdf_id}``.
    """
    pdf_filter = {"pdf_id": chat_args.pdf_id}
    return vector_store.as_retriever(search_kwargs={"filter": pdf_filter})
5 changes: 2 additions & 3 deletions app/web/views/pdf_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,13 @@ def upload_file(file_id, file_path, file_name):

pdf = Pdf.create(id=file_id, name=file_name, user_id=g.user.id)

# TODO: Defer this to be processed by the worker
process_document(pdf.id)
process_document.delay(pdf.id)

return pdf.as_dict()


@bp.route("/<string:pdf_id>", methods=["GET"])
@login_required
@login_required
@load_model(Pdf)
def show(pdf):
return jsonify(
Expand Down
13 changes: 2 additions & 11 deletions client/src/components/auth/AuthLinks.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,6 @@
{#if $auth.user}
<a
class="font-medium text-gray-600 hover:text-gray-400 dark:text-gray-400 dark:hover:text-gray-500"
href="/auth/signout">Sign Out</a
>
href="/auth/signout">Sign Out</a>
{:else}
<a
class="font-medium text-gray-600 hover:text-gray-400 dark:text-gray-400 dark:hover:text-gray-500"
href="/auth/signin">Login</a
>
<a
class="font-medium text-gray-600 hover:text-gray-400 dark:text-gray-400 dark:hover:text-gray-500"
href="/auth/signup">Sign Up</a
>
{/if}
{/if}
11 changes: 11 additions & 0 deletions client/src/routes/+layout.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,14 @@
<Navbar />
<slot />
</div>

<script context="module" lang="ts">
import { auth } from '$s/auth';
import { redirect } from '@sveltejs/kit';
// Load function exported from the layout's module script.
// Throws a 302 redirect to '/documents' when `session.user` is falsy;
// otherwise it completes without returning anything.
// NOTE(review): the redirect fires when there is NO user — confirm that
// '/documents' is the intended destination for that case.
// NOTE(review): confirm that the SvelteKit version in use still passes
// `session` to load and supports load in a module-context script.
export const load = async ({ session }) => {
if (!session.user) {
throw redirect(302, '/documents');
}
};
</script>
59 changes: 0 additions & 59 deletions client/src/routes/auth/signin/+page.svelte

This file was deleted.

35 changes: 0 additions & 35 deletions client/src/routes/auth/signout/+page.svelte

This file was deleted.

67 changes: 0 additions & 67 deletions client/src/routes/auth/signup/+page.svelte

This file was deleted.

Binary file added spice.pdf
Binary file not shown.

0 comments on commit 4048d6f

Please sign in to comment.