[BUG Fix Lumina charging] #3903

Merged 1 commit on Dec 26, 2024
72 changes: 41 additions & 31 deletions one_fm/wiki_chat_bot/main.py
@@ -2,16 +2,15 @@
import json

import frappe
from llama_index.core import SimpleDirectoryReader,VectorStoreIndex,PromptTemplate,GPTListIndex,StorageContext, load_index_from_storage
from llama_index.core import SimpleDirectoryReader,Document,VectorStoreIndex,PromptTemplate,GPTListIndex,StorageContext, load_index_from_storage
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.schema import TextNode
from langchain.text_splitter import RecursiveCharacterTextSplitter


from one_fm.api.v1.utils import response

def split_text_into_chunks(text, chunk_size=4096):
def split_text_into_chunks(text, chunk_size=2096):
# This method was created to mitigate max-token errors
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=200)
return splitter.split_text(text)
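
For reference, a minimal sketch of how the chunking helper above behaves when called on a long document. It assumes `langchain` is installed, as in this file's imports; the sample text and printed values are illustrative only.

```python
# Minimal sketch: exercising the chunking helper on a long string.
# Assumes langchain is installed, as in the file's imports; the sample text is made up.
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_text_into_chunks(text, chunk_size=2096):
    # chunk_overlap=200 keeps some shared context between adjacent chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=200)
    return splitter.split_text(text)

long_text = "Lumina is the wiki assistant for One Facilities Management. " * 200
chunks = split_text_into_chunks(long_text)
print(len(chunks), max(len(c) for c in chunks))  # several chunks, each at most ~2096 characters
```

Note that `RecursiveCharacterTextSplitter` measures `chunk_size` in characters by default, so 2096 is a character budget rather than a token budget; the smaller value simply leaves more headroom against the model's max-token limit.
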
@@ -20,29 +19,42 @@ def create_vector_index():
try:
os.environ["OPENAI_API_KEY"] = frappe.local.conf.CHATGPT_APIKEY
existing_text_nodes,new_text_nodes = [],[]
embedding_model = OpenAIEmbedding(model_name="gpt-4o-mini")
# Load existing data and append to nodelist
existing_docs = SimpleDirectoryReader("vector_index").load_data()

for doc in existing_docs:
chunks = split_text_into_chunks(doc.text)
existing_text_nodes.extend([TextNode(text=chunk) for chunk in chunks])
directory_path = "vector_index"
if os.path.exists(directory_path):
if os.listdir(directory_path):
existing_docs = SimpleDirectoryReader("vector_index").load_data()
storage_context = StorageContext.from_defaults(persist_dir="vector_index")
vector_index_ = load_index_from_storage(storage_context)
for doc in existing_docs:
chunks = split_text_into_chunks(doc.text)
existing_text_nodes.extend([TextNode(text=chunk) for chunk in chunks])
else:
os.mkdir(directory_path)

# Load new data
new_docs = SimpleDirectoryReader(get_folder_path()).load_data()
# new_text_nodes = [TextNode(text=doc.text) for doc in new_docs]

for doc in new_docs:
#Split the texts so we don't go over the max token value of the model
chunks = split_text_into_chunks(doc.text)
new_text_nodes.extend([TextNode(text=chunk) for chunk in chunks])
# Merge existing and new vector indexes
merged_nodes = existing_text_nodes+new_text_nodes
merged_vector_index = VectorStoreIndex(nodes= merged_nodes,embedding =embedding_model)
combined_text = "\n".join([node.text for node in new_text_nodes])
# Create a Document object
new_document = Document(text=combined_text)
if not os.listdir(directory_path):
vector_index_ = VectorStoreIndex.from_documents(new_docs)
else:
vector_index_.insert(new_document)
# merged_nodes = existing_text_nodes+new_text_nodes
# merged_vector_index = VectorStoreIndex(nodes= merged_nodes,embedding =embedding_model)

# Persist the merged vector index
merged_vector_index.storage_context.persist(persist_dir="vector_index")
# merged_vector_index.storage_context.persist(persist_dir="vector_index")
vector_index_.storage_context.persist(persist_dir=directory_path)

return merged_vector_index
return vector_index_
except:
frappe.log_error(frappe.get_traceback(), "Error while adding to bot memory(Chat-BOT)")

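The hunk above replaces the old rebuild-everything approach with a create-or-insert flow against the persisted index. Below is a condensed sketch of that flow, assuming the same `llama_index` imports used in this file; the new document text is hypothetical and `OPENAI_API_KEY` must be set for the embedding calls, as in `create_vector_index()`.

```python
# Condensed sketch of the create-or-insert flow; the document text is hypothetical
# and OPENAI_API_KEY is assumed to be set, as in create_vector_index().
import os
from llama_index.core import (
    Document, StorageContext, VectorStoreIndex, load_index_from_storage,
)

persist_dir = "vector_index"
new_docs = [Document(text="New wiki content to index.")]

if os.path.exists(persist_dir) and os.listdir(persist_dir):
    # An index already exists on disk: load it and insert the new document(s)
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)
    for doc in new_docs:
        index.insert(doc)
else:
    # First run: build a fresh index from the new documents
    os.makedirs(persist_dir, exist_ok=True)
    index = VectorStoreIndex.from_documents(new_docs)

# Persist the updated index back to the same directory
index.storage_context.persist(persist_dir=persist_dir)
```
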
@@ -53,38 +65,36 @@ def create_vector_index():
def ask_question(question: str = None):
try:
os.environ["OPENAI_API_KEY"] = frappe.local.conf.CHATGPT_APIKEY
if not question:
if not question.strip():
return response("Bad Request !", 400, error="Question can not be empty")
storage_context = StorageContext.from_defaults(persist_dir="vector_index")
index = load_index_from_storage(storage_context)

prompt_template_str = (
"Context information is below.\n"
"You are Lumina, an AI assistant working for One Facilities Management, a company headquartered in Kuwait. "
"You always respond to your name when addressed and provide assistance accordingly. "
"Context information is below.\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"Given the context information and not prior knowledge, "
"You are an AI assistant called Lumina.\n"
"You do not need to introduce yourself or say who you are when you are not asked directly\n"
"You Work for One Faciities Management, A company with it's Headquarters in Kuwait\n"
"Whenever Lumina does not find the required data,ask the user to upload the most updated data to enable you answer the question appropriately\n"
"Query: {query_str}\n"
"Answer: "
)

refine_prompt_str = (
"We have the opportunity to refine the original answer "
"(only if needed) with some more context below.\n"
"------------\n"
"You should always respond in the same language as the query string even if the context is a different language \n"

"Given the new context, refine the original answer to better "
"answer the question: {query_str}. "

"Original Answer: {existing_answer}"
)
"As Lumina, the AI assistant for One Facilities Management, refine the original answer with the additional context below, "
"ensuring you respond to your name when addressed and maintain consistency in your responses.\n"
"------------\n"
"{context_str}\n"
"------------\n"
"Original Answer: {existing_answer}\n"
"Refined Answer: "
)

text_qa_template = PromptTemplate(prompt_template_str)
refined_text_qa_template = PromptTemplate(refine_prompt_str)
llm = OpenAI(model="gpt-4o-mini")
llm = OpenAI(model="gpt-4o-mini-2024-07-18")
query_engine = index.as_query_engine(llm=llm,text_qa_template=text_qa_template,refine_template=refined_text_qa_template)
answer = query_engine.query(question)
return response(message="Success", status_code=200, data={"question": question, "answer": answer.response})
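
For context, here is a stripped-down sketch of the query path in `ask_question`: load the persisted index, attach the custom QA template and the pinned `gpt-4o-mini-2024-07-18` model, and run the query. The question string is hypothetical and `OPENAI_API_KEY` is assumed to be set.

```python
# Stripped-down sketch of the query path in ask_question(); the question is hypothetical
# and OPENAI_API_KEY is assumed to be set.
from llama_index.core import PromptTemplate, StorageContext, load_index_from_storage
from llama_index.llms.openai import OpenAI

storage_context = StorageContext.from_defaults(persist_dir="vector_index")
index = load_index_from_storage(storage_context)

text_qa_template = PromptTemplate(
    "You are Lumina, an AI assistant working for One Facilities Management.\n"
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Query: {query_str}\n"
    "Answer: "
)

llm = OpenAI(model="gpt-4o-mini-2024-07-18")
query_engine = index.as_query_engine(llm=llm, text_qa_template=text_qa_template)
answer = query_engine.query("Who are you, Lumina?")
print(answer.response)
```

Pinning the dated model name and restating the Lumina persona in the refine template keeps the assistant's identity and language consistent across the initial answer and any refinement passes.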