-
Notifications
You must be signed in to change notification settings - Fork 1
/
customized_tools.py
166 lines (139 loc) · 6.04 KB
/
customized_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
""" This is a file for custom tools that you can use in the LLM agent
"""
import os
import psycopg2
import openai
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import tool
from src.docsearch.docsearch import (
docsearch_create_indexes_from_files,
docsearch_query_indexes,
)
from src.langchain_summary import produce_meta_summary, summarise_articles
from src.newsearch.refinitiv_query import (
create_rkd_base_header,
parse_freetext_headlines,
parse_news_stories_texts,
retrieve_freetext_headlines,
retrieve_news_stories,
)
# Load environment variables from a local .env file (if any) before reading them.
load_dotenv()

# Refinitiv (RKD) credentials, read from the environment.
RKD_USERNAME = os.getenv("REFINITIV_USERNAME")
RKD_PASSWORD = os.getenv("REFINITIV_PASSWORD")
RKD_APP_ID = os.getenv("REFINITIV_APP_ID")

# Azure OpenAI configuration for the `openai` client module.
openai.api_key = os.getenv('AZURE_OPENAI_API_KEY')
openai.api_version = os.getenv('AZURE_OPENAI_API_VERSION')
openai.api_type = os.getenv('AZURE_OPENAI_API_TYPE')
openai.api_base = os.getenv('AZURE_OPENAI_API_BASE')

# Shared LLM instance used by the tools below. A completions deployment is used
# rather than a chat one (original author's note: completions were found to be
# faster than chat).
CHAT_LLM = AzureOpenAI(
    deployment_name="text-davinci-003",
    model_name="text-davinci-003",
    temperature=0,
    best_of=1,
)

# Default splitter for news article text.
TEXT_SPLITTER = RecursiveCharacterTextSplitter(chunk_size=7_000, chunk_overlap=400)

# PostgreSQL connection settings, read from the environment.
host = os.getenv('PG_HOST')
dbname = os.getenv('PG_DB_NAME')
user = os.getenv('PG_USER')
password = os.getenv('PG_PASSWORD')
sslmode = os.getenv('PG_SSLMODE')

# Build the DSN with psycopg2's own helper instead of raw string interpolation:
# make_dsn quotes values that contain spaces, quotes or backslashes (typical for
# passwords) and drops settings that are None, so an unset variable is left to
# the libpq default rather than being sent as the literal text "None".
conn_string = psycopg2.extensions.make_dsn(
    host=host,
    user=user,
    dbname=dbname,
    password=password,
    sslmode=sslmode,
)

# Module-level connection shared by the pgvector-backed tool in this file.
conn = psycopg2.connect(conn_string)
@tool("Refinitiv freetext news search summary tool", return_direct=True)
def refinitiv_freetext_news_summary_tool(input: str) -> str:
    """
    Queries the Refinitiv News API for news articles related to the free text input
    which have happened in the last num_weeks_ago.
    Then summarises the news articles and returns the summary of enriched headlines.
    """
    # NOTE(review): the docstring above is kept word-for-word because langchain's
    # @tool decorator is documented to use it as the tool description given to
    # the agent. "num_weeks_ago" is not a parameter here; the look-back window
    # is the hard-coded 2 passed to the headline search below.
    rkd_header = create_rkd_base_header(RKD_USERNAME, RKD_PASSWORD, RKD_APP_ID)

    # Free-text headline search: last 2 weeks, matching both headline and body,
    # English only (original note: Refinitiv handles English queries better
    # than Chinese ones).
    matched_articles = parse_freetext_headlines(
        retrieve_freetext_headlines(rkd_header, input, 2, "both", "EN")
    )

    # Fetch and parse the full story text for every matched headline.
    story_ids = [item.id for item in matched_articles]
    story_texts = parse_news_stories_texts(
        retrieve_news_stories(rkd_header, story_ids)
    )

    # Summarise each story, then fold the summaries into one meta summary.
    headlines = [item.headline for item in matched_articles]
    per_article_summaries = summarise_articles(
        chat_llm=CHAT_LLM,
        text_splitter=TEXT_SPLITTER,
        article_headlines=headlines,
        article_texts=story_texts,
    )
    return produce_meta_summary(CHAT_LLM, TEXT_SPLITTER, per_article_summaries)
@tool("document question answering", return_direct=True)
def document_question_answering(input: str) -> str:
    """
    Answers questions related to HSBC knowledge documents and gives answers
    from HSBC's perspective on topics.
    """
    # NOTE(review): the docstring above is kept word-for-word because langchain's
    # @tool decorator is documented to use it as the tool description.

    # Docsearch setup. The dimension count is passed to the index builder;
    # presumably it is the vector width of the embedding model - confirm.
    embedding_dims = 1536
    embedder = OpenAIEmbeddings(model="text-embedding-ada-002")
    form_client = DocumentAnalysisClient(
        os.getenv("FORM_RECOGNISER_ENDPOINT"),
        AzureKeyCredential(os.getenv("FORM_RECOGNISER_KEY")),
    )

    # Smaller chunks than the module-level splitter because these documents
    # are larger.
    doc_splitter = RecursiveCharacterTextSplitter(chunk_size=3_000, chunk_overlap=300)

    # TODO: Right now loads from a sample directory; need to find a way to load
    # from a vector database or otherwise? would this be pre-loaded??
    # Then can replace FAISS lookup for sample documents.
    samples_dir = "./data/pdf_img_samples/"
    sample_paths = []
    for entry in os.listdir(samples_dir):
        sample_paths.append(os.path.join(samples_dir, entry))

    # Build the FAISS index and its document store from the sample files; this
    # happens on every call.
    index, doc_store = docsearch_create_indexes_from_files(
        embedding_dims,
        sample_paths,
        form_client,
        embedder,
        doc_splitter,
    )

    # Answer the question against the freshly built index.
    return docsearch_query_indexes(input, index, doc_store, embedder, CHAT_LLM)
@tool("hsbc knowledge search tool")
def hsbc_knowledge_tool_pgvector(input: str) -> str:
    """useful for when you need to answer questions about hsbc related knowledge"""
    # NOTE(review): the docstring above is kept word-for-word because langchain's
    # @tool decorator is documented to use it as the tool description.
    #
    # Embeds `input`, then returns the `content` of the single row of
    # `hsbc_homepage_content` that sorts first by the `<->` distance between
    # its `embedding` column and the query embedding. Any failure, or an empty
    # table, yields the fixed apology string instead of raising, so the agent
    # loop keeps running.
    import logging

    fallback = "Sorry, I don't understand your question. Please try again."
    try:
        # get embedding from input
        response = openai.Embedding.create(input=input, engine="text-embedding-ada-002")
        query_embedding = response['data'][0]['embedding']

        # `with conn` commits on success and rolls back on error, so a failed
        # statement no longer leaves the shared connection stuck in an aborted
        # transaction; `with conn.cursor()` closes the cursor on every path.
        with conn, conn.cursor() as cur:
            # The embedding is bound as a parameter rather than spliced into
            # the SQL text. Its string form ("[0.1, 0.2, ...]") is the same
            # literal the query previously embedded by hand.
            cur.execute(
                "SELECT content FROM hsbc_homepage_content "
                "ORDER BY embedding <-> %s LIMIT 1;",
                (str(query_embedding),),
            )
            row = cur.fetchone()
    except Exception:
        # Best-effort tool: record the failure with its traceback and fall back.
        logging.getLogger(__name__).exception("hsbc knowledge search failed")
        return fallback

    if row is None:
        # No rows in the table: nothing to answer with.
        return fallback
    return row[0]
@tool("reject tool", return_direct=True)
def reject_tool(input: str) -> str:
    """useful for when you need to answer questions not related to HSBC"""
    # The LLM agent does not always refuse questions unrelated to HSBC on its
    # own, so this tool exists to end the thought/action loop with a fixed
    # reply. `input` is ignored.
    out_of_scope_reply = """
I'm sorry, but as a customer service chatbot for HSBC Hongkong, I am only able to assist with questions related to HSBC Hongkong products and services.
Is there anything else related to HSBC Hongkong that I can help you with?
"""
    return out_of_scope_reply