llm_interface.py
import chromadb
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import (
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    SimpleDirectoryReader,
)

# Shared Ollama-backed LLM; the generous timeout accommodates slow local inference.
llm = Ollama(model="llama2", request_timeout=300.0)
print(llm.metadata)
def stream_response(prompt):
    """
    Stream the completion of a prompt using the Ollama LLM.

    Args:
        prompt (str): The prompt text to be completed by the language model.

    Yields:
        str: The delta (the newly generated text) of each streamed response
            chunk produced by the model for the given prompt.

    Example:
        >>> for delta in stream_response("Generate a summary for a given text"):
        ...     print(delta, end="", flush=True)
    """
    response = llm.stream_complete(prompt)
    for token in response:
        yield token.delta
def search_pdf(query):
    """
    Search for a given query over the PDF documents in "data/" and return
    a generator that streams the response text.

    Args:
        query (str): The query string to search for in the PDF documents.

    Returns:
        Generator[str, None, None]: A generator yielding chunks of the
            response text as they are produced by the query engine.
    """
    # Note: documents are reloaded and the index rebuilt on every call;
    # embeddings are persisted in ./chroma_db_data via the Chroma store.
    documents = SimpleDirectoryReader("data/").load_data()

    # Create Chroma DB client and store
    client = chromadb.PersistentClient(path="./chroma_db_data")
    chroma_collection = client.get_or_create_collection(name="reviews")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Initialize ServiceContext (legacy llama-index API) with the shared LLM
    # and a local embedding model
    service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

    # Create VectorStoreIndex and a streaming query engine
    index = VectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
        storage_context=storage_context,
    )
    query_engine = index.as_query_engine(streaming=True)

    # Query and return the token stream
    response = query_engine.query(query)
    return response.response_gen
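

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: the prompts below
    # are illustrative assumptions. It streams a direct completion, then streams
    # an answer grounded in the PDFs under data/, printing tokens as they arrive.
    for delta in stream_response("Say hello in one sentence."):
        print(delta, end="", flush=True)
    print()

    for chunk in search_pdf("What do the reviews say overall?"):
        print(chunk, end="", flush=True)
    print()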