search.py
import os

import chromadb
import ollama

# Storage and model configuration.
VECTOR_DB_PATH = "./data/"
URL_COLLECTION_NAME = "documents_index"
MAX_TOKENS = 4096  # https://inference.readthedocs.io/en/latest/models/builtin/llm/tiny-llama.html
MAX_EMBEDDING_RESULTS = 1
ANSWER_MODEL = "tinyllama:latest"  # use llama2 for better results
EMBEDDING_MODEL = "all-minilm:latest"  # use mxbai-embed-large for better results

# Expects OLLAMA_HOST to be set in the environment.
print("API Server: " + os.environ["OLLAMA_HOST"])
client = chromadb.PersistentClient(path=VECTOR_DB_PATH)


def search_index(query):
    """Embed the query and return the best-matching document text from the index."""
    print("Query: " + query)
    response = ollama.embeddings(model=EMBEDDING_MODEL, prompt=query)
    embedding = response["embedding"]
    collection = client.get_or_create_collection(name=URL_COLLECTION_NAME)
    results = collection.query(query_embeddings=[embedding], n_results=MAX_EMBEDDING_RESULTS)
    # print(results)
    data = ""
    # results['documents'][0] holds the matched document strings for the first query.
    for data_part in results['documents'][0]:
        data = data + data_part
    return data


def answer(data, query):
    """Ask the answer model to respond to the query, using the retrieved data as context."""
    output = ollama.generate(
        model=ANSWER_MODEL,
        prompt=f"Using this data: {data}. Respond to this prompt: {query}"
    )
    print("===============================================")
    print(output['response'])
    print("===============================================")


def main(query):
    data = search_index(query)
    # print("data: " + data)
    answer(data, query)


if __name__ == '__main__':
    # query = input("Ask:")
    query = "Who can apply for admission?"
    main(query)
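

# A minimal sketch of the indexing side this script assumes already ran:
# each document is embedded with the same EMBEDDING_MODEL and stored in the
# same Chroma collection that search_index() queries. The build_index helper
# and the sample document are hypothetical; the repo does not show its actual
# ingestion code.
def build_index(documents):
    collection = client.get_or_create_collection(name=URL_COLLECTION_NAME)
    for i, doc in enumerate(documents):
        response = ollama.embeddings(model=EMBEDDING_MODEL, prompt=doc)
        collection.add(
            ids=[str(i)],
            embeddings=[response["embedding"]],
            documents=[doc],
        )

# Example usage (hypothetical data):
# build_index(["Applicants holding a completed bachelor's degree can apply for admission."])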