This repository has been archived by the owner on Mar 1, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 90
/
chat2.py
120 lines (101 loc) · 4.47 KB
/
chat2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import openai
import json
import numpy as np
from numpy.linalg import norm
import re
from time import time,sleep
from uuid import uuid4
import datetime
import pinecone
def open_file(filepath):
    """Return the entire text content of *filepath*, decoded as UTF-8."""
    with open(filepath, encoding='utf-8') as handle:
        contents = handle.read()
    return contents
def save_file(filepath, content):
    """Write *content* to *filepath* as UTF-8, overwriting any existing file."""
    with open(filepath, mode='w', encoding='utf-8') as handle:
        handle.write(content)
def load_json(filepath):
    """Parse the UTF-8 JSON file at *filepath* and return the decoded object."""
    with open(filepath, encoding='utf-8') as handle:
        return json.load(handle)
def save_json(filepath, payload):
    """Serialize *payload* to *filepath* as pretty-printed, key-sorted UTF-8 JSON."""
    with open(filepath, mode='w', encoding='utf-8') as handle:
        json.dump(payload, handle, ensure_ascii=False, sort_keys=True, indent=2)
def timestamp_to_datetime(unix_time):
    """Render a Unix timestamp as a human-readable local date/time string."""
    moment = datetime.datetime.fromtimestamp(unix_time)
    return moment.strftime("%A, %B %d, %Y at %I:%M%p %Z")
def gpt3_embedding(content, engine='text-embedding-ada-002'):
    """Return the embedding vector (a plain list of floats) for *content*.

    Non-ASCII characters are silently stripped before the API call to
    sidestep encoding problems.
    """
    content = content.encode(encoding='ASCII', errors='ignore').decode()
    response = openai.Embedding.create(input=content, engine=engine)
    return response['data'][0]['embedding']
def gpt3_completion(prompt, engine='text-davinci-003', temp=0.0, top_p=1.0, tokens=400, freq_pen=0.0, pres_pen=0.0, stop=None):
    """Request a completion from the OpenAI Completion API with retries.

    The prompt is ASCII-sanitized first, whitespace in the reply is
    normalized, and every prompt/response pair is logged under gpt3_logs/.

    Parameters mirror the openai.Completion.create arguments; *stop*
    defaults to the chat turn markers ['USER:', 'RAVEN:'].
    Returns the completion text, or a "GPT3 error: ..." string after
    max_retry consecutive failures (callers are never raised into).
    """
    # Avoid a mutable default argument: build the default list per call.
    if stop is None:
        stop = ['USER:', 'RAVEN:']
    max_retry = 5
    retry = 0
    prompt = prompt.encode(encoding='ASCII', errors='ignore').decode()  # strip non-ASCII
    while True:
        try:
            response = openai.Completion.create(
                engine=engine,
                prompt=prompt,
                temperature=temp,
                max_tokens=tokens,
                top_p=top_p,
                frequency_penalty=freq_pen,
                presence_penalty=pres_pen,
                stop=stop)
            text = response['choices'][0]['text'].strip()
            # Collapse newline runs and horizontal whitespace runs.
            text = re.sub(r'[\r\n]+', '\n', text)
            text = re.sub(r'[\t ]+', ' ', text)
            # Log the exchange; exist_ok avoids the check-then-create race.
            filename = '%s_gpt3.txt' % time()
            os.makedirs('gpt3_logs', exist_ok=True)
            save_file('gpt3_logs/%s' % filename, prompt + '\n\n==========\n\n' + text)
            return text
        except Exception as oops:
            retry += 1
            if retry >= max_retry:
                return "GPT3 error: %s" % oops
            print('Error communicating with OpenAI:', oops)
            sleep(1)
def load_conversation(results):
    """Rebuild a transcript from pinecone query *results*.

    *results* is a dict with a 'matches' list of dicts carrying 'id';
    each id names a nexus/<id>.json metadata file. Messages are joined
    chronologically, oldest first.
    """
    entries = [load_json('nexus/%s.json' % match['id']) for match in results['matches']]
    entries.sort(key=lambda entry: entry['time'])  # oldest first
    return '\n'.join(entry['message'] for entry in entries).strip()
if __name__ == '__main__':
    # Max number of past messages to pull back from pinecone per query.
    convo_length = 30
    # API keys are read from local files; pinecone index "raven-mvp" must already exist.
    openai.api_key = open_file('key_openai.txt')
    pinecone.init(api_key=open_file('key_pinecone.txt'), environment='us-east1-gcp')
    vdb = pinecone.Index("raven-mvp")
    while True:
        #### get user input, save it, vectorize it, save to pinecone
        payload = list()  # (id, vector) pairs upserted once per turn, after the reply
        a = input('\n\nUSER: ')
        timestamp = time()
        timestring = timestamp_to_datetime(timestamp)
        #message = '%s: %s - %s' % ('USER', timestring, a)
        message = a
        vector = gpt3_embedding(message)
        unique_id = str(uuid4())
        # Full message metadata lives on disk under nexus/; pinecone stores only id+vector.
        metadata = {'speaker': 'USER', 'time': timestamp, 'message': message, 'timestring': timestring, 'uuid': unique_id}
        save_json('nexus/%s.json' % unique_id, metadata)  # NOTE(review): assumes nexus/ exists — confirm
        payload.append((unique_id, vector))
        #### search for relevant messages, and generate a response
        results = vdb.query(vector=vector, top_k=convo_length)
        conversation = load_conversation(results)  # results should be a DICT with 'matches' which is a LIST of DICTS, with 'id'
        # Prompt template uses <<CONVERSATION>> and <<MESSAGE>> placeholders.
        prompt = open_file('prompt_response.txt').replace('<<CONVERSATION>>', conversation).replace('<<MESSAGE>>', a)
        #### generate response, vectorize, save, etc
        output = gpt3_completion(prompt)
        timestamp = time()
        timestring = timestamp_to_datetime(timestamp)
        #message = '%s: %s - %s' % ('RAVEN', timestring, output)
        message = output
        vector = gpt3_embedding(message)
        unique_id = str(uuid4())
        metadata = {'speaker': 'RAVEN', 'time': timestamp, 'message': message, 'timestring': timestring, 'uuid': unique_id}
        save_json('nexus/%s.json' % unique_id, metadata)
        payload.append((unique_id, vector))
        # Both sides of the exchange hit pinecone together, after the reply succeeds.
        vdb.upsert(payload)
        print('\n\nRAVEN: %s' % output)