Merge pull request #5 from patham9/Evaluation
Evaluation
patham9 authored Apr 29, 2023
2 parents 43d30ca + 4ad89a4 commit 9ecc90f
Showing 14 changed files with 1,396 additions and 77 deletions.
1 change: 1 addition & 0 deletions All.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Correct.json

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions ExportSentence.py
@@ -0,0 +1,30 @@
from NarsGPT import *
import json

Line_Input_Output_ExpectedOutput = []
Line = 1
while True:
    try:
        line = input()
    except EOFError:
        #print(NarsGPT_AddInput("where is the cat?"))
        exit(0)
    parts = ",".join(line.split(",")[1:]).split(",,,,,,")
    Input, expectedOutput = parts
    Input = Input.strip()
    expectedOutput = expectedOutput.strip()
    if expectedOutput != "":
        if not Input.endswith("?"):
            Input += "?"
    actualOutput = NarsGPT_AddInput(Input)
    Dic = {"Line": Line, "Input": Input, "actualOutput": actualOutput, "expectedOutput": expectedOutput}
    Line_Input_Output_ExpectedOutput.append(Dic)
    for k in Dic:
        print(k+":", Dic[k])
    print("\n")
    filename = "OUT.json"
    with open(filename, 'w') as f:
        json.dump((Line_Input_Output_ExpectedOutput, currentTime), f)
    Line += 1
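
Note (not part of the commit): a minimal sketch of how ExportSentence.py splits one benchmark CSV line. The sample line below is hypothetical; the script drops the leading line-number field and uses the run of empty columns (",,,,,,") to separate the input sentence from the expected answer.

    line = "42,Where is the cat,,,,,,The cat is in the garden"   # hypothetical CSV row
    parts = ",".join(line.split(",")[1:]).split(",,,,,,")
    Input, expectedOutput = parts   # -> "Where is the cat", "The cat is in the garden"
    # expectedOutput is non-empty, so the input is treated as a question and a "?" is appended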


1,158 changes: 1,158 additions & 0 deletions INT_Inf_benchmarkTest.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Incorrect.json

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions Judge.py
@@ -0,0 +1,67 @@
import json
import openai
import time
import sys
for x in sys.argv:
    if x.startswith("API_KEY="):
        openai.api_key = x.split("API_KEY=")[1]

with open("OUT.json") as json_file:
    ListOfDicts, _ = json.load(json_file)

# {"Line": Line, "Input": Input, "actualOutput": actualOutput, "expectedOutput": expectedOutput}
Questions = []
for D in ListOfDicts:
    Line = D["Line"]
    Input = D["Input"]
    actualOutput = D["actualOutput"]
    expectedOutput = D["expectedOutput"]
    if expectedOutput != "":
        Questions.append(D)

with open("QUESTIONS.json", 'w') as f:
    json.dump(Questions, f)

PROMPT = """Does the actual output contain the asked information answered in the expected output?
The question: _QUESTION_
The actual output: _ACTUAL_OUTPUT_
The expected output: _EXPECTED_OUTPUT_
Please answer yes/no only!"""

All = []
Correct = []
Incorrect = []
for D in Questions:
    Line = D["Line"]
    Input = D["Input"]
    actualOutput = D["actualOutput"]
    expectedOutput = D["expectedOutput"]
    send_prompt = PROMPT.replace("_QUESTION_", Input).replace("_ACTUAL_OUTPUT_", actualOutput).replace("_EXPECTED_OUTPUT_", expectedOutput)
    print(send_prompt)
    while True:
        try:
            response = openai.ChatCompletion.create(model='gpt-3.5-turbo', messages=[{"role": "user", "content": send_prompt}], max_tokens=200, temperature=0)
            ret = response['choices'][0]['message']['content']
        except:
            print("Error: API call failed, will try repeating it in 10 seconds!")
            time.sleep(10) #wait 10 seconds
            continue
        break
    YES = "yes" in ret.lower()
    D["Correct"] = YES
    print("Correct?", YES)
    if YES:
        Correct.append(D)
    else:
        Incorrect.append(D)
    All.append(D)
    scores = {"Correct": len(Correct), "Incorrect": len(Incorrect), "Ratio": float(len(Correct)) / float(len(All))}
    print("So far:", scores)
    with open("All.json", 'w') as f:
        json.dump(All, f)
    with open("Correct.json", 'w') as f:
        json.dump(Correct, f)
    with open("Incorrect.json", 'w') as f:
        json.dump(Incorrect, f)
    with open("scores.json", 'w') as f:
        json.dump(scores, f)
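
Note (not part of the commit): Judge.py is run after ExportSentence.py has written OUT.json, with the OpenAI key passed as a command-line argument of the form API_KEY=<key> (see the sys.argv loop above); it then writes All.json, Correct.json, Incorrect.json and scores.json. A minimal sketch for inspecting the aggregate score afterwards:

    import json
    with open("scores.json") as f:
        scores = json.load(f)   # {"Correct": ..., "Incorrect": ..., "Ratio": ...}
    print("Accuracy so far:", scores["Ratio"])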
127 changes: 70 additions & 57 deletions Memory.py
@@ -22,6 +22,7 @@
* THE SOFTWARE.
* """

from openai.embeddings_utils import get_embedding, cosine_similarity
from ast import literal_eval
from os.path import exists
import json
@@ -33,58 +34,82 @@
import NAR
os.chdir(cwd)
from Truth import *
import time

def RetrieveQuestionContent(memory, attention_buf, inp, max_LTM_retrievals=5):
def get_embedding_robust(inp):
while True:
try:
ret = get_embedding(inp)
except:
print("//Failed get embedding, will retry API call in 10s")
time.sleep(10)
continue
break
return ret

def ProductPrettify(term):
if " --> " in term and " * " in term.split(" --> ")[0]:
arg1 = term.split(" * ")[0].strip()
arg2 = term.split(" * ")[1].split(" --> ")[0].strip()
relarg = term.split(" --> ")[1].strip()
term = arg1 + " " + relarg + " " + arg2
return term.replace("(","").replace(")","")

def Term_AsSentence(T):
term = T[1:-1] if "<" in T else T
if "=/>" not in term:
term = ProductPrettify(term)
else:
if " =/> " in term:
prec_op = [ProductPrettify(p) for p in term.split(" =/> ")[0].split(" &/ ")]
removeParentheses = lambda u: u.replace(" --> ["," hasproperty ").replace(" --> "," isa ").replace(" - ", " and not ").replace("(",""). \
replace("<","").replace(")","").replace(">","").replace(" "," ").strip()
precs = removeParentheses(" and when then ".join(prec_op[:-1]))
op = prec_op[-1]
if " --> " in op:
op = removeParentheses(prec_op[-1].split(" --> ")[1] + " " + prec_op[-1].split(" --> ")[0]).replace("{SELF} *", "")
term = "When '" + precs + "' then '" + removeParentheses(op) + "' causes '" + removeParentheses(term.split(" =/> ")[1]) + "'"
term = term.replace(" --> [", " hasproperty ").replace("]","").replace("[","").replace(" --> ", " isa ").replace(" &/ ", " then ").replace(" =/> ", " causes ")
return term

def Term_Embedded(T):
return get_embedding_robust(Term_AsSentence(T).replace("-"," ").replace("_"," "))

def RetrieveQuestionContent(memory, attention_buf, inp, max_LTM_retrievals=30):
primed = {}
words = [x.strip().replace("?","") for x in inp.split(" ")]
for x in words:
n = Lemmatize(x, wordnet.NOUN)
v = Lemmatize(x, wordnet.VERB)
for m in list(memory.items()):
padded = lambda w: " " + w.replace(">"," ").replace("<"," ").replace("("," ").replace(")"," ") + " "
if padded(n) in padded(m[0][0]) or padded(v) in padded(m[0][0]):
if m not in attention_buf:
matchQuality = 2 if (padded(n) in padded(m[0][0]) and padded(v) in padded(m[0][0])) else 1
if m[0] not in primed:
primed[m[0]] = (matchQuality, m[1])
else:
primed[m[0]] = (primed[m[0]][0] + matchQuality, primed[m[0]][1])
qu_embed = get_embedding_robust(inp)
for m in list(memory.items()):
if m not in attention_buf:
matchQuality = cosine_similarity(qu_embed, m[1][3])
primed[m[0]] = (matchQuality, m[1])
primed = list(primed.items())
primed.sort(key=lambda x: (-x[1][0], -Truth_Expectation(x[1][1][2]))) #sort by query match first then by truth expectation
primed = primed[:max_LTM_retrievals]
#for m in primed:
# print("//Retrieved from LTM:", m)
# print("//Retrieved from LTM:", m[0], m[1][:-1])
primed = [(x[0],x[1][1]) for x in primed]
return list(reversed(primed))
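
Note (not part of the commit): with this change, question-driven retrieval no longer matches lemmatized words against terms; each memory item carries a precomputed embedding (m[1][3]) and is ranked by cosine similarity to the question embedding, with ties broken by truth expectation. A minimal sketch of that ranking with hypothetical terms:

    from openai.embeddings_utils import get_embedding, cosine_similarity
    qu_embed = get_embedding("where is the cat?")
    items = {"<(cat * home) --> at>": get_embedding("cat at home"),
             "<sky --> [blue]>": get_embedding("sky hasproperty blue")}
    ranked = sorted(items, key=lambda t: -cosine_similarity(qu_embed, items[t]))
    # ranked[0] should be the cat/home term, the closest match to the question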

def Memory_attention_buffer(memory, attention_buffer_size, inpQuestion = None):
attention_buf=[]
relevant_item_list = list(memory.items())
#relevant_item_list = list(memory.items())
#find attention_buffer_size/2 newest items:
relevant_item_list.sort(key=lambda x: -x[1][0])
attention_buf += reversed(relevant_item_list[0:int(attention_buffer_size/2)]) #newer comes later in prompt
#relevant_item_list.sort(key=lambda x: -x[1][0])
#attention_buf += reversed(relevant_item_list[0:int(attention_buffer_size/2)]) #newer comes later in prompt
#find additional attention_buffer_size/2 useful items which were not already part of the newest
relevant_item_list.sort(key=lambda x: -x[1][1])
for x in attention_buf:
if x in relevant_item_list:
relevant_item_list.remove(x) #so we won't select it as it is already part of mem
i = 0
while len(attention_buf) < attention_buffer_size and i < len(relevant_item_list):
attention_buf = [relevant_item_list[i]] + attention_buf
i += 1
#relevant_item_list.sort(key=lambda x: -x[1][1])
#for x in attention_buf:
# if x in relevant_item_list:
# relevant_item_list.remove(x) #so we won't select it as it is already part of mem
#i = 0
#while len(attention_buf) < attention_buffer_size and i < len(relevant_item_list):
# attention_buf = [relevant_item_list[i]] + attention_buf
# i += 1
#pull in question content that is not already included
if inpQuestion is not None:
attention_buf = RetrieveQuestionContent(memory, attention_buf, inpQuestion) + attention_buf
attention_buf = RetrieveQuestionContent(memory, attention_buf, inpQuestion) #+ attention_buf
return attention_buf

def ProductPrettify(term):
if " --> " in term and " * " in term.split(" --> ")[0]:
arg1 = term.split(" * ")[0].strip()
arg2 = term.split(" * ")[1].split(" --> ")[0].strip()
relarg = term.split(" --> ")[1].strip()
term = arg1 + " " + relarg + " " + arg2
return term.replace("(","").replace(")","")

def Memory_generate_prompt(currentTime, memory, prompt_start, prompt_end, attention_buffer_size, inpQuestion = None, TimeHandling = True):
prompt_memory = ""
buf = Memory_attention_buffer(memory, attention_buffer_size, inpQuestion)
Expand All @@ -110,20 +135,7 @@ def Memory_generate_prompt(currentTime, memory, prompt_start, prompt_end, attent
flags.append("Contradictory")
certainty = Truth_Expectation((f,c))
truthtype = '"' + " ".join(flags) + '"'
term = x[0][0][1:-1] if "<" in x[0][0] else x[0][0]
if "=/>" not in term:
term = ProductPrettify(term)
else:
if " =/> " in term:
prec_op = [ProductPrettify(p) for p in term.split(" =/> ")[0].split(" &/ ")]
removeParentheses = lambda u: u.replace(" --> ["," hasproperty ").replace(" --> "," isa ").replace(" - ", " and not ").replace("(",""). \
replace("<","").replace(")","").replace(">","").replace(" "," ").strip()
precs = removeParentheses(" and when then ".join(prec_op[:-1]))
op = prec_op[-1]
if " --> " in op:
op = removeParentheses(prec_op[-1].split(" --> ")[1] + " " + prec_op[-1].split(" --> ")[0]).replace("{SELF} *", "")
term = "When '" + precs + "' then '" + removeParentheses(op) + "' causes '" + removeParentheses(term.split(" =/> ")[1]) + "'"
term = term.replace(" --> [", " hasproperty ").replace("]","").replace("[","").replace(" --> ", " isa ").replace(" &/ ", " then ").replace(" =/> ", " causes ")
term = Term_AsSentence(x[0][0])
prompt_memory += f"i={i}: {term}. {timeterm}truthtype={truthtype} certainty={certainty}\n"
return buf, prompt_start + prompt_memory + prompt_end

@@ -153,7 +165,7 @@ def query(currentTime, memory, term, time):
return currentTime
if (term, time) not in retrieved and (term, time) in memory:
retrieved.add((term, time))
(_, _, (f, c)) = memory[(term, time)]
(_, _, (f, c), _) = memory[(term, time)]
if time == "eternal":
_, currentTime = ProcessInput(currentTime, memory, f"{term}. {{{f} {c}}}")
if time == currentTime:
@@ -162,7 +174,7 @@ def query(currentTime, memory, term, time):
parts = term.split("?1")
bestTerm, bestTruth, bestTime = (None, (0.0, 0.5), "eternal")
for (term2, time2) in memory:
(_, _, (f2, c2)) = memory[(term2, time2)]
(_, _, (f2, c2), _) = memory[(term2, time2)]
if time2 == time and term2.startswith(parts[0]) and term2.endswith(parts[1]):
if Truth_Expectation((f2, c2)) > Truth_Expectation((bestTruth[0], bestTruth[1])):
bestTerm = term2
@@ -206,11 +218,11 @@ def ProcessInput(currentTime, memory, inputforNAR, backups = ["input", "answers"
c2 = float(derivation["truth"]["confidence"])
usefulnessAddition = 1000000 if "Priority" not in derivation or derivation["Priority"] == 1.0 else 1
if (term, time) in memory:
(t, usefulness, (f, c)) = memory[(term, time)]
(t, usefulness, (f, c), embedding) = memory[(term, time)]
if c2 > c:
memory[(term, time)] = (currentTime, usefulness + usefulnessAddition, (f2, c2))
memory[(term, time)] = (currentTime, usefulness + usefulnessAddition, (f2, c2), embedding)
else:
memory[(term, time)] = (currentTime, usefulnessAddition, (f2, c2))
memory[(term, time)] = (currentTime, usefulnessAddition, (f2, c2), Term_Embedded(term))
if ">." in inputforNAR or "! :|:" in inputforNAR:
currentTime += 1
if inputforNAR.isdigit():
@@ -220,7 +232,7 @@ def ProcessInput(currentTime, memory, inputforNAR, backups = ["input", "answers"
relations = set(["isa", "are", "hasproperty"])
def Relation(inp, currentTime, memory, s, v, p, punctuation_tv):
global relations
if s.replace("_", " ") not in inp or p.replace("_", " ") not in inp:
if s.replace("_", " ") not in inp.replace(". "," ").replace("'","") or p.replace("_", " ") not in inp.replace(". "," ").replace("'",""):
#print("//!!!! filtered out", s, v, p)
return False, currentTime
s = Lemmatize(s, wordnet.NOUN)
@@ -238,7 +250,7 @@ def Relation(inp, currentTime, memory, s, v, p, punctuation_tv):
return True, currentTime

def Property(inp, currentTime, memory, s, p, punctuation_tv):
if s.replace("_", " ") not in inp or p.replace("_", " ") not in inp:
if s.replace("_", " ") not in inp.replace(". "," ").replace("'","") or p.replace("_", " ") not in inp.replace(". "," ").replace("'",""):
#print("//!!!! filtered out", s, "hasproperty", p)
return False, currentTime
s = Lemmatize(s, wordnet.NOUN)
@@ -252,6 +264,7 @@ def Property(inp, currentTime, memory, s, p, punctuation_tv):
hadRelation = set([])
def Memory_digest_sentence(inp, currentTime, memory, sentence, truth, PrintMemoryUpdates, TimeHandling):
global lastTime, hadRelation
#print(">>>>", sentence)
if currentTime != lastTime:
hadRelation = set([])
if sentence in hadRelation:
@@ -312,7 +325,7 @@ def Memory_Eternalize(currentTime, memory, eternalizationDistance = 3):
belief = memory[(m, t)]
if t != "eternal" and currentTime - t > eternalizationDistance:
deletes.append((m, t))
additions.append(((m, "eternal"), (belief[0], belief[1], Truth_Eternalize(belief[2]))))
additions.append(((m, "eternal"), (belief[0], belief[1], Truth_Eternalize(belief[2]), belief[3])))
for k in deletes:
del memory[k]
for (k, v) in additions:
