-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreadability.py
70 lines (55 loc) · 2.06 KB
/
readability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# To activate env run: $ source activate cs6172_env
# To deactivate env run: $ conda deactivate
import openai
from decouple import config
import numpy as np
from trainingSet import trainingSet
# Readability class names, normalised to a single leading capital letter.
labels = list(
    map(
        lambda raw: raw.strip().lower().capitalize(),
        ("Readable", "Acceptable", "Difficult", "Unreadable"),
    )
)
# TEST SET
def classifier(query):
    """Classify ``query`` via ``openai.Classification.create`` and return the raw result.

    The request uses the four readability labels, passes ``trainingSet`` as
    the labelled examples, and asks for 5 log-probabilities so the result
    can be turned into per-label probabilities afterwards.

    Args:
        query: The text to classify.

    Returns:
        Whatever ``openai.Classification.create`` returns, unmodified.
    """
    label_names = [
        name.strip().lower().capitalize()
        for name in ("Readable", "Acceptable", "Difficult", "Unreadable")
    ]
    request = {
        "query": query,
        "search_model": "ada",
        "model": "davinci-codex",
        "logprobs": 5,
        "labels": label_names,
        "max_examples": 6,
        "examples": trainingSet,
    }
    return openai.Classification.create(**request)
def cost(classification: dict) -> float:
    """Return the expected readability cost of a classification response.

    Reads the candidate tokens and their log-probabilities from
    ``classification["completion"]["choices"][0]["logprobs"]["top_logprobs"][1]``,
    converts them to probabilities, attributes each token to the label it
    starts, and returns the probability-weighted sum of the fixed per-label
    weights. Probability mass that cannot be attributed to a label is
    assigned to a special ``" Unknown"`` label.

    Args:
        classification: Mapping with the nested layout indexed above
            (presumably the value returned by ``classifier`` -- confirm
            against callers).

    Returns:
        The sum over labels of ``probability * weight``.

    Raises:
        KeyError, IndexError: If the mapping lacks the expected nesting.
    """
    label_weights = {
        " Readable": 0.1,
        " Acceptable": 1,
        " Difficult": 10,
        " Unreadable": 100,
        " Unknown": 25,
    }

    # Tokens are compared case-sensitively against labels carrying a leading
    # space, so the labels are normalised the same way here.
    labels = [
        " " + name.strip().lower().capitalize()
        for name in ("Readable", "Acceptable", "Difficult", "Unreadable")
    ]

    top_logprobs = classification["completion"]["choices"][0]["logprobs"]["top_logprobs"][1]

    label_probs = {}
    for token, logp in top_logprobs.items():
        # Only a token that *starts* exactly one label identifies that label.
        # A plain substring test would credit e.g. "able" (or "") to several
        # labels at once and push the total probability above 1.
        owners = [label for label in labels if token and label.startswith(token)]
        if len(owners) == 1:
            # If several tokens start the same label, the last one seen wins.
            label_probs[owners[0]] = np.exp(logp)

    # Fill in the probability for the special "Unknown" label.
    attributed = sum(label_probs.values())
    if attributed < 1.0:
        label_probs[" Unknown"] = 1.0 - attributed

    return sum(prob * label_weights[label] for label, prob in label_probs.items())