ui_utils.py
import json
from html import escape

from costing import estimate_cost


def decode_streamed_http_response(response_data):
"""Decode the streamed HTTP response of text or chat response streams"""
combined_string = ""
chunks = response_data.split("data: ")
for chunk in chunks:
if chunk.strip() == "[DONE]":
break
if not chunk:
continue
json_data = json.loads(chunk)
content = ""
# Try to extract chunks of text, but fall back to chat:
try:
content = json_data["choices"][0]["text"]
except Exception:
content = (
json_data.get("choices", [{}])[0].get(
"delta", {}).get("content", "")
)
combined_string += content
return combined_string


def decode_input_messages(request_data):
    """Decode the chat messages from the raw (JSON-encoded) request body."""
    request_data = json.loads(request_data)
    return request_data["messages"]
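
# For example (illustrative input):
#   decode_input_messages('{"messages": [{"role": "user", "content": "Hi"}]}')
# returns [{"role": "user", "content": "Hi"}].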


def format_chat_input(data):
    """Format the chat messages as tab-separated text for the stats page."""
    if not data:
        return ""
    table_text = "Role\tContent\n\n"
    for item in data:
        role = item.get("role")
        content = item.get("content")
        table_text += f"{role}\t{content}\n\n"
    return table_text
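
# For example (illustrative input):
#   format_chat_input([{"role": "system", "content": "Be brief."},
#                      {"role": "user", "content": "Hi"}])
# returns "Role\tContent\n\nsystem\tBe brief.\n\nuser\tHi\n\n".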


def stats_template(cache=None, OPENAI_API_BASE="https://api.openai.com/v1"):
    """Render the cached requests and responses as an HTML stats page.

    Each cache row is expected to provide raw bytes under "request_data" and
    "content", plus a "url" string and (optionally) a "method".
    """
    # Avoid a mutable default argument; treat a missing cache as empty.
    cache = cache or {}
    data = []
    total_request_costings_estimate = 0
    for _, row in cache.items():
        try:
            request_data = json.loads(row["request_data"].decode())
        except (json.decoder.JSONDecodeError, UnicodeDecodeError):
            continue
        messages = request_data.get("messages")
        if messages:
            messages = format_chat_input(messages)
        # "prompt" may be a string or a list of strings; normalise to a string.
        prompt = request_data.get("prompt")
        if isinstance(prompt, list):
            prompt = prompt[0] if prompt else ""
        input_data = messages or prompt or ""
        response_text = decode_streamed_http_response(row["content"].decode())
        model = request_data.get("model")
        estimated_request_cost = estimate_cost(
            request_text=input_data, response_text=response_text, model=model
        )
        total_request_costings_estimate += estimated_request_cost
        data.append(
            {
                "url": row["url"].replace(OPENAI_API_BASE, ""),
                "input": input_data,
                "content": response_text,
                "response_length": len(response_text),
                "model": model,
                "cost_estimate": f"${estimated_request_cost:.6f}",
                "request_method": row.get("method", "?"),
            }
        )
table_rows = ""
for row in data:
table_rows += f"""
<tr>
<td>{row['url']}</td>
<td>{row['input']}</td>
<td>{row['content']}</td>
<td>{row['response_length']}</td>
<td>{row['model']}</td>
<td>{row['cost_estimate']}</td>
<td>{row['request_method']}</td>
</tr>
"""
    total_request_costings_estimate_formatted = (
        f"${total_request_costings_estimate:.6f}"
    )
    html = f"""
    <html>
    <head>
        <title>Cached LLM Responses</title>
    </head>
    <body>
        <h1>Cached Responses</h1>
        <h3>Total Cost Estimate: {total_request_costings_estimate_formatted}</h3>
        <table>
            <tr>
                <th>URL</th>
                <th>Input</th>
                <th>Response</th>
                <th>Length</th>
                <th>Model</th>
                <th>Cost est.</th>
                <th>Method</th>
            </tr>
            {table_rows}
        </table>
    </body>
    </html>
    """
    return html
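

# Minimal smoke test, runnable with `python ui_utils.py` (assumes the sibling
# costing module is importable; the sample stream below is an illustrative
# assumption about the upstream SSE shape, not captured traffic).
if __name__ == "__main__":
    sample_stream = (
        'data: {"choices": [{"delta": {"content": "Hel"}}]}\n\n'
        'data: {"choices": [{"delta": {"content": "lo"}}]}\n\n'
        "data: [DONE]\n\n"
    )
    assert decode_streamed_http_response(sample_stream) == "Hello"
    assert format_chat_input([{"role": "user", "content": "Hi"}]) == (
        "Role\tContent\n\nuser\tHi\n\n"
    )
    print("ui_utils smoke test passed")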