azureopenai.py

import logging
from logging.handlers import RotatingFileHandler

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(message)s",
    handlers=[logging.FileHandler("openai_usage.log")],
)

import functools
import json
from typing import Any, Literal, Optional, cast

import dsp
import backoff
import openai
from openai import AzureOpenAI

from dsp.modules.cache_utils import CacheMemory, NotebookCacheMemory, cache_turn_on
from dsp.modules.lm import LM

try:
    OPENAI_LEGACY = int(openai.version.__version__[0]) == 0
except Exception:
    OPENAI_LEGACY = True

try:
    from openai.openai_object import OpenAIObject
    import openai.error

    ERRORS = (
        openai.error.RateLimitError,
        openai.error.ServiceUnavailableError,
        openai.error.APIError,
    )
except Exception:
    ERRORS = (openai.RateLimitError, openai.APIError)
    OpenAIObject = dict

def backoff_hdlr(details):
    """Handler from https://pypi.org/project/backoff/"""
    print(
        "Backing off {wait:0.1f} seconds after {tries} tries "
        "calling function {target} with kwargs "
        "{kwargs}".format(**details),
    )

class AzureOpenAI(LM):
    """Wrapper around OpenAI's GPT API. Supports the Azure API only.

    Args:
        model (str, optional): OpenAI or Azure supported LLM model to use. Defaults to "gpt-3.5-turbo-instruct".
        api_key (Optional[str], optional): API provider authentication token. Defaults to None.
        api_provider (Literal["openai", "azure"], optional): The API provider to use. Defaults to "openai".
        model_type (Literal["chat", "text"], optional): The type of model that was specified. Mainly used to decide the optimal prompting strategy. Defaults to "text".
        **kwargs: Additional arguments to pass to the API provider.
    """
    def __init__(
        self,
        model: str = "gpt-3.5-turbo-instruct",
        api_key: Optional[str] = None,
        api_provider: Literal["openai", "azure"] = "openai",
        api_base: Optional[str] = None,
        model_type: Optional[Literal["chat", "text"]] = None,
        **kwargs,
    ):
        super().__init__(model)
        self.provider = "openai"
        openai.api_type = api_provider

        default_model_type = (
            "chat"
            if ("gpt-3.5" in model or "turbo" in model or "gpt-4" in model)
            and ("instruct" not in model)
            else "text"
        )
        self.model_type = model_type if model_type else default_model_type

        if api_provider == "azure":
            # assert (
            #     "engine" in kwargs or "deployment_id" in kwargs
            # ), "Must specify engine or deployment_id for Azure API instead of model."
            assert "api_version" in kwargs, "Must specify api_version for Azure API"
            # assert api_base is not None, "Must specify api_base for Azure API"
            if kwargs.get("api_version"):
                openai.api_version = kwargs["api_version"]

        if api_key:
            openai.api_key = api_key

        if api_base:
            if OPENAI_LEGACY:
                openai.api_base = api_base
            else:
                openai.base_url = api_base

        self.kwargs = {
            "temperature": 0.0,
            "max_tokens": 150,
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0,
            "n": 1,
            **kwargs,
        }  # TODO: add kwargs above for </s>
        self.kwargs["model"] = model

        # self.client = AzureOpenAI(api_version=kwargs["api_version"])
        if api_provider == "azure":
            self.kwargs.pop("api_version", None)
            self.kwargs.pop("engine", None)
            self.kwargs.pop("deployment_id", None)

        self.history: list[dict[str, Any]] = []
    def _openai_client(self):
        return openai

    def log_usage(self, response):
        """Log the total tokens from the OpenAI API response."""
        usage_data = response.get("usage")
        if usage_data:
            total_tokens = usage_data.get("total_tokens")
            logging.info(f"{total_tokens}")
    def basic_request(self, prompt: str, **kwargs):
        raw_kwargs = kwargs
        kwargs = {**self.kwargs, **kwargs}
        if self.model_type == "chat":
            # caching mechanism requires hashable kwargs
            kwargs["messages"] = [{"role": "user", "content": prompt}]
            kwargs = {"stringify_request": json.dumps(kwargs)}
            response = chat_request(**kwargs)
        else:
            kwargs["prompt"] = prompt
            response = completions_request(**kwargs)

        history = {
            "prompt": prompt,
            "response": response,
            "kwargs": kwargs,
            "raw_kwargs": raw_kwargs,
        }
        self.history.append(history)

        return response
    @backoff.on_exception(
        backoff.expo,
        ERRORS,
        max_time=1000,
        on_backoff=backoff_hdlr,
    )
    def request(self, prompt: str, **kwargs):
        """Retrieves GPT-3 completions while handling rate limiting and caching."""
        if "model_type" in kwargs:
            del kwargs["model_type"]
        return self.basic_request(prompt, **kwargs)

    def _get_choice_text(self, choice: dict[str, Any]) -> str:
        if self.model_type == "chat":
            return choice["message"]["content"]
        return choice["text"]
    def __call__(
        self,
        prompt: str,
        only_completed: bool = True,
        return_sorted: bool = False,
        **kwargs,
    ) -> list[dict[str, Any]]:
        """Retrieves completions from GPT-3.

        Args:
            prompt (str): prompt to send to GPT-3
            only_completed (bool, optional): return only completed responses, ignoring any cut off due to length. Defaults to True.
            return_sorted (bool, optional): sort the completion choices using the returned probabilities. Defaults to False.

        Returns:
            list[dict[str, Any]]: list of completion choices
        """
        assert only_completed, "for now"
        assert return_sorted is False, "for now"

        # if kwargs.get("n", 1) > 1:
        #     if self.model_type == "chat":
        #         kwargs = {**kwargs}
        #     else:
        #         kwargs = {**kwargs, "logprobs": 5}

        response = self.request(prompt, **kwargs)

        if dsp.settings.log_openai_usage:
            self.log_usage(response)

        choices = response["choices"]
        completed_choices = [c for c in choices if c["finish_reason"] != "length"]

        if only_completed and len(completed_choices):
            choices = completed_choices

        completions = [self._get_choice_text(c) for c in choices]
        if return_sorted and kwargs.get("n", 1) > 1:
            scored_completions = []
            for c in choices:
                tokens, logprobs = (
                    c["logprobs"]["tokens"],
                    c["logprobs"]["token_logprobs"],
                )
                if "<|endoftext|>" in tokens:
                    index = tokens.index("<|endoftext|>") + 1
                    tokens, logprobs = tokens[:index], logprobs[:index]
                avglog = sum(logprobs) / len(logprobs)
                scored_completions.append((avglog, self._get_choice_text(c)))
            scored_completions = sorted(scored_completions, reverse=True)
            completions = [c for _, c in scored_completions]

        return completions
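
# Request-path sketch (descriptive comments added for illustration; the calls shown
# use hypothetical values, not a real trace). For a chat model, basic_request()
# packs everything into a single JSON string so the cache decorators below see one
# hashable kwarg, e.g.:
#   chat_request(stringify_request='{"temperature": 0.0, "max_tokens": 150, ..., '
#                                  '"messages": [{"role": "user", "content": "<prompt>"}]}')
# For a text model, the prompt is passed through directly, e.g.:
#   completions_request(model="gpt-3.5-turbo-instruct", prompt="<prompt>", temperature=0.0, ...)
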
@CacheMemory.cache
def cached_gpt3_request_v2(**kwargs):
    return openai.Completion.create(**kwargs)


@functools.lru_cache(maxsize=None if cache_turn_on else 0)
@NotebookCacheMemory.cache
def cached_gpt3_request_v2_wrapped(**kwargs):
    return cached_gpt3_request_v2(**kwargs)


@CacheMemory.cache
def _cached_gpt3_turbo_request_v2(**kwargs) -> OpenAIObject:
    if "stringify_request" in kwargs:
        kwargs = json.loads(kwargs["stringify_request"])
    return cast(OpenAIObject, openai.ChatCompletion.create(**kwargs))


@functools.lru_cache(maxsize=None if cache_turn_on else 0)
@NotebookCacheMemory.cache
def _cached_gpt3_turbo_request_v2_wrapped(**kwargs) -> OpenAIObject:
    return _cached_gpt3_turbo_request_v2(**kwargs)


@CacheMemory.cache
def v1_cached_gpt3_request_v2(**kwargs):
    return openai.completions.create(**kwargs)


@functools.lru_cache(maxsize=None if cache_turn_on else 0)
@NotebookCacheMemory.cache
def v1_cached_gpt3_request_v2_wrapped(**kwargs):
    return v1_cached_gpt3_request_v2(**kwargs)


@CacheMemory.cache
def v1_cached_gpt3_turbo_request_v2(**kwargs):
    if "stringify_request" in kwargs:
        kwargs = json.loads(kwargs["stringify_request"])
    return openai.chat.completions.create(**kwargs)


@functools.lru_cache(maxsize=None if cache_turn_on else 0)
@NotebookCacheMemory.cache
def v1_cached_gpt3_turbo_request_v2_wrapped(**kwargs):
    return v1_cached_gpt3_turbo_request_v2(**kwargs)

def chat_request(**kwargs):
    if OPENAI_LEGACY:
        return _cached_gpt3_turbo_request_v2_wrapped(**kwargs)

    return v1_cached_gpt3_turbo_request_v2_wrapped(**kwargs).model_dump()


def completions_request(**kwargs):
    if OPENAI_LEGACY:
        return cached_gpt3_request_v2_wrapped(**kwargs)

    return v1_cached_gpt3_request_v2_wrapped(**kwargs).model_dump()
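

# --- Hypothetical usage sketch (not part of the original module) ---
# A minimal example of how this wrapper is intended to be driven, per the class
# docstring. The deployment name, endpoint, key, and api_version below are
# placeholders/assumptions, not values taken from this file.
if __name__ == "__main__":
    lm = AzureOpenAI(
        model="gpt-35-turbo",                             # assumed Azure deployment name
        api_key="<azure-openai-key>",                     # placeholder credential
        api_provider="azure",
        api_base="https://<resource>.openai.azure.com/",  # placeholder endpoint
        api_version="2023-05-15",                         # assumed API version
        model_type="chat",
    )
    # __call__ returns a list of completion strings; print the first one.
    print(lm("Say hello in one short sentence.")[0])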