Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Replace deprecated text-davinci-003 model with gpt-3.5-turbo-instruct model #6660

Merged
merged 3 commits into from
Dec 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions e2e/pipelines/test_pipeline_topologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,7 @@ def test_join_with_rrf(docs):
results = p.run(query=query)

# list of precalculated expected results
expected_scores = [
0.03278688524590164,
0.03200204813108039,
0.03200204813108039,
0.031009615384615385,
0.031009615384615385,
]
expected_scores = [1.0, 0.9684979838709676, 0.9684979838709676, 0.9533577533577533, 0.9533577533577533]
assert all(
doc.score == pytest.approx(expected_scores[idx], abs=1e-3) for idx, doc in enumerate(results["documents"])
)
2 changes: 1 addition & 1 deletion e2e/pipelines/test_standard_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_webqa_pipeline():
search_key = os.environ.get("SERPERDEV_API_KEY")
openai_key = os.environ.get("OPENAI_API_KEY")
pn = PromptNode(
"text-davinci-003",
"gpt-3.5-turbo-instruct",
api_key=openai_key,
max_length=256,
default_prompt_template="question-answering-with-document-scores",
Expand Down
2 changes: 1 addition & 1 deletion examples/web_lfqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"""

prompt_node = PromptNode(
"text-davinci-003", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=256
"gpt-3.5-turbo-instruct", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=256
)

web_retriever = WebRetriever(api_key=search_key, top_search_results=5, mode="preprocessed_documents", top_k=30)
Expand Down
2 changes: 1 addition & 1 deletion examples/web_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
raise ValueError("Please set the OPENAI_API_KEY environment variable")

prompt_node = PromptNode(
"text-davinci-003",
"gpt-3.5-turbo-instruct",
api_key=openai_key,
max_length=256,
default_prompt_template="question-answering-with-document-scores",
Expand Down
4 changes: 2 additions & 2 deletions haystack/nodes/prompt/invocation_layer/azure_open_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(
azure_deployment_name: str,
api_key: str,
api_version: str = "2022-12-01",
model_name_or_path: str = "text-davinci-003",
model_name_or_path: str = "gpt-3.5-turbo-instruct",
max_length: Optional[int] = 100,
**kwargs,
):
Expand All @@ -42,7 +42,7 @@ def supports(cls, model_name_or_path: str, **kwargs) -> bool:
Ensures Azure OpenAI Invocation Layer is selected when `azure_base_url` and `azure_deployment_name` are provided in
addition to a list of supported models.
"""
valid_model = model_name_or_path in ["ada", "babbage", "davinci", "curie"] or any(
valid_model = model_name_or_path in ["ada", "babbage", "davinci", "curie", "gpt-3.5-turbo-instruct"] or any(
m in model_name_or_path for m in ["-ada-", "-babbage-", "-davinci-", "-curie-"]
)
return valid_model and has_azure_parameters(**kwargs)
2 changes: 1 addition & 1 deletion haystack/nodes/prompt/invocation_layer/open_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class OpenAIInvocationLayer(PromptModelInvocationLayer):
def __init__(
self,
api_key: str,
model_name_or_path: str = "text-davinci-003",
model_name_or_path: str = "gpt-3.5-turbo-instruct",
max_length: Optional[int] = 100,
api_base: str = "https://api.openai.com/v1",
openai_organization: Optional[str] = None,
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/sampler/top_p_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class TopPSampler(BaseSampler):

```python
prompt_node = PromptNode(
"text-davinci-003",
"gpt-3.5-turbo-instruct",
api_key=openai_key,
max_length=256,
default_prompt_template="question-answering-with-document-scores",
Expand Down
2 changes: 1 addition & 1 deletion test/pipelines/test_standard_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_webqa_pipeline():
search_key = os.environ.get("SERPERDEV_API_KEY")
openai_key = os.environ.get("OPENAI_API_KEY")
pn = PromptNode(
"text-davinci-003",
"gpt-3.5-turbo-instruct",
api_key=openai_key,
max_length=256,
default_prompt_template="question-answering-with-document-scores",
Expand Down
4 changes: 2 additions & 2 deletions test/prompt/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ def prompt_model(request, haystack_azure_conf):
api_key = os.environ.get("OPENAI_API_KEY", "KEY_NOT_FOUND")
if api_key is None or api_key == "":
api_key = "KEY_NOT_FOUND"
return PromptModel("text-davinci-003", api_key=api_key)
return PromptModel("gpt-3.5-turbo-instruct", api_key=api_key)
elif request.param == "azure":
api_key = os.environ.get("AZURE_OPENAI_API_KEY", "KEY_NOT_FOUND")
if api_key is None or api_key == "":
api_key = "KEY_NOT_FOUND"
return PromptModel("text-davinci-003", api_key=api_key, model_kwargs=haystack_azure_conf)
return PromptModel("gpt-3.5-turbo-instruct", api_key=api_key, model_kwargs=haystack_azure_conf)
else:
return PromptModel("google/flan-t5-base", devices=["cpu"])

Expand Down
4 changes: 2 additions & 2 deletions test/prompt/invocation_layer/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_openai_token_limit_warning(mock_openai_tokenizer, caplog):
@pytest.mark.parametrize(
"model_name,max_tokens_limit",
[
("text-davinci-003", 4097),
("gpt-3.5-turbo-instruct", 4096),
("gpt-3.5-turbo", 4096),
("gpt-3.5-turbo-16k", 16384),
("gpt-4-32k", 32768),
Expand All @@ -76,7 +76,7 @@ def test_openai_token_limit_warning_not_triggered(caplog, mock_openai_tokenizer,
@pytest.mark.parametrize(
"model_name,max_tokens_limit",
[
("text-davinci-003", 4097),
("gpt-3.5-turbo-instruct", 4096),
("gpt-3.5-turbo", 4096),
("gpt-3.5-turbo-16k", 16384),
("gpt-4-32k", 32768),
Expand Down
8 changes: 4 additions & 4 deletions test/prompt/test_prompt_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,15 @@ def test_azure_vs_open_ai_invocation_layer_selection():
node = PromptNode("gpt-4", api_key="some_key", model_kwargs=azure_model_kwargs)
assert isinstance(node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer)

node = PromptNode("text-davinci-003", api_key="some_key", model_kwargs=azure_model_kwargs)
node = PromptNode("gpt-3.5-turbo-instruct", api_key="some_key", model_kwargs=azure_model_kwargs)
assert isinstance(node.prompt_model.model_invocation_layer, AzureOpenAIInvocationLayer)

node = PromptNode("gpt-4", api_key="some_key")
assert isinstance(node.prompt_model.model_invocation_layer, ChatGPTInvocationLayer) and not isinstance(
node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer
)

node = PromptNode("text-davinci-003", api_key="some_key")
node = PromptNode("gpt-3.5-turbo-instruct", api_key="some_key")
assert isinstance(node.prompt_model.model_invocation_layer, OpenAIInvocationLayer) and not isinstance(
node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer
)
Expand Down Expand Up @@ -850,7 +850,7 @@ def test_complex_pipeline_with_all_features(tmp_path, haystack_openai_config):
- name: pmodel_openai
type: PromptModel
params:
model_name_or_path: text-davinci-003
model_name_or_path: gpt-3.5-turbo-instruct
model_kwargs:
temperature: 0.9
max_tokens: 64
Expand Down Expand Up @@ -1052,7 +1052,7 @@ def test_content_moderation_gpt_3():
OpenAIInvocationLayer.
"""
prompt_node = PromptNode(
model_name_or_path="text-davinci-003", api_key="key", model_kwargs={"moderate_content": True}
model_name_or_path="gpt-3.5-turbo-instruct", api_key="key", model_kwargs={"moderate_content": True}
)
with patch("haystack.nodes.prompt.invocation_layer.open_ai.check_openai_policy_violation") as mock_check, patch(
"haystack.nodes.prompt.invocation_layer.open_ai.openai_request"
Expand Down
6 changes: 3 additions & 3 deletions test/utils/test_openai_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ def test_openai_text_completion_tokenization_details_gpt_default():

@pytest.mark.unit
def test_openai_text_completion_tokenization_details_gpt_davinci():
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-davinci-003")
assert tokenizer_name == "p50k_base"
assert max_tokens_limit == 4097
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-instruct")
assert tokenizer_name == "cl100k_base"
assert max_tokens_limit == 4096


@pytest.mark.unit
Expand Down