From ca812cb3dab420e2fa377c30869b869e95b8f210 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 29 Dec 2023 11:07:25 +0100 Subject: [PATCH] fix: Replace deprecated `text-davinci-003` model with `gpt-3.5-turbo-instruct` model (#6660) * replace davinci with 3.5 turbo instruct model * fix max token limit and tokenizer in tests * fix azure model support and scores in tests --- e2e/pipelines/test_pipeline_topologies.py | 8 +------- e2e/pipelines/test_standard_pipelines.py | 2 +- examples/web_lfqa.py | 2 +- examples/web_qa.py | 2 +- haystack/nodes/prompt/invocation_layer/azure_open_ai.py | 4 ++-- haystack/nodes/prompt/invocation_layer/open_ai.py | 2 +- haystack/nodes/sampler/top_p_sampler.py | 2 +- test/pipelines/test_standard_pipelines.py | 2 +- test/prompt/conftest.py | 4 ++-- test/prompt/invocation_layer/test_openai.py | 4 ++-- test/prompt/test_prompt_node.py | 8 ++++---- test/utils/test_openai_utils.py | 6 +++--- 12 files changed, 20 insertions(+), 26 deletions(-) diff --git a/e2e/pipelines/test_pipeline_topologies.py b/e2e/pipelines/test_pipeline_topologies.py index 752191d34a..f2d1a1001e 100644 --- a/e2e/pipelines/test_pipeline_topologies.py +++ b/e2e/pipelines/test_pipeline_topologies.py @@ -178,13 +178,7 @@ def test_join_with_rrf(docs): results = p.run(query=query) # list of precalculated expected results - expected_scores = [ - 0.03278688524590164, - 0.03200204813108039, - 0.03200204813108039, - 0.031009615384615385, - 0.031009615384615385, - ] + expected_scores = [1.0, 0.9684979838709676, 0.9684979838709676, 0.9533577533577533, 0.9533577533577533] assert all( doc.score == pytest.approx(expected_scores[idx], abs=1e-3) for idx, doc in enumerate(results["documents"]) ) diff --git a/e2e/pipelines/test_standard_pipelines.py b/e2e/pipelines/test_standard_pipelines.py index f25ddcd13b..3ebc5a4702 100644 --- a/e2e/pipelines/test_standard_pipelines.py +++ b/e2e/pipelines/test_standard_pipelines.py @@ -207,7 +207,7 @@ def test_webqa_pipeline(): search_key = os.environ.get("SERPERDEV_API_KEY") openai_key = os.environ.get("OPENAI_API_KEY") pn = PromptNode( - "text-davinci-003", + "gpt-3.5-turbo-instruct", api_key=openai_key, max_length=256, default_prompt_template="question-answering-with-document-scores", diff --git a/examples/web_lfqa.py b/examples/web_lfqa.py index ff5dbe15e0..cfdf81c602 100644 --- a/examples/web_lfqa.py +++ b/examples/web_lfqa.py @@ -21,7 +21,7 @@ """ prompt_node = PromptNode( - "text-davinci-003", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=256 + "gpt-3.5-turbo-instruct", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=256 ) web_retriever = WebRetriever(api_key=search_key, top_search_results=5, mode="preprocessed_documents", top_k=30) diff --git a/examples/web_qa.py b/examples/web_qa.py index 352d2d226d..adc7f19dba 100644 --- a/examples/web_qa.py +++ b/examples/web_qa.py @@ -12,7 +12,7 @@ raise ValueError("Please set the OPENAI_API_KEY environment variable") prompt_node = PromptNode( - "text-davinci-003", + "gpt-3.5-turbo-instruct", api_key=openai_key, max_length=256, default_prompt_template="question-answering-with-document-scores", diff --git a/haystack/nodes/prompt/invocation_layer/azure_open_ai.py b/haystack/nodes/prompt/invocation_layer/azure_open_ai.py index d10dc65463..001d6da8ba 100644 --- a/haystack/nodes/prompt/invocation_layer/azure_open_ai.py +++ b/haystack/nodes/prompt/invocation_layer/azure_open_ai.py @@ -19,7 +19,7 @@ def __init__( azure_deployment_name: str, api_key: str, api_version: str = "2022-12-01", - model_name_or_path: str = "text-davinci-003", + model_name_or_path: str = "gpt-3.5-turbo-instruct", max_length: Optional[int] = 100, **kwargs, ): @@ -42,7 +42,7 @@ def supports(cls, model_name_or_path: str, **kwargs) -> bool: Ensures Azure OpenAI Invocation Layer is selected when `azure_base_url` and `azure_deployment_name` are provided in addition to a list of supported models. """ - valid_model = model_name_or_path in ["ada", "babbage", "davinci", "curie"] or any( + valid_model = model_name_or_path in ["ada", "babbage", "davinci", "curie", "gpt-3.5-turbo-instruct"] or any( m in model_name_or_path for m in ["-ada-", "-babbage-", "-davinci-", "-curie-"] ) return valid_model and has_azure_parameters(**kwargs) diff --git a/haystack/nodes/prompt/invocation_layer/open_ai.py b/haystack/nodes/prompt/invocation_layer/open_ai.py index 825da26234..0e26d709f8 100644 --- a/haystack/nodes/prompt/invocation_layer/open_ai.py +++ b/haystack/nodes/prompt/invocation_layer/open_ai.py @@ -33,7 +33,7 @@ class OpenAIInvocationLayer(PromptModelInvocationLayer): def __init__( self, api_key: str, - model_name_or_path: str = "text-davinci-003", + model_name_or_path: str = "gpt-3.5-turbo-instruct", max_length: Optional[int] = 100, api_base: str = "https://api.openai.com/v1", openai_organization: Optional[str] = None, diff --git a/haystack/nodes/sampler/top_p_sampler.py b/haystack/nodes/sampler/top_p_sampler.py index b77e448760..60d09d83a9 100644 --- a/haystack/nodes/sampler/top_p_sampler.py +++ b/haystack/nodes/sampler/top_p_sampler.py @@ -35,7 +35,7 @@ class TopPSampler(BaseSampler): ```python prompt_node = PromptNode( - "text-davinci-003", + "gpt-3.5-turbo-instruct", api_key=openai_key, max_length=256, default_prompt_template="question-answering-with-document-scores", diff --git a/test/pipelines/test_standard_pipelines.py b/test/pipelines/test_standard_pipelines.py index 2d6523d7a6..d8512c2fbd 100644 --- a/test/pipelines/test_standard_pipelines.py +++ b/test/pipelines/test_standard_pipelines.py @@ -79,7 +79,7 @@ def test_webqa_pipeline(): search_key = os.environ.get("SERPERDEV_API_KEY") openai_key = os.environ.get("OPENAI_API_KEY") pn = PromptNode( - "text-davinci-003", + "gpt-3.5-turbo-instruct", api_key=openai_key, max_length=256, default_prompt_template="question-answering-with-document-scores", diff --git a/test/prompt/conftest.py b/test/prompt/conftest.py index 9d38e6d0dd..12b850207f 100644 --- a/test/prompt/conftest.py +++ b/test/prompt/conftest.py @@ -23,12 +23,12 @@ def prompt_model(request, haystack_azure_conf): api_key = os.environ.get("OPENAI_API_KEY", "KEY_NOT_FOUND") if api_key is None or api_key == "": api_key = "KEY_NOT_FOUND" - return PromptModel("text-davinci-003", api_key=api_key) + return PromptModel("gpt-3.5-turbo-instruct", api_key=api_key) elif request.param == "azure": api_key = os.environ.get("AZURE_OPENAI_API_KEY", "KEY_NOT_FOUND") if api_key is None or api_key == "": api_key = "KEY_NOT_FOUND" - return PromptModel("text-davinci-003", api_key=api_key, model_kwargs=haystack_azure_conf) + return PromptModel("gpt-3.5-turbo-instruct", api_key=api_key, model_kwargs=haystack_azure_conf) else: return PromptModel("google/flan-t5-base", devices=["cpu"]) diff --git a/test/prompt/invocation_layer/test_openai.py b/test/prompt/invocation_layer/test_openai.py index 5ae3458788..63a47b31ad 100644 --- a/test/prompt/invocation_layer/test_openai.py +++ b/test/prompt/invocation_layer/test_openai.py @@ -53,7 +53,7 @@ def test_openai_token_limit_warning(mock_openai_tokenizer, caplog): @pytest.mark.parametrize( "model_name,max_tokens_limit", [ - ("text-davinci-003", 4097), + ("gpt-3.5-turbo-instruct", 4096), ("gpt-3.5-turbo", 4096), ("gpt-3.5-turbo-16k", 16384), ("gpt-4-32k", 32768), @@ -76,7 +76,7 @@ def test_openai_token_limit_warning_not_triggered(caplog, mock_openai_tokenizer, @pytest.mark.parametrize( "model_name,max_tokens_limit", [ - ("text-davinci-003", 4097), + ("gpt-3.5-turbo-instruct", 4096), ("gpt-3.5-turbo", 4096), ("gpt-3.5-turbo-16k", 16384), ("gpt-4-32k", 32768), diff --git a/test/prompt/test_prompt_node.py b/test/prompt/test_prompt_node.py index 972a04be18..8a5d3459d5 100644 --- a/test/prompt/test_prompt_node.py +++ b/test/prompt/test_prompt_node.py @@ -216,7 +216,7 @@ def test_azure_vs_open_ai_invocation_layer_selection(): node = PromptNode("gpt-4", api_key="some_key", model_kwargs=azure_model_kwargs) assert isinstance(node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer) - node = PromptNode("text-davinci-003", api_key="some_key", model_kwargs=azure_model_kwargs) + node = PromptNode("gpt-3.5-turbo-instruct", api_key="some_key", model_kwargs=azure_model_kwargs) assert isinstance(node.prompt_model.model_invocation_layer, AzureOpenAIInvocationLayer) node = PromptNode("gpt-4", api_key="some_key") @@ -224,7 +224,7 @@ def test_azure_vs_open_ai_invocation_layer_selection(): node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer ) - node = PromptNode("text-davinci-003", api_key="some_key") + node = PromptNode("gpt-3.5-turbo-instruct", api_key="some_key") assert isinstance(node.prompt_model.model_invocation_layer, OpenAIInvocationLayer) and not isinstance( node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer ) @@ -850,7 +850,7 @@ def test_complex_pipeline_with_all_features(tmp_path, haystack_openai_config): - name: pmodel_openai type: PromptModel params: - model_name_or_path: text-davinci-003 + model_name_or_path: gpt-3.5-turbo-instruct model_kwargs: temperature: 0.9 max_tokens: 64 @@ -1052,7 +1052,7 @@ def test_content_moderation_gpt_3(): OpenAIInvocationLayer. """ prompt_node = PromptNode( - model_name_or_path="text-davinci-003", api_key="key", model_kwargs={"moderate_content": True} + model_name_or_path="gpt-3.5-turbo-instruct", api_key="key", model_kwargs={"moderate_content": True} ) with patch("haystack.nodes.prompt.invocation_layer.open_ai.check_openai_policy_violation") as mock_check, patch( "haystack.nodes.prompt.invocation_layer.open_ai.openai_request" diff --git a/test/utils/test_openai_utils.py b/test/utils/test_openai_utils.py index 7126542f0c..92add5f219 100644 --- a/test/utils/test_openai_utils.py +++ b/test/utils/test_openai_utils.py @@ -22,9 +22,9 @@ def test_openai_text_completion_tokenization_details_gpt_default(): @pytest.mark.unit def test_openai_text_completion_tokenization_details_gpt_davinci(): - tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-davinci-003") - assert tokenizer_name == "p50k_base" - assert max_tokens_limit == 4097 + tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-instruct") + assert tokenizer_name == "cl100k_base" + assert max_tokens_limit == 4096 @pytest.mark.unit