fix: Replace deprecated text-davinci-003 model with `gpt-3.5-turbo-instruct` model (#6660)

* replace davinci with 3.5 turbo instruct model
* fix max token limit and tokenizer in tests
* fix azure model support and scores in tests
deepset-ai · Dec 29, 2023 · ca812cb · ca812cb
1 parent 123cfd7
commit ca812cb
Show file tree

Hide file tree

Showing 12 changed files with 20 additions and 26 deletions.
diff --git a/e2e/pipelines/test_pipeline_topologies.py b/e2e/pipelines/test_pipeline_topologies.py
@@ -178,13 +178,7 @@ def test_join_with_rrf(docs):
     results = p.run(query=query)
 
     # list of precalculated expected results
-    expected_scores = [
-        0.03278688524590164,
-        0.03200204813108039,
-        0.03200204813108039,
-        0.031009615384615385,
-        0.031009615384615385,
-    ]
+    expected_scores = [1.0, 0.9684979838709676, 0.9684979838709676, 0.9533577533577533, 0.9533577533577533]
     assert all(
         doc.score == pytest.approx(expected_scores[idx], abs=1e-3) for idx, doc in enumerate(results["documents"])
     )
diff --git a/e2e/pipelines/test_standard_pipelines.py b/e2e/pipelines/test_standard_pipelines.py
@@ -207,7 +207,7 @@ def test_webqa_pipeline():
     search_key = os.environ.get("SERPERDEV_API_KEY")
     openai_key = os.environ.get("OPENAI_API_KEY")
     pn = PromptNode(
-        "text-davinci-003",
+        "gpt-3.5-turbo-instruct",
         api_key=openai_key,
         max_length=256,
         default_prompt_template="question-answering-with-document-scores",

diff --git a/examples/web_lfqa.py b/examples/web_lfqa.py
@@ -21,7 +21,7 @@
 """
 
 prompt_node = PromptNode(
-    "text-davinci-003", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=256
+    "gpt-3.5-turbo-instruct", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=256
 )
 
 web_retriever = WebRetriever(api_key=search_key, top_search_results=5, mode="preprocessed_documents", top_k=30)

diff --git a/examples/web_qa.py b/examples/web_qa.py
@@ -12,7 +12,7 @@
     raise ValueError("Please set the OPENAI_API_KEY environment variable")
 
 prompt_node = PromptNode(
-    "text-davinci-003",
+    "gpt-3.5-turbo-instruct",
     api_key=openai_key,
     max_length=256,
     default_prompt_template="question-answering-with-document-scores",

diff --git a/haystack/nodes/prompt/invocation_layer/azure_open_ai.py b/haystack/nodes/prompt/invocation_layer/azure_open_ai.py
@@ -19,7 +19,7 @@ def __init__(
         azure_deployment_name: str,
         api_key: str,
         api_version: str = "2022-12-01",
-        model_name_or_path: str = "text-davinci-003",
+        model_name_or_path: str = "gpt-3.5-turbo-instruct",
         max_length: Optional[int] = 100,
         **kwargs,
     ):
@@ -42,7 +42,7 @@ def supports(cls, model_name_or_path: str, **kwargs) -> bool:
         Ensures Azure OpenAI Invocation Layer is selected when `azure_base_url` and `azure_deployment_name` are provided in
         addition to a list of supported models.
         """
-        valid_model = model_name_or_path in ["ada", "babbage", "davinci", "curie"] or any(
+        valid_model = model_name_or_path in ["ada", "babbage", "davinci", "curie", "gpt-3.5-turbo-instruct"] or any(
             m in model_name_or_path for m in ["-ada-", "-babbage-", "-davinci-", "-curie-"]
         )
         return valid_model and has_azure_parameters(**kwargs)
diff --git a/haystack/nodes/prompt/invocation_layer/open_ai.py b/haystack/nodes/prompt/invocation_layer/open_ai.py
@@ -33,7 +33,7 @@ class OpenAIInvocationLayer(PromptModelInvocationLayer):
     def __init__(
         self,
         api_key: str,
-        model_name_or_path: str = "text-davinci-003",
+        model_name_or_path: str = "gpt-3.5-turbo-instruct",
         max_length: Optional[int] = 100,
         api_base: str = "https://api.openai.com/v1",
         openai_organization: Optional[str] = None,

diff --git a/haystack/nodes/sampler/top_p_sampler.py b/haystack/nodes/sampler/top_p_sampler.py
@@ -35,7 +35,7 @@ class TopPSampler(BaseSampler):
 
     ```python
     prompt_node = PromptNode(
-        "text-davinci-003",
+        "gpt-3.5-turbo-instruct",
         api_key=openai_key,
         max_length=256,
         default_prompt_template="question-answering-with-document-scores",

diff --git a/test/pipelines/test_standard_pipelines.py b/test/pipelines/test_standard_pipelines.py
@@ -79,7 +79,7 @@ def test_webqa_pipeline():
     search_key = os.environ.get("SERPERDEV_API_KEY")
     openai_key = os.environ.get("OPENAI_API_KEY")
     pn = PromptNode(
-        "text-davinci-003",
+        "gpt-3.5-turbo-instruct",
         api_key=openai_key,
         max_length=256,
         default_prompt_template="question-answering-with-document-scores",

diff --git a/test/prompt/conftest.py b/test/prompt/conftest.py
@@ -23,12 +23,12 @@ def prompt_model(request, haystack_azure_conf):
         api_key = os.environ.get("OPENAI_API_KEY", "KEY_NOT_FOUND")
         if api_key is None or api_key == "":
             api_key = "KEY_NOT_FOUND"
-        return PromptModel("text-davinci-003", api_key=api_key)
+        return PromptModel("gpt-3.5-turbo-instruct", api_key=api_key)
     elif request.param == "azure":
         api_key = os.environ.get("AZURE_OPENAI_API_KEY", "KEY_NOT_FOUND")
         if api_key is None or api_key == "":
             api_key = "KEY_NOT_FOUND"
-        return PromptModel("text-davinci-003", api_key=api_key, model_kwargs=haystack_azure_conf)
+        return PromptModel("gpt-3.5-turbo-instruct", api_key=api_key, model_kwargs=haystack_azure_conf)
     else:
         return PromptModel("google/flan-t5-base", devices=["cpu"])
 

diff --git a/test/prompt/invocation_layer/test_openai.py b/test/prompt/invocation_layer/test_openai.py
@@ -53,7 +53,7 @@ def test_openai_token_limit_warning(mock_openai_tokenizer, caplog):
 @pytest.mark.parametrize(
     "model_name,max_tokens_limit",
     [
-        ("text-davinci-003", 4097),
+        ("gpt-3.5-turbo-instruct", 4096),
         ("gpt-3.5-turbo", 4096),
         ("gpt-3.5-turbo-16k", 16384),
         ("gpt-4-32k", 32768),
@@ -76,7 +76,7 @@ def test_openai_token_limit_warning_not_triggered(caplog, mock_openai_tokenizer,
 @pytest.mark.parametrize(
     "model_name,max_tokens_limit",
     [
-        ("text-davinci-003", 4097),
+        ("gpt-3.5-turbo-instruct", 4096),
         ("gpt-3.5-turbo", 4096),
         ("gpt-3.5-turbo-16k", 16384),
         ("gpt-4-32k", 32768),

diff --git a/test/prompt/test_prompt_node.py b/test/prompt/test_prompt_node.py
@@ -216,15 +216,15 @@ def test_azure_vs_open_ai_invocation_layer_selection():
     node = PromptNode("gpt-4", api_key="some_key", model_kwargs=azure_model_kwargs)
     assert isinstance(node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer)
 
-    node = PromptNode("text-davinci-003", api_key="some_key", model_kwargs=azure_model_kwargs)
+    node = PromptNode("gpt-3.5-turbo-instruct", api_key="some_key", model_kwargs=azure_model_kwargs)
     assert isinstance(node.prompt_model.model_invocation_layer, AzureOpenAIInvocationLayer)
 
     node = PromptNode("gpt-4", api_key="some_key")
     assert isinstance(node.prompt_model.model_invocation_layer, ChatGPTInvocationLayer) and not isinstance(
         node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer
     )
 
-    node = PromptNode("text-davinci-003", api_key="some_key")
+    node = PromptNode("gpt-3.5-turbo-instruct", api_key="some_key")
     assert isinstance(node.prompt_model.model_invocation_layer, OpenAIInvocationLayer) and not isinstance(
         node.prompt_model.model_invocation_layer, AzureChatGPTInvocationLayer
     )
@@ -850,7 +850,7 @@ def test_complex_pipeline_with_all_features(tmp_path, haystack_openai_config):
             - name: pmodel_openai
               type: PromptModel
               params:
-                model_name_or_path: text-davinci-003
+                model_name_or_path: gpt-3.5-turbo-instruct
                 model_kwargs:
                   temperature: 0.9
                   max_tokens: 64
@@ -1052,7 +1052,7 @@ def test_content_moderation_gpt_3():
     OpenAIInvocationLayer.
     """
     prompt_node = PromptNode(
-        model_name_or_path="text-davinci-003", api_key="key", model_kwargs={"moderate_content": True}
+        model_name_or_path="gpt-3.5-turbo-instruct", api_key="key", model_kwargs={"moderate_content": True}
     )
     with patch("haystack.nodes.prompt.invocation_layer.open_ai.check_openai_policy_violation") as mock_check, patch(
         "haystack.nodes.prompt.invocation_layer.open_ai.openai_request"

diff --git a/test/utils/test_openai_utils.py b/test/utils/test_openai_utils.py
@@ -22,9 +22,9 @@ def test_openai_text_completion_tokenization_details_gpt_default():
 
 @pytest.mark.unit
 def test_openai_text_completion_tokenization_details_gpt_davinci():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="text-davinci-003")
-    assert tokenizer_name == "p50k_base"
-    assert max_tokens_limit == 4097
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-instruct")
+    assert tokenizer_name == "cl100k_base"
+    assert max_tokens_limit == 4096
 
 
 @pytest.mark.unit