From 66966a6e725f7ab533886c86d418c4a663bb9966 Mon Sep 17 00:00:00 2001
From: ccurme
Date: Tue, 5 Nov 2024 18:02:24 -0500
Subject: [PATCH] openai[patch]: release 0.2.6 (#27924)

Some additions in support of the [predicted outputs](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs) feature:

- Bump openai sdk version
- Add integration test
- Add example to integration docs

The `prediction` kwarg is already plumbed through model invocation.
---
 docs/docs/integrations/chat/openai.ipynb | 99 ++++++++++++++++++-
 libs/partners/openai/poetry.lock         | 16 +--
 libs/partners/openai/pyproject.toml      |  4 +-
 .../chat_models/test_base.py             | 43 ++++++++
 4 files changed, 150 insertions(+), 12 deletions(-)

diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb
index 687f6ddc30bc1..9ab1c22c70c8f 100644
--- a/docs/docs/integrations/chat/openai.ipynb
+++ b/docs/docs/integrations/chat/openai.ipynb
@@ -509,6 +509,101 @@
     "output_message.content"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "5c35d0a4-a6b8-4d35-a02b-a37a8bda5692",
+   "metadata": {},
+   "source": [
+    "## Predicted output\n",
+    "\n",
+    ":::info\n",
+    "Requires `langchain-openai>=0.2.6`\n",
+    ":::\n",
+    "\n",
+    "Some OpenAI models (such as their `gpt-4o` and `gpt-4o-mini` series) support [Predicted Outputs](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs), which allow you to pass in a known portion of the LLM's expected output ahead of time to reduce latency. This is useful for cases such as editing text or code, where only a small part of the model's output will change.\n",
+    "\n",
+    "Here's an example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "88fee1e9-58c1-42ad-ae23-24b882e175e7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/// <summary>\n",
+      "/// Represents a user with a first name, last name, and email.\n",
+      "/// </summary>\n",
+      "public class User\n",
+      "{\n",
+      "    /// <summary>\n",
+      "    /// Gets or sets the user's first name.\n",
+      "    /// </summary>\n",
+      "    public string FirstName { get; set; }\n",
+      "\n",
+      "    /// <summary>\n",
+      "    /// Gets or sets the user's last name.\n",
+      "    /// </summary>\n",
+      "    public string LastName { get; set; }\n",
+      "\n",
+      "    /// <summary>\n",
+      "    /// Gets or sets the user's email.\n",
+      "    /// </summary>\n",
+      "    public string Email { get; set; }\n",
+      "}\n",
+      "{'token_usage': {'completion_tokens': 226, 'prompt_tokens': 166, 'total_tokens': 392, 'completion_tokens_details': {'accepted_prediction_tokens': 49, 'audio_tokens': None, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 107}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_45cf54deae', 'finish_reason': 'stop', 'logprobs': None}\n"
+     ]
+    }
+   ],
+   "source": [
+    "code = \"\"\"\n",
+    "/// <summary>\n",
+    "/// Represents a user with a first name, last name, and username.\n",
+    "/// </summary>\n",
+    "public class User\n",
+    "{\n",
+    "    /// <summary>\n",
+    "    /// Gets or sets the user's first name.\n",
+    "    /// </summary>\n",
+    "    public string FirstName { get; set; }\n",
+    "\n",
+    "    /// <summary>\n",
+    "    /// Gets or sets the user's last name.\n",
+    "    /// </summary>\n",
+    "    public string LastName { get; set; }\n",
+    "\n",
+    "    /// <summary>\n",
+    "    /// Gets or sets the user's username.\n",
+    "    /// </summary>\n",
+    "    public string Username { get; set; }\n",
+    "}\n",
+    "\"\"\"\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-4o\")\n",
+    "query = (\n",
+    "    \"Replace the Username property with an Email property. \"\n",
\"\n", + " \"Respond only with code, and with no markdown formatting.\"\n", + ")\n", + "response = llm.invoke(\n", + " [{\"role\": \"user\", \"content\": query}, {\"role\": \"user\", \"content\": code}],\n", + " prediction={\"type\": \"content\", \"content\": code},\n", + ")\n", + "print(response.content)\n", + "print(response.response_metadata)" + ] + }, + { + "cell_type": "markdown", + "id": "2ee1b26d-a388-4e7c-9f40-bfd1388ecc03", + "metadata": {}, + "source": [ + "Note that currently predictions are billed as additional tokens and may increase your usage and costs in exchange for this reduced latency." + ] + }, { "cell_type": "markdown", "id": "feb4a499", @@ -601,7 +696,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -615,7 +710,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/libs/partners/openai/poetry.lock b/libs/partners/openai/poetry.lock index a1d51f1d22efe..30fa377575298 100644 --- a/libs/partners/openai/poetry.lock +++ b/libs/partners/openai/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "annotated-types" @@ -518,7 +518,7 @@ url = "../../core" [[package]] name = "langchain-standard-tests" -version = "0.1.1" +version = "0.3.0" description = "Standard tests for LangChain implementations" optional = false python-versions = ">=3.9,<4.0" @@ -527,7 +527,7 @@ develop = true [package.dependencies] httpx = "^0.27.0" -langchain-core = "^0.3.0" +langchain-core = "^0.3.15" pytest = ">=7,<9" syrupy = "^4" @@ -667,13 +667,13 @@ files = [ [[package]] name = "openai" -version = "1.52.2" +version = "1.54.1" description = "The official Python library for the openai API" optional = false -python-versions = ">=3.7.1" +python-versions = ">=3.8" files = [ - {file = "openai-1.52.2-py3-none-any.whl", hash = "sha256:57e9e37bc407f39bb6ec3a27d7e8fb9728b2779936daa1fcf95df17d3edfaccc"}, - {file = "openai-1.52.2.tar.gz", hash = "sha256:87b7d0f69d85f5641678d414b7ee3082363647a5c66a462ed7f3ccb59582da0d"}, + {file = "openai-1.54.1-py3-none-any.whl", hash = "sha256:3cb49ccb6bfdc724ad01cc397d323ef8314fc7d45e19e9de2afdd6484a533324"}, + {file = "openai-1.54.1.tar.gz", hash = "sha256:5b832bf82002ba8c4f6e5e25c1c0f5d468c22f043711544c716eaffdb30dd6f1"}, ] [package.dependencies] @@ -1561,4 +1561,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "985f183aed7188a51964c30831939cae8060d906e8b07d4257e423016b6f3dd4" +content-hash = "77af861c052decd1c194936575c15491a606fd763556b427e2c659d5ea7aae72" diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml index 255e488375d43..661c2b0a98490 100644 --- a/libs/partners/openai/pyproject.toml +++ b/libs/partners/openai/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "langchain-openai" -version = "0.2.5" +version = "0.2.6" description = "An integration package connecting OpenAI and LangChain" authors = [] readme = "README.md" @@ -24,7 +24,7 @@ ignore_missing_imports = true [tool.poetry.dependencies] python = ">=3.9,<4.0" langchain-core = "^0.3.15" -openai = "^1.52.0" +openai = "^1.54.0" tiktoken = ">=0.7,<1" [tool.ruff.lint] diff --git 
index ccce5722c3c8e..31ff734355491 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
@@ -3,6 +3,7 @@
 import base64
 import json
 from pathlib import Path
+from textwrap import dedent
 from typing import Any, AsyncIterator, List, Literal, Optional, cast
 
 import httpx
@@ -1018,3 +1019,45 @@ def test_audio_input_modality() -> None:
 
     assert isinstance(output, AIMessage)
     assert "audio" in output.additional_kwargs
+
+
+def test_prediction_tokens() -> None:
+    code = dedent("""
+    /// <summary>
+    /// Represents a user with a first name, last name, and username.
+    /// </summary>
+    public class User
+    {
+        /// <summary>
+        /// Gets or sets the user's first name.
+        /// </summary>
+        public string FirstName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's last name.
+        /// </summary>
+        public string LastName { get; set; }
+
+        /// <summary>
+        /// Gets or sets the user's username.
+        /// </summary>
+        public string Username { get; set; }
+    }
+    """)
+
+    llm = ChatOpenAI(model="gpt-4o")
+    query = (
+        "Replace the Username property with an Email property. "
+        "Respond only with code, and with no markdown formatting."
+    )
+    response = llm.invoke(
+        [{"role": "user", "content": query}, {"role": "user", "content": code}],
+        prediction={"type": "content", "content": code},
+    )
+    assert isinstance(response, AIMessage)
+    assert response.response_metadata is not None
+    output_token_details = response.response_metadata["token_usage"][
+        "completion_tokens_details"
+    ]
+    assert output_token_details["accepted_prediction_tokens"] > 0
+    assert output_token_details["rejected_prediction_tokens"] > 0
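
For reference, here is a minimal sketch (not part of this patch) of how the `prediction` kwarg and the resulting token accounting fit together. It mirrors the notebook example and integration test above; the `prediction_acceptance_rate` helper and the `draft` prompt are illustrative only, and running it assumes `langchain-openai>=0.2.6` and an `OPENAI_API_KEY` in the environment.

```python
# Illustrative sketch, not code from this patch: measure how much of a
# predicted output the model actually accepted, using the metadata keys
# asserted in test_prediction_tokens above.
from langchain_core.messages import AIMessage
from langchain_openai import ChatOpenAI


def prediction_acceptance_rate(response: AIMessage) -> float:
    """Fraction of predicted tokens the model accepted (hypothetical helper)."""
    details = response.response_metadata["token_usage"]["completion_tokens_details"]
    accepted = details["accepted_prediction_tokens"]
    rejected = details["rejected_prediction_tokens"]
    total = accepted + rejected
    return accepted / total if total else 0.0


llm = ChatOpenAI(model="gpt-4o")
# A known approximation of the expected output, passed ahead of time.
draft = "def add(a, b):\n    return a + b\n"
response = llm.invoke(
    [
        {"role": "user", "content": "Rename the function to `sum_two`. Respond only with code."},
        {"role": "user", "content": draft},
    ],
    prediction={"type": "content", "content": draft},
)
print(f"{prediction_acceptance_rate(response):.0%} of predicted tokens accepted")
```

A high rejected-to-accepted ratio (as in the notebook output above: 107 rejected vs. 49 accepted) suggests the prediction diverges too much from the actual output to be worth the extra billed tokens.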