feat: log wrap_openai runs with unified usage_metadata #1071

Merged · 11 commits · Oct 14, 2024
Changes from 4 commits
63 changes: 62 additions & 1 deletion python/langsmith/schemas.py
@@ -1,4 +1,4 @@
"""Schemas for the LangSmith API."""

GitHub Actions / benchmark (notice on line 1 in python/langsmith/schemas.py): Benchmark results

create_5_000_run_trees:                        Mean +- std dev: 582 ms +- 43 ms
create_10_000_run_trees:                       Mean +- std dev: 1.16 sec +- 0.06 sec
create_20_000_run_trees:                       Mean +- std dev: 1.17 sec +- 0.06 sec
dumps_class_nested_py_branch_and_leaf_200x400: Mean +- std dev: 775 us +- 9 us
dumps_class_nested_py_leaf_50x100:             Mean +- std dev: 27.9 ms +- 0.3 ms
dumps_class_nested_py_leaf_100x200:            Mean +- std dev: 114 ms +- 3 ms
dumps_dataclass_nested_50x100:                 Mean +- std dev: 28.2 ms +- 0.5 ms
dumps_pydantic_nested_50x100:                  Mean +- std dev: 63.7 ms +- 17.3 ms (pyperf warning: result may be unstable, std dev is 27% of mean)
dumps_pydanticv1_nested_50x100:                Mean +- std dev: 216 ms +- 32 ms (pyperf warning: result may be unstable, std dev is 15% of mean)

GitHub Actions / benchmark (notice on line 1 in python/langsmith/schemas.py): Comparison against main

+-----------------------------------------------+----------+------------------------+
| Benchmark                                     | main     | changes                |
+===============================================+==========+========================+
| dumps_dataclass_nested_50x100                 | 27.9 ms  | 28.2 ms: 1.01x slower  |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_100x200            | 113 ms   | 114 ms: 1.01x slower   |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_branch_and_leaf_200x400 | 762 us   | 775 us: 1.02x slower   |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_50x100             | 27.3 ms  | 27.9 ms: 1.02x slower  |
+-----------------------------------------------+----------+------------------------+
| create_10_000_run_trees                       | 1.12 sec | 1.16 sec: 1.04x slower |
+-----------------------------------------------+----------+------------------------+
| create_20_000_run_trees                       | 1.12 sec | 1.17 sec: 1.04x slower |
+-----------------------------------------------+----------+------------------------+
| Geometric mean                                | (ref)    | 1.02x slower           |
+-----------------------------------------------+----------+------------------------+

Benchmark hidden because not significant (3): dumps_pydanticv1_nested_50x100, dumps_pydantic_nested_50x100, create_5_000_run_trees

from __future__ import annotations

@@ -17,7 +17,7 @@
)
from uuid import UUID

from typing_extensions import TypedDict
from typing_extensions import NotRequired, TypedDict

try:
from pydantic.v1 import ( # type: ignore[import]
@@ -891,3 +891,64 @@
"""Last updated time."""
num_likes = "num_likes"
"""Number of likes."""


class InputTokenDetails(TypedDict, total=False):
Comment from @baskaryan (Contributor), Oct 8, 2024:

unrelated to this pr — wonder if langchain-core should import these from langsmith sdk now

"""Breakdown of input token counts.

Does *not* need to sum to full input token count. Does *not* need to have all keys.
"""

audio: int
"""Audio input tokens."""
cache_creation: int
"""Input tokens that were cached and there was a cache miss.

Since there was a cache miss, the cache was created from these tokens.
"""
cache_read: int
"""Input tokens that were cached and there was a cache hit.

Since there was a cache hit, the tokens were read from the cache. More precisely,
the model state given these tokens was read from the cache.
"""


class OutputTokenDetails(TypedDict, total=False):
"""Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.
"""

audio: int
"""Audio output tokens."""
reasoning: int
"""Reasoning output tokens.

Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
models) that are not returned as part of model output.
"""


class UsageMetadata(TypedDict):
"""Usage metadata for a message, such as token counts.

This is a standard representation of token usage that is consistent across models.
"""

input_tokens: int
"""Count of input (or prompt) tokens. Sum of all input token types."""
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count. Sum of input_tokens + output_tokens."""
input_token_details: NotRequired[InputTokenDetails]
"""Breakdown of input token counts.

Does *not* need to sum to full input token count. Does *not* need to have all keys.
"""
output_token_details: NotRequired[OutputTokenDetails]
"""Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.
"""
57 changes: 57 additions & 0 deletions python/langsmith/wrappers/_openai.py
@@ -21,6 +21,7 @@

from langsmith import client as ls_client
from langsmith import run_helpers
from langsmith.schemas import InputTokenDetails, OutputTokenDetails, UsageMetadata

if TYPE_CHECKING:
from openai import AsyncOpenAI, OpenAI
@@ -141,6 +142,12 @@ def _reduce_chat(all_chunks: List[ChatCompletionChunk]) -> dict:
]
else:
d = {"choices": [{"message": {"role": "assistant", "content": ""}}]}
# streamed outputs don't go through `process_outputs`
# so we need to flatten metadata here
oai_token_usage = d.pop("usage")
d["usage_metadata"] = (
_create_usage_metadata(oai_token_usage) if oai_token_usage else None
)
return d
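
As the inline comment above notes, streamed outputs skip `process_outputs`, so the reducer flattens usage itself. A minimal sketch of that path, assuming the caller opts into OpenAI's `stream_options={"include_usage": True}` so the final chunk arrives with a populated `usage` (the model name is illustrative):

```python
from openai import OpenAI

from langsmith.wrappers import wrap_openai

client = wrap_openai(OpenAI())

stream = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative model
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    stream_options={"include_usage": True},
)
# Consume the stream; once it is exhausted, _reduce_chat folds the final
# chunk's `usage` into `usage_metadata` on the logged run.
for _chunk in stream:
    pass
```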


@@ -160,12 +167,59 @@ def _reduce_completions(all_chunks: List[Completion]) -> dict:
return d


def _create_usage_metadata(oai_token_usage: dict) -> UsageMetadata:
input_tokens = oai_token_usage.get("prompt_tokens", 0)
output_tokens = oai_token_usage.get("completion_tokens", 0)
Comment from a Collaborator:

would you ever get "None" as a value here? Maybe better to do "or 0" to avoid a None + None scenario?

total_tokens = oai_token_usage.get("total_tokens", input_tokens + output_tokens)
input_token_details: dict = {
"audio": (oai_token_usage.get("prompt_tokens_details") or {}).get(
"audio_tokens"
),
"cache_read": (oai_token_usage.get("prompt_tokens_details") or {}).get(
"cached_tokens"
),
}
output_token_details: dict = {
"audio": (oai_token_usage.get("completion_tokens_details") or {}).get(
"audio_tokens"
),
"reasoning": (oai_token_usage.get("completion_tokens_details") or {}).get(
"reasoning_tokens"
),
}
return UsageMetadata(
input_tokens=input_tokens,
output_tokens=output_tokens,
total_tokens=total_tokens,
input_token_details=InputTokenDetails(
**{k: v for k, v in input_token_details.items() if v is not None}
),
output_token_details=OutputTokenDetails(
**{k: v for k, v in output_token_details.items() if v is not None}
),
)
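
On the collaborator's question above: `dict.get(key, default)` only falls back when the key is absent, not when the API sends an explicit null, so `{"prompt_tokens": None}.get("prompt_tokens", 0)` still yields `None`. A sketch of the suggested `or 0` alternative, using a hypothetical helper rather than what this commit does:

```python
def _int_or_zero(usage: dict, key: str) -> int:
    # `or 0` normalizes both a missing key and an explicit None,
    # whereas .get(key, 0) only covers the missing-key case.
    return usage.get(key) or 0

# input_tokens = _int_or_zero(oai_token_usage, "prompt_tokens")
# output_tokens = _int_or_zero(oai_token_usage, "completion_tokens")
```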


def _process_chat_completion(outputs: Any):
try:
rdict = outputs.model_dump()
oai_token_usage = rdict.pop("usage")
rdict["usage_metadata"] = (
_create_usage_metadata(oai_token_usage) if oai_token_usage else None
)
return rdict
except BaseException as e:
logger.debug(f"Error processing chat completion: {e}")
return {"output": outputs}


def _get_wrapper(
original_create: Callable,
name: str,
reduce_fn: Callable,
tracing_extra: Optional[TracingExtra] = None,
invocation_params_fn: Optional[Callable] = None,
process_outputs: Optional[Callable] = None,
) -> Callable:
textra = tracing_extra or {}

@@ -177,6 +231,7 @@ def create(*args, stream: bool = False, **kwargs):
reduce_fn=reduce_fn if stream else None,
process_inputs=_strip_not_given,
_invocation_params_fn=invocation_params_fn,
process_outputs=process_outputs,
**textra,
)

@@ -191,6 +246,7 @@ async def acreate(*args, stream: bool = False, **kwargs):
reduce_fn=reduce_fn if stream else None,
process_inputs=_strip_not_given,
_invocation_params_fn=invocation_params_fn,
process_outputs=process_outputs,
**textra,
)
return await decorator(original_create)(*args, stream=stream, **kwargs)
@@ -232,6 +288,7 @@ def wrap_openai(
_reduce_chat,
tracing_extra=tracing_extra,
invocation_params_fn=functools.partial(_infer_invocation_params, "chat"),
process_outputs=_process_chat_completion,
)
client.completions.create = _get_wrapper( # type: ignore[method-assign]
client.completions.create,
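End to end, the non-streaming path now looks like this sketch (model and prompt mirror the fixture below): the wrapped `chat.completions.create` runs its response through `_process_chat_completion`, so the logged run carries `usage_metadata` in place of the raw OpenAI `usage`:

```python
from openai import OpenAI

from langsmith.wrappers import wrap_openai

client = wrap_openai(OpenAI())

resp = client.chat.completions.create(
    model="o1-mini",
    messages=[
        {
            "role": "user",
            "content": "Write a bash script that transposes a matrix.",
        }
    ],
)
# The LangSmith run's outputs now include a unified `usage_metadata` block
# (see the recorded o1-mini run below for the exact shape).
```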
Comment from the Contributor Author:

These test files will be synced over to the langsmith repo for test data

@@ -0,0 +1,124 @@
[
{
"post": [
{
"id": "e0fd4e71-878f-42b7-8c1d-90d6bb7bfb8d",
"start_time": "2024-10-08T05:02:29.036723+00:00",
"extra": {
"metadata": {
"ls_method": "traceable",
"ls_provider": "openai",
"ls_model_type": "chat",
"ls_model_name": "o1-mini",
"revision_id": "v0.1.82-377-g07cb5b9-dirty"
},
"runtime": {
"sdk": "langsmith-py",
"sdk_version": "0.1.131",
"library": "langsmith",
"platform": "macOS-13.2-arm64-arm-64bit",
"runtime": "python",
"py_implementation": "CPython",
"runtime_version": "3.11.7",
"langchain_version": "0.2.9",
"langchain_core_version": "0.2.21"
}
},
"serialized": {
"name": "ChatOpenAI",
"signature": "(*, messages: 'Iterable[ChatCompletionMessageParam]', model: 'Union[str, ChatModel]', frequency_penalty: 'Optional[float] | NotGiven' = NOT_GIVEN, function_call: 'completion_create_params.FunctionCall | NotGiven' = NOT_GIVEN, functions: 'Iterable[completion_create_params.Function] | NotGiven' = NOT_GIVEN, logit_bias: 'Optional[Dict[str, int]] | NotGiven' = NOT_GIVEN, logprobs: 'Optional[bool] | NotGiven' = NOT_GIVEN, max_completion_tokens: 'Optional[int] | NotGiven' = NOT_GIVEN, max_tokens: 'Optional[int] | NotGiven' = NOT_GIVEN, n: 'Optional[int] | NotGiven' = NOT_GIVEN, parallel_tool_calls: 'bool | NotGiven' = NOT_GIVEN, presence_penalty: 'Optional[float] | NotGiven' = NOT_GIVEN, response_format: 'completion_create_params.ResponseFormat | NotGiven' = NOT_GIVEN, seed: 'Optional[int] | NotGiven' = NOT_GIVEN, service_tier: \"Optional[Literal['auto', 'default']] | NotGiven\" = NOT_GIVEN, stop: 'Union[Optional[str], List[str]] | NotGiven' = NOT_GIVEN, stream: 'Optional[Literal[False]] | Literal[True] | NotGiven' = NOT_GIVEN, stream_options: 'Optional[ChatCompletionStreamOptionsParam] | NotGiven' = NOT_GIVEN, temperature: 'Optional[float] | NotGiven' = NOT_GIVEN, tool_choice: 'ChatCompletionToolChoiceOptionParam | NotGiven' = NOT_GIVEN, tools: 'Iterable[ChatCompletionToolParam] | NotGiven' = NOT_GIVEN, top_logprobs: 'Optional[int] | NotGiven' = NOT_GIVEN, top_p: 'Optional[float] | NotGiven' = NOT_GIVEN, user: 'str | NotGiven' = NOT_GIVEN, extra_headers: 'Headers | None' = None, extra_query: 'Query | None' = None, extra_body: 'Body | None' = None, timeout: 'float | httpx.Timeout | None | NotGiven' = NOT_GIVEN) -> 'ChatCompletion | AsyncStream[ChatCompletionChunk]'",
"doc": null
},
"events": [],
"tags": [],
"attachments": {},
"dotted_order": "20241008T050229036723Ze0fd4e71-878f-42b7-8c1d-90d6bb7bfb8d",
"trace_id": "e0fd4e71-878f-42b7-8c1d-90d6bb7bfb8d",
"outputs": {},
"session_name": "default",
"name": "ChatOpenAI",
"inputs": {
"messages": [
{
"role": "user",
"content": "Write a bash script that takes a matrix represented as a string with format '[1,2],[3,4],[5,6]' and prints the transpose in the same format."
}
],
"model": "o1-mini",
"stream": false,
"extra_headers": null,
"extra_query": null,
"extra_body": null
},
"run_type": "llm"
}
]
},
{
"patch": [
{
"id": "e0fd4e71-878f-42b7-8c1d-90d6bb7bfb8d",
"name": "ChatOpenAI",
"trace_id": "e0fd4e71-878f-42b7-8c1d-90d6bb7bfb8d",
"parent_run_id": null,
"dotted_order": "20241008T050229036723Ze0fd4e71-878f-42b7-8c1d-90d6bb7bfb8d",
"tags": [],
"extra": {
"metadata": {
"ls_method": "traceable",
"ls_provider": "openai",
"ls_model_type": "chat",
"ls_model_name": "o1-mini",
"revision_id": "v0.1.82-377-g07cb5b9-dirty"
},
"runtime": {
"sdk": "langsmith-py",
"sdk_version": "0.1.131",
"library": "langsmith",
"platform": "macOS-13.2-arm64-arm-64bit",
"runtime": "python",
"py_implementation": "CPython",
"runtime_version": "3.11.7",
"langchain_version": "0.2.9",
"langchain_core_version": "0.2.21"
}
},
"end_time": "2024-10-08T05:02:43.925875+00:00",
"outputs": {
"id": "chatcmpl-AFwebVAjn9wnURg0lhlbSCs7aSZ4r",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Here's a bash script that takes a matrix represented as a string in the format `'[1,2],[3,4],[5,6]'` and prints its transpose in the same format. This script uses `awk` to handle the parsing and transposing of the matrix.\n\n```bash\n#!/bin/bash\n\n# Check if exactly one argument is provided\nif [ \"$#\" -ne 1 ]; then\n echo \"Usage: $0 '[a,b,c],[d,e,f],[g,h,i]'\"\n exit 1\nfi\n\ninput=\"$1\"\n\n# Use awk to parse and transpose the matrix\necho \"$input\" | awk '\nBEGIN{\n # Define the field separator to split numbers, ignoring brackets and commas\n FS=\"[\\\\[\\\\],]+\"\n}\n{\n # Iterate over each field in the input\n for(i=1; i<=NF; i++) {\n if($i != \"\"){\n matrix[NR, i] = $i\n # Keep track of the maximum number of columns\n if(i > max_cols){\n max_cols = i\n }\n }\n }\n # Keep track of the total number of rows\n if(NR > total_rows){\n total_rows = NR\n }\n}\nEND{\n # Iterate over each column to create the transposed rows\n for(col=1; col<=max_cols; col++) {\n printf \"[\"\n for(row=1; row<=total_rows; row++) {\n printf \"%s\", matrix[row, col]\n if(row < total_rows){\n printf \",\"\n }\n }\n printf \"]\"\n # Add a comma between transposed rows, except after the last one\n if(col < max_cols){\n printf \",\"\n }\n }\n printf \"\\n\"\n}\n'\n```\n\n### How It Works\n\n1. **Input Validation**:\n - The script first checks if exactly one argument is provided. If not, it displays usage instructions and exits.\n\n2. **Parsing the Input**:\n - The input string (e.g., `'[1,2],[3,4],[5,6]'`) is piped to `awk`.\n - The `FS` (Field Separator) is set to split the input based on `[`, `]`, and `,` characters, effectively extracting the numbers.\n\n3. **Storing the Matrix**:\n - The script stores each number in a two-dimensional array `matrix[row, column]`.\n - It keeps track of the maximum number of columns and the total number of rows to handle non-square matrices.\n\n4. **Transposing the Matrix**:\n - In the `END` block, the script iterates over each column of the original matrix and prints them as rows of the transposed matrix.\n - The output is formatted to match the original input style, enclosing each transposed row in square brackets and separating them with commas.\n\n### Example Usage\n\n```bash\n./transpose_matrix.sh '[1,2],[3,4],[5,6]'\n```\n\n**Output:**\n```\n[1,3,5],[2,4,6]\n```\n\n### Making the Script Executable\n\n1. **Save the Script**:\n - Save the script to a file, for example, `transpose_matrix.sh`.\n\n2. **Make It Executable**:\n ```bash\n chmod +x transpose_matrix.sh\n ```\n\n3. **Run the Script**:\n ```bash\n ./transpose_matrix.sh '[1,2],[3,4],[5,6]'\n ```\n\n### Handling Different Matrix Sizes\n\nThe script is designed to handle non-square matrices as well. For example:\n\n```bash\n./transpose_matrix.sh '[1,2,3],[4,5,6]'\n```\n\n**Output:**\n```\n[1,4],[2,5],[3,6]\n```\n\n### Notes\n\n- The script assumes that the input matrix is well-formed, with each row enclosed in square brackets and numbers separated by commas.\n- It supports matrices with varying numbers of rows and columns.\n- Ensure that you have `awk` installed on your system, which is typically available by default on most Unix-like systems.",
"refusal": null,
"role": "assistant",
"function_call": null,
"tool_calls": null
}
}
],
"created": 1728363749,
"model": "o1-mini-2024-09-12",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_f7eab99a33",
"usage_metadata": {
"input_tokens": 43,
"output_tokens": 2605,
"total_tokens": 2648,
"input_token_details": {
"cache_read": 0
},
"output_token_details": {
"reasoning": 1728
}
}
},
"events": []
}
]
}
]