diff --git a/kong/llm/plugin/shared-filters/normalize-json-response.lua b/kong/llm/plugin/shared-filters/normalize-json-response.lua
index 1e0988f52495..633602a4c178 100644
--- a/kong/llm/plugin/shared-filters/normalize-json-response.lua
+++ b/kong/llm/plugin/shared-filters/normalize-json-response.lua
@@ -57,6 +57,24 @@ local function transform_body(conf)
   end
 
   set_global_ctx("response_body", response_body) -- to be sent out later or consumed by other plugins
+
+  -- update the usage metrics, if the original request is not openai compatible format
+  -- TODO: to de-duplicate that from parse-json-response
+  local t, err
+  if response_body then
+    t, err = cjson.decode(response_body)
+    if err then
+      kong.log.warn("failed to decode response body for usage introspection: ", err)
+    end
+
+    if t and t.usage and t.usage.prompt_tokens then
+      ai_plugin_o11y.metrics_set("llm_prompt_tokens_count", t.usage.prompt_tokens)
+    end
+
+    if t and t.usage and t.usage.completion_tokens then
+      ai_plugin_o11y.metrics_set("llm_completion_tokens_count", t.usage.completion_tokens)
+    end
+  end
 end
 
 function _M:run(conf)
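
For reference, here is a minimal standalone sketch of what the added block does: it decodes the normalized response body and, when an OpenAI-style `usage` object is present, records `prompt_tokens` and `completion_tokens` under the `llm_prompt_tokens_count` / `llm_completion_tokens_count` metric names. The sample JSON payload and the plain `metrics` table are illustrative assumptions (they stand in for Kong's `ai_plugin_o11y.metrics_set` and real provider output); `cjson.safe` is assumed because the patch's `t, err = cjson.decode(...)` pattern expects an error-returning decode.

```lua
-- Illustrative only: mirrors the usage introspection added in the patch,
-- run against an assumed OpenAI-style response body (not a Kong test fixture).
local cjson = require("cjson.safe")

local response_body = [[
{
  "choices": [{ "message": { "role": "assistant", "content": "hello" } }],
  "usage": { "prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15 }
}
]]

local t, err = cjson.decode(response_body)
if err then
  print("failed to decode response body for usage introspection: " .. err)
end

-- stand-in for ai_plugin_o11y.metrics_set(...) in the patch
local metrics = {}
if t and t.usage and t.usage.prompt_tokens then
  metrics.llm_prompt_tokens_count = t.usage.prompt_tokens
end
if t and t.usage and t.usage.completion_tokens then
  metrics.llm_completion_tokens_count = t.usage.completion_tokens
end

print(metrics.llm_prompt_tokens_count, metrics.llm_completion_tokens_count)  --> 10  5
```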