Cleanup on aisle METRICS. Note: See below, this is a breaking change
1. Fleshed out MetricsFrame and broke it into a proper set of types
2. Added model_name as a property to AIService so that it can be
   automatically included in metrics, removing that overhead from
   all the various services themselves

Breaking change!

Because of the type improvements, the MetricsFrame type has
changed. Each frame carries a list of metrics, similar to before,
except each item in the list now contains only one type of metric:
"ttfb", "tokens", "characters", or "processing". Previously, all of
these fields appeared in every entry and were set to None when they
didn't apply.
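
For downstream consumers, migration means switching from the old
per-field access (frame.ttfb, frame.processing, etc.) to dispatching
on the type of each item in frame.data. A minimal sketch, assuming
the types introduced in this commit (log_metrics is a hypothetical
helper, not part of the API):

```python
from pipecat.frames.frames import MetricsFrame
from pipecat.metrics.metrics import (
    LLMUsageMetricsData,
    ProcessingMetricsData,
    TTFBMetricsData,
    TTSUsageMetricsData)


def log_metrics(frame: MetricsFrame):
    # Each item in frame.data carries exactly one kind of metric,
    # so dispatch on its type instead of checking fields for None.
    for d in frame.data:
        if isinstance(d, TTFBMetricsData):
            print(f"{d.processor} ttfb: {d.value}")
        elif isinstance(d, ProcessingMetricsData):
            print(f"{d.processor} processing: {d.value}")
        elif isinstance(d, LLMUsageMetricsData):
            print(f"{d.processor} tokens: {d.value.total_tokens}")
        elif isinstance(d, TTSUsageMetricsData):
            print(f"{d.processor} characters: {d.value}")
```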

While this changes internal handling of the MetricsFrame, it does NOT
break the RTVI/daily messaging of metrics. That format remains the same.

Also, remember to use model_name for accessing a service's current
model and set_model_name for setting it.
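
A quick sketch (service stands in for any AIService subclass, and
the model id is hypothetical):

```python
service.set_model_name("my-model-id")  # setter: also refreshes the core metrics data
print(service.model_name)              # property: read access
```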
mattieruth committed Sep 20, 2024
1 parent ed409d0 commit a4edb3d
Showing 21 changed files with 190 additions and 98 deletions.
16 changes: 14 additions & 2 deletions examples/foundational/06-listen-and-respond.py
@@ -10,6 +10,7 @@
import sys

from pipecat.frames.frames import Frame, LLMMessagesFrame, MetricsFrame
from pipecat.metrics.metrics import TTFBMetricsData, ProcessingMetricsData, LLMUsageMetricsData, TTSUsageMetricsData
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -37,8 +38,19 @@
class MetricsLogger(FrameProcessor):
    async def process_frame(self, frame: Frame, direction: FrameDirection):
        if isinstance(frame, MetricsFrame):
            print(
                f"!!! MetricsFrame: {frame}, ttfb: {frame.ttfb}, processing: {frame.processing}, tokens: {frame.tokens}, characters: {frame.characters}")
            for d in frame.data:
                if isinstance(d, TTFBMetricsData):
                    print(f"!!! MetricsFrame: {frame}, ttfb: {d.value}")
                elif isinstance(d, ProcessingMetricsData):
                    print(f"!!! MetricsFrame: {frame}, processing: {d.value}")
                elif isinstance(d, LLMUsageMetricsData):
                    tokens = d.value
                    print(
                        f"!!! MetricsFrame: {frame}, prompt tokens: {tokens.prompt_tokens}, "
                        f"completion tokens: {tokens.completion_tokens}")
                elif isinstance(d, TTSUsageMetricsData):
                    print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
        await self.push_frame(frame, direction)


9 changes: 4 additions & 5 deletions src/pipecat/frames/frames.py
@@ -4,11 +4,12 @@
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Any, List, Mapping, Optional, Tuple
from typing import Any, List, Optional, Tuple

from dataclasses import dataclass, field

from pipecat.clocks.base_clock import BaseClock
from pipecat.metrics.metrics import MetricsData
from pipecat.transcriptions.language import Language
from pipecat.utils.time import nanoseconds_to_str
from pipecat.utils.utils import obj_count, obj_id
@@ -333,10 +334,8 @@ class BotInterruptionFrame(SystemFrame):
class MetricsFrame(SystemFrame):
    """Emitted by processors that can compute metrics like latencies."""
    ttfb: List[Mapping[str, Any]] | None = None
    processing: List[Mapping[str, Any]] | None = None
    tokens: List[Mapping[str, Any]] | None = None
    characters: List[Mapping[str, Any]] | None = None
    data: List[MetricsData]


#
# Control frames
Empty file added src/pipecat/metrics/__init__.py
31 changes: 31 additions & 0 deletions src/pipecat/metrics/metrics.py
@@ -0,0 +1,31 @@
from typing import Optional
from pydantic import BaseModel


class MetricsData(BaseModel):
    processor: str
    model: Optional[str] = None


class TTFBMetricsData(MetricsData):
    value: float


class ProcessingMetricsData(MetricsData):
    value: float


class LLMTokenUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    cache_read_input_tokens: Optional[int] = None
    cache_creation_input_tokens: Optional[int] = None


class LLMUsageMetricsData(MetricsData):
    value: LLMTokenUsage


class TTSUsageMetricsData(MetricsData):
    value: int
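
As a quick illustration of how these types compose (the processor
and model names here are hypothetical, and model_dump assumes
pydantic v2):

```python
from pipecat.metrics.metrics import LLMTokenUsage, LLMUsageMetricsData

usage = LLMUsageMetricsData(
    processor="ExampleLLMService",  # hypothetical processor name
    model="example-model",          # hypothetical model id
    value=LLMTokenUsage(prompt_tokens=12, completion_tokens=34, total_tokens=46))
print(usage.model_dump())
```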
9 changes: 6 additions & 3 deletions src/pipecat/pipeline/task.py
@@ -20,6 +20,7 @@
    MetricsFrame,
    StartFrame,
    StopTaskFrame)
from pipecat.metrics.metrics import TTFBMetricsData, ProcessingMetricsData
from pipecat.pipeline.base_pipeline import BasePipeline
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.utils.utils import obj_count, obj_id
@@ -118,9 +119,11 @@ async def queue_frames(self, frames: Iterable[Frame] | AsyncIterable[Frame]):

    def _initial_metrics_frame(self) -> MetricsFrame:
        processors = self._pipeline.processors_with_metrics()
        ttfb = [{"processor": p.name, "value": 0.0} for p in processors]
        processing = [{"processor": p.name, "value": 0.0} for p in processors]
        return MetricsFrame(ttfb=ttfb, processing=processing)
        data = []
        for p in processors:
            data.append(TTFBMetricsData(processor=p.name, value=0.0))
            data.append(ProcessingMetricsData(processor=p.name, value=0.0))
        return MetricsFrame(data=data)

    async def _process_down_queue(self):
        self._clock.start()
67 changes: 44 additions & 23 deletions src/pipecat/processors/frame_processor.py
@@ -19,6 +19,13 @@
    StartInterruptionFrame,
    StopInterruptionFrame,
    SystemFrame)
from pipecat.metrics.metrics import (
    LLMTokenUsage,
    LLMUsageMetricsData,
    MetricsData,
    ProcessingMetricsData,
    TTFBMetricsData,
    TTSUsageMetricsData)
from pipecat.utils.utils import obj_count, obj_id

from loguru import logger
@@ -31,11 +38,20 @@ class FrameDirection(Enum):

class FrameProcessorMetrics:
    def __init__(self, name: str):
        self._name = name
        self._core_metrics_data = MetricsData(processor=name)
        self._start_ttfb_time = 0
        self._start_processing_time = 0
        self._should_report_ttfb = True

    def _processor_name(self):
        return self._core_metrics_data.processor

    def _model_name(self):
        return self._core_metrics_data.model

    def set_core_metrics_data(self, data: MetricsData):
        self._core_metrics_data = data

    async def start_ttfb_metrics(self, report_only_initial_ttfb):
        if self._should_report_ttfb:
            self._start_ttfb_time = time.time()
@@ -46,13 +62,13 @@ async def stop_ttfb_metrics(self):
            return None

        value = time.time() - self._start_ttfb_time
        logger.debug(f"{self._name} TTFB: {value}")
        ttfb = {
            "processor": self._name,
            "value": value
        }
        logger.debug(f"{self._processor_name()} TTFB: {value}")
        ttfb = TTFBMetricsData(
            processor=self._processor_name(),
            value=value,
            model=self._model_name())
        self._start_ttfb_time = 0
        return MetricsFrame(ttfb=[ttfb])
        return MetricsFrame(data=[ttfb])

    async def start_processing_metrics(self):
        self._start_processing_time = time.time()
@@ -62,26 +78,28 @@ async def stop_processing_metrics(self):
            return None

        value = time.time() - self._start_processing_time
        logger.debug(f"{self._name} processing time: {value}")
        processing = {
            "processor": self._name,
            "value": value
        }
        logger.debug(f"{self._processor_name()} processing time: {value}")
        processing = ProcessingMetricsData(
            processor=self._processor_name(), value=value, model=self._model_name())
        self._start_processing_time = 0
        return MetricsFrame(processing=[processing])
        return MetricsFrame(data=[processing])

    async def start_llm_usage_metrics(self, tokens: dict):
    async def start_llm_usage_metrics(self, tokens: LLMTokenUsage):
        logger.debug(
            f"{self._name} prompt tokens: {tokens['prompt_tokens']}, completion tokens: {tokens['completion_tokens']}")
        return MetricsFrame(tokens=[tokens])
            f"{self._processor_name()} prompt tokens: {tokens.prompt_tokens}, completion tokens: {tokens.completion_tokens}")
        value = LLMUsageMetricsData(
            processor=self._processor_name(),
            model=self._model_name(),
            value=tokens)
        return MetricsFrame(data=[value])

    async def start_tts_usage_metrics(self, text: str):
        characters = {
            "processor": self._name,
            "value": len(text),
        }
        logger.debug(f"{self._name} usage characters: {characters['value']}")
        return MetricsFrame(characters=[characters])
        characters = TTSUsageMetricsData(
            processor=self._processor_name(),
            model=self._model_name(),
            value=len(text))
        logger.debug(f"{self._processor_name()} usage characters: {characters.value}")
        return MetricsFrame(data=[characters])


class FrameProcessor:
@@ -140,6 +158,9 @@ def report_only_initial_ttfb(self):
    def can_generate_metrics(self) -> bool:
        return False

    def set_core_metrics_data(self, data: MetricsData):
        self._metrics.set_core_metrics_data(data)

    async def start_ttfb_metrics(self):
        if self.can_generate_metrics() and self.metrics_enabled:
            await self._metrics.start_ttfb_metrics(self._report_only_initial_ttfb)
@@ -160,7 +181,7 @@ async def stop_processing_metrics(self):
            if frame:
                await self.push_frame(frame)

    async def start_llm_usage_metrics(self, tokens: dict):
    async def start_llm_usage_metrics(self, tokens: LLMTokenUsage):
        if self.can_generate_metrics() and self.usage_metrics_enabled:
            frame = await self._metrics.start_llm_usage_metrics(tokens)
            if frame:
14 changes: 12 additions & 2 deletions src/pipecat/services/ai_services.py
@@ -32,6 +32,7 @@
    UserImageRequestFrame,
    VisionImageRawFrame
)
from pipecat.metrics.metrics import MetricsData
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.transcriptions.language import Language
from pipecat.utils.audio import calculate_audio_volume
@@ -46,6 +47,15 @@
class AIService(FrameProcessor):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._model_name: str = ""

    @property
    def model_name(self) -> str:
        return self._model_name

    def set_model_name(self, model: str):
        self._model_name = model
        self.set_core_metrics_data(MetricsData(processor=self.name, model=self._model_name))

    async def start(self, frame: StartFrame):
        pass
@@ -158,7 +168,7 @@ def sample_rate(self) -> int:

    @abstractmethod
    async def set_model(self, model: str):
        pass
        self.set_model_name(model)

    @abstractmethod
    async def set_voice(self, voice: str):
@@ -367,7 +377,7 @@ def __init__(self, **kwargs):

    @abstractmethod
    async def set_model(self, model: str):
        pass
        self.set_model_name(model)

    @abstractmethod
    async def set_language(self, language: Language):
23 changes: 11 additions & 12 deletions src/pipecat/services/anthropic.py
@@ -29,6 +29,7 @@
    FunctionCallInProgressFrame,
    StartInterruptionFrame
)
from pipecat.metrics.metrics import LLMTokenUsage
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import LLMService
from pipecat.processors.aggregators.openai_llm_context import (
@@ -84,7 +85,7 @@ def __init__(
            **kwargs):
        super().__init__(**kwargs)
        self._client = AsyncAnthropic(api_key=api_key)
        self._model = model
        self.set_model_name(model)
        self._max_tokens = max_tokens
        self._enable_prompt_caching_beta = enable_prompt_caching_beta

@@ -137,7 +138,7 @@ async def _process_context(self, context: OpenAILLMContext):
            tools=context.tools or [],
            system=context.system,
            messages=messages,
            model=self._model,
            model=self.model_name,
            max_tokens=self._max_tokens,
            stream=True)

@@ -231,7 +232,7 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
            context = AnthropicLLMContext.from_image_frame(frame)
        elif isinstance(frame, LLMModelUpdateFrame):
            logger.debug(f"Switching LLM model to: [{frame.model}]")
            self._model = frame.model
            self.set_model_name(frame.model)
        elif isinstance(frame, LLMEnablePromptCachingFrame):
            logger.debug(f"Setting enable prompt caching to: [{frame.enable}]")
            self._enable_prompt_caching_beta = frame.enable
@@ -251,15 +252,13 @@ async def _report_usage_metrics(
            cache_creation_input_tokens: int,
            cache_read_input_tokens: int):
        if prompt_tokens or completion_tokens or cache_creation_input_tokens or cache_read_input_tokens:
            tokens = {
                "processor": self.name,
                "model": self._model,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "cache_creation_input_tokens": cache_creation_input_tokens,
                "cache_read_input_tokens": cache_read_input_tokens,
                "total_tokens": prompt_tokens + completion_tokens
            }
            tokens = LLMTokenUsage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                cache_creation_input_tokens=cache_creation_input_tokens,
                cache_read_input_tokens=cache_read_input_tokens,
                total_tokens=prompt_tokens + completion_tokens
            )
            await self.start_llm_usage_metrics(tokens)


4 changes: 3 additions & 1 deletion src/pipecat/services/azure.py
@@ -22,6 +22,8 @@
    TTSStoppedFrame,
    TranscriptionFrame,
    URLImageRawFrame)
from pipecat.metrics.metrics import TTSUsageMetricsData
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import STTService, TTSService, ImageGenService
from pipecat.services.openai import BaseOpenAILLMService
from pipecat.utils.time import time_now_iso8601
@@ -190,7 +192,7 @@ def __init__(
        self._api_key = api_key
        self._azure_endpoint = endpoint
        self._api_version = api_version
        self._model = model
        self.set_model_name(model)
        self._image_size = image_size
        self._aiohttp_session = aiohttp_session
