Skip to content

Commit

Permalink
Support models with non-standard finish_reason (#2229)
Browse files Browse the repository at this point in the history
Some model launchers (TGI, sglang) may return
non-standard `finish_reason` values, such as
`"eos_token"` or `""`. This commit removes the
strict parsing of `finish_reason` so that the
OpenAI-compatible endpoint no longer fails when
proxying responses from these model launchers.
  • Loading branch information
jvstme authored Jan 27, 2025
1 parent b30d047 commit 3918689
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 6 deletions.
6 changes: 2 additions & 4 deletions src/dstack/_internal/proxy/lib/schemas/model_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from dstack._internal.core.models.common import CoreModel

FinishReason = Literal["stop", "length", "tool_calls", "eos_token"]


class ChatMessage(CoreModel):
role: str # TODO(egor-s) types
Expand All @@ -30,15 +28,15 @@ class ChatCompletionsRequest(CoreModel):


class ChatCompletionsChoice(CoreModel):
finish_reason: FinishReason
finish_reason: str
index: int
message: ChatMessage


class ChatCompletionsChunkChoice(CoreModel):
delta: object
logprobs: object = {}
finish_reason: Optional[FinishReason]
finish_reason: Optional[str]
index: int


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
ChatCompletionsResponse,
ChatCompletionsUsage,
ChatMessage,
FinishReason,
)
from dstack._internal.proxy.lib.services.model_proxy.clients.base import ChatCompletionsClient

Expand Down Expand Up @@ -180,7 +179,7 @@ def get_payload(self, request: ChatCompletionsRequest) -> Dict:
}

@staticmethod
def finish_reason(reason: str) -> FinishReason:
def finish_reason(reason: str) -> str:
if reason == "stop_sequence" or reason == "eos_token":
return "stop"
if reason == "length":
Expand Down

0 comments on commit 3918689

Please sign in to comment.