-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for reasoning in the UI #4559
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -324,7 +324,22 @@ export type ImageMessagePart = { | |
imageUrl: { url: string }; | ||
}; | ||
|
||
export type MessagePart = TextMessagePart | ImageMessagePart; | ||
export type ThinkingMessagePart = { | ||
type: "thinking"; | ||
thinking: string; | ||
signature: string; | ||
}; | ||
|
||
export type RedactedThinkingMessagePart = { | ||
type: "redacted_thinking"; | ||
data: string; | ||
}; | ||
|
||
export type MessagePart = | ||
| TextMessagePart | ||
| ImageMessagePart | ||
| ThinkingMessagePart | ||
| RedactedThinkingMessagePart; | ||
|
||
export type MessageContent = string | MessagePart[]; | ||
|
||
|
@@ -360,6 +375,7 @@ export interface UserChatMessage { | |
export interface AssistantChatMessage { | ||
role: "assistant"; | ||
content: MessageContent; | ||
reasoning_content?: string; | ||
toolCalls?: ToolCallDelta[]; | ||
} | ||
|
||
|
@@ -921,11 +937,17 @@ export interface BaseCompletionOptions { | |
prediction?: Prediction; | ||
tools?: Tool[]; | ||
toolChoice?: ToolChoice; | ||
thinking?: { | ||
type: "enabled" | "disabled"; | ||
budget_tokens?: number; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this supposed to be the budget for the number of tokens to allocate to thinking? If so I think a more declarative |
||
}; | ||
reasoning_effort?: "high" | "medium" | "low"; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we make this into an enum instead? Bit of a nitpick but will make any future refactors easier. |
||
} | ||
|
||
export interface ModelCapability { | ||
uploadImage?: boolean; | ||
tools?: boolean; | ||
thinking?: boolean; | ||
} | ||
|
||
export interface ModelDescription { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -174,7 +174,8 @@ function autodetectTemplateType(model: string): TemplateType | undefined { | |
lower.includes("pplx") || | ||
lower.includes("gemini") || | ||
lower.includes("grok") || | ||
lower.includes("moonshot") | ||
lower.includes("moonshot") || | ||
lower.includes("deepseek-reasoner") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is to avoid deepseek-reasoner using _streamComplete in |
||
) { | ||
return undefined; | ||
} | ||
|
@@ -373,11 +374,45 @@ function autodetectPromptTemplates( | |
return templates; | ||
} | ||
|
||
const PROVIDER_SUPPORTS_THINKING: string[] = ["anthropic", "openai", "deepseek"]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we pull this logic out into a separate file, e.g. |
||
|
||
const MODEL_SUPPORTS_THINKING: string[] = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think support for other proxy providers like OpenRouter could be added as well. I haven't looked into it. |
||
"claude-3-7-sonnet-20250219", | ||
"claude-3-7-sonnet-latest", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This didn't work for me despite using https://hub.continue.dev/anthropic/claude-3-7-sonnet?view=config which has |
||
"o3-mini", | ||
"o3-mini-2025-01-31", | ||
"o1", | ||
"o1-2024-12-17", | ||
"deepseek-reasoner", | ||
]; | ||
|
||
function modelSupportsThinking( | ||
provider: string, | ||
model: string, | ||
title: string | undefined, | ||
capabilities: ModelCapability | undefined, | ||
): boolean { | ||
if (capabilities?.thinking !== undefined) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure if the `capabilities: thinking` option is necessary. Thinking support needs to be hardcoded in some places anyway, so I don't know if there's a reasonable way to try to force it to be enabled. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I think your intuition here is accurate. It's not quite as simple as image uploads. If you're reasonably confident, I would be on board with removing it. |
||
return capabilities.thinking; | ||
} | ||
|
||
if (!PROVIDER_SUPPORTS_THINKING.includes(provider)) { | ||
return false; | ||
} | ||
|
||
const lower = model.toLowerCase(); | ||
return MODEL_SUPPORTS_THINKING.some( | ||
(modelName) => lower.includes(modelName) || title?.includes(modelName), | ||
); | ||
} | ||
|
||
export { | ||
autodetectPromptTemplates, | ||
autodetectTemplateFunction, | ||
autodetectTemplateType, | ||
llmCanGenerateInParallel, | ||
modelSupportsImages, | ||
modelSupportsTools, | ||
modelSupportsThinking, | ||
modelSupportsTools | ||
}; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,8 +90,15 @@ async function countTokensAsync( | |
const promises = content.map(async (part) => { | ||
if (part.type === "imageUrl") { | ||
return countImageTokens(part); | ||
} else if (part.type === "thinking") { | ||
return (await encoding.encode(part.thinking ?? "")).length; | ||
} else if (part.type === "redacted_thinking") { | ||
// For redacted thinking, don't count any tokens | ||
return 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "All extended thinking tokens (including redacted thinking tokens) are billed as output tokens and count toward your rate limits." But they would have to be counted from the API's response:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm a little confused here - mind linking to the docs you're referencing? |
||
} else if (part.type === "text") { | ||
return (await encoding.encode(part.text ?? "")).length; | ||
} | ||
return (await encoding.encode(part.text ?? "")).length; | ||
return 0; | ||
}); | ||
return (await Promise.all(promises)).reduce((sum, val) => sum + val, 0); | ||
} | ||
|
@@ -106,12 +113,17 @@ function countTokens( | |
const encoding = encodingForModel(modelName); | ||
if (Array.isArray(content)) { | ||
return content.reduce((acc, part) => { | ||
return ( | ||
acc + | ||
(part.type === "text" | ||
? encoding.encode(part.text ?? "", "all", []).length | ||
: countImageTokens(part)) | ||
); | ||
if (part.type === "text") { | ||
return acc + encoding.encode(part.text ?? "", "all", []).length; | ||
} else if (part.type === "imageUrl") { | ||
return acc + countImageTokens(part); | ||
} else if (part.type === "thinking") { | ||
return acc + encoding.encode(part.thinking ?? "", "all", []).length; | ||
} else if (part.type === "redacted_thinking") { | ||
// For redacted thinking, don't count any tokens | ||
return acc; | ||
} | ||
return acc; | ||
}, 0); | ||
} else { | ||
return encoding.encode(content ?? "", "all", []).length; | ||
|
@@ -469,5 +481,6 @@ export { | |
pruneLinesFromTop, | ||
pruneRawPromptFromTop, | ||
pruneStringFromBottom, | ||
pruneStringFromTop, | ||
pruneStringFromTop | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After going through this PR I realized that these properties are `snake_case` to match what the Anthropic API is expecting - however, I'd still prefer to keep them `camelCase` in our TS interfaces, both here and elsewhere in the PR.