Skip to content

Commit

Permalink
Merge pull request #642 from websentry-ai/vs/fix-anthropic-cache-tokens
Browse files Browse the repository at this point in the history
Fixes the cached token count for unbound provider models
  • Loading branch information
mrubens authored Jan 29, 2025
2 parents 117c4ab + d643359 commit 449b9ef
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 15 deletions.
50 changes: 40 additions & 10 deletions src/api/providers/__tests__/unbound.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { UnboundHandler } from "../unbound"
import { ApiHandlerOptions } from "../../../shared/api"
import OpenAI from "openai"
import { Anthropic } from "@anthropic-ai/sdk"

// Mock OpenAI client
Expand All @@ -16,6 +15,7 @@ jest.mock("openai", () => {
create: (...args: any[]) => {
const stream = {
[Symbol.asyncIterator]: async function* () {
// First chunk with content
yield {
choices: [
{
Expand All @@ -24,13 +24,25 @@ jest.mock("openai", () => {
},
],
}
// Second chunk with usage data
yield {
choices: [
{
delta: {},
index: 0,
},
],
choices: [{ delta: {}, index: 0 }],
usage: {
prompt_tokens: 10,
completion_tokens: 5,
total_tokens: 15,
},
}
// Third chunk with cache usage data
yield {
choices: [{ delta: {}, index: 0 }],
usage: {
prompt_tokens: 8,
completion_tokens: 4,
total_tokens: 12,
cache_creation_input_tokens: 3,
cache_read_input_tokens: 2,
},
}
},
}
Expand Down Expand Up @@ -95,19 +107,37 @@ describe("UnboundHandler", () => {
},
]

it("should handle streaming responses", async () => {
it("should handle streaming responses with text and usage data", async () => {
const stream = handler.createMessage(systemPrompt, messages)
const chunks: any[] = []
const chunks: Array<{ type: string } & Record<string, any>> = []
for await (const chunk of stream) {
chunks.push(chunk)
}

expect(chunks.length).toBe(1)
expect(chunks.length).toBe(3)

// Verify text chunk
expect(chunks[0]).toEqual({
type: "text",
text: "Test response",
})

// Verify regular usage data
expect(chunks[1]).toEqual({
type: "usage",
inputTokens: 10,
outputTokens: 5,
})

// Verify usage data with cache information
expect(chunks[2]).toEqual({
type: "usage",
inputTokens: 8,
outputTokens: 4,
cacheWriteTokens: 3,
cacheReadTokens: 2,
})

expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "claude-3-5-sonnet-20241022",
Expand Down
25 changes: 20 additions & 5 deletions src/api/providers/unbound.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@ import OpenAI from "openai"
import { ApiHandler, SingleCompletionHandler } from "../"
import { ApiHandlerOptions, ModelInfo, UnboundModelId, unboundDefaultModelId, unboundModels } from "../../shared/api"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream } from "../transform/stream"
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"

/**
 * Usage payload shape applied to `chunk.usage` in the streaming loop:
 * the standard OpenAI completion usage fields plus two optional cache
 * token counters.
 */
interface UnboundUsage extends OpenAI.CompletionUsage {
// Copied to `cacheWriteTokens` on the emitted usage chunk when present and non-zero.
cache_creation_input_tokens?: number
// Copied to `cacheReadTokens` on the emitted usage chunk when present and non-zero.
cache_read_input_tokens?: number
}

export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
private options: ApiHandlerOptions
Expand Down Expand Up @@ -96,7 +101,7 @@ export class UnboundHandler implements ApiHandler, SingleCompletionHandler {

for await (const chunk of completion) {
const delta = chunk.choices[0]?.delta
const usage = chunk.usage
const usage = chunk.usage as UnboundUsage

if (delta?.content) {
yield {
Expand All @@ -106,11 +111,21 @@ export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
}

if (usage) {
yield {
const usageData: ApiStreamUsageChunk = {
type: "usage",
inputTokens: usage?.prompt_tokens || 0,
outputTokens: usage?.completion_tokens || 0,
inputTokens: usage.prompt_tokens || 0,
outputTokens: usage.completion_tokens || 0,
}

// Only add cache tokens if they exist
if (usage.cache_creation_input_tokens) {
usageData.cacheWriteTokens = usage.cache_creation_input_tokens
}
if (usage.cache_read_input_tokens) {
usageData.cacheReadTokens = usage.cache_read_input_tokens
}

yield usageData
}
}
}
Expand Down

0 comments on commit 449b9ef

Please sign in to comment.