diff --git a/README.md b/README.md index 9f83dfb..4f6dd34 100644 --- a/README.md +++ b/README.md @@ -205,6 +205,19 @@ There are a few similar properties which control budget allocation you mind find It's important to note that all of the `flex*` properties allow for cooperative use of the token budget for a prompt, but have no effect on the prioritization and pruning logic undertaken once all elements are rendered. +#### Debugging Budgeting + +You can set a `tracer` property on the `PromptRenderer` to debug how your elements are rendered and how this library allocates your budget. We include a basic `HTMLTracer` you can use: + +```js +const renderer = new PromptRenderer(/* ... */); +const tracer = new HTMLTracer(); +renderer.tracer = tracer; +await renderer.render(/* ... */); + +fs.writeFileSync('debug.html', tracer.toHTML()); +``` + ### Usage in Tools Visual Studio Code's API supports language models tools, sometimes called 'functions'. The tools API allows tools to return multiple content types of data to its consumers, and this library supports both returning rich prompt elements to tool callers, as well as using rich content returned from tools. @@ -228,7 +241,7 @@ async function doToolInvocation(options: LanguageModelToolInvocationOptions): vs } ``` -### As a Consumer +#### As a Consumer You may invoke the `vscode.lm.invokeTool` API however you see fit. If you know your token budget in advance, you should pass it to the tool when you call `invokeTool` via the `tokenOptions` option. You can then render the result using the `` helper element, for example: diff --git a/src/base/htmlTracer.ts b/src/base/htmlTracer.ts new file mode 100644 index 0000000..1462494 --- /dev/null +++ b/src/base/htmlTracer.ts @@ -0,0 +1,82 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation and GitHub. All rights reserved. 
+ *--------------------------------------------------------------------------------------------*/ + +import { ITracer } from './tracer'; + +/** + * Handler that can trace rendering internals into an HTML summary. + * + * Every tracer callback appends a fragment to an internal string buffer, and + * `toHTML()` returns that buffer followed by two further template literals. + * + * NOTE(review): the contents of most template literals in this class are not + * legible in this view of the patch, so the exact markup is not described + * here -- confirm against the checked-in file. + */ +export class HTMLTracer implements ITracer { + /** Strings registered through `entity()`; the output refers to each one by its index in this array. */ private readonly entities: string[] = []; + /** Output accumulated so far by the tracer callbacks. */ private value = ''; + + /** One entry per element pushed by `didRenderElement` and not yet popped by `didRenderChildren`; `hadChildren` is set once `startRenderPass` has emitted the "Children" fragment for that element. */ private elementStack: { hadChildren: boolean }[] = []; + + /** Begins a render pass. When the innermost open element is not yet flagged, flags it and emits the "Children" fragment before the fragment for the pass itself. */ public startRenderPass(): void { + const stackElem = this.elementStack[this.elementStack.length - 1]; + if (stackElem && !stackElem.hadChildren) { + stackElem.hadChildren = true; + this.value += `
Children`; + } + + this.value += `
`; + } + /** Begins a flex group: emits a header showing `group` (labelled "flexGrow"), the number of tokens reserved, and the remaining budget to split between children. NOTE(review): the renderer passes the group's index in its sorted flex-group list as `group`, not the flexGrow value itself -- confirm the label is intended. */ public startRenderFlex(group: number, reserved: number, remainingTokenBudget: number): void { + this.value += `

flexGrow=${group}

${reserved} tokens reserved, ${remainingTokenBudget} tokens to split between children

`; + } + /** Records a rendered element: emits an entity for the text `<name />`, then, only when `literals` is non-empty, one `li` entity per literal with each newline replaced by the two characters backslash and n, and finally pushes a fresh `{ hadChildren: false }` entry onto the element stack. */ public didRenderElement(name: string, literals: string[]): void { + this.value += `

${this.entity(`<${name} />`)}

`; + if (literals.length) { + this.value += `
    ${literals.map(l => this.entity(l.replace(/\n/g, '\\n'), 'li')).join('')}
`; + } + this.elementStack.push({ hadChildren: false }); + } + /** Finishes the element most recently pushed by `didRenderElement`. Pops its stack entry (the non-null assertion assumes the two calls are always paired), emits one more fragment if a Children section was opened for it (presumably its closing markup -- confirm), and reports `tokensConsumed` when it is non-zero. */ public didRenderChildren(tokensConsumed: number): void { + if (this.elementStack.pop()!.hadChildren) { + this.value += `
`; + } + if (tokensConsumed) { + this.value += `

${tokensConsumed} tokens consumed by children

`; + } + } + /** Ends the current flex group. NOTE(review): the appended literal looks empty in this view -- presumably closing markup; confirm. */ public endRenderFlex(): void { + this.value += ''; + } + /** Ends the current render pass. NOTE(review): the appended literal looks empty in this view -- presumably closing markup; confirm. */ public endRenderPass(): void { + this.value += ''; + } + + /** Returns everything traced so far, concatenated with two trailing template literals whose contents are not legible in this view. */ public toHTML() { + return this.value + + `` + + ``; + } + + /** Stores `s` in `entities` and returns markup for `tag` carrying the class `entity-<index>`; `s` itself is not interpolated into the returned string (presumably so it needs no HTML escaping -- confirm how `toHTML` fills these in). */ private entity(s: string, tag = 'span') { + this.entities.push(s); + return `<${tag} class="entity-${this.entities.length - 1}">`; + } +} + +/** CSS for the trace output: page font, `.render-pass` border and hover colour (written with a nested `&:hover` rule), monospace pre-formatted `.literals li`, and left padding for `.render-flex` / `.render-element`. */ const style = `body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', system-ui, 'Ubuntu', 'Droid Sans', sans-serif; +} + +.render-pass { + padding: 4px; + border-left: 2px solid #ccc; + + &:hover { + border-left-color: #000; + } +} + +.literals li { + white-space: pre; + font-family: monospace; +} + +.render-flex, .render-element { + padding-left: 10px; +}`; diff --git a/src/base/index.ts b/src/base/index.ts index 3f93954..aa8c76d 100644 --- a/src/base/index.ts +++ b/src/base/index.ts @@ -11,10 +11,12 @@ import { AnyTokenizer, ITokenizer } from './tokenizer/tokenizer'; import { BasePromptElementProps, IChatEndpointInfo, PromptElementCtor } from './types'; import { ChatDocumentContext, LanguageModelChatMessage } from './vscodeTypes.d'; +export * from './htmlTracer'; export * as JSONTree from './jsonTypes'; export { AssistantChatMessage, ChatMessage, ChatRole, FunctionChatMessage, SystemChatMessage, ToolChatMessage, UserChatMessage } from './openai'; export * from './results'; export { ITokenizer } from './tokenizer/tokenizer'; +export * from './tracer'; export * from './tsx-globals'; export * from './types'; diff --git a/src/base/promptRenderer.ts b/src/base/promptRenderer.ts index 11bddf7..1440573 100644 --- a/src/base/promptRenderer.ts +++ b/src/base/promptRenderer.ts @@ -10,6 +10,7 @@ import { PromptElement } from "./promptElement"; import { AssistantMessage, BaseChatMessage, ChatMessagePromptElement, TextChunk, ToolMessage, isChatMessagePromptElement } from "./promptElements"; import { PromptMetadata, PromptReference } from "./results"; import { ITokenizer } from "./tokenizer/tokenizer"; +import { ITracer } 
from './tracer'; import { BasePromptElementProps, IChatEndpointInfo, PromptElementCtor, PromptPiece, PromptPieceChild, PromptSizing } from "./types"; import { coalesce } from "./util/arrays"; import { URI } from "./util/vs/common/uri"; @@ -60,6 +61,7 @@ export class PromptRenderer

{ private readonly _ignoredFiles: URI[] = []; private readonly _root = new PromptTreeElement(null, 0); private readonly _references: PromptReference[] = []; + public tracer: ITracer | undefined = undefined; /** * @@ -129,22 +131,32 @@ export class PromptRenderer

{ flexGroup.push({ element, promptElementInstance: promptElement }); } + if (promptElements.size === 0) { + return; + } + + this.tracer?.startRenderPass(); + const flexGroups = [...promptElements.entries()].sort(([a], [b]) => b - a).map(([_, group]) => group); const setReserved = (groupIndex: number, reserved: boolean) => { const sign = reserved ? 1 : -1; + let reservedTokens = 0; for (let i = groupIndex + 1; i < flexGroups.length; i++) { for (const { element } of flexGroups[i]) { if (element.props.flexReserve) { sizing.consume(sign * element.props.flexReserve); + reservedTokens += element.props.flexReserve } } } + return reservedTokens; }; // Prepare all currently known prompt elements in parallel for (const [groupIndex, promptElements] of flexGroups.entries()) { // Temporarily consume any reserved budget for later elements so that the sizing is calculated correctly here. - setReserved(groupIndex, true); + const reservedTokens = setReserved(groupIndex, true); + this.tracer?.startRenderFlex(groupIndex, reservedTokens, sizing.remainingTokenBudget); // Calculate the flex basis for dividing the budget amongst siblings in this group. let flexBasisSum = 0; @@ -192,13 +204,18 @@ export class PromptRenderer

{ // Compute token budget for the pieces that this child wants to render const childSizing = new PromptSizingContext(elementSizing.tokenBudget, this._endpoint); const { tokensConsumed } = await computeTokensConsumedByLiterals(this._tokenizer, element, promptElementInstance, pieces); + this.tracer?.didRenderElement(element.ctor.name, pieces.filter(p => p.kind === 'literal').map(p => p.value)); childSizing.consume(tokensConsumed); await this._handlePromptChildren(element, pieces, childSizing, progress, token); + this.tracer?.didRenderChildren(childSizing.consumed); // Tally up the child consumption into the parent context for any subsequent flex group sizing.consume(childSizing.consumed); } + + this.tracer?.endRenderFlex(); } + this.tracer?.endRenderPass(); } private async _prioritize(things: T[], cmp: (a: T, b: T) => number, count: (thing: T) => Promise) { diff --git a/src/base/tracer.ts b/src/base/tracer.ts new file mode 100644 index 0000000..85e599d --- /dev/null +++ b/src/base/tracer.ts @@ -0,0 +1,21 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation and GitHub. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +/** + * Handler that can trace rendering internals. + * + * The renderer invokes these callbacks only when a tracer is set. Calls are + * nested: a pass contains flex groups, and a flex group contains elements, + * each reported with `didRenderElement` and later closed with + * `didRenderChildren`. + */ +export interface ITracer { + /** Starts a pass of rendering multiple elements; paired with `endRenderPass`. */ + startRenderPass(): void; + /** Starts rendering a flex group; paired with `endRenderFlex`. `group` identifies the group, `reserved` is the number of tokens temporarily reserved for later groups, and `remainingTokenBudget` is the budget left to split between this group's children. */ + startRenderFlex(group: number, reserved: number, remainingTokenBudget: number): void; + /** Marks that an element was rendered. `name` is the element's name (the renderer passes its constructor name) and `literals` are the values of its literal pieces. 
May be followed by `startRenderPass` for children */ + didRenderElement(name: string, literals: string[]): void; + /** Marks that the children of the element last reported by `didRenderElement` were rendered. `tokensConsumed` is the consumption the renderer tallied for that element (NOTE(review): as called, this appears to include the element's own literal tokens as well as its children -- confirm). */ + didRenderChildren(tokensConsumed: number): void; + /** Ends rendering the flex group started by the matching `startRenderFlex`. */ + endRenderFlex(): void; + /** Ends a previously started render pass. */ + endRenderPass(): void; +}