{ private readonly _usedContext: ChatDocumentContext[] = []; private readonly _ignoredFiles: URI[] = []; + private readonly _growables: { initialConsume: number; elem: PromptTreeElement }[] = []; private readonly _root = new PromptTreeElement(null, 0); /** Epoch used to tracing the order in which elements render. */ - private _epoch = 0; public tracer: ITracer | undefined = undefined; /** @@ -197,27 +197,52 @@ export class PromptRenderer
{
continue;
}
- const pieces = flattenAndReduce(template);
-
- // Compute token budget for the pieces that this child wants to render
- const childSizing = new PromptSizingContext(elementSizing.tokenBudget, this._endpoint);
- const { tokensConsumed } = await computeTokensConsumedByLiterals(this._tokenizer, element, promptElementInstance, pieces);
- childSizing.consume(tokensConsumed);
- await this._handlePromptChildren(element, pieces, childSizing, progress, token);
+ const childConsumption = await this._processPromptRenderPiece(
+ new PromptSizingContext(elementSizing.tokenBudget, this._endpoint),
+ element,
+ promptElementInstance,
+ template,
+ progress,
+ token,
+ );
+
+ // Append growables here so that when we go back and expand them we do so in render order.
+ if (promptElementInstance instanceof Expandable) {
+ this._growables.push({ initialConsume: childConsumption, elem: element.node });
+ }
// Tally up the child consumption into the parent context for any subsequent flex group
- sizing.consume(childSizing.consumed);
+ sizing.consume(childConsumption);
}
}
}
+ private async _processPromptRenderPiece(
+ elementSizing: PromptSizingContext,
+ element: QueueItem {
* The total token count is guaranteed to be less than or equal to the token budget.
*/
public async render(progress?: Progress {
token,
);
- const { container, allMetadata, removed } = await this._getFinalElementTree(this._endpoint.modelMaxPromptTokens);
+ const { container, allMetadata, removed } = await this._getFinalElementTree(this._endpoint.modelMaxPromptTokens, token);
this.tracer?.didMaterializeTree?.({
budget: this._endpoint.modelMaxPromptTokens,
renderedTree: { container, removed, budget: this._endpoint.modelMaxPromptTokens },
tokenizer: this._tokenizer,
- renderTree: budget => this._getFinalElementTree(budget).then(r => ({ ...r, budget })),
+ renderTree: budget => this._getFinalElementTree(budget, undefined).then(r => ({ ...r, budget })),
});
// Then finalize the chat messages
@@ -305,10 +329,24 @@ export class PromptRenderer {
};
}
- private async _getFinalElementTree(tokenBudget: number) {
+ /**
+ * Note: this may be called multiple times from the tracer as users play
+ * around with budgets. It should be side-effect-free.
+ */
+ private async _getFinalElementTree(tokenBudget: number, token: CancellationToken | undefined) {
// Trim the elements to fit within the token budget. We check the "lower bound"
// first because that's much more cache-friendly as we remove elements.
const container = this._root.materialize() as MaterializedContainer;
+ const initialTokenCount = await container.tokenCount(this._tokenizer);
+ if (initialTokenCount < tokenBudget) {
+ const didChange = await this._grow(container, initialTokenCount, tokenBudget, token);
+
+ // if nothing grew, we already counted tokens so we can safely return
+ if (!didChange) {
+ return { container, allMetadata: [...container.allMetadata()], removed: 0 };
+ }
+ }
+
const allMetadata = [...container.allMetadata()];
let removed = 0;
while (
@@ -322,6 +360,52 @@ export class PromptRenderer {
return { container, allMetadata, removed };
}
+ /** Grows all Expandable elements; returns whether any changes were made. */
+ private async _grow(tree: MaterializedContainer, tokensUsed: number, tokenBudget: number, token: CancellationToken | undefined): Promise Token changes here will prune elements and re-render 'pure' ones, but the entire prompt is not being re-rendered Token changes here will prune elements and re-render Expandable ones, but the entire prompt is not being re-rendered Changing the render epoch lets you see the order in which elements are rendered and how the token budget is allocated.