diff --git a/.npmignore b/.npmignore
index 1e1f0b0..70b6350 100644
--- a/.npmignore
+++ b/.npmignore
@@ -12,3 +12,4 @@ tsconfig.json
 dist/base/test/
 *.map
 dist/base/tokenizer/cl100kBaseTokenizer*.*
+dist/base/tokenizer/cl100k_base.tiktoken
diff --git a/build/postcompile.ts b/build/postcompile.ts
index 5cf4580..64996c1 100644
--- a/build/postcompile.ts
+++ b/build/postcompile.ts
@@ -2,19 +2,7 @@
  * Copyright (c) Microsoft Corporation and GitHub. All rights reserved.
  *--------------------------------------------------------------------------------------------*/
 
-import * as fs from 'fs';
-import * as path from 'path';
-
-const REPO_ROOT = path.join(__dirname, '..');
-
-export async function copyStaticAssets(srcpaths: string[], dst: string): Promise<void> {
-	await Promise.all(srcpaths.map(async srcpath => {
-		const src = path.join(REPO_ROOT, srcpath);
-		const dest = path.join(REPO_ROOT, dst, path.basename(srcpath));
-		await fs.promises.mkdir(path.dirname(dest), { recursive: true });
-		await fs.promises.copyFile(src, dest);
-	}));
-}
+import { copyStaticAssets } from './postinstall';
 
 async function main() {
 	// Ship the vscodeTypes.d.ts file in the dist bundle
diff --git a/build/postinstall.ts b/build/postinstall.ts
new file mode 100644
index 0000000..926c112
--- /dev/null
+++ b/build/postinstall.ts
@@ -0,0 +1,26 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation and GitHub. All rights reserved.
+ *--------------------------------------------------------------------------------------------*/
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+const REPO_ROOT = path.join(__dirname, '..');
+
+export async function copyStaticAssets(srcpaths: string[], dst: string): Promise<void> {
+	await Promise.all(srcpaths.map(async srcpath => {
+		const src = path.join(REPO_ROOT, srcpath);
+		const dest = path.join(REPO_ROOT, dst, path.basename(srcpath));
+		await fs.promises.mkdir(path.dirname(dest), { recursive: true });
+		await fs.promises.copyFile(src, dest);
+	}));
+}
+
+async function main() {
+	// Ship the tiktoken file in the dist bundle
+	await copyStaticAssets([
+		'src/base/tokenizer/cl100k_base.tiktoken',
+	], 'dist/base/tokenizer');
+}
+
+main();
\ No newline at end of file
diff --git a/package.json b/package.json
index 166251a..0ae7e8a 100644
--- a/package.json
+++ b/package.json
@@ -9,7 +9,8 @@
 		"compile": "tsc -p tsconfig.json && tsx ./build/postcompile.ts",
 		"watch": "tsc --watch --sourceMap",
 		"test": "vscode-test",
-		"prettier": "prettier --list-different --write --cache ."
+		"prettier": "prettier --list-different --write --cache .",
+		"prepare": "tsx ./build/postinstall.ts"
 	},
 	"keywords": [],
 	"author": "Microsoft Corporation",
diff --git a/src/base/index.ts b/src/base/index.ts
index e8d5e34..726be23 100644
--- a/src/base/index.ts
+++ b/src/base/index.ts
@@ -75,7 +75,7 @@ export async function renderPrompt(
 	mode: 'vscode' | 'none' = 'vscode',
 ): Promise<{ messages: (ChatMessage | LanguageModelChatMessage)[]; tokenCount: number; metadatas: MetadataMap; usedContext: ChatDocumentContext[]; references: PromptReference[] }> {
 	let tokenizer = 'countTokens' in tokenizerMetadata
-		? new AnyTokenizer(tokenizerMetadata.countTokens)
+		? new AnyTokenizer((text, token) => tokenizerMetadata.countTokens(text, token))
 		: tokenizerMetadata;
 	const renderer = new PromptRenderer(endpoint, ctor, props, tokenizer);
 	let { messages, tokenCount, references } = await renderer.render(progress, token);
diff --git a/src/base/test/renderer.test.tsx b/src/base/test/renderer.test.tsx
index bcee71c..32bff41 100644
--- a/src/base/test/renderer.test.tsx
+++ b/src/base/test/renderer.test.tsx
@@ -3,7 +3,7 @@
  *--------------------------------------------------------------------------------------------*/
 
 import * as assert from 'assert';
-import { ChatMessage, ChatRole } from '../openai';
+import { BaseTokensPerCompletion, ChatMessage, ChatRole } from '../openai';
 import { PromptElement } from '../promptElement';
 import {
 	AssistantMessage,
@@ -26,7 +26,7 @@ import {
 
 suite('PromptRenderer', () => {
 	const fakeEndpoint: any = {
-		modelMaxPromptTokens: 8192,
+		modelMaxPromptTokens: 8192 - BaseTokensPerCompletion,
 	} satisfies Partial;
 	const tokenizer = new Cl100KBaseTokenizer();
 
@@ -98,7 +98,7 @@ suite('PromptRenderer', () => {
 					"This late pivot means we don't have time to boil the ocean for the client deliverable.",
 			},
 		]);
-		assert.deepStrictEqual(res.tokenCount, 129);
+		assert.deepStrictEqual(res.tokenCount, 129 - BaseTokensPerCompletion);
 	});
 
 	test('runs async prepare in parallel', async () => {
@@ -270,7 +270,7 @@ suite('PromptRenderer', () => {
 			{ role: 'assistant', content: 'I am terrific, how are you?' },
 			{ role: 'user', content: 'What time is it?' },
 		]);
-		assert.deepStrictEqual(res.tokenCount, 130);
+		assert.deepStrictEqual(res.tokenCount, 130 - BaseTokensPerCompletion);
 	});
 
 	test('no shaving at limit', async () => {
@@ -315,11 +315,11 @@ suite('PromptRenderer', () => {
 			{ role: 'assistant', content: 'I am terrific, how are you?' },
 			{ role: 'user', content: 'What time is it?' },
 		]);
-		assert.deepStrictEqual(res.tokenCount, 130);
+		assert.deepStrictEqual(res.tokenCount, 130 - BaseTokensPerCompletion);
 	});
 
 	test('shaving one', async () => {
-		const res = await renderWithMaxPromptTokens(129, Prompt1, {});
+		const res = await renderWithMaxPromptTokens(129 - BaseTokensPerCompletion, Prompt1, {});
 		assert.deepStrictEqual(res.messages, [
 			{
 				role: 'system',
@@ -355,11 +355,11 @@ suite('PromptRenderer', () => {
 			{ role: 'assistant', content: 'I am terrific, how are you?' },
 			{ role: 'user', content: 'What time is it?' },
 		]);
-		assert.deepStrictEqual(res.tokenCount, 118);
+		assert.deepStrictEqual(res.tokenCount, 118 - BaseTokensPerCompletion);
 	});
 
 	test('shaving two', async () => {
-		const res = await renderWithMaxPromptTokens(110, Prompt1, {});
+		const res = await renderWithMaxPromptTokens(110 - BaseTokensPerCompletion, Prompt1, {});
 		assert.deepStrictEqual(res.messages, [
 			{
 				role: 'system',
@@ -390,11 +390,11 @@ suite('PromptRenderer', () => {
 			{ role: 'assistant', content: 'I am terrific, how are you?' },
 			{ role: 'user', content: 'What time is it?' },
 		]);
-		assert.deepStrictEqual(res.tokenCount, 102);
+		assert.deepStrictEqual(res.tokenCount, 102 - BaseTokensPerCompletion);
 	});
 
 	test('shaving a lot', async () => {
-		const res = await renderWithMaxPromptTokens(54, Prompt1, {});
+		const res = await renderWithMaxPromptTokens(54 - BaseTokensPerCompletion, Prompt1, {});
 		assert.deepStrictEqual(res.messages, [
 			{
 				role: 'system',
@@ -413,7 +413,7 @@ suite('PromptRenderer', () => {
 			},
 			{ role: 'user', content: 'What time is it?' },
 		]);
-		assert.deepStrictEqual(res.tokenCount, 53);
+		assert.deepStrictEqual(res.tokenCount, 53 - BaseTokensPerCompletion);
 	});
 	});
 
 	suite('renders prompts based on dynamic token budget', function () {
@@ -461,7 +461,7 @@ suite('PromptRenderer', () => {
 
 	test('passes budget to children based on declared flex', async () => {
 		const fakeEndpoint: any = {
-			modelMaxPromptTokens: 100, // Total allowed tokens
+			modelMaxPromptTokens: 100 - BaseTokensPerCompletion, // Total allowed tokens
 		} satisfies Partial;
 		const inst = new PromptRenderer(
 			fakeEndpoint,
@@ -564,7 +564,7 @@ suite('PromptRenderer', () => {
 	test('are rendered to chat messages', async () => {
 		// First render with large token budget so nothing gets dropped
 		const largeTokenBudgetEndpoint: any = {
-			modelMaxPromptTokens: 8192,
+			modelMaxPromptTokens: 8192 - BaseTokensPerCompletion,
 		} satisfies Partial;
 		const inst1 = new PromptRenderer(
 			largeTokenBudgetEndpoint,
@@ -604,13 +604,13 @@ suite('PromptRenderer', () => {
 			},
 			{ role: 'user', content: 'What is your name?' },
 		]);
-		assert.deepStrictEqual(res1.tokenCount, 165);
+		assert.deepStrictEqual(res1.tokenCount, 165 - BaseTokensPerCompletion);
 	});
 
 	test('are prioritized and fit within token budget', async () => {
 		// Render with smaller token budget and ensure that messages are reduced in size
 		const smallTokenBudgetEndpoint: any = {
-			modelMaxPromptTokens: 140,
+			modelMaxPromptTokens: 140 - BaseTokensPerCompletion,
 		} satisfies Partial;
 		const inst2 = new PromptRenderer(
@@ -619,7 +619,7 @@ suite('PromptRenderer', () => {
 			tokenizer
 		);
 		const res2 = await inst2.render(undefined, undefined);
-		assert.equal(res2.tokenCount, 120);
+		assert.equal(res2.tokenCount, 120 - BaseTokensPerCompletion);
 		assert.deepStrictEqual(res2.messages, [
 			{
 				role: 'system',
@@ -706,7 +706,7 @@ suite('PromptRenderer', () => {
 		}
 
 		const smallTokenBudgetEndpoint: any = {
-			modelMaxPromptTokens: 150,
+			modelMaxPromptTokens: 150 - BaseTokensPerCompletion,
 		} satisfies Partial;
 		const inst2 = new PromptRenderer(
 			smallTokenBudgetEndpoint,
@@ -775,7 +775,7 @@ LOW MED 00 01 02 03 04 05 06 07 08 09
 		}
 
 		const smallTokenBudgetEndpoint: any = {
-			modelMaxPromptTokens: 150,
+			modelMaxPromptTokens: 150 - BaseTokensPerCompletion,
 		} satisfies Partial;
 		const inst2 = new PromptRenderer(
 			smallTokenBudgetEndpoint,
@@ -829,7 +829,7 @@ LOW MED 00 01 02 03 04 05 06 07 08 09
 
 	test('reports reference that survived prioritization', async () => {
 		const endpoint: any = {
-			modelMaxPromptTokens: 4096,
+			modelMaxPromptTokens: 4096 - BaseTokensPerCompletion,
 		} satisfies Partial;
 
 		const inst = new PromptRenderer(
@@ -891,7 +891,7 @@ LOW MED 00 01 02 03 04 05 06 07 08 09
 		}
 
 		const endpoint: any = {
-			modelMaxPromptTokens: 4096,
+			modelMaxPromptTokens: 4096 - BaseTokensPerCompletion,
 		} satisfies Partial;
 		const inst = new PromptRenderer(