diff --git a/packages/core/src/integrations/GptTokenizerTokenizer.ts b/packages/core/src/integrations/GptTokenizerTokenizer.ts
index 3a166fb35..318abcf33 100644
--- a/packages/core/src/integrations/GptTokenizerTokenizer.ts
+++ b/packages/core/src/integrations/GptTokenizerTokenizer.ts
@@ -3,6 +3,7 @@ import type { Tokenizer, TokenizerCallInfo } from './Tokenizer.js';
 import { encode, encodeChat } from 'gpt-tokenizer';
 import Emittery from 'emittery';
 import { getError } from '../utils/errors.js';
+import { sum } from 'lodash';
 
 export class GptTokenizerTokenizer implements Tokenizer {
   emitter = new Emittery<{
@@ -19,13 +20,16 @@ export class GptTokenizerTokenizer implements Tokenizer {
 
   getTokenCountForMessages(messages: ChatMessage[], _info: TokenizerCallInfo): number {
     try {
-      return encodeChat(
+      const encodedChat = encodeChat(
         messages.map((message) => ({
           role: message.type as 'system' | 'user' | 'assistant', // Doesn't support 'function' yet
           content: message.message,
           name: message.name,
         })),
-      ).length;
+        (_info.model as any) ?? 'gpt-3.5-turbo',
+      );
+
+      return encodedChat.length;
     } catch (err) {
       this.emitter.emit('error', getError(err));
       return 0;
diff --git a/packages/core/src/model/GraphProcessor.ts b/packages/core/src/model/GraphProcessor.ts
index c5c67bfbb..4588c8f8c 100644
--- a/packages/core/src/model/GraphProcessor.ts
+++ b/packages/core/src/model/GraphProcessor.ts
@@ -654,6 +654,12 @@ export class GraphProcessor {
     this.#graphInputs = inputs;
     this.#contextValues ??= contextValues;
 
+    if (this.#context.tokenizer) {
+      this.#context.tokenizer.on('error', (error) => {
+        this.#emitter.emit('error', { error });
+      });
+    }
+
    if (!this.#isSubProcessor) {
       this.#emitter.emit('start', {
         contextValues: this.#contextValues,
@@ -1326,10 +1332,16 @@ export class GraphProcessor {
 
     const plugin = this.#registry.getPluginFor(node.type);
 
+    let tokenizer = this.#context.tokenizer;
+    if (!tokenizer) {
+      tokenizer = new GptTokenizerTokenizer();
+      tokenizer.on('error', (e) => this.#emitter.emit('error', { error: e }));
+    }
+
     const context: InternalProcessContext = {
       ...this.#context,
       node,
-      tokenizer: this.#context.tokenizer ?? new GptTokenizerTokenizer(),
+      tokenizer,
       executor: this.executor ?? 'nodejs',
       project: this.#project,
       executionCache: this.#executionCache,
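
Note (not part of the patch): a minimal sketch of how the changed behavior could be exercised in isolation. It assumes the Tokenizer interface exposes an on('error', ...) subscription, as GraphProcessor uses above; the import path, message shape, and casts are illustrative only.

// Hypothetical consumer code, for illustration only.
import { GptTokenizerTokenizer } from './integrations/GptTokenizerTokenizer.js';

const tokenizer = new GptTokenizerTokenizer();

// Tokenizer failures are emitted rather than thrown; without a listener they are
// silently swallowed and getTokenCountForMessages returns 0.
tokenizer.on('error', (err) => console.error('tokenizer error:', err));

// The model from TokenizerCallInfo is now forwarded to encodeChat, falling back
// to 'gpt-3.5-turbo' when it is not set.
const tokenCount = tokenizer.getTokenCountForMessages(
  [{ type: 'user', message: 'Hello!' } as any],
  { model: 'gpt-4' } as any,
);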