Skip to content

Commit

Permalink
Fix GPT tokenizer failing to encode chat messages (pass model to encodeChat)
Browse files Browse the repository at this point in the history
  • Loading branch information
abrenneke committed Oct 13, 2023
1 parent df24582 commit 15d6492
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
8 changes: 6 additions & 2 deletions packages/core/src/integrations/GptTokenizerTokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { Tokenizer, TokenizerCallInfo } from './Tokenizer.js';
import { encode, encodeChat } from 'gpt-tokenizer';
import Emittery from 'emittery';
import { getError } from '../utils/errors.js';
import { sum } from 'lodash';

export class GptTokenizerTokenizer implements Tokenizer {
emitter = new Emittery<{
Expand All @@ -19,13 +20,16 @@ export class GptTokenizerTokenizer implements Tokenizer {

getTokenCountForMessages(messages: ChatMessage[], _info: TokenizerCallInfo): number {
try {
return encodeChat(
const encodedChat = encodeChat(
messages.map((message) => ({
role: message.type as 'system' | 'user' | 'assistant', // Doesn't support 'function' yet
content: message.message,
name: message.name,
})),
).length;
(_info.model as any) ?? 'gpt-3.5-turbo',
);

return encodedChat.length;
} catch (err) {
this.emitter.emit('error', getError(err));
return 0;
Expand Down
14 changes: 13 additions & 1 deletion packages/core/src/model/GraphProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,12 @@ export class GraphProcessor {
this.#graphInputs = inputs;
this.#contextValues ??= contextValues;

if (this.#context.tokenizer) {
this.#context.tokenizer.on('error', (error) => {
this.#emitter.emit('error', { error });
});
}

if (!this.#isSubProcessor) {
this.#emitter.emit('start', {
contextValues: this.#contextValues,
Expand Down Expand Up @@ -1326,10 +1332,16 @@ export class GraphProcessor {

const plugin = this.#registry.getPluginFor(node.type);

let tokenizer = this.#context.tokenizer;
if (!tokenizer) {
tokenizer = new GptTokenizerTokenizer();
tokenizer.on('error', (e) => this.#emitter.emit('error', { error: e }));
}

const context: InternalProcessContext = {
...this.#context,
node,
tokenizer: this.#context.tokenizer ?? new GptTokenizerTokenizer(),
tokenizer,
executor: this.executor ?? 'nodejs',
project: this.#project,
executionCache: this.#executionCache,
Expand Down

0 comments on commit 15d6492

Please sign in to comment.