From a433d1606cc9f24cec7f7cc0a947e416373a5d7b Mon Sep 17 00:00:00 2001 From: Yiming Zhang Date: Tue, 10 Dec 2024 00:22:45 -0500 Subject: [PATCH 1/5] feat: use regex patterns for vision models and allow adding capabilities to models through env var NEXT_PUBLIC_VISION_MODELS. --- app/constant.ts | 15 +++++++++++++++ app/utils.ts | 29 +++++++++-------------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/app/constant.ts b/app/constant.ts index 25c8d98eae3..fe38fb1fe40 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -291,6 +291,21 @@ export const DEFAULT_TTS_VOICES = [ "shimmer", ]; +export const VISION_MODEL_REGEXES = [ + /vision/, + /gpt-4o/, + /claude-3/, + /gemini-1\.5/, + /gemini-exp/, + /learnlm/, + /qwen-vl/, + /qwen2-vl/, + /gpt-4-turbo(?!.*preview)/, // Matches "gpt-4-turbo" but not "gpt-4-turbo-preview" + /^dall-e-3$/, // Matches exactly "dall-e-3" +]; + +export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/]; + const openaiModels = [ "gpt-3.5-turbo", "gpt-3.5-turbo-1106", diff --git a/app/utils.ts b/app/utils.ts index b62bc126da7..fafbc9e79b4 100644 --- a/app/utils.ts +++ b/app/utils.ts @@ -5,6 +5,7 @@ import { RequestMessage } from "./client/api"; import { ServiceProvider } from "./constant"; // import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http"; import { fetch as tauriStreamFetch } from "./utils/stream"; +import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant"; export function trimTopic(topic: string) { // Fix an issue where double quotes still show in the Indonesian language @@ -252,27 +253,15 @@ export function getMessageImages(message: RequestMessage): string[] { } export function isVisionModel(model: string) { - // Note: This is a better way using the TypeScript feature instead of `&&` or `||` (ts v5.5.0-dev.20240314 I've been using) - - const excludeKeywords = ["claude-3-5-haiku-20241022"]; - const visionKeywords = [ - "vision", - "gpt-4o", - "claude-3", - 
"gemini-1.5", - "gemini-exp", - "learnlm", - "qwen-vl", - "qwen2-vl", - ]; - const isGpt4Turbo = - model.includes("gpt-4-turbo") && !model.includes("preview"); - + const envVisionModels = process.env.NEXT_PUBLIC_VISION_MODELS?.split(",").map( + (m) => m.trim(), + ); + if (envVisionModels?.includes(model)) { + return true; + } return ( - !excludeKeywords.some((keyword) => model.includes(keyword)) && - (visionKeywords.some((keyword) => model.includes(keyword)) || - isGpt4Turbo || - isDalle3(model)) + !EXCLUDE_VISION_MODEL_REGEXES.some((regex) => regex.test(model)) && + VISION_MODEL_REGEXES.some((regex) => regex.test(model)) ); } From ed8c3580c8fce9c12c42e2a8ac086ea2f8887953 Mon Sep 17 00:00:00 2001 From: Yiming Zhang Date: Fri, 20 Dec 2024 19:07:00 -0500 Subject: [PATCH 2/5] test: add unit tests for isVisionModel utility function --- test/vision-model-checker.test.ts | 67 +++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 test/vision-model-checker.test.ts diff --git a/test/vision-model-checker.test.ts b/test/vision-model-checker.test.ts new file mode 100644 index 00000000000..842ef644ab4 --- /dev/null +++ b/test/vision-model-checker.test.ts @@ -0,0 +1,67 @@ +import { isVisionModel } from "../app/utils"; + +describe("isVisionModel", () => { + const originalEnv = process.env; + + beforeEach(() => { + jest.resetModules(); + process.env = { ...originalEnv }; + }); + + afterEach(() => { + process.env = originalEnv; + }); + + test("should identify vision models using regex patterns", () => { + const visionModels = [ + "gpt-4-vision", + "claude-3-opus", + "gemini-1.5-pro", + "gemini-2.0", + "gemini-exp-vision", + "learnlm-vision", + "qwen-vl-max", + "qwen2-vl-max", + "gpt-4-turbo", + "dall-e-3", + ]; + + visionModels.forEach((model) => { + expect(isVisionModel(model)).toBe(true); + }); + }); + + test("should exclude specific models", () => { + expect(isVisionModel("claude-3-5-haiku-20241022")).toBe(false); + }); + + test("should not identify 
non-vision models", () => { + const nonVisionModels = [ + "gpt-3.5-turbo", + "gpt-4-turbo-preview", + "claude-2", + "regular-model", + ]; + + nonVisionModels.forEach((model) => { + expect(isVisionModel(model)).toBe(false); + }); + }); + + test("should identify models from NEXT_PUBLIC_VISION_MODELS env var", () => { + process.env.NEXT_PUBLIC_VISION_MODELS = "custom-vision-model,another-vision-model"; + + expect(isVisionModel("custom-vision-model")).toBe(true); + expect(isVisionModel("another-vision-model")).toBe(true); + expect(isVisionModel("unrelated-model")).toBe(false); + }); + + test("should handle empty or missing NEXT_PUBLIC_VISION_MODELS", () => { + process.env.NEXT_PUBLIC_VISION_MODELS = ""; + expect(isVisionModel("unrelated-model")).toBe(false); + + delete process.env.NEXT_PUBLIC_VISION_MODELS; + expect(isVisionModel("unrelated-model")).toBe(false); + expect(isVisionModel("gpt-4-vision")).toBe(true); + }); +}); \ No newline at end of file From 210b29bfbecaebc53c4f37ed23c5df28d28d41fb Mon Sep 17 00:00:00 2001 From: Yiming Zhang Date: Sat, 21 Dec 2024 03:51:54 -0500 Subject: [PATCH 3/5] refactor: remove NEXT_PUBLIC_ prefix from VISION_MODELS env var --- app/config/build.ts | 1 + app/utils.ts | 8 +++++--- test/vision-model-checker.test.ts | 10 +++++----- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/app/config/build.ts b/app/config/build.ts index b2b1ad49da1..aa7c10729b6 100644 --- a/app/config/build.ts +++ b/app/config/build.ts @@ -40,6 +40,7 @@ export const getBuildConfig = () => { buildMode, isApp, template: process.env.DEFAULT_INPUT_TEMPLATE ?? 
DEFAULT_INPUT_TEMPLATE, + visionModels: process.env.VISION_MODELS || "", }; }; diff --git a/app/utils.ts b/app/utils.ts index fafbc9e79b4..f49f1a46609 100644 --- a/app/utils.ts +++ b/app/utils.ts @@ -6,6 +6,7 @@ import { ServiceProvider } from "./constant"; // import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http"; import { fetch as tauriStreamFetch } from "./utils/stream"; import { VISION_MODEL_REGEXES, EXCLUDE_VISION_MODEL_REGEXES } from "./constant"; +import { getClientConfig } from "./config/client"; export function trimTopic(topic: string) { // Fix an issue where double quotes still show in the Indonesian language @@ -253,9 +254,10 @@ export function getMessageImages(message: RequestMessage): string[] { } export function isVisionModel(model: string) { - const envVisionModels = process.env.NEXT_PUBLIC_VISION_MODELS?.split(",").map( - (m) => m.trim(), - ); + const clientConfig = getClientConfig(); + const envVisionModels = clientConfig.visionModels + ?.split(",") + .map((m) => m.trim()); if (envVisionModels?.includes(model)) { return true; } diff --git a/test/vision-model-checker.test.ts b/test/vision-model-checker.test.ts index 842ef644ab4..734e992d829 100644 --- a/test/vision-model-checker.test.ts +++ b/test/vision-model-checker.test.ts @@ -48,19 +48,19 @@ describe("isVisionModel", () => { }); }); - test("should identify models from NEXT_PUBLIC_VISION_MODELS env var", () => { - process.env.NEXT_PUBLIC_VISION_MODELS = "custom-vision-model,another-vision-model"; + test("should identify models from VISION_MODELS env var", () => { + process.env.VISION_MODELS = "custom-vision-model,another-vision-model"; expect(isVisionModel("custom-vision-model")).toBe(true); expect(isVisionModel("another-vision-model")).toBe(true); expect(isVisionModel("unrelated-model")).toBe(false); }); - test("should handle empty or missing NEXT_PUBLIC_VISION_MODELS", () => { - process.env.NEXT_PUBLIC_VISION_MODELS = ""; + test("should handle empty or missing VISION_MODELS", 
() => { + process.env.VISION_MODELS = ""; expect(isVisionModel("unrelated-model")).toBe(false); - delete process.env.NEXT_PUBLIC_VISION_MODELS; + delete process.env.VISION_MODELS; expect(isVisionModel("unrelated-model")).toBe(false); expect(isVisionModel("gpt-4-vision")).toBe(true); }); From ea1329f73e516546dab7193425e1e7dfdd232eb6 Mon Sep 17 00:00:00 2001 From: Yiming Zhang Date: Sat, 21 Dec 2024 04:07:58 -0500 Subject: [PATCH 4/5] fix: add optional chaining to prevent errors when accessing visionModels --- app/utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/utils.ts b/app/utils.ts index f49f1a46609..962e68a101c 100644 --- a/app/utils.ts +++ b/app/utils.ts @@ -255,7 +255,7 @@ export function getMessageImages(message: RequestMessage): string[] { export function isVisionModel(model: string) { const clientConfig = getClientConfig(); - const envVisionModels = clientConfig.visionModels + const envVisionModels = clientConfig?.visionModels ?.split(",") .map((m) => m.trim()); if (envVisionModels?.includes(model)) { From a127ae1fb45d641b9f138057e56a10ece96b2964 Mon Sep 17 00:00:00 2001 From: Yiming Zhang Date: Sat, 21 Dec 2024 13:12:41 -0500 Subject: [PATCH 5/5] docs: add VISION_MODELS section to README files --- README.md | 7 +++++++ README_CN.md | 7 +++++++ README_JA.md | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/README.md b/README.md index 0c06b73f05b..79e041f3d4d 100644 --- a/README.md +++ b/README.md @@ -355,6 +355,13 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name Change default model +### `VISION_MODELS` (optional) + +> Default: Empty +> Example: `gpt-4-vision,claude-3-opus,my-custom-model` means add vision capabilities to these models in addition to the default pattern matches (which detect models containing keywords like "vision", "claude-3", "gemini-1.5", etc.). + +Give additional models vision capabilities beyond the default pattern matching. 
Multiple models should be separated by commas. + ### `WHITE_WEBDAV_ENDPOINTS` (optional) You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format: diff --git a/README_CN.md b/README_CN.md index d4da8b9da13..8173b9c4d1c 100644 --- a/README_CN.md +++ b/README_CN.md @@ -235,6 +235,13 @@ ChatGLM Api Url. 更改默认模型 +### `VISION_MODELS` (可选) + +> 默认值:空 +> 示例:`gpt-4-vision,claude-3-opus,my-custom-model` 表示为这些模型添加视觉能力,作为对默认模式匹配的补充(默认会检测包含"vision"、"claude-3"、"gemini-1.5"等关键词的模型)。 + +在默认模式匹配之外,添加更多具有视觉能力的模型。多个模型用逗号分隔。 + ### `DEFAULT_INPUT_TEMPLATE` (可选) 自定义默认的 template,用于初始化『设置』中的『用户输入预处理』配置项 diff --git a/README_JA.md b/README_JA.md index 062c112629d..29eb0d27529 100644 --- a/README_JA.md +++ b/README_JA.md @@ -217,6 +217,13 @@ ByteDance モードでは、`modelName@bytedance=deploymentName` 形式でモデ デフォルトのモデルを変更します。 +### `VISION_MODELS` (オプション) + +> デフォルト:空 +> 例:`gpt-4-vision,claude-3-opus,my-custom-model` は、これらのモデルにビジョン機能を追加します。これはデフォルトのパターンマッチング("vision"、"claude-3"、"gemini-1.5"などのキーワードを含むモデルを検出)に加えて適用されます。 + +デフォルトのパターンマッチングに加えて、追加のモデルにビジョン機能を付与します。複数のモデルはカンマで区切ります。 + ### `DEFAULT_INPUT_TEMPLATE` (オプション) 『設定』の『ユーザー入力前処理』の初期設定に使用するテンプレートをカスタマイズします。