diff --git a/src/ScTranslations.json b/src/ScTranslations.json index 07933a45..6d066a2d 100644 --- a/src/ScTranslations.json +++ b/src/ScTranslations.json @@ -5,7 +5,7 @@ "initial_message": "Hi, I'm ChatGPT with access to your notes via Smart Connections. Ask me a question about your notes and I'll try to answer it." }, "es": { - "pronouns": ["mi", "yo", "mí", "tú"], + "pronouns": ["mi", "yo", "mí", "tú", "mis"], "prompt": "Basándose en sus notas", "initial_message": "Hola, soy ChatGPT con acceso a tus apuntes a través de Smart Connections. Hazme una pregunta sobre tus apuntes e intentaré responderte." }, diff --git a/src/contains_self_referential_keywords.js b/src/contains_self_referential_keywords.js new file mode 100644 index 00000000..11a7cb0a --- /dev/null +++ b/src/contains_self_referential_keywords.js @@ -0,0 +1,34 @@ +const ScTranslations = require("./ScTranslations"); +const { contains_folder_reference } = require("./contains_folder_reference"); +const { contains_internal_link } = require("./contains_internal_link"); +const { extract_internal_links } = require("./extract_internal_links"); +const { extract_folder_references } = require("./extract_folder_references"); +const { contains_system_prompt_ref, extract_system_prompt_ref } = require("./contains_system_prompt_ref"); + +// check if includes keywords referring to one's own notes +async function contains_self_referential_keywords(env, user_input, language) { + const language_settings = ScTranslations[language]; + if (!language_settings) return false; + let check_str = `${user_input}`; + if(contains_internal_link(check_str)){ + const extracted_links = extract_internal_links({}, check_str); + for(const link of extracted_links){ + check_str = check_str.replace(link, ''); + } + } + if(contains_folder_reference(check_str)){ + const folders = await env.plugin.get_folders(); // get folder references + const extracted_folder_references = extract_folder_references(folders, check_str); + for(const folder_reference of extracted_folder_references){ + check_str = check_str.replace(folder_reference, ''); + } + } + if(contains_system_prompt_ref(check_str)){ + const {mention, mention_pattern} = extract_system_prompt_ref(check_str); + check_str = check_str.replace(mention_pattern, ''); + } + + if (check_str.match(new RegExp(`\\b(${language_settings.pronouns.join("|")})\\b`, "gi"))) return true; + return false; +} +exports.contains_self_referential_keywords = contains_self_referential_keywords; diff --git a/src/contains_self_referential_keywords.test.js b/src/contains_self_referential_keywords.test.js new file mode 100644 index 00000000..0ce9b036 --- /dev/null +++ b/src/contains_self_referential_keywords.test.js @@ -0,0 +1,76 @@ +const test = require('ava'); +const { contains_self_referential_keywords } = require('./contains_self_referential_keywords'); +const mock_env = { + plugin: { + get_folders: () => { + return [ + '/my notes/as context/', + ]; + } + } +}; + +test('returns true for input containing self-referential pronouns in English', async t => { + const user_input = 'I want to review my notes'; + t.true(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('returns true for input containing self-referential pronouns in Spanish', async t => { + const user_input = 'Quiero revisar mis notas'; + t.true(await contains_self_referential_keywords(mock_env, user_input, 'es')); +}); + +test('returns false for input without self-referential pronouns', async t => { + const user_input = 'The sky is blue'; + t.false(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('returns true for input with uppercase self-referential pronouns', async t => { + const user_input = 'MY notes are important to ME'; + t.true(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('returns true for input with mixed case self-referential pronouns', async t => { + const user_input = 'Show Me My latest notes'; + t.true(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('returns false for input with partial matches of pronouns ("myself" contains "my")', async t => { + const user_input = 'The word "myself" is not always self-referential'; + t.false(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('returns true for input with pronouns at the beginning or end of sentences', async t => { + const user_input = 'My thoughts are clear. These ideas belong to me.'; + t.true(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('returns false for empty input', async t => { + const user_input = ''; + t.false(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('handles unsupported language gracefully', async t => { + const user_input = 'This is a test'; + t.false(await contains_self_referential_keywords(mock_env, user_input, 'unsupported_language')); +}); + +test('returns true for input containing multiple self-referential pronouns', async t => { + const user_input = 'I need to organize my notes so I can find them easily'; + t.true(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('excludes self-referential pronouns that are within links', async t => { + const user_input = 'Should use [[my notes]] as context without lookup'; + t.false(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('excludes self-referential pronouns that are within /folder/paths/', async t => { + const user_input = 'Should not match /my notes/as context/ as self-referential'; + t.false(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); + +test('excludes self-referential pronouns that are within @"system prompt" refs', async t => { + const user_input = 'Should use @"my system prompt" in this query'; + t.false(await contains_self_referential_keywords(mock_env, user_input, 'en')); +}); \ No newline at end of file diff --git a/src/contains_system_prompt_ref.js b/src/contains_system_prompt_ref.js new file mode 100644 index 00000000..8f6e129f --- /dev/null +++ b/src/contains_system_prompt_ref.js @@ -0,0 +1,11 @@ +function contains_system_prompt_ref(content) { + return content.includes("@\""); +} +function extract_system_prompt_ref(content) { + const mention_pattern = /@\"([^"]+)\"/; + const mention = content.match(mention_pattern)[1]; + return { mention, mention_pattern }; +} + +exports.contains_system_prompt_ref = contains_system_prompt_ref; +exports.extract_system_prompt_ref = extract_system_prompt_ref; \ No newline at end of file diff --git a/src/extract_internal_links.js b/src/extract_internal_links.js new file mode 100644 index 00000000..09710370 --- /dev/null +++ b/src/extract_internal_links.js @@ -0,0 +1,12 @@ +function extract_internal_links(env, user_input) { + const matches = user_input.match(/\[\[(.*?)\]\]/g); + console.log(matches); + // return array of TFile objects + if (matches && env.plugin) return matches.map(match => { + const tfile = env.plugin.app.metadataCache.getFirstLinkpathDest(match.replace("[[", "").replace("]]", ""), "/"); + return tfile; + }); + if (matches) return matches; + return []; +} +exports.extract_internal_links = extract_internal_links; diff --git a/src/sc_actions.js b/src/sc_actions.js index ea92f5f1..2990053a 100644 --- a/src/sc_actions.js +++ b/src/sc_actions.js @@ -1,8 +1,8 @@ -const ScTranslations = require("./ScTranslations"); const openapi_spec = require('../build/actions_openapi.json'); const handlers = require('./actions/_actions'); const { lookup } = require('./actions/lookup'); const { json_ref_resolve } = require('./json_ref_resolve'); +const { contains_self_referential_keywords } = require("./contains_self_referential_keywords"); class ScActions { constructor(env, opts = {}) { @@ -47,8 +47,9 @@ class ScActions { } return; } + const should_trigger_lookup = await this.should_trigger_retrieval(user_input); // if contains self referential keywords or folder reference - if (this.should_trigger_retrieval(user_input)) { + if (should_trigger_lookup) { console.log("should trigger retrieval"); if(this.actions.lookup && this.env.chat_model.config.actions){ // sets current.tool_choice to lookup @@ -59,17 +60,12 @@ class ScActions { } } } - should_trigger_retrieval(user_input) { + async should_trigger_retrieval(user_input) { // if(!this.plugin?.brain?.smart_blocks?.keys.length) return false; // if no smart blocks, return false - if (this.contains_self_referential_keywords(user_input)) return true; + if (await contains_self_referential_keywords(this.env, user_input, this.config.language)) return true; if (this.env.chats.current.scope.key_starts_with_any) return true; // if scope.key_starts_with_any is set, return true (has folder reference) return false; } - // check if includes keywords referring to one's own notes - contains_self_referential_keywords(user_input) { - if (user_input.match(new RegExp(`\\b(${ScTranslations[this.config.language].pronouns.join("|")})\\b`, "gi"))) return true; - return false; - } // BACKWARD COMPATIBILITY for non-function-calling models (DEPRECATED) async get_context_hyde(user_input) { console.log("get_context_hyde"); diff --git a/src/sc_chat.js b/src/sc_chat.js index fcdca155..30a712d0 100644 --- a/src/sc_chat.js +++ b/src/sc_chat.js @@ -2,6 +2,8 @@ const { SmartChat } = require('smart-chats/smart_chat'); const { extract_folder_references } = require("./extract_folder_references"); const { contains_internal_link } = require("./contains_internal_link"); const { contains_folder_reference } = require('./contains_folder_reference'); +const { extract_internal_links } = require('./extract_internal_links'); +const { contains_system_prompt_ref, extract_system_prompt_ref } = require('./contains_system_prompt_ref'); class ScChat extends SmartChat { async new_user_message(content){ @@ -23,9 +25,8 @@ class ScChat extends SmartChat { async parse_user_message(content) { this.env.chats.current.scope = {}; // reset scope // DO: decided: should this be moved to new_user_message()??? Partially as sc-context??? - if (content.includes("@\"")) { - const mention_pattern = /@\"([^"]+)\"/; - const mention = content.match(mention_pattern)[1]; + if (contains_system_prompt_ref(content)) { + const { mention, mention_pattern } = extract_system_prompt_ref(content); const sys_msg = { role: "system", content: "```sc-system\n" + mention + "\n```" @@ -67,14 +68,4 @@ class ScChat extends SmartChat { } exports.ScChat = ScChat; -function extract_internal_links(env, user_input) { - const matches = user_input.match(/\[\[(.*?)\]\]/g); - console.log(matches); - // return array of TFile objects - if (matches) return matches.map(match => { - const tfile = env.plugin.app.metadataCache.getFirstLinkpathDest(match.replace("[[", "").replace("]]", ""), "/"); - return tfile; - }); - return []; -} -exports.extract_internal_links = extract_internal_links; \ No newline at end of file +