Skip to content

Commit

Permalink
Change to new algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
zeroliu committed Dec 27, 2024
1 parent f1ddbf1 commit 5db1551
Show file tree
Hide file tree
Showing 7 changed files with 322 additions and 203 deletions.
36 changes: 28 additions & 8 deletions src/components/modals/SimilarNotesModal.tsx
Original file line number Diff line number Diff line change
@@ -1,35 +1,55 @@
import { Result } from "@orama/orama";
import { InternalTypedDocument } from "@orama/orama";
import { SimilarNoteEntry } from "@/search/findSimilarNotes";

Check failure on line 1 in src/components/modals/SimilarNotesModal.tsx

View workflow job for this annotation

GitHub Actions / build (20.x)

Cannot find module '@/search/findSimilarNotes' or its corresponding type declarations.
import { App, Modal, TFile } from "obsidian";

export class SimilarNotesModal extends Modal {
private hits: Result<InternalTypedDocument<any>>[];
private similarNotes: SimilarNoteEntry[];

constructor(app: App, hits: Result<InternalTypedDocument<any>>[]) {
constructor(app: App, similarNotes: SimilarNoteEntry[]) {
super(app);
this.hits = hits;
this.similarNotes = similarNotes;
}

onOpen() {
const { contentEl } = this;
contentEl.empty();

contentEl.createEl("h2", { text: "Similar Note Blocks to Current Note" });
contentEl.createEl("h2", { text: "Relevant Notes" });

const containerEl = contentEl.createEl("ul", { cls: "similar-notes-container" });
this.hits.forEach((item) => {
this.similarNotes.forEach((item) => {
const itemEl = containerEl.createEl("li", { cls: "similar-note-item" });
const metadataTexts = [
`Similarity: ${Math.round((item.metadata.similarityScore ?? 0) * 100)}%`,
];
if (item.metadata.hasOutgoingLinks) {
metadataTexts.push("Link");
}
if (item.metadata.hasBacklinks) {
metadataTexts.push("Backlink");
}

// Create a clickable title
const titleEl = itemEl.createEl("a", {
text: `${item.document.title} (Score: ${item.score.toFixed(2)})`,
text: `${item.document.title}`,
cls: "similar-note-title",
});

titleEl.addEventListener("click", (event) => {
event.preventDefault();
this.navigateToNote(item.document.path);
});

const pathEl = itemEl.createEl("div", {
text: `${item.document.path}`,
});
pathEl.style.fontSize = "0.8em";
pathEl.style.color = "var(--text-muted)";

const relevanceScoreEl = itemEl.createEl("div", {
text: `${metadataTexts.join(" | ")}`,
});
relevanceScoreEl.style.fontSize = "0.8em";
relevanceScoreEl.style.color = "var(--text-faint)";
});
}

Expand Down
10 changes: 5 additions & 5 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { CHAT_VIEWTYPE, DEFAULT_OPEN_AREA, EVENT_NAMES } from "@/constants";
import { CustomPromptProcessor } from "@/customPromptProcessor";
import { encryptAllKeys } from "@/encryptionService";
import { CustomError } from "@/error";
import { findSimilarNotes } from "@/search/findSimilarNotes";
import { findRelevantNotes } from "@/search/findRelevantNotes";
import { HybridRetriever } from "@/search/hybridRetriever";
import { getAllQAMarkdownContent } from "@/search/searchUtils";
import VectorStoreManager from "@/search/vectorStoreManager";
Expand Down Expand Up @@ -314,8 +314,8 @@ export default class CopilotPlugin extends Plugin {
});

this.addCommand({
id: "find-similar-notes",
name: "Find similar notes to active note",
id: "find-relevant-notes",
name: "Find relevant notes to active note",
callback: async () => {
const activeFile = this.app.workspace.getActiveFile();
if (!activeFile) {
Expand All @@ -328,11 +328,11 @@ export default class CopilotPlugin extends Plugin {
throw new CustomError("Embeddings API not found.");
}
const db = await this.vectorStoreManager.getOrInitializeDb(embeddingsAPI);
const hits = await findSimilarNotes({
const relevantNotes = await findRelevantNotes({
db,
filePath: activeFile.path,
});
new SimilarNotesModal(this.app, hits).open();
new SimilarNotesModal(this.app, relevantNotes).open();
},
});

Expand Down
59 changes: 59 additions & 0 deletions src/noteUtils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import "./types";
import { TFile } from "obsidian";

/**
 * Resolve every outgoing reference of a note to its target file.
 *
 * Both wiki-style links (`[[link]]`) and embeds (`![[link]]`) recorded in the
 * note's metadata cache are considered. References that do not resolve to a
 * file are ignored, and each target file appears at most once in the result.
 * @param file The note file to analyze
 * @returns Array of distinct files referenced by the note
 */
export function getLinkedNotes(file: TFile): TFile[] {
  const cache = app.metadataCache.getFileCache(file);

  // Links are processed before embeds so the result keeps link targets first.
  const references = [...(cache?.links ?? []), ...(cache?.embeds ?? [])];

  // A Set removes repeated targets while preserving first-seen order.
  const targets = new Set<TFile>();
  for (const reference of references) {
    const target = app.metadataCache.getFirstLinkpathDest(reference.link, file.path);
    if (target) {
      targets.add(target);
    }
  }

  return [...targets];
}

/**
 * Collect the notes whose backlink entries point at the given note.
 *
 * Each path in the metadata cache's backlink data is looked up in the vault;
 * only paths that resolve to a `TFile` are returned.
 * @param file The note file to analyze
 * @returns Array of files that link to the note
 */
export function getBacklinkedNotes(file: TFile): TFile[] {
  const backlinkData = app.metadataCache.getBacklinksForFile(file)?.data;
  if (!backlinkData) {
    return [];
  }

  const sources: TFile[] = [];
  // Only the key (source path) of each backlink entry is needed.
  for (const [sourcePath] of backlinkData) {
    const candidate = app.vault.getAbstractFileByPath(sourcePath);
    if (candidate instanceof TFile) {
      sources.push(candidate);
    }
  }
  return sources;
}
209 changes: 209 additions & 0 deletions src/search/findRelevantNotes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
import { getBacklinkedNotes, getLinkedNotes } from "@/noteUtils";
import { DBOperations } from "@/search/dbOperations";
import { getSettings } from "@/settings/model";
import { InternalTypedDocument, Orama, Result } from "@orama/orama";
import { TFile } from "obsidian";

// Passed as the `similarity` option of the embedding search in calculateSimilarityScore.
const MIN_SIMILARITY_SCORE = 0.3;
// Passed as the `limit` option of the embedding search in calculateSimilarityScore.
const MAX_K = 20;
// Weights used by mergeScoreMaps for the similarity, outgoing-link and backlink
// scores respectively. Each contribution is divided by the sum of the three.
const ORIGINAL_WEIGHT = 0.5;
const OUTGOING_LINKED_WEIGHT = 0.25;
const BACKLINK_WEIGHT = 0.25;

/**
 * Collects the stored embedding of every indexed document found for a note.
 * @param notePath - Path of the note whose documents are looked up.
 * @param db - The Orama database to query.
 * @returns One embedding per document that carries one. Empty when the lookup
 * yields nothing.
 */
async function getNoteEmbeddings(notePath: string, db: Orama<any>): Promise<number[][]> {
  const { debug } = getSettings();
  const docs = await DBOperations.getDocsByPath(db, notePath);
  if (!docs) {
    if (debug) {
      console.log("No hits found for note:", notePath);
    }
    return [];
  }

  const collected: number[][] = [];
  for (const { document } of docs) {
    if (document.embedding) {
      collected.push(document.embedding);
    } else if (debug) {
      // Documents without an embedding are skipped, not treated as an error.
      console.log("No embedding found for note:", notePath);
    }
  }
  return collected;
}

/**
 * Computes the component-wise mean of a list of embeddings.
 *
 * The dimension of the result is taken from the first embedding. Components
 * missing from a shorter embedding count as 0, and components beyond the first
 * embedding's dimension are ignored.
 * @param noteEmbeddings - The embeddings of the original note.
 * @returns The average embedding, or an empty array when no embeddings are given.
 */
function getAverageEmbedding(noteEmbeddings: number[][]): number[] {
  if (noteEmbeddings.length === 0) {
    return [];
  }

  const embeddingLength = noteEmbeddings[0].length;
  const sums = new Array<number>(embeddingLength).fill(0);
  for (const embedding of noteEmbeddings) {
    for (let i = 0; i < embeddingLength; i++) {
      sums[i] += embedding[i] ?? 0;
    }
  }

  // Divide by the number of embeddings (not by the vector dimension) to get a
  // true mean.
  const count = noteEmbeddings.length;
  return sums.map((sum) => sum / count);
}

/**
 * Reduces a list of hits to the best score per note path and drops the entry
 * for the current file.
 * @param hits - The hits to get the highest score for.
 * @param currentFilePath - The current file path, removed from the result.
 * @returns A map from note path to the highest score seen for that path.
 */
function getHighestScoreHits(
  hits: Result<InternalTypedDocument<any>>[],
  currentFilePath: string
): Map<string, number> {
  const bestScoreByPath = new Map<string, number>();
  for (const hit of hits) {
    const path = hit.document.path;
    const best = bestScoreByPath.get(path);
    // Compare against `undefined` explicitly: a stored score of 0 is a valid
    // value and must not be mistaken for "no entry yet".
    if (best === undefined || hit.score > best) {
      bestScoreByPath.set(path, hit.score);
    }
  }
  bestScoreByPath.delete(currentFilePath);
  return bestScoreByPath;
}

/**
 * Scores other notes by embedding similarity to the given note.
 *
 * The note's embeddings are collapsed into one query vector, which is then used
 * for an embedding search bounded by MAX_K and MIN_SIMILARITY_SCORE.
 * @param db - The Orama database.
 * @param filePath - Path of the note to compare against.
 * @returns A map from note path to its best similarity score, without the note
 * itself. Empty when the note has no embeddings.
 */
async function calculateSimilarityScore({
  db,
  filePath,
}: {
  db: Orama<any>;
  filePath: string;
}): Promise<Map<string, number>> {
  const { debug } = getSettings();

  const queryEmbedding = getAverageEmbedding(await getNoteEmbeddings(filePath, db));
  if (queryEmbedding.length > 0) {
    const neighbours = await DBOperations.getDocsByEmbedding(db, queryEmbedding, {
      limit: MAX_K,
      similarity: MIN_SIMILARITY_SCORE,
    });
    return getHighestScoreHits(neighbours, filePath);
  }

  // Nothing to search with: the note has no usable embedding.
  if (debug) {
    console.log("No embeddings found for note:", filePath);
  }
  return new Map();
}

/**
 * Assigns a score of 1 to every note the given file links to.
 * @param file - The note whose outgoing links are scored.
 * @returns A map from linked note path to 1.
 */
function calculateOutgoingLinksScore(file: TFile) {
  return new Map<string, number>(
    getLinkedNotes(file).map((note): [string, number] => [note.path, 1])
  );
}

/**
 * Assigns a score of 1 to every note that links to the given file.
 * @param file - The note whose backlinks are scored.
 * @returns A map from backlinking note path to 1.
 */
function calculateBacklinksScore(file: TFile) {
  return new Map<string, number>(
    getBacklinkedNotes(file).map((note): [string, number] => [note.path, 1])
  );
}

/**
 * Combines the three per-note score maps into one weighted score per note.
 *
 * Each source contributes `score * weight / totalWeight`, where the weights are
 * ORIGINAL_WEIGHT, OUTGOING_LINKED_WEIGHT and BACKLINK_WEIGHT. A note that
 * appears in several maps receives the sum of its contributions.
 * @param similarityScoreMap - Similarity score per note path.
 * @param outgoingLinksScoreMap - Outgoing-link score per note path.
 * @param backlinksScoreMap - Backlink score per note path.
 * @returns A map from note path to the combined score.
 */
function mergeScoreMaps(
  similarityScoreMap: Map<string, number>,
  outgoingLinksScoreMap: Map<string, number>,
  backlinksScoreMap: Map<string, number>
) {
  const totalWeight = ORIGINAL_WEIGHT + OUTGOING_LINKED_WEIGHT + BACKLINK_WEIGHT;
  const combined = new Map<string, number>();

  // The similarity map seeds the result; the link maps are added on top.
  similarityScoreMap.forEach((score, notePath) => {
    combined.set(notePath, (score * ORIGINAL_WEIGHT) / totalWeight);
  });
  outgoingLinksScoreMap.forEach((score, notePath) => {
    const current = combined.get(notePath) ?? 0;
    combined.set(notePath, current + (score * OUTGOING_LINKED_WEIGHT) / totalWeight);
  });
  backlinksScoreMap.forEach((score, notePath) => {
    const current = combined.get(notePath) ?? 0;
    combined.set(notePath, current + (score * BACKLINK_WEIGHT) / totalWeight);
  });

  return combined;
}

/**
 * One entry in the list returned by findRelevantNotes.
 */
export type RelevantNoteEntry = {
document: {
// Vault path of the note.
path: string;
// Display title; findRelevantNotes fills this with the file's basename.
title: string;
};
metadata: {
// Combined weighted score produced by mergeScoreMaps; results are sorted by it.
score: number;
// Embedding similarity score, or undefined when the note has no entry in the similarity map.
similarityScore: number | undefined;
// True when the note has an entry in the outgoing-links score map.
hasOutgoingLinks: boolean;
// True when the note has an entry in the backlinks score map.
hasBacklinks: boolean;
};
};
/**
 * Finds the relevant notes for the given file path.
 *
 * Relevance combines embedding similarity, outgoing links and backlinks into
 * one weighted score (see mergeScoreMaps). Results are sorted by that score in
 * descending order.
 * @param db - The Orama database.
 * @param filePath - The file path to find relevant notes for.
 * @returns The relevant notes for the given file path. Empty array if the path
 * does not resolve to a file or no relevant notes are found.
 */
export async function findRelevantNotes({
  db,
  filePath,
}: {
  db: Orama<any>;
  filePath: string;
}): Promise<RelevantNoteEntry[]> {
  const file = app.vault.getAbstractFileByPath(filePath);
  if (!(file instanceof TFile)) {
    return [];
  }

  const similarityScoreMap = await calculateSimilarityScore({ db, filePath });
  const outgoingLinksScoreMap = calculateOutgoingLinksScore(file);
  const backlinksScoreMap = calculateBacklinksScore(file);
  const mergedScoreMap = mergeScoreMaps(
    similarityScoreMap,
    outgoingLinksScoreMap,
    backlinksScoreMap
  );
  const sortedHits = Array.from(mergedScoreMap.entries()).sort((a, b) => b[1] - a[1]);

  // Build the result with an explicitly typed array instead of map + filter.
  // `filter((entry) => entry !== null)` does not narrow away `null` on
  // compilers without inferred type predicates, which breaks the declared
  // return type.
  const relevantNotes: RelevantNoteEntry[] = [];
  for (const [path, score] of sortedHits) {
    const noteFile = app.vault.getAbstractFileByPath(path);
    if (!(noteFile instanceof TFile)) {
      // Skip paths that no longer resolve to a file in the vault.
      continue;
    }
    relevantNotes.push({
      document: {
        path,
        title: noteFile.basename,
      },
      metadata: {
        score,
        similarityScore: similarityScoreMap.get(path),
        hasOutgoingLinks: outgoingLinksScoreMap.has(path),
        hasBacklinks: backlinksScoreMap.has(path),
      },
    });
  }
  return relevantNotes;
}
Loading

0 comments on commit 5db1551

Please sign in to comment.