From 63d4711593d085100e0c5ca247403b2bef996331 Mon Sep 17 00:00:00 2001 From: azep-ninja Date: Tue, 31 Dec 2024 13:29:41 -0700 Subject: [PATCH 1/5] rag knowledge/multi agent --- .../20240318103238_remote_schema.sql | 85 ++++- packages/adapter-postgres/schema.sql | 35 +- packages/adapter-postgres/src/index.ts | 177 +++++++++ packages/adapter-sqlite/src/index.ts | 205 ++++++++++- packages/adapter-sqlite/src/sqliteTables.ts | 28 +- packages/adapter-sqljs/src/index.ts | 189 ++++++++++ packages/adapter-sqljs/src/sqliteTables.ts | 28 +- packages/adapter-supabase/schema.sql | 48 ++- packages/adapter-supabase/src/index.ts | 227 ++++++++++++ packages/core/src/database.ts | 43 +++ packages/core/src/index.ts | 1 + packages/core/src/ragknowledge.ts | 336 ++++++++++++++++++ packages/core/src/runtime.ts | 178 +++++++++- packages/core/src/types.ts | 77 +++- 14 files changed, 1624 insertions(+), 33 deletions(-) create mode 100644 packages/core/src/ragknowledge.ts diff --git a/packages/adapter-postgres/migrations/20240318103238_remote_schema.sql b/packages/adapter-postgres/migrations/20240318103238_remote_schema.sql index 53bba89e45..2867a12aea 100644 --- a/packages/adapter-postgres/migrations/20240318103238_remote_schema.sql +++ b/packages/adapter-postgres/migrations/20240318103238_remote_schema.sql @@ -507,6 +507,63 @@ CREATE TABLE IF NOT EXISTS "public"."rooms" ( "createdAt" timestamp with time zone DEFAULT ("now"() AT TIME ZONE 'utc'::"text") NOT NULL ); +CREATE OR REPLACE FUNCTION "public"."search_knowledge"( + "query_embedding" "extensions"."vector", + "query_agent_id" "uuid", + "match_threshold" double precision, + "match_count" integer, + "search_text" text +) RETURNS TABLE ( + "id" "uuid", + "agentId" "uuid", + "content" "jsonb", + "embedding" "extensions"."vector", + "createdAt" timestamp with time zone, + "similarity" double precision +) LANGUAGE "plpgsql" AS $$ +BEGIN + RETURN QUERY + WITH vector_matches AS ( + SELECT id, + 1 - (embedding <=> query_embedding) as 
vector_score + FROM knowledge + WHERE (agentId IS NULL AND isShared = true) OR agentId = query_agent_id + AND embedding IS NOT NULL + ), + keyword_matches AS ( + SELECT id, + CASE + WHEN content->>'text' ILIKE '%' || search_text || '%' THEN 3.0 + ELSE 1.0 + END * + CASE + WHEN content->'metadata'->>'isChunk' = 'true' THEN 1.5 + WHEN content->'metadata'->>'isMain' = 'true' THEN 1.2 + ELSE 1.0 + END as keyword_score + FROM knowledge + WHERE (agentId IS NULL AND isShared = true) OR agentId = query_agent_id + ) + SELECT + k.id, + k."agentId", + k.content, + k.embedding, + k."createdAt", + (v.vector_score * kw.keyword_score) as similarity + FROM knowledge k + JOIN vector_matches v ON k.id = v.id + LEFT JOIN keyword_matches kw ON k.id = kw.id + WHERE (k.agentId IS NULL AND k.isShared = true) OR k.agentId = query_agent_id + AND ( + v.vector_score >= match_threshold + OR (kw.keyword_score > 1.0 AND v.vector_score >= 0.3) + ) + ORDER BY similarity DESC + LIMIT match_count; +END; +$$; + ALTER TABLE "public"."rooms" OWNER TO "postgres"; ALTER TABLE ONLY "public"."relationships" @@ -564,6 +621,9 @@ ALTER TABLE ONLY "public"."relationships" ALTER TABLE ONLY "public"."relationships" ADD CONSTRAINT "relationships_userId_fkey" FOREIGN KEY ("userId") REFERENCES "public"."accounts"("id"); +ALTER TABLE ONLY "public"."knowledge" + ADD CONSTRAINT "knowledge_agentId_fkey" FOREIGN KEY ("agentId") REFERENCES "public"."accounts"("id") ON DELETE CASCADE; + CREATE POLICY "Can select and update all data" ON "public"."accounts" USING (("auth"."uid"() = "id")) WITH CHECK (("auth"."uid"() = "id")); CREATE POLICY "Enable delete for users based on userId" ON "public"."goals" FOR DELETE TO "authenticated" USING (("auth"."uid"() = "userId")); @@ -600,6 +660,18 @@ CREATE POLICY "Enable update for users of own id" ON "public"."rooms" FOR UPDATE CREATE POLICY "Enable users to delete their own relationships/friendships" ON "public"."relationships" FOR DELETE TO "authenticated" USING ((("auth"."uid"() = 
"userA") OR ("auth"."uid"() = "userB"))); +CREATE POLICY "Enable read access for all users" ON "public"."knowledge" + FOR SELECT USING (true); + +CREATE POLICY "Enable insert for authenticated users only" ON "public"."knowledge" + FOR INSERT TO "authenticated" WITH CHECK (true); + +CREATE POLICY "Enable update for authenticated users" ON "public"."knowledge" + FOR UPDATE TO "authenticated" USING (true) WITH CHECK (true); + +CREATE POLICY "Enable delete for users based on agentId" ON "public"."knowledge" + FOR DELETE TO "authenticated" USING (("auth"."uid"() = "agentId")); + ALTER TABLE "public"."accounts" ENABLE ROW LEVEL SECURITY; ALTER TABLE "public"."goals" ENABLE ROW LEVEL SECURITY; @@ -614,6 +686,8 @@ ALTER TABLE "public"."relationships" ENABLE ROW LEVEL SECURITY; ALTER TABLE "public"."rooms" ENABLE ROW LEVEL SECURITY; +ALTER TABLE "public"."knowledge" ENABLE ROW LEVEL SECURITY; + CREATE POLICY "select_own_account" ON "public"."accounts" FOR SELECT USING (("auth"."uid"() = "id")); GRANT USAGE ON SCHEMA "public" TO "postgres"; @@ -703,6 +777,10 @@ GRANT ALL ON TABLE "public"."secrets" TO "service_role"; GRANT ALL ON TABLE "public"."secrets" TO "supabase_admin"; GRANT ALL ON TABLE "public"."secrets" TO "supabase_auth_admin"; +GRANT ALL ON TABLE "public"."knowledge" TO "authenticated"; +GRANT ALL ON TABLE "public"."knowledge" TO "service_role"; +GRANT ALL ON TABLE "public"."knowledge" TO "supabase_admin"; +GRANT ALL ON TABLE "public"."knowledge" TO "supabase_auth_admin"; GRANT ALL ON FUNCTION "public"."get_participant_userState"("roomId" "uuid", "userId" "uuid") TO "authenticated"; GRANT ALL ON FUNCTION "public"."get_participant_userState"("roomId" "uuid", "userId" "uuid") TO "service_role"; @@ -710,7 +788,7 @@ GRANT ALL ON FUNCTION "public"."get_participant_userState"("roomId" "uuid", "use GRANT ALL ON FUNCTION "public"."get_participant_userState"("roomId" "uuid", "userId" "uuid") TO "supabase_auth_admin"; GRANT ALL ON FUNCTION 
"public"."set_participant_userState"("roomId" "uuid", "userId" "uuid", "state" "text") TO "authenticated"; -GRANT ALL ON FUNCTION "public"."set_participant_userState"("roomId" "uuid", "userId" "uuid", "state" "text") TO "service_role"; +GRANT ALL ON FUNCTION "public"."set_participant_userState"("roomId" "uuid", "userId" "uuid", "state" "text") TO "service_role"; GRANT ALL ON FUNCTION "public"."set_participant_userState"("roomId" "uuid", "userId" "uuid", "state" "text") TO "supabase_admin"; GRANT ALL ON FUNCTION "public"."set_participant_userState"("roomId" "uuid", "userId" "uuid", "state" "text") TO "supabase_auth_admin"; @@ -733,4 +811,9 @@ ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TAB ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "supabase_admin"; ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TABLES TO "supabase_auth_admin"; +GRANT ALL ON FUNCTION "public"."search_knowledge"("query_embedding" "extensions"."vector", "query_agent_id" "uuid", "match_threshold" double precision, "match_count" integer, "search_text" text) TO "authenticated"; +GRANT ALL ON FUNCTION "public"."search_knowledge"("query_embedding" "extensions"."vector", "query_agent_id" "uuid", "match_threshold" double precision, "match_count" integer, "search_text" text) TO "service_role"; +GRANT ALL ON FUNCTION "public"."search_knowledge"("query_embedding" "extensions"."vector", "query_agent_id" "uuid", "match_threshold" double precision, "match_count" integer, "search_text" text) TO "supabase_admin"; +GRANT ALL ON FUNCTION "public"."search_knowledge"("query_embedding" "extensions"."vector", "query_agent_id" "uuid", "match_threshold" double precision, "match_count" integer, "search_text" text) TO "supabase_auth_admin"; + RESET ALL; \ No newline at end of file diff --git a/packages/adapter-postgres/schema.sql b/packages/adapter-postgres/schema.sql index 4a0f7c6f1d..b8327e54df 100644 --- 
a/packages/adapter-postgres/schema.sql +++ b/packages/adapter-postgres/schema.sql @@ -9,6 +9,7 @@ -- DROP TABLE IF EXISTS memories CASCADE; -- DROP TABLE IF EXISTS rooms CASCADE; -- DROP TABLE IF EXISTS accounts CASCADE; +-- DROP TABLE IF EXISTS knowledge CASCADE; CREATE EXTENSION IF NOT EXISTS vector; @@ -24,9 +25,6 @@ BEGIN -- Then check for Ollama ELSIF current_setting('app.use_ollama_embedding', TRUE) = 'true' THEN RETURN 1024; -- Ollama mxbai-embed-large dimension - -- Then check for GAIANET - ELSIF current_setting('app.use_gaianet_embedding', TRUE) = 'true' THEN - RETURN 768; -- Gaianet nomic-embed dimension ELSE RETURN 384; -- BGE/Other embedding dimension END IF; @@ -130,11 +128,38 @@ CREATE TABLE IF NOT EXISTS cache ( PRIMARY KEY ("key", "agentId") ); +DO $$ +DECLARE + vector_dim INTEGER; +BEGIN + vector_dim := get_embedding_dimension(); + + EXECUTE format(' + CREATE TABLE IF NOT EXISTS knowledge ( + "id" UUID PRIMARY KEY, + "agentId" UUID REFERENCES accounts("id"), + "content" JSONB NOT NULL, + "embedding" vector(%s), + "createdAt" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + "isMain" BOOLEAN DEFAULT FALSE, + "originalId" UUID REFERENCES knowledge("id"), + "chunkIndex" INTEGER, + "isShared" BOOLEAN DEFAULT FALSE, + CHECK((isShared = true AND "agentId" IS NULL) OR (isShared = false AND "agentId" IS NOT NULL)) + )', vector_dim); +END $$; + -- Indexes CREATE INDEX IF NOT EXISTS idx_memories_embedding ON memories USING hnsw ("embedding" vector_cosine_ops); CREATE INDEX IF NOT EXISTS idx_memories_type_room ON memories("type", "roomId"); CREATE INDEX IF NOT EXISTS idx_participants_user ON participants("userId"); CREATE INDEX IF NOT EXISTS idx_participants_room ON participants("roomId"); CREATE INDEX IF NOT EXISTS idx_relationships_users ON relationships("userA", "userB"); - -COMMIT; +CREATE INDEX IF NOT EXISTS idx_knowledge_agent ON knowledge("agentId"); +CREATE INDEX IF NOT EXISTS idx_knowledge_agent_main ON knowledge("agentId", "isMain"); +CREATE INDEX IF NOT 
EXISTS idx_knowledge_original ON knowledge("originalId"); +CREATE INDEX IF NOT EXISTS idx_knowledge_created ON knowledge("agentId", "createdAt"); +CREATE INDEX IF NOT EXISTS idx_knowledge_shared ON knowledge("isShared"); +CREATE INDEX IF NOT EXISTS idx_knowledge_embedding ON knowledge USING ivfflat (embedding vector_cosine_ops); + +COMMIT; \ No newline at end of file diff --git a/packages/adapter-postgres/src/index.ts b/packages/adapter-postgres/src/index.ts index 8a3eb14f2d..5047a247ac 100644 --- a/packages/adapter-postgres/src/index.ts +++ b/packages/adapter-postgres/src/index.ts @@ -24,6 +24,7 @@ import { getEmbeddingConfig, DatabaseAdapter, EmbeddingProvider, + RAGKnowledgeItem } from "@elizaos/core"; import fs from "fs"; import { fileURLToPath } from "url"; @@ -1467,6 +1468,182 @@ export class PostgresDatabaseAdapter } }, "deleteCache"); } + + async getKnowledge(params: { + id?: UUID; + agentId: UUID; + limit?: number; + query?: string; + }): Promise { + return this.withDatabase(async () => { + let sql = `SELECT * FROM knowledge WHERE ("agentId" = $1 OR "isShared" = true)`; + const queryParams: any[] = [params.agentId]; + let paramCount = 1; + + if (params.id) { + paramCount++; + sql += ` AND id = $${paramCount}`; + queryParams.push(params.id); + } + + if (params.limit) { + paramCount++; + sql += ` LIMIT $${paramCount}`; + queryParams.push(params.limit); + } + + const { rows } = await this.pool.query(sql, queryParams); + + return rows.map(row => ({ + id: row.id, + agentId: row.agentId, + content: typeof row.content === 'string' ? JSON.parse(row.content) : row.content, + embedding: row.embedding ? 
new Float32Array(row.embedding) : undefined, + createdAt: row.createdAt.getTime() + })); + }, "getKnowledge"); + } + + async searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array; + match_threshold: number; + match_count: number; + searchText?: string; + }): Promise { + return this.withDatabase(async () => { + const cacheKey = `embedding_${params.agentId}_${params.searchText}`; + const cachedResult = await this.getCache({ + key: cacheKey, + agentId: params.agentId + }); + + if (cachedResult) { + return JSON.parse(cachedResult); + } + + const vectorStr = `[${Array.from(params.embedding).join(",")}]`; + + const sql = ` + WITH vector_scores AS ( + SELECT id, + 1 - (embedding <-> $1::vector) as vector_score + FROM knowledge + WHERE ("agentId" IS NULL AND "isShared" = true) OR "agentId" = $2 + AND embedding IS NOT NULL + ), + keyword_matches AS ( + SELECT id, + CASE + WHEN content->>'text' ILIKE $3 THEN 3.0 + ELSE 1.0 + END * + CASE + WHEN (content->'metadata'->>'isChunk')::boolean = true THEN 1.5 + WHEN (content->'metadata'->>'isMain')::boolean = true THEN 1.2 + ELSE 1.0 + END as keyword_score + FROM knowledge + WHERE ("agentId" IS NULL AND "isShared" = true) OR "agentId" = $2 + ) + SELECT k.*, + v.vector_score, + kw.keyword_score, + (v.vector_score * kw.keyword_score) as combined_score + FROM knowledge k + JOIN vector_scores v ON k.id = v.id + LEFT JOIN keyword_matches kw ON k.id = kw.id + WHERE ("agentId" IS NULL AND "isShared" = true) OR k."agentId" = $2 + AND ( + v.vector_score >= $4 + OR (kw.keyword_score > 1.0 AND v.vector_score >= 0.3) + ) + ORDER BY combined_score DESC + LIMIT $5 + `; + + const { rows } = await this.pool.query(sql, [ + vectorStr, + params.agentId, + `%${params.searchText || ''}%`, + params.match_threshold, + params.match_count + ]); + + const results = rows.map(row => ({ + id: row.id, + agentId: row.agentId, + content: typeof row.content === 'string' ? JSON.parse(row.content) : row.content, + embedding: row.embedding ? 
new Float32Array(row.embedding) : undefined, + createdAt: row.createdAt.getTime(), + similarity: row.combined_score + })); + + await this.setCache({ + key: cacheKey, + agentId: params.agentId, + value: JSON.stringify(results) + }); + + return results; + }, "searchKnowledge"); + } + + async createKnowledge(knowledge: RAGKnowledgeItem): Promise { + return this.withDatabase(async () => { + const client = await this.pool.connect(); + try { + await client.query('BEGIN'); + + const sql = ` + INSERT INTO knowledge ( + id, "agentId", content, embedding, "createdAt", + "isMain", "originalId", "chunkIndex", "isShared" + ) VALUES ($1, $2, $3, $4, to_timestamp($5/1000.0), $6, $7, $8, $9) + ON CONFLICT (id) DO NOTHING + `; + + const metadata = knowledge.content.metadata || {}; + const vectorStr = knowledge.embedding ? + `[${Array.from(knowledge.embedding).join(",")}]` : null; + + await client.query(sql, [ + knowledge.id, + metadata.isShared ? null : knowledge.agentId, + knowledge.content, + vectorStr, + knowledge.createdAt || Date.now(), + metadata.isMain || false, + metadata.originalId || null, + metadata.chunkIndex || null, + metadata.isShared || false + ]); + + await client.query('COMMIT'); + } catch (error) { + await client.query('ROLLBACK'); + throw error; + } finally { + client.release(); + } + }, "createKnowledge"); + } + + async removeKnowledge(id: UUID): Promise { + return this.withDatabase(async () => { + await this.pool.query('DELETE FROM knowledge WHERE id = $1', [id]); + }, "removeKnowledge"); + } + + async clearKnowledge(agentId: UUID, shared?: boolean): Promise { + return this.withDatabase(async () => { + const sql = shared ? 
+ 'DELETE FROM knowledge WHERE ("agentId" = $1 OR "isShared" = true)' : + 'DELETE FROM knowledge WHERE "agentId" = $1'; + + await this.pool.query(sql, [agentId]); + }, "clearKnowledge"); + } } export default PostgresDatabaseAdapter; diff --git a/packages/adapter-sqlite/src/index.ts b/packages/adapter-sqlite/src/index.ts index b6627a8c62..e22c036a9f 100644 --- a/packages/adapter-sqlite/src/index.ts +++ b/packages/adapter-sqlite/src/index.ts @@ -1,7 +1,7 @@ export * from "./sqliteTables.ts"; export * from "./sqlite_vec.ts"; -import { DatabaseAdapter, IDatabaseCacheAdapter } from "@elizaos/core"; +import { DatabaseAdapter, elizaLogger, IDatabaseCacheAdapter } from "@elizaos/core"; import { Account, Actor, @@ -11,6 +11,7 @@ import { type Memory, type Relationship, type UUID, + RAGKnowledgeItem } from "@elizaos/core"; import { Database } from "better-sqlite3"; import { v4 } from "uuid"; @@ -707,4 +708,206 @@ export class SqliteDatabaseAdapter return false; } } + + async getKnowledge(params: { + id?: UUID; + agentId: UUID; + limit?: number; + query?: string; + }): Promise { + let sql = `SELECT * FROM knowledge WHERE (agentId = ? OR isShared = 1)`; + const queryParams: any[] = [params.agentId]; + + if (params.id) { + sql += ` AND id = ?`; + queryParams.push(params.id); + } + + if (params.limit) { + sql += ` LIMIT ?`; + queryParams.push(params.limit); + } + + interface KnowledgeRow { + id: UUID; + agentId: UUID; + content: string; + embedding: Buffer | null; + createdAt: string | number; + } + + const rows = this.db.prepare(sql).all(...queryParams) as KnowledgeRow[]; + + return rows.map(row => ({ + id: row.id, + agentId: row.agentId, + content: JSON.parse(row.content), + embedding: row.embedding ? new Float32Array(row.embedding) : undefined, + createdAt: typeof row.createdAt === 'string' ? 
Date.parse(row.createdAt) : row.createdAt + })); + } + + async searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array; + match_threshold: number; + match_count: number; + searchText?: string; + }): Promise { + const cacheKey = `embedding_${params.agentId}_${params.searchText}`; + const cachedResult = await this.getCache({ + key: cacheKey, + agentId: params.agentId + }); + + if (cachedResult) { + return JSON.parse(cachedResult); + } + + interface KnowledgeSearchRow { + id: UUID; + agentId: UUID; + content: string; + embedding: Buffer | null; + createdAt: string | number; + vector_score: number; + keyword_score: number; + combined_score: number; + } + + const sql = ` + WITH vector_scores AS ( + SELECT id, + 1 / (1 + vec_distance_L2(embedding, ?)) as vector_score + FROM knowledge + WHERE (agentId IS NULL AND isShared = 1) OR agentId = ? + AND embedding IS NOT NULL + ), + keyword_matches AS ( + SELECT id, + CASE + WHEN lower(json_extract(content, '$.text')) LIKE ? THEN 3.0 + ELSE 1.0 + END * + CASE + WHEN json_extract(content, '$.metadata.isChunk') = 1 THEN 1.5 + WHEN json_extract(content, '$.metadata.isMain') = 1 THEN 1.2 + ELSE 1.0 + END as keyword_score + FROM knowledge + WHERE (agentId IS NULL AND isShared = 1) OR agentId = ? + ) + SELECT k.*, + v.vector_score, + kw.keyword_score, + (v.vector_score * kw.keyword_score) as combined_score + FROM knowledge k + JOIN vector_scores v ON k.id = v.id + LEFT JOIN keyword_matches kw ON k.id = kw.id + WHERE (k.agentId IS NULL AND k.isShared = 1) OR k.agentId = ? + AND ( + v.vector_score >= ? -- Using match_threshold parameter + OR (kw.keyword_score > 1.0 AND v.vector_score >= 0.3) + ) + ORDER BY combined_score DESC + LIMIT ? 
+ `; + + const searchParams = [ + params.embedding, + params.agentId, + `%${params.searchText?.toLowerCase() || ''}%`, + params.agentId, + params.agentId, + params.match_threshold, + params.match_count + ]; + + try { + const rows = this.db.prepare(sql).all(...searchParams) as KnowledgeSearchRow[]; + const results = rows.map(row => ({ + id: row.id, + agentId: row.agentId, + content: JSON.parse(row.content), + embedding: row.embedding ? new Float32Array(row.embedding) : undefined, + createdAt: typeof row.createdAt === 'string' ? Date.parse(row.createdAt) : row.createdAt, + similarity: row.combined_score + })); + + await this.setCache({ + key: cacheKey, + agentId: params.agentId, + value: JSON.stringify(results) + }); + + return results; + } catch (error) { + elizaLogger.error('Error in searchKnowledge:', error); + throw error; + } + + } + + async createKnowledge(knowledge: RAGKnowledgeItem): Promise { + try { + this.db.transaction(() => { + const sql = ` + INSERT INTO knowledge ( + id, agentId, content, embedding, createdAt, + isMain, originalId, chunkIndex, isShared + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `; + + const embeddingArray = knowledge.embedding || null; + + const metadata = knowledge.content.metadata || {}; + const isShared = metadata.isShared ? 1 : 0; + + this.db.prepare(sql).run( + knowledge.id, + metadata.isShared ? null : knowledge.agentId, + JSON.stringify(knowledge.content), + embeddingArray, + knowledge.createdAt || Date.now(), + metadata.isMain ? 
1 : 0, + metadata.originalId || null, + metadata.chunkIndex || null, + isShared + ); + + })(); + } catch (error: any) { + const isShared = knowledge.content.metadata?.isShared; + const isPrimaryKeyError = error?.code === 'SQLITE_CONSTRAINT_PRIMARYKEY'; + + if (isShared && isPrimaryKeyError) { + elizaLogger.info(`Shared knowledge ${knowledge.id} already exists, skipping`); + return; + } else if (!isShared && !error.message?.includes('SQLITE_CONSTRAINT_PRIMARYKEY')) { + elizaLogger.error(`Error creating knowledge ${knowledge.id}:`, { + error, + embeddingLength: knowledge.embedding?.length, + content: knowledge.content + }); + throw error; + } + + elizaLogger.debug(`Knowledge ${knowledge.id} already exists, skipping`); + } + } + + async removeKnowledge(id: UUID): Promise { + const sql = `DELETE FROM knowledge WHERE id = ?`; + this.db.prepare(sql).run(id); + } + + async clearKnowledge(agentId: UUID, shared?: boolean): Promise { + const sql = shared ? `DELETE FROM knowledge WHERE (agentId = ? 
OR isShared = 1)` : `DELETE FROM knowledge WHERE agentId = ?`; + try { + this.db.prepare(sql).run(agentId); + } catch (error) { + elizaLogger.error(`Error clearing knowledge for agent ${agentId}:`, error); + throw error; + } + } } diff --git a/packages/adapter-sqlite/src/sqliteTables.ts b/packages/adapter-sqlite/src/sqliteTables.ts index fdd47e5697..87fc26743f 100644 --- a/packages/adapter-sqlite/src/sqliteTables.ts +++ b/packages/adapter-sqlite/src/sqliteTables.ts @@ -92,6 +92,22 @@ CREATE TABLE IF NOT EXISTS "cache" ( PRIMARY KEY ("key", "agentId") ); +-- Table: knowledge +CREATE TABLE IF NOT EXISTS "knowledge" ( + "id" TEXT PRIMARY KEY, + "agentId" TEXT, + "content" TEXT NOT NULL CHECK(json_valid("content")), + "embedding" BLOB, + "createdAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + "isMain" INTEGER DEFAULT 0, + "originalId" TEXT, + "chunkIndex" INTEGER, + "isShared" INTEGER DEFAULT 0, + FOREIGN KEY ("agentId") REFERENCES "accounts"("id"), + FOREIGN KEY ("originalId") REFERENCES "knowledge"("id"), + CHECK((isShared = 1 AND agentId IS NULL) OR (isShared = 0 AND agentId IS NOT NULL)) +); + -- Index: relationships_id_key CREATE UNIQUE INDEX IF NOT EXISTS "relationships_id_key" ON "relationships" ("id"); @@ -101,4 +117,14 @@ CREATE UNIQUE INDEX IF NOT EXISTS "memories_id_key" ON "memories" ("id"); -- Index: participants_id_key CREATE UNIQUE INDEX IF NOT EXISTS "participants_id_key" ON "participants" ("id"); -COMMIT;`; +-- Index: knowledge +CREATE INDEX IF NOT EXISTS "knowledge_agent_key" ON "knowledge" ("agentId"); +CREATE INDEX IF NOT EXISTS "knowledge_agent_main_key" ON "knowledge" ("agentId", "isMain"); +CREATE INDEX IF NOT EXISTS "knowledge_original_key" ON "knowledge" ("originalId"); +CREATE INDEX IF NOT EXISTS "knowledge_content_key" ON "knowledge" + ((json_extract(content, '$.text'))) + WHERE json_extract(content, '$.text') IS NOT NULL; +CREATE INDEX IF NOT EXISTS "knowledge_created_key" ON "knowledge" ("agentId", "createdAt"); +CREATE INDEX IF NOT EXISTS 
"knowledge_shared_key" ON "knowledge" ("isShared"); + +COMMIT;`; \ No newline at end of file diff --git a/packages/adapter-sqljs/src/index.ts b/packages/adapter-sqljs/src/index.ts index 0383a7e38b..8b8d267c7a 100644 --- a/packages/adapter-sqljs/src/index.ts +++ b/packages/adapter-sqljs/src/index.ts @@ -12,6 +12,8 @@ import { type Memory, type Relationship, type UUID, + RAGKnowledgeItem, + elizaLogger } from "@elizaos/core"; import { v4 } from "uuid"; import { sqliteTables } from "./sqliteTables.ts"; @@ -802,4 +804,191 @@ export class SqlJsDatabaseAdapter return false; } } + + async getKnowledge(params: { + id?: UUID; + agentId: UUID; + limit?: number; + query?: string; + }): Promise { + let sql = `SELECT * FROM knowledge WHERE ("agentId" = ? OR "isShared" = 1)`; + const queryParams: any[] = [params.agentId]; + + if (params.id) { + sql += ` AND id = ?`; + queryParams.push(params.id); + } + + if (params.limit) { + sql += ` LIMIT ?`; + queryParams.push(params.limit); + } + + const stmt = this.db.prepare(sql); + stmt.bind(queryParams); + const results: RAGKnowledgeItem[] = []; + + while (stmt.step()) { + const row = stmt.getAsObject() as any; + results.push({ + id: row.id, + agentId: row.agentId, + content: JSON.parse(row.content), + embedding: row.embedding ? 
new Float32Array(row.embedding) : undefined, // Convert Uint8Array back to Float32Array + createdAt: row.createdAt + }); + } + stmt.free(); + return results; + } + + async searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array; + match_threshold: number; + match_count: number; + searchText?: string; + }): Promise { + const cacheKey = `embedding_${params.agentId}_${params.searchText}`; + const cachedResult = await this.getCache({ + key: cacheKey, + agentId: params.agentId + }); + + if (cachedResult) { + return JSON.parse(cachedResult); + } + + let sql = ` + WITH vector_scores AS ( + SELECT id, + 1 / (1 + vec_distance_L2(embedding, ?)) as vector_score + FROM knowledge + WHERE ("agentId" IS NULL AND "isShared" = 1) OR "agentId" = ? + AND embedding IS NOT NULL + ), + keyword_matches AS ( + SELECT id, + CASE + WHEN json_extract(content, '$.text') LIKE ? THEN 3.0 + ELSE 1.0 + END * + CASE + WHEN json_extract(content, '$.metadata.isChunk') = 1 THEN 1.5 + WHEN json_extract(content, '$.metadata.isMain') = 1 THEN 1.2 + ELSE 1.0 + END as keyword_score + FROM knowledge + WHERE ("agentId" IS NULL AND "isShared" = 1) OR "agentId" = ? + ) + SELECT k.*, + v.vector_score, + kw.keyword_score, + (v.vector_score * kw.keyword_score) as combined_score + FROM knowledge k + JOIN vector_scores v ON k.id = v.id + LEFT JOIN keyword_matches kw ON k.id = kw.id + WHERE (k.agentId IS NULL AND k.isShared = 1) OR k.agentId = ? + AND ( + v.vector_score >= ? -- Using match_threshold parameter + OR (kw.keyword_score > 1.0 AND v.vector_score >= 0.3) + ) + ORDER BY combined_score DESC + LIMIT ? 
+ `; + + const stmt = this.db.prepare(sql); + stmt.bind([ + new Uint8Array(params.embedding.buffer), + params.agentId, + `%${params.searchText || ''}%`, + params.agentId, + params.agentId, + params.match_threshold, + params.match_count + ]); + + const results: RAGKnowledgeItem[] = []; + while (stmt.step()) { + const row = stmt.getAsObject() as any; + results.push({ + id: row.id, + agentId: row.agentId, + content: JSON.parse(row.content), + embedding: row.embedding ? new Float32Array(row.embedding) : undefined, + createdAt: row.createdAt, + similarity: row.keyword_score + }); + } + stmt.free(); + + await this.setCache({ + key: cacheKey, + agentId: params.agentId, + value: JSON.stringify(results) + }); + + return results; + } + + async createKnowledge(knowledge: RAGKnowledgeItem): Promise { + try { + const sql = ` + INSERT INTO knowledge ( + id, "agentId", content, embedding, "createdAt", + "isMain", "originalId", "chunkIndex", "isShared" + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `; + + const stmt = this.db.prepare(sql); + const metadata = knowledge.content.metadata || {}; + + stmt.run([ + knowledge.id, + metadata.isShared ? null : knowledge.agentId, + JSON.stringify(knowledge.content), + knowledge.embedding ? new Uint8Array(knowledge.embedding.buffer) : null, + knowledge.createdAt || Date.now(), + metadata.isMain ? 1 : 0, + metadata.originalId || null, + metadata.chunkIndex || null, + metadata.isShared ? 
1 : 0 + ]); + stmt.free(); + } catch (error: any) { + const isShared = knowledge.content.metadata?.isShared; + const isPrimaryKeyError = error?.code === 'SQLITE_CONSTRAINT_PRIMARYKEY'; + + if (isShared && isPrimaryKeyError) { + elizaLogger.info(`Shared knowledge ${knowledge.id} already exists, skipping`); + return; + } else if (!isShared && !error.message?.includes('SQLITE_CONSTRAINT_PRIMARYKEY')) { + elizaLogger.error(`Error creating knowledge ${knowledge.id}:`, { + error, + embeddingLength: knowledge.embedding?.length, + content: knowledge.content + }); + throw error; + } + + elizaLogger.debug(`Knowledge ${knowledge.id} already exists, skipping`); + } + } + + async removeKnowledge(id: UUID): Promise { + const sql = `DELETE FROM knowledge WHERE id = ?`; + const stmt = this.db.prepare(sql); + stmt.run([id]); + stmt.free(); + } + + async clearKnowledge(agentId: UUID, shared?: boolean): Promise { + const sql = shared ? + `DELETE FROM knowledge WHERE ("agentId" = ? OR "isShared" = 1)` : + `DELETE FROM knowledge WHERE "agentId" = ?`; + + const stmt = this.db.prepare(sql); + stmt.run([agentId]); + stmt.free(); + } } diff --git a/packages/adapter-sqljs/src/sqliteTables.ts b/packages/adapter-sqljs/src/sqliteTables.ts index fdd47e5697..87fc26743f 100644 --- a/packages/adapter-sqljs/src/sqliteTables.ts +++ b/packages/adapter-sqljs/src/sqliteTables.ts @@ -92,6 +92,22 @@ CREATE TABLE IF NOT EXISTS "cache" ( PRIMARY KEY ("key", "agentId") ); +-- Table: knowledge +CREATE TABLE IF NOT EXISTS "knowledge" ( + "id" TEXT PRIMARY KEY, + "agentId" TEXT, + "content" TEXT NOT NULL CHECK(json_valid("content")), + "embedding" BLOB, + "createdAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + "isMain" INTEGER DEFAULT 0, + "originalId" TEXT, + "chunkIndex" INTEGER, + "isShared" INTEGER DEFAULT 0, + FOREIGN KEY ("agentId") REFERENCES "accounts"("id"), + FOREIGN KEY ("originalId") REFERENCES "knowledge"("id"), + CHECK((isShared = 1 AND agentId IS NULL) OR (isShared = 0 AND agentId IS NOT NULL)) +); + 
-- Index: relationships_id_key CREATE UNIQUE INDEX IF NOT EXISTS "relationships_id_key" ON "relationships" ("id"); @@ -101,4 +117,14 @@ CREATE UNIQUE INDEX IF NOT EXISTS "memories_id_key" ON "memories" ("id"); -- Index: participants_id_key CREATE UNIQUE INDEX IF NOT EXISTS "participants_id_key" ON "participants" ("id"); -COMMIT;`; +-- Index: knowledge +CREATE INDEX IF NOT EXISTS "knowledge_agent_key" ON "knowledge" ("agentId"); +CREATE INDEX IF NOT EXISTS "knowledge_agent_main_key" ON "knowledge" ("agentId", "isMain"); +CREATE INDEX IF NOT EXISTS "knowledge_original_key" ON "knowledge" ("originalId"); +CREATE INDEX IF NOT EXISTS "knowledge_content_key" ON "knowledge" + ((json_extract(content, '$.text'))) + WHERE json_extract(content, '$.text') IS NOT NULL; +CREATE INDEX IF NOT EXISTS "knowledge_created_key" ON "knowledge" ("agentId", "createdAt"); +CREATE INDEX IF NOT EXISTS "knowledge_shared_key" ON "knowledge" ("isShared"); + +COMMIT;`; \ No newline at end of file diff --git a/packages/adapter-supabase/schema.sql b/packages/adapter-supabase/schema.sql index fd7ec2287d..14674261d6 100644 --- a/packages/adapter-supabase/schema.sql +++ b/packages/adapter-supabase/schema.sql @@ -9,6 +9,7 @@ -- DROP TABLE IF EXISTS memories CASCADE; -- DROP TABLE IF EXISTS rooms CASCADE; -- DROP TABLE IF EXISTS accounts CASCADE; +-- DROP TABLE IF EXISTS knowledge CASCADE; CREATE EXTENSION IF NOT EXISTS vector; @@ -61,21 +62,6 @@ CREATE TABLE memories_1024 ( CONSTRAINT fk_agent FOREIGN KEY ("agentId") REFERENCES accounts("id") ON DELETE CASCADE ); -CREATE TABLE memories_768 ( - "id" UUID PRIMARY KEY, - "type" TEXT NOT NULL, - "createdAt" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, - "content" JSONB NOT NULL, - "embedding" vector(768), -- Gaianet nomic-embed - "userId" UUID REFERENCES accounts("id"), - "agentId" UUID REFERENCES accounts("id"), - "roomId" UUID REFERENCES rooms("id"), - "unique" BOOLEAN DEFAULT true NOT NULL, - CONSTRAINT fk_room FOREIGN KEY ("roomId") REFERENCES rooms("id") 
ON DELETE CASCADE, - CONSTRAINT fk_user FOREIGN KEY ("userId") REFERENCES accounts("id") ON DELETE CASCADE, - CONSTRAINT fk_agent FOREIGN KEY ("agentId") REFERENCES accounts("id") ON DELETE CASCADE -); - CREATE TABLE memories_384 ( "id" UUID PRIMARY KEY, "type" TEXT NOT NULL, @@ -97,8 +83,6 @@ CREATE VIEW memories AS UNION ALL SELECT * FROM memories_1024 UNION ALL - SELECT * FROM memories_768; - UNION ALL SELECT * FROM memories_384; @@ -150,11 +134,31 @@ CREATE TABLE relationships ( CONSTRAINT fk_user FOREIGN KEY ("userId") REFERENCES accounts("id") ON DELETE CASCADE ); +CREATE TABLE cache ( + "key" TEXT NOT NULL, + "agentId" TEXT NOT NULL, + "value" JSONB DEFAULT '{}'::jsonb, + "createdAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + "expiresAt" TIMESTAMP, + PRIMARY KEY ("key", "agentId") +); + +CREATE TABLE knowledge ( + "id" UUID PRIMARY KEY, + "agentId" UUID REFERENCES accounts("id"), + "content" JSONB NOT NULL, + "embedding" vector(1536), + "createdAt" TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + "isMain" BOOLEAN DEFAULT FALSE, + "originalId" UUID REFERENCES knowledge("id"), + "chunkIndex" INTEGER, + "isShared" BOOLEAN DEFAULT FALSE, + CHECK((isShared = true AND "agentId" IS NULL) OR (isShared = false AND "agentId" IS NOT NULL)) +); + -- Add index for Ollama table CREATE INDEX idx_memories_1024_embedding ON memories_1024 USING hnsw ("embedding" vector_cosine_ops); CREATE INDEX idx_memories_1024_type_room ON memories_1024("type", "roomId"); -CREATE INDEX idx_memories_768_embedding ON memories_768 USING hnsw ("embedding" vector_cosine_ops); -CREATE INDEX idx_memories_768_type_room ON memories_768("type", "roomId"); CREATE INDEX idx_memories_1536_embedding ON memories_1536 USING hnsw ("embedding" vector_cosine_ops); CREATE INDEX idx_memories_384_embedding ON memories_384 USING hnsw ("embedding" vector_cosine_ops); CREATE INDEX idx_memories_1536_type_room ON memories_1536("type", "roomId"); @@ -162,5 +166,11 @@ CREATE INDEX idx_memories_384_type_room ON memories_384("type", 
"roomId"); CREATE INDEX idx_participants_user ON participants("userId"); CREATE INDEX idx_participants_room ON participants("roomId"); CREATE INDEX idx_relationships_users ON relationships("userA", "userB"); +CREATE INDEX idx_knowledge_agent ON knowledge("agentId"); +CREATE INDEX idx_knowledge_agent_main ON knowledge("agentId", "isMain"); +CREATE INDEX idx_knowledge_original ON knowledge("originalId"); +CREATE INDEX idx_knowledge_created ON knowledge("agentId", "createdAt"); +CREATE INDEX idx_knowledge_shared ON knowledge("isShared"); +CREATE INDEX idx_knowledge_embedding ON knowledge USING ivfflat (embedding vector_cosine_ops); COMMIT; \ No newline at end of file diff --git a/packages/adapter-supabase/src/index.ts b/packages/adapter-supabase/src/index.ts index f6f9840f21..9c8d643f61 100644 --- a/packages/adapter-supabase/src/index.ts +++ b/packages/adapter-supabase/src/index.ts @@ -9,6 +9,8 @@ import { type UUID, Participant, Room, + RAGKnowledgeItem, + elizaLogger } from "@elizaos/core"; import { DatabaseAdapter } from "@elizaos/core"; import { v4 as uuid } from "uuid"; @@ -680,4 +682,229 @@ export class SupabaseDatabaseAdapter extends DatabaseAdapter { return data as Relationship[]; } + + async getCache(params: { + key: string; + agentId: UUID; + }): Promise { + const { data, error } = await this.supabase + .from('cache') + .select('value') + .eq('key', params.key) + .eq('agentId', params.agentId) + .single(); + + if (error) { + console.error('Error fetching cache:', error); + return undefined; + } + + return data?.value; + } + + async setCache(params: { + key: string; + agentId: UUID; + value: string; + }): Promise { + const { error } = await this.supabase + .from('cache') + .upsert({ + key: params.key, + agentId: params.agentId, + value: params.value, + createdAt: new Date() + }); + + if (error) { + console.error('Error setting cache:', error); + return false; + } + + return true; + } + + async deleteCache(params: { + key: string; + agentId: UUID; + }): 
Promise { + try { + const { error } = await this.supabase + .from('cache') + .delete() + .eq('key', params.key) + .eq('agentId', params.agentId); + + if (error) { + elizaLogger.error("Error deleting cache", { + error: error.message, + key: params.key, + agentId: params.agentId, + }); + return false; + } + return true; + } catch (error) { + elizaLogger.error( + "Database connection error in deleteCache", + error instanceof Error ? error.message : String(error) + ); + return false; + } + } + + async getKnowledge(params: { + id?: UUID; + agentId: UUID; + limit?: number; + query?: string; + }): Promise { + let query = this.supabase + .from('knowledge') + .select('*') + .or(`agentId.eq.${params.agentId},isShared.eq.true`); + + if (params.id) { + query = query.eq('id', params.id); + } + + if (params.limit) { + query = query.limit(params.limit); + } + + const { data, error } = await query; + + if (error) { + throw new Error(`Error getting knowledge: ${error.message}`); + } + + return data.map(row => ({ + id: row.id, + agentId: row.agentId, + content: typeof row.content === 'string' ? JSON.parse(row.content) : row.content, + embedding: row.embedding ? 
new Float32Array(row.embedding) : undefined, + createdAt: new Date(row.createdAt).getTime() + })); + } + + async searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array; + match_threshold: number; + match_count: number; + searchText?: string; + }): Promise { + const cacheKey = `embedding_${params.agentId}_${params.searchText}`; + const cachedResult = await this.getCache({ + key: cacheKey, + agentId: params.agentId + }); + + if (cachedResult) { + return JSON.parse(cachedResult); + } + + // Convert Float32Array to array for Postgres vector + const embedding = Array.from(params.embedding); + + const { data, error } = await this.supabase.rpc('search_knowledge', { + query_embedding: embedding, + query_agent_id: params.agentId, + match_threshold: params.match_threshold, + match_count: params.match_count, + search_text: params.searchText || '' + }); + + if (error) { + throw new Error(`Error searching knowledge: ${error.message}`); + } + + const results = data.map(row => ({ + id: row.id, + agentId: row.agentId, + content: typeof row.content === 'string' ? JSON.parse(row.content) : row.content, + embedding: row.embedding ? new Float32Array(row.embedding) : undefined, + createdAt: new Date(row.createdAt).getTime(), + similarity: row.similarity + })); + + await this.setCache({ + key: cacheKey, + agentId: params.agentId, + value: JSON.stringify(results) + }); + + return results; + } + + async createKnowledge(knowledge: RAGKnowledgeItem): Promise { + try { + const metadata = knowledge.content.metadata || {}; + + const { error } = await this.supabase + .from('knowledge') + .insert({ + id: knowledge.id, + agentId: metadata.isShared ? null : knowledge.agentId, + content: knowledge.content, + embedding: knowledge.embedding ? 
Array.from(knowledge.embedding) : null, + createdAt: knowledge.createdAt || new Date(), + isMain: metadata.isMain || false, + originalId: metadata.originalId || null, + chunkIndex: metadata.chunkIndex || null, + isShared: metadata.isShared || false + }); + + if (error) { + if (metadata.isShared && error.code === '23505') { // Unique violation + elizaLogger.info(`Shared knowledge ${knowledge.id} already exists, skipping`); + return; + } + throw error; + } + } catch (error: any) { + elizaLogger.error(`Error creating knowledge ${knowledge.id}:`, { + error, + embeddingLength: knowledge.embedding?.length, + content: knowledge.content + }); + throw error; + } + } + + async removeKnowledge(id: UUID): Promise { + const { error } = await this.supabase + .from('knowledge') + .delete() + .eq('id', id); + + if (error) { + throw new Error(`Error removing knowledge: ${error.message}`); + } + } + + async clearKnowledge(agentId: UUID, shared?: boolean): Promise { + if (shared) { + const { error } = await this.supabase + .from('knowledge') + .delete() + .filter('agentId', 'eq', agentId) + .filter('isShared', 'eq', true); + + if (error) { + elizaLogger.error(`Error clearing shared knowledge for agent ${agentId}:`, error); + throw error; + } + } else { + const { error } = await this.supabase + .from('knowledge') + .delete() + .eq('agentId', agentId); + + if (error) { + elizaLogger.error(`Error clearing knowledge for agent ${agentId}:`, error); + throw error; + } + } + } } diff --git a/packages/core/src/database.ts b/packages/core/src/database.ts index 9e8cbfa1b5..310c44c32a 100644 --- a/packages/core/src/database.ts +++ b/packages/core/src/database.ts @@ -6,6 +6,7 @@ import { type Memory, type Relationship, type UUID, + RAGKnowledgeItem, Participant, IDatabaseAdapter, } from "./types.ts"; @@ -380,6 +381,48 @@ export abstract class DatabaseAdapter implements IDatabaseAdapter { userId: UUID; }): Promise; + /** + * Retrieves knowledge items based on specified parameters. 
+ * @param params Object containing search parameters + * @returns Promise resolving to array of knowledge items + */ + abstract getKnowledge(params: { + id?: UUID; + agentId: UUID; + limit?: number; + query?: string; + conversationContext?: string; + }): Promise; + + abstract searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array; + match_threshold: number; + match_count: number; + searchText?: string; + }): Promise; + + /** + * Creates a new knowledge item in the database. + * @param knowledge The knowledge item to create + * @returns Promise resolving when creation is complete + */ + abstract createKnowledge(knowledge: RAGKnowledgeItem): Promise; + + /** + * Removes a knowledge item and its associated chunks from the database. + * @param id The ID of the knowledge item to remove + * @returns Promise resolving when removal is complete + */ + abstract removeKnowledge(id: UUID): Promise; + + /** + * Removes an agent's full knowledge database and its associated chunks from the database. + * @param agentId The Agent ID of the knowledge items to remove + * @returns Promise resolving when removal is complete + */ + abstract clearKnowledge(agentId: UUID, shared?: boolean): Promise; + /** * Executes an operation with circuit breaker protection. 
* @param operation A function that returns a Promise to be executed with circuit breaker protection diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index c0360768e9..7dbf7f832d 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -23,4 +23,5 @@ export * from "./uuid.ts"; export * from "./environment.ts"; export * from "./cache.ts"; export { default as knowledge } from "./knowledge.ts"; +export * from "./ragknowledge.ts"; export * from "./utils.ts"; diff --git a/packages/core/src/ragknowledge.ts b/packages/core/src/ragknowledge.ts new file mode 100644 index 0000000000..cca883ae48 --- /dev/null +++ b/packages/core/src/ragknowledge.ts @@ -0,0 +1,336 @@ +import { embed } from "./embedding.ts"; +import elizaLogger from "./logger.ts"; +import { + IRAGKnowledgeManager, + RAGKnowledgeItem, + UUID, + IAgentRuntime +} from "./types.ts"; +import { splitChunks } from "./generation.ts"; +import { stringToUuid } from "./uuid.ts"; + +/** + * Manage knowledge in the database. + */ +export class RAGKnowledgeManager implements IRAGKnowledgeManager { + /** + * The AgentRuntime instance associated with this manager. + */ + runtime: IAgentRuntime; + + /** + * The name of the database table this manager operates on. + */ + tableName: string; + + /** + * Constructs a new KnowledgeManager instance. + * @param opts Options for the manager. + * @param opts.tableName The name of the table this manager will operate on. + * @param opts.runtime The AgentRuntime instance associated with this manager. 
+ */ + constructor(opts: { tableName: string; runtime: IAgentRuntime }) { + this.runtime = opts.runtime; + this.tableName = opts.tableName; + } + + private readonly defaultRAGMatchThreshold = 0.85; + private readonly defaultRAGMatchCount = 5; + + /** + * Common English stop words to filter out from query analysis + */ + private readonly stopWords = new Set([ + 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'does', 'for', 'from', 'had', + 'has', 'have', 'he', 'her', 'his', 'how', 'hey', 'i', 'in', 'is', 'it', 'its', + 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', + 'which', 'who', 'will', 'with', 'would', 'there', 'their', 'they', 'your', 'you' + ]); + + /** + * Filters out stop words and returns meaningful terms + */ + private getQueryTerms(query: string): string[] { + return query.toLowerCase() + .split(' ') + .filter(term => term.length > 3) // Filter very short words + .filter(term => !this.stopWords.has(term)); // Filter stop words + } + + /** + * Preprocesses text content for better RAG performance. + * @param content The text content to preprocess. + * @returns The preprocessed text. 
+ */ + + private preprocess(content: string): string { + if (!content || typeof content !== "string") { + elizaLogger.warn("Invalid input for preprocessing"); + return ""; + } + + return content + .replace(/```[\s\S]*?```/g, "") + .replace(/`.*?`/g, "") + .replace(/#{1,6}\s*(.*)/g, "$1") + .replace(/!\[(.*?)\]\(.*?\)/g, "$1") + .replace(/\[(.*?)\]\(.*?\)/g, "$1") + .replace(/(https?:\/\/)?(www\.)?([^\s]+\.[^\s]+)/g, "$3") + .replace(/<@[!&]?\d+>/g, "") + .replace(/<[^>]*>/g, "") + .replace(/^\s*[-*_]{3,}\s*$/gm, "") + .replace(/\/\*[\s\S]*?\*\//g, "") + .replace(/\/\/.*/g, "") + .replace(/\s+/g, " ") + .replace(/\n{3,}/g, "\n\n") + .replace(/[^a-zA-Z0-9\s\-_./:?=&]/g, "") + .trim() + .toLowerCase(); + } + + private hasProximityMatch(text: string, terms: string[]): boolean { + const words = text.toLowerCase().split(' '); + const positions = terms.map(term => words.findIndex(w => w.includes(term))) + .filter(pos => pos !== -1); + + if (positions.length < 2) return false; + + // Check if any matches are within 5 words of each other + for (let i = 0; i < positions.length - 1; i++) { + if (Math.abs(positions[i] - positions[i + 1]) <= 5) { + return true; + } + } + return false; + } + + async getKnowledge(params: { + query?: string; + id?: UUID; + conversationContext?: string; + limit?: number; + agentId?: UUID; + }): Promise { + const agentId = params.agentId || this.runtime.agentId; + + // If id is provided, do direct lookup first + if (params.id) { + const directResults = await this.runtime.databaseAdapter.getKnowledge({ + id: params.id, + agentId: agentId + }); + + if (directResults.length > 0) { + return directResults; + } + } + + // If no id or no direct results, perform semantic search + if (params.query) { + try { + const processedQuery = this.preprocess(params.query); + + // Build search text with optional context + let searchText = processedQuery; + if (params.conversationContext) { + const relevantContext = this.preprocess(params.conversationContext); + 
searchText = `${relevantContext} ${processedQuery}`; + } + + const embeddingArray = await embed(this.runtime, searchText); + + const embedding = new Float32Array(embeddingArray); + + // Get results with single query + const results = await this.runtime.databaseAdapter.searchKnowledge({ + agentId: this.runtime.agentId, + embedding: embedding, + match_threshold: this.defaultRAGMatchThreshold, + match_count: (params.limit || this.defaultRAGMatchCount) * 2, + searchText: processedQuery + }); + + // Enhanced reranking with sophisticated scoring + const rerankedResults = results.map(result => { + let score = result.similarity; + + // Check for direct query term matches + const queryTerms = this.getQueryTerms(processedQuery); + + const matchingTerms = queryTerms.filter(term => + result.content.text.toLowerCase().includes(term)); + + if (matchingTerms.length > 0) { + // Much stronger boost for matches + score *= (1 + (matchingTerms.length / queryTerms.length) * 2); // Double the boost + + if (this.hasProximityMatch(result.content.text, matchingTerms)) { + score *= 1.5; // Stronger proximity boost + } + } else { + // More aggressive penalty + if (!params.conversationContext) { + score *= 0.3; // Stronger penalty + } + } + + return { + ...result, + score, + matchedTerms: matchingTerms // Add for debugging + }; + }).sort((a, b) => b.score - a.score); + + // Filter and return results + return rerankedResults + .filter(result => result.score >= this.defaultRAGMatchThreshold) + .slice(0, params.limit || this.defaultRAGMatchCount); + + } catch(error) { + console.log(`[RAG Search Error] ${error}`); + return []; + } + } + + // If neither id nor query provided, return empty array + return []; + } + + async createKnowledge(item: RAGKnowledgeItem): Promise { + if (!item.content.text) { + elizaLogger.warn("Empty content in knowledge item"); + return; + } + + try { + // Process main document + const processedContent = this.preprocess(item.content.text); + const mainEmbeddingArray = 
await embed(this.runtime, processedContent); + + const mainEmbedding = new Float32Array(mainEmbeddingArray); + + // Create main document + await this.runtime.databaseAdapter.createKnowledge({ + id: item.id, + agentId: this.runtime.agentId, + content: { + text: item.content.text, + metadata: { + ...item.content.metadata, + isMain: true + } + }, + embedding: mainEmbedding, + createdAt: Date.now() + }); + + // Generate and store chunks + const chunks = await splitChunks(processedContent, 512, 20); + + for (const [index, chunk] of chunks.entries()) { + const chunkEmbeddingArray = await embed(this.runtime, chunk); + const chunkEmbedding = new Float32Array(chunkEmbeddingArray); + const chunkId = `${item.id}-chunk-${index}` as UUID; + + await this.runtime.databaseAdapter.createKnowledge({ + id: chunkId, + agentId: this.runtime.agentId, + content: { + text: chunk, + metadata: { + ...item.content.metadata, + isChunk: true, + originalId: item.id, + chunkIndex: index + } + }, + embedding: chunkEmbedding, + createdAt: Date.now() + }); + } + } catch (error) { + elizaLogger.error(`Error processing knowledge ${item.id}:`, error); + throw error; + } + } + + async searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array | number[]; + match_threshold?: number; + match_count?: number; + searchText?: string; + }): Promise { + const { + match_threshold = this.defaultRAGMatchThreshold, + match_count = this.defaultRAGMatchCount, + embedding, + searchText + } = params; + + const float32Embedding = Array.isArray(embedding) ? 
new Float32Array(embedding) : embedding; + + return await this.runtime.databaseAdapter.searchKnowledge({ + agentId: params.agentId || this.runtime.agentId, + embedding: float32Embedding, + match_threshold, + match_count, + searchText + }); + } + + async removeKnowledge(id: UUID): Promise { + await this.runtime.databaseAdapter.removeKnowledge(id); + } + + async clearKnowledge(shared?: boolean): Promise { + await this.runtime.databaseAdapter.clearKnowledge(this.runtime.agentId, shared ? shared : false); + } + + async processFile(file: { + path: string; + content: string; + type: 'pdf' | 'md' | 'txt'; + isShared?: boolean + }): Promise { + let content = file.content; + + try { + // Process based on file type + switch(file.type) { + case 'pdf': + //To-Do: Add native support for basic PDFs + elizaLogger.warn(`PDF files not currently supported: ${file.type}`) + break; + case 'md': + case 'txt': + break; + default: + elizaLogger.warn(`Unsupported file type: ${file.type}`); + return; + } + + elizaLogger.info(`[Processing Files] ${file.path} ${content} ${file.isShared}`) + + await this.createKnowledge({ + id: stringToUuid(file.path), + agentId: this.runtime.agentId, + content: { + text: content, + metadata: { + source: file.path, + type: file.type, + isShared: file.isShared || false + } + } + }); + } catch (error) { + if (file.isShared && error?.code === 'SQLITE_CONSTRAINT_PRIMARYKEY') { + elizaLogger.info(`Shared knowledge ${file.path} already exists in database, skipping creation`); + return; + } + elizaLogger.error(`Error processing file ${file.path}:`, error); + throw error; + } + } +} \ No newline at end of file diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 2ba5f016b4..2d0decff0c 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -17,6 +17,7 @@ import { generateText } from "./generation.ts"; import { formatGoalsAsString, getGoals } from "./goals.ts"; import { elizaLogger } from "./index.ts"; import knowledge 
from "./knowledge.ts"; +import { RAGKnowledgeManager } from "./ragknowledge.ts"; import { MemoryManager } from "./memory.ts"; import { formatActors, formatMessages, getActorDetails } from "./messages.ts"; import { parseJsonArrayFromText } from "./parsing.ts"; @@ -30,8 +31,10 @@ import { IAgentRuntime, ICacheManager, IDatabaseAdapter, + IRAGKnowledgeManager, IMemoryManager, KnowledgeItem, + RAGKnowledgeItem, ModelClass, ModelProviderName, Plugin, @@ -46,6 +49,8 @@ import { type Memory, } from "./types.ts"; import { stringToUuid } from "./uuid.ts"; +import { readFile } from 'fs/promises'; +import { join } from 'path'; /** * Represents the runtime environment for an agent, handling message processing, @@ -139,6 +144,8 @@ export class AgentRuntime implements IAgentRuntime { */ knowledgeManager: IMemoryManager; + ragKnowledgeManager: IRAGKnowledgeManager; + services: Map = new Map(); memoryManagers: Map = new Map(); cacheManager: ICacheManager; @@ -289,6 +296,11 @@ export class AgentRuntime implements IAgentRuntime { tableName: "fragments", }); + this.ragKnowledgeManager = new RAGKnowledgeManager({ + runtime: this, + tableName: 'knowledge' + }); + (opts.managers ?? []).forEach((manager: IMemoryManager) => { this.registerMemoryManager(manager); }); @@ -405,7 +417,15 @@ export class AgentRuntime implements IAgentRuntime { this.character.knowledge && this.character.knowledge.length > 0 ) { - await this.processCharacterKnowledge(this.character.knowledge); + if(this.character.settings.ragKnowledge) { + await this.processCharacterRAGKnowledge(this.character.knowledge); + } else { + const stringKnowledge = this.character.knowledge.filter((item): item is string => + typeof item === 'string' + ); + + await this.processCharacterKnowledge(stringKnowledge); + } } } @@ -459,6 +479,137 @@ export class AgentRuntime implements IAgentRuntime { } } + /** + * Processes character knowledge by creating document memories and fragment memories. 
+ * This function takes an array of knowledge items, creates a document knowledge for each item if it doesn't exist, + * then chunks the content into fragments, embeds each fragment, and creates fragment knowledge. + * An array of knowledge items or objects containing id, path, and content. + */ + private async processCharacterRAGKnowledge(items: (string | { path: string; shared?: boolean })[]) { + let hasError = false; + + for (const item of items) { + if (!item) continue; + + try { + // Check if item is marked as shared + let isShared = false; + let contentItem = item; + + // Only treat as shared if explicitly marked + if (typeof item === 'object' && 'path' in item) { + isShared = item.shared === true; + contentItem = item.path; + } else { + contentItem = item; + } + + const knowledgeId = stringToUuid(contentItem); + const fileExtension = contentItem.split('.').pop()?.toLowerCase(); + + // Check if it's a file or direct knowledge + if (fileExtension && ['md', 'txt', 'pdf'].includes(fileExtension)) { + try { + const rootPath = join(process.cwd(), '..'); + const filePath = join(rootPath, 'characters', 'knowledge', contentItem); + elizaLogger.info("Attempting to read file from:", filePath); + + // Get existing knowledge first + const existingKnowledge = await this.ragKnowledgeManager.getKnowledge({ + id: knowledgeId, + agentId: this.agentId + }); + + let content: string; + + content = await readFile(filePath, 'utf8'); + + if (!content) { + hasError = true; + continue; + } + + // If the file exists in DB, check if content has changed + if (existingKnowledge.length > 0) { + const existingContent = existingKnowledge[0].content.text; + if (existingContent === content) { + elizaLogger.info(`File ${contentItem} unchanged, skipping`); + continue; + } else { + // If content changed, remove old knowledge before adding new + await this.ragKnowledgeManager.removeKnowledge(knowledgeId); + // Also remove any associated chunks + await 
this.ragKnowledgeManager.removeKnowledge(`${knowledgeId}-chunk-*` as UUID); + } + } + + elizaLogger.info( + `Successfully read ${fileExtension.toUpperCase()} file content for`, + this.character.name, + "-", + contentItem + ); + + await this.ragKnowledgeManager.processFile({ + path: contentItem, + content: content, + type: fileExtension as 'pdf' | 'md' | 'txt', + isShared: isShared + }); + + } catch (error: any) { + hasError = true; + elizaLogger.error( + `Failed to read knowledge file ${contentItem}. Error details:`, + error?.message || error || 'Unknown error' + ); + continue; // Continue to next item even if this one fails + } + } else { + // Handle direct knowledge string + elizaLogger.info( + "Processing direct knowledge for", + this.character.name, + "-", + contentItem.slice(0, 100) + ); + + const existingKnowledge = await this.ragKnowledgeManager.getKnowledge({ + id: knowledgeId, + agentId: this.agentId + }); + + if (existingKnowledge.length > 0) { + elizaLogger.info(`Direct knowledge ${knowledgeId} already exists, skipping`); + continue; + } + + await this.ragKnowledgeManager.createKnowledge({ + id: knowledgeId, + agentId: this.agentId, + content: { + text: contentItem, + metadata: { + type: 'direct' + } + } + }); + } + } catch (error: any) { + hasError = true; + elizaLogger.error( + `Error processing knowledge item ${item}:`, + error?.message || error || 'Unknown error' + ); + continue; // Continue to next item even if this one fails + } + } + + if (hasError) { + elizaLogger.warn('Some knowledge items failed to process, but continuing with available knowledge'); + } + } + getSetting(key: string) { // check if the key is in the character.settings.secrets object if (this.character.settings?.secrets?.[key]) { @@ -995,9 +1146,27 @@ Text: ${attachment.text} .join(" "); } - const knowledegeData = await knowledge.get(this, message); + let knowledgeData = []; + let formattedKnowledge = ''; + + if(this.character.settings?.ragKnowledge) { + const recentContext = 
recentMessagesData + .slice(-3) // Last 3 messages + .map(msg => msg.content.text) + .join(' '); + + knowledgeData = await this.ragKnowledgeManager.getKnowledge({ + query: message.content.text, + conversationContext: recentContext, + limit: 5 + }); + + formattedKnowledge = formatKnowledge(knowledgeData); + } else { + knowledgeData = await knowledge.get(this, message); - const formattedKnowledge = formatKnowledge(knowledegeData); + formattedKnowledge = formatKnowledge(knowledgeData); + } const initialState = { agentId: this.agentId, @@ -1014,7 +1183,8 @@ Text: ${attachment.text} ] : "", knowledge: formattedKnowledge, - knowledgeData: knowledegeData, + knowledgeData: knowledgeData, + ragKnowledgeData: knowledgeData, // Recent interactions between the sender and receiver, formatted as messages recentMessageInteractions: formattedMessageInteractions, // Recent interactions between the sender and receiver, formatted as posts diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 4b0b3d5d10..f3456f94c6 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -325,6 +325,8 @@ export interface State { knowledge?: string; /** Optional knowledge data */ knowledgeData?: KnowledgeItem[]; + /** Optional knowledge data */ + ragKnowledgeData?: RAGKnowledgeItem[]; /** Additional dynamic properties */ [key: string]: unknown; @@ -737,7 +739,7 @@ export type Character = { adjectives: string[]; /** Optional knowledge base */ - knowledge?: string[]; + knowledge?: (string | { path: string; shared?: boolean })[]; /** Supported client platforms */ clients: Clients[]; @@ -784,6 +786,7 @@ export type Character = { solana?: any[]; [key: string]: any[]; }; + ragKnowledge?: boolean; }; /** Optional client-specific config */ @@ -996,6 +999,26 @@ export interface IDatabaseAdapter { }): Promise; getRelationships(params: { userId: UUID }): Promise; + + getKnowledge(params: { + id?: UUID; + agentId: UUID; + limit?: number; + query?: string; + 
conversationContext?: string; + }): Promise; + + searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array; + match_threshold: number; + match_count: number; + searchText?: string; + }): Promise; + + createKnowledge(knowledge: RAGKnowledgeItem): Promise; + removeKnowledge(id: UUID): Promise; + clearKnowledge(agentId: UUID, shared?: boolean): Promise; } export interface IDatabaseCacheAdapter { @@ -1053,6 +1076,35 @@ export interface IMemoryManager { countMemories(roomId: UUID, unique?: boolean): Promise; } +export interface IRAGKnowledgeManager { + runtime: IAgentRuntime; + tableName: string; + + getKnowledge(params: { + query?: string; + id?: UUID; + limit?: number; + conversationContext?: string; + agentId?: UUID; + }): Promise; + createKnowledge(item: RAGKnowledgeItem): Promise; + removeKnowledge(id: UUID): Promise; + searchKnowledge(params: { + agentId: UUID; + embedding: Float32Array | number[]; + match_threshold?: number; + match_count?: number; + searchText?: string; + }): Promise; + clearKnowledge(shared?: boolean): Promise; + processFile(file: { + path: string; + content: string; + type: 'pdf' | 'md' | 'txt', + isShared: boolean; + }): Promise; +} + export type CacheOptions = { expires?: number; }; @@ -1111,6 +1163,7 @@ export interface IAgentRuntime { descriptionManager: IMemoryManager; documentsManager: IMemoryManager; knowledgeManager: IMemoryManager; + ragKnowledgeManager: IRAGKnowledgeManager; loreManager: IMemoryManager; cacheManager: ICacheManager; @@ -1302,6 +1355,28 @@ export type KnowledgeItem = { content: Content; }; +export interface RAGKnowledgeItem { + id: UUID; + agentId: UUID; + content: { + text: string; + metadata?: { + isMain?: boolean; + isChunk?: boolean; + originalId?: UUID; + chunkIndex?: number; + source?: string; + type?: string; + isShared?: boolean; + [key: string]: unknown; + }; + }; + embedding?: Float32Array; + createdAt?: number; + similarity?: number; + score?: number; +} + export interface ActionResponse { like: 
boolean; retweet: boolean; From fc68576888d457c4b8ea8efe2dc06670f2d9a5f1 Mon Sep 17 00:00:00 2001 From: "Ninja Dev (QI)" <142059473+azep-ninja@users.noreply.github.com> Date: Wed, 1 Jan 2025 21:05:58 -0700 Subject: [PATCH 2/5] Fixed character validation --- packages/core/src/environment.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/core/src/environment.ts b/packages/core/src/environment.ts index 0758d0d31d..9307ef63e6 100644 --- a/packages/core/src/environment.ts +++ b/packages/core/src/environment.ts @@ -76,7 +76,15 @@ export const CharacterSchema = z.object({ postExamples: z.array(z.string()), topics: z.array(z.string()), adjectives: z.array(z.string()), - knowledge: z.array(z.string()).optional(), + knowledge: z.array( + z.union([ + z.string(), + z.object({ + path: z.string(), + shared: z.boolean().optional() + }) + ]) + ).optional(), clients: z.array(z.nativeEnum(Clients)), plugins: z.union([ z.array(z.string()), From 2dab9e8c295831f9be0189c625a0b956bc5b238a Mon Sep 17 00:00:00 2001 From: azep-ninja Date: Sun, 5 Jan 2025 08:50:00 -0700 Subject: [PATCH 3/5] fxx dcker --- packages/plugin-story/Readme.md | 316 ++++++++++++++++++-------------- 1 file changed, 179 insertions(+), 137 deletions(-) diff --git a/packages/plugin-story/Readme.md b/packages/plugin-story/Readme.md index 32276d874a..9f8661626d 100644 --- a/packages/plugin-story/Readme.md +++ b/packages/plugin-story/Readme.md @@ -1,186 +1,228 @@ -# Plugin Story +# @elizaos/plugin-story -A plugin for managing intellectual property (IP) operations, including registration, licensing, and integration with IPFS for decentralized storage. +The Story Protocol plugin enables interaction with Story Protocol's IP management and licensing system on the Odyssey testnet. 
-## Overview and Purpose +## Overview -The Plugin Story simplifies the process of managing intellectual property by providing APIs and utilities for registering IP, licensing it, and uploading related data to IPFS. It is designed to streamline workflows for developers dealing with IP management in decentralized or traditional environments. +This plugin provides functionality to: +- Register IP assets on Story Protocol +- License IP assets +- Attach license terms to IP assets +- Query IP asset details and available licenses +- Manage wallet interactions with Story Protocol -## Installation Instructions +## Installation ```bash npm install @elizaos/plugin-story ``` -## Configuration Requirements +## Configuration -Ensure you have the following dependencies installed: +The plugin requires the following environment variables: -- `ethers` -- `@elizaos/core` -- `ipfs-http-client` - -## Usage Examples +```env +STORY_PRIVATE_KEY=your_private_key +STORY_API_KEY=your_api_key +STORY_API_BASE_URL=https://api.story.xyz +PINATA_JWT=your_pinata_jwt_token +``` -### Register Intellectual Property +## Usage -#### TypeScript Example +Import and register the plugin in your Eliza configuration: ```typescript -import { registerIP } from '@elizaos/plugin-story/actions/registerIP'; +import { storyPlugin } from "@elizaos/plugin-story"; -const ipDetails = { - name: 'My First IP', - description: 'A sample intellectual property', - owner: '0x123...456', +export default { + plugins: [storyPlugin], + // ... 
other configuration }; - -try { - const registrationResult = await registerIP(ipDetails); - console.log('IP Registered Successfully:', registrationResult); -} catch (error) { - console.error('IP Registration Failed:', error); -} ``` -### License Intellectual Property +## Features -```typescript -import { licenseIP } from '@elizaos/plugin-story/actions/licenseIP'; +### Register IP -const licenseData = { - ipId: 'IP123', - licenseType: 'Exclusive', - duration: 12, // in months -}; - -try { - const licenseResult = await licenseIP(licenseData); - console.log('IP Licensed Successfully:', licenseResult); -} catch (error) { - console.error('IP Licensing Failed:', error); -} -``` - -### Upload Data to IPFS +Register a new IP asset on Story Protocol: ```typescript -import { uploadJSONToIPFS } from '@elizaos/plugin-story/functions/uploadJSONToIPFS'; - -const jsonData = { - name: 'Sample Data', - description: 'Data to be stored on IPFS', -}; - -try { - const ipfsHash = await uploadJSONToIPFS(jsonData); - console.log('Data uploaded to IPFS. Hash:', ipfsHash); -} catch (error) { - console.error('IPFS Upload Failed:', error); -} +// Example conversation +User: "I want to register my IP titled 'My Story' with the description 'An epic tale'" +Assistant: "I'll help you register your IP on Story Protocol..." ``` -## API Reference - -### Actions - -#### `registerIP` - -Registers intellectual property. - -**Parameters:** - -- `details: { name: string; description: string; owner: string; }` - -**Returns:** - -- `Promise` - Result of the registration process. - -#### `licenseIP` - -Licenses registered intellectual property. - -**Parameters:** - -- `licenseData: { ipId: string; licenseType: string; duration: number; }` - -**Returns:** - -- `Promise` - Result of the licensing process. +### License IP -#### `getIPDetails` +License an existing IP asset: -Fetches details of a specific intellectual property. 
- -**Parameters:** - -- `ipId: string` - -**Returns:** - -- `Promise` - Details of the requested IP. - -### Functions - -#### `uploadJSONToIPFS` - -Uploads JSON data to IPFS. - -**Parameters:** +```typescript +// Example conversation +User: "I want to license IP Asset 0x1234...5678 with license terms ID 1" +Assistant: "I'll help you license that IP asset..." +``` -- `data: object` +### Attach Terms -**Returns:** +Attach license terms to an IP asset: -- `Promise` - The IPFS hash of the uploaded data. +```typescript +// Example conversation +User: "I want to attach commercial license terms with 10% revenue share to IP 0x1234...5678" +Assistant: "I'll help you attach those license terms..." +``` -### Templates +### Get IP Details -#### `index` +Query details about an IP asset: -Provides reusable templates for consistent IP management workflows. +```typescript +// Example conversation +User: "Get details for IP Asset 0x1234...5678" +Assistant: "Here are the details for that IP asset..." +``` -## Common Issues/Troubleshooting +### Get Available Licenses -### Issue: IPFS Upload Fails +Query available licenses for an IP asset: -- **Cause:** Invalid or large JSON data. -- **Solution:** Validate and compress JSON data before uploading. +```typescript +// Example conversation +User: "What licenses are available for IP Asset 0x1234...5678?" +Assistant: "Here are the available licenses..." +``` -### Issue: IP Registration Fails +## API Reference -- **Cause:** Missing or invalid owner address. -- **Solution:** Verify the owner's blockchain address. +### Actions -## Additional Documentation +- `REGISTER_IP`: Register a new IP asset +- `LICENSE_IP`: License an existing IP asset +- `ATTACH_TERMS`: Attach license terms to an IP +- `GET_IP_DETAILS`: Get details about an IP +- `GET_AVAILABLE_LICENSES`: Get available licenses for an IP -### Examples Folder +### Providers -The `examples/` folder contains practical implementations for registering, licensing, and uploading IP data. 
+- `storyWalletProvider`: Manages wallet interactions with Story Protocol -### Testing Guide +## Development -Run the following command to execute tests: +### Building ```bash -npm test +npm run build ``` -### Plugin Development Guide - -Developers can extend the plugin by adding new actions and utilities. Refer to the `src/` folder for detailed implementation patterns. +### Testing -### Security Best Practices - -- Validate all inputs for IP management actions. -- Ensure proper authentication and authorization for licensing. -- Keep dependencies updated to prevent vulnerabilities. - -### Performance Optimization Guide - -- Optimize IPFS uploads by compressing data. -- Cache frequently accessed IP details for faster retrieval. - -## Value Add +```bash +npm run test +``` -This plugin enhances intellectual property management workflows, reduces implementation overhead, and ensures compatibility with decentralized storage systems like IPFS. +## Dependencies + +- `@story-protocol/core-sdk`: Core SDK for Story Protocol +- `@pinata/sdk`: IPFS pinning service +- `viem`: Ethereum interaction library +- Other standard dependencies listed in package.json + +## Future Enhancements + +The following features and improvements are planned for future releases: + +1. **IP Management** + - Batch IP registration + - Advanced metadata management + - IP relationship mapping + - Automated IP verification + - Collection management + - IP analytics dashboard + +2. **Licensing Features** + - Custom license templates + - License negotiation tools + - Automated royalty distribution + - Usage tracking system + - License violation detection + - Bulk licensing tools + +3. **Rights Management** + - Advanced permission systems + - Rights transfer automation + - Usage rights tracking + - Derivative works management + - Rights verification tools + - Dispute resolution system + +4. 
**Smart Contract Integration** + - Contract deployment templates + - Automated verification + - Contract upgrade system + - Security analysis tools + - Gas optimization + - Multi-signature support + +5. **Content Management** + - Media file handling + - Content versioning + - Distribution tracking + - Content authentication + - Storage optimization + - Format conversion tools + +6. **Revenue Management** + - Automated payments + - Revenue sharing tools + - Payment tracking + - Financial reporting + - Tax documentation + - Audit trail system + +7. **Developer Tools** + - Enhanced SDK features + - Testing framework + - Documentation generator + - CLI improvements + - Integration templates + - Performance monitoring + +8. **Analytics and Reporting** + - Usage statistics + - Revenue analytics + - License tracking + - Performance metrics + - Custom reporting + - Market analysis tools + +We welcome community feedback and contributions to help prioritize these enhancements. + +## Contributing + +Contributions are welcome! Please see the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information. 
+ +## Credits + +This plugin integrates with and builds upon several key technologies: + +- [Story Protocol](https://www.story.xyz/): IP management and licensing platform +- [@story-protocol/core-sdk](https://www.npmjs.com/package/@story-protocol/core-sdk): Official Story Protocol SDK +- [@pinata/sdk](https://www.npmjs.com/package/@pinata/sdk): IPFS pinning service +- [viem](https://www.npmjs.com/package/viem): Ethereum interaction library + +Special thanks to: +- The Story Protocol team for developing the IP management platform +- The Story Protocol Developer community +- The Pinata team for IPFS infrastructure +- The Eliza community for their contributions and feedback + +For more information about Story Protocol capabilities: +- [Story Protocol Documentation](https://docs.story.xyz/) +- [Story Protocol Dashboard](https://app.story.xyz/) +- [Story Protocol Blog](https://www.story.xyz/blog) +- [Story Protocol GitHub](https://github.com/storyprotocol) + +## License + +This plugin is part of the Eliza project. See the main project repository for license information. 
From 98e564b693b987fdd635e5e025c0eac190da4711 Mon Sep 17 00:00:00 2001 From: azep-ninja Date: Sun, 5 Jan 2025 14:33:53 -0700 Subject: [PATCH 4/5] sync --- packages/adapter-sqljs/src/index.ts | 1 - packages/core/src/ragknowledge.ts | 91 ++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 17 deletions(-) diff --git a/packages/adapter-sqljs/src/index.ts b/packages/adapter-sqljs/src/index.ts index 25021aba80..db27215e10 100644 --- a/packages/adapter-sqljs/src/index.ts +++ b/packages/adapter-sqljs/src/index.ts @@ -18,7 +18,6 @@ import { import { v4 } from "uuid"; import { sqliteTables } from "./sqliteTables.ts"; import { Database } from "./types.ts"; -import { elizaLogger } from "@elizaos/core"; export class SqlJsDatabaseAdapter extends DatabaseAdapter diff --git a/packages/core/src/ragknowledge.ts b/packages/core/src/ragknowledge.ts index cca883ae48..0856cea67a 100644 --- a/packages/core/src/ragknowledge.ts +++ b/packages/core/src/ragknowledge.ts @@ -293,26 +293,30 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { type: 'pdf' | 'md' | 'txt'; isShared?: boolean }): Promise { + const timeMarker = (label: string) => { + const time = (Date.now() - startTime) / 1000; + elizaLogger.info(`[Timing] ${label}: ${time.toFixed(2)}s`); + }; + + const startTime = Date.now(); let content = file.content; try { - // Process based on file type - switch(file.type) { - case 'pdf': - //To-Do: Add native support for basic PDFs - elizaLogger.warn(`PDF files not currently supported: ${file.type}`) - break; - case 'md': - case 'txt': - break; - default: - elizaLogger.warn(`Unsupported file type: ${file.type}`); - return; - } + const fileSizeKB = (new TextEncoder().encode(content)).length / 1024; + elizaLogger.info(`[File Progress] Starting ${file.path} (${fileSizeKB.toFixed(2)} KB)`); - elizaLogger.info(`[Processing Files] ${file.path} ${content} ${file.isShared}`) + // Step 1: Preprocessing + const preprocessStart = Date.now(); + const processedContent = 
this.preprocess(content); + timeMarker('Preprocessing'); + + // Step 2: Main document embedding + const mainEmbeddingArray = await embed(this.runtime, processedContent); + const mainEmbedding = new Float32Array(mainEmbeddingArray); + timeMarker('Main embedding'); - await this.createKnowledge({ + // Step 3: Create main document + await this.runtime.databaseAdapter.createKnowledge({ id: stringToUuid(file.path), agentId: this.runtime.agentId, content: { @@ -322,8 +326,63 @@ export class RAGKnowledgeManager implements IRAGKnowledgeManager { type: file.type, isShared: file.isShared || false } - } + }, + embedding: mainEmbedding, + createdAt: Date.now() }); + timeMarker('Main document storage'); + + // Step 4: Generate chunks + const chunks = await splitChunks(processedContent, 512, 20); + const totalChunks = chunks.length; + elizaLogger.info(`Generated ${totalChunks} chunks`); + timeMarker('Chunk generation'); + + // Step 5: Process chunks with larger batches + const BATCH_SIZE = 10; // Increased batch size + let processedChunks = 0; + + for (let i = 0; i < chunks.length; i += BATCH_SIZE) { + const batchStart = Date.now(); + const batch = chunks.slice(i, Math.min(i + BATCH_SIZE, chunks.length)); + + // Process embeddings in parallel + const embeddings = await Promise.all( + batch.map(chunk => embed(this.runtime, chunk)) + ); + + // Batch database operations + await Promise.all(embeddings.map(async (embeddingArray, index) => { + const chunkId = `${stringToUuid(file.path)}-chunk-${i + index}` as UUID; + const chunkEmbedding = new Float32Array(embeddingArray); + + await this.runtime.databaseAdapter.createKnowledge({ + id: chunkId, + agentId: this.runtime.agentId, + content: { + text: batch[index], + metadata: { + source: file.path, + type: file.type, + isShared: file.isShared || false, + isChunk: true, + originalId: stringToUuid(file.path), + chunkIndex: i + index + } + }, + embedding: chunkEmbedding, + createdAt: Date.now() + }); + })); + + processedChunks += 
batch.length; + const batchTime = (Date.now() - batchStart) / 1000; + elizaLogger.info(`[Batch Progress] Processed ${processedChunks}/${totalChunks} chunks (${batchTime.toFixed(2)}s for batch)`); + } + + const totalTime = (Date.now() - startTime) / 1000; + elizaLogger.info(`[Complete] Processed ${file.path} in ${totalTime.toFixed(2)}s`); + } catch (error) { if (file.isShared && error?.code === 'SQLITE_CONSTRAINT_PRIMARYKEY') { elizaLogger.info(`Shared knowledge ${file.path} already exists in database, skipping creation`); From f91aa731d182962c1e610f69abed543dccbe4e56 Mon Sep 17 00:00:00 2001 From: azep-ninja Date: Sun, 5 Jan 2025 23:51:23 -0700 Subject: [PATCH 5/5] sync --- packages/core/src/runtime.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 030c9ff14b..940bbba3e4 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -35,6 +35,7 @@ import { IMemoryManager, KnowledgeItem, RAGKnowledgeItem, + Media, ModelClass, ModelProviderName, Plugin,