From 7fb1532ffd52aef0b5e9eb879d826f5e55ce0327 Mon Sep 17 00:00:00 2001 From: cmgzn Date: Mon, 21 Oct 2024 17:45:22 +0800 Subject: [PATCH] Add support for user-defined storage_context in llamaindex_knowledge and update knowledge config structure --- .../configs/knowledge_config.json | 55 +++++++++++++++++-- src/agentscope/rag/llama_index_knowledge.py | 51 ++++++++++++++++- 2 files changed, 98 insertions(+), 8 deletions(-) diff --git a/examples/conversation_with_RAG_agents/configs/knowledge_config.json b/examples/conversation_with_RAG_agents/configs/knowledge_config.json index d7ef45542..206676093 100644 --- a/examples/conversation_with_RAG_agents/configs/knowledge_config.json +++ b/examples/conversation_with_RAG_agents/configs/knowledge_config.json @@ -20,7 +20,7 @@ } } }, - "store_and_index": { + "data_parse": { "transformations": [ { "create_object": true, @@ -34,7 +34,22 @@ ] } } - ] + ], + "store_and_index": { + "storage_context": { + "vector_store": { + "create_object": true, + "module": "llama_index.vector_stores.elasticsearch", + "class": "ElasticsearchStore", + "init_args": { + "index_name": "agentscope_code_rag", + "es_url": "http://localhost:9200", + "es_user": "elastic", + "es_password": "" + } + } + } + } }, { "knowledge_id": "agentscope_api_rag", @@ -57,7 +72,22 @@ } } } - ] + ], + "store_and_index": { + "storage_context": { + "vector_store": { + "create_object": true, + "module": "llama_index.vector_stores.elasticsearch", + "class": "ElasticsearchStore", + "init_args": { + "index_name": "agentscope_api_rag", + "es_url": "http://localhost:9200", + "es_user": "elastic", + "es_password": "" + } + } + } + } }, { "knowledge_id": "agentscope_global_rag", @@ -95,7 +125,7 @@ } } }, - "store_and_index": { + "data_parse": { "transformations": [ { "create_object": true, @@ -109,6 +139,21 @@ ] } } - ] + ], + "store_and_index": { + "storage_context": { + "vector_store": { + "create_object": true, + "module": "llama_index.vector_stores.elasticsearch", + "class": "ElasticsearchStore", + "init_args": { + "index_name": "agentscope_global_rag", + "es_url": "http://localhost:9200", + "es_user": "elastic", + "es_password": "" + } + } + } + } } ] \ No newline at end of file diff --git a/src/agentscope/rag/llama_index_knowledge.py b/src/agentscope/rag/llama_index_knowledge.py index 142f71068..e4566bb07 100644 --- a/src/agentscope/rag/llama_index_knowledge.py +++ b/src/agentscope/rag/llama_index_knowledge.py @@ -252,9 +252,21 @@ def _load_index(self) -> None: Load the persisted index from persist_dir. """ # load the storage_context - storage_context = StorageContext.from_defaults( - persist_dir=self.persist_dir, + # Inject the persist_dir setting + self.knowledge_config.get("store_and_index", {}).get( + "storage_context", + {}, + ).update( + { + "persist_dir": self.persist_dir, + }, ) + storage_context = self._set_store(self.knowledge_config) + if not storage_context: + # if storage_context is not set, use the default + storage_context = StorageContext.from_defaults( + persist_dir=self.persist_dir, + ) # construct index from self.index = load_index_from_storage( storage_context=storage_context, @@ -287,10 +299,14 @@ def _data_to_index(self) -> None: transformations=transformations, ) nodes = nodes + nodes_docs + # set store + storage_context = self._set_store(self.knowledge_config) # convert nodes to index + # if storage_context is None, use the default self.index = VectorStoreIndex( nodes=nodes, embed_model=self.emb_model, + storage_context=storage_context, ) logger.info("index calculation completed.") # persist the calculated index @@ -401,7 +417,16 @@ def _set_transformations(self, config: dict) -> Any: Args: config (dict): a dictionary containing configurations. """ - if "store_and_index" in config: + if "data_parse" in config: + temp = self._prepare_args_from_config( + config=config.get("data_parse", {}), + ) + transformations = temp.get("transformations") + elif "store_and_index" in config: + logger.warning( + "The old configuration structure is deprecated, " + "please use data_parse instead of store_and_index.", + ) temp = self._prepare_args_from_config( config=config.get("store_and_index", {}), ) @@ -427,6 +452,26 @@ def _set_transformations(self, config: dict) -> Any: transformations = {"transformations": transformations} return transformations + def _set_store(self, config: dict) -> Any: + """ + Set the store as needed, or just use the default setting. + + Args: + config (dict): a dictionary containing configurations. + """ + if "store_and_index" in config: + temp = self._prepare_args_from_config( + config=config.get("store_and_index", {}), + ) + context_config = temp.get("storage_context") + else: + return None + + # Create the storage context + storage_context = StorageContext.from_defaults(**context_config) + logger.info("storage_context is ready.") + return storage_context + def _get_retriever( self, similarity_top_k: int = None,