From db6aa106428ed4cf49f09d824435e295f5c61462 Mon Sep 17 00:00:00 2001 From: Nav Date: Sat, 4 Jan 2025 15:00:11 +1100 Subject: [PATCH] Add stats method to PineconeClient and new pinecone-stats tool - Implemented `stats` method in `PineconeClient` to retrieve detailed statistics about the Pinecone index, including vector count, index dimension, fullness, and namespace-specific stats. - Added a new tool `pinecone-stats` in `server.py` to allow users to fetch index statistics via the server interface. - Removed category labels from existing tools for consistency. --- src/mcp_pinecone/pinecone.py | 35 +++++++++++++++++++++++++++++++++++ src/mcp_pinecone/server.py | 19 +++++++++++++------ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/mcp_pinecone/pinecone.py b/src/mcp_pinecone/pinecone.py index 797bc27..7a3698a 100644 --- a/src/mcp_pinecone/pinecone.py +++ b/src/mcp_pinecone/pinecone.py @@ -186,6 +186,41 @@ def search_records( logger.error(f"Error searching records: {e}") raise + def stats(self) -> Dict[str, Any]: + """ + Get detailed statistics about the index including: + - Total vector count + - Index dimension + - Index fullness + - Namespace-specific statistics + + Returns: + Dict[str, Any]: A dictionary containing: + - namespaces: Dict mapping namespace names to their statistics + - dimension: Dimension of the indexed vectors + - index_fullness: Fullness of the index (0-1 scale) + - total_vector_count: Total number of vectors across all namespaces + + """ + try: + stats = self.index.describe_index_stats() + # Convert namespaces to dict - each NamespaceSummary needs to be converted to dict + namespaces_dict = {} + for ns_name, ns_summary in stats.namespaces.items(): + namespaces_dict[ns_name] = { + "vector_count": ns_summary.vector_count, + } + + return { + "namespaces": namespaces_dict, + "dimension": stats.dimension, + "index_fullness": stats.index_fullness, + "total_vector_count": stats.total_vector_count, + } + except Exception as e: + logger.error(f"Error getting stats: {e}") + raise + def delete_records( self, ids: List[str], namespace: Optional[str] = None ) -> Dict[str, Any]: diff --git a/src/mcp_pinecone/server.py b/src/mcp_pinecone/server.py index 04e7862..4b5b065 100644 --- a/src/mcp_pinecone/server.py +++ b/src/mcp_pinecone/server.py @@ -130,7 +130,6 @@ async def handle_list_tools() -> list[types.Tool]: types.Tool( name="read-document", description="Read a document from the pinecone knowledge base", - category="read", inputSchema={ "type": "object", "properties": { @@ -146,7 +145,6 @@ async def handle_list_tools() -> list[types.Tool]: types.Tool( name="chunk-document", description="First step in document storage process. Chunks a document into smaller segments for optimal storage and retrieval. Must be called before upsert-document.", - category="mutation", inputSchema={ "type": "object", "properties": { @@ -160,7 +158,6 @@ async def handle_list_tools() -> list[types.Tool]: types.Tool( name="embed-document", description="Second step in document storage process. Embeds a document into the knowledge base as a vector. Must be used after chunk-document. Expects chunks from the chunk-document response.", - category="mutation", inputSchema={ "type": "object", "properties": { @@ -183,7 +180,6 @@ async def handle_list_tools() -> list[types.Tool]: types.Tool( name="upsert-document", description="Third step in document storage process. Upserts a document into the knowledge base. Must be used after chunk-document and embed-document. Expects embeddings from the embed-document response.", - category="mutation", inputSchema={ "type": "object", "properties": { @@ -211,7 +207,6 @@ async def handle_list_tools() -> list[types.Tool]: types.Tool( name="process-document", description="Process a document by optionally chunking, embedding, and upserting it into the knowledge base. Returns the document ID.", - category="mutation", inputSchema={ "type": "object", "properties": { @@ -234,7 +229,6 @@ async def handle_list_tools() -> list[types.Tool]: types.Tool( name="list-documents", description="List all documents in the knowledge base by namespace", - category="read", inputSchema={ "type": "object", "properties": { @@ -246,6 +240,15 @@ async def handle_list_tools() -> list[types.Tool]: "required": ["namespace"], }, ), + types.Tool( + name="pinecone-stats", + description="Get stats about the Pinecone index specified in this server", + inputSchema={ + "type": "object", + "properties": {}, + "required": [], + }, + ), ] @@ -259,6 +262,10 @@ async def handle_call_tool( results = pinecone_client.list_records(namespace=namespace) return [types.TextContent(type="text", text=json.dumps(results))] + if name == "pinecone-stats": + stats = pinecone_client.stats() + return [types.TextContent(type="text", text=json.dumps(stats))] + if name == "semantic-search": query = arguments.get("query") top_k = arguments.get("top_k", 10)