Skip to content

Commit

Permalink
Add stats method to PineconeClient and new pinecone-stats tool
Browse files Browse the repository at this point in the history
- Implemented `stats` method in `PineconeClient` to retrieve detailed statistics about the Pinecone index, including vector count, index dimension, fullness, and namespace-specific stats.
- Added a new tool `pinecone-stats` in `server.py` to allow users to fetch index statistics via the server interface.
- Removed category labels from existing tools for consistency.
  • Loading branch information
sirmews committed Jan 4, 2025
1 parent 2aaddc8 commit db6aa10
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
35 changes: 35 additions & 0 deletions src/mcp_pinecone/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,41 @@ def search_records(
logger.error(f"Error searching records: {e}")
raise

def stats(self) -> Dict[str, Any]:
    """
    Return a plain-dict summary of the index statistics.

    Calls ``self.index.describe_index_stats()`` and copies the fields of
    interest out of the response object into built-in dicts.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - namespaces: Dict mapping each namespace name to
              ``{"vector_count": <count>}``; empty when the response
              carries no namespaces
            - dimension: Dimension of the indexed vectors
            - index_fullness: Fullness of the index (0-1 scale)
            - total_vector_count: Total number of vectors across all
              namespaces

    Raises:
        Exception: Any error raised while fetching or reading the stats is
            logged and re-raised unchanged.
    """
    try:
        index_stats = self.index.describe_index_stats()
        # Each per-namespace summary is an object, so flatten it to a plain
        # dict. `or {}` treats an unset (None) namespaces mapping as "no
        # namespaces" instead of failing on `.items()`.
        namespaces = {
            ns_name: {"vector_count": ns_summary.vector_count}
            for ns_name, ns_summary in (index_stats.namespaces or {}).items()
        }
        return {
            "namespaces": namespaces,
            "dimension": index_stats.dimension,
            "index_fullness": index_stats.index_fullness,
            "total_vector_count": index_stats.total_vector_count,
        }
    except Exception as e:
        # Same log-and-re-raise style as the other client methods.
        logger.error(f"Error getting stats: {e}")
        raise

def delete_records(
self, ids: List[str], namespace: Optional[str] = None
) -> Dict[str, Any]:
Expand Down
19 changes: 13 additions & 6 deletions src/mcp_pinecone/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ async def handle_list_tools() -> list[types.Tool]:
types.Tool(
name="read-document",
description="Read a document from the pinecone knowledge base",
category="read",
inputSchema={
"type": "object",
"properties": {
Expand All @@ -146,7 +145,6 @@ async def handle_list_tools() -> list[types.Tool]:
types.Tool(
name="chunk-document",
description="First step in document storage process. Chunks a document into smaller segments for optimal storage and retrieval. Must be called before upsert-document.",
category="mutation",
inputSchema={
"type": "object",
"properties": {
Expand All @@ -160,7 +158,6 @@ async def handle_list_tools() -> list[types.Tool]:
types.Tool(
name="embed-document",
description="Second step in document storage process. Embeds a document into the knowledge base as a vector. Must be used after chunk-document. Expects chunks from the chunk-document response.",
category="mutation",
inputSchema={
"type": "object",
"properties": {
Expand All @@ -183,7 +180,6 @@ async def handle_list_tools() -> list[types.Tool]:
types.Tool(
name="upsert-document",
description="Third step in document storage process. Upserts a document into the knowledge base. Must be used after chunk-document and embed-document. Expects embeddings from the embed-document response.",
category="mutation",
inputSchema={
"type": "object",
"properties": {
Expand Down Expand Up @@ -211,7 +207,6 @@ async def handle_list_tools() -> list[types.Tool]:
types.Tool(
name="process-document",
description="Process a document by optionally chunking, embedding, and upserting it into the knowledge base. Returns the document ID.",
category="mutation",
inputSchema={
"type": "object",
"properties": {
Expand All @@ -234,7 +229,6 @@ async def handle_list_tools() -> list[types.Tool]:
types.Tool(
name="list-documents",
description="List all documents in the knowledge base by namespace",
category="read",
inputSchema={
"type": "object",
"properties": {
Expand All @@ -246,6 +240,15 @@ async def handle_list_tools() -> list[types.Tool]:
"required": ["namespace"],
},
),
types.Tool(
name="pinecone-stats",
description="Get stats about the Pinecone index specified in this server",
inputSchema={
"type": "object",
"properties": {},
"required": [],
},
),
]


Expand All @@ -259,6 +262,10 @@ async def handle_call_tool(
results = pinecone_client.list_records(namespace=namespace)
return [types.TextContent(type="text", text=json.dumps(results))]

if name == "pinecone-stats":
stats = pinecone_client.stats()
return [types.TextContent(type="text", text=json.dumps(stats))]

if name == "semantic-search":
query = arguments.get("query")
top_k = arguments.get("top_k", 10)
Expand Down

0 comments on commit db6aa10

Please sign in to comment.