ui.txt

File Name: __init__.py
========================================

========================================

File Name: chainlit\chainlit.py
========================================

========================================

File Name: streamlit\config.py
========================================
# Base address of the backend API; every endpoint below is derived from it.
API_BASE_URL = "http://localhost:8000"

# URL prefixes shared by the endpoint groups.
_CHROMADB_ROOT = API_BASE_URL + "/chromadb"
_CHROMA_ROOT = API_BASE_URL + "/chroma"
_GDRIVE_ROOT = API_BASE_URL + "/gdrive"

# Database-level endpoints. The two collection entries are completed by the
# caller with "/{collection_name}".
DB_ENDPOINTS = dict(
    create=_CHROMADB_ROOT + "/create",
    delete=_CHROMADB_ROOT + "/delete",
    create_collection=_CHROMADB_ROOT + "/collections",
    delete_collection=_CHROMADB_ROOT + "/collections",
    list_collections=_CHROMADB_ROOT + "/collections",
    list_embeddings=_CHROMADB_ROOT + "/embeddings",
)

# Index operations endpoints. Every operation shares the same base URL; the
# suffix noted next to each key is appended by the caller.
INDEX_ENDPOINTS = dict.fromkeys(
    (
        "add_documents",    # /{collection_name}/add_documents
        "search",           # /{collection_name}/search
        "delete_document",  # /{collection_name}/documents/{document_id}
        "update_document",  # /{collection_name}/documents/{document_id}
        "count",            # /{collection_name}/count
        "process_pdfs",     # /{collection_name}/process_pdfs
        "process_folder",   # /{collection_name}/process_folder
    ),
    _CHROMA_ROOT,
)

# Google Drive endpoints. "download_files" is completed with "/{folder_id}".
GDRIVE_ENDPOINTS = dict(
    authorize=_GDRIVE_ROOT + "/authorize",
    oauth2callback=_GDRIVE_ROOT + "/oauth2callback",
    download_files=_GDRIVE_ROOT + "/download_files",
)

# Colour palette for the status styles.
STYLES = dict(
    success_color="#0FBA81",
    error_color="#FF4B4B",
    warning_color="#FFA726",
    info_color="#2196F3",
)
========================================

File Name: streamlit\Home.py
========================================
import streamlit as st

# Browser-tab title, icon and layout for the landing page.
_PAGE_SETTINGS = {
    "page_title": "AIIP RAG Agents",
    "page_icon": "🤖",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

st.title("Welcome to AIIP RAG Agents! 🤖")

# Landing text, assembled line by line (leading and trailing empty entries
# reproduce the newline at each end of the rendered markdown).
_WELCOME_LINES = [
    "",
    "### Navigate using the sidebar to:",
    "1. 🗄️ **Database Operations**: Manage ChromaDB databases and collections",
    "2. 📑 **Index Operations**: Process and index documents",
    "3. 💬 **Agent Chat**: Interact with RAG agents",
    "",
    "Choose an option from the sidebar to get started!",
    "",
]
st.markdown("\n".join(_WELCOME_LINES))


========================================

File Name: streamlit\__init__.py
========================================

========================================

File Name: streamlit\components\status.py
========================================
import streamlit as st
from typing import Optional

def show_status_message(
    message: str,
    type: str = "info",
    duration: Optional[int] = None,
    key: Optional[str] = None
):
    """
    Display a status message using Streamlit.

    Args:
        message: Message to display
        type: Type of message (success, error, warning, info); any other
            value is rendered with the info style
        duration: When truthy, the message is shown for this many seconds
            (the script blocks meanwhile) and is then removed; otherwise the
            message simply stays on the page
        key: Unique key for the message (accepted for backward
            compatibility; not used)
    """
    # Imported here because this module has no top-level ``import time``;
    # the previous code hit a NameError whenever ``duration`` was given.
    import time

    # Message type -> (Streamlit element name, icon).
    variants = {
        "success": ("success", "✅"),
        "error": ("error", "🚨"),
        "warning": ("warning", "⚠️"),
    }
    element, icon = variants.get(type, ("info", "ℹ️"))

    if not duration:
        getattr(st, element)(message, icon=icon)
        return

    # Render inside a placeholder so that this exact element can be cleared
    # afterwards. Calling ``st.empty()`` a second time would only create a
    # new, unrelated placeholder and leave the message on screen.
    placeholder = st.empty()
    getattr(placeholder, element)(message, icon=icon)
    time.sleep(duration)
    placeholder.empty()

def show_operation_status(operation_name: str, success: bool = True):
    """
    Report the outcome of an operation as a success or error box.

    Args:
        operation_name: Name of the operation, used as the message prefix
        success: True renders the success box, False the error box
    """
    if not success:
        st.error(f"{operation_name} failed. Please try again.", icon="🚨")
        return

    st.success(f"{operation_name} completed successfully!", icon="✅")
========================================

File Name: streamlit\components\__init__.py
========================================

========================================

File Name: streamlit\pages\01_Database_Operations.py
========================================
import streamlit as st
import sys
from pathlib import Path
import time

# Put the parent of this file's directory on sys.path; presumably this is what
# lets the `utils`, `components` and `config` imports below resolve.
sys.path.append(str(Path(__file__).parent.parent))

from utils.api import ChromaDBClient
from components.status import show_status_message, show_operation_status
from config import DB_ENDPOINTS

def init_page():
    """Configure the page, draw its title and build the API client.

    Returns:
        A ``ChromaDBClient`` constructed from ``DB_ENDPOINTS``.
    """
    page_settings = {
        "page_title": "Database Operations",
        "page_icon": "🗄️",
        "layout": "wide",
    }
    st.set_page_config(**page_settings)
    st.title("ChromaDB Operations 🗄️")

    db_client = ChromaDBClient(DB_ENDPOINTS)
    return db_client

def render_database_section(client: ChromaDBClient):
    """Render database initialization section.

    Lists the embedding models reported by the API, lets the user pick one
    together with a vector space, and sends the resulting configuration to
    ``client.create_database`` when "Apply Configuration" is pressed.

    Args:
        client: API client used to list embeddings and create the database.
    """
    st.header("Database Management")

    # Get available embedding models. On failure (or an empty/null payload)
    # fall back to an empty mapping so the rest of the page still renders.
    try:
        embeddings_response = client.list_embeddings()
        available_embeddings = embeddings_response.get("embeddings") or {}
    except Exception as e:
        show_status_message(f"Error fetching embedding models: {str(e)}", type="error")
        available_embeddings = {}

    # Database configuration form
    with st.expander("Configure Database", expanded=False):
        embedding_name = st.selectbox(
            "Embedding Model",
            options=list(available_embeddings.keys()),
            help="Select the embedding model to use"
        )

        space_type = st.selectbox(
            "Vector Space",
            options=["cosine", "l2", "ip"],
            help="Select the vector space type for similarity calculations"
        )

        # Without any embedding model the selectbox has nothing to return, so
        # looking the selection up would fail with a KeyError and surface as
        # "Error configuring database: None". Disable the button instead.
        nothing_selected = embedding_name not in available_embeddings

        if st.button("Apply Configuration", type="primary", disabled=nothing_selected):
            try:
                config = {
                    "database_type": "ChromaDB",
                    "collection_name": "default_collection",
                    "embedding": available_embeddings[embedding_name],
                    "parameters": {
                        "collection_metadata": {"hnsw:space": space_type}
                    }
                }
                client.create_database(config)
                show_operation_status("Database configuration")
            except Exception as e:
                show_status_message(f"Error configuring database: {str(e)}", type="error")

def render_collections_section(client: ChromaDBClient):
    """Draw the collection creation form and the list of existing collections.

    Each listed collection gets a "Delete" button that needs two clicks: the
    first stores a confirmation flag in the session state and shows a
    warning, the second performs the deletion.

    Args:
        client: API client used to create, list and delete collections.
    """
    st.header("Collections Management")

    # Creation form; the button stays disabled until a name is typed.
    with st.expander("Create New Collection", expanded=False):
        new_name = st.text_input("Collection Name")
        create_clicked = st.button("Create Collection", disabled=not new_name)
        if create_clicked:
            try:
                client.create_collection(new_name)
                show_operation_status("Collection creation")
                # Leave the status visible briefly before the page reloads.
                time.sleep(1)
                st.rerun()
            except Exception as e:
                show_status_message(f"Error creating collection: {str(e)}", type="error")

    # Listing with per-collection delete buttons.
    st.subheader("Existing Collections")
    try:
        existing = client.list_collections().get("collections", [])

        if existing:
            for name in existing:
                confirm_key = f"confirm_delete_{name}"
                label_col, action_col = st.columns([3, 1])
                with label_col:
                    st.write(f"📁 {name}")
                with action_col:
                    delete_clicked = st.button("Delete", key=f"del_{name}", type="secondary")
                    if delete_clicked:
                        try:
                            already_confirmed = st.session_state.get(confirm_key)
                            if not already_confirmed:
                                # First click: remember it and ask again.
                                st.session_state[confirm_key] = True
                                show_status_message(
                                    f"⚠️ Click again to confirm deletion of collection '{name}'",
                                    type="warning"
                                )
                            else:
                                # Second click: delete and reload.
                                client.delete_collection(name)
                                show_operation_status("Collection deletion")
                                st.session_state[confirm_key] = False
                                time.sleep(1)
                                st.rerun()
                        except Exception as e:
                            show_status_message(
                                f"Error deleting collection: {str(e)}",
                                type="error"
                            )
                            st.session_state[confirm_key] = False
        else:
            st.info("No collections found. Create one above! 👆")
    except Exception as e:
        show_status_message(f"Error listing collections: {str(e)}", type="error")
def main():
    """Set up the page and draw its sections, separated by a divider."""
    db_client = init_page()

    sections = (render_database_section, render_collections_section)
    for position, draw_section in enumerate(sections):
        if position > 0:
            st.divider()
        draw_section(db_client)


if __name__ == "__main__":
    main()
========================================

File Name: streamlit\pages\02_Chroma_Index_Operations.py
========================================
from typing import Any, Dict, List
import streamlit as st
import sys
from pathlib import Path
import time
import os
import logging

# Set up logging: configure the root logger at INFO level and create the
# logger used throughout this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Add the parent of this file's directory to sys.path (presumably so that the
# `utils`, `components` and `config` imports below resolve)
sys.path.append(str(Path(__file__).parent.parent))

from utils.api import ChromaIndexClient, GDriveClient
from utils.file_utils import FileManager
from components.status import show_status_message, show_operation_status
from config import INDEX_ENDPOINTS, GDRIVE_ENDPOINTS

def init_page():
    """Configure the page, handle the auth callback flag and build the clients.

    When the URL carries a truthy ``auth_success`` query parameter, the
    session is marked as authorized for Google Drive and the query
    parameters are cleared.

    Returns:
        Tuple ``(index_client, gdrive_client)``.
    """
    page_settings = {
        "page_title": "Index Operations",
        "page_icon": "📑",
        "layout": "wide",
    }
    st.set_page_config(**page_settings)
    st.title("Document Processing & Index Operations 📑")

    # Check for auth callback
    auth_flag = st.query_params.get("auth_success")
    if auth_flag:
        st.session_state.drive_authorized = True
        st.query_params.clear()

    # Initialize API clients (index client first, then the Drive client)
    return ChromaIndexClient(INDEX_ENDPOINTS), GDriveClient(GDRIVE_ENDPOINTS)

def check_drive_auth():
    """Check if already authorized with Google Drive.

    Authorization is detected by the presence of a ``token.pickle`` file,
    looked up relative to the current working directory.

    Returns:
        True when the file exists, False otherwise.
    """
    token_file = 'token.pickle'
    token_present = os.path.exists(token_file)
    return token_present

def render_gdrive_section(gdrive_client: GDriveClient):
    """Draw the Google Drive section: authorization first, then folder download.

    While the session is not authorized only the "Connect" button is shown.
    Once authorized, a form asks for a folder ID and the files returned by
    ``gdrive_client.download_files`` are listed and stored in
    ``st.session_state.downloaded_files``.

    Args:
        gdrive_client: API client for the Google Drive endpoints.
    """
    st.header("Google Drive Integration")

    # Seed the authorization flag once per session.
    if "drive_authorized" not in st.session_state:
        st.session_state.drive_authorized = check_drive_auth()

    # Step 1: Authorization
    if not st.session_state.drive_authorized:
        st.write("🔐 Connect to Google Drive to access your files")
        connect_clicked = st.button("Connect to Google Drive", type="primary")
        if connect_clicked:
            try:
                authorization_link = gdrive_client.get_auth_url()
                st.markdown(
                    """
                    1. Click the link below to authorize access to Google Drive
                    2. After authorization, you'll be redirected back automatically
                    """
                )
                st.markdown(f"[Click here to authorize]({authorization_link})")
            except Exception as e:
                show_status_message(f"Error connecting to Google Drive: {str(e)}", type="error")
        return

    # Step 2: Folder selection (only reached when authorized)
    st.success("✅ Connected to Google Drive")

    with st.form("folder_form"):
        folder_id = st.text_input(
            "Folder ID",
            help="Enter the Google Drive folder ID containing your PDFs"
        )
        submitted = st.form_submit_button("Download Files", type="primary")

    # The download is handled outside the form so its message is not part of it.
    if not (submitted and folder_id):
        return

    try:
        with st.spinner("Downloading files from Drive..."):
            payload = gdrive_client.download_files(folder_id)
            has_files = payload and isinstance(payload, dict) and payload.get("files")
            if not has_files:
                st.info("No files found in the specified folder")
            else:
                names = payload["files"]
                st.session_state.downloaded_files = names
                total = len(names)
                logger.info(f"Downloaded files: {names}")

                # Headline, a blank spacer line, then one markdown bullet per file.
                message_lines = [
                    f"✨ Successfully downloaded {total} document{'s' if total > 1 else ''}:",
                    "",
                ]
                message_lines.extend(f"* {name}" for name in names)
                st.success("\n".join(message_lines))
    except Exception as e:
        show_status_message(f"Error downloading files: {str(e)}", type="error")

def render_local_upload():
    """Draw the PDF uploader and save newly selected files via ``FileManager``.

    Names of files that were already saved are kept in
    ``st.session_state.upload_status["files"]`` so that only files not seen
    before are saved again; the record is reset once the uploader is empty.
    """
    st.header("Local File Upload")

    # Initialize session state
    if "upload_status" not in st.session_state:
        st.session_state.upload_status = {"completed": False, "files": set()}

    uploads = st.file_uploader(
        "Upload PDF files",
        type=["pdf"],
        accept_multiple_files=True,
        key="file_uploader"
    )

    if not uploads:
        # Nothing selected any more: forget what was recorded earlier.
        if st.session_state.upload_status["files"]:
            st.session_state.upload_status = {"completed": False, "files": set()}
        return

    # Names present in the uploader that have not been saved yet.
    selected_names = {item.name for item in uploads}
    pending_names = selected_names - st.session_state.upload_status["files"]
    if not pending_names:
        return

    with st.spinner("Copying files to raw data directory..."):
        saved = 0
        for item in uploads:
            if item.name not in pending_names:
                continue
            try:
                FileManager.save_uploaded_file(item)
                saved += 1
                st.session_state.upload_status["files"].add(item.name)
            except Exception as e:
                # Report the failure and carry on with the remaining files.
                show_status_message(f"Error saving file {item.name}: {str(e)}", type="error")

        if saved > 0:
            show_status_message(f"Successfully uploaded {saved} files", type="success")
        
def render_available_documents():
    """List the files from the raw data directory as checkboxes.

    Returns:
        Paths of the ticked files that ``FileManager.is_valid_pdf`` accepts;
        an empty list when no files are available.
    """
    st.subheader("Available Documents")

    file_names = FileManager.get_raw_data_files()
    if not file_names:
        st.info("No documents available. Upload files or download from Google Drive.")
        return []

    st.write(f"Found {len(file_names)} documents in raw data directory:")

    # Two-column layout; files alternate between the columns in sorted order.
    columns = st.columns(2)

    chosen_paths = []
    for position, name in enumerate(sorted(file_names)):
        with columns[position % 2]:
            ticked = st.checkbox(f"📄 {name}", key=f"file_{name}")
            if ticked:
                path = FileManager.get_file_path(name)
                if not FileManager.is_valid_pdf(path):
                    st.warning(f"Invalid or missing file: {name}")
                else:
                    chosen_paths.append(path)

    return chosen_paths

def render_document_processing(client: ChromaIndexClient):
    """Render document processing section with chunking options.

    Asks for a target collection, shows what the collection already holds,
    lets the user tune chunk size/overlap and select files, and sends the
    selected files to ``client.process_pdfs``.

    Args:
        client: Index API client used to inspect the collection and to
            process the selected PDFs.
    """
    st.header("Document Processing")

    # Collection selection for processing
    collection_name = st.text_input("Target Collection Name")
    if not collection_name:
        st.warning("Please enter a collection name to process documents")
        return

    # Show current collection documents
    st.subheader(f"Current Documents in {collection_name}")
    render_collection_documents(client, collection_name)

    # Process new documents
    st.subheader("Process New Documents")

    # Chunking parameters
    with st.expander("Chunking Configuration", expanded=False):
        # A step of 100 keeps the default (10000) and the sizes recommended
        # below (4000, 1000) selectable; with the previous step of 1000 and a
        # minimum of 100 those values were presumably off the slider grid.
        chunk_size = st.slider(
            "Chunk Size",
            min_value=100,
            max_value=20000,
            value=10000,
            step=100,
            help="Size of document chunks. Larger values mean longer but fewer chunks"
        )

        chunk_overlap = st.slider(
            "Chunk Overlap",
            min_value=0,
            max_value=2000,
            value=200,
            step=100,
            help="Number of characters to overlap between chunks. Helps maintain context"
        )

        st.info("""
        Chunk size recommendations:
        - 10000: Good for general purpose use
        - 4000: Better for precise retrievals
        - 1000 and less: Best for very specific queries
        
        Overlap recommendations:
        - 200: Standard overlap
        - 500: More context preservation
        - 1000: Maximum context preservation
        """)

    # The two sliders are independent, so the overlap can reach or exceed the
    # chunk size (e.g. size 100 with overlap 2000). Such a combination cannot
    # produce meaningful chunks, so block processing instead of sending it.
    overlap_invalid = chunk_overlap >= chunk_size
    if overlap_invalid:
        st.warning(
            "Chunk overlap must be smaller than chunk size. "
            "Adjust the chunking configuration before processing."
        )

    selected_files = render_available_documents()

    if selected_files:
        num_selected = len(selected_files)
        container = st.container()
        with container:
            st.write(f"Selected {num_selected} document{'s' if num_selected > 1 else ''} for processing")
            # Narrow first column keeps the button compact; the second one is
            # only a spacer.
            col1, _spacer = st.columns([1, 4])
            with col1:
                if st.button("Process Files", type="primary", disabled=overlap_invalid):
                    try:
                        with st.spinner(f"Processing {num_selected} files..."):
                            logger.info(f"Starting to process {num_selected} files for collection {collection_name}")
                            response = client.process_pdfs(
                                collection_name,
                                selected_files,
                                chunk_size=chunk_size,
                                chunk_overlap=chunk_overlap
                            )
                            if response.get("message"):
                                show_operation_status(response["message"])
                                st.success(f"Chunking config used: Size={chunk_size}, Overlap={chunk_overlap}")
                            # Leave the status visible briefly, then reload so
                            # the collection view above reflects the new data.
                            time.sleep(1)
                            st.rerun()
                    except Exception as e:
                        logger.error(f"Error processing files: {str(e)}", exc_info=True)
                        show_status_message(f"Error processing files: {str(e)}", type="error")
    else:
        st.info("Select documents to process from the list above")

def render_collection_documents(client: ChromaIndexClient, collection_name: str) -> None:
    """Render documents in collection with search configuration.

    Shows the chunk count reported by ``client.count_documents`` as a metric.
    For a non-empty collection two tabs are drawn: an overview, filled by a
    similarity search with an empty query and ``k=100``, and a search tab
    where the search type, ``k`` and type-specific parameters can be set.

    Args:
        client: Index API client used for counting and searching.
        collection_name: Name of the collection to inspect.
    """
    try:
        count = client.count_documents(collection_name)
        total_docs = count.get("count", 0)
        st.metric("Total Chunks in Collection", total_docs)
        
        if total_docs > 0:
            view_tab, search_tab = st.tabs(["📚 Collection Overview", "🔍 Search Documents"])
            
            with view_tab:
                # Errors in the overview are handled locally so the search
                # tab below is still rendered.
                try:
                    with st.spinner("Loading documents..."):
                        # Overview request
                        overview_config = {
                            "search_type": "similarity",
                            "k": 100,
                            "search_parameters": {}
                        }
                        results = client.search_documents(
                            collection_name=collection_name,
                            query="",  # Empty query for overview
                            retriever_config=overview_config
                        )
                        if results.get("results"):
                            render_document_results(results["results"], context="overview")
                        else:
                            st.info("No documents to display")
                except Exception as e:
                    st.error(f"Error loading document overview: {str(e)}")
                    logger.error(f"Document overview error: {str(e)}", exc_info=True)
            
            with search_tab:
                # Search configuration
                with st.expander("Search Configuration", expanded=False):
                    search_type = st.selectbox(
                        "Search Type",
                        options=["similarity", "mmr", "similarity_score_threshold"],
                        help="""
                        - similarity: Standard similarity search
                        - mmr: Maximal Marginal Relevance for diverse results
                        - similarity_score_threshold: Filter by minimum similarity
                        """
                    )
                    
                    k = st.slider(
                        "Number of results", 
                        min_value=1, 
                        max_value=20, 
                        value=4,
                        help="Number of documents to return"
                    )
                    
                    # Additional parameters based on search type
                    search_parameters = {}
                    if search_type == "mmr":
                        # The lower bound follows the chosen k, so fetch_k can
                        # never be set below the number of requested results.
                        search_parameters["fetch_k"] = st.slider(
                            "Fetch K (MMR)",
                            min_value=k,
                            max_value=50,
                            value=20,
                            help="Number of documents to fetch before reranking"
                        )
                        search_parameters["lambda_mult"] = st.slider(
                            "Lambda (Diversity)",
                            min_value=0.0,
                            max_value=1.0,
                            value=0.5,
                            help="0 = maximum diversity, 1 = maximum relevance"
                        )
                    elif search_type == "similarity_score_threshold":
                        search_parameters["score_threshold"] = st.slider(
                            "Score Threshold",
                            min_value=0.0,
                            max_value=1.0,
                            value=0.8,
                            help="Minimum similarity score (0-1) for results"
                        )
                
                # Search interface
                query = st.text_input(
                    "Search Query",
                    key="search_input",
                    help="Enter your search query"
                )
                
                # A search is only issued once the query box is non-empty.
                if query:
                    try:
                        with st.spinner("Searching..."):
                            # Clean and prepare the query
                            query_str = str(query).strip()
                            
                            # Construct retriever config
                            retriever_config = {
                                "search_type": search_type,
                                "k": k,
                                "search_parameters": search_parameters
                            }
                            
                            # Debug info
                            logger.debug(f"Search params - Query: '{query_str}', Config: {retriever_config}")
                            
                            # Make the search request
                            results = client.search_documents(
                                collection_name=collection_name,
                                query=query_str,
                                retriever_config=retriever_config
                            )
                            
                            if results.get("results"):
                                # Show search configuration used
                                st.success(f"""
                                    Search performed with:
                                    - Query: "{query_str}"
                                    - Type: {search_type}
                                    - K: {k}
                                    {f'- Additional parameters: {search_parameters}' if search_parameters else ''}
                                """)
                                render_document_results(results["results"], context="search")
                            else:
                                st.info(f"No results found for query: '{query_str}'")
                    except Exception as e:
                        logger.error(f"Search error: {str(e)}", exc_info=True)
                        st.error(f"Error performing search: {str(e)}")
                        # Additional debug info
                        logger.debug(f"Failed request details - Query: '{query}', Type: {type(query)}")
            
        else:
            st.info("Collection is empty. Process some documents to see them here.")
            
    except Exception as e:
        # Reached when counting fails or when anything outside the inner
        # handlers above raises.
        logger.error(f"Error fetching collection documents: {str(e)}", exc_info=True)
        show_status_message(f"Error fetching collection documents: {str(e)}", type="error")

def render_document_results(results: List[Dict[str, Any]], context: str = "overview"):
    """Helper function to render document results

    Chunks are grouped by the ``source_file`` entry of their metadata and
    shown in one expander per file. Every chunk gets a short preview and a
    "Show Full" button that toggles the complete text.

    Args:
        results: List of document results to render; each item is read as a
            dict with optional ``metadata`` and ``page_content`` entries
        context: String indicating the context ("overview" or "search") to create unique keys
    """
    preview_limit = 200

    # Group chunks by source file, keeping the order of first appearance.
    # ``or {}`` also covers results whose metadata is present but None.
    docs_by_file: Dict[str, List[Dict[str, Any]]] = {}
    for doc in results:
        metadata = doc.get("metadata") or {}
        source_file = metadata.get("source_file", "Unknown Source")
        docs_by_file.setdefault(source_file, []).append(doc)

    # Display documents grouped by file
    for file_name, chunks in docs_by_file.items():
        with st.expander(f"📄 {file_name} ({len(chunks)} chunks)"):
            st.write(f"**Source File:** {file_name}")
            for idx, chunk in enumerate(chunks, 1):
                metadata = chunk.get("metadata") or {}
                content = chunk.get("page_content") or ""

                st.markdown("---")
                st.write(f"**Chunk {idx}** (Page: {metadata.get('page_number', 'Unknown')})")

                # Create two columns: one for preview, one for "Show full content" button
                col1, col2 = st.columns([4, 1])
                with col1:
                    # Only mark the preview as truncated when text was
                    # actually cut off; short chunks are shown as they are.
                    preview = content[:preview_limit]
                    if len(content) > preview_limit:
                        preview += "..."
                    st.text(preview)
                with col2:
                    # Use context in key to make it unique
                    button_key = f"{context}_show_content_{file_name}_{idx}"
                    toggle_key = f"{context}_toggle_{file_name}_{idx}"

                    if toggle_key not in st.session_state:
                        st.session_state[toggle_key] = False

                    if st.button("Show Full", key=button_key):
                        st.session_state[toggle_key] = not st.session_state[toggle_key]

                # Show full content if button was clicked
                if st.session_state[toggle_key]:
                    st.text(content)

def main():
    """Set up the page and draw its three sections, separated by dividers."""
    index_client, gdrive_client = init_page()

    # Sections in display order; each entry draws one section when called.
    sections = (
        lambda: render_gdrive_section(gdrive_client),
        render_local_upload,
        lambda: render_document_processing(index_client),
    )
    for position, draw_section in enumerate(sections):
        if position > 0:
            st.divider()
        draw_section()


if __name__ == "__main__":
    main()
========================================

File Name: streamlit\pages\03_Agent_Chat.py
========================================

========================================

File Name: streamlit\pages\__init__.py
========================================

========================================

File Name: streamlit\utils\api.py
========================================
import requests
from typing import Optional, Dict, Any, List
import logging
from pathlib import Path
import os
from contextlib import ExitStack

logger = logging.getLogger(__name__)

class APIClient:
    """Static helper that sends HTTP requests and logs what happened."""

    @staticmethod
    def make_request(
        method: str,
        url: str,
        json: Optional[Dict[str, Any]] = None,
        files: Optional[Any] = None,
        raise_for_status: bool = True,
        params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Make a request to the API endpoint.

        Args:
            method: HTTP method name, e.g. "GET" or "POST".
            url: Full URL of the endpoint.
            json: Optional JSON body.
            files: Optional multipart payload, forwarded unchanged to
                ``requests.request``.
            raise_for_status: When True, HTTP error statuses raise.
            params: Optional query-string parameters. Callers in this file
                pass ``params=``, which previously failed with a TypeError
                because the argument did not exist.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.exceptions.RequestException: On connection problems or,
                with ``raise_for_status``, on HTTP error statuses. The error
                is logged and re-raised.
        """
        try:
            # Debug print
            logger.info(f"Making {method} request to {url}")
            if json:
                logger.info(f"Request body: {json}")
            if params:
                logger.info(f"Query parameters: {params}")

            response = requests.request(
                method=method,
                url=url,
                json=json,
                files=files,
                params=params,
            )

            # Debug print response
            logger.info(f"Response status: {response.status_code}")
            logger.info(f"Response content: {response.text}")

            if raise_for_status:
                response.raise_for_status()

            return response.json()

        except requests.exceptions.RequestException as e:
            logger.error(f"API request failed: {str(e)}")
            # The ``response`` attribute always exists but is None when no
            # reply was received (e.g. connection refused). Compare against
            # None explicitly: a Response with an error status is falsy.
            failed_response = getattr(e, "response", None)
            if failed_response is not None:
                logger.error(f"Response status: {failed_response.status_code}")
                logger.error(f"Response content: {failed_response.text}")
            raise

class ChromaDBClient:
    """Client for the database and collection endpoints of the API.

    Every method delegates to ``APIClient.make_request`` and returns its
    result unchanged.
    """

    def __init__(self, endpoints: Dict[str, str]):
        # Mapping from operation name to endpoint URL.
        self.endpoints = endpoints

    def create_database(self, config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """POST ``config`` (may be None) to the "create" endpoint."""
        target = self.endpoints["create"]
        return APIClient.make_request("POST", target, json=config)

    def create_collection(self, collection_name: str) -> Dict[str, str]:
        """POST to the "create_collection" endpoint for ``collection_name``."""
        target = "{}/{}".format(self.endpoints['create_collection'], collection_name)
        return APIClient.make_request("POST", target)

    def delete_collection(self, collection_name: str) -> Dict[str, str]:
        """Send DELETE to the "delete_collection" endpoint for ``collection_name``."""
        target = "{}/{}".format(self.endpoints['delete_collection'], collection_name)
        return APIClient.make_request("DELETE", target)

    def list_collections(self) -> Dict[str, list]:
        """GET the "list_collections" endpoint."""
        target = self.endpoints["list_collections"]
        return APIClient.make_request("GET", target)

    def list_embeddings(self) -> Dict[str, Dict[str, Any]]:
        """GET the "list_embeddings" endpoint."""
        target = self.endpoints["list_embeddings"]
        return APIClient.make_request("GET", target)

class ChromaIndexClient:
    """Client for Chroma indexing operations.

    Every method builds a URL of the form
    ``<endpoint base>/<collection_name>/<operation>`` from the ``endpoints``
    mapping and delegates the HTTP call to ``APIClient.make_request``,
    returning whatever that call returns.
    """

    def __init__(self, endpoints: Dict[str, str]):
        """Store the mapping of operation name to endpoint base URL."""
        self.endpoints = endpoints

    @staticmethod
    def _build_retriever_config(
        collection_name: str,
        query: str,
        retriever_config: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Return the retriever configuration to send with a search request.

        Args:
            collection_name: Collection the search targets; always written
                into the returned config under ``"collection_name"``.
            query: The search text; only used to pick the default ``k``.
            retriever_config: Caller-supplied configuration, or ``None`` to
                use the defaults.

        Returns:
            A new dict. The caller's ``retriever_config`` is never modified,
            so a config object can be reused across collections safely.
        """
        if retriever_config is None:
            return {
                "collection_name": collection_name,
                "search_type": "similarity",
                # An empty query asks for a larger result set (100) than a
                # real query (4).
                "k": 100 if not query else 4,
                "search_parameters": {},
            }
        # Shallow copy so the caller's dict is left untouched.
        return {**retriever_config, "collection_name": collection_name}

    def process_pdfs(
        self,
        collection_name: str,
        file_paths: List[str],
        chunk_size: int = 10000,
        chunk_overlap: int = 200
    ) -> Dict[str, Any]:
        """Upload PDF files to the collection's ``process_pdfs`` endpoint.

        Args:
            collection_name: Target collection.
            file_paths: Paths of the files to upload. Entries that do not
                exist or do not end in ``.pdf`` (case-insensitive) are
                skipped with a warning.
            chunk_size: Forwarded to the request as the ``chunk_size`` param.
            chunk_overlap: Forwarded as the ``chunk_overlap`` param.

        Returns:
            The value returned by ``APIClient.make_request``.

        Raises:
            ValueError: If no entry of ``file_paths`` is a usable PDF.
            Exception: Any error raised while opening a file is logged and
                re-raised.
        """
        url = f"{self.endpoints['process_pdfs']}/{collection_name}/process_pdfs"

        # ExitStack keeps every opened file alive until the request has been
        # sent, then closes them all, even if an error occurs midway.
        with ExitStack() as stack:
            files = []
            for file_path in file_paths:
                try:
                    abs_path = os.path.abspath(file_path)
                    if os.path.exists(abs_path) and abs_path.lower().endswith('.pdf'):
                        f = stack.enter_context(open(abs_path, 'rb'))
                        filename = os.path.basename(abs_path)
                        # Multipart field name 'files' repeated once per upload.
                        files.append(('files', (filename, f, 'application/pdf')))
                    else:
                        logger.warning(f"Skipping invalid file: {file_path}")
                except Exception as e:
                    logger.error(f"Error processing file {file_path}: {str(e)}")
                    raise

            if not files:
                raise ValueError("No valid PDF files to process")

            # Chunking parameters travel alongside the multipart upload.
            params = {
                "chunk_size": chunk_size,
                "chunk_overlap": chunk_overlap
            }

            return APIClient.make_request("POST", url, files=files, params=params)

    def search_documents(
        self,
        collection_name: str,
        query: str,
        retriever_config: Optional[Dict[str, Any]] = None
    ) -> Dict[str, List]:
        """Search documents in a collection with an optional retriever config.

        Args:
            collection_name: Collection to search.
            query: Search text; it is stringified and stripped before sending.
            retriever_config: Optional retriever settings. When omitted, a
                similarity search is configured with ``k`` of 100 for an
                empty query and 4 otherwise. The passed dict is not mutated.

        Returns:
            The value returned by ``APIClient.make_request``.

        Raises:
            Exception: Any failure while building or sending the request is
                logged and re-raised unchanged.
        """
        url = f"{self.endpoints['search']}/{collection_name}/search"
        config = self._build_retriever_config(collection_name, query, retriever_config)

        try:
            request_body = {
                "query": str(query).strip(),
                "retriever_config": config
            }

            logger.info(f"Making request to {url}")
            logger.info(f"Request body: {request_body}")

            return APIClient.make_request(
                method="POST",
                url=url,
                json=request_body
            )

        except Exception as e:
            logger.error(f"Error in search documents: {str(e)}")
            # Exceptions may expose ``response`` set to None (e.g. when the
            # connection itself failed); guard so that logging never raises
            # and hides the original error.
            response = getattr(e, 'response', None)
            if response is not None:
                logger.error(f"Response content: {response.text}")
            raise

    def process_folder(
        self,
        collection_name: str,
        folder_path: str,
        chunk_size: int = 10000,
        chunk_overlap: int = 200
    ) -> Dict[str, Any]:
        """Ask the API to process a folder of PDFs for the collection.

        ``folder_path`` is sent in the JSON body; ``chunk_size`` and
        ``chunk_overlap`` are forwarded as request params.
        """
        url = f"{self.endpoints['process_folder']}/{collection_name}/process_folder"
        params = {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap
        }
        return APIClient.make_request(
            "POST",
            url,
            json={"folder_path": folder_path},
            params=params
        )

    def count_documents(self, collection_name: str) -> Dict[str, int]:
        """GET ``<count endpoint>/<collection_name>/count``."""
        url = f"{self.endpoints['count']}/{collection_name}/count"
        return APIClient.make_request("GET", url)

    def add_documents(self, collection_name: str, documents: List[Dict[str, Any]]) -> Dict[str, str]:
        """POST ``documents`` as the JSON body to the collection's ``add_documents`` URL."""
        url = f"{self.endpoints['add_documents']}/{collection_name}/add_documents"
        return APIClient.make_request("POST", url, json=documents)

    def delete_document(self, collection_name: str, document_id: str) -> Dict[str, str]:
        """Send DELETE for ``documents/<document_id>`` in the collection."""
        url = f"{self.endpoints['delete_document']}/{collection_name}/documents/{document_id}"
        return APIClient.make_request("DELETE", url)

    def update_document(self, collection_name: str, document_id: str, document: Dict[str, Any]) -> Dict[str, str]:
        """PUT ``document`` as the JSON body to ``documents/<document_id>`` in the collection."""
        url = f"{self.endpoints['update_document']}/{collection_name}/documents/{document_id}"
        return APIClient.make_request("PUT", url, json=document)

class GDriveClient:
    """Wrapper for the Google Drive endpoints of the backend API."""

    def __init__(self, endpoints: Dict[str, str]):
        # Operation name -> URL; looked up on each call.
        self.endpoints = endpoints

    def get_auth_url(self) -> str:
        """Return the configured ``authorize`` URL; no request is made here."""
        auth_url = self.endpoints["authorize"]
        return auth_url

    def download_files(self, folder_id: str) -> Dict[str, Any]:
        """GET ``<download_files endpoint>/<folder_id>`` via ``APIClient.make_request``."""
        target = "{}/{}".format(self.endpoints["download_files"], folder_id)
        return APIClient.make_request("GET", target)
========================================

File Name: streamlit\utils\file_utils.py
========================================
import os
import shutil
from pathlib import Path
from typing import List, Set
import logging

logger = logging.getLogger(__name__)

class FileManager:
    """Helpers for storing and locating PDF files in the raw-data directory."""

    # Directory that holds uploaded files; relative paths resolve against the
    # process working directory.
    RAW_DATA_DIR = "data/raw_data"

    @classmethod
    def ensure_raw_data_dir(cls):
        """Create the raw data directory (and parents) if it does not exist."""
        os.makedirs(cls.RAW_DATA_DIR, exist_ok=True)

    @classmethod
    def get_raw_data_files(cls) -> Set[str]:
        """Return the names of entries in the raw data directory ending in ``.pdf``.

        The match is case-insensitive and based on the name only. The
        directory is created first if it is missing.
        """
        cls.ensure_raw_data_dir()
        return {f for f in os.listdir(cls.RAW_DATA_DIR) if f.lower().endswith('.pdf')}

    @classmethod
    def save_uploaded_file(cls, file) -> str:
        """Save an uploaded file into the raw data directory.

        Args:
            file: Object exposing ``name`` (the client-supplied file name)
                and ``getvalue()`` (the file's bytes).

        Returns:
            The path the file was written to. If the name is already taken,
            ``_1``, ``_2``, ... is appended before the extension.

        Raises:
            ValueError: If ``file.name`` contains no usable file name.
            Exception: Any other failure is logged and re-raised.
        """
        try:
            cls.ensure_raw_data_dir()

            # The name comes from the client, so keep only its final
            # component. Otherwise "../x.pdf" or an absolute path would make
            # os.path.join write outside RAW_DATA_DIR. Backslashes are
            # normalised so Windows-style paths are stripped on any platform.
            safe_name = os.path.basename(file.name.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                raise ValueError(f"Invalid upload file name: {file.name!r}")

            file_path = os.path.join(cls.RAW_DATA_DIR, safe_name)

            # If file exists, add a number to the filename
            base_name, extension = os.path.splitext(safe_name)
            counter = 1
            while os.path.exists(file_path):
                new_name = f"{base_name}_{counter}{extension}"
                file_path = os.path.join(cls.RAW_DATA_DIR, new_name)
                counter += 1

            # Save the file
            with open(file_path, "wb") as f:
                f.write(file.getvalue())

            logger.info(f"Successfully saved file to {file_path}")
            return file_path

        except Exception as e:
            logger.error(f"Error saving file {file.name}: {str(e)}")
            raise

    @classmethod
    def get_file_path(cls, filename: str) -> str:
        """Return ``filename`` joined onto the raw data directory path."""
        return os.path.join(cls.RAW_DATA_DIR, filename)

    @classmethod
    def is_valid_pdf(cls, file_path: str) -> bool:
        """Return True if the path exists and ends in ``.pdf`` (case-insensitive).

        Only existence and extension are checked; the file content is not
        inspected.
        """
        return os.path.exists(file_path) and file_path.lower().endswith('.pdf')
========================================

File Name: streamlit\utils\__init__.py
========================================

========================================