cairo-book · enitrat · Dec 2, 2024 · Oct 29, 2024 · Nov 6, 2024
diff --git a/backend.dev.dockerfile b/backend.dev.dockerfile
@@ -1,21 +1,27 @@
-FROM node:slim
-
-ARG SEARXNG_API_URL
+FROM node:20-bullseye-slim
 
 WORKDIR /home/starknet-agent
 
-# Copy package.json and yarn.lock first to leverage Docker cache
+# Install Python and build dependencies
+RUN apt-get update && apt-get install -y \
+    python3 \
+    python3-pip \
+    build-essential \
+    python-is-python3 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy package.json and yarn.lock first so the dependency layer stays cached when only source files change
 COPY package.json yarn.lock ./
 
+# Install dependencies, including the development ones
+RUN yarn install
 
-# Copy the rest of the application code
+# Copy source code
 COPY . .
 
-RUN sed -i "s|SEARXNG = \".*\"|SEARXNG = \"${SEARXNG_API_URL}\"|g" /home/starknet-agent/config.toml
+# Build TypeScript
+RUN yarn build
 
-RUN mkdir -p /home/starknet-agent/data
+EXPOSE 3001
 
-# Install dependencies including development ones
-RUN yarn install
-# Use the existing dev command
 CMD ["yarn", "run", "dev"]
diff --git a/docker-compose.dev-hosted.yml b/docker-compose.dev-hosted.yml
@@ -1,5 +1,4 @@
 services:
-
   starknet-agent-backend:
     build:
       context: .
@@ -43,7 +42,7 @@ services:
       - WATCHPACK_POLLING=true
     restart: unless-stopped
 
-  # cairobook-ingest:
+  # docs-ingest:
   #   build:
   #     context: .
   #     dockerfile: ingest.dockerfile

diff --git a/src/agents/ragSearchAgent.ts b/src/agents/ragSearchAgent.ts
@@ -58,31 +58,55 @@ export const handleStream = async (
   stream: IterableReadableStream<StreamEvent>,
   emitter: eventEmitter,
 ): Promise<void> => {
-  for await (const event of stream) {
-    if (
-      event.event === 'on_chain_end' &&
-      event.name === 'FinalSourceRetriever'
-    ) {
-      emitter.emit(
-        'data',
-        JSON.stringify({ type: 'sources', data: event.data.output }),
-      );
-    }
-    if (
-      event.event === 'on_chain_stream' &&
-      event.name === 'FinalResponseGenerator'
-    ) {
-      emitter.emit(
-        'data',
-        JSON.stringify({ type: 'response', data: event.data.chunk }),
-      );
-    }
-    if (
-      event.event === 'on_chain_end' &&
-      event.name === 'FinalResponseGenerator'
-    ) {
-      emitter.emit('end');
+  logger.info('Starting stream handling');
+  try {
+    for await (const event of stream) {
+      logger.debug('Stream event received:', {
+        eventType: event.event,
+        name: event.name,
+      });
+
+      if (
+        event.event === 'on_chain_end' &&
+        event.name === 'FinalSourceRetriever'
+      ) {
+        logger.info('Sources retrieved:', {
+          sourceCount: event.data.output.length,
+        });
+        emitter.emit(
+          'data',
+          JSON.stringify({
+            type: 'sources',
+            data: event.data.output,
+          }),
+        );
+      }
+
+      if (
+        event.event === 'on_chain_stream' &&
+        event.name === 'FinalResponseGenerator'
+      ) {
+        logger.debug('Response chunk received');
+        emitter.emit(
+          'data',
+          JSON.stringify({
+            type: 'response',
+            data: event.data.chunk,
+          }),
+        );
+      }
+
+      if (
+        event.event === 'on_chain_end' &&
+        event.name === 'FinalResponseGenerator'
+      ) {
+        logger.info('Stream completed successfully');
+        emitter.emit('end');
+      }
     }
+  } catch (error) {
+    logger.error('Error in handleStream:', error);
+    throw error;
   }
 };
 
@@ -96,12 +120,16 @@ export const createBasicSearchRetrieverChain = (
     llm,
     strParser,
     RunnableLambda.from(async (input: string) => {
+      logger.debug('Search retriever input:', { input });
       if (input === 'not_needed') {
         return { query: '', docs: [] };
       }
 
       const documents = await vectorStore.similaritySearch(input, 5);
-
+      logger.debug('Vector store search results:', {
+        documentCount: documents.length,
+        firstDoc: documents[0],
+      });
       return { query: input, docs: documents };
     }),
   ]);
@@ -138,29 +166,55 @@ export const rerankDocs =
     query: string;
     docs: Document[];
   }): Promise<Document[]> => {
+    logger.debug('Reranking docs input:', {
+      query,
+      docsLength: docs.length,
+      firstDoc: docs[0],
+    });
+
     if (docs.length === 0 || query === 'Summarize') {
+      logger.info('Skipping reranking - empty docs or summarize query');
       return docs;
     }
 
     const docsWithContent = docs.filter(
       (doc) => doc.pageContent && doc.pageContent.length > 0,
     );
-
-    const [docEmbeddings, queryEmbedding] = await Promise.all([
-      embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
-      embeddings.embedQuery(query),
-    ]);
-
-    const similarity = docEmbeddings.map((docEmbedding, i) => ({
-      index: i,
-      similarity: computeSimilarity(queryEmbedding, docEmbedding),
-    }));
-
-    return similarity
-      .filter((sim) => sim.similarity > 0.5)
-      .sort((a, b) => b.similarity - a.similarity)
-      .slice(0, 15)
-      .map((sim) => docsWithContent[sim.index]);
+    logger.debug('Filtered documents with content:', {
+      originalCount: docs.length,
+      filteredCount: docsWithContent.length,
+    });
+
+    try {
+      const [docEmbeddings, queryEmbedding] = await Promise.all([
+        embeddings.embedDocuments(
+          docsWithContent.map((doc) => doc.pageContent),
+        ),
+        embeddings.embedQuery(query),
+      ]);
+      logger.debug('Embeddings generated successfully');
+
+      const similarity = docEmbeddings.map((docEmbedding, i) => ({
+        index: i,
+        similarity: computeSimilarity(queryEmbedding, docEmbedding),
+      }));
+
+      const rerankedDocs = similarity
+        .filter((sim) => sim.similarity > 0.5)
+        .sort((a, b) => b.similarity - a.similarity)
+        .slice(0, 15)
+        .map((sim) => docsWithContent[sim.index]);
+
+      logger.info('Reranking completed', {
+        inputDocs: docsWithContent.length,
+        filteredDocs: rerankedDocs.length,
+      });
+
+      return rerankedDocs;
+    } catch (error) {
+      logger.error('Error in rerankDocs:', error);
+      throw error;
+    }
   };
 
 export const createBasicSearchAnsweringChain = (
@@ -234,7 +288,13 @@ export const basicRagSearch = (
 ): eventEmitter => {
   const emitter = new eventEmitter();
 
+  logger.info('Starting RAG search', {
+    query,
+    historyLength: history.length,
+  });
+
   try {
+    logger.debug('Initializing search chain');
     const basicSearchAnsweringChain = createBasicSearchAnsweringChain(
       llm,
       embeddings,
@@ -244,6 +304,7 @@ export const basicRagSearch = (
       noSourceFoundPrompt,
     );
 
+    logger.debug('Starting stream');
     const stream = basicSearchAnsweringChain.streamEvents(
       {
         chat_history: history,
@@ -254,13 +315,25 @@ export const basicRagSearch = (
       },
     );
 
-    handleStream(stream, emitter);
+    handleStream(stream, emitter).catch((error) => {
+      logger.error('Stream handling failed:', error);
+      emitter.emit(
+        'error',
+        JSON.stringify({
+          data: 'An error occurred while processing the stream',
+        }),
+      );
+    });
   } catch (err) {
+    logger.error('Error in basicRagSearch:', {
+      error: err,
+      query,
+      historyLength: history.length,
+    });
     emitter.emit(
       'error',
       JSON.stringify({ data: 'An error has occurred please try again later' }),
     );
-    logger.error(`Error in Search: ${err}`);
   }
 
   return emitter;

diff --git a/src/agents/ragSearchAgents/cairoBookSearchAgent.ts b/src/agents/ragSearchAgents/cairoBookSearchAgent.ts
@@ -73,7 +73,7 @@ neutral and educational tone in your responses. Format your responses using Mark
 readability. Use code blocks for Cairo code examples. Provide medium to long responses that are
 comprehensive and informative.
 
-You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
+You have to cite the answer using [number] notation. You must cite the sentences with their relevant context number. You must cite each and every part of the answer so the user can know where the information is coming from.
 Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
 However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
 

diff --git a/src/agents/ragSearchAgents/starknetDocsSearchAgent.ts b/src/agents/ragSearchAgents/starknetDocsSearchAgent.ts
@@ -39,7 +39,7 @@ neutral and educational tone in your responses. Format your responses using Mark
 readability. Use code blocks for Cairo code examples. Provide medium to long responses that are
 comprehensive and informative.
 
-You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
+You have to cite the answer using [number] notation. You must cite the sentences with their relevant context number. You must cite each and every part of the answer so the user can know where the information is coming from.
 Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
 However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
 

diff --git a/src/agents/ragSearchAgents/starknetEcosystemSearchAgent.ts b/src/agents/ragSearchAgents/starknetEcosystemSearchAgent.ts
@@ -51,7 +51,7 @@ neutral and educational tone in your responses. Format your responses using Mark
 readability. Use code blocks for Cairo code examples. Provide medium to long responses that are
 comprehensive and informative.
 
-You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
+You have to cite the answer using [number] notation. You must cite the sentences with their relevant context number. You must cite each and every part of the answer so the user can know where the information is coming from.
 Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
 However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer.
 
@@ -77,7 +77,7 @@ Remember, your knowledge is based solely on the provided Cairo and Starknet docu
 accuracy and relevance in your responses. Today's date is ${new Date().toISOString()}
 `;
 
-const handleStarknetEcosystemSearch= (
+const handleStarknetEcosystemSearch = (
   message: string,
   history: BaseMessage[],
   llm: BaseChatModel,