From 585c35bcec240c5d7aaf0eb53a122ee38a0c01bc Mon Sep 17 00:00:00 2001 From: ChihYu Yeh Date: Thu, 4 Jul 2024 13:46:28 +0800 Subject: [PATCH] orjson dump and formatting for debug messages --- .../src/pipelines/ask/followup_generation.py | 11 ++++++-- .../src/pipelines/ask/generation.py | 15 +++++++--- .../src/pipelines/ask/historical_question.py | 12 +++++--- .../src/pipelines/ask/sql_correction.py | 15 +++++++--- .../src/pipelines/ask_details/generation.py | 6 ++-- .../src/pipelines/indexing/indexing.py | 28 ++++++++++++++----- .../pipelines/sql_explanation/generation.py | 14 +++++++--- .../pipelines/sql_regeneration/generation.py | 12 ++++++-- 8 files changed, 82 insertions(+), 31 deletions(-) diff --git a/wren-ai-service/src/pipelines/ask/followup_generation.py b/wren-ai-service/src/pipelines/ask/followup_generation.py index 9ddebc828d..a41f3b20e1 100644 --- a/wren-ai-service/src/pipelines/ask/followup_generation.py +++ b/wren-ai-service/src/pipelines/ask/followup_generation.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any, List +import orjson from hamilton import base from hamilton.experimental.h_async import AsyncDriver from haystack import Document @@ -138,7 +139,9 @@ def prompt( prompt_builder: PromptBuilder, ) -> dict: logger.debug(f"query: {query}") - logger.debug(f"documents: {documents}") + logger.debug( + f"documents: {orjson.dumps(documents, option=orjson.OPT_INDENT_2).decode()}" + ) logger.debug(f"history: {history}") return prompt_builder.run( query=query, documents=documents, history=history, alert=alert @@ -147,13 +150,15 @@ def prompt( @async_timer async def generate(prompt: dict, generator: Any) -> dict: - logger.debug(f"prompt: {prompt}") + logger.debug(f"prompt: {orjson.dumps(prompt, option=orjson.OPT_INDENT_2).decode()}") return await generator.run(prompt=prompt.get("prompt")) @async_timer async def post_process(generate: dict, post_processor: GenerationPostProcessor) -> dict: - logger.debug(f"generate: {generate}") + logger.debug( + f"generate: {orjson.dumps(generate, option=orjson.OPT_INDENT_2).decode()}" + ) return await post_processor.run(generate.get("replies")) diff --git a/wren-ai-service/src/pipelines/ask/generation.py b/wren-ai-service/src/pipelines/ask/generation.py index e64b2d7926..15ea28dabc 100644 --- a/wren-ai-service/src/pipelines/ask/generation.py +++ b/wren-ai-service/src/pipelines/ask/generation.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any, Dict, List +import orjson from hamilton import base from hamilton.experimental.h_async import AsyncDriver from haystack import Document @@ -98,8 +99,12 @@ def prompt( prompt_builder: PromptBuilder, ) -> dict: logger.debug(f"query: {query}") - logger.debug(f"documents: {documents}") - logger.debug(f"exclude: {exclude}") + logger.debug( + f"documents: {orjson.dumps(documents, option=orjson.OPT_INDENT_2).decode()}" + ) + logger.debug( + f"exclude: {orjson.dumps(exclude, option=orjson.OPT_INDENT_2).decode()}" + ) return prompt_builder.run( query=query, documents=documents, exclude=exclude, alert=alert ) @@ -107,13 +112,15 @@ def prompt( @async_timer async def generate(prompt: dict, generator: Any) -> dict: - logger.debug(f"prompt: {prompt}") + logger.debug(f"prompt: {orjson.dumps(prompt, option=orjson.OPT_INDENT_2).decode()}") return await generator.run(prompt=prompt.get("prompt")) @async_timer async def post_process(generate: dict, post_processor: GenerationPostProcessor) -> dict: - logger.debug(f"generate: {generate}") + logger.debug( + f"generate: {orjson.dumps(generate, 
option=orjson.OPT_INDENT_2).decode()}" + ) return await post_processor.run(generate.get("replies")) diff --git a/wren-ai-service/src/pipelines/ask/historical_question.py b/wren-ai-service/src/pipelines/ask/historical_question.py index 95aa280d4f..31f8885e48 100644 --- a/wren-ai-service/src/pipelines/ask/historical_question.py +++ b/wren-ai-service/src/pipelines/ask/historical_question.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional +import orjson from hamilton import base from hamilton.experimental.h_async import AsyncDriver from haystack import Document, component @@ -64,13 +65,14 @@ async def embedding(query: str, embedder: Any) -> dict: @async_timer async def retrieval(embedding: dict, retriever: Any) -> dict: res = await retriever.run(query_embedding=embedding.get("embedding")) - documents = res.get("documents") - return dict(documents=documents) + return dict(documents=res.get("documents")) @timer def filtered_documents(retrieval: dict, score_filter: ScoreFilter) -> dict: - logger.debug(f"retrieval: {retrieval}") + logger.debug( + f"retrieval: {orjson.dumps(retrieval, option=orjson.OPT_INDENT_2).decode()}" + ) return score_filter.run(documents=retrieval.get("documents")) @@ -78,7 +80,9 @@ def filtered_documents(retrieval: dict, score_filter: ScoreFilter) -> dict: def formatted_output( filtered_documents: dict, output_formatter: OutputFormatter ) -> dict: - logger.debug(f"filtered_documents: {filtered_documents}") + logger.debug( + f"filtered_documents: {orjson.dumps(filtered_documents, option=orjson.OPT_INDENT_2).decode()}" + ) return output_formatter.run(documents=filtered_documents.get("documents")) diff --git a/wren-ai-service/src/pipelines/ask/sql_correction.py b/wren-ai-service/src/pipelines/ask/sql_correction.py index 160414d1a8..cafb8a2584 100644 --- a/wren-ai-service/src/pipelines/ask/sql_correction.py +++ b/wren-ai-service/src/pipelines/ask/sql_correction.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any, Dict, List +import orjson from hamilton import base from hamilton.experimental.h_async import AsyncDriver from haystack import Document @@ -64,8 +65,12 @@ def prompt( alert: str, prompt_builder: PromptBuilder, ) -> dict: - logger.debug(f"documents: {documents}") - logger.debug(f"invalid_generation_results: {invalid_generation_results}") + logger.debug( + f"documents: {orjson.dumps(documents, option=orjson.OPT_INDENT_2).decode()}" + ) + logger.debug( + f"invalid_generation_results: {orjson.dumps(invalid_generation_results, option=orjson.OPT_INDENT_2).decode()}" + ) return prompt_builder.run( documents=documents, invalid_generation_results=invalid_generation_results, @@ -75,13 +80,15 @@ def prompt( @async_timer async def generate(prompt: dict, generator: Any) -> dict: - logger.debug(f"prompt: {prompt}") + logger.debug(f"prompt: {orjson.dumps(prompt, option=orjson.OPT_INDENT_2).decode()}") return await generator.run(prompt=prompt.get("prompt")) @async_timer async def post_process(generate: dict, post_processor: GenerationPostProcessor) -> dict: - logger.debug(f"generate: {generate}") + logger.debug( + f"generate: {orjson.dumps(generate, option=orjson.OPT_INDENT_2).decode()}" + ) return await post_processor.run(generate.get("replies")) diff --git a/wren-ai-service/src/pipelines/ask_details/generation.py b/wren-ai-service/src/pipelines/ask_details/generation.py index 0b7675f175..6b9f073e8b 100644 --- a/wren-ai-service/src/pipelines/ask_details/generation.py +++ b/wren-ai-service/src/pipelines/ask_details/generation.py @@
-128,13 +128,15 @@ def prompt(sql: str, prompt_builder: PromptBuilder) -> dict: @async_timer async def generate(prompt: dict, generator: Any) -> dict: - logger.debug(f"prompt: {prompt}") + logger.debug(f"prompt: {orjson.dumps(prompt, option=orjson.OPT_INDENT_2).decode()}") return await generator.run(prompt=prompt.get("prompt")) @async_timer async def post_process(generate: dict, post_processor: GenerationPostProcessor) -> dict: - logger.debug(f"generate: {generate}") + logger.debug( + f"generate: {orjson.dumps(generate, option=orjson.OPT_INDENT_2).decode()}" + ) return await post_processor.run(generate.get("replies")) diff --git a/wren-ai-service/src/pipelines/indexing/indexing.py b/wren-ai-service/src/pipelines/indexing/indexing.py index 9f8e509003..ed17808b94 100644 --- a/wren-ai-service/src/pipelines/indexing/indexing.py +++ b/wren-ai-service/src/pipelines/indexing/indexing.py @@ -323,7 +323,9 @@ def clean_document_store(mdl_str: str, cleaner: DocumentCleaner) -> Dict[str, An def validate_mdl( clean_document_store: Dict[str, Any], validator: MDLValidator ) -> Dict[str, Any]: - logger.debug(f"input in validate_mdl: {clean_document_store}") + logger.debug( + f"input in validate_mdl: {orjson.dumps(clean_document_store, option=orjson.OPT_INDENT_2).decode()}" + ) mdl = clean_document_store.get("mdl") res = validator.run(mdl=mdl) return dict(mdl=res["mdl"]) @@ -331,7 +333,9 @@ def validate_mdl( @timer def convert_to_ddl(mdl: Dict[str, Any], ddl_converter: DDLConverter) -> Dict[str, Any]: - logger.debug(f"input in convert_to_ddl: {mdl}") + logger.debug( + f"input in convert_to_ddl: {orjson.dumps(mdl, option=orjson.OPT_INDENT_2).decode()}" + ) return ddl_converter.run(mdl=mdl) @@ -339,13 +343,17 @@ def convert_to_ddl(mdl: Dict[str, Any], ddl_converter: DDLConverter) -> Dict[str async def embed_ddl( convert_to_ddl: Dict[str, Any], ddl_embedder: Any ) -> Dict[str, Any]: - logger.debug(f"input in embed_ddl: {convert_to_ddl}") + logger.debug( + f"input in embed_ddl: {orjson.dumps(convert_to_ddl, option=orjson.OPT_INDENT_2).decode()}" + ) return await ddl_embedder.run(documents=convert_to_ddl["documents"]) @timer def write_ddl(embed_ddl: Dict[str, Any], ddl_writer: DocumentWriter) -> None: - logger.debug(f"input in write_ddl: {embed_ddl}") + logger.debug( + f"input in write_ddl: {orjson.dumps(embed_ddl, option=orjson.OPT_INDENT_2).decode()}" + ) return ddl_writer.run(documents=embed_ddl["documents"]) @@ -353,7 +361,9 @@ def write_ddl(embed_ddl: Dict[str, Any], ddl_writer: DocumentWriter) -> None: def convert_to_view( mdl: Dict[str, Any], view_converter: ViewConverter ) -> Dict[str, Any]: - logger.debug(f"input in convert_to_view: {mdl}") + logger.debug( + f"input in convert_to_view: {orjson.dumps(mdl, option=orjson.OPT_INDENT_2).decode()}" + ) return view_converter.run(mdl=mdl) @@ -361,13 +371,17 @@ def convert_to_view( async def embed_view( convert_to_view: Dict[str, Any], view_embedder: Any ) -> Dict[str, Any]: - logger.debug(f"input in embed_view: {convert_to_view}") + logger.debug( + f"input in embed_view: {orjson.dumps(convert_to_view, option=orjson.OPT_INDENT_2).decode()}" + ) return await view_embedder.run(documents=convert_to_view["documents"]) @timer def write_view(embed_view: Dict[str, Any], view_writer: DocumentWriter) -> None: - logger.debug(f"input in write_view: {embed_view}") + logger.debug( + f"input in write_view: {orjson.dumps(embed_view, option=orjson.OPT_INDENT_2).decode()}" + ) return view_writer.run(documents=embed_view["documents"]) diff --git 
a/wren-ai-service/src/pipelines/sql_explanation/generation.py b/wren-ai-service/src/pipelines/sql_explanation/generation.py index 008699c55c..14aec018cc 100644 --- a/wren-ai-service/src/pipelines/sql_explanation/generation.py +++ b/wren-ai-service/src/pipelines/sql_explanation/generation.py @@ -228,7 +228,9 @@ def run(self, replies: List[str]) -> Dict[str, Any]: def preprocess( sql_analysis_results: List[dict], pre_processor: SQLAnalysisPreprocessor ) -> List[dict]: - logger.debug(f"sql_analysis_results: {sql_analysis_results}") + logger.debug( + f"sql_analysis_results: {orjson.dumps(sql_analysis_results, option=orjson.OPT_INDENT_2).decode()}" + ) return pre_processor.run(sql_analysis_results) @@ -243,7 +245,9 @@ def prompt( ) -> dict: logger.debug(f"question: {question}") logger.debug(f"sql: {sql}") - logger.debug(f"preprocess: {preprocess}") + logger.debug( + f"preprocess: {orjson.dumps(preprocess, option=orjson.OPT_INDENT_2).decode()}" + ) logger.debug(f"sql_summary: {sql_summary}") logger.debug(f"full_sql: {full_sql}") return prompt_builder.run( @@ -257,13 +261,15 @@ def prompt( @async_timer async def generate(prompt: dict, generator: Any) -> dict: - logger.debug(f"prompt: {prompt}") + logger.debug(f"prompt: {orjson.dumps(prompt, option=orjson.OPT_INDENT_2).decode()}") return await generator.run(prompt=prompt.get("prompt")) @timer def post_process(generate: dict, post_processor: GenerationPostProcessor) -> dict: - logger.debug(f"generate: {generate}") + logger.debug( + f"generate: {orjson.dumps(generate, option=orjson.OPT_INDENT_2).decode()}" + ) return post_processor.run(generate.get("replies")) diff --git a/wren-ai-service/src/pipelines/sql_regeneration/generation.py b/wren-ai-service/src/pipelines/sql_regeneration/generation.py index eed254eaba..cfce0aaaeb 100644 --- a/wren-ai-service/src/pipelines/sql_regeneration/generation.py +++ b/wren-ai-service/src/pipelines/sql_regeneration/generation.py @@ -105,7 +105,9 @@ def sql_regeneration_prompt( preprocess: Dict[str, Any], sql_regeneration_prompt_builder: PromptBuilder, ) -> dict: - logger.debug(f"preprocess: {preprocess}") + logger.debug( + f"preprocess: {orjson.dumps(preprocess, option=orjson.OPT_INDENT_2).decode()}" + ) return sql_regeneration_prompt_builder.run(results=preprocess["results"]) @@ -114,7 +116,9 @@ async def sql_regeneration_generate( sql_regeneration_prompt: dict, sql_regeneration_generator: Any, ) -> dict: - logger.debug(f"sql_regeneration_prompt: {sql_regeneration_prompt}") + logger.debug( + f"sql_regeneration_prompt: {orjson.dumps(sql_regeneration_prompt, option=orjson.OPT_INDENT_2).decode()}" + ) return await sql_regeneration_generator.run( prompt=sql_regeneration_prompt.get("prompt") ) @@ -125,7 +129,9 @@ def sql_regeneration_post_process( sql_regeneration_generate: dict, sql_regeneration_post_processor: SQLRegenerationPostProcessor, ) -> dict: - logger.debug(f"sql_regeneration_generate: {sql_regeneration_generate}") + logger.debug( + f"sql_regeneration_generate: {orjson.dumps(sql_regeneration_generate, option=orjson.OPT_INDENT_2).decode()}" + ) return sql_regeneration_post_processor.run( replies=sql_regeneration_generate.get("replies"), )
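Note: every call site in this patch repeats the same orjson.dumps(..., option=orjson.OPT_INDENT_2).decode() expression, and orjson.dumps raises orjson.JSONEncodeError for values it cannot serialize natively (for example Haystack Document objects, which several of these pipelines pass to logger.debug). A small helper along the following lines could centralize the pattern; this is only a sketch, and the pretty() name and the to_dict()/str() fallbacks are assumptions, not part of this patch.

import orjson


def _jsonable(obj):
    # Assumption: objects such as haystack.Document expose to_dict();
    # anything else falls back to str() so a debug log never raises.
    if hasattr(obj, "to_dict"):
        return obj.to_dict()
    return str(obj)


def pretty(obj) -> str:
    """Serialize obj as 2-space-indented JSON for debug logging."""
    return orjson.dumps(obj, option=orjson.OPT_INDENT_2, default=_jsonable).decode()


# usage, mirroring the call sites in this patch:
# logger.debug(f"documents: {pretty(documents)}")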