diff --git a/ibis-server/pyproject.toml b/ibis-server/pyproject.toml index 9e78bc6a8..d482cd139 100644 --- a/ibis-server/pyproject.toml +++ b/ibis-server/pyproject.toml @@ -62,6 +62,7 @@ markers = [ [tool.ruff] line-length = 88 target-version = "py311" +exclude = ["tools/"] [tool.ruff.lint] select = [ diff --git a/ibis-server/tools/README.md b/ibis-server/tools/README.md new file mode 100644 index 000000000..6dd73c856 --- /dev/null +++ b/ibis-server/tools/README.md @@ -0,0 +1,10 @@ +# Description + +This folder contains useful tools and scripts for debugging and validation. + +# Tools +- `mdl_validation.py`: Used to validate a Wren MDL. This script attempts to select all columns in all models. + - Requires the `wren_core` library. Run `just install-core` and `just install` before using it. +``` +poetry run python tools/mdl_validation.py mdl.json function_list/bigquery.csv +``` diff --git a/ibis-server/tools/mdl_validation.py b/ibis-server/tools/mdl_validation.py new file mode 100644 index 000000000..d8d4dc5a0 --- /dev/null +++ b/ibis-server/tools/mdl_validation.py @@ -0,0 +1,47 @@ +# +# This script is used to validate the MDL file by transforming the SQL queries +# in the MDL file to the SQL queries. This script only validates whether they can be transformed +# by Wren core. It does not validate that the SQL can be executed by the data source. 
#
# Arguments:
# - manifest_json_path: Path to the JSON file
# - function_list_path: Path to the function list CSV file
#

import argparse
import base64
import json


def quote_identifier(name):
    """Return *name* wrapped in double quotes as a SQL identifier.

    Embedded double quotes are doubled so that a model or column name
    containing `"` cannot close the identifier early and corrupt the
    generated statement. Names without quotes are rendered unchanged.
    """
    return '"' + str(name).replace('"', '""') + '"'


def encode_manifest(mdl):
    """Serialize *mdl* to a JSON string and return it base64-encoded (as str)."""
    json_str = json.dumps(mdl)
    return base64.b64encode(json_str.encode("utf-8")).decode("utf-8")


def iter_column_queries(mdl):
    """Yield ``(model, column, sql)`` for every non-hidden column in *mdl*.

    A missing or null ``models`` / ``columns`` entry is treated as empty
    instead of raising, so a manifest without models simply yields nothing.
    """
    for model in mdl.get("models") or []:
        for column in model.get("columns") or []:
            # ignore hidden columns
            # NOTE(review): only the key `is_hidden` is checked; confirm this
            # matches the spelling used in real manifests.
            if column.get("is_hidden"):
                continue
            column_ref = quote_identifier(column["name"])
            model_ref = quote_identifier(model["name"])
            yield model, column, f"select {column_ref} from {model_ref}"


def find_transform_errors(mdl, session_context):
    """Try to transform a select of each visible column and collect failures.

    *session_context* is any object exposing ``transform_sql(sql)``. Each
    failure is printed together with the underlying error so the cause is
    visible, and returned as a ``(model, column)`` tuple.
    """
    error_cases = []
    for model, column, sql in iter_column_queries(mdl):
        try:
            # Only success/failure matters; the transformed SQL is not used.
            session_context.transform_sql(sql)
        except Exception as err:
            error_cases.append((model, column))
            print(f"Error transforming {model['name']} {column['name']}: {err}")
    return error_cases


def main():
    """Parse the command line, validate the manifest, and raise on failures.

    Raises:
        Exception: if at least one column could not be transformed.
    """
    # Set up argument parsing
    parser = argparse.ArgumentParser(description="Validate the MDL file")
    parser.add_argument("manifest_json_path", help="Path to the JSON file")
    parser.add_argument(
        "function_list_path", help="Path to the function list CSV file"
    )
    args = parser.parse_args()

    # Imported here rather than at module top so that `--help` and argument
    # errors still work when wren_core is not installed.
    from wren_core import SessionContext

    # Read the manifest with an explicit encoding instead of the platform default.
    with open(args.manifest_json_path, encoding="utf-8") as file:
        mdl = json.load(file)

    session_context = SessionContext(encode_manifest(mdl), args.function_list_path)
    error_cases = find_transform_errors(mdl, session_context)
    if error_cases:
        raise Exception(f"Error transforming {len(error_cases)} columns")


if __name__ == "__main__":
    main()