From 5bcc50f0f0c094a40fe3aae71710b685f8a6fd1e Mon Sep 17 00:00:00 2001
From: Grigory Statsenko <gstatsenko@nebius.com>
Date: Wed, 18 Oct 2023 14:33:58 +0200
Subject: [PATCH] Updated generator of formula ref example data

---
 .../dl_formula_ref/generator.py               | 34 ++++++-------
 .../dl_formula_ref/scripts/common.py          |  5 ++
 .../dl_formula_ref/scripts/example_data.py    | 50 ++++++++++++++++---
 .../dl_formula_ref/scripts/formula_doc.py     |  5 +-
 .../db/examples/test_preparation.py           | 26 ++++++----
 lib/dl_formula_ref/pyproject.toml             |  4 +-
 6 files changed, 83 insertions(+), 41 deletions(-)
 create mode 100644 lib/dl_formula_ref/dl_formula_ref/scripts/common.py

diff --git a/lib/dl_formula_ref/dl_formula_ref/generator.py b/lib/dl_formula_ref/dl_formula_ref/generator.py
index bbc1fce26..f45a977d6 100644
--- a/lib/dl_formula_ref/dl_formula_ref/generator.py
+++ b/lib/dl_formula_ref/dl_formula_ref/generator.py
@@ -7,12 +7,9 @@
 import os
 from typing import (
     Collection,
-    Dict,
     Iterable,
-    List,
     Optional,
     Sequence,
-    Tuple,
 )
 
 import attr
@@ -105,7 +102,7 @@ class ReferenceDocGenerator:
 
     _gen_config: RefDocGeneratorConfig = attr.ib(init=False)
     _func_ref: FuncReference = attr.ib(init=False)
-    _renderers_by_tmpl: Dict[Tuple[FuncPathTemplate, CatPathTemplate], FuncRenderer] = attr.ib(init=False, factory=dict)
+    _renderers_by_tmpl: dict[tuple[FuncPathTemplate, CatPathTemplate], FuncRenderer] = attr.ib(init=False, factory=dict)
     _jinja_env: jinja2.Environment = attr.ib(init=False)
 
     def __attrs_post_init__(self) -> None:
@@ -115,6 +112,9 @@ def __attrs_post_init__(self) -> None:
         )
         self._jinja_env = get_jinja_env(self._gen_config)
 
+    def func_ref(self) -> FuncReference:
+        return self._func_ref
+
     def _get_renderer(self, doc_config: FuncDocTemplateConfig) -> FuncRenderer:
         path_renderer = PathRenderer(
             func_ref=self._func_ref,
@@ -147,9 +147,9 @@ def _get_single_rendered_func(
 
     def _render_funcs(
         self,
-        raw_funcs: List[RawMultiAudienceFunc],
+        raw_funcs: list[RawMultiAudienceFunc],
         doc_config: FuncDocTemplateConfig,
-    ) -> List[RenderedMultiAudienceFunc]:
+    ) -> list[RenderedMultiAudienceFunc]:
         func_doc_structs = []
         for raw_func in raw_funcs:
             func_key = RefFunctionKey.normalized(name=raw_func.name, category_name=raw_func.category.name)
@@ -172,8 +172,8 @@ def generate_doc_func(self, outdir: str):
                 print(full_path)
 
     def _group_raw_funcs_by_category(
-        self, raw_funcs: List[RawMultiAudienceFunc]
-    ) -> Dict[str, List[RawMultiAudienceFunc]]:
+        self, raw_funcs: list[RawMultiAudienceFunc]
+    ) -> dict[str, list[RawMultiAudienceFunc]]:
         funcs_by_category = defaultdict(list)
         for func in raw_funcs:
             funcs_by_category[func.category.name].append(func)
@@ -231,7 +231,7 @@ def _generate_doc_list(
         context_path: str,
         title: str,
         description: str,
-        rend_funcs: List[RenderedMultiAudienceFunc],
+        rend_funcs: list[RenderedMultiAudienceFunc],
         in_category: bool,
         meta_title: str = "",
         meta_description: str = "",
@@ -415,7 +415,7 @@ def _get_func_base_class(name: str) -> Optional[type]:
 
         return None
 
-    def _get_func_source_info(self, name: str) -> Tuple[str, int]:
+    def _get_func_source_info(self, name: str) -> tuple[str, int]:
         """Get file name and line number for given function"""
         func_cls = self._get_func_base_class(name)
         assert func_cls is not None
@@ -424,23 +424,23 @@ def _get_func_source_info(self, name: str) -> Tuple[str, int]:
         lineno = inspect.getsourcelines(func_cls)[1]
         return filename, lineno
 
-    def _load_db_config(self) -> Dict[DialectCombo, Db]:
-        with open(self._gen_config.db_config_file) as config_file:
+    def generate_example_data(self, db_config_path: str, output_path: str, default_dialect: DialectCombo) -> None:
+        # TODO: Move out of this class
+        with open(db_config_path) as config_file:
             raw_config: dict = json.load(config_file)
-        return {
+
+        db_by_dialect = {
             get_dialect_from_str(d_name): make_db_from_config(
                 make_db_config(dialect=get_dialect_from_str(d_name), url=d_url)
             )
             for d_name, d_url in raw_config.items()
         }
 
-    def generate_example_data(self) -> None:
         raw_funcs = self._func_ref.as_list()
-        db_by_dialect = self._load_db_config()
         preparer = DataPreparer(
-            storage_filename=self._gen_config.example_data_file,
+            storage_filename=output_path,
             db_by_dialect=db_by_dialect,
-            default_example_dialect=self._gen_config.default_example_dialect,
+            default_example_dialect=default_dialect,
         )
         examples: list[ExampleBase]
         for multi_func in raw_funcs:
diff --git a/lib/dl_formula_ref/dl_formula_ref/scripts/common.py b/lib/dl_formula_ref/dl_formula_ref/scripts/common.py
new file mode 100644
index 000000000..a265b4a9f
--- /dev/null
+++ b/lib/dl_formula_ref/dl_formula_ref/scripts/common.py
@@ -0,0 +1,5 @@
+from dl_formula_ref.config import ConfigVersion
+
+
+def conf_version_type(s: str) -> ConfigVersion:
+    return ConfigVersion[s]
diff --git a/lib/dl_formula_ref/dl_formula_ref/scripts/example_data.py b/lib/dl_formula_ref/dl_formula_ref/scripts/example_data.py
index 0d15970a9..17f5bc9f2 100644
--- a/lib/dl_formula_ref/dl_formula_ref/scripts/example_data.py
+++ b/lib/dl_formula_ref/dl_formula_ref/scripts/example_data.py
@@ -3,22 +3,43 @@
 import argparse
 
 from dl_db_testing.loader import load_db_testing_lib
+from dl_formula.core.dialect import (
+    DialectCombo,
+    get_dialect_from_str,
+)
 from dl_formula_ref.generator import (
     ConfigVersion,
     ReferenceDocGenerator,
 )
 from dl_formula_ref.loader import load_formula_ref
+from dl_formula_ref.scripts.common import conf_version_type
+
+
+def dialect_type(s: str) -> DialectCombo:
+    return get_dialect_from_str(s)
 
 
-parser = argparse.ArgumentParser(prog="Example data management tool")
-subparsers = parser.add_subparsers(title="command", dest="command")
+def get_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(prog="Example data management tool")
+    subparsers = parser.add_subparsers(title="command", dest="command")
 
-subparsers.add_parser("generate", help="Generate data for examples")
+    generate_parser = subparsers.add_parser("generate", help="Generate data for examples")
+    generate_parser.add_argument("--output", required=True, help="Output file path")
+    generate_parser.add_argument("--db-config", required=True, help="Database configuration file")
+    generate_parser.add_argument(
+        "--default-dialect", type=dialect_type, required=True, help="Default dialect to use for example data generation"
+    )
+    generate_parser.add_argument(
+        "--config-version", type=conf_version_type, default=ConfigVersion.default.name, help="Configuration version"
+    )
+    return parser
 
 
 class ExampleDataTool:
     @classmethod
-    def generate_example_data(cls):
+    def generate_example_data(
+        cls, config_version: ConfigVersion, output_path: str, db_config_path: str, default_dialect: DialectCombo
+    ):
         """
         Requires a DB URL mapping in local file `dl_formula_ref/db_config.json`
         with the following format:
@@ -30,8 +51,12 @@ def generate_example_data(cls):
         defining all the required database types:
         - CLICKHOUSE_21_8
         """
-        ref_doc_generator = ReferenceDocGenerator(locale="en", config_version=ConfigVersion.default)
-        ref_doc_generator.generate_example_data()
+        ref_doc_generator = ReferenceDocGenerator(locale="en", config_version=config_version)
+        ref_doc_generator.generate_example_data(
+            output_path=output_path,
+            db_config_path=db_config_path,
+            default_dialect=default_dialect,
+        )
 
         print("Generated data successfully")
 
@@ -39,13 +64,22 @@ def generate_example_data(cls):
     def run(cls, args):
         tool = cls()
 
-        if args.command == "generate":
-            tool.generate_example_data()
+        match args.command:
+            case "generate":
+                tool.generate_example_data(
+                    config_version=args.config_version,
+                    output_path=args.output,
+                    db_config_path=args.db_config,
+                    default_dialect=args.default_dialect,
+                )
+            case _:
+                raise RuntimeError(f"Invalid command {args.command}")
 
 
 def main():
     load_formula_ref()
     load_db_testing_lib()
+    parser = get_parser()
     ExampleDataTool.run(parser.parse_args())
 
 
diff --git a/lib/dl_formula_ref/dl_formula_ref/scripts/formula_doc.py b/lib/dl_formula_ref/dl_formula_ref/scripts/formula_doc.py
index a5e39c370..9c8d7f4d3 100644
--- a/lib/dl_formula_ref/dl_formula_ref/scripts/formula_doc.py
+++ b/lib/dl_formula_ref/dl_formula_ref/scripts/formula_doc.py
@@ -9,10 +9,7 @@
 )
 from dl_formula_ref.loader import load_formula_ref
 from dl_formula_ref.localization import DEFAULT_LOCALE
-
-
-def conf_version_type(s: str) -> ConfigVersion:
-    return ConfigVersion[s]
+from dl_formula_ref.scripts.common import conf_version_type
 
 
 parser = argparse.ArgumentParser(prog="Formula documentation command line tool")
diff --git a/lib/dl_formula_ref/dl_formula_ref_tests/db/examples/test_preparation.py b/lib/dl_formula_ref/dl_formula_ref_tests/db/examples/test_preparation.py
index d552579d3..48ffcf8fe 100644
--- a/lib/dl_formula_ref/dl_formula_ref_tests/db/examples/test_preparation.py
+++ b/lib/dl_formula_ref/dl_formula_ref_tests/db/examples/test_preparation.py
@@ -13,7 +13,7 @@
 
 
 @pytest.fixture(scope="function")
-def example_db_conf_patch(monkeypatch, all_db_configurations, dbe):
+def example_db_config_file(all_db_configurations, dbe):
     db_conf_data = {
         "CLICKHOUSE_22_10": all_db_configurations[ClickHouseDialect.CLICKHOUSE_22_10],
     }
@@ -22,23 +22,29 @@ def example_db_conf_patch(monkeypatch, all_db_configurations, dbe):
         with open(db_conf_f.name, "w") as db_conf_f_w:
             json.dump(db_conf_data, db_conf_f_w)
 
-        monkeypatch.setattr(DOC_GEN_CONFIG_DEFAULT, "db_config_file", db_conf_f.name)
-        monkeypatch.setattr(DOC_GEN_CONFIG_DEFAULT, "default_example_dialect", ClickHouseDialect.CLICKHOUSE_22_10)
-        yield
+        yield db_conf_f.name
 
 
 @pytest.fixture(scope="function")
-def example_data_file_patch(monkeypatch):
+def config_example_dialect_patch(monkeypatch):
+    monkeypatch.setattr(DOC_GEN_CONFIG_DEFAULT, "default_example_dialect", ClickHouseDialect.CLICKHOUSE_22_10)
+
+
+@pytest.fixture(scope="function")
+def example_data_file():
     with tempfile.NamedTemporaryFile() as ex_data_f:
-        monkeypatch.setattr(DOC_GEN_CONFIG_DEFAULT, "example_data_file", ex_data_f.name)
-        yield
+        yield ex_data_f.name
 
 
-def test_prepare_example_data(example_db_conf_patch, example_data_file_patch):
+def test_prepare_example_data(example_db_config_file, example_data_file, config_example_dialect_patch):
     gen = ReferenceDocGenerator(config_version=ConfigVersion.default, locale="en")
-    gen.generate_example_data()
+    gen.generate_example_data(
+        output_path=example_data_file,
+        db_config_path=example_db_config_file,
+        default_dialect=ClickHouseDialect.CLICKHOUSE_22_10,
+    )
 
-    with open(DOC_GEN_CONFIG_DEFAULT.example_data_file, "rb") as ex_data_f:
+    with open(example_data_file, "rb") as ex_data_f:
         data: list = json.load(ex_data_f)
         keys = {item[0] for item in data}
         assert "ABS.Example" in keys
diff --git a/lib/dl_formula_ref/pyproject.toml b/lib/dl_formula_ref/pyproject.toml
index f88dbf5d0..62fe37f7d 100644
--- a/lib/dl_formula_ref/pyproject.toml
+++ b/lib/dl_formula_ref/pyproject.toml
@@ -45,8 +45,8 @@ root_dir = "dl_formula_ref_tests/"
 target_path = "db"
 
 [tool.poetry.scripts]
-bi-formula-ref-doc = "dl_formula_ref.scripts.formula_doc:main"
-bi-formula-ref-exdata = "dl_formula_ref.scripts.example_data:main"
+dl-formula-ref-doc = "dl_formula_ref.scripts.formula_doc:main"
+dl-formula-ref-exdata = "dl_formula_ref.scripts.example_data:main"
 
 [tool.poetry.plugins]
 [tool.poetry.plugins."dl_formula_ref.plugins"]