diff --git a/CHANGELOG.md b/CHANGELOG.md index d6b650e..d3f91b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## [1.4.0] - 2024-05-23 + +### Changed + +- Helper function that generates lineage files to omit empty properties + +### Fixed + +- Conversion tool for simple and advanced source code mappings + ## [1.3.0] - 2024-01-04 ### Added diff --git a/setup.cfg b/setup.cfg index 49a5856..e127894 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = custom-technical-lineage -version = 1.3.0 +version = 1.4.0 author = Kristof Van Coillie author_email = kristof.vancoillie@collibra.com description = Helper scripts for custom technical lineage diff --git a/src/helper.py b/src/helper.py index 4512c05..11a244e 100644 --- a/src/helper.py +++ b/src/helper.py @@ -59,7 +59,7 @@ def generate_json_files( # creating lineage.json with open(custom_lineage_config.output_directory_path / "lineage.json", "w") as out_file: - json.dump(lineages, out_file, default=pydantic_encoder) + json.dump([lineage.model_dump(exclude_none=True) for lineage in lineages], out_file, default=pydantic_encoder) # creating metadata.json with open(custom_lineage_config.output_directory_path / "metadata.json", "w") as out_file: diff --git a/tests/test_data/conversion/lineage.json b/tests/test_data/conversion/lineage.json index a062ace..2f9eac2 100644 --- a/tests/test_data/conversion/lineage.json +++ b/tests/test_data/conversion/lineage.json @@ -105,8 +105,27 @@ "column": "col2" } ], - "mapping": "VIEW1 creation", - "source_code": "SELECT col1, col2 from T1;" - }] + "mapping_ref": { + "mapping": "VIEW1_creation", + "source_code": "transforms.sql", + "codebase_pos": [ + { + "pos_start": 0, + "pos_len": 26 + } + ] + } + } + ], + "codebase_files": { + "transforms.sql": { + "mapping_refs": { + "VIEW1_creation": { + "pos_start": 0, + "pos_len": 26 + } + } + } + } } \ No newline at end of file diff --git a/tests/test_data/conversion/lineage_v3.json b/tests/test_data/conversion/lineage_v3.json index ead3afa..d546884 100644 --- a/tests/test_data/conversion/lineage_v3.json +++ b/tests/test_data/conversion/lineage_v3.json @@ -22,8 +22,7 @@ "leaf": { "name": "col1", "type": "Column" - }, - "props": null + } }, "trg": { "nodes": [ @@ -47,10 +46,12 @@ "leaf": { "name": "col1", "type": "Column" - }, - "props": null + } }, - "source_code": null + "source_code": { + "path": "source_codes/uuid.txt", + "transformation_display_name": "VIEW1 creation" + } }, { "src": { @@ -75,8 +76,7 @@ "leaf": { "name": "col2", "type": "Column" - }, - "props": null + } }, "trg": { "nodes": [ @@ -100,9 +100,17 @@ "leaf": { "name": "col2", "type": "Column" - }, - "props": null + } }, - "source_code": null + "source_code": { + "path": "source_codes/uuid.txt", + "highlights": [ + { + "start": 0, + "len": 26 + } + ], + "transformation_display_name": "VIEW1_creation" + } } ] \ No newline at end of file diff --git a/tests/test_data/conversion/lineage_v3_no_source_code.json b/tests/test_data/conversion/lineage_v3_no_source_code.json new file mode 100644 index 0000000..3e914af --- /dev/null +++ b/tests/test_data/conversion/lineage_v3_no_source_code.json @@ -0,0 +1,102 @@ +[ + { + "src": { + "nodes": [ + { + "name": "snowflake", + "type": "System" + }, + { + "name": "DB1", + "type": "Database" + }, + { + "name": "PUBLIC", + "type": "Schema" + } + ], + "parent": { + "name": "T1", + "type": "Table" + }, + "leaf": { + "name": "col1", + "type": "Column" + } + }, + "trg": { + "nodes": [ + { + "name": "snowflake", + "type": "System" + }, + { + "name": "DB1", + "type": "Database" + }, + { + "name": "PUBLIC", + "type": "Schema" + } + ], + "parent": { + "name": "VIEW1", + "type": "Table" + }, + "leaf": { + "name": "col1", + "type": "Column" + } + } + }, + { + "src": { + "nodes": [ + { + "name": "snowflake", + "type": "System" + }, + { + "name": "DB1", + "type": "Database" + }, + { + "name": "PUBLIC", + "type": "Schema" + } + ], + "parent": { + "name": "T1", + "type": "Table" + }, + "leaf": { + "name": "col2", + "type": "Column" + } + }, + "trg": { + "nodes": [ + { + "name": "snowflake", + "type": "System" + }, + { + "name": "DB1", + "type": "Database" + }, + { + "name": "PUBLIC", + "type": "Schema" + } + ], + "parent": { + "name": "VIEW1", + "type": "Table" + }, + "leaf": { + "name": "col2", + "type": "Column" + } + } + } +] \ No newline at end of file diff --git a/tests/test_data/conversion/transforms.sql b/tests/test_data/conversion/transforms.sql new file mode 100644 index 0000000..8dd9d01 --- /dev/null +++ b/tests/test_data/conversion/transforms.sql @@ -0,0 +1 @@ +SELECT col1, col2 from T1; \ No newline at end of file diff --git a/tests/test_data/csv/lineage_v3.json b/tests/test_data/csv/lineage_v3.json index d43f920..b26b7f8 100644 --- a/tests/test_data/csv/lineage_v3.json +++ b/tests/test_data/csv/lineage_v3.json @@ -50,8 +50,7 @@ "leaf": { "name": "UI_2L", "type": "Column" - }, - "props": null + } }, "source_code": { "path": "source_codes/uuid.txt", @@ -91,8 +90,7 @@ "leaf": { "name": "USERID", "type": "Column" - }, - "props": null + } }, "trg": { "nodes": [ @@ -116,8 +114,7 @@ "leaf": { "name": "UI_2L", "type": "Column" - }, - "props": null + } }, "source_code": { "path": "source_codes/uuid.txt", @@ -181,9 +178,7 @@ "leaf": { "name": "UI_2L", "type": "Column" - }, - "props": null - }, - "source_code": null + } + } } ] \ No newline at end of file diff --git a/tests/test_translate_to_batch_format.py b/tests/test_translate_to_batch_format.py index 6c53045..b2fb887 100644 --- a/tests/test_translate_to_batch_format.py +++ b/tests/test_translate_to_batch_format.py @@ -4,6 +4,37 @@ from tools.translate_to_batch_format import convert +def test_translate_with_simple_and_advanced_source_code() -> None: + convert( + input_directory="./test_data/conversion", + output_directory="./test_data/conversion/v3", + migrate_source_code=True, + ) + + # compare converted with expected + with open("./test_data/conversion/metadata.json") as input_file: + expected_metadata = json.load(input_file) + + with open("./test_data/conversion/v3/metadata.json") as input_file: + generated_metadata = json.load(input_file) + + with open("./test_data/conversion/lineage_v3.json") as input_file: + expected_lineage = json.load(input_file) + + with open("./test_data/conversion/v3/lineage.json") as input_file: + generated_lineage = json.load(input_file) + + for lineage in generated_lineage: + if lineage.get("source_code"): + lineage["source_code"]["path"] = "source_codes/uuid.txt" + + assert expected_metadata == generated_metadata + assert expected_lineage == generated_lineage + + # cleanup + shutil.rmtree("./test_data/conversion/v3", ignore_errors=True) + + def test_translat_without_source_code() -> None: # convert input convert( @@ -19,7 +50,7 @@ def test_translat_without_source_code() -> None: with open("./test_data/conversion/v3/metadata.json") as input_file: generated_metadata = json.load(input_file) - with open("./test_data/conversion/lineage_v3.json") as input_file: + with open("./test_data/conversion/lineage_v3_no_source_code.json") as input_file: expected_lineage = json.load(input_file) with open("./test_data/conversion/v3/lineage.json") as input_file: diff --git a/tools/translate_to_batch_format.py b/tools/translate_to_batch_format.py index 3531c21..f65a16e 100644 --- a/tools/translate_to_batch_format.py +++ b/tools/translate_to_batch_format.py @@ -67,7 +67,7 @@ def _convert_lineage_source( if "mapping_ref" in lineage_relationship_v1: source_code_file_v1 = lineage_relationship_v1.get("mapping_ref", {}).get("source_code", "") mapping_v1 = lineage_relationship_v1.get("mapping_ref", {}).get("mapping", "") - codebase_pos_v1 = lineage_relationship_v1.get("codebase_pos", []) + codebase_pos_v1 = lineage_relationship_v1.get("mapping_ref", {}).get("codebase_pos", []) if not source_code_file_v1: return None @@ -88,7 +88,7 @@ def _convert_lineage_source( custom_lineage_config=custom_lineage_config, transformation_display_name=mapping_v1, highlights=[ - SourceCodeHighLight(start=highlight_v1["post_start"], len=highlight_v1["pos_len"]) + SourceCodeHighLight(start=highlight_v1["pos_start"], len=highlight_v1["pos_len"]) for highlight_v1 in codebase_pos_v1 ], )