Skip to content

Commit 24e88ba

Browse files
authored
Merge pull request #318 from bruin-data/hotfix/revert
Hotfix: Revert column lineage for schema
2 parents 15929bc + 2224eff commit 24e88ba

File tree

2 files changed

+62
-42
lines changed

2 files changed

+62
-42
lines changed

pythonsrc/parser/main.py

+9 -7
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
from dataclasses import dataclass
22

3-
import sqlglot
43
from sqlglot import parse_one, exp, lineage
54
from sqlglot.lineage import Node
65
from sqlglot.optimizer import optimize
@@ -112,8 +111,10 @@ def get_column_lineage(query: str, schema: dict, dialect: str):
112111
if not isinstance(parsed, exp.Query):
113112
return {"columns": []}
114113
try:
115-
optimized = optimize(parsed, schema, dialect=dialect)
116-
except Exception as e:
114+
optimized = optimize(
115+
parsed, schema_dict_to_schema_object(schema), dialect=dialect
116+
)
117+
except Exception:
117118
return {"columns": []}
118119

119120
result = []
@@ -182,19 +183,20 @@ def merge_parts(table: exp.Table) -> str:
182183
part.name for part in table.parts if isinstance(part, exp.Identifier)
183184
)
184185

186+
185187
def schema_dict_to_schema_object(schema_dict: dict) -> dict:
186188
result = {}
187-
189+
188190
for table_path, value in schema_dict.items():
189191
current = result
190-
parts = table_path.split('.')
191-
192+
parts = table_path.split(".")
193+
192194
# Handle all parts except the last one
193195
for part in parts[:-1]:
194196
if part not in current:
195197
current[part] = {}
196198
current = current[part]
197-
199+
198200
# Handle the last part
199201
current[parts[-1]] = value
200202

pythonsrc/parser/main_test.py

+53 -35
Original file line number | Diff line number | Diff line change
@@ -1047,62 +1047,62 @@
10471047
},
10481048
{
10491049
"name": "col1",
1050-
"type": "UNKNOWN",
1050+
"type": "TEXT",
10511051
"upstream": [{"column": "col1", "table": "dataset1.table1"}],
10521052
},
10531053
{
10541054
"name": "col1",
1055-
"type": "UNKNOWN",
1055+
"type": "TEXT",
10561056
"upstream": [{"column": "col1", "table": "dataset1.table1"}],
10571057
},
10581058
{
10591059
"name": "col2",
1060-
"type": "UNKNOWN",
1060+
"type": "TEXT",
10611061
"upstream": [{"column": "col2", "table": "dataset1.table1"}],
10621062
},
10631063
{
10641064
"name": "col2",
1065-
"type": "UNKNOWN",
1065+
"type": "TEXT",
10661066
"upstream": [{"column": "col2", "table": "dataset1.table1"}],
10671067
},
10681068
{
10691069
"name": "col3",
1070-
"type": "UNKNOWN",
1070+
"type": "TEXT",
10711071
"upstream": [{"column": "col3", "table": "dataset1.table1"}],
10721072
},
10731073
{
10741074
"name": "col3",
1075-
"type": "UNKNOWN",
1075+
"type": "TEXT",
10761076
"upstream": [{"column": "col3", "table": "dataset1.table1"}],
10771077
},
10781078
{
10791079
"name": "col3",
1080-
"type": "UNKNOWN",
1080+
"type": "TEXT",
10811081
"upstream": [{"column": "col3", "table": "dataset1.table1"}],
10821082
},
10831083
{
10841084
"name": "col4",
1085-
"type": "UNKNOWN",
1085+
"type": "TEXT",
10861086
"upstream": [{"column": "col4", "table": "dataset1.table1"}],
10871087
},
10881088
{
10891089
"name": "col4",
1090-
"type": "UNKNOWN",
1090+
"type": "TEXT",
10911091
"upstream": [{"column": "col4", "table": "dataset1.table1"}],
10921092
},
10931093
{
10941094
"name": "col4",
1095-
"type": "UNKNOWN",
1095+
"type": "TEXT",
10961096
"upstream": [{"column": "col4", "table": "dataset1.table1"}],
10971097
},
10981098
{
10991099
"name": "col5",
1100-
"type": "UNKNOWN",
1100+
"type": "TEXT",
11011101
"upstream": [{"column": "col5", "table": "dataset1.table1"}],
11021102
},
11031103
{
11041104
"name": "col6",
1105-
"type": "UNKNOWN",
1105+
"type": "TEXT",
11061106
"upstream": [{"column": "col6", "table": "dataset1.table1"}],
11071107
},
11081108
{
@@ -1112,27 +1112,27 @@
11121112
},
11131113
{
11141114
"name": "organization",
1115-
"type": "UNKNOWN",
1115+
"type": "TEXT",
11161116
"upstream": [{"column": "col1", "table": "dataset3.table3"}],
11171117
},
11181118
{
11191119
"name": "organizationid",
1120-
"type": "UNKNOWN",
1120+
"type": "TEXT",
11211121
"upstream": [{"column": "col2", "table": "dataset3.table3"}],
11221122
},
11231123
{
11241124
"name": "programname",
1125-
"type": "UNKNOWN",
1125+
"type": "TEXT",
11261126
"upstream": [{"column": "col1", "table": "dataset2.table2"}],
11271127
},
11281128
{
11291129
"name": "teamid",
1130-
"type": "UNKNOWN",
1130+
"type": "TEXT",
11311131
"upstream": [{"column": "col2", "table": "dataset6.table6"}],
11321132
},
11331133
{
11341134
"name": "teamname",
1135-
"type": "UNKNOWN",
1135+
"type": "TEXT",
11361136
"upstream": [{"column": "col1", "table": "dataset6.table6"}],
11371137
},
11381138
],
@@ -1248,62 +1248,62 @@
12481248
"expected": [
12491249
{
12501250
"name": "col1",
1251-
"type": "UNKNOWN",
1251+
"type": "TEXT",
12521252
"upstream": [{"column": "col1", "table": "project1.dataset1.table1"}],
12531253
},
12541254
{
12551255
"name": "col1",
1256-
"type": "UNKNOWN",
1256+
"type": "TEXT",
12571257
"upstream": [{"column": "col1", "table": "project1.dataset1.table1"}],
12581258
},
12591259
{
12601260
"name": "col2",
1261-
"type": "UNKNOWN",
1261+
"type": "TEXT",
12621262
"upstream": [{"column": "col2", "table": "project1.dataset1.table1"}],
12631263
},
12641264
{
12651265
"name": "col2",
1266-
"type": "UNKNOWN",
1266+
"type": "TEXT",
12671267
"upstream": [{"column": "col2", "table": "project1.dataset1.table1"}],
12681268
},
12691269
{
12701270
"name": "col3",
1271-
"type": "UNKNOWN",
1271+
"type": "TEXT",
12721272
"upstream": [{"column": "col3", "table": "project1.dataset1.table1"}],
12731273
},
12741274
{
12751275
"name": "col3",
1276-
"type": "UNKNOWN",
1276+
"type": "TEXT",
12771277
"upstream": [{"column": "col3", "table": "project1.dataset1.table1"}],
12781278
},
12791279
{
12801280
"name": "col3",
1281-
"type": "UNKNOWN",
1281+
"type": "TEXT",
12821282
"upstream": [{"column": "col3", "table": "project1.dataset1.table1"}],
12831283
},
12841284
{
12851285
"name": "col4",
1286-
"type": "UNKNOWN",
1286+
"type": "TEXT",
12871287
"upstream": [{"column": "col4", "table": "project1.dataset1.table1"}],
12881288
},
12891289
{
12901290
"name": "col4",
1291-
"type": "UNKNOWN",
1291+
"type": "TEXT",
12921292
"upstream": [{"column": "col4", "table": "project1.dataset1.table1"}],
12931293
},
12941294
{
12951295
"name": "col4",
1296-
"type": "UNKNOWN",
1296+
"type": "TEXT",
12971297
"upstream": [{"column": "col4", "table": "project1.dataset1.table1"}],
12981298
},
12991299
{
13001300
"name": "col5",
1301-
"type": "UNKNOWN",
1301+
"type": "TEXT",
13021302
"upstream": [{"column": "col5", "table": "project1.dataset1.table1"}],
13031303
},
13041304
{
13051305
"name": "col6",
1306-
"type": "UNKNOWN",
1306+
"type": "TEXT",
13071307
"upstream": [{"column": "col6", "table": "project1.dataset1.table1"}],
13081308
},
13091309
{
@@ -1313,12 +1313,12 @@
13131313
},
13141314
{
13151315
"name": "department",
1316-
"type": "UNKNOWN",
1316+
"type": "TEXT",
13171317
"upstream": [{"column": "col1", "table": "project3.dataset3.table3"}],
13181318
},
13191319
{
13201320
"name": "departmentid",
1321-
"type": "UNKNOWN",
1321+
"type": "TEXT",
13221322
"upstream": [{"column": "col2", "table": "project3.dataset3.table3"}],
13231323
},
13241324
{
@@ -1328,18 +1328,18 @@
13281328
},
13291329
{
13301330
"name": "programname",
1331-
"type": "UNKNOWN",
1331+
"type": "TEXT",
13321332
"upstream": [{"column": "col1", "table": "project2.dataset2.table2"}],
13331333
},
13341334
{"name": "project_credits", "type": "INT", "upstream": []},
13351335
{
13361336
"name": "projectid",
1337-
"type": "UNKNOWN",
1337+
"type": "TEXT",
13381338
"upstream": [{"column": "col2", "table": "project6.dataset6.table6"}],
13391339
},
13401340
{
13411341
"name": "projectname",
1342-
"type": "UNKNOWN",
1342+
"type": "TEXT",
13431343
"upstream": [{"column": "col1", "table": "project6.dataset6.table6"}],
13441344
},
13451345
],
@@ -1381,6 +1381,24 @@
13811381
},
13821382
],
13831383
},
1384+
{
1385+
"name": "simple nested table references",
1386+
"dialect": "bigquery",
1387+
"query": """SELECT mycol FROM raw.table1""",
1388+
"schema": {
1389+
"raw.table1": {
1390+
"mycol": "STRING",
1391+
}
1392+
},
1393+
"expected": [
1394+
{
1395+
"name": "mycol",
1396+
"type": "TEXT",
1397+
"upstream": [{"column": "mycol", "table": "raw.table1"}],
1398+
},
1399+
],
1400+
"expected_non_selected": [],
1401+
},
13841402
]
13851403

13861404

@@ -1416,4 +1434,4 @@ def test_extract_non_select_column(query, schema, expected, dialect):
14161434
parsed = parse_one(query, dialect=dialect)
14171435
optimized = optimize(parsed, schema, dialect=dialect)
14181436
result = extract_non_selected_columns(optimized)
1419-
assert result == expected
1437+
assert result == expected

0 commit comments

Comments
 (0)