Skip to content

Commit 97f1812

Browse files
authored
Bug 1988207 - Look for schema in output_dir in glean_app_ping_views (#8096)
* Bug 1988207 - Look for schema in output_dir in glean_app_ping_views
* Add default sql_dir
1 parent 92d2bc8 commit 97f1812

File tree

3 files changed

+86
-16
lines changed

3 files changed

+86
-16
lines changed

bigquery_etl/schema/__init__.py

Lines changed: 20 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -363,16 +363,26 @@ def recurse_fields(
363363

364364
if node_name in source_schema_nodes: # field exists in app schema
365365
# field matches, can query as-is
366-
if node == source_schema_nodes[node_name] and (
367-
# don't need to unnest scalar
368-
dtype != "RECORD"
369-
or not unnest_structs
370-
# reached max record depth to unnest
371-
or len(node_path) > max_unnest_depth > 0
372-
# field not in unnest allowlist
373-
or (
374-
unnest_allowlist is not None
375-
and node_path[0] not in unnest_allowlist
366+
if (
367+
dtype == source_schema_nodes[node["name"]]["type"]
368+
and (
369+
# fields must match for records, otherwise only type needs to match
370+
# note that this fields comparison will be false if nested field descriptions don't match
371+
dtype != "RECORD"
372+
or node["fields"]
373+
== source_schema_nodes[node_name]["fields"]
374+
)
375+
and (
376+
# don't need to unnest scalar
377+
dtype != "RECORD"
378+
or not unnest_structs
379+
# reached max record depth to unnest
380+
or len(node_path) > max_unnest_depth > 0
381+
# field not in unnest allowlist
382+
or (
383+
unnest_allowlist is not None
384+
and node_path[0] not in unnest_allowlist
385+
)
376386
)
377387
):
378388
if (

sql_generators/glean_usage/glean_app_ping_views.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -116,13 +116,14 @@ def _process_ping(ping_name):
116116
if channel_dataset_view in ignored_pings:
117117
continue
118118

119-
sql_dir = Path(ConfigLoader.get("default", "sql_dir", fallback="sql"))
119+
# look for schema in output_dir because `bqetl generate all` runs stable_views first
120+
sql_dir = (
121+
output_dir
122+
or Path(ConfigLoader.get("default", "sql_dir", fallback="sql"))
123+
/ project_id
124+
)
120125
existing_schema_path = (
121-
sql_dir
122-
/ "moz-fx-data-shared-prod"
123-
/ channel_dataset
124-
/ view_name
125-
/ SCHEMA_FILE
126+
sql_dir / channel_dataset / view_name / SCHEMA_FILE
126127
)
127128

128129
schema = None

tests/schema/test_schema.py

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -689,3 +689,62 @@ def test_generate_select_expression_unnest_allowlist():
689689
unnest_structs=True, unnest_allowlist=["record2"]
690690
)
691691
assert reformat(unnest_expr) == reformat(expected_expr)
692+
693+
694+
def test_generate_compatible_select_expression_description():
695+
source_schema = {
696+
"fields": [
697+
{"name": "scalar", "type": "INTEGER", "description": "abc"},
698+
{
699+
"name": "record",
700+
"type": "RECORD",
701+
"fields": [
702+
{"name": "nested", "type": "DATE"},
703+
],
704+
},
705+
{
706+
"name": "record2",
707+
"type": "RECORD",
708+
"fields": [
709+
{"name": "nested", "type": "DATE"},
710+
],
711+
"description": "abc",
712+
},
713+
]
714+
}
715+
target_schema = {
716+
"fields": [
717+
{"name": "scalar", "type": "INTEGER"},
718+
{
719+
"name": "record",
720+
"type": "RECORD",
721+
"fields": [
722+
{"name": "nested", "type": "DATE", "description": "abc"},
723+
],
724+
},
725+
{
726+
"name": "record2",
727+
"type": "RECORD",
728+
"fields": [
729+
{"name": "nested", "type": "DATE"},
730+
],
731+
},
732+
]
733+
}
734+
735+
# RECORDs will be unnested if nested descriptions don't match
736+
expected_expr = """
737+
scalar,
738+
STRUCT(
739+
record.nested
740+
) AS `record`,
741+
record2
742+
"""
743+
744+
source = Schema.from_json(source_schema)
745+
target = Schema.from_json(target_schema)
746+
747+
select_expr = source.generate_compatible_select_expression(
748+
target, unnest_structs=False
749+
)
750+
assert reformat(select_expr) == reformat(expected_expr)

0 commit comments

Comments
 (0)