Skip to content
This repository was archived by the owner on Sep 2, 2025. It is now read-only.

Commit 33167c1

Browse files
committed
Add an option to use INFORMATION_SCHEMA for partition info retrieval
1 parent b06d230 commit 33167c1

File tree

6 files changed

+82
-8
lines changed

6 files changed

+82
-8
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
kind: Features
2+
body: Add an option to use INFORMATION_SCHEMA for partition info retrieval
3+
time: 2023-08-07T23:55:39.31409+02:00
4+
custom:
5+
Author: Kayrnt
6+
Issue: "867"

dbt/adapters/bigquery/connections.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -598,15 +598,17 @@ def dry_run(self, sql: str) -> BigQueryAdapterResponse:
598598
def _bq_job_link(location, project_id, job_id) -> str:
599599
return f"https://console.cloud.google.com/bigquery?project={project_id}&j=bq:{location}:{job_id}&page=queryresults"
600600

601-
def get_partitions_metadata(self, table):
601+
def get_partitions_metadata(self, table, use_legacy_sql=False):
602602
def standard_to_legacy(table):
603603
return table.project + ":" + table.dataset + "." + table.identifier
604604

605-
legacy_sql = "SELECT * FROM [" + standard_to_legacy(table) + "$__PARTITIONS_SUMMARY__]"
605+
if use_legacy_sql:
606+
sql = "SELECT * FROM [" + standard_to_legacy(table) + "$__PARTITIONS_SUMMARY__]"
607+
else:
608+
sql = f"SELECT * FROM `{table.project}.{table.dataset}.INFORMATION_SCHEMA.PARTITIONS` WHERE TABLE_NAME = '{table.identifier}'"
606609

607-
sql = self._add_query_comment(legacy_sql)
608610
# auto_begin is ignored on bigquery, and only included for consistency
609-
_, iterator = self.raw_execute(sql, use_legacy_sql=True)
611+
_, iterator = self.raw_execute(self._add_query_comment(sql), use_legacy_sql=use_legacy_sql)
610612
return self.get_table_from_response(iterator)
611613

612614
def copy_bq_table(self, source, destination, write_disposition):

dbt/adapters/bigquery/impl.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ class PartitionConfig(dbtClassMixin):
7878
range: Optional[Dict[str, Any]] = None
7979
time_ingestion_partitioning: bool = False
8080
copy_partitions: bool = False
81+
partition_information: str = "model"
8182

8283
PARTITION_DATE = "_PARTITIONDATE"
8384
PARTITION_TIME = "_PARTITIONTIME"

dbt/include/bigquery/macros/etc.sql

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,17 @@
66
{% do adapter.grant_access_to(entity, entity_type, role, grant_target_dict) %}
77
{% endmacro %}
88

9-
{%- macro get_partitions_metadata(table) -%}
9+
{#
10+
This macro returns the partition matadata for provided table.
11+
The expected input is a table object (ie through a `source` or `ref`).
12+
The output contains the result from partitions informations for your input table.
13+
The details of the retrieved columns can be found on https://cloud.google.com/bigquery/docs/managing-partitioned-tables
14+
if use_legacy_sql is set to True, the query will be executed using legacy sql and access the data from __PARTITIONS_SUMMARY__ meta-table
15+
else it will leverage the INFORMATION_SCHEMA.PARTITIONS table.
16+
#}
17+
{%- macro get_partitions_metadata(table, use_legacy_sql = True) -%}
1018
{%- if execute -%}
11-
{%- set res = adapter.get_partitions_metadata(table) -%}
19+
{%- set res = adapter.get_partitions_metadata(table, use_legacy_sql) -%}
1220
{{- return(res) -}}
1321
{%- endif -%}
1422
{{- return(None) -}}

dbt/include/bigquery/macros/materializations/incremental_strategy/common.sql

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,57 @@
22

33
{#-- TODO: revisit partitioning with python models --#}
44
{%- if '_dbt_max_partition' in compiled_code and language == 'sql' -%}
5+
{%- if partition_by.partition_information == "information_schema" -%}
6+
{{ dbt_max_partition_from_information_schema_data_sql(relation, partition_by) }}
7+
{%- else -%}
8+
{{ dbt_max_partition_from_table_data_sql(relation, partition_by) }}
9+
{%- endif -%}
510

6-
declare _dbt_max_partition {{ partition_by.data_type_for_partition() }} default (
11+
{%- endif -%}
12+
13+
{% endmacro %}
14+
15+
{% macro dbt_max_partition_from_table_data_sql(relation, partition_by) %}
16+
17+
declare _dbt_max_partition {{ partition_by.data_type_for_partition() }} default (
718
select max({{ partition_by.field }}) from {{ this }}
819
where {{ partition_by.field }} is not null
9-
);
20+
);
21+
22+
{% endmacro %}
1023

24+
{% macro max_partition_wrapper(field) %}
25+
{{ "MAX(" ~ field ~ ") AS max_partition" }}
26+
{% endmacro %}
27+
28+
{% macro dbt_max_partition_from_information_schema_data_sql(relation, partition_by) %}
29+
declare _dbt_max_partition {{ partition_by.data_type_for_partition() }} default (
30+
{{ partition_from_information_schema_data_sql(relation, partition_by, max_partition_wrapper) }}
31+
);
32+
{% endmacro %}
33+
34+
{% macro partition_from_information_schema_data_sql(relation, partition_by, field_function) %}
35+
36+
{%- if data_type is none -%}
37+
{%- set data_type = partition_by.data_type -%}
38+
{%- set granularity = partition_by.granularity -%}
1139
{%- endif -%}
1240

41+
{# Format partition_id to match the declared variable type #}
42+
{%- if data_type | lower in ('date', 'timestamp', 'datetime') -%}
43+
{%- if granularity == "day" -%}
44+
{%- set format = "%Y%m%d" -%}
45+
{%- else -%}
46+
{%- set format = "%Y%m%d%H" -%}
47+
{%- endif -%}
48+
{%- set field = "parse_" ~data_type ~ "('" ~ format ~ "', partition_id)" -%}
49+
{%- else -%}
50+
{%- set field = "CAST(partition_id AS INT64)" -%}
51+
{%- endif -%}
52+
53+
SELECT {{ field_function(field) }}
54+
FROM `{{relation.project}}.{{relation.dataset}}.INFORMATION_SCHEMA.PARTITIONS`
55+
WHERE TABLE_NAME = '{{relation.identifier}}'
56+
AND NOT(STARTS_WITH(partition_id, "__"))
57+
1358
{% endmacro %}

tests/unit/test_bigquery_adapter.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ def test_parse_partition_by(self):
726726
"granularity": "day",
727727
"time_ingestion_partitioning": False,
728728
"copy_partitions": False,
729+
"partition_information": "model",
729730
},
730731
)
731732

@@ -742,6 +743,7 @@ def test_parse_partition_by(self):
742743
"granularity": "day",
743744
"time_ingestion_partitioning": False,
744745
"copy_partitions": False,
746+
"partition_information": "model",
745747
},
746748
)
747749

@@ -755,6 +757,7 @@ def test_parse_partition_by(self):
755757
"granularity": "month",
756758
"time_ingestion_partitioning": False,
757759
"copy_partitions": False,
760+
"partition_information": "model",
758761
},
759762
)
760763

@@ -768,6 +771,7 @@ def test_parse_partition_by(self):
768771
"granularity": "year",
769772
"time_ingestion_partitioning": False,
770773
"copy_partitions": False,
774+
"partition_information": "model",
771775
},
772776
)
773777

@@ -781,6 +785,7 @@ def test_parse_partition_by(self):
781785
"granularity": "hour",
782786
"time_ingestion_partitioning": False,
783787
"copy_partitions": False,
788+
"partition_information": "model",
784789
},
785790
)
786791

@@ -794,6 +799,7 @@ def test_parse_partition_by(self):
794799
"granularity": "month",
795800
"time_ingestion_partitioning": False,
796801
"copy_partitions": False,
802+
"partition_information": "model",
797803
},
798804
)
799805

@@ -807,6 +813,7 @@ def test_parse_partition_by(self):
807813
"granularity": "year",
808814
"time_ingestion_partitioning": False,
809815
"copy_partitions": False,
816+
"partition_information": "model",
810817
},
811818
)
812819

@@ -820,6 +827,7 @@ def test_parse_partition_by(self):
820827
"granularity": "hour",
821828
"time_ingestion_partitioning": False,
822829
"copy_partitions": False,
830+
"partition_information": "model",
823831
},
824832
)
825833

@@ -833,6 +841,7 @@ def test_parse_partition_by(self):
833841
"granularity": "month",
834842
"time_ingestion_partitioning": False,
835843
"copy_partitions": False,
844+
"partition_information": "model",
836845
},
837846
)
838847

@@ -846,6 +855,7 @@ def test_parse_partition_by(self):
846855
"granularity": "year",
847856
"time_ingestion_partitioning": False,
848857
"copy_partitions": False,
858+
"partition_information": "model",
849859
},
850860
)
851861

@@ -859,6 +869,7 @@ def test_parse_partition_by(self):
859869
"granularity": "day",
860870
"time_ingestion_partitioning": True,
861871
"copy_partitions": True,
872+
"partition_information": "model",
862873
},
863874
)
864875

@@ -882,6 +893,7 @@ def test_parse_partition_by(self):
882893
"range": {"start": 1, "end": 100, "interval": 20},
883894
"time_ingestion_partitioning": False,
884895
"copy_partitions": False,
896+
"partition_information": "model",
885897
},
886898
)
887899

0 commit comments

Comments
 (0)