|
28 | 28 | SaveToFileExecutionRequest, |
29 | 29 | ShowExecutionRequest, |
30 | 30 | StartExecutionRequest, |
31 | | - TableIdentifier, |
| 31 | +) |
| 32 | +from fenic_cloud.protos.engine.v1.engine_pb2 import ( |
| 33 | + TableIdentifier as TableIdentifierProto, |
32 | 34 | ) |
33 | 35 | from fenic_cloud.protos.engine.v1.engine_pb2_grpc import EngineServiceStub |
34 | 36 |
|
35 | 37 | from fenic._backends.cloud.metrics import get_query_execution_metrics |
36 | 38 | from fenic._backends.schema_serde import deserialize_schema, serialize_schema |
| 39 | +from fenic._backends.utils.catalog_utils import TableIdentifier |
37 | 40 | from fenic.core._interfaces import BaseExecution |
| 41 | +from fenic.core._logical_plan.plans.sink import TableSink |
38 | 42 | from fenic.core._logical_plan.serde import LogicalPlanSerde |
39 | 43 | from fenic.core.error import ( |
40 | 44 | CloudExecutionError, |
41 | 45 | CloudSessionError, |
42 | 46 | ExecutionError, |
43 | 47 | InternalError, |
| 48 | + PlanError, |
44 | 49 | ValidationError, |
45 | 50 | ) |
46 | 51 | from fenic.core.metrics import LMMetrics, PhysicalPlanRepr, QueryMetrics, RMMetrics |
@@ -164,16 +169,67 @@ def save_as_table( |
164 | 169 | ) -> QueryMetrics: |
165 | 170 | """Execute the logical plan and save the result as a table.""" |
166 | 171 | logger.debug(f"Saving plan {logical_plan} as table: {table_name}") |
| 172 | + |
| 173 | + if isinstance(logical_plan, TableSink): |
| 174 | + if not logical_plan.location: |
| 175 | + raise ValidationError( |
| 176 | + f"Cannot save to table '{table_name}' - location is required. " |
| 177 | + f"Provide a location.") |
| 178 | + |
| 179 | + table_identifier = TableIdentifier.from_string(table_name).enrich( |
| 180 | + self.session_state.catalog.get_current_catalog(), |
| 181 | + self.session_state.catalog.get_current_database(), |
| 182 | + ) |
| 183 | + |
| 184 | + # If the table doesn't exist, create it, this has to be done in the user's context. |
| 185 | + table_identifier_str = str(table_identifier) |
| 186 | + table_exists = self.session_state.catalog.does_table_exist(table_identifier_str) |
| 187 | + if table_exists: |
| 188 | + if mode == "error": |
| 189 | + raise PlanError( |
| 190 | + f"Cannot save to table '{table_name}' - it already exists and mode is 'error'. " |
| 191 | + f"Choose a different approach: " |
| 192 | + f"1) Use mode='overwrite' to replace the existing table, " |
| 193 | + f"2) Use mode='append' to add data to the existing table, " |
| 194 | + f"3) Use mode='ignore' to skip saving if table exists, " |
| 195 | + f"4) Use a different table name.") |
| 196 | + if mode == "ignore": |
| 197 | + logger.warning(f"Table {table_name} already exists, ignoring write.") |
| 198 | + return QueryMetrics() |
| 199 | + if mode == "append": |
| 200 | + saved_schema = self.session_state.catalog.describe_table(table_identifier_str) |
| 201 | + plan_schema = logical_plan.schema() |
| 202 | + if saved_schema != plan_schema: |
| 203 | + raise PlanError( |
| 204 | + f"Cannot append to table '{table_name}' - schema mismatch detected. " |
| 205 | + f"The existing table has a different schema than your DataFrame. " |
| 206 | + f"Existing schema: {saved_schema} " |
| 207 | + f"Your DataFrame schema: {plan_schema} " |
| 208 | + f"To fix this: " |
| 209 | + f"1) Use mode='overwrite' to replace the table with your DataFrame's schema, " |
| 210 | + f"2) Modify your DataFrame to match the existing table's schema, " |
| 211 | + f"3) Use a different table name.") |
| 212 | + else: |
| 213 | + logger.debug(f"Creating table {table_identifier_str} with location: {logical_plan.location}") |
| 214 | + # Create the table in the catalog. |
| 215 | + result =self.session_state.catalog.create_table( |
| 216 | + table_identifier_str, |
| 217 | + logical_plan.schema(), |
| 218 | + location=logical_plan.location, |
| 219 | + ignore_if_exists=mode == "ignore", |
| 220 | + file_format="PARQUET") |
| 221 | + logger.debug(f"Table {table_identifier_str} created with result: {result}") |
| 222 | + |
167 | 223 | # TODO (DY): check that current catalog and schema (if specified in table_name) match session state |
168 | | - table_identifier = TableIdentifier( |
169 | | - catalog=self.session_state.catalog, |
170 | | - schema=self.session_state.schema, |
| 224 | + table_identifier_proto = TableIdentifierProto( |
| 225 | + catalog=table_identifier.catalog, |
| 226 | + schema=table_identifier.db, |
171 | 227 | table=table_name, |
172 | 228 | ) |
173 | 229 | request = StartExecutionRequest( |
174 | 230 | save_as_table=SaveAsTableExecutionRequest( |
175 | 231 | serialized_plan=LogicalPlanSerde.serialize(logical_plan), |
176 | | - table_identifier=table_identifier, |
| 232 | + table_identifier=table_identifier_proto, |
177 | 233 | mode=mode, |
178 | 234 | ) |
179 | 235 | ) |
|
0 commit comments