@@ -404,7 +404,7 @@ async def run_query(self, query: str, batch_size: int = 100000):
         logger.info("Query execution completed")
 
     def _execute_pandas_query(
-        self, conn, query
+        self, conn, query, chunksize: Optional[int]
     ) -> Union["pd.DataFrame", Iterator["pd.DataFrame"]]:
         """Helper function to execute SQL query using pandas.
         The function is responsible for using import_optional_dependency method of the pandas library to import sqlalchemy
@@ -424,13 +424,13 @@ def _execute_pandas_query(
         from sqlalchemy import text
 
         if import_optional_dependency("sqlalchemy", errors="ignore"):
-            return pd.read_sql_query(text(query), conn, chunksize=self.chunk_size)
+            return pd.read_sql_query(text(query), conn, chunksize=chunksize)
         else:
             dbapi_conn = getattr(conn, "connection", None)
-            return pd.read_sql_query(query, dbapi_conn, chunksize=self.chunk_size)
+            return pd.read_sql_query(query, dbapi_conn, chunksize=chunksize)
 
     def _read_sql_query(
-        self, session: "Session", query: str
+        self, session: "Session", query: str, chunksize: Optional[int]
     ) -> Union["pd.DataFrame", Iterator["pd.DataFrame"]]:
         """Execute SQL query using the provided session.
 
@@ -442,10 +442,10 @@ def _read_sql_query(
                 or iterator of DataFrames if chunked.
         """
         conn = session.connection()
-        return self._execute_pandas_query(conn, query)
+        return self._execute_pandas_query(conn, query, chunksize=chunksize)
 
     def _execute_query_daft(
-        self, query: str
+        self, query: str, chunksize: Optional[int]
     ) -> Union["daft.DataFrame", Iterator["daft.DataFrame"]]:
         """Execute SQL query using the provided engine and daft.
 
@@ -462,15 +462,11 @@ def _execute_query_daft(
             raise ValueError("Engine is not initialized. Call load() first.")
 
         if isinstance(self.engine, str):
-            return daft.read_sql(
-                query, self.engine, infer_schema_length=self.chunk_size
-            )
-        return daft.read_sql(
-            query, self.engine.connect, infer_schema_length=self.chunk_size
-        )
+            return daft.read_sql(query, self.engine, infer_schema_length=chunksize)
+        return daft.read_sql(query, self.engine.connect, infer_schema_length=chunksize)
 
     def _execute_query(
-        self, query: str
+        self, query: str, chunksize: Optional[int]
     ) -> Union["pd.DataFrame", Iterator["pd.DataFrame"]]:
         """Execute SQL query using the provided engine and pandas.
 
@@ -482,7 +478,7 @@ def _execute_query(
             raise ValueError("Engine is not initialized. Call load() first.")
 
         with self.engine.connect() as conn:
-            return self._execute_pandas_query(conn, query)
+            return self._execute_pandas_query(conn, query, chunksize)
 
     async def get_batched_results(
         self,
@@ -513,12 +509,14 @@ async def get_batched_results(
 
             if async_session:
                 async with async_session() as session:
-                    return await session.run_sync(self._read_sql_query, query)
+                    return await session.run_sync(
+                        self._read_sql_query, query, chunksize=self.chunk_size
+                    )
             else:
                 # Run the blocking operation in a thread pool
                 with concurrent.futures.ThreadPoolExecutor() as executor:
                     return await asyncio.get_event_loop().run_in_executor(  # type: ignore
-                        executor, self._execute_query, query
+                        executor, self._execute_query, query, self.chunk_size
                     )
         except Exception as e:
             logger.error(f"Error reading batched data(pandas) from SQL: {str(e)}")
@@ -549,12 +547,14 @@ async def get_results(self, query: str) -> "pd.DataFrame":
 
             if async_session:
                 async with async_session() as session:
-                    return await session.run_sync(self._read_sql_query, query)
+                    return await session.run_sync(
+                        self._read_sql_query, query, chunksize=None
+                    )
             else:
                 # Run the blocking operation in a thread pool
                 with concurrent.futures.ThreadPoolExecutor() as executor:
                     result = await asyncio.get_event_loop().run_in_executor(
-                        executor, self._execute_query, query
+                        executor, self._execute_query, query, None
                     )
                     import pandas as pd
 
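For context on what the explicit `chunksize` parameter changes at the call sites: with pandas, `read_sql_query(..., chunksize=None)` returns a single DataFrame, while an integer `chunksize` returns an iterator of DataFrames, which is why `get_results` now passes `None` and `get_batched_results` passes `self.chunk_size`. Below is a minimal sketch of that behavior only; the in-memory SQLite engine and the `items` table are assumptions made up for the example, not part of this codebase.

```python
# Illustrative sketch: how pandas' chunksize argument selects between the two
# return types that _execute_pandas_query can produce.
import pandas as pd
from sqlalchemy import create_engine, text

# Throwaway database for the example (assumption, not the project's schema).
engine = create_engine("sqlite:///:memory:")
with engine.begin() as conn:
    conn.execute(text("CREATE TABLE items (id INTEGER, name TEXT)"))
    conn.execute(text("INSERT INTO items VALUES (1, 'a'), (2, 'b'), (3, 'c')"))

with engine.connect() as conn:
    # chunksize=None -> one DataFrame (the get_results path)
    df = pd.read_sql_query(text("SELECT * FROM items"), conn, chunksize=None)
    print(type(df).__name__)  # DataFrame

    # chunksize=2 -> an iterator of DataFrames (the get_batched_results path)
    for chunk in pd.read_sql_query(text("SELECT * FROM items"), conn, chunksize=2):
        print(len(chunk))  # 2, then 1
```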