3
3
import errno
4
4
import hashlib
5
5
import logging
6
+ import os
6
7
import random
7
8
import string
8
9
import uuid
13
14
from socket import SOL_SOCKET
14
15
from typing import Any
15
16
from typing import Dict
17
+ from typing import List
18
+ from typing import Optional
16
19
from typing import Tuple
17
20
21
+ import clog .config
22
+ import srv_configs
18
23
import yaml
24
+ from clog .config import monk_host
25
+ from clog .config import monk_port
26
+ from clog .handlers import MonkHandler
19
27
from typing_extensions import Literal
20
28
21
# Default location of the Spark run configuration. This has to be the shared,
# deployed config path: a checkout under an individual developer's home
# directory is not present on other hosts, so loading would fail there.
DEFAULT_SPARK_RUN_CONFIG = '/nail/srv/configs/spark.yaml'
# Path pattern for pod-template files; contains a `{file_uuid}` placeholder.
POD_TEMPLATE_PATH = '/nail/tmp/spark-pt-{file_uuid}.yaml'
# Location of the Spark executor pod template.
SPARK_EXECUTOR_POD_TEMPLATE = '/nail/srv/configs/spark_executor_pod_template.yaml'
24
32
37
45
38
46
39
47
def load_spark_srv_conf (preset_values = None ) -> Tuple [
40
- Dict [str , Any ],
41
- Dict [str , Any ],
42
- Dict [str , Any ],
43
- Dict [str , Any ],
44
- Dict [str , Dict [str , float ]],
48
+ Dict [str , Any ], # spark_srv_conf
49
+ Dict [str , Any ], # spark_constants
50
+ Dict [str , Any ], # default_spark_srv_conf
51
+ Dict [str , Any ], # mandatory_default_spark_srv_conf
52
+ Dict [str , Dict [str , float ]], # spark_costs
53
+ List [Dict [str , Any ]], # module_configs
45
54
]:
46
55
if preset_values is None :
47
56
preset_values = dict ()
@@ -53,9 +62,10 @@ def load_spark_srv_conf(preset_values=None) -> Tuple[
53
62
default_spark_srv_conf = spark_constants ['defaults' ]
54
63
mandatory_default_spark_srv_conf = spark_constants ['mandatory_defaults' ]
55
64
spark_costs = spark_constants ['cost_factor' ]
65
+ module_configs = loaded_values ['module_config' ]
56
66
return (
57
67
spark_srv_conf , spark_constants , default_spark_srv_conf ,
58
- mandatory_default_spark_srv_conf , spark_costs ,
68
+ mandatory_default_spark_srv_conf , spark_costs , module_configs ,
59
69
)
60
70
except Exception as e :
61
71
log .warning (f'Failed to load { DEFAULT_SPARK_RUN_CONFIG } : { e } ' )
@@ -217,3 +227,161 @@ def get_spark_driver_memory_overhead_mb(spark_conf: Dict[str, str]) -> float:
217
227
)
218
228
driver_mem_overhead_mb = driver_mem_mb * driver_mem_overhead_factor
219
229
return round (driver_mem_overhead_mb , 5 )
230
+
231
+
232
def _load_default_service_configurations_for_clog() -> Optional[Dict[str, Any]]:
    """
    Process the 'clog' entry of the Spark run config's 'module_config' list.

    The first dict entry whose 'namespace' equals 'clog' is selected. If that
    entry names a 'file' that exists on disk, the file is handed to
    ``srv_configs.use_file`` under the 'clog' namespace; a missing file or a
    failure while loading it is logged and otherwise ignored.

    Returns:
        The entry's inline 'config' value when it is a dict. None when there
        is no 'clog' entry, when 'config' is absent or not a dict, or when
        any error is raised while processing (all errors are logged rather
        than propagated).
    """
    try:
        # Exactly six values are expected; the last one is the module_config list.
        _, _, _, _, _, module_config_entries = load_spark_srv_conf()

        clog_entry = next(
            (
                entry for entry in module_config_entries
                if isinstance(entry, dict) and entry.get('namespace') == 'clog'
            ),
            None,
        )
        if clog_entry is None:
            log.warning(
                f"Could not find 'clog' namespace entry in 'module_config' "
                f'section within { DEFAULT_SPARK_RUN_CONFIG } .',
            )
            return None

        clog_config_file_path = clog_entry.get('file')
        inline_section = clog_entry.get('config')

        if not clog_config_file_path:
            log.info(
                f"No 'file' specified for 'clog' namespace in 'module_config' of { DEFAULT_SPARK_RUN_CONFIG } . "
                'Not loading any external file for clog via module_config.',
            )
        elif os.path.exists(clog_config_file_path):
            # A failure to load the external file is logged but does not
            # prevent the inline config from being returned below.
            try:
                srv_configs.use_file(clog_config_file_path, namespace='clog')
                log.info(
                    f'Successfully loaded clog configuration file { clog_config_file_path } '
                    f"into namespace 'clog'.",
                )
            except Exception as e_use_file:
                log.error(
                    f'Error loading clog configuration file { clog_config_file_path } '
                    f'using srv_configs.use_file: { e_use_file } ',
                )
        else:
            log.error(
                f"Clog configuration file specified in 'module_config' of { DEFAULT_SPARK_RUN_CONFIG } "
                f'does not exist: { clog_config_file_path } .',
            )

        # Only a dict is a usable inline config; anything else yields None.
        if isinstance(inline_section, dict):
            return inline_section
        if inline_section is not None:
            log.warning(f"Inline 'config' for 'clog' namespace in { DEFAULT_SPARK_RUN_CONFIG } is not a dictionary.")
        return None

    except FileNotFoundError:
        log.error(
            f'Error: Main Spark run config file { DEFAULT_SPARK_RUN_CONFIG } not found. '
            'Cannot process clog configurations.',
        )
        return None
    except yaml.YAMLError as e_yaml:
        log.error(f'Error parsing YAML from { DEFAULT_SPARK_RUN_CONFIG } : { e_yaml } ')
        return None
    except Exception as e_main:
        log.error(
            f'An unexpected error occurred in _load_default_service_configurations_for_clog: { e_main } ',
        )
        return None
307
+
308
+
309
def get_clog_handler(
    client_id: Optional[str] = None,
    stream_name_override: Optional[str] = None,
) -> Optional[MonkHandler]:
    """
    Configures and returns a clog MonkHandler for logging.

    The 'clog' module_config entry of DEFAULT_SPARK_RUN_CONFIG is processed
    first (which may load an external file into the 'clog' srv_configs
    namespace). The log stream name is then resolved with this priority:
      1. `stream_name_override` argument.
      2. `log_stream_name` from the inline 'config' of the 'clog'
         module_config entry, when it is a non-empty string.
      3. `log_stream_name` from the 'clog' namespace in srv_configs.

    An empty value at one level never blocks the lower-priority levels: the
    next source is consulted instead.

    As a side effect, clog is globally configured with `monk_disable=False`
    once a stream name has been resolved.

    Args:
        client_id: Optional client identifier for the log messages.
            Defaults to the USER environment variable, or
            'unknown_spark_user' when that is unset or empty.
        stream_name_override: Optional explicit clog stream name to use,
            overriding any configured values.

    Returns:
        A configured MonkHandler instance if successful, otherwise None
        (no stream name could be determined, or constructing the handler
        raised). Failures are logged, not raised.
    """
    # Load external file (if any) and get inline config from spark.yaml's module_config
    inline_clog_config = _load_default_service_configurations_for_clog()

    actual_client_id = client_id or os.getenv('USER') or 'unknown_spark_user'
    final_stream_name = stream_name_override

    if not final_stream_name:
        inline_stream_name = (
            inline_clog_config.get('log_stream_name') if inline_clog_config else None
        )
        # Require a non-empty string: an empty inline value must not be
        # reported as "used" nor suppress the srv_configs fallback below.
        if isinstance(inline_stream_name, str) and inline_stream_name:
            final_stream_name = inline_stream_name
            log.info(
                f"Using log_stream_name '{ final_stream_name } ' from inline module_config in "
                f'{ DEFAULT_SPARK_RUN_CONFIG } .',
            )
        else:
            try:
                # Fallback to srv_configs (which should have data from the external file)
                clog_srv_configs_dict = srv_configs.get_namespace_as_dict('clog')
                final_stream_name = clog_srv_configs_dict.get('log_stream_name')
                if final_stream_name:
                    log.info(
                        f"Using log_stream_name '{ final_stream_name } ' from srv_configs 'clog' namespace "
                        f'(likely from external file).',
                    )
            except Exception as e:
                log.warning(
                    f"Could not get 'clog' namespace from srv_configs or 'log_stream_name' key missing. "
                    f'This may be okay if stream_name_override or inline config provides it. Error: { e } ',
                )

    if not final_stream_name:
        log.error(
            'Clog stream_name could not be determined. It was not provided as an argument, '
            'not found in the inline module_config for "clog", and not found in the '
            '"clog" srv_configs namespace. Clog handler cannot be configured.',
        )
        return None

    # Ensure that clog is configured to enable Monk logging.
    # The default in clog.config.monk_disable is True.
    clog.config.configure_from_dict({'monk_disable': False})
    log.info('Clog has been configured to enable Monk logging (monk_disable=False).')

    try:
        return MonkHandler(
            client_id=actual_client_id,
            host=monk_host,
            port=monk_port,
            stream=final_stream_name,
        )
    except Exception as e:
        log.error(f"Failed to create MonkHandler for clog with stream '{ final_stream_name } '. Error: { e } ")
        return None
0 commit comments