3
3
import errno
4
4
import hashlib
5
5
import logging
6
- import os
7
6
import random
8
7
import string
9
8
import uuid
14
13
from socket import SOL_SOCKET
15
14
from typing import Any
16
15
from typing import Dict
17
- from typing import List
18
- from typing import Optional
19
16
from typing import Tuple
20
17
21
- import clog .config
22
- import srv_configs
23
18
import yaml
24
- from clog .config import monk_host
25
- from clog .config import monk_port
26
- from clog .handlers import MonkHandler
27
19
from typing_extensions import Literal
28
20
29
21
DEFAULT_SPARK_RUN_CONFIG = '/nail/srv/configs/spark.yaml'
40
32
SPARK_DRIVER_MEM_OVERHEAD_FACTOR_DEFAULT = 0.1
41
33
42
34
43
- log = logging .Logger (__name__ )
35
+ log = logging .getLogger (__name__ )
44
36
log .setLevel (logging .INFO )
45
37
46
38
@@ -50,7 +42,6 @@ def load_spark_srv_conf(preset_values=None) -> Tuple[
50
42
Dict [str , Any ], # default_spark_srv_conf
51
43
Dict [str , Any ], # mandatory_default_spark_srv_conf
52
44
Dict [str , Dict [str , float ]], # spark_costs
53
- List [Dict [str , Any ]], # module_configs
54
45
]:
55
46
if preset_values is None :
56
47
preset_values = dict ()
@@ -62,10 +53,9 @@ def load_spark_srv_conf(preset_values=None) -> Tuple[
62
53
default_spark_srv_conf = spark_constants ['defaults' ]
63
54
mandatory_default_spark_srv_conf = spark_constants ['mandatory_defaults' ]
64
55
spark_costs = spark_constants ['cost_factor' ]
65
- module_configs = loaded_values ['module_config' ]
66
56
return (
67
57
spark_srv_conf , spark_constants , default_spark_srv_conf ,
68
- mandatory_default_spark_srv_conf , spark_costs , module_configs ,
58
+ mandatory_default_spark_srv_conf , spark_costs ,
69
59
)
70
60
except Exception as e :
71
61
log .warning (f'Failed to load { DEFAULT_SPARK_RUN_CONFIG } : { e } ' )
@@ -227,161 +217,3 @@ def get_spark_driver_memory_overhead_mb(spark_conf: Dict[str, str]) -> float:
227
217
)
228
218
driver_mem_overhead_mb = driver_mem_mb * driver_mem_overhead_factor
229
219
return round (driver_mem_overhead_mb , 5 )
230
-
231
-
232
- def _load_default_service_configurations_for_clog () -> Optional [Dict [str , Any ]]:
233
- """
234
- Loads the external configuration file for the 'clog' namespace if specified in
235
- DEFAULT_SPARK_RUN_CONFIG's 'module_config' section.
236
- Returns the inline 'config' dictionary for the 'clog' namespace if found,
237
- otherwise None.
238
- """
239
- clog_config_file_path = None
240
- clog_inline_config = None
241
- found_clog_module_config = False
242
-
243
- try :
244
- _ , _ , _ , _ , _ , module_configs = load_spark_srv_conf ()
245
-
246
- for mc_item in module_configs :
247
- if isinstance (mc_item , dict ) and mc_item .get ('namespace' ) == 'clog' :
248
- found_clog_module_config = True
249
- clog_config_file_path = mc_item .get ('file' )
250
- clog_inline_config = mc_item .get ('config' )
251
- break
252
-
253
- if not found_clog_module_config :
254
- log .warning (
255
- f"Could not find 'clog' namespace entry in 'module_config' "
256
- f'section within { DEFAULT_SPARK_RUN_CONFIG } .' ,
257
- )
258
- return None
259
-
260
- if clog_config_file_path :
261
- if os .path .exists (clog_config_file_path ):
262
- try :
263
- srv_configs .use_file (clog_config_file_path , namespace = 'clog' )
264
- log .info (
265
- f'Successfully loaded clog configuration file { clog_config_file_path } '
266
- f"into namespace 'clog'." ,
267
- )
268
- except Exception as e_use_file :
269
- log .error (
270
- f'Error loading clog configuration file { clog_config_file_path } '
271
- f'using srv_configs.use_file: { e_use_file } ' ,
272
- )
273
- else :
274
- log .error (
275
- f"Clog configuration file specified in 'module_config' of { DEFAULT_SPARK_RUN_CONFIG } "
276
- f'does not exist: { clog_config_file_path } .' ,
277
- )
278
- else :
279
- log .info (
280
- f"No 'file' specified for 'clog' namespace in 'module_config' of { DEFAULT_SPARK_RUN_CONFIG } . "
281
- 'Not loading any external file for clog via module_config.' ,
282
- )
283
-
284
- # Return the inline config dictionary, which might be None if not present
285
- if isinstance (clog_inline_config , dict ):
286
- return clog_inline_config
287
- elif clog_inline_config is not None :
288
- log .warning (f"Inline 'config' for 'clog' namespace in { DEFAULT_SPARK_RUN_CONFIG } is not a dictionary." )
289
- return None
290
- else :
291
- return None
292
-
293
- except FileNotFoundError :
294
- log .error (
295
- f'Error: Main Spark run config file { DEFAULT_SPARK_RUN_CONFIG } not found. '
296
- 'Cannot process clog configurations.' ,
297
- )
298
- return None
299
- except yaml .YAMLError as e_yaml :
300
- log .error (f'Error parsing YAML from { DEFAULT_SPARK_RUN_CONFIG } : { e_yaml } ' )
301
- return None
302
- except Exception as e_main :
303
- log .error (
304
- f'An unexpected error occurred in _load_default_service_configurations_for_clog: { e_main } ' ,
305
- )
306
- return None
307
-
308
-
309
- def get_clog_handler (
310
- client_id : Optional [str ] = None ,
311
- stream_name_override : Optional [str ] = None ,
312
- ) -> Optional [MonkHandler ]:
313
- """
314
- Configures and returns a clog MonkHandler for logging.
315
-
316
- This utility helps in setting up a MonkHandler. It ensures the external
317
- clog configuration file (if specified in DEFAULT_SPARK_RUN_CONFIG) is loaded
318
- into srv_configs. It then determines the log_stream_name with the following
319
- priority:
320
- 1. `stream_name_override` argument.
321
- 2. `log_stream_name` from the inline 'config' of the 'clog' module_config
322
- in DEFAULT_SPARK_RUN_CONFIG.
323
- 3. `log_stream_name` from the 'clog' namespace in srv_configs (typically
324
- loaded from the external file like /nail/srv/configs/clog.yaml).
325
-
326
- Args:
327
- client_id: Optional client identifier for the log messages.
328
- Defaults to the current OS user or 'unknown_spark_user'.
329
- stream_name_override: Optional explicit clog stream name to use,
330
- overriding any configured values.
331
-
332
- Returns:
333
- A configured MonkHandler instance if successful, otherwise None.
334
- """
335
- # Load external file (if any) and get inline config from spark.yaml's module_config
336
- inline_clog_config = _load_default_service_configurations_for_clog ()
337
-
338
- actual_client_id = client_id or os .getenv ('USER' ) or 'unknown_spark_user'
339
- final_stream_name = stream_name_override
340
-
341
- if not final_stream_name :
342
- if inline_clog_config and isinstance (inline_clog_config .get ('log_stream_name' ), str ):
343
- final_stream_name = inline_clog_config ['log_stream_name' ]
344
- log .info (
345
- f"Using log_stream_name '{ final_stream_name } ' from inline module_config in "
346
- f'{ DEFAULT_SPARK_RUN_CONFIG } .' ,
347
- )
348
- else :
349
- try :
350
- # Fallback to srv_configs (which should have data from the external file)
351
- clog_srv_configs_dict = srv_configs .get_namespace_as_dict ('clog' )
352
- final_stream_name = clog_srv_configs_dict .get ('log_stream_name' )
353
- if final_stream_name :
354
- log .info (
355
- f"Using log_stream_name '{ final_stream_name } ' from srv_configs 'clog' namespace "
356
- f'(likely from external file).' ,
357
- )
358
- except Exception as e :
359
- log .warning (
360
- f"Could not get 'clog' namespace from srv_configs or 'log_stream_name' key missing. "
361
- f'This may be okay if stream_name_override or inline config provides it. Error: { e } ' ,
362
- )
363
-
364
- if not final_stream_name :
365
- log .error (
366
- 'Clog stream_name could not be determined. It was not provided as an argument, '
367
- 'not found in the inline module_config for "clog", and not found in the '
368
- '"clog" srv_configs namespace. Clog handler cannot be configured.' ,
369
- )
370
- return None
371
-
372
- # Ensure that clog is configured to enable Monk logging.
373
- # The default in clog.config.monk_disable is True.
374
- clog .config .configure_from_dict ({'monk_disable' : False })
375
- log .info ('Clog has been configured to enable Monk logging (monk_disable=False).' )
376
-
377
- try :
378
- handler = MonkHandler (
379
- client_id = actual_client_id ,
380
- host = monk_host ,
381
- port = monk_port ,
382
- stream = final_stream_name ,
383
- )
384
- return handler
385
- except Exception as e :
386
- log .error (f"Failed to create MonkHandler for clog with stream '{ final_stream_name } '. Error: { e } " )
387
- return None
0 commit comments