79
79
TICKET_NOT_REQUIRED_USERS = {
80
80
'batch' , # non-human spark-run from batch boxes
81
81
'TRON' , # tronjobs that run commands like paasta mark-for-deployment
82
+ 'jenkins' , # username for jenkins pipeline jobs
82
83
None , # placeholder for being unable to determine user
83
84
}
84
85
USER_LABEL_UNSPECIFIED = 'UNSPECIFIED'
@@ -993,9 +994,79 @@ def _get_valid_jira_ticket(self, jira_ticket: Optional[str]) -> Optional[str]:
993
994
if ticket and JIRA_TICKET_PATTERN .match (ticket ):
994
995
log .info (f'Valid Jira ticket provided: { ticket } ' )
995
996
return ticket
996
- log .warning (f'Jira ticket missing or invalid format: { ticket } ' )
997
+ log .info (f'Jira ticket missing or invalid format: { ticket } ' )
997
998
return None
998
999
1000
+ def _handle_jira_ticket_validation (
1001
+ self ,
1002
+ cluster_manager : str ,
1003
+ user : Optional [str ],
1004
+ jira_ticket : Optional [str ],
1005
+ paasta_cluster : str ,
1006
+ paasta_service : str ,
1007
+ paasta_instance : str ,
1008
+ ) -> Optional [str ]:
1009
+ """
1010
+ This method checks if Jira ticket validation is enabled, if the user needs
1011
+ to provide a ticket, and validates the ticket if needed.
1012
+
1013
+ Returns:
1014
+ The validated Jira ticket string if valid, otherwise None.
1015
+
1016
+ Args:
1017
+ cluster_manager: The cluster manager being used
1018
+ user: The user running the job
1019
+ jira_ticket: The Jira ticket provided by the user
1020
+ """
1021
+ flag_enabled = self .mandatory_default_spark_srv_conf .get ('spark.yelp.jira_ticket.enabled' , 'false' )
1022
+ valid_ticket = self ._get_valid_jira_ticket (jira_ticket )
1023
+
1024
+ # Skip validation for local cluster manager or exempt users
1025
+ if cluster_manager == 'local' or user in TICKET_NOT_REQUIRED_USERS :
1026
+ log .debug ('Jira ticket check not required for this job configuration.' )
1027
+ # If exempt, we still pass through the original ticket if it's valid,
1028
+ # otherwise None. This allows exempt users like tron to still have their valid tickets
1029
+ # (if provided) attached as labels, without forcing validation.
1030
+ return valid_ticket
1031
+
1032
+ if valid_ticket is None :
1033
+ log_payload = {
1034
+ 'timestamp' : int (time .time ()),
1035
+ 'event' : 'jira_ticket_validation_warning' ,
1036
+ 'level' : 'WARNING' ,
1037
+ 'reason' : 'Ticket missing or invalid' ,
1038
+ 'user' : user ,
1039
+ 'jira_ticket_provided' : jira_ticket ,
1040
+ 'paasta_cluster' : paasta_cluster ,
1041
+ 'paasta_service' : paasta_service ,
1042
+ 'paasta_instance' : paasta_instance ,
1043
+ }
1044
+ if flag_enabled == 'true' :
1045
+ error_msg = (
1046
+ f'Job requires a valid Jira ticket (format PROJ-1234).\n '
1047
+ f'Jira ticket check is enabled, but ticket "{ jira_ticket } " is '
1048
+ f'missing or invalid for user "{ user } ".\n '
1049
+ 'Please pass the parameter as: paasta spark-run --jira-ticket=PROJ-1234 \n '
1050
+ 'For more information: http://y/spark-jira-ticket-param \n '
1051
+ 'If you have questions, please reach out to #spark on Slack.\n '
1052
+ f'paasta_cluster={ paasta_cluster } , paasta_service={ paasta_service } \n '
1053
+ f'paasta_instance={ paasta_instance } '
1054
+ )
1055
+ utils .log_to_clog ('spark_jira_ticket' , log_payload , error_msg , log )
1056
+ raise RuntimeError (error_msg )
1057
+ else :
1058
+ warning_message = (
1059
+ f'Jira ticket check is configured, but ticket is missing or invalid for user "{ user } ". '
1060
+ f'Proceeding with job execution. Original ticket value: "{ jira_ticket } ". '
1061
+ 'Please pass the parameter as: paasta spark-run --jira-ticket=PROJ-1234 '
1062
+ 'For more information: http://y/spark-jira-ticket-param '
1063
+ 'If you have questions, please reach out to #spark on Slack. '
1064
+ f'paasta_cluster={ paasta_cluster } , paasta_service={ paasta_service } \n '
1065
+ f'paasta_instance={ paasta_instance } '
1066
+ )
1067
+ utils .log_to_clog ('spark_jira_ticket' , log_payload , warning_message , log )
1068
+ return valid_ticket
1069
+
999
1070
def get_spark_conf (
1000
1071
self ,
1001
1072
cluster_manager : str ,
@@ -1040,6 +1111,7 @@ def get_spark_conf(
1040
1111
to launch the batch, and inside the batch use `spark_tools.paasta` to create
1041
1112
spark session.
1042
1113
:param aws_region: The default aws region to use
1114
+ :param jira_ticket: The jira project that this spark job is related to.
1043
1115
:param service_account_name: The k8s service account to use for spark k8s authentication.
1044
1116
:param force_spark_resource_configs: skip the resource/instances recalculation.
1045
1117
This is strongly not recommended.
@@ -1058,20 +1130,10 @@ def get_spark_conf(
1058
1130
# Get user from environment variables if it's not set
1059
1131
user = user or os .environ .get ('USER' , None )
1060
1132
1061
- if self .mandatory_default_spark_srv_conf .get ('spark.yelp.jira_ticket.enabled' ) == 'true' :
1062
- needs_jira_check = cluster_manager != 'local' and user not in TICKET_NOT_REQUIRED_USERS
1063
- if needs_jira_check :
1064
- valid_ticket = self ._get_valid_jira_ticket (jira_ticket )
1065
- if valid_ticket is None :
1066
- error_msg = (
1067
- 'Job requires a valid Jira ticket (format PROJ-1234).\n '
1068
- 'Please pass the parameter as: paasta spark-run --jira-ticket=PROJ-1234 \n '
1069
- 'For more information: https://yelpwiki.yelpcorp.com/spaces/AML/pages/402885641 \n '
1070
- f'If you have questions, please reach out to #spark on Slack. (user={ user } )\n '
1071
- )
1072
- raise RuntimeError (error_msg )
1073
- else :
1074
- log .debug ('Jira ticket check not required for this job configuration.' )
1133
+ # Handle Jira ticket validation if enabled
1134
+ validated_jira_ticket = self ._handle_jira_ticket_validation (
1135
+ cluster_manager , user , jira_ticket , paasta_cluster , paasta_service , paasta_instance ,
1136
+ )
1075
1137
1076
1138
app_base_name = (
1077
1139
user_spark_opts .get ('spark.app.name' ) or
@@ -1160,7 +1222,7 @@ def get_spark_conf(
1160
1222
include_self_managed_configs = not use_eks ,
1161
1223
k8s_server_address = k8s_server_address ,
1162
1224
user = user ,
1163
- jira_ticket = jira_ticket ,
1225
+ jira_ticket = validated_jira_ticket ,
1164
1226
))
1165
1227
elif cluster_manager == 'local' :
1166
1228
spark_conf .update (_get_local_spark_env (
0 commit comments