From 23b13c89d15cf9723a7970b7ed60ae4372b51178 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Wed, 11 Sep 2024 10:35:08 -0700 Subject: [PATCH] Remove default mounting of /etc/pki/spark In my previous PR to always set k8s mounts regardless of whether or not they exist on the host executing service_configuration_lib code, I missed that mounting /etc/pki/spark by default was a thing that we did. However, my assertion that in the worst case the container runtime would create the missing files wherever these mounts are used was incorrect: in a `paasta spark-run`, the spark driver will run locally and re-use the k8s volume functions to figure out what needs to be mounted. This would normally be fine, but we have a security setup that prevents writes at certain paths, and /etc/pki is in the set of blocked paths. Since we no longer have a spark cluster that is able to use certificate-based k8s authentication, this should be totally safe to remove as a default. --- service_configuration_lib/spark_config.py | 11 ----------- tests/spark_config_test.py | 10 +--------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/service_configuration_lib/spark_config.py b/service_configuration_lib/spark_config.py index 108f620..94cef35 100644 --- a/service_configuration_lib/spark_config.py +++ b/service_configuration_lib/spark_config.py @@ -65,9 +65,7 @@ 'spark.kubernetes.executor.label.spark.yelp.com/user', } -K8S_AUTH_FOLDER = '/etc/pki/spark' K8S_BASE_VOLUMES: List[Dict[str, str]] = [ - {'containerPath': K8S_AUTH_FOLDER, 'hostPath': K8S_AUTH_FOLDER, 'mode': 'RO'}, {'containerPath': '/etc/passwd', 'hostPath': '/etc/passwd', 'mode': 'RO'}, {'containerPath': '/etc/group', 'hostPath': '/etc/group', 'mode': 'RO'}, ] @@ -348,14 +346,6 @@ def _get_k8s_spark_env( spark_env.update({ 'spark.master': f'k8s://{k8s_server_address}', }) - elif include_self_managed_configs: - spark_env.update( - { - 'spark.kubernetes.authenticate.caCertFile': f'{K8S_AUTH_FOLDER}/{paasta_cluster}-ca.crt', - 
'spark.kubernetes.authenticate.clientKeyFile': f'{K8S_AUTH_FOLDER}/{paasta_cluster}-client.key', - 'spark.kubernetes.authenticate.clientCertFile': f'{K8S_AUTH_FOLDER}/{paasta_cluster}-client.crt', - }, - ) return spark_env @@ -1083,7 +1073,6 @@ def get_spark_conf( spark session. :param aws_region: The default aws region to use :param service_account_name: The k8s service account to use for spark k8s authentication. - If not provided, it uses cert files at {K8S_AUTH_FOLDER} to authenticate. :param force_spark_resource_configs: skip the resource/instances recalculation. This is strongly not recommended. :returns: spark opts in a dict. diff --git a/tests/spark_config_test.py b/tests/spark_config_test.py index 6bb456e..a0272d0 100644 --- a/tests/spark_config_test.py +++ b/tests/spark_config_test.py @@ -267,9 +267,7 @@ def mock_paasta_volumes(self, monkeypatch, tmpdir): @pytest.fixture def mock_existed_files(self, mock_paasta_volumes): - existed_files = [v.split(':')[0] for v in mock_paasta_volumes] + [ - '/host/file1', '/host/file2', '/host/file3', '/etc/pki/spark', '/etc/group', '/etc/passwd', - ] + existed_files = [v.split(':')[0] for v in mock_paasta_volumes] with mock.patch('os.path.exists', side_effect=lambda f: f in existed_files): yield existed_files @@ -321,7 +319,6 @@ def test_get_k8s_docker_volumes_conf(self, volumes): ) expected_volumes.update({ - **_get_k8s_volume('/etc/pki/spark', '/etc/pki/spark', 'ro'), **_get_k8s_volume('/etc/passwd', '/etc/passwd', 'ro'), **_get_k8s_volume('/etc/group', '/etc/group', 'ro'), }) @@ -1232,11 +1229,6 @@ def assert_kubernetes_conf(self, base_volumes, ui_port, mock_ephemeral_port_rese 'spark.kubernetes.pyspark.pythonVersion': '3', 'spark.kubernetes.container.image': self.docker_image, 'spark.kubernetes.namespace': 'paasta-spark', - 'spark.kubernetes.authenticate.caCertFile': f'{spark_config.K8S_AUTH_FOLDER}/{self.cluster}-ca.crt', - 'spark.kubernetes.authenticate.clientKeyFile': 
f'{spark_config.K8S_AUTH_FOLDER}/{self.cluster}-client.key', - 'spark.kubernetes.authenticate.clientCertFile': ( - f'{spark_config.K8S_AUTH_FOLDER}/{self.cluster}-client.crt' - ), 'spark.kubernetes.executor.label.yelp.com/paasta_service': self.service, 'spark.kubernetes.executor.label.yelp.com/paasta_instance': self.instance, 'spark.kubernetes.executor.label.yelp.com/paasta_cluster': self.cluster,