Skip to content

Commit 65c7f95

Browse files
authored
Initial stab at adding topology spread constraint (TSC) support to task_proc (#220)
This should help Karpenter spread things out.
1 parent 269c926 commit 65c7f95

File tree

5 files changed

+152
-0
lines changed

5 files changed

+152
-0
lines changed

task_processing/plugins/kubernetes/kubernetes_pod_executor.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
)
4848
from task_processing.plugins.kubernetes.utils import get_pod_volumes
4949
from task_processing.plugins.kubernetes.utils import get_sanitised_kubernetes_name
50+
from task_processing.plugins.kubernetes.utils import get_topology_spread_constraints
5051

5152

5253
logger = logging.getLogger(__name__)
@@ -561,6 +562,9 @@ def run(self, task_config: KubernetesTaskConfig) -> Optional[str]:
561562
affinity=V1Affinity(
562563
node_affinity=get_node_affinity(task_config.node_affinities),
563564
),
565+
topology_spread_constraints=get_topology_spread_constraints(
566+
task_config.topology_spread_constraints
567+
),
564568
# we're hardcoding this as Default as this is what we generally use
565569
# internally - until we have a usecase for something that needs one
566570
# of the other DNS policies, we can probably punt on plumbing all the

task_processing/plugins/kubernetes/task_config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import Sequence
99
from typing import Tuple
1010
from typing import TYPE_CHECKING
11+
from typing import Union
1112

1213
from pyrsistent import field
1314
from pyrsistent import m
@@ -25,6 +26,7 @@
2526
from task_processing.plugins.kubernetes.types import ProjectedSAVolume
2627
from task_processing.plugins.kubernetes.types import SecretVolume
2728
from task_processing.plugins.kubernetes.types import SecretVolumeItem
29+
from task_processing.plugins.kubernetes.types import TopologySpreadContraint
2830
from task_processing.plugins.kubernetes.utils import (
2931
DEFAULT_PROJECTED_SA_TOKEN_EXPIRATION_SECONDS,
3032
)
@@ -473,6 +475,11 @@ def __invariant__(self) -> Tuple[Tuple[bool, str], ...]:
473475
factory=pvector,
474476
invariant=_valid_node_affinities,
475477
)
478+
topology_spread_constraints = field(
479+
type=PVector if not TYPE_CHECKING else PVector["TopologySpreadContraint"],
480+
initial=v(),
481+
factory=pvector,
482+
)
476483
labels = field(
477484
type=PMap if not TYPE_CHECKING else PMap[str, str],
478485
initial=m(),

task_processing/plugins/kubernetes/types.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,10 @@ class PodEvent(TypedDict):
8888
object: V1Pod
8989
# this is just the dict-ified version of object - but it's too big to type here
9090
raw_object: Dict[str, Any]
91+
92+
93+
class TopologySpreadContraint(TypedDict):
    """Config schema for a single pod topology spread constraint.

    Field-for-field mirror of ``kubernetes.client.V1TopologySpreadConstraint``
    (snake_case keys); translated into the real client object by
    ``get_topology_spread_constraints`` in utils.py.

    NOTE(review): class name is missing an "s" ("Contraint" -> "Constraint").
    Kept as-is here because other modules import it under this name; renaming
    should be done across all importers in one change.
    """

    # Maximum permitted difference in matching-pod count between topology domains.
    max_skew: int
    # Node label key defining the topology domain, e.g. "topology.kubernetes.io/zone".
    topology_key: str
    # Scheduler behavior when the constraint can't be met,
    # e.g. "ScheduleAnyway" or "DoNotSchedule" -- passed through verbatim.
    when_unsatisfiable: str
    # match_labels selector picking which pods are counted for spreading.
    label_selector: Dict[str, str]

task_processing/plugins/kubernetes/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from kubernetes.client import V1EnvVarSource
1212
from kubernetes.client import V1HostPathVolumeSource
1313
from kubernetes.client import V1KeyToPath
14+
from kubernetes.client import V1LabelSelector
1415
from kubernetes.client import V1NodeAffinity
1516
from kubernetes.client import V1NodeSelector
1617
from kubernetes.client import V1NodeSelectorRequirement
@@ -20,13 +21,15 @@
2021
from kubernetes.client import V1SecretKeySelector
2122
from kubernetes.client import V1SecretVolumeSource
2223
from kubernetes.client import V1ServiceAccountTokenProjection
24+
from kubernetes.client import V1TopologySpreadConstraint
2325
from kubernetes.client import V1Volume
2426
from kubernetes.client import V1VolumeMount
2527
from kubernetes.client import V1VolumeProjection
2628
from pyrsistent.typing import PMap
2729
from pyrsistent.typing import PVector
2830

2931
from task_processing.plugins.kubernetes.types import NodeAffinityOperator
32+
from task_processing.plugins.kubernetes.types import TopologySpreadContraint
3033

3134
if TYPE_CHECKING:
3235
from task_processing.plugins.kubernetes.types import EmptyVolume
@@ -417,3 +420,22 @@ def get_kubernetes_service_account_token_volume_mounts(
417420
)
418421
for volume in sa_volumes
419422
]
423+
424+
425+
def get_topology_spread_constraints(
    constraints: PVector[TopologySpreadContraint],
) -> List[V1TopologySpreadConstraint]:
    """Build topology spread constraints for a pod.

    Translates each task_proc-level ``TopologySpreadContraint`` config dict
    into the corresponding Kubernetes client object, wrapping the raw
    ``label_selector`` mapping in a ``V1LabelSelector`` (match_labels form).

    :param constraints: topology spread constraint configs from the task config
    :return: list of ``V1TopologySpreadConstraint`` objects for the pod spec
        (empty list when no constraints are configured)
    """
    return [
        V1TopologySpreadConstraint(
            label_selector=V1LabelSelector(match_labels=constraint["label_selector"]),
            max_skew=constraint["max_skew"],
            topology_key=constraint["topology_key"],
            when_unsatisfiable=constraint["when_unsatisfiable"],
        )
        for constraint in constraints
    ]

tests/unit/plugins/kubernetes/kubernetes_pod_executor_test.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from kubernetes.client import V1Container
99
from kubernetes.client import V1ContainerPort
1010
from kubernetes.client import V1HostPathVolumeSource
11+
from kubernetes.client import V1LabelSelector
1112
from kubernetes.client import V1ObjectMeta
1213
from kubernetes.client import V1Pod
1314
from kubernetes.client import V1PodSecurityContext
@@ -16,6 +17,7 @@
1617
from kubernetes.client import V1ResourceRequirements
1718
from kubernetes.client import V1SecurityContext
1819
from kubernetes.client import V1ServiceAccountTokenProjection
20+
from kubernetes.client import V1TopologySpreadConstraint
1921
from kubernetes.client import V1Volume
2022
from kubernetes.client import V1VolumeMount
2123
from kubernetes.client import V1VolumeProjection
@@ -220,6 +222,7 @@ def test_run_single_request_memory(mock_get_node_affinity, k8s_executor):
220222
),
221223
node_selector={"hello": "world"},
222224
affinity=V1Affinity(node_affinity=mock_get_node_affinity.return_value),
225+
topology_spread_constraints=[],
223226
dns_policy="Default",
224227
service_account_name=task_config.service_account_name,
225228
),
@@ -321,6 +324,7 @@ def test_run_single_request_cpu(mock_get_node_affinity, k8s_executor):
321324
),
322325
node_selector={"hello": "world"},
323326
affinity=V1Affinity(node_affinity=mock_get_node_affinity.return_value),
327+
topology_spread_constraints=[],
324328
dns_policy="Default",
325329
service_account_name=task_config.service_account_name,
326330
),
@@ -426,6 +430,7 @@ def test_run_both_requests(mock_get_node_affinity, k8s_executor):
426430
),
427431
node_selector={"hello": "world"},
428432
affinity=V1Affinity(node_affinity=mock_get_node_affinity.return_value),
433+
topology_spread_constraints=[],
429434
dns_policy="Default",
430435
service_account_name=task_config.service_account_name,
431436
),
@@ -526,6 +531,7 @@ def test_run_no_requests(mock_get_node_affinity, k8s_executor):
526531
),
527532
node_selector={"hello": "world"},
528533
affinity=V1Affinity(node_affinity=mock_get_node_affinity.return_value),
534+
topology_spread_constraints=[],
529535
dns_policy="Default",
530536
service_account_name=task_config.service_account_name,
531537
),
@@ -677,6 +683,7 @@ def test_run_authentication_token(mock_get_node_affinity, k8s_executor):
677683
),
678684
node_selector={"hello": "world"},
679685
affinity=V1Affinity(node_affinity=mock_get_node_affinity.return_value),
686+
topology_spread_constraints=[],
680687
dns_policy="Default",
681688
service_account_name=task_config.service_account_name,
682689
),
@@ -692,6 +699,111 @@ def test_run_authentication_token(mock_get_node_affinity, k8s_executor):
692699
]
693700

694701

702+
@mock.patch(
    "task_processing.plugins.kubernetes.kubernetes_pod_executor.get_node_affinity",
    autospec=True,
)
def test_run_topology_spread_constraint(mock_get_node_affinity, k8s_executor):
    """run() should translate topology_spread_constraints config into
    V1TopologySpreadConstraint objects on the created pod spec.

    Builds a task config with a single zone-spread constraint, then asserts the
    pod handed to create_namespaced_pod matches an explicitly-constructed
    expected V1Pod (exact object equality, so every field must line up).
    """
    # Task config with one spread constraint; remaining fields mirror the other
    # run() tests in this file so only the constraint handling is under test.
    task_config = KubernetesTaskConfig(
        name="fake_task_name",
        uuid="fake_id",
        image="fake_docker_image",
        command="fake_command",
        cpus=1,
        cpus_request=0.5,
        memory=1024,
        disk=1024,
        volumes=[],
        projected_sa_volumes=[],
        node_selectors={"hello": "world"},
        node_affinities=[dict(key="a_label", operator="In", value=[])],
        # dict form of TopologySpreadContraint: run() should turn this into a
        # V1TopologySpreadConstraint via get_topology_spread_constraints.
        topology_spread_constraints=[
            {
                "max_skew": 1,
                "topology_key": "topology.kubernetes.io/zone",
                "when_unsatisfiable": "ScheduleAnyway",
                "label_selector": {
                    "app.kubernetes.io/managed-by": "task_proc",
                },
            },
        ],
        labels={
            "some_label": "some_label_value",
        },
        annotations={
            "paasta.yelp.com/some_annotation": "some_value",
        },
        service_account_name="testsa",
        ports=[8888],
        stdin=True,
        stdin_once=True,
        tty=True,
    )
    # Expected container, mirroring how run() maps task_config onto V1Container.
    expected_container = V1Container(
        image=task_config.image,
        name="main",
        command=["/bin/sh", "-c"],
        args=[task_config.command],
        security_context=V1SecurityContext(
            capabilities=V1Capabilities(drop=list(task_config.cap_drop)),
        ),
        resources=V1ResourceRequirements(
            limits={
                "cpu": 1.0,
                "memory": "1024.0Mi",
                "ephemeral-storage": "1024.0Mi",
            },
            requests={"cpu": 0.5},
        ),
        env=[],
        volume_mounts=[],
        ports=[V1ContainerPort(container_port=8888)],
        stdin=True,
        stdin_once=True,
        tty=True,
    )
    expected_pod = V1Pod(
        metadata=V1ObjectMeta(
            name=task_config.pod_name,
            namespace="task_processing_tests",
            labels={
                "some_label": "some_label_value",
            },
            annotations={
                "paasta.yelp.com/some_annotation": "some_value",
            },
        ),
        spec=V1PodSpec(
            restart_policy=task_config.restart_policy,
            containers=[expected_container],
            volumes=[],
            share_process_namespace=True,
            security_context=V1PodSecurityContext(
                fs_group=task_config.fs_group,
            ),
            node_selector={"hello": "world"},
            # get_node_affinity is patched, so the pod must carry the mock's
            # return value verbatim for equality to hold.
            affinity=V1Affinity(node_affinity=mock_get_node_affinity.return_value),
            # The heart of the test: the config dict above, now in client-object
            # form with label_selector wrapped in V1LabelSelector(match_labels=...).
            topology_spread_constraints=[
                V1TopologySpreadConstraint(
                    max_skew=1,
                    topology_key="topology.kubernetes.io/zone",
                    when_unsatisfiable="ScheduleAnyway",
                    label_selector=V1LabelSelector(
                        match_labels={"app.kubernetes.io/managed-by": "task_proc"}
                    ),
                ),
            ],
            dns_policy="Default",
            service_account_name=task_config.service_account_name,
        ),
    )

    assert k8s_executor.run(task_config) == task_config.pod_name
    # Exactly one pod creation, with the fully-specified expected pod body.
    assert k8s_executor.kube_client.core.create_namespaced_pod.call_args_list == [
        mock.call(body=expected_pod, namespace="task_processing_tests")
    ]
805+
806+
695807
def test_process_event_enqueues_task_processing_events_pending_to_running(k8s_executor):
696808
mock_pod = mock.Mock(spec=V1Pod)
697809
mock_pod.metadata.name = "test.1234"

0 commit comments

Comments
 (0)