@@ -17,32 +17,38 @@ import (
17
17
18
18
api "flux-framework/flux-operator/api/v1alpha1"
19
19
20
+ ctrl "sigs.k8s.io/controller-runtime"
20
21
jobset "sigs.k8s.io/jobset/api/v1alpha1"
21
22
)
22
23
23
24
func (r * MiniClusterReconciler ) newJobSet (
24
25
cluster * api.MiniCluster ,
25
26
) (* jobset.JobSet , error ) {
26
27
27
- suspend := true
28
+ // NOTE: it is not yet clear how suspend works. With suspend set to true
29
+ // the JobSet did not seem to create any pods, so it is disabled for now.
30
+ suspend := false
28
31
jobs := jobset.JobSet {
29
32
ObjectMeta : metav1.ObjectMeta {
30
- Name : cluster . Name ,
33
+ Name : "minicluster" ,
31
34
Namespace : cluster .Namespace ,
32
35
Labels : cluster .Spec .JobLabels ,
33
36
},
34
37
Spec : jobset.JobSetSpec {
35
38
36
39
// Suspend child jobs (the worker pods) when broker finishes
40
+ // TODO: determine how a child job is defined in a JobSet
37
41
Suspend : & suspend ,
38
42
// TODO decide on FailurePolicy here
39
43
// default is to fail if all jobs in jobset fail
40
44
},
41
45
}
42
46
43
47
// Get leader broker job, the parent in the JobSet (worker or follower pods)
48
+ // Both are required to be in indexed completion mode to have a service!
49
+ // I'm not sure that totally makes sense, but ok!
44
50
// cluster, size, entrypoint, indexed
45
- leaderJob , err := r .getJob (cluster , 1 , "broker" , false )
51
+ leaderJob , err := r .getJob (cluster , 1 , "broker" , true )
46
52
if err != nil {
47
53
return & jobs , err
48
54
}
@@ -51,10 +57,11 @@ func (r *MiniClusterReconciler) newJobSet(
51
57
return & jobs , err
52
58
}
53
59
jobs .Spec .ReplicatedJobs = []jobset.ReplicatedJob {leaderJob , workerJob }
60
+ ctrl .SetControllerReference (cluster , & jobs , r .Scheme )
54
61
return & jobs , nil
55
62
}
56
63
57
- // getBrokerJob creates the job for the main leader broker
64
+ // getJob creates a job for the main leader (broker) or for the workers (followers)
58
65
func (r * MiniClusterReconciler ) getJob (
59
66
cluster * api.MiniCluster ,
60
67
size int32 ,
@@ -64,14 +71,13 @@ func (r *MiniClusterReconciler) getJob(
64
71
65
72
backoffLimit := int32 (100 )
66
73
podLabels := r .getPodLabels (cluster )
67
- enableDNSHostnames := true
74
+ enableDNSHostnames := false
68
75
completionMode := batchv1 .NonIndexedCompletion
69
76
70
77
if indexed {
71
78
completionMode = batchv1 .IndexedCompletion
72
79
}
73
80
74
- // TODO how are these named
75
81
job := jobset.ReplicatedJob {
76
82
Name : cluster .Name + "-" + entrypoint ,
77
83
@@ -110,7 +116,7 @@ func (r *MiniClusterReconciler) getJob(
110
116
},
111
117
Spec : corev1.PodSpec {
112
118
// matches the service
113
- // Subdomain: restfulServiceName,
119
+ Subdomain : restfulServiceName ,
114
120
Volumes : getVolumes (cluster , entrypoint ),
115
121
RestartPolicy : corev1 .RestartPolicyOnFailure ,
116
122
ImagePullSecrets : getImagePullSecrets (cluster ),
@@ -130,7 +136,12 @@ func (r *MiniClusterReconciler) getJob(
130
136
131
137
// Get volume mounts, add on container specific ones
132
138
mounts := getVolumeMounts (cluster )
133
- containers , err := r .getContainers (cluster .Spec .Containers , cluster .Name , mounts )
139
+ containers , err := r .getContainers (
140
+ cluster .Spec .Containers ,
141
+ cluster .Name ,
142
+ mounts ,
143
+ entrypoint ,
144
+ )
134
145
jobspec .Template .Spec .Containers = containers
135
146
job .Template .Spec = jobspec
136
147
return job , err
0 commit comments