Skip to content

Commit 9e2d0f7

Browse files
authored
feat: support for custom node selector (#12)
* allow node type to be null * feat: support for custom node selector * feat: support for node selector and arm build. We want jobs to be able to select different node types. Signed-off-by: vsoch <[email protected]>
1 parent 1255777 commit 9e2d0f7

File tree

10 files changed

+196
-40
lines changed

10 files changed

+196
-40
lines changed

.github/workflows/build-deploy.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,34 @@ on:
99
- main
1010

1111
jobs:
12+
# Only build arm image on merge, takes too long otherwise
13+
build-arm:
14+
if: (github.event_name != 'pull_request')
15+
runs-on: ubuntu-latest
16+
name: make and build arm
17+
steps:
18+
- name: Checkout Repository
19+
uses: actions/checkout@v4
20+
- uses: actions/setup-go@v3
21+
with:
22+
go-version: ^1.22
23+
- name: GHCR Login
24+
if: (github.event_name != 'pull_request')
25+
uses: docker/login-action@v2
26+
with:
27+
registry: ghcr.io
28+
username: ${{ github.actor }}
29+
password: ${{ secrets.GITHUB_TOKEN }}
30+
31+
- name: Add custom buildx ARM builder
32+
run: |
33+
docker buildx create --name armbuilder
34+
docker buildx use armbuilder
35+
docker buildx inspect --bootstrap
36+
37+
- name: Deploy Container
38+
run: make arm-deploy
39+
1240
build-manager:
1341
permissions:
1442
packages: write

Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ endif
4141
OPERATOR_SDK_VERSION ?= v1.38.0
4242
# Image URL to use all building/pushing image targets
4343
IMG ?= ghcr.io/converged-computing/state-machine-operator:latest
44+
ARMIMG ?= ghcr.io/converged-computing/state-machine-operator:arm
4445
DEVIMG ?= ghcr.io/converged-computing/state-machine-operator:test
4546
MANAGER_IMG ?= ghcr.io/converged-computing/state-machine-operator:manager
4647

@@ -276,6 +277,16 @@ golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
276277
$(GOLANGCI_LINT): $(LOCALBIN)
277278
$(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION))
278279

280+
.PHONY: arm-build
281+
arm-build: test ## Build the linux/arm64 docker image for the manager (does not push).
282+
docker buildx build --platform linux/arm64 -t ${ARMIMG} .
283+
284+
.PHONY: arm-deploy
285+
arm-deploy: manifests kustomize
286+
docker buildx build --platform linux/arm64 --push -t ${ARMIMG} .
287+
cd config/manager && $(KUSTOMIZE) edit set image controller=${ARMIMG}
288+
$(KUSTOMIZE) build config/default > examples/dist/state-machine-operator-arm.yaml
289+
279290
# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist
280291
# $1 - target path with name of binary
281292
# $2 - package url which can be installed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ For each job script section, the following environment variables are provided fo
9090
- registry: the registry where your artifact will be pushed
9191
- pull_tag: the pull tag to use (if the workflow is pulling)
9292
- push_tag: the push tag to use (if the workflow is pushing)
93+
- properties:
94+
- node-selector: key-value pairs to add as node selectors for the job (Kubernetes only). E.g., `node.kubernetes.io/instance-type: c7a.4xlarge`
9395

9496
Take a look at the simple example [examples/state-machine.yaml](examples/state-machine.yaml) to see how push/pull is defined between steps. When these are defined (with a tag), your artifact will be named `<registry>:<jobid>:<tag>` so that it can be moved between steps.
9597

api/v1alpha1/statemachine_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ type JobStep struct {
9696
// + optional
9797
AppConfig string `json:"appConfig,omitempty"`
9898

99+
// Properties for the tracker, etc.
100+
// +optional
101+
Properties map[string]string `json:"properties,omitempty"`
102+
99103
// Namespace is inherited from StateMachine Spec
100104
// This is the container image (required)
101105
Image string `json:"image,omitempty"`

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 119 additions & 39 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/state-machine.converged-computing.org_statemachines.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ spec:
117117
name:
118118
description: Name is the name of the job (required)
119119
type: string
120+
properties:
121+
additionalProperties:
122+
type: string
123+
description: Properties for the tracker, etc.
124+
type: object
120125
registry:
121126
description: Configuration for the job registry
122127
properties:

examples/dist/state-machine-operator-dev.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,11 @@ spec:
125125
name:
126126
description: Name is the name of the job (required)
127127
type: string
128+
properties:
129+
additionalProperties:
130+
type: string
131+
description: Properties for the tracker, etc.
132+
type: object
128133
registry:
129134
description: Configuration for the job registry
130135
properties:

internal/controller/manager/jobs/templates/job.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ name: {{ .Job.Name }}
66
image: {{ .Job.Image }}
77
namespace: {{ if .StateMachine.Namespace }}{{ .StateMachine.Namespace }}{{ else }}default{{ end }}
88
workdir: {{ if .Job.Workdir }}{{ .Job.Workdir }}{{ else }}/tmp/out{{ end }}
9-
9+
{{ if .Job.Properties }}properties: {{range $key, $value := .Job.Properties }}
10+
{{ $key }}: {{ $value }}
11+
{{ end }}{{ end }}
1012

1113
{{ if .Job.HasRegistry }}
1214
registry:

python/state_machine_operator/schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
"script": {"type": "string"},
8282
"image": {"type": "string"},
8383
"registry": {"$ref": "#/definitions/registry"},
84+
"properties": {"type": ["object", "null"]},
8485
"workdir": {"type": "string", "default": "/tmp/out"},
8586
"additionalProperties": False,
8687
},

python/state_machine_operator/tracker/kubernetes/tracker.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,18 @@ def delete_configmap(self, name):
9090
except Exception as e:
9191
LOGGER.warning(f"Issue deleting configmap {name}: {e}")
9292

93+
def get_node_selector(self):
94+
"""
95+
Node selector is in properties -> node-selector
96+
"""
97+
# Properties may be provided as a JSON string; if so, parse it with json.loads
98+
props = self.job_desc.get("properties", {})
99+
if isinstance(props, str):
100+
props = json.loads(props)
101+
if not props:
102+
return props
103+
return props.get("node-selector")
104+
93105
def generate_batch_job(self, step, jobid):
94106
"""
95107
Generate the job CRD assuming the config map entrypoint.
@@ -186,6 +198,12 @@ def generate_batch_job(self, step, jobid):
186198
},
187199
}
188200

201+
# Add node selectors? E.g.,
202+
# node.kubernetes.io/instance-type: c7a.4xlarge
203+
node_selector = self.get_node_selector()
204+
if node_selector is not None:
205+
template["spec"]["nodeSelector"] = node_selector
206+
189207
# Only add walltime if it's > 0 and not None
190208
if walltime:
191209
template["spec"]["activeDeadlineSeconds"] = int(walltime)

0 commit comments

Comments
 (0)