Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/dist/state-machine-operator-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ spec:
control-plane: controller-manager
spec:
nodeSelector:
node.kubernetes.io/instance-type: m6a.4xlarge
node.kubernetes.io/instance-type: c7a.4xlarge
containers:
- args:
- --metrics-bind-address=:8443
Expand Down
10 changes: 6 additions & 4 deletions python/state_machine_operator/tracker/flux/tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ def generate_flux_job(self, step, jobid):
if not os.path.exists(entrypoint):
utils.write_file(step.script, entrypoint)

# TODO expose more here?
# QUESTION: is a flux task == what we are using for procs?
exclusive = self.config.get("exclusive") in utils.true_values
num_tasks = max(1, self.config.get("tasks") or 1)
jobspec = flux.job.JobspecV1.from_command(
command=command,
# TODO add tasks here, right now assume == number of nodes
num_nodes=step.nodes,
num_tasks=step.nodes,
num_tasks=num_tasks,
cores_per_task=step.cores_per_task,
gpus_per_task=step.gpus,
exclusive=exclusive,
)

# Set user attribute we can later retrieve to identify group
Expand Down Expand Up @@ -147,6 +148,7 @@ def create_step(self, jobid):
gpus=self.ngpus,
workdir=workdir,
)

configfile = os.path.join(workdir, "app-config")

if "script" in self.job_desc:
Expand Down