Skip to content

Commit 476fd7d

Browse files
committed
bins: initial CPU binding support
add bindings for `processor_bind()`, the mechanism to make this happen. then, in propolis-standalone, allow a choice of how binding should be done: allow vCPUs to roam with `cpu_binding = "any"` or pin vCPUs to CPUs 1:1 from the last processor downwards with `cpu_binding = "from-last"`. in `propolis-server`, pick between these two strategies automatically based on how the vCPU count compares to the number of available processors. a VM that would use more than half of available processors is bound in the same way as `propolis-standalone` with `cpu_binding = "from-last"`, but otherwise continues to have unbound vCPUs. in the future, Nexus will dictate `propolis-server`'s vCPU bindings, but exactly how is a bit of an open question.
1 parent fa5c1a8 commit 476fd7d

File tree

10 files changed

+204
-2
lines changed

10 files changed

+204
-2
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ cpuid_profile_config = { path = "crates/cpuid-profile-config" }
5050
dladm = { path = "crates/dladm" }
5151
nvpair = { path = "crates/nvpair" }
5252
nvpair_sys = { path = "crates/nvpair/sys" }
53+
pbind = { path = "crates/pbind" }
5354
propolis-config-toml = { path = "crates/propolis-config-toml" }
5455
propolis_api_types = { path = "crates/propolis-api-types" }
5556
propolis-server-api = { path = "crates/propolis-server-api" }

bin/propolis-server/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ oxide-tokio-rt.workspace = true
4343
oximeter-instruments.workspace = true
4444
oximeter-producer.workspace = true
4545
oximeter.workspace = true
46+
pbind.workspace = true
4647
ron.workspace = true
4748
thiserror.workspace = true
4849
tokio = { workspace = true, features = ["full"] }

bin/propolis-server/src/lib/vcpu_tasks.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ use thiserror::Error;
2222
pub enum VcpuTaskError {
2323
#[error("Failed to spawn a vCPU backing thread: {0}")]
2424
BackingThreadSpawnFailed(std::io::Error),
25+
#[error("CPU bindings did not match vCPUs: {bindings} bindings for {vcpus} vCPUs")]
26+
CpuBindingMismatch { bindings: usize, vcpus: usize },
2527
}
2628

2729
pub struct VcpuTasks {
@@ -41,11 +43,30 @@ impl VcpuTasks {
4143
pub(crate) fn new(
4244
machine: &propolis::Machine,
4345
event_handler: Arc<dyn super::vm::guest_event::VcpuEventHandler>,
46+
bind_cpus: Option<Vec<pbind::processorid_t>>,
4447
log: slog::Logger,
4548
) -> Result<Self, VcpuTaskError> {
4649
let generation = Arc::new(AtomicUsize::new(0));
50+
51+
// We take in an `Option<Vec<..>>` but a `Vec<Option<..>>` is more
52+
// convenient for spawning below, so we have to shuffle values a bit..
53+
let mut bindings = vec![None; machine.vcpus.len()];
54+
if let Some(bind_cpus) = bind_cpus {
55+
if bind_cpus.len() != machine.vcpus.len() {
56+
return Err(VcpuTaskError::CpuBindingMismatch {
57+
bindings: bind_cpus.len(),
58+
vcpus: machine.vcpus.len(),
59+
});
60+
}
61+
for i in 0..machine.vcpus.len() {
62+
bindings[i] = Some(bind_cpus[i]);
63+
}
64+
}
65+
4766
let mut tasks = Vec::new();
48-
for vcpu in machine.vcpus.iter().map(Arc::clone) {
67+
for (vcpu, bind_cpu) in
68+
machine.vcpus.iter().map(Arc::clone).zip(bindings.into_iter())
69+
{
4970
let (task, ctrl) =
5071
propolis::tasks::TaskHdl::new_held(Some(vcpu.barrier_fn()));
5172
let task_log = log.new(slog::o!("vcpu" => vcpu.id));
@@ -54,6 +75,10 @@ impl VcpuTasks {
5475
let thread = std::thread::Builder::new()
5576
.name(format!("vcpu-{}", vcpu.id))
5677
.spawn(move || {
78+
if let Some(bind_cpu) = bind_cpu {
79+
pbind::bind_lwp(bind_cpu)
80+
.expect("can bind to specified CPU");
81+
}
5782
Self::vcpu_loop(
5883
vcpu.as_ref(),
5984
task,

bin/propolis-server/src/lib/vm/ensure.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,9 +581,28 @@ async fn initialize_vm_objects(
581581

582582
init.register_guest_hv_interface(guest_hv_lifecycle);
583583
init.initialize_cpus().await?;
584+
585+
let total_cpus = pbind::online_cpus()?;
586+
let vcpu_count: i32 = machine
587+
.vcpus
588+
.len()
589+
.try_into()
590+
.map_err(|_| anyhow::anyhow!("more than 2^31 vCPUs"))?;
591+
let bind_cpus = if vcpu_count > total_cpus / 2 {
592+
let mut bind_cpus = Vec::new();
593+
for i in 0..vcpu_count {
594+
// Bind to the upper range of CPUs, fairly arbitrary.
595+
bind_cpus.push(total_cpus - vcpu_count + i);
596+
}
597+
Some(bind_cpus)
598+
} else {
599+
None
600+
};
601+
584602
let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new(
585603
&machine,
586604
event_queue.clone() as Arc<dyn super::guest_event::VcpuEventHandler>,
605+
bind_cpus,
587606
log.new(slog::o!("component" => "vcpu_tasks")),
588607
)?);
589608

bin/propolis-standalone/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ slog-term.workspace = true
3939
strum = { workspace = true, features = ["derive"] }
4040
tar.workspace = true
4141
uuid.workspace = true
42+
pbind.workspace = true
4243

4344
[features]
4445
default = []

bin/propolis-standalone/src/config.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ pub struct Main {
5454
pub memory: usize,
5555
pub use_reservoir: Option<bool>,
5656
pub cpuid_profile: Option<String>,
57+
/// How vCPUs should be bound to physical processors, if at all. If not
58+
/// provided, vCPUs are not bound (equivalent to setting `any`).
59+
pub cpu_binding: Option<BindingStrategy>,
5760
/// Process exitcode to emit if/when instance halts
5861
///
5962
/// Default: 0
@@ -69,6 +72,16 @@ pub struct Main {
6972
pub boot_order: Option<Vec<String>>,
7073
}
7174

75+
#[derive(Copy, Clone, Debug, Deserialize, Serialize)]
76+
#[serde(rename_all = "kebab-case")]
77+
pub enum BindingStrategy {
78+
/// vCPUs are not bound to any particular physical processor.
79+
Any,
80+
/// vCPUs are bound to the highest-numbered processors in the system, one
81+
/// vCPU per CPU, with the last vCPU bound to the last physical processor.
82+
FromLast,
83+
}
84+
7285
/// A hard-coded device, either enabled by default or accessible locally
7386
/// on a machine.
7487
#[derive(Clone, Debug, Deserialize, Serialize)]

bin/propolis-standalone/src/main.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,29 @@ impl Instance {
286286
let state = &mut *state_guard;
287287
let machine = state.machine.as_ref().unwrap();
288288

289-
for vcpu in machine.vcpus.iter().map(Arc::clone) {
289+
let bind_cpus = match this.0.config.main.cpu_binding {
290+
Some(config::BindingStrategy::FromLast) => {
291+
let mut bind_cpus = vec![None; machine.vcpus.len()];
292+
let total_cpus =
293+
pbind::online_cpus().expect("can get processor count");
294+
let vcpu_count: i32 =
295+
machine.vcpus.len().try_into().expect("<2^31 vCPUs");
296+
297+
let first_bound_cpu = total_cpus - vcpu_count;
298+
for i in 0..vcpu_count {
299+
// Bind to the upper range of CPUs.
300+
bind_cpus[i as usize] = Some(first_bound_cpu + i);
301+
}
302+
bind_cpus
303+
}
304+
Some(config::BindingStrategy::Any) | None => {
305+
vec![None; machine.vcpus.len()]
306+
}
307+
};
308+
309+
for (vcpu, bind_cpu) in
310+
machine.vcpus.iter().map(Arc::clone).zip(bind_cpus.into_iter())
311+
{
290312
let (task, ctrl) =
291313
propolis::tasks::TaskHdl::new_held(Some(vcpu.barrier_fn()));
292314

@@ -295,6 +317,9 @@ impl Instance {
295317
let _ = std::thread::Builder::new()
296318
.name(format!("vcpu-{}", vcpu.id))
297319
.spawn(move || {
320+
if let Some(bind_cpu) = bind_cpu {
321+
pbind::bind_lwp(bind_cpu).expect("can bind vcpu");
322+
}
298323
Instance::vcpu_loop(inner, vcpu.as_ref(), &task, task_log)
299324
})
300325
.unwrap();

crates/pbind/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[package]
2+
name = "pbind"
3+
version = "0.0.0"
4+
license = "MPL-2.0"
5+
edition = "2021"
6+
7+
[dependencies]
8+
libc.workspace = true

crates/pbind/src/lib.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
// C-style type names follow, opt out of warnings for using names from headers.
6+
#![allow(non_camel_case_types)]
7+
8+
//! Utility functions for binding LWPs to specific CPUs.
9+
//!
10+
//! This is generally a very light wrapper for illumos' `sysconf(3c)` and
11+
//! `processor_bind(2)`, plus a few constants out of related headers.
12+
13+
use std::io::Error;
14+
15+
// From `<sys/types.h>`
16+
pub type id_t = i32;
17+
18+
// From `<sys/processor.h>`
19+
pub type processorid_t = i32;
20+
21+
// From `<sys/procset.h>`
22+
pub type idtype_t = i32;
23+
24+
/// Selector values for the `idtype_t` ABI type, as enumerated in illumos'
/// `<sys/procset.h>`.
///
/// This is kept separate from `idtype_t` to make it explicit that `idtype_t`
/// is the type that crosses the FFI boundary; the enum is `repr(i32)` so that
/// casting a variant to `idtype_t` is trivial.
///
/// Discriminants are spelled out rather than left implicit so that inserting
/// or reordering a variant cannot silently change the values handed to the
/// system.
// Variant names mirror the C header, hence the non-Rust naming.
#[allow(non_camel_case_types)]
#[repr(i32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum IdType {
    /// A process identifier.
    P_PID = 0,
    /// A parent process identifier.
    P_PPID = 1,
    /// A process group identifier.
    P_PGID = 2,
    /// A session identifier.
    P_SID = 3,
    /// A scheduling class identifier.
    P_CID = 4,
    /// A user identifier.
    P_UID = 5,
    /// A group identifier.
    P_GID = 6,
    /// All processes.
    P_ALL = 7,
    /// An LWP identifier.
    P_LWPID = 8,
    /// A task identifier.
    P_TASKID = 9,
    /// A project identifier.
    P_PROJID = 10,
    /// A pool identifier.
    P_POOLID = 11,
    /// A zone identifier.
    P_ZONEID = 12,
    /// A contract identifier.
    P_CTID = 13,
    /// A CPU identifier.
    P_CPUID = 14,
    /// A processor set identifier.
    P_PSETID = 15,
}
47+
48+
// Returns an `i32` to match `processorid_t`, so that `0..online_cpus()`
49+
// produces a range of processor IDs without additional translation needed.
50+
pub fn online_cpus() -> Result<i32, Error> {
51+
let res = unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) };
52+
53+
if res == -1 {
54+
return Err(Error::last_os_error());
55+
}
56+
57+
res.try_into().map_err(|_| {
58+
// sysconf() reports more than 2^31 processors?!
59+
Error::other(format!("too many processors: {}", res))
60+
})
61+
}
62+
63+
#[cfg(target_os = "illumos")]
/// Pin the calling LWP to processor `bind_cpu` using `processor_bind(2)`.
///
/// The previous binding is not requested: the `obind` out-pointer is passed
/// as null.
///
/// # Errors
///
/// Returns the last OS error if `processor_bind` returns a nonzero status.
pub fn bind_lwp(bind_cpu: processorid_t) -> Result<(), Error> {
    // From `<sys/types.h>`.
    const P_MYID: id_t = -1;

    extern "C" {
        fn processor_bind(
            idtype: idtype_t,
            id: id_t,
            processorid: processorid_t,
            obind: *mut processorid_t,
        ) -> i32;
    }

    // SAFETY: every argument is a plain integer except `obind`, which is
    // null, so the call is given no memory of ours to write through.
    let status = unsafe {
        processor_bind(
            IdType::P_LWPID as idtype_t,
            P_MYID,
            bind_cpu,
            std::ptr::null_mut(),
        )
    };

    match status {
        0 => Ok(()),
        _ => Err(Error::last_os_error()),
    }
}
93+
94+
#[cfg(not(target_os = "illumos"))]
95+
/// On non-illumos targets, we're not actually running a VM. We do need the
96+
/// crate to compile to be nicer for blanket `cargo test` invocations on other
97+
/// platforms. So a no-op function will do.
98+
pub fn bind_lwp(_bind_cpu: processorid_t) -> Result<(), Error> {
99+
Ok(())
100+
}

0 commit comments

Comments
 (0)