Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions lib/propolis/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ ispf = { workspace = true, optional = true }
rand = { workspace = true, optional = true }
softnpu = { workspace = true, optional = true }
dlpi = { workspace = true, optional = true }
static_assertions = "1.1.0"

[dev-dependencies]
crossbeam-channel.workspace = true
Expand Down
5 changes: 4 additions & 1 deletion lib/propolis/src/block/attachment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ use thiserror::Error;
use tokio::sync::futures::Notified;
use tokio::sync::Notify;

/// Static for generating unique block [DeviceId]s with a process
/// Static for generating unique block [DeviceId]s within a process
///
/// Numbering across block devices means that a block `DeviceId` and the queue
/// ID in a block attachment are unique across a VM.
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);

pub const MAX_WORKERS: NonZeroUsize = NonZeroUsize::new(64).unwrap();
Expand Down
8 changes: 6 additions & 2 deletions lib/propolis/src/hw/nvme/admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use std::mem::size_of;

use crate::common::{GuestAddr, GuestRegion, PAGE_SIZE};
use crate::hw::nvme;
use crate::vmm::MemCtx;

use super::bits::*;
Expand All @@ -17,15 +18,16 @@ use super::{

#[usdt::provider(provider = "propolis")]
mod probes {
fn nvme_abort(cid: u16, sqid: u16) {}
fn nvme_abort(cid: u16, devsq_id: u64) {}
}

impl NvmeCtrl {
/// Abort command.
///
/// See NVMe 1.0e Section 5.1 Abort command
pub(super) fn acmd_abort(&self, cmd: &cmds::AbortCmd) -> cmds::Completion {
probes::nvme_abort!(|| (cmd.cid, cmd.sqid));
let devsq_id = nvme::devq_id(self.device_id, cmd.sqid);
probes::nvme_abort!(|| (cmd.cid, devsq_id));

// Verify the SQ in question currently exists
let sqid = cmd.sqid as usize;
Expand Down Expand Up @@ -74,6 +76,7 @@ impl NvmeCtrl {
match self.create_cq(
super::queue::CreateParams {
id: cmd.qid,
device_id: self.device_id,
base: GuestAddr(cmd.prp),
size: cmd.qsize,
},
Expand Down Expand Up @@ -119,6 +122,7 @@ impl NvmeCtrl {
match self.create_sq(
super::queue::CreateParams {
id: cmd.qid,
device_id: self.device_id,
base: GuestAddr(cmd.prp),
size: cmd.qsize,
},
Expand Down
64 changes: 57 additions & 7 deletions lib/propolis/src/hw/nvme/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use std::convert::TryInto;
use std::mem::size_of;
use std::num::NonZeroUsize;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, MutexGuard, Weak};

Expand All @@ -31,13 +32,33 @@ mod requests;
use bits::*;
use queue::{CompQueue, QueueId, SubQueue};

/// Static for generating unique NVMe device identifiers across a VM.
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);

type DeviceId = u32;

#[usdt::provider(provider = "propolis")]
mod probes {
fn nvme_doorbell(off: u64, qid: u16, is_cq: u8, val: u16) {}
fn nvme_doorbell(off: u64, devq_id: u64, is_cq: u8, val: u16) {}
fn nvme_doorbell_admin_cq(val: u16) {}
fn nvme_doorbell_admin_sq(val: u16) {}
fn nvme_admin_cmd(opcode: u8, prp1: u64, prp2: u64) {}
fn nvme_block_notify(sqid: u16, block_qid: u16, occupied_hint: u16) {}
fn nvme_block_notify(devsq_id: u64, block_devqid: u64, occupied_hint: u16) {
}
}

/// Combine an NVMe device and queue ID into a single u64 for probes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... to make a globally unique ID?

Copy link
Member Author

@iximeow iximeow Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't want to go quite that far: the NVMe controller can be reset, which will destroy queues, abandon any in-flight work, and probably have queues with the same ID re-created when the guest sets up the controller again. but I'll adjust this to say a bit more.

pub(crate) fn devq_id(dev: DeviceId, queue: QueueId) -> u64 {
// We'll use the low 16 bits for the queue ID. Assert at compile time that
// queue IDs cannot go above that. Clippy helpfully asserts this is an
// absurd comparison, so silence that. If you see a rustc error here you
// must have changed the type of QueueId such this is no longer absurd!
#[allow(clippy::absurd_extreme_comparisons)]
{
static_assertions::const_assert!(QueueId::MAX <= u16::MAX);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Clippy's defense u16::MAX <= u16::MAX is pretty absurd...

}

((dev as u64) << 16) | (queue as u64)
}

/// The max number of MSI-X interrupts we support
Expand Down Expand Up @@ -154,6 +175,12 @@ const MAX_NUM_IO_QUEUES: usize = MAX_NUM_QUEUES - 1;

/// NVMe Controller
struct NvmeCtrl {
/// A distinguishing identifier for this NVMe controller across the VM.
/// Useful mostly to distinguish queues and commands as seen in probes.
/// `device_id` is held constant across NVMe resets, but not persisted
/// across export and import.
device_id: DeviceId,

/// Internal NVMe Controller state
ctrl: CtrlState,

Expand Down Expand Up @@ -185,6 +212,7 @@ impl NvmeCtrl {
self.create_cq(
queue::CreateParams {
id: queue::ADMIN_QUEUE_ID,
device_id: self.device_id,
base: GuestAddr(self.ctrl.admin_cq_base),
// Convert from 0's based
size: u32::from(self.ctrl.aqa.acqs()) + 1,
Expand All @@ -196,6 +224,7 @@ impl NvmeCtrl {
self.create_sq(
queue::CreateParams {
id: queue::ADMIN_QUEUE_ID,
device_id: self.device_id,
base: GuestAddr(self.ctrl.admin_sq_base),
// Convert from 0's based
size: u32::from(self.ctrl.aqa.asqs()) + 1,
Expand Down Expand Up @@ -589,6 +618,7 @@ impl NvmeCtrl {
self.create_cq(
queue::CreateParams {
id: cqi.id,
device_id: self.device_id,
base: GuestAddr(cqi.base),
size: cqi.size,
},
Expand All @@ -611,6 +641,7 @@ impl NvmeCtrl {
.create_sq(
queue::CreateParams {
id: sqi.id,
device_id: self.device_id,
base: GuestAddr(sqi.base),
size: sqi.size,
},
Expand Down Expand Up @@ -742,6 +773,11 @@ pub struct PciNvme {
/// accesses without stacking them up behind one central lock.
is_enabled: AtomicBool,

/// Duplicate of the controller NVMe device ID, but not requiring locking
/// [NvmeCtrl] to read. This is used to provide additional context in
/// NVMe-related probes.
device_id: DeviceId,

/// Access to NVMe Submission and Completion queues.
///
/// These are protected with per-slot (queue ID) locks, so actions taken on
Expand Down Expand Up @@ -849,6 +885,7 @@ impl PciNvme {
let csts = Status(0);

let state = NvmeCtrl {
device_id: NEXT_DEVICE_ID.fetch_add(1, Ordering::Relaxed),
ctrl: CtrlState { cap, cc, csts, ..Default::default() },
doorbell_buf: None,
msix_hdl: None,
Expand Down Expand Up @@ -879,9 +916,13 @@ impl PciNvme {
}
}));

// Cache device ID before we move it into the Mutex below.
let device_id = state.device_id;

PciNvme {
state: Mutex::new(state),
is_enabled: AtomicBool::new(false),
device_id,
pci_state,
queues: NvmeQueues::default(),
block_attach,
Expand Down Expand Up @@ -1110,9 +1151,12 @@ impl PciNvme {
// 32-bit register but ignore reserved top 16-bits
let val = wo.read_u32() as u16;

// Mix in the device ID for probe purposes
let devq_id = devq_id(self.device_id, qid);

probes::nvme_doorbell!(|| (
off as u64,
qid,
devq_id,
u8::from(is_cq),
val
));
Expand Down Expand Up @@ -1188,9 +1232,12 @@ impl PciNvme {
})
{
let block_qid = queue::sqid_to_block_qid(sqid);
let devsq_id = devq_id(self.device_id, sqid);
let block_devqid =
block::devq_id(self.block_attach.device_id(), block_qid);
probes::nvme_block_notify!(|| (
sqid,
u16::from(block_qid),
devsq_id,
block_devqid,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bringing block_qid -> block_devqid along here because otherwise we'd know which NVMe device notified a block queue ... somewhere ... with no way to find out which block attachment we'd just poked.

num_occupied
));
self.block_attach.notify(
Expand All @@ -1207,14 +1254,17 @@ impl PciNvme {
let sq = guard.as_ref().unwrap();

let num_occupied = sq.notify_tail(val)?;
let devsq_id = sq.devq_id();
// Notification to block layer cannot be issued with SQ lock held
drop(guard);

assert_ne!(qid, queue::ADMIN_QUEUE_ID);
let block_qid = queue::sqid_to_block_qid(qid);
let block_devqid =
block::devq_id(self.block_attach.device_id(), block_qid);
probes::nvme_block_notify!(|| (
qid,
u16::from(block_qid),
devsq_id,
block_devqid,
num_occupied
));
self.block_attach
Expand Down
Loading
Loading