Add hotplug support for nvme-ns devices on the NvmeBus. This enables
NVMe namespace-level hot-add and hot-remove via device_add and
device_del with proper Asynchronous Event Notification (AEN), so the
guest kernel can react to namespace topology changes.
Mark nvme-ns devices as hotpluggable and register the NvmeBus as a
hotplug handler with proper plug and unplug callbacks:
- plug: attach namespace to all started controllers and send an
Asynchronous Event Notification (AEN) with NS_ATTR_CHANGED so
the guest kernel rescans namespaces and adds the block device
- unplug: drain in-flight I/O, detach from all controllers, send
AEN, then unrealize the device. The guest kernel rescans and
removes the block device.
The plug handler skips controllers that haven't started yet
(qs_created == false) to avoid interfering with boot-time namespace
attachment in nvme_start_ctrl().
The unplug handler drains in-flight I/O via nvme_ns_drain() before
detaching the namespace from controllers, so pending requests can
complete normally without touching freed state.
For symmetry with nvme_ns_realize() which sets subsys->namespaces[nsid],
nvme_ns_unrealize() now clears that slot too — making the namespace
lifecycle complete.
Both the controller bus and subsystem bus are configured as hotplug
handlers via qbus_set_bus_hotplug_handler() since nvme-ns devices
may reparent to the subsystem bus during realize.
Example hot-swap sequence using the NVMe subsystem model:
# Boot with: -device nvme-subsys,id=subsys0
# -device nvme,id=ctrl0,subsys=subsys0
# -device nvme-ns,id=ns0,drive=drv0,bus=ctrl0,nsid=1
device_del ns0 # guest receives AEN, removes /dev/nvme0n1
drive_del drv0
drive_add 0 file=disk.qcow2,format=qcow2,id=drv0,if=none
device_add nvme-ns,id=ns0,drive=drv0,bus=ctrl0,nsid=1
# guest receives AEN, adds /dev/nvme0n1
Tested with Linux 6.1 guest (NVMe driver processes AEN and rescans
namespace list automatically).
Signed-off-by: Matthieu <matthieu@min.io>
---
hw/nvme/ctrl.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++
hw/nvme/ns.c | 8 +++++
hw/nvme/subsys.c | 2 ++
3 files changed, 98 insertions(+)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index be6c7028cb..2024b0ff75 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -206,6 +206,7 @@
#include "system/hostmem.h"
#include "hw/pci/msix.h"
#include "hw/pci/pcie_sriov.h"
+#include "hw/core/qdev.h"
#include "system/spdm-socket.h"
#include "migration/vmstate.h"
@@ -9293,6 +9294,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
}
qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);
+ qbus_set_bus_hotplug_handler(BUS(&n->bus));
if (nvme_init_subsys(n, errp)) {
return;
@@ -9553,10 +9555,96 @@ static const TypeInfo nvme_info = {
},
};
+static void nvme_ns_hot_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ NvmeNamespace *ns = NVME_NS(dev);
+ NvmeSubsystem *subsys = ns->subsys;
+ uint32_t nsid = ns->params.nsid;
+ int i;
+
+ /*
+ * Attach to all started controllers and notify via AEN.
+ * Skip controllers that haven't started yet (boot-time realize) —
+ * nvme_start_ctrl() will attach namespaces during controller init.
+ */
+ for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
+ NvmeCtrl *ctrl = nvme_subsys_ctrl(subsys, i);
+ if (!ctrl || !ctrl->qs_created) {
+ continue;
+ }
+
+ if (nvme_csi_supported(ctrl, ns->csi) && !ns->params.detached) {
+ nvme_attach_ns(ctrl, ns);
+ nvme_update_dsm_limits(ctrl, ns);
+
+ if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
+ nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
+ NVME_LOG_CHANGED_NSLIST);
+ }
+ }
+ }
+}
+
+static void nvme_ns_hot_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ NvmeNamespace *ns = NVME_NS(dev);
+ NvmeSubsystem *subsys = ns->subsys;
+ uint32_t nsid = ns->params.nsid;
+ int i;
+
+ /*
+ * Drain in-flight I/O before tearing down the namespace.
+ * This must happen while the namespace is still attached to the
+ * controllers so any pending requests can complete normally.
+ */
+ nvme_ns_drain(ns);
+
+ /*
+ * Detach from all controllers and notify the guest via AEN.
+ * The guest kernel will rescan namespaces and remove the block device.
+ */
+ for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
+ NvmeCtrl *ctrl = nvme_subsys_ctrl(subsys, i);
+ if (!ctrl || !nvme_ns(ctrl, nsid)) {
+ continue;
+ }
+
+ nvme_detach_ns(ctrl, ns);
+ nvme_update_dsm_limits(ctrl, NULL);
+
+ if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
+ nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
+ NVME_LOG_CHANGED_NSLIST);
+ }
+ }
+
+ /*
+ * Unrealize: removes from subsystem (in nvme_ns_unrealize), flushes,
+ * cleans up structures, and removes from QOM.
+ */
+ qdev_unrealize(dev);
+}
+
+static void nvme_bus_class_init(ObjectClass *klass, const void *data)
+{
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
+ hc->plug = nvme_ns_hot_plug;
+ hc->unplug = nvme_ns_hot_unplug;
+}
+
static const TypeInfo nvme_bus_info = {
.name = TYPE_NVME_BUS,
.parent = TYPE_BUS,
.instance_size = sizeof(NvmeBus),
+ .class_init = nvme_bus_class_init,
+ .interfaces = (const InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { }
+ },
};
static void nvme_register_types(void)
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index b0106eaa5c..f4f755c6fc 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -719,10 +719,17 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
static void nvme_ns_unrealize(DeviceState *dev)
{
NvmeNamespace *ns = NVME_NS(dev);
+ NvmeSubsystem *subsys = ns->subsys;
+ uint32_t nsid = ns->params.nsid;
nvme_ns_drain(ns);
nvme_ns_shutdown(ns);
nvme_ns_cleanup(ns);
+
+ /* Symmetric with nvme_ns_realize() which sets subsys->namespaces[nsid]. */
+ if (subsys && nsid && subsys->namespaces[nsid] == ns) {
+ subsys->namespaces[nsid] = NULL;
+ }
}
void nvme_ns_atomic_configure_boundary(bool dn, uint16_t nabsn,
@@ -937,6 +944,7 @@ static void nvme_ns_class_init(ObjectClass *oc, const void *data)
dc->bus_type = TYPE_NVME_BUS;
dc->realize = nvme_ns_realize;
dc->unrealize = nvme_ns_unrealize;
+ dc->hotpluggable = true;
device_class_set_props(dc, nvme_ns_props);
dc->desc = "Virtual NVMe namespace";
}
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 777e1c620f..fa35055d3c 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -9,6 +9,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
+#include "hw/core/qdev.h"
#include "nvme.h"
@@ -205,6 +206,7 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
NvmeSubsystem *subsys = NVME_SUBSYS(dev);
qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);
+ qbus_set_bus_hotplug_handler(BUS(&subsys->bus));
nvme_subsys_setup(subsys, errp);
}
--
2.53.0
Hello @Stefan Hajnoczi <stefanha@redhat.com>,
Thanks for the review! v3 sent.
Wording: Fixed in v3, no more "physical PCIe slot" claim. Now describes it
as NVMe namespace-level hotplug.
I/O drain: Moved nvme_ns_drain() to the start of the unplug handler so
in-flight I/O completes before detach. Tested under warp load (16
concurrent 1MiB uploads via MinIO/DirectPV); device_del returns in ~400ms
with clean removal and no use-after-free.
Symmetry: moved subsys->namespaces[nsid] = NULL into nvme_ns_unrealize()
so the namespace lifecycle is complete (mirrors what nvme_ns_realize() sets
up).
I don't have a working Windows test setup, so I'd really appreciate it if
you could test it next week as you offered.
Thanks again for your time.
On Fri, Apr 10, 2026 at 4:29 PM mr-083 <matthieu@minio.io> wrote:
> Add hotplug support for nvme-ns devices on the NvmeBus. This enables
> NVMe namespace-level hot-add and hot-remove via device_add and
> device_del with proper Asynchronous Event Notification (AEN), so the
> guest kernel can react to namespace topology changes.
>
> Mark nvme-ns devices as hotpluggable and register the NvmeBus as a
> hotplug handler with proper plug and unplug callbacks:
>
> - plug: attach namespace to all started controllers and send an
> Asynchronous Event Notification (AEN) with NS_ATTR_CHANGED so
> the guest kernel rescans namespaces and adds the block device
> - unplug: drain in-flight I/O, detach from all controllers, send
> AEN, then unrealize the device. The guest kernel rescans and
> removes the block device.
>
> The plug handler skips controllers that haven't started yet
> (qs_created == false) to avoid interfering with boot-time namespace
> attachment in nvme_start_ctrl().
>
> The unplug handler drains in-flight I/O via nvme_ns_drain() before
> detaching the namespace from controllers, so pending requests can
> complete normally without touching freed state.
>
> For symmetry with nvme_ns_realize() which sets subsys->namespaces[nsid],
> nvme_ns_unrealize() now clears that slot too — making the namespace
> lifecycle complete.
>
> Both the controller bus and subsystem bus are configured as hotplug
> handlers via qbus_set_bus_hotplug_handler() since nvme-ns devices
> may reparent to the subsystem bus during realize.
>
> Example hot-swap sequence using the NVMe subsystem model:
>
> # Boot with: -device nvme-subsys,id=subsys0
> # -device nvme,id=ctrl0,subsys=subsys0
> # -device nvme-ns,id=ns0,drive=drv0,bus=ctrl0,nsid=1
>
> device_del ns0 # guest receives AEN, removes /dev/nvme0n1
> drive_del drv0
> drive_add 0 file=disk.qcow2,format=qcow2,id=drv0,if=none
> device_add nvme-ns,id=ns0,drive=drv0,bus=ctrl0,nsid=1
> # guest receives AEN, adds /dev/nvme0n1
>
> Tested with Linux 6.1 guest (NVMe driver processes AEN and rescans
> namespace list automatically).
>
> Signed-off-by: Matthieu <matthieu@min.io>
> ---
> hw/nvme/ctrl.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++
> hw/nvme/ns.c | 8 +++++
> hw/nvme/subsys.c | 2 ++
> 3 files changed, 98 insertions(+)
>
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index be6c7028cb..2024b0ff75 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -206,6 +206,7 @@
> #include "system/hostmem.h"
> #include "hw/pci/msix.h"
> #include "hw/pci/pcie_sriov.h"
> +#include "hw/core/qdev.h"
> #include "system/spdm-socket.h"
> #include "migration/vmstate.h"
>
> @@ -9293,6 +9294,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error
> **errp)
> }
>
> qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);
> + qbus_set_bus_hotplug_handler(BUS(&n->bus));
>
> if (nvme_init_subsys(n, errp)) {
> return;
> @@ -9553,10 +9555,96 @@ static const TypeInfo nvme_info = {
> },
> };
>
> +static void nvme_ns_hot_plug(HotplugHandler *hotplug_dev, DeviceState
> *dev,
> + Error **errp)
> +{
> + NvmeNamespace *ns = NVME_NS(dev);
> + NvmeSubsystem *subsys = ns->subsys;
> + uint32_t nsid = ns->params.nsid;
> + int i;
> +
> + /*
> + * Attach to all started controllers and notify via AEN.
> + * Skip controllers that haven't started yet (boot-time realize) —
> + * nvme_start_ctrl() will attach namespaces during controller init.
> + */
> + for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
> + NvmeCtrl *ctrl = nvme_subsys_ctrl(subsys, i);
> + if (!ctrl || !ctrl->qs_created) {
> + continue;
> + }
> +
> + if (nvme_csi_supported(ctrl, ns->csi) && !ns->params.detached) {
> + nvme_attach_ns(ctrl, ns);
> + nvme_update_dsm_limits(ctrl, ns);
> +
> + if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
> + nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
> + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
> + NVME_LOG_CHANGED_NSLIST);
> + }
> + }
> + }
> +}
> +
> +static void nvme_ns_hot_unplug(HotplugHandler *hotplug_dev, DeviceState
> *dev,
> + Error **errp)
> +{
> + NvmeNamespace *ns = NVME_NS(dev);
> + NvmeSubsystem *subsys = ns->subsys;
> + uint32_t nsid = ns->params.nsid;
> + int i;
> +
> + /*
> + * Drain in-flight I/O before tearing down the namespace.
> + * This must happen while the namespace is still attached to the
> + * controllers so any pending requests can complete normally.
> + */
> + nvme_ns_drain(ns);
> +
> + /*
> + * Detach from all controllers and notify the guest via AEN.
> + * The guest kernel will rescan namespaces and remove the block
> device.
> + */
> + for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
> + NvmeCtrl *ctrl = nvme_subsys_ctrl(subsys, i);
> + if (!ctrl || !nvme_ns(ctrl, nsid)) {
> + continue;
> + }
> +
> + nvme_detach_ns(ctrl, ns);
> + nvme_update_dsm_limits(ctrl, NULL);
> +
> + if (!test_and_set_bit(nsid, ctrl->changed_nsids)) {
> + nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE,
> + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED,
> + NVME_LOG_CHANGED_NSLIST);
> + }
> + }
> +
> + /*
> + * Unrealize: removes from subsystem (in nvme_ns_unrealize), flushes,
> + * cleans up structures, and removes from QOM.
> + */
> + qdev_unrealize(dev);
> +}
> +
> +static void nvme_bus_class_init(ObjectClass *klass, const void *data)
> +{
> + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
> + hc->plug = nvme_ns_hot_plug;
> + hc->unplug = nvme_ns_hot_unplug;
> +}
> +
> static const TypeInfo nvme_bus_info = {
> .name = TYPE_NVME_BUS,
> .parent = TYPE_BUS,
> .instance_size = sizeof(NvmeBus),
> + .class_init = nvme_bus_class_init,
> + .interfaces = (const InterfaceInfo[]) {
> + { TYPE_HOTPLUG_HANDLER },
> + { }
> + },
> };
>
> static void nvme_register_types(void)
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index b0106eaa5c..f4f755c6fc 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -719,10 +719,17 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
> static void nvme_ns_unrealize(DeviceState *dev)
> {
> NvmeNamespace *ns = NVME_NS(dev);
> + NvmeSubsystem *subsys = ns->subsys;
> + uint32_t nsid = ns->params.nsid;
>
> nvme_ns_drain(ns);
> nvme_ns_shutdown(ns);
> nvme_ns_cleanup(ns);
> +
> + /* Symmetric with nvme_ns_realize() which sets
> subsys->namespaces[nsid]. */
> + if (subsys && nsid && subsys->namespaces[nsid] == ns) {
> + subsys->namespaces[nsid] = NULL;
> + }
> }
>
> void nvme_ns_atomic_configure_boundary(bool dn, uint16_t nabsn,
> @@ -937,6 +944,7 @@ static void nvme_ns_class_init(ObjectClass *oc, const
> void *data)
> dc->bus_type = TYPE_NVME_BUS;
> dc->realize = nvme_ns_realize;
> dc->unrealize = nvme_ns_unrealize;
> + dc->hotpluggable = true;
> device_class_set_props(dc, nvme_ns_props);
> dc->desc = "Virtual NVMe namespace";
> }
> diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
> index 777e1c620f..fa35055d3c 100644
> --- a/hw/nvme/subsys.c
> +++ b/hw/nvme/subsys.c
> @@ -9,6 +9,7 @@
> #include "qemu/osdep.h"
> #include "qemu/units.h"
> #include "qapi/error.h"
> +#include "hw/core/qdev.h"
>
> #include "nvme.h"
>
> @@ -205,6 +206,7 @@ static void nvme_subsys_realize(DeviceState *dev,
> Error **errp)
> NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>
> qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id);
> + qbus_set_bus_hotplug_handler(BUS(&subsys->bus));
>
> nvme_subsys_setup(subsys, errp);
> }
> --
> 2.53.0
>
>
On Fri, Apr 10, 2026 at 04:33:47PM +0200, Matthieu Rolla wrote:
> Hello @Stefan Hajnoczi <stefanha@redhat.com>,
>
> Thanks for the review! v3 sent.
>
> Wording: Fixed in v3, no more "physical PCIe slot" claim. Now describes it
> as NVMe namespace-level hotplug.
>
> I/O drain: Moved nvme_ns_drain() to the start of the unplug handler so
> in-flight I/O completes before detach. Tested under warp load (16
> concurrent 1MiB uploads via MinIO/DirectPV); device_del returns in ~400ms
> with clean removal, no use-after-free.
>
> Symmetry: moved subsys->namespaces[nsid] = NULL into nvme_ns_unrealize()
> so the namespace lifecycle is complete (mirrors what nvme_ns_realize() sets
> up).
>
> I don't have a working Windows test setup, I'd really appreciate if you
> could test it next week as you offered.
>
> Thanks again for your time

Awesome, thanks! I will give Windows Server a spin next week.

I'm not an expert in hw/nvme/ but the patch looks good to me:

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
© 2016 - 2026 Red Hat, Inc.