From: Klaus Jensen <k.jensen@samsung.com>
Prior to this patch the nvme-ns devices are always children of the
NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
unrealized when the parent device is removed. However, when subsystems
are involved, this is not what we want since the namespaces may be
attached to other controllers as well.
This patch adds an additional NvmeBus on the subsystem device. When
nvme-ns devices are realized, if the parent controller device is linked
to a subsystem, the parent bus is set to the subsystem one instead. This
makes sure that namespaces are kept alive and not unrealized.
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/nvme/nvme.h | 15 ++++++++-------
hw/nvme/ctrl.c | 14 ++++++--------
hw/nvme/ns.c | 18 ++++++++++++++++++
hw/nvme/subsys.c | 3 +++
4 files changed, 35 insertions(+), 15 deletions(-)
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index c4065467d877..83ffabade4cf 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -33,12 +33,20 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;
+#define TYPE_NVME_BUS "nvme-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
+
+typedef struct NvmeBus {
+ BusState parent_bus;
+} NvmeBus;
+
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
typedef struct NvmeSubsystem {
DeviceState parent_obj;
+ NvmeBus bus;
uint8_t subnqn[256];
NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
@@ -365,13 +373,6 @@ typedef struct NvmeCQueue {
QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;
-#define TYPE_NVME_BUS "nvme-bus"
-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
-
-typedef struct NvmeBus {
- BusState parent_bus;
-} NvmeBus;
-
#define TYPE_NVME "nvme"
#define NVME(obj) \
OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index ead7531bde5e..2f0524e12a36 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6527,16 +6527,14 @@ static void nvme_exit(PCIDevice *pci_dev)
nvme_ctrl_reset(n);
- for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- ns = nvme_ns(n, i);
- if (!ns) {
- continue;
+ if (n->subsys) {
+ for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ ns = nvme_ns(n, i);
+ if (ns) {
+ ns->attached--;
+ }
}
- nvme_ns_cleanup(ns);
- }
-
- if (n->subsys) {
nvme_subsys_unregister_ctrl(n->subsys, n);
}
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 3c4f5b8c714a..b7cf1494e75b 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -441,6 +441,15 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
}
}
+static void nvme_ns_unrealize(DeviceState *dev)
+{
+ NvmeNamespace *ns = NVME_NS(dev);
+
+ nvme_ns_drain(ns);
+ nvme_ns_shutdown(ns);
+ nvme_ns_cleanup(ns);
+}
+
static void nvme_ns_realize(DeviceState *dev, Error **errp)
{
NvmeNamespace *ns = NVME_NS(dev);
@@ -462,6 +471,14 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
"linked to an nvme-subsys device");
return;
}
+ } else {
+ /*
+ * If this namespace belongs to a subsystem (through a link on the
+ * controller device), reparent the device.
+ */
+ if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
+ return;
+ }
}
if (nvme_ns_setup(ns, errp)) {
@@ -552,6 +569,7 @@ static void nvme_ns_class_init(ObjectClass *oc, void *data)
dc->bus_type = TYPE_NVME_BUS;
dc->realize = nvme_ns_realize;
+ dc->unrealize = nvme_ns_unrealize;
device_class_set_props(dc, nvme_ns_props);
dc->desc = "Virtual NVMe namespace";
}
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 92caa604a280..93c35950d69d 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -50,6 +50,9 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
{
NvmeSubsystem *subsys = NVME_SUBSYS(dev);
+ qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev,
+ dev->id);
+
nvme_subsys_setup(subsys);
}
--
2.32.0
On 7/26/21 9:18 PM, Klaus Jensen wrote: > From: Klaus Jensen <k.jensen@samsung.com> > > Prior to this patch the nvme-ns devices are always children of the > NvmeBus owned by the NvmeCtrl. This causes the namespaces to be > unrealized when the parent device is removed. However, when subsystems > are involved, this is not what we want since the namespaces may be > attached to other controllers as well. > > This patch adds an additional NvmeBus on the subsystem device. When > nvme-ns devices are realized, if the parent controller device is linked > to a subsystem, the parent bus is set to the subsystem one instead. This > makes sure that namespaces are kept alive and not unrealized. > > Reviewed-by: Hannes Reinecke <hare@suse.de> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com> > --- > hw/nvme/nvme.h | 15 ++++++++------- > hw/nvme/ctrl.c | 14 ++++++-------- > hw/nvme/ns.c | 18 ++++++++++++++++++ > hw/nvme/subsys.c | 3 +++ > 4 files changed, 35 insertions(+), 15 deletions(-) > Finally got around to test this; sadly, with mixed results. On the good side: controller hotplug works. Within qemu monitor I can do: device_del <devname> device_add <device description> and OS reports: [ 56.564447] pcieport 0000:00:09.0: pciehp: Slot(0-2): Attention button pressed [ 56.564460] pcieport 0000:00:09.0: pciehp: Slot(0-2): Powering off due to button press [ 104.293335] pcieport 0000:00:09.0: pciehp: Slot(0-2): Attention button pressed [ 104.293347] pcieport 0000:00:09.0: pciehp: Slot(0-2) Powering on due to button press [ 104.293540] pcieport 0000:00:09.0: pciehp: Slot(0-2): Card present [ 104.293544] pcieport 0000:00:09.0: pciehp: Slot(0-2): Link Up [ 104.428961] pci 0000:03:00.0: [1b36:0010] type 00 class 0x010802 [ 104.429298] pci 0000:03:00.0: reg 0x10: [mem 0x00000000-0x00003fff 64bit] [ 104.431442] pci 0000:03:00.0: BAR 0: assigned [mem 0xc1200000-0xc1203fff 64bit] [ 104.431580] pcieport 0000:00:09.0: PCI bridge to [bus 03] [ 104.431604] pcieport 0000:00:09.0: bridge window [io 0x7000-0x7fff] [ 104.434815] pcieport 0000:00:09.0: bridge window [mem 0xc1200000-0xc13fffff] [ 104.436685] pcieport 0000:00:09.0: bridge window [mem 0x804000000-0x805ffffff 64bit pref] [ 104.441896] nvme nvme2: pci function 0000:03:00.0 [ 104.442151] nvme 0000:03:00.0: enabling device (0000 -> 0002) [ 104.455821] nvme nvme2: 1/0/0 default/read/poll queues So that works. But: the namespace is not reconnected. # nvme list-ns /dev/nvme2 draws a blank. So guess some fixup patch is in order... Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
On Sep 9 09:02, Hannes Reinecke wrote:
> On 7/26/21 9:18 PM, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> >
> > Prior to this patch the nvme-ns devices are always children of the
> > NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
> > unrealized when the parent device is removed. However, when subsystems
> > are involved, this is not what we want since the namespaces may be
> > attached to other controllers as well.
> >
> > This patch adds an additional NvmeBus on the subsystem device. When
> > nvme-ns devices are realized, if the parent controller device is linked
> > to a subsystem, the parent bus is set to the subsystem one instead. This
> > makes sure that namespaces are kept alive and not unrealized.
> >
> > Reviewed-by: Hannes Reinecke <hare@suse.de>
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > ---
> > hw/nvme/nvme.h | 15 ++++++++-------
> > hw/nvme/ctrl.c | 14 ++++++--------
> > hw/nvme/ns.c | 18 ++++++++++++++++++
> > hw/nvme/subsys.c | 3 +++
> > 4 files changed, 35 insertions(+), 15 deletions(-)
> >
> Finally got around to test this; sadly, with mixed results.
> On the good side: controller hotplug works.
> Within qemu monitor I can do:
>
> device_del <devname>
> device_add <device description>
>
> and OS reports:
> [ 56.564447] pcieport 0000:00:09.0: pciehp: Slot(0-2): Attention button
> pressed
> [ 56.564460] pcieport 0000:00:09.0: pciehp: Slot(0-2): Powering off due to
> button press
> [ 104.293335] pcieport 0000:00:09.0: pciehp: Slot(0-2): Attention button
> pressed
> [ 104.293347] pcieport 0000:00:09.0: pciehp: Slot(0-2) Powering on due to
> button press
> [ 104.293540] pcieport 0000:00:09.0: pciehp: Slot(0-2): Card present
> [ 104.293544] pcieport 0000:00:09.0: pciehp: Slot(0-2): Link Up
> [ 104.428961] pci 0000:03:00.0: [1b36:0010] type 00 class 0x010802
> [ 104.429298] pci 0000:03:00.0: reg 0x10: [mem 0x00000000-0x00003fff 64bit]
> [ 104.431442] pci 0000:03:00.0: BAR 0: assigned [mem 0xc1200000-0xc1203fff
> 64bit]
> [ 104.431580] pcieport 0000:00:09.0: PCI bridge to [bus 03]
> [ 104.431604] pcieport 0000:00:09.0: bridge window [io 0x7000-0x7fff]
> [ 104.434815] pcieport 0000:00:09.0: bridge window [mem
> 0xc1200000-0xc13fffff]
> [ 104.436685] pcieport 0000:00:09.0: bridge window [mem
> 0x804000000-0x805ffffff 64bit pref]
> [ 104.441896] nvme nvme2: pci function 0000:03:00.0
> [ 104.442151] nvme 0000:03:00.0: enabling device (0000 -> 0002)
> [ 104.455821] nvme nvme2: 1/0/0 default/read/poll queues
>
> So that works.
> But: the namespace is not reconnected.
>
> # nvme list-ns /dev/nvme2
>
> draws a blank. So guess some fixup patch is in order...
>
Hi Hannes,
I see. Ater the device_del/device_add dance, the namespace is there, but it is
not automatically attached.
# nvme list-ns -a /dev/nvme0
[ 0]:0x2
# nvme attach-ns /dev/nvme0 -n 2 -c 0
attach-ns: Success, nsid:2
# nvme list-ns /dev/nvme0
[ 0]:0x2
I don't *think* the spec says that namespaces *must* be re-attached
automatically? But I would have to check... If it does say that, then this is a
bug of course.
On 9/9/21 9:59 AM, Klaus Jensen wrote: > On Sep 9 09:02, Hannes Reinecke wrote: >> On 7/26/21 9:18 PM, Klaus Jensen wrote: >>> From: Klaus Jensen <k.jensen@samsung.com> >>> >>> Prior to this patch the nvme-ns devices are always children of the >>> NvmeBus owned by the NvmeCtrl. This causes the namespaces to be >>> unrealized when the parent device is removed. However, when subsystems >>> are involved, this is not what we want since the namespaces may be >>> attached to other controllers as well. >>> >>> This patch adds an additional NvmeBus on the subsystem device. When >>> nvme-ns devices are realized, if the parent controller device is linked >>> to a subsystem, the parent bus is set to the subsystem one instead. This >>> makes sure that namespaces are kept alive and not unrealized. >>> >>> Reviewed-by: Hannes Reinecke <hare@suse.de> >>> Signed-off-by: Klaus Jensen <k.jensen@samsung.com> >>> --- >>> hw/nvme/nvme.h | 15 ++++++++------- >>> hw/nvme/ctrl.c | 14 ++++++-------- >>> hw/nvme/ns.c | 18 ++++++++++++++++++ >>> hw/nvme/subsys.c | 3 +++ >>> 4 files changed, 35 insertions(+), 15 deletions(-) >>> >> Finally got around to test this; sadly, with mixed results. >> On the good side: controller hotplug works. >> Within qemu monitor I can do: >> >> device_del <devname> >> device_add <device description> >> >> and OS reports: >> [ 56.564447] pcieport 0000:00:09.0: pciehp: Slot(0-2): Attention button >> pressed >> [ 56.564460] pcieport 0000:00:09.0: pciehp: Slot(0-2): Powering off due to >> button press >> [ 104.293335] pcieport 0000:00:09.0: pciehp: Slot(0-2): Attention button >> pressed >> [ 104.293347] pcieport 0000:00:09.0: pciehp: Slot(0-2) Powering on due to >> button press >> [ 104.293540] pcieport 0000:00:09.0: pciehp: Slot(0-2): Card present >> [ 104.293544] pcieport 0000:00:09.0: pciehp: Slot(0-2): Link Up >> [ 104.428961] pci 0000:03:00.0: [1b36:0010] type 00 class 0x010802 >> [ 104.429298] pci 0000:03:00.0: reg 0x10: [mem 0x00000000-0x00003fff 64bit] >> [ 104.431442] pci 0000:03:00.0: BAR 0: assigned [mem 0xc1200000-0xc1203fff >> 64bit] >> [ 104.431580] pcieport 0000:00:09.0: PCI bridge to [bus 03] >> [ 104.431604] pcieport 0000:00:09.0: bridge window [io 0x7000-0x7fff] >> [ 104.434815] pcieport 0000:00:09.0: bridge window [mem >> 0xc1200000-0xc13fffff] >> [ 104.436685] pcieport 0000:00:09.0: bridge window [mem >> 0x804000000-0x805ffffff 64bit pref] >> [ 104.441896] nvme nvme2: pci function 0000:03:00.0 >> [ 104.442151] nvme 0000:03:00.0: enabling device (0000 -> 0002) >> [ 104.455821] nvme nvme2: 1/0/0 default/read/poll queues >> >> So that works. >> But: the namespace is not reconnected. >> >> # nvme list-ns /dev/nvme2 >> >> draws a blank. So guess some fixup patch is in order... >> > > Hi Hannes, > > I see. Ater the device_del/device_add dance, the namespace is there, but it is > not automatically attached. > > # nvme list-ns -a /dev/nvme0 > [ 0]:0x2 > > # nvme attach-ns /dev/nvme0 -n 2 -c 0 > attach-ns: Success, nsid:2 > > # nvme list-ns /dev/nvme0 > [ 0]:0x2 > > > I don't *think* the spec says that namespaces *must* be re-attached > automatically? But I would have to check... If it does say that, then this is a > bug of course. > Errm. Yes, the namespaces must be present after a 'reset' (which a hotunplug/hotplug cycle amounts to here). As per spec the namespaces are a property of the _subsystem_, not the controller. And the controller attaches to a subsystem, so it'll see any namespaces which are present in the subsystem. (whether it needs to see _all_ namespaces from that subsystem is another story, but doesn't need to bother us here :-). Just send a patch for it; is actually quite trivial. Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
© 2016 - 2026 Red Hat, Inc.