From: Klaus Jensen <k.jensen@samsung.com>
Prior to this patch the nvme-ns devices are always children of the
NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
unrealized when the parent device is removed. However, when subsystems
are involved, this is not what we want since the namespaces may be
attached to other controllers as well.
This patch adds an additional NvmeBus on the subsystem device. When
nvme-ns devices are realized, if the parent controller device is linked
to a subsystem, the parent bus is set to the subsystem one instead. This
makes sure that namespaces are kept alive and not unrealized.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/nvme/nvme.h | 18 ++++++++++--------
hw/nvme/ctrl.c | 8 +++++---
hw/nvme/ns.c | 32 +++++++++++++++++++++++++-------
hw/nvme/subsys.c | 4 ++++
4 files changed, 44 insertions(+), 18 deletions(-)
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index c4065467d877..9401e212f9f7 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;
+#define TYPE_NVME_BUS "nvme-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
+
+typedef struct NvmeBus {
+ BusState parent_bus;
+ bool is_subsys;
+} NvmeBus;
+
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
typedef struct NvmeSubsystem {
DeviceState parent_obj;
+ NvmeBus bus;
uint8_t subnqn[256];
NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
@@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
QTAILQ_HEAD(, NvmeRequest) req_list;
} NvmeCQueue;
-#define TYPE_NVME_BUS "nvme-bus"
-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
-
-typedef struct NvmeBus {
- BusState parent_bus;
-} NvmeBus;
-
#define TYPE_NVME "nvme"
#define NVME(obj) \
OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
@@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
- if (!nsid || nsid > NVME_MAX_NAMESPACES) {
+ if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
return NULL;
}
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 90e3ee2b70ee..7c8fca36d9a5 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
ns = nvme_ns(n, i);
- if (!ns) {
- continue;
+ if (ns) {
+ ns->attached--;
}
+ }
- nvme_ns_cleanup(ns);
+ if (n->subsys) {
+ nvme_subsys_unregister_ctrl(n->subsys, n);
}
if (n->subsys) {
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 3c4f5b8c714a..612a2786d75d 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
static void nvme_ns_realize(DeviceState *dev, Error **errp)
{
NvmeNamespace *ns = NVME_NS(dev);
- BusState *s = qdev_get_parent_bus(dev);
- NvmeCtrl *n = NVME(s->parent);
- NvmeSubsystem *subsys = n->subsys;
+ BusState *qbus = qdev_get_parent_bus(dev);
+ NvmeBus *bus = NVME_BUS(qbus);
+ NvmeCtrl *n = NULL;
+ NvmeSubsystem *subsys = NULL;
uint32_t nsid = ns->params.nsid;
int i;
- if (!n->subsys) {
+ if (bus->is_subsys) {
+ subsys = NVME_SUBSYS(qbus->parent);
+ } else {
+ n = NVME(qbus->parent);
+ subsys = n->subsys;
+ }
+
+ if (subsys) {
+ /*
+ * If this namespace belongs to a subsystem (through a link on the
+ * controller device), reparent the device.
+ */
+ if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
+ return;
+ }
+ } else {
if (ns->params.detached) {
error_setg(errp, "detached requires that the nvme device is "
"linked to an nvme-subsys device");
@@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
if (!nsid) {
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
+ if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
continue;
}
@@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
return;
}
} else {
- if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
+ if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
error_setg(errp, "namespace id '%d' already allocated", nsid);
return;
}
@@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
}
}
- nvme_attach_ns(n, ns);
+ if (n) {
+ nvme_attach_ns(n, ns);
+ }
}
static Property nvme_ns_props[] = {
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 92caa604a280..fb7c3a7c55fc 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
{
NvmeSubsystem *subsys = NVME_SUBSYS(dev);
+ qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev,
+ dev->id);
+ subsys->bus.is_subsys = true;
+
nvme_subsys_setup(subsys);
}
--
2.32.0
On 7/6/21 11:33 AM, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
>
> Prior to this patch the nvme-ns devices are always children of the
> NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
> unrealized when the parent device is removed. However, when subsystems
> are involved, this is not what we want since the namespaces may be
> attached to other controllers as well.
>
> This patch adds an additional NvmeBus on the subsystem device. When
> nvme-ns devices are realized, if the parent controller device is linked
> to a subsystem, the parent bus is set to the subsystem one instead. This
> makes sure that namespaces are kept alive and not unrealized.
>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
> hw/nvme/nvme.h | 18 ++++++++++--------
> hw/nvme/ctrl.c | 8 +++++---
> hw/nvme/ns.c | 32 +++++++++++++++++++++++++-------
> hw/nvme/subsys.c | 4 ++++
> 4 files changed, 44 insertions(+), 18 deletions(-)
>
> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> index c4065467d877..9401e212f9f7 100644
> --- a/hw/nvme/nvme.h
> +++ b/hw/nvme/nvme.h
> @@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
> typedef struct NvmeCtrl NvmeCtrl;
> typedef struct NvmeNamespace NvmeNamespace;
>
> +#define TYPE_NVME_BUS "nvme-bus"
> +OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
> +
> +typedef struct NvmeBus {
> + BusState parent_bus;
> + bool is_subsys;
> +} NvmeBus;
> +
> #define TYPE_NVME_SUBSYS "nvme-subsys"
> #define NVME_SUBSYS(obj) \
> OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
>
> typedef struct NvmeSubsystem {
> DeviceState parent_obj;
> + NvmeBus bus;
> uint8_t subnqn[256];
>
> NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
> @@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
> QTAILQ_HEAD(, NvmeRequest) req_list;
> } NvmeCQueue;
>
> -#define TYPE_NVME_BUS "nvme-bus"
> -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
> -
> -typedef struct NvmeBus {
> - BusState parent_bus;
> -} NvmeBus;
> -
> #define TYPE_NVME "nvme"
> #define NVME(obj) \
> OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
> @@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
>
> static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
> {
> - if (!nsid || nsid > NVME_MAX_NAMESPACES) {
> + if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
> return NULL;
> }
>
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 90e3ee2b70ee..7c8fca36d9a5 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
>
> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> ns = nvme_ns(n, i);
> - if (!ns) {
> - continue;
> + if (ns) {
> + ns->attached--;
> }
> + }
>
> - nvme_ns_cleanup(ns);
> + if (n->subsys) {
> + nvme_subsys_unregister_ctrl(n->subsys, n);
> }
>
> if (n->subsys) {
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index 3c4f5b8c714a..612a2786d75d 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
> static void nvme_ns_realize(DeviceState *dev, Error **errp)
> {
> NvmeNamespace *ns = NVME_NS(dev);
> - BusState *s = qdev_get_parent_bus(dev);
> - NvmeCtrl *n = NVME(s->parent);
> - NvmeSubsystem *subsys = n->subsys;
> + BusState *qbus = qdev_get_parent_bus(dev);
> + NvmeBus *bus = NVME_BUS(qbus);
> + NvmeCtrl *n = NULL;
> + NvmeSubsystem *subsys = NULL;
> uint32_t nsid = ns->params.nsid;
> int i;
>
> - if (!n->subsys) {
> + if (bus->is_subsys) {
> + subsys = NVME_SUBSYS(qbus->parent);
> + } else {
> + n = NVME(qbus->parent);
> + subsys = n->subsys;
> + }
> +
> + if (subsys) {
> + /*
> + * If this namespace belongs to a subsystem (through a link on the
> + * controller device), reparent the device.
> + */
> + if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
> + return;
> + }
> + } else {
> if (ns->params.detached) {
> error_setg(errp, "detached requires that the nvme device is "
> "linked to an nvme-subsys device");
> @@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>
> if (!nsid) {
> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> - if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
> + if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
> continue;
> }
>
> @@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
> return;
> }
> } else {
> - if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
> + if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
> error_setg(errp, "namespace id '%d' already allocated", nsid);
> return;
> }
> @@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
> }
> }
>
> - nvme_attach_ns(n, ns);
> + if (n) {
> + nvme_attach_ns(n, ns);
> + }
> }
>
> static Property nvme_ns_props[] = {
> diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
> index 92caa604a280..fb7c3a7c55fc 100644
> --- a/hw/nvme/subsys.c
> +++ b/hw/nvme/subsys.c
> @@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
> {
> NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>
> + qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev,
> + dev->id);
> + subsys->bus.is_subsys = true;
> +
> nvme_subsys_setup(subsys);
> }
>
>
Why not always create a subsystem, and distinguish between 'shared' and
'non-shared' subsystems instead of the ->subsys pointer?
That way all namespaces can be children of the subsystem, we won't need
any reparenting, and the whole thing will be more in-line with qdev, no?
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
On Jul 7 09:49, Hannes Reinecke wrote:
>On 7/6/21 11:33 AM, Klaus Jensen wrote:
>>From: Klaus Jensen <k.jensen@samsung.com>
>>
>>Prior to this patch the nvme-ns devices are always children of the
>>NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
>>unrealized when the parent device is removed. However, when subsystems
>>are involved, this is not what we want since the namespaces may be
>>attached to other controllers as well.
>>
>>This patch adds an additional NvmeBus on the subsystem device. When
>>nvme-ns devices are realized, if the parent controller device is linked
>>to a subsystem, the parent bus is set to the subsystem one instead. This
>>makes sure that namespaces are kept alive and not unrealized.
>>
>>Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>>---
>> hw/nvme/nvme.h | 18 ++++++++++--------
>> hw/nvme/ctrl.c | 8 +++++---
>> hw/nvme/ns.c | 32 +++++++++++++++++++++++++-------
>> hw/nvme/subsys.c | 4 ++++
>> 4 files changed, 44 insertions(+), 18 deletions(-)
>>
>>diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>>index c4065467d877..9401e212f9f7 100644
>>--- a/hw/nvme/nvme.h
>>+++ b/hw/nvme/nvme.h
>>@@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
>> typedef struct NvmeCtrl NvmeCtrl;
>> typedef struct NvmeNamespace NvmeNamespace;
>>+#define TYPE_NVME_BUS "nvme-bus"
>>+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
>>+
>>+typedef struct NvmeBus {
>>+ BusState parent_bus;
>>+ bool is_subsys;
>>+} NvmeBus;
>>+
>> #define TYPE_NVME_SUBSYS "nvme-subsys"
>> #define NVME_SUBSYS(obj) \
>> OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
>> typedef struct NvmeSubsystem {
>> DeviceState parent_obj;
>>+ NvmeBus bus;
>> uint8_t subnqn[256];
>> NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
>>@@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
>> QTAILQ_HEAD(, NvmeRequest) req_list;
>> } NvmeCQueue;
>>-#define TYPE_NVME_BUS "nvme-bus"
>>-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
>>-
>>-typedef struct NvmeBus {
>>- BusState parent_bus;
>>-} NvmeBus;
>>-
>> #define TYPE_NVME "nvme"
>> #define NVME(obj) \
>> OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
>>@@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
>> static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
>> {
>>- if (!nsid || nsid > NVME_MAX_NAMESPACES) {
>>+ if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
>> return NULL;
>> }
>>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>>index 90e3ee2b70ee..7c8fca36d9a5 100644
>>--- a/hw/nvme/ctrl.c
>>+++ b/hw/nvme/ctrl.c
>>@@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>> ns = nvme_ns(n, i);
>>- if (!ns) {
>>- continue;
>>+ if (ns) {
>>+ ns->attached--;
>> }
>>+ }
>>- nvme_ns_cleanup(ns);
>>+ if (n->subsys) {
>>+ nvme_subsys_unregister_ctrl(n->subsys, n);
>> }
>> if (n->subsys) {
>>diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
>>index 3c4f5b8c714a..612a2786d75d 100644
>>--- a/hw/nvme/ns.c
>>+++ b/hw/nvme/ns.c
>>@@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
>> static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> {
>> NvmeNamespace *ns = NVME_NS(dev);
>>- BusState *s = qdev_get_parent_bus(dev);
>>- NvmeCtrl *n = NVME(s->parent);
>>- NvmeSubsystem *subsys = n->subsys;
>>+ BusState *qbus = qdev_get_parent_bus(dev);
>>+ NvmeBus *bus = NVME_BUS(qbus);
>>+ NvmeCtrl *n = NULL;
>>+ NvmeSubsystem *subsys = NULL;
>> uint32_t nsid = ns->params.nsid;
>> int i;
>>- if (!n->subsys) {
>>+ if (bus->is_subsys) {
>>+ subsys = NVME_SUBSYS(qbus->parent);
>>+ } else {
>>+ n = NVME(qbus->parent);
>>+ subsys = n->subsys;
>>+ }
>>+
>>+ if (subsys) {
>>+ /*
>>+ * If this namespace belongs to a subsystem (through a link on the
>>+ * controller device), reparent the device.
>>+ */
>>+ if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
>>+ return;
>>+ }
>>+ } else {
>> if (ns->params.detached) {
>> error_setg(errp, "detached requires that the nvme device is "
>> "linked to an nvme-subsys device");
>>@@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> if (!nsid) {
>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>- if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
>>+ if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
>> continue;
>> }
>>@@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> return;
>> }
>> } else {
>>- if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
>>+ if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
>> error_setg(errp, "namespace id '%d' already allocated", nsid);
>> return;
>> }
>>@@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>> }
>> }
>>- nvme_attach_ns(n, ns);
>>+ if (n) {
>>+ nvme_attach_ns(n, ns);
>>+ }
>> }
>> static Property nvme_ns_props[] = {
>>diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
>>index 92caa604a280..fb7c3a7c55fc 100644
>>--- a/hw/nvme/subsys.c
>>+++ b/hw/nvme/subsys.c
>>@@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
>> {
>> NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>>+ qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev,
>>+ dev->id);
>>+ subsys->bus.is_subsys = true;
>>+
>> nvme_subsys_setup(subsys);
>> }
>>
>Why not always create a subsystem, and distinguish between 'shared'
>and 'non-shared' subsystems instead of the ->subsys pointer?
>That way all namespaces can be children of the subsystem, we won't
>need any reparenting, and the whole thing will be more in-line with
>qdev, no?
>
Hi Hannes,
Thanks for your reviews and comments!
So, I have actually considered what you suggest.
The problem is that the nvme-ns device currently expects to plug into a
bus (an 'nvme-bus') and we cannot really get rid of that 'bus' parameter
without breaking compatibility. I experimented with removing the bus
from the nvme device and instead creating it in the nvme-subsys device.
My idea here was to implicitly always create an nvme-subsys if not
explicitly created by the user, but since nvme-subsys does not plug into
any bus itself, it is not exposed in the qtree and thus not reachable
(i.e., when realizing the nvme-ns device, it will complain that there
are no 'nvme-bus' available to plug into). To make this work, I believe
we'd have to have the nvme-subsys plug into the main system bus (or some
other bus rooted at main-system-bus), and I'd prefer not to do that
since this is already a flawed design and I think it would be more
intrusive than what my patch does.
I raised these issues on the mailinglist some time ago[1]. We didn't
really find a good solution, but I think the conclusion is that the
bus/device design of subsystems and namespaces is flawed. That's why I
am working on an "objectification" of the two devices as suggested by
Stefan[2] as a proper fix for this mess.
[1]: https://lore.kernel.org/qemu-devel/YJrKRsF4%2F38QheKn@apples.localdomain/T/#u
[2]: https://lore.kernel.org/qemu-devel/YKI9zh2AqX35S8wd@stefanha-x1.localdomain/
On 7/7/21 11:53 AM, Klaus Jensen wrote:
> On Jul 7 09:49, Hannes Reinecke wrote:
>> On 7/6/21 11:33 AM, Klaus Jensen wrote:
>>> From: Klaus Jensen <k.jensen@samsung.com>
>>>
>>> Prior to this patch the nvme-ns devices are always children of the
>>> NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
>>> unrealized when the parent device is removed. However, when subsystems
>>> are involved, this is not what we want since the namespaces may be
>>> attached to other controllers as well.
>>>
>>> This patch adds an additional NvmeBus on the subsystem device. When
>>> nvme-ns devices are realized, if the parent controller device is linked
>>> to a subsystem, the parent bus is set to the subsystem one instead. This
>>> makes sure that namespaces are kept alive and not unrealized.
>>>
>>> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>>> ---
>>> hw/nvme/nvme.h | 18 ++++++++++--------
>>> hw/nvme/ctrl.c | 8 +++++---
>>> hw/nvme/ns.c | 32 +++++++++++++++++++++++++-------
>>> hw/nvme/subsys.c | 4 ++++
>>> 4 files changed, 44 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>>> index c4065467d877..9401e212f9f7 100644
>>> --- a/hw/nvme/nvme.h
>>> +++ b/hw/nvme/nvme.h
>>> @@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES >
>>> NVME_NSID_BROADCAST - 1);
>>> typedef struct NvmeCtrl NvmeCtrl;
>>> typedef struct NvmeNamespace NvmeNamespace;
>>> +#define TYPE_NVME_BUS "nvme-bus"
>>> +OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
>>> +
>>> +typedef struct NvmeBus {
>>> + BusState parent_bus;
>>> + bool is_subsys;
>>> +} NvmeBus;
>>> +
>>> #define TYPE_NVME_SUBSYS "nvme-subsys"
>>> #define NVME_SUBSYS(obj) \
>>> OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
>>> typedef struct NvmeSubsystem {
>>> DeviceState parent_obj;
>>> + NvmeBus bus;
>>> uint8_t subnqn[256];
>>> NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
>>> @@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
>>> QTAILQ_HEAD(, NvmeRequest) req_list;
>>> } NvmeCQueue;
>>> -#define TYPE_NVME_BUS "nvme-bus"
>>> -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
>>> -
>>> -typedef struct NvmeBus {
>>> - BusState parent_bus;
>>> -} NvmeBus;
>>> -
>>> #define TYPE_NVME "nvme"
>>> #define NVME(obj) \
>>> OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
>>> @@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
>>> static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
>>> {
>>> - if (!nsid || nsid > NVME_MAX_NAMESPACES) {
>>> + if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
>>> return NULL;
>>> }
>>> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>>> index 90e3ee2b70ee..7c8fca36d9a5 100644
>>> --- a/hw/nvme/ctrl.c
>>> +++ b/hw/nvme/ctrl.c
>>> @@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
>>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>> ns = nvme_ns(n, i);
>>> - if (!ns) {
>>> - continue;
>>> + if (ns) {
>>> + ns->attached--;
>>> }
>>> + }
>>> - nvme_ns_cleanup(ns);
>>> + if (n->subsys) {
>>> + nvme_subsys_unregister_ctrl(n->subsys, n);
>>> }
>>> if (n->subsys) {
>>> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
>>> index 3c4f5b8c714a..612a2786d75d 100644
>>> --- a/hw/nvme/ns.c
>>> +++ b/hw/nvme/ns.c
>>> @@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
>>> static void nvme_ns_realize(DeviceState *dev, Error **errp)
>>> {
>>> NvmeNamespace *ns = NVME_NS(dev);
>>> - BusState *s = qdev_get_parent_bus(dev);
>>> - NvmeCtrl *n = NVME(s->parent);
>>> - NvmeSubsystem *subsys = n->subsys;
>>> + BusState *qbus = qdev_get_parent_bus(dev);
>>> + NvmeBus *bus = NVME_BUS(qbus);
>>> + NvmeCtrl *n = NULL;
>>> + NvmeSubsystem *subsys = NULL;
>>> uint32_t nsid = ns->params.nsid;
>>> int i;
>>> - if (!n->subsys) {
>>> + if (bus->is_subsys) {
>>> + subsys = NVME_SUBSYS(qbus->parent);
>>> + } else {
>>> + n = NVME(qbus->parent);
>>> + subsys = n->subsys;
>>> + }
>>> +
>>> + if (subsys) {
>>> + /*
>>> + * If this namespace belongs to a subsystem (through a link
>>> on the
>>> + * controller device), reparent the device.
>>> + */
>>> + if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
>>> + return;
>>> + }
>>> + } else {
>>> if (ns->params.detached) {
>>> error_setg(errp, "detached requires that the nvme device
>>> is "
>>> "linked to an nvme-subsys device");
>>> @@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState *dev,
>>> Error **errp)
>>> if (!nsid) {
>>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>> - if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
>>> + if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
>>> continue;
>>> }
>>> @@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState *dev,
>>> Error **errp)
>>> return;
>>> }
>>> } else {
>>> - if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
>>> + if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
>>> error_setg(errp, "namespace id '%d' already allocated",
>>> nsid);
>>> return;
>>> }
>>> @@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState *dev,
>>> Error **errp)
>>> }
>>> }
>>> - nvme_attach_ns(n, ns);
>>> + if (n) {
>>> + nvme_attach_ns(n, ns);
>>> + }
>>> }
>>> static Property nvme_ns_props[] = {
>>> diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
>>> index 92caa604a280..fb7c3a7c55fc 100644
>>> --- a/hw/nvme/subsys.c
>>> +++ b/hw/nvme/subsys.c
>>> @@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState *dev,
>>> Error **errp)
>>> {
>>> NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>>> + qbus_create_inplace(&subsys->bus, sizeof(NvmeBus),
>>> TYPE_NVME_BUS, dev,
>>> + dev->id);
>>> + subsys->bus.is_subsys = true;
>>> +
>>> nvme_subsys_setup(subsys);
>>> }
>>>
>> Why not always create a subsystem, and distinguish between 'shared'
>> and 'non-shared' subsystems instead of the ->subsys pointer?
>> That way all namespaces can be children of the subsystem, we won't
>> need any reparenting, and the whole thing will be more in-line with
>> qdev, no?
>>
>
> Hi Hannes,
>
> Thanks for your reviews and comments!
>
> So, I have actually considered what you suggest.
>
> The problem is that the nvme-ns device currently expects to plug into a
> bus (an 'nvme-bus') and we cannot really get rid of that 'bus' parameter
> without breaking compatibility. I experimented with removing the bus
> from the nvme device and instead creating it in the nvme-subsys device.
> My idea here was to implicitly always create an nvme-subsys if not
> explicitly created by the user, but since nvme-subsys does not plug into
> any bus itself, it is not exposed in the qtree and thus not reachable
> (i.e., when realizing the nvme-ns device, it will complain that there
> are no 'nvme-bus' available to plug into). To make this work, I believe
> we'd have to have the nvme-subsys plug into the main system bus (or some
> other bus rooted at main-system-bus), and I'd prefer not to do that
> since this is already a flawed design and I think it would be more
> intrusive than what my patch does.
>
Sigh.
_Again_.
I seem to trip over issues for which no patch can possibly be accepted
as first the infrastructure has to be reworked.
... there is a reason why I'm avoiding posting patches to qemu-devel :-(
> I raised these issues on the mailinglist some time ago[1]. We didn't
> really find a good solution, but I think the conclusion is that the
> bus/device design of subsystems and namespaces is flawed. That's why I
> am working on an "objectification" of the two devices as suggested by
> Stefan[2] as a proper fix for this mess.
>
Actually, I _do_ think that it would be good to have an nvme-subsystem
bus, providing a list of all namespaces for that subsystem.
Creating a controller would then mean that one has to create a
controller and a namespace _separately_, as then the namespace would
_not_ be a child of the controller.
But that's arguably a good thing, as the namespaces _do_ have a separate
lifetime from controllers.
And the lifetime of the subsystem could be completely self-contained;
the controller would be looking up subsystems via the subsystem NQN if
present; I guess we'll need to create ad-hoc subsystems for PCIe.
But nothing insurmountable.
And certainly nothing which require a large-scale rework of qemu
internals, I would think.
But _if_ you work on the rework, please ensure that not only NVMe is
represented here. SCSI multipathing has the same issue currently; there
is a strict host->lun->block->OS device relationship currently in SCSI,
making it impossible to represent a multipathed SCSI device in qemu.
The only sneaky way is to allow for a shared OS device (ie specifying
the filename more than once), but that completely sidesteps qemu
internals, too.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
On Jul 7 12:43, Hannes Reinecke wrote:
>On 7/7/21 11:53 AM, Klaus Jensen wrote:
>>On Jul 7 09:49, Hannes Reinecke wrote:
>>>On 7/6/21 11:33 AM, Klaus Jensen wrote:
>>>>From: Klaus Jensen <k.jensen@samsung.com>
>>>>
>>>>Prior to this patch the nvme-ns devices are always children of the
>>>>NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
>>>>unrealized when the parent device is removed. However, when subsystems
>>>>are involved, this is not what we want since the namespaces may be
>>>>attached to other controllers as well.
>>>>
>>>>This patch adds an additional NvmeBus on the subsystem device. When
>>>>nvme-ns devices are realized, if the parent controller device is linked
>>>>to a subsystem, the parent bus is set to the subsystem one instead. This
>>>>makes sure that namespaces are kept alive and not unrealized.
>>>>
>>>>Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>>>>---
>>>> hw/nvme/nvme.h | 18 ++++++++++--------
>>>> hw/nvme/ctrl.c | 8 +++++---
>>>> hw/nvme/ns.c | 32 +++++++++++++++++++++++++-------
>>>> hw/nvme/subsys.c | 4 ++++
>>>> 4 files changed, 44 insertions(+), 18 deletions(-)
>>>>
>>>>diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>>>>index c4065467d877..9401e212f9f7 100644
>>>>--- a/hw/nvme/nvme.h
>>>>+++ b/hw/nvme/nvme.h
>>>>@@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES >
>>>>NVME_NSID_BROADCAST - 1);
>>>> typedef struct NvmeCtrl NvmeCtrl;
>>>> typedef struct NvmeNamespace NvmeNamespace;
>>>>+#define TYPE_NVME_BUS "nvme-bus"
>>>>+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
>>>>+
>>>>+typedef struct NvmeBus {
>>>>+ BusState parent_bus;
>>>>+ bool is_subsys;
>>>>+} NvmeBus;
>>>>+
>>>> #define TYPE_NVME_SUBSYS "nvme-subsys"
>>>> #define NVME_SUBSYS(obj) \
>>>> OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
>>>> typedef struct NvmeSubsystem {
>>>> DeviceState parent_obj;
>>>>+ NvmeBus bus;
>>>> uint8_t subnqn[256];
>>>> NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
>>>>@@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
>>>> QTAILQ_HEAD(, NvmeRequest) req_list;
>>>> } NvmeCQueue;
>>>>-#define TYPE_NVME_BUS "nvme-bus"
>>>>-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
>>>>-
>>>>-typedef struct NvmeBus {
>>>>- BusState parent_bus;
>>>>-} NvmeBus;
>>>>-
>>>> #define TYPE_NVME "nvme"
>>>> #define NVME(obj) \
>>>> OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
>>>>@@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
>>>> static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
>>>> {
>>>>- if (!nsid || nsid > NVME_MAX_NAMESPACES) {
>>>>+ if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
>>>> return NULL;
>>>> }
>>>>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>>>>index 90e3ee2b70ee..7c8fca36d9a5 100644
>>>>--- a/hw/nvme/ctrl.c
>>>>+++ b/hw/nvme/ctrl.c
>>>>@@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
>>>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>>> ns = nvme_ns(n, i);
>>>>- if (!ns) {
>>>>- continue;
>>>>+ if (ns) {
>>>>+ ns->attached--;
>>>> }
>>>>+ }
>>>>- nvme_ns_cleanup(ns);
>>>>+ if (n->subsys) {
>>>>+ nvme_subsys_unregister_ctrl(n->subsys, n);
>>>> }
>>>> if (n->subsys) {
>>>>diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
>>>>index 3c4f5b8c714a..612a2786d75d 100644
>>>>--- a/hw/nvme/ns.c
>>>>+++ b/hw/nvme/ns.c
>>>>@@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
>>>> static void nvme_ns_realize(DeviceState *dev, Error **errp)
>>>> {
>>>> NvmeNamespace *ns = NVME_NS(dev);
>>>>- BusState *s = qdev_get_parent_bus(dev);
>>>>- NvmeCtrl *n = NVME(s->parent);
>>>>- NvmeSubsystem *subsys = n->subsys;
>>>>+ BusState *qbus = qdev_get_parent_bus(dev);
>>>>+ NvmeBus *bus = NVME_BUS(qbus);
>>>>+ NvmeCtrl *n = NULL;
>>>>+ NvmeSubsystem *subsys = NULL;
>>>> uint32_t nsid = ns->params.nsid;
>>>> int i;
>>>>- if (!n->subsys) {
>>>>+ if (bus->is_subsys) {
>>>>+ subsys = NVME_SUBSYS(qbus->parent);
>>>>+ } else {
>>>>+ n = NVME(qbus->parent);
>>>>+ subsys = n->subsys;
>>>>+ }
>>>>+
>>>>+ if (subsys) {
>>>>+ /*
>>>>+ * If this namespace belongs to a subsystem (through a
>>>>link on the
>>>>+ * controller device), reparent the device.
>>>>+ */
>>>>+ if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
>>>>+ return;
>>>>+ }
>>>>+ } else {
>>>> if (ns->params.detached) {
>>>> error_setg(errp, "detached requires that the nvme
>>>>device is "
>>>> "linked to an nvme-subsys device");
>>>>@@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState
>>>>*dev, Error **errp)
>>>> if (!nsid) {
>>>> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>>>>- if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
>>>>+ if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
>>>> continue;
>>>> }
>>>>@@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState
>>>>*dev, Error **errp)
>>>> return;
>>>> }
>>>> } else {
>>>>- if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
>>>>+ if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
>>>> error_setg(errp, "namespace id '%d' already
>>>>allocated", nsid);
>>>> return;
>>>> }
>>>>@@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState
>>>>*dev, Error **errp)
>>>> }
>>>> }
>>>>- nvme_attach_ns(n, ns);
>>>>+ if (n) {
>>>>+ nvme_attach_ns(n, ns);
>>>>+ }
>>>> }
>>>> static Property nvme_ns_props[] = {
>>>>diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
>>>>index 92caa604a280..fb7c3a7c55fc 100644
>>>>--- a/hw/nvme/subsys.c
>>>>+++ b/hw/nvme/subsys.c
>>>>@@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState
>>>>*dev, Error **errp)
>>>> {
>>>> NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>>>>+ qbus_create_inplace(&subsys->bus, sizeof(NvmeBus),
>>>>TYPE_NVME_BUS, dev,
>>>>+ dev->id);
>>>>+ subsys->bus.is_subsys = true;
>>>>+
>>>> nvme_subsys_setup(subsys);
>>>> }
>>>>
>>>Why not always create a subsystem, and distinguish between
>>>'shared' and 'non-shared' subsystems instead of the ->subsys
>>>pointer?
>>>That way all namespaces can be children of the subsystem, we won't
>>>need any reparenting, and the whole thing will be more in-line
>>>with qdev, no?
>>>
>>
>>Hi Hannes,
>>
>>Thanks for your reviews and comments!
>>
>>So, I have actually considered what you suggest.
>>
>>The problem is that the nvme-ns device currently expects to plug
>>into a bus (an 'nvme-bus') and we cannot really get rid of that
>>'bus' parameter without breaking compatibility. I experimented with
>>removing the bus from the nvme device and instead creating it in the
>>nvme-subsys device. My idea here was to implicitly always create an
>>nvme-subsys if not explicitly created by the user, but since
>>nvme-subsys does not plug into any bus itself, it is not exposed in
>>the qtree and thus not reachable (i.e., when realizing the nvme-ns
>>device, it will complain that there are no 'nvme-bus' available to
>>plug into). To make this work, I believe we'd have to have the
>>nvme-subsys plug into the main system bus (or some other bus rooted
>>at main-system-bus), and I'd prefer not to do that since this is
>>already a flawed design and I think it would be more intrusive than
>>what my patch does.
>>
>Sigh.
>_Again_.
>
>I seem to trip over issues for which no patch can possibly be accepted
>as first the infrastructure has to be reworked.
>
>... there is a reason why I'm avoiding posting patches to qemu-devel :-(
>
Actually, I'm the one to blame for making nvme-ns a -device and partly
for why nvme-subsys is one as well... It's made sense at the time and
thats why the infrastructure is biting me in the behind now ;)
>>I raised these issues on the mailinglist some time ago[1]. We didn't
>>really find a good solution, but I think the conclusion is that the
>>bus/device design of subsystems and namespaces is flawed. That's why
>>I am working on an "objectification" of the two devices as suggested
>>by Stefan[2] as a proper fix for this mess.
>>
>Actually, I _do_ think that it would be good to have an nvme-subsystem
>bus, providinga list of all namespaces for that subsystem.
My goal was to *not* use a qbus I don't think the subsystem should be
modelled as a device. The same can be accomplished with -objects and
links.
>Creating a controller would then mean that one has to create a
>controller and a namespace _separately_, as then the namespace would
>_not_ be a child of the controller.
>But that's arguably a good thing, as the namespaces _do_ have a
>separate lifetime from controllers.
>And the lifetime of the subsystem could be completely self-contained;
>the controller would be looking up subsystems via the subsystem NQN if
>present; I guess we'll need to create ad-hoc subsystems for PCIe.
>But nothing insurmountable.
>And certainly nothing which require a large-scale rework of qemu
>internals, I would think.
>
Yes, exactly, totally agree.
>But _if_ you work on the rework, please ensure that not only NVMe is
>represented here. SCSI multipathing has the same issue currently;
>there is a strict host->lun->block->OS device relationship currently
>in SCSI, making it impossible to represent a multipathed SCSI device
>in qemu.
>The only sneaky way is to allow for a shared OS device (ie specifying
>the filename more than once), but that completely sidesteps qemu
>internals, too.
>
I'm only very superficially familiar with hw/scsi, but this sounds more
like the scsi subsystem also has some similar design problems like
hw/nvme, not an inherent issue with QDEV/QOM.
On Jul 6 11:33, Klaus Jensen wrote:
>From: Klaus Jensen <k.jensen@samsung.com>
>
>Prior to this patch the nvme-ns devices are always children of the
>NvmeBus owned by the NvmeCtrl. This causes the namespaces to be
>unrealized when the parent device is removed. However, when subsystems
>are involved, this is not what we want since the namespaces may be
>attached to other controllers as well.
>
>This patch adds an additional NvmeBus on the subsystem device. When
>nvme-ns devices are realized, if the parent controller device is linked
>to a subsystem, the parent bus is set to the subsystem one instead. This
>makes sure that namespaces are kept alive and not unrealized.
>
>Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>---
> hw/nvme/nvme.h | 18 ++++++++++--------
> hw/nvme/ctrl.c | 8 +++++---
> hw/nvme/ns.c | 32 +++++++++++++++++++++++++-------
> hw/nvme/subsys.c | 4 ++++
> 4 files changed, 44 insertions(+), 18 deletions(-)
>
>diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>index c4065467d877..9401e212f9f7 100644
>--- a/hw/nvme/nvme.h
>+++ b/hw/nvme/nvme.h
>@@ -33,12 +33,21 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);
> typedef struct NvmeCtrl NvmeCtrl;
> typedef struct NvmeNamespace NvmeNamespace;
>
>+#define TYPE_NVME_BUS "nvme-bus"
>+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
>+
>+typedef struct NvmeBus {
>+ BusState parent_bus;
>+ bool is_subsys;
>+} NvmeBus;
>+
> #define TYPE_NVME_SUBSYS "nvme-subsys"
> #define NVME_SUBSYS(obj) \
> OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
>
> typedef struct NvmeSubsystem {
> DeviceState parent_obj;
>+ NvmeBus bus;
> uint8_t subnqn[256];
>
> NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
>@@ -365,13 +374,6 @@ typedef struct NvmeCQueue {
> QTAILQ_HEAD(, NvmeRequest) req_list;
> } NvmeCQueue;
>
>-#define TYPE_NVME_BUS "nvme-bus"
>-#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
>-
>-typedef struct NvmeBus {
>- BusState parent_bus;
>-} NvmeBus;
>-
> #define TYPE_NVME "nvme"
> #define NVME(obj) \
> OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
>@@ -463,7 +465,7 @@ typedef struct NvmeCtrl {
>
> static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
> {
>- if (!nsid || nsid > NVME_MAX_NAMESPACES) {
>+ if (!n || !nsid || nsid > NVME_MAX_NAMESPACES) {
> return NULL;
> }
>
>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>index 90e3ee2b70ee..7c8fca36d9a5 100644
>--- a/hw/nvme/ctrl.c
>+++ b/hw/nvme/ctrl.c
>@@ -6516,11 +6516,13 @@ static void nvme_exit(PCIDevice *pci_dev)
>
> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> ns = nvme_ns(n, i);
>- if (!ns) {
>- continue;
>+ if (ns) {
>+ ns->attached--;
> }
>+ }
>
>- nvme_ns_cleanup(ns);
>+ if (n->subsys) {
>+ nvme_subsys_unregister_ctrl(n->subsys, n);
> }
>
> if (n->subsys) {
>diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
>index 3c4f5b8c714a..612a2786d75d 100644
>--- a/hw/nvme/ns.c
>+++ b/hw/nvme/ns.c
>@@ -444,13 +444,29 @@ void nvme_ns_cleanup(NvmeNamespace *ns)
> static void nvme_ns_realize(DeviceState *dev, Error **errp)
> {
> NvmeNamespace *ns = NVME_NS(dev);
>- BusState *s = qdev_get_parent_bus(dev);
>- NvmeCtrl *n = NVME(s->parent);
>- NvmeSubsystem *subsys = n->subsys;
>+ BusState *qbus = qdev_get_parent_bus(dev);
>+ NvmeBus *bus = NVME_BUS(qbus);
>+ NvmeCtrl *n = NULL;
>+ NvmeSubsystem *subsys = NULL;
> uint32_t nsid = ns->params.nsid;
> int i;
>
>- if (!n->subsys) {
>+ if (bus->is_subsys) {
>+ subsys = NVME_SUBSYS(qbus->parent);
>+ } else {
>+ n = NVME(qbus->parent);
>+ subsys = n->subsys;
>+ }
So, I realized that this if is not needed, since the device will always
attach to the bus from the nvme device, never the 'fake' one from the
subsystem.
>+
>+ if (subsys) {
>+ /*
>+ * If this namespace belongs to a subsystem (through a link on the
>+ * controller device), reparent the device.
>+ */
>+ if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
>+ return;
>+ }
>+ } else {
> if (ns->params.detached) {
> error_setg(errp, "detached requires that the nvme device is "
> "linked to an nvme-subsys device");
>@@ -470,7 +486,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
>
> if (!nsid) {
> for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
>- if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
>+ if (nvme_subsys_ns(subsys, i) || nvme_ns(n, i)) {
> continue;
> }
>
>@@ -483,7 +499,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
> return;
> }
> } else {
>- if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
>+ if (nvme_subsys_ns(subsys, nsid) || nvme_ns(n, nsid)) {
> error_setg(errp, "namespace id '%d' already allocated", nsid);
> return;
> }
>@@ -509,7 +525,9 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
> }
> }
>
>- nvme_attach_ns(n, ns);
>+ if (n) {
>+ nvme_attach_ns(n, ns);
>+ }
> }
>
> static Property nvme_ns_props[] = {
>diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
>index 92caa604a280..fb7c3a7c55fc 100644
>--- a/hw/nvme/subsys.c
>+++ b/hw/nvme/subsys.c
>@@ -50,6 +50,10 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp)
> {
> NvmeSubsystem *subsys = NVME_SUBSYS(dev);
>
>+ qbus_create_inplace(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev,
>+ dev->id);
>+ subsys->bus.is_subsys = true;
>+
> nvme_subsys_setup(subsys);
> }
>
>--
>2.32.0
>
>
© 2016 - 2026 Red Hat, Inc.