I'm running into a issue with the latest qemu-nvme with v10.0.0-rc2 with
regards to multiple controllers behind a subsystem. When I setup a
subsystem with 2 controllers, each with a private/non-shared namespace,
the two private/non-shared namespaces all get attached to one of the
controllers.
I'm sending out diffs that resolve the problem but would like to get
some feedback before sending a formal patch.
See below.
Thanks,
Alan Adamson
[root@localhost qemu-subsys]# git describe
v10.0.0-rc2
[root@localhost qemu-subsys]#
QEMU NVMe Config
================
-device nvme-subsys,id=subsys0 \
-device
nvme,serial=deadbeef,id=nvme0,subsys=subsys0,atomic.dn=off,atomic.awun=31,atomic.awupf=15
\
-device
nvme,serial=deadbeef,id=nvme1,subsys=subsys0,atomic.dn=off,atomic.awun=127,atomic.awupf=63
\
-drive id=ns1,file=/dev/nullb3,if=none \
-drive id=ns2,file=/dev/nullb2,if=none \
-drive id=ns3,file=/dev/nullb1,if=none \
-device nvme-ns,drive=ns1,bus=nvme0,nsid=1,shared=false \
-device nvme-ns,drive=ns2,bus=nvme1,nsid=2,shared=false \
-device nvme-ns,drive=ns3,bus=nvme1,nsid=3,shared=true \
1 Subsystem (subsys0)
2 Controllers (nvme0/nvme1)
nvme0
private namespace (nsid=1)
shared namespace (nsid=3)
nvme1
private namespace (nsid=2)
shared namespace (nsid=3)
What Linux is seeing
====================
[root@localhost ~]# nvme list -v
Subsystem Subsystem-NQN Controllers
----------------
------------------------------------------------------------------------------------------------
----------------
nvme-subsys1 nqn.2019-08.org.qemu:subsys0 nvme0, nvme1
Device Cntlid SN MN
FR TxPort Address Slot Subsystem Namespaces
---------------- ------ --------------------
---------------------------------------- -------- ------ --------------
------ ------------ ----------------
nvme0 0 deadbeef QEMU NVMe
Ctrl 9.2.92 pcie 0000:00:04.0 4
nvme-subsys1 nvme1n1, nvme1n2, nvme1n3
nvme1 1 deadbeef QEMU NVMe
Ctrl 9.2.92 pcie 0000:00:05.0 5
nvme-subsys1 nvme1n1
Device Generic NSID Usage
Format Controllers
----------------- ----------------- ----------
-------------------------- ---------------- ----------------
/dev/nvme1n1 /dev/ng1n1 0x3 268.44 GB / 268.44 GB 512 B +
0 B nvme0, nvme1
/dev/nvme1n2 /dev/ng1n2 0x2 268.44 GB / 268.44 GB 512 B +
0 B nvme0
/dev/nvme1n3 /dev/ng1n3 0x1 268.44 GB / 268.44 GB 512 B +
0 B nvme0
[root@localhost ~]# nvme id-ctrl /dev/nvme1n2 | grep awupf
awupf : 15
[root@localhost ~]# nvme id-ctrl /dev/nvme1n3 | grep awupf
awupf : 15
[root@localhost ~]#
===================
Both private namspaces are being attached to the same controller (nvme0)
Fix
===
Need to keep track of the controller that registers a namespace with the
subsystem
and only allow other controllers to attach to the namespace if it is shared.
Non-shared namespaces can only be attached to a controller than
registers it.
Proposal
========
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 518d02dc6670..883d8d4722fd 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5063,7 +5063,8 @@ static uint16_t nvme_endgrp_info(NvmeCtrl *n,
uint8_t rae, uint32_t buf_len,
}
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+ NvmeNamespace *ns = nvme_subsys_ns(n, i);
+
if (!ns) {
continue;
}
@@ -5700,7 +5701,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n,
NvmeRequest *req, bool active)
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
[root@localhost qemu-subsys]# git log -p
f4b9f10a80c30f1d6b9850d476eb8226bda3f553 > /tmp/p
^C
[root@localhost qemu-subsys]# vi /tmp/p
[root@localhost qemu-subsys]# cat /tmp/p
commit f4b9f10a80c30f1d6b9850d476eb8226bda3f553
Author: Alan Adamson <alan.adamson@oracle.com>
Date: 2025-04-03 14:10:30 -0400
subsys multi controller shared fix
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 518d02dc6670..883d8d4722fd 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5063,7 +5063,8 @@ static uint16_t nvme_endgrp_info(NvmeCtrl *n,
uint8_t rae, uint32_t buf_len,
}
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+ NvmeNamespace *ns = nvme_subsys_ns(n, i);
+
if (!ns) {
continue;
}
@@ -5700,7 +5701,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n,
NvmeRequest *req, bool active)
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return nvme_rpt_empty_id_struct(n, req);
}
@@ -5739,7 +5740,7 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl
*n, NvmeRequest *req,
return NVME_INVALID_FIELD | NVME_DNR;
}
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -5809,7 +5810,7 @@ static uint16_t nvme_identify_ns_ind(NvmeCtrl *n,
NvmeRequest *req, bool alloc)
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
if (alloc) {
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return nvme_rpt_empty_id_struct(n, req);
}
@@ -5837,7 +5838,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n,
NvmeRequest *req,
ns = nvme_ns(n, nsid);
if (unlikely(!ns)) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return nvme_rpt_empty_id_struct(n, req);
}
@@ -5884,7 +5885,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n,
NvmeRequest *req,
ns = nvme_ns(n, i);
if (!ns) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, i);
+ ns = nvme_subsys_ns(n, i);
if (!ns) {
continue;
}
@@ -5932,7 +5933,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl
*n, NvmeRequest *req,
ns = nvme_ns(n, i);
if (!ns) {
if (!active) {
- ns = nvme_subsys_ns(n->subsys, i);
+ ns = nvme_subsys_ns(n, i);
if (!ns) {
continue;
}
@@ -6793,7 +6794,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n,
NvmeRequest *req)
return NVME_INVALID_NSID | NVME_DNR;
}
- ns = nvme_subsys_ns(n->subsys, nsid);
+ ns = nvme_subsys_ns(n, nsid);
if (!ns) {
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -7751,17 +7752,23 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_set_timestamp(n, 0ULL);
- /* verify that the command sets of attached namespaces are supported */
- for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i);
+ if (n->subsys) {
+ for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ NvmeNamespace *ns = n->subsys->namespaces[i].namespace;
- if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
- if (!ns->attached || ns->params.shared) {
- nvme_attach_ns(n, ns);
+ if (!ns) {
+ continue;
}
+ if (!(n->subsys->namespaces[i].ctrl == n) && !ns->params.shared) {
+ continue;
+ }
+ if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) {
+ if (!ns->attached || ns->params.shared) {
+ nvme_attach_ns(n, ns);
+ }
+ }
}
}
-
nvme_update_dsm_limits(n, NULL);
return 0;
@@ -8993,7 +9000,8 @@ static void nvme_realize(PCIDevice *pci_dev, Error
**errp)
return;
}
- n->subsys->namespaces[ns->params.nsid] = ns;
+ n->subsys->namespaces[ns->params.nsid].namespace = ns;
+ n->subsys->namespaces[ns->params.nsid].ctrl = n;
}
}
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 98c1e75a5d29..9cca699b354f 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -742,7 +742,7 @@ static void nvme_ns_realize(DeviceState *dev, Error
**errp)
if (!nsid) {
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
- if (nvme_subsys_ns(subsys, i)) {
+ if (nvme_subsys_ns(n, i)) {
continue;
}
@@ -754,12 +754,13 @@ static void nvme_ns_realize(DeviceState *dev,
Error **errp)
error_setg(errp, "no free namespace id");
return;
}
- } else if (nvme_subsys_ns(subsys, nsid)) {
+ } else if (nvme_subsys_ns(n, nsid)) {
error_setg(errp, "namespace id '%d' already allocated", nsid);
return;
}
- subsys->namespaces[nsid] = ns;
+ subsys->namespaces[nsid].namespace = ns;
+ subsys->namespaces[nsid].ctrl = n;
ns->id_ns.endgid = cpu_to_le16(0x1);
ns->id_ns_ind.endgrpid = cpu_to_le16(0x1);
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 6f782ba18826..bea3b96a6dfa 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -97,6 +97,11 @@ typedef struct NvmeEnduranceGroup {
} fdp;
} NvmeEnduranceGroup;
+typedef struct Namespaces {
+ NvmeCtrl *ctrl;
+ NvmeNamespace *namespace;
+} Namespaces;
+
typedef struct NvmeSubsystem {
DeviceState parent_obj;
NvmeBus bus;
@@ -104,7 +109,7 @@ typedef struct NvmeSubsystem {
char *serial;
NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS];
- NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
+ Namespaces namespaces[NVME_MAX_NAMESPACES + 1];
NvmeEnduranceGroup endgrp;
struct {
@@ -136,16 +141,6 @@ static inline NvmeCtrl
*nvme_subsys_ctrl(NvmeSubsystem *subsys,
return subsys->ctrls[cntlid];
}
-static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
- uint32_t nsid)
-{
- if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
- return NULL;
- }
-
- return subsys->namespaces[nsid];
-}
-
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
@@ -711,6 +706,14 @@ static inline NvmeSecCtrlEntry
*nvme_sctrl_for_cntlid(NvmeCtrl *n,
return NULL;
}
+static inline NvmeNamespace *nvme_subsys_ns(NvmeCtrl *n, uint32_t nsid)
+{
+ if (!n->subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
+ return NULL;
+ }
+ return n->subsys->namespaces[nsid].namespace;
+}
+
void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
NvmeTxDirection dir, NvmeRequest *req);
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 2ae56f12a596..d5751564c05c 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -92,13 +92,19 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
subsys->ctrls[cntlid] = n;
- for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
- NvmeNamespace *ns = subsys->namespaces[nsid];
- if (ns && ns->params.shared && !ns->params.detached) {
- nvme_attach_ns(n, ns);
+ for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
+ NvmeNamespace *ns = subsys->namespaces[nsid].namespace;
+
+ if (!ns) {
+ continue;
+ }
+ if (!(subsys->namespaces[nsid].ctrl == n) && !ns->params.shared) {
+ continue;
}
+ if (ns->params.shared && !ns->params.detached) {
+ nvme_attach_ns(n, ns);
+ }
}
-
return cntlid;
}
With this fix:
==============
[root@localhost ~]# nvme list -v
Subsystem Subsystem-NQN Controllers
----------------
------------------------------------------------------------------------------------------------
----------------
nvme-subsys1 nqn.2019-08.org.qemu:subsys0 nvme0, nvme1
Device Cntlid SN MN
FR TxPort Address Slot Subsystem Namespaces
---------------- ------ --------------------
---------------------------------------- -------- ------ --------------
------ ------------ ----------------
nvme0 0 deadbeef QEMU NVMe
Ctrl 9.2.92 pcie 0000:00:04.0 4
nvme-subsys1 nvme1n2, nvme1n3
nvme1 1 deadbeef QEMU NVMe
Ctrl 9.2.92 pcie 0000:00:05.0 5
nvme-subsys1 nvme1n1, nvme1n3
Device Generic NSID Usage
Format Controllers
----------------- ----------------- ----------
-------------------------- ---------------- ----------------
/dev/nvme1n1 /dev/ng1n1 0x2 268.44 GB / 268.44 GB 512 B +
0 B nvme1
/dev/nvme1n2 /dev/ng1n2 0x1 268.44 GB / 268.44 GB 512 B +
0 B nvme0
/dev/nvme1n3 /dev/ng1n3 0x3 268.44 GB / 268.44 GB 512 B +
0 B nvme0, nvme1
[root@localhost ~]# nvme id-ctrl /dev/nvme1n1 | grep awupf
awupf : 63
[root@localhost ~]# nvme id-ctrl /dev/nvme1n2 | grep awupf
awupf : 15
[root@localhost ~]#
Each private namespace is attached to its own controller and that is
verified with the AWUPF values.
On Apr 4 10:52, alan.adamson@oracle.com wrote: > I'm running into a issue with the latest qemu-nvme with v10.0.0-rc2 with > regards to multiple controllers behind a subsystem. When I setup a > subsystem with 2 controllers, each with a private/non-shared namespace, the > two private/non-shared namespaces all get attached to one of the > controllers. > > I'm sending out diffs that resolve the problem but would like to get some > feedback before sending a formal patch. > Hi Alan, Thanks for reporting this! This is definitely a regression caused by the csi refactoring I did. Few comments below, but I'd like to try to get this into 10.0. Timeline is tight, so I'll send out a patch with my suggestings below. > @@ -7751,17 +7752,23 @@ static int nvme_start_ctrl(NvmeCtrl *n) > > nvme_set_timestamp(n, 0ULL); > > - /* verify that the command sets of attached namespaces are supported */ > - for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { > - NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i); > + if (n->subsys) { > + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { > + NvmeNamespace *ns = n->subsys->namespaces[i].namespace; > > - if (ns && nvme_csi_supported(n, ns->csi) && !ns->params.detached) { > - if (!ns->attached || ns->params.shared) { > - nvme_attach_ns(n, ns); > + if (!ns) { > + continue; > } > + if (!(n->subsys->namespaces[i].ctrl == n) && !ns->params.shared) { > + continue; > + } > + if (nvme_csi_supported(n, ns->csi) && !ns->params.detached) { > + if (!ns->attached || ns->params.shared) { > + nvme_attach_ns(n, ns); > + } > + } > } > } > - > nvme_update_dsm_limits(n, NULL); > > return 0; Yeah, this is where things went wrong. > diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h > index 6f782ba18826..bea3b96a6dfa 100644 > --- a/hw/nvme/nvme.h > +++ b/hw/nvme/nvme.h > @@ -97,6 +97,11 @@ typedef struct NvmeEnduranceGroup { > } fdp; > } NvmeEnduranceGroup; > > +typedef struct Namespaces { > + NvmeCtrl *ctrl; > + NvmeNamespace *namespace; > +} Namespaces; Let's just add an NvmeCtrl* in struct NvmeNamespace. If set, the namespace is private. > diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c > index 2ae56f12a596..d5751564c05c 100644 > --- a/hw/nvme/subsys.c > +++ b/hw/nvme/subsys.c > @@ -92,13 +92,19 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) > > subsys->ctrls[cntlid] = n; > > - for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { > - NvmeNamespace *ns = subsys->namespaces[nsid]; > - if (ns && ns->params.shared && !ns->params.detached) { > - nvme_attach_ns(n, ns); > + for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { > + NvmeNamespace *ns = subsys->namespaces[nsid].namespace; > + > + if (!ns) { > + continue; > + } > + if (!(subsys->namespaces[nsid].ctrl == n) && !ns->params.shared) { > + continue; > } > + if (ns->params.shared && !ns->params.detached) { > + nvme_attach_ns(n, ns); > + } > } > - > return cntlid; > } The code here that attach namespaces when the controller is registers actually need to go away. This is a leftover.
© 2016 - 2025 Red Hat, Inc.