We have nvme-subsys and nvme devices mapped together. To support a
multi-controller scheme in this setup, the controller identifier (id) has
to be managed. Earlier, cntlid (controller id) was always 0 because
we didn't have any subsystem scheme in which the controller id matters.
This patch introduces a 'cntlid' attribute to the nvme controller
instance (NvmeCtrl) and has it allocated by the nvme-subsys device
mapped to the controller. If no nvme-subsys is given to the
controller, then it will always be 0, as it was before.
Add a 'ctrls' array to the nvme-subsys instance to manage the controllers
attached to the subsystem, with a limit of 32. This patch uses an array
rather than a list for the controllers to keep it seamless with the
nvme-ns device.
Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
---
hw/block/nvme-subsys.c | 21 +++++++++++++++++++++
hw/block/nvme-subsys.h | 4 ++++
hw/block/nvme.c | 29 +++++++++++++++++++++++++++++
hw/block/nvme.h | 1 +
4 files changed, 55 insertions(+)
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index aa82911b951c..e9d61c993c90 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -22,6 +22,27 @@
#include "nvme.h"
#include "nvme-subsys.h"
+/*
+ * Attach controller @n to the subsystem it refers to (n->subsys) and
+ * allocate a controller id for it.
+ *
+ * The id is the index of the first empty slot in subsys->ctrls; @n is
+ * stored in that slot.  Returns the allocated id on success.  If every
+ * slot is already taken, sets @errp and returns -1.
+ */
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+    NvmeSubsystem *s = n->subsys;
+    int slot;
+
+    for (slot = 0; slot < ARRAY_SIZE(s->ctrls); slot++) {
+        if (s->ctrls[slot]) {
+            /* occupied by another controller, keep looking */
+            continue;
+        }
+
+        s->ctrls[slot] = n;
+        return slot;
+    }
+
+    error_setg(errp, "no more free controller id");
+    return -1;
+}
+
static void nvme_subsys_setup(NvmeSubsystem *subsys)
{
snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 40f06a4c7db0..4eba50d96a1d 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -20,6 +20,10 @@ typedef struct NvmeNamespace NvmeNamespace;
typedef struct NvmeSubsystem { /* state of one nvme-subsys device */
DeviceState parent_obj;
uint8_t subnqn[256]; /* filled in by nvme_subsys_setup() */
+ /*
+ * Controllers attached to this subsystem.  The array index is the
+ * controller id; a NULL slot is unused and may be handed out by
+ * nvme_subsys_register_ctrl().
+ */
+ NvmeCtrl *ctrls[NVME_SUBSYS_MAX_CTRLS];
} NvmeSubsystem;
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+
#endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b525fca14103..7138389be4bd 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -4435,6 +4435,9 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+
+ id->cntlid = cpu_to_le16(n->cntlid);
+
id->rab = 6;
id->ieee[0] = 0x00;
id->ieee[1] = 0x02;
@@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
+ if (n->subsys) {
+ id->cmic |= NVME_CMIC_MULTI_CTRL;
+ }
+
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
NVME_CAP_SET_CQR(n->bar.cap, 1);
NVME_CAP_SET_TO(n->bar.cap, 0xf);
@@ -4495,6 +4502,24 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
n->bar.intmc = n->bar.intms = 0;
}
+/*
+ * Register @n with its subsystem, if it has one, and remember the
+ * controller id handed back in n->cntlid.
+ *
+ * Returns 0 on success, including the case where no subsystem is attached
+ * (n->cntlid is then left untouched).  Returns -1 if registration fails;
+ * @errp is passed through to nvme_subsys_register_ctrl().
+ */
+static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
+{
+    if (n->subsys) {
+        int id = nvme_subsys_register_ctrl(n, errp);
+
+        if (id < 0) {
+            return -1;
+        }
+
+        n->cntlid = id;
+    }
+
+    return 0;
+}
+
static void nvme_realize(PCIDevice *pci_dev, Error **errp)
{
NvmeCtrl *n = NVME(pci_dev);
@@ -4515,6 +4540,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
return;
}
+ if (nvme_init_subsys(n, errp)) {
+ error_propagate(errp, local_err);
+ return;
+ }
nvme_init_ctrl(n, pci_dev);
/* setup a namespace if the controller drive property was given */
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 04d4684601fd..b8f5f2d6ffb8 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -134,6 +134,7 @@ typedef struct NvmeCtrl {
NvmeBus bus;
BlockConf conf;
+ uint16_t cntlid;
bool qs_created;
uint32_t page_size;
uint16_t page_bits;
--
2.17.1
On Jan 24 11:54, Minwoo Im wrote:
> We have nvme-subsys and nvme devices mapped together. To support
> multi-controller scheme to this setup, controller identifier(id) has to
> be managed. Earlier, cntlid(controller id) used to be always 0 because
> we didn't have any subsystem scheme that controller id matters.
>
> This patch introduced 'cntlid' attribute to the nvme controller
> instance(NvmeCtrl) and make it allocated by the nvme-subsys device
> mapped to the controller. If nvme-subsys is not given to the
> controller, then it will always be 0 as it was.
>
> Added 'ctrls' array in the nvme-subsys instance to manage attached
> controllers to the subsystem with a limit(32). This patch didn't take
> list for the controllers to make it seamless with nvme-ns device.
>
> Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
> ---
> hw/block/nvme-subsys.c | 21 +++++++++++++++++++++
> hw/block/nvme-subsys.h | 4 ++++
> hw/block/nvme.c | 29 +++++++++++++++++++++++++++++
> hw/block/nvme.h | 1 +
> 4 files changed, 55 insertions(+)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index b525fca14103..7138389be4bd 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
> id->psd[0].enlat = cpu_to_le32(0x10);
> id->psd[0].exlat = cpu_to_le32(0x4);
>
> + if (n->subsys) {
> + id->cmic |= NVME_CMIC_MULTI_CTRL;
> + }
Since multiple controllers show up with a PCIe port of their own, do we
need to set bit 0 (NVME_CMIC_MULTI_PORT?) as well? Or am I
misunderstanding that bit?
On Mon, Jan 25, 2021 at 07:03:32PM +0100, Klaus Jensen wrote:
> On Jan 24 11:54, Minwoo Im wrote:
> > We have nvme-subsys and nvme devices mapped together. To support
> > multi-controller scheme to this setup, controller identifier(id) has to
> > be managed. Earlier, cntlid(controller id) used to be always 0 because
> > we didn't have any subsystem scheme that controller id matters.
> >
> > This patch introduced 'cntlid' attribute to the nvme controller
> > instance(NvmeCtrl) and make it allocated by the nvme-subsys device
> > mapped to the controller. If nvme-subsys is not given to the
> > controller, then it will always be 0 as it was.
> >
> > Added 'ctrls' array in the nvme-subsys instance to manage attached
> > controllers to the subsystem with a limit(32). This patch didn't take
> > list for the controllers to make it seamless with nvme-ns device.
> >
> > Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
> > ---
> > hw/block/nvme-subsys.c | 21 +++++++++++++++++++++
> > hw/block/nvme-subsys.h | 4 ++++
> > hw/block/nvme.c | 29 +++++++++++++++++++++++++++++
> > hw/block/nvme.h | 1 +
> > 4 files changed, 55 insertions(+)
> >
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index b525fca14103..7138389be4bd 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
> > id->psd[0].enlat = cpu_to_le32(0x10);
> > id->psd[0].exlat = cpu_to_le32(0x4);
> >
> > + if (n->subsys) {
> > + id->cmic |= NVME_CMIC_MULTI_CTRL;
> > + }
>
> Since multiple controllers show up with a PCIe port of their own, do we
> need to set bit 0 (NVME_CMIC_MULTI_PORT?) as well? Or am I
> misunderstanding that bit?
AIUI, if you report this MULTI_PORT bit, then each PCI device in the
subsystem needs to report a different "Port Number" in their PCIe Link
Capabilities register. I don't think we can manipulate that value from
the nvme "device", but I also don't know what a host should do with this
information even if we could. So, I think it's safe to leave it at 0.
On 21-01-25 10:11:43, Keith Busch wrote:
> On Mon, Jan 25, 2021 at 07:03:32PM +0100, Klaus Jensen wrote:
> > On Jan 24 11:54, Minwoo Im wrote:
> > > We have nvme-subsys and nvme devices mapped together. To support
> > > multi-controller scheme to this setup, controller identifier(id) has to
> > > be managed. Earlier, cntlid(controller id) used to be always 0 because
> > > we didn't have any subsystem scheme that controller id matters.
> > >
> > > This patch introduced 'cntlid' attribute to the nvme controller
> > > instance(NvmeCtrl) and make it allocated by the nvme-subsys device
> > > mapped to the controller. If nvme-subsys is not given to the
> > > controller, then it will always be 0 as it was.
> > >
> > > Added 'ctrls' array in the nvme-subsys instance to manage attached
> > > controllers to the subsystem with a limit(32). This patch didn't take
> > > list for the controllers to make it seamless with nvme-ns device.
> > >
> > > Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
> > > ---
> > > hw/block/nvme-subsys.c | 21 +++++++++++++++++++++
> > > hw/block/nvme-subsys.h | 4 ++++
> > > hw/block/nvme.c | 29 +++++++++++++++++++++++++++++
> > > hw/block/nvme.h | 1 +
> > > 4 files changed, 55 insertions(+)
> > >
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index b525fca14103..7138389be4bd 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
> > > id->psd[0].enlat = cpu_to_le32(0x10);
> > > id->psd[0].exlat = cpu_to_le32(0x4);
> > >
> > > + if (n->subsys) {
> > > + id->cmic |= NVME_CMIC_MULTI_CTRL;
> > > + }
> >
> > Since multiple controllers show up with a PCIe port of their own, do we
> > need to set bit 0 (NVME_CMIC_MULTI_PORT?) as well? Or am I
> > misunderstanding that bit?
>
> AIUI, if you report this MULTI_PORT bit, then each PCI device in the
> subsystem needs to report a different "Port Number" in their PCIe Link
> Capabilities register. I don't think we can manipulate that value from
> the nvme "device", but I also don't know what a host should do with this
> information even if we could. So, I think it's safe to leave it at 0.
AFAIK, if we leave it at 0, the kernel will not allocate a disk for the
multi-path case (e.g., nvmeXcYnZ).
On Tue, Jan 26, 2021 at 09:52:48AM +0900, Minwoo Im wrote:
> On 21-01-25 10:11:43, Keith Busch wrote:
> > On Mon, Jan 25, 2021 at 07:03:32PM +0100, Klaus Jensen wrote:
> > > On Jan 24 11:54, Minwoo Im wrote:
> > > > We have nvme-subsys and nvme devices mapped together. To support
> > > > multi-controller scheme to this setup, controller identifier(id) has to
> > > > be managed. Earlier, cntlid(controller id) used to be always 0 because
> > > > we didn't have any subsystem scheme that controller id matters.
> > > >
> > > > This patch introduced 'cntlid' attribute to the nvme controller
> > > > instance(NvmeCtrl) and make it allocated by the nvme-subsys device
> > > > mapped to the controller. If nvme-subsys is not given to the
> > > > controller, then it will always be 0 as it was.
> > > >
> > > > Added 'ctrls' array in the nvme-subsys instance to manage attached
> > > > controllers to the subsystem with a limit(32). This patch didn't take
> > > > list for the controllers to make it seamless with nvme-ns device.
> > > >
> > > > Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
> > > > ---
> > > > hw/block/nvme-subsys.c | 21 +++++++++++++++++++++
> > > > hw/block/nvme-subsys.h | 4 ++++
> > > > hw/block/nvme.c | 29 +++++++++++++++++++++++++++++
> > > > hw/block/nvme.h | 1 +
> > > > 4 files changed, 55 insertions(+)
> > > >
> > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > > index b525fca14103..7138389be4bd 100644
> > > > --- a/hw/block/nvme.c
> > > > +++ b/hw/block/nvme.c
> > > > @@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
> > > > id->psd[0].enlat = cpu_to_le32(0x10);
> > > > id->psd[0].exlat = cpu_to_le32(0x4);
> > > >
> > > > + if (n->subsys) {
> > > > + id->cmic |= NVME_CMIC_MULTI_CTRL;
> > > > + }
> > >
> > > Since multiple controllers show up with a PCIe port of their own, do we
> > > need to set bit 0 (NVME_CMIC_MULTI_PORT?) as well? Or am I
> > > misunderstanding that bit?
> >
> > AIUI, if you report this MULTI_PORT bit, then each PCI device in the
> > subsystem needs to report a different "Port Number" in their PCIe Link
> > Capabilities register. I don't think we can manipulate that value from
> > the nvme "device", but I also don't know what a host should do with this
> > information even if we could. So, I think it's safe to leave it at 0.
>
> AFAIK, If we leave it to 0, kernel will not allocate disk for multi-path
> case (e.g., nvmeXcYnZ).
Kernel only checks for MULTI_CTRL. It doesn't do anything with MULTI_PORT.
On 21-01-26 09:57:23, Keith Busch wrote:
> On Tue, Jan 26, 2021 at 09:52:48AM +0900, Minwoo Im wrote:
> > On 21-01-25 10:11:43, Keith Busch wrote:
> > > On Mon, Jan 25, 2021 at 07:03:32PM +0100, Klaus Jensen wrote:
> > > > On Jan 24 11:54, Minwoo Im wrote:
> > > > > We have nvme-subsys and nvme devices mapped together. To support
> > > > > multi-controller scheme to this setup, controller identifier(id) has to
> > > > > be managed. Earlier, cntlid(controller id) used to be always 0 because
> > > > > we didn't have any subsystem scheme that controller id matters.
> > > > >
> > > > > This patch introduced 'cntlid' attribute to the nvme controller
> > > > > instance(NvmeCtrl) and make it allocated by the nvme-subsys device
> > > > > mapped to the controller. If nvme-subsys is not given to the
> > > > > controller, then it will always be 0 as it was.
> > > > >
> > > > > Added 'ctrls' array in the nvme-subsys instance to manage attached
> > > > > controllers to the subsystem with a limit(32). This patch didn't take
> > > > > list for the controllers to make it seamless with nvme-ns device.
> > > > >
> > > > > Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
> > > > > ---
> > > > > hw/block/nvme-subsys.c | 21 +++++++++++++++++++++
> > > > > hw/block/nvme-subsys.h | 4 ++++
> > > > > hw/block/nvme.c | 29 +++++++++++++++++++++++++++++
> > > > > hw/block/nvme.h | 1 +
> > > > > 4 files changed, 55 insertions(+)
> > > > >
> > > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > > > index b525fca14103..7138389be4bd 100644
> > > > > --- a/hw/block/nvme.c
> > > > > +++ b/hw/block/nvme.c
> > > > > @@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
> > > > > id->psd[0].enlat = cpu_to_le32(0x10);
> > > > > id->psd[0].exlat = cpu_to_le32(0x4);
> > > > >
> > > > > + if (n->subsys) {
> > > > > + id->cmic |= NVME_CMIC_MULTI_CTRL;
> > > > > + }
> > > >
> > > > Since multiple controllers show up with a PCIe port of their own, do we
> > > > need to set bit 0 (NVME_CMIC_MULTI_PORT?) as well? Or am I
> > > > misunderstanding that bit?
> > >
> > > AIUI, if you report this MULTI_PORT bit, then each PCI device in the
> > > subsystem needs to report a different "Port Number" in their PCIe Link
> > > Capabilities register. I don't think we can manipulate that value from
> > > the nvme "device", but I also don't know what a host should do with this
> > > information even if we could. So, I think it's safe to leave it at 0.
> >
> > AFAIK, If we leave it to 0, kernel will not allocate disk for multi-path
> > case (e.g., nvmeXcYnZ).
>
> Kernel only checks for MULTI_CTRL. It doesn't do anything with MULTI_PORT.
Please forgive me: I mistook this discussion as being about MULTI_CTRL
rather than MULTI_PORT. Please ignore this noise ;)
Thanks!
© 2016 - 2026 Red Hat, Inc.