[PATCH v2] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`

Li Zhijian via posted 1 patch 3 months, 3 weeks ago
There is a newer version of this series
hw/cxl/cxl-device-utils.c   |  6 ++----
hw/mem/cxl_type3.c          | 10 +++++-----
include/hw/cxl/cxl_device.h |  7 +++++++
3 files changed, 14 insertions(+), 9 deletions(-)
[PATCH v2] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`
Posted by Li Zhijian via 3 months, 3 weeks ago
This assertion always happens when we sanitize the CXL memory device.
$ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize

It is incorrect to register an MSIX number beyond the device's capability.

Expand the device's MSIX number and use the enum to maintain the *USED*
and MAX MSIX number

Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
---
V2: just increase msix number and add enum to maintainer their values #
Jonathan
---
 hw/cxl/cxl-device-utils.c   |  6 ++----
 hw/mem/cxl_type3.c          | 10 +++++-----
 include/hw/cxl/cxl_device.h |  7 +++++++
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
index 035d034f6d..bc2171e3d4 100644
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-utils.c
@@ -354,8 +354,6 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
 
 static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
 {
-    const uint8_t msi_n = 9;
-
     /* 2048 payload size */
     ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
                      PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
@@ -364,8 +362,8 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
     ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
                      BG_INT_CAP, 1);
     ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
-                     MSI_N, msi_n);
-    cxl_dstate->mbox_msi_n = msi_n;
+                     MSI_N, CXL_MSIX_MBOX);
+    cxl_dstate->mbox_msi_n = CXL_MSIX_MBOX;
     ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
                      MBOX_READY_TIME, 0); /* Not reported */
     ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 5cf754b38f..f2f060ed9e 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -843,7 +843,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     ComponentRegisters *regs = &cxl_cstate->crb;
     MemoryRegion *mr = &regs->component_registers;
     uint8_t *pci_conf = pci_dev->config;
-    unsigned short msix_num = 6;
     int i, rc;
     uint16_t count;
 
@@ -884,16 +883,17 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
                      &ct3d->cxl_dstate.device_registers);
 
     /* MSI(-X) Initialization */
-    rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
+    rc = msix_init_exclusive_bar(pci_dev, CXL_MSIX_MAX, 4, NULL);
     if (rc) {
         goto err_address_space_free;
     }
-    for (i = 0; i < msix_num; i++) {
+    for (i = 0; i < CXL_MSIX_MAX; i++) {
         msix_vector_use(pci_dev, i);
     }
 
     /* DOE Initialization */
-    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
+    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
+                  CXL_MSIX_PCIE_DOE);
 
     cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
     cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
@@ -908,7 +908,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
     if (rc) {
         goto err_release_cdat;
     }
-    cxl_event_init(&ct3d->cxl_dstate, 2);
+    cxl_event_init(&ct3d->cxl_dstate, CXL_MSIX_EVENT_START);
 
     /* Set default value for patrol scrub attributes */
     ct3d->patrol_scrub_attrs.scrub_cycle_cap =
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 561b375dc8..3f89b041ce 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -133,6 +133,13 @@ typedef enum {
     CXL_MBOX_MAX = 0x20
 } CXLRetCode;
 
+enum {
+    CXL_MSIX_PCIE_DOE = 0,
+    CXL_MSIX_EVENT_START = 2,
+    CXL_MSIX_MBOX = CXL_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
+    CXL_MSIX_MAX
+};
+
 typedef struct CXLCCI CXLCCI;
 typedef struct cxl_device_state CXLDeviceState;
 struct cxl_cmd;
-- 
2.41.0
Re: [PATCH v2] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`
Posted by Jonathan Cameron via 2 months, 4 weeks ago
On Fri, 13 Dec 2024 17:36:02 +0800
Li Zhijian <lizhijian@fujitsu.com> wrote:

> This assertion always happens when we sanitize the CXL memory device.
> $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize
> 
> It is incorrect to register an MSIX number beyond the device's capability.
> 
> Expand the device's MSIX number and use the enum to maintain the *USED*
> and MAX MSIX number
> 
> Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> ---
> V2: just increase msix number and add enum to maintainer their values #
> Jonathan

Ah. Sorry I was unclear. Two patches please

1. Make the number bigger to fix the bug. Only this one gets a fixes tag and
   is suitable for backporting.

2. Add an enum including all numbers currently used and use that throughout the
   type3 related code. That will prevent use accidentally introducing the
   bug in future but doesn't need to be backported. 
 
A few other comments inline.

Thanks

Jonathan

> ---
>  hw/cxl/cxl-device-utils.c   |  6 ++----
>  hw/mem/cxl_type3.c          | 10 +++++-----
>  include/hw/cxl/cxl_device.h |  7 +++++++
>  3 files changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
> index 035d034f6d..bc2171e3d4 100644
> --- a/hw/cxl/cxl-device-utils.c
> +++ b/hw/cxl/cxl-device-utils.c
> @@ -354,8 +354,6 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
>  
>  static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>  {
> -    const uint8_t msi_n = 9;
> -
>      /* 2048 payload size */
>      ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                       PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
> @@ -364,8 +362,8 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>      ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                       BG_INT_CAP, 1);
>      ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> -                     MSI_N, msi_n);
> -    cxl_dstate->mbox_msi_n = msi_n;
> +                     MSI_N, CXL_MSIX_MBOX);

Should be passed in from the type 3 specific call so add a parameter to this
function and pass this from cxl_device_register_init_t3.
Even better pass it into there from ct3d_reset()

Will potentially be a different number for the switch CCI passed in from
the call of cxl_device_register_init_swcci() in switch-mailbox-cci.c


> +    cxl_dstate->mbox_msi_n = CXL_MSIX_MBOX;
>      ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                       MBOX_READY_TIME, 0); /* Not reported */
>      ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 5cf754b38f..f2f060ed9e 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -843,7 +843,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>      ComponentRegisters *regs = &cxl_cstate->crb;
>      MemoryRegion *mr = &regs->component_registers;
>      uint8_t *pci_conf = pci_dev->config;
> -    unsigned short msix_num = 6;
>      int i, rc;
>      uint16_t count;
>  
> @@ -884,16 +883,17 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>                       &ct3d->cxl_dstate.device_registers);
>  
>      /* MSI(-X) Initialization */
> -    rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
> +    rc = msix_init_exclusive_bar(pci_dev, CXL_MSIX_MAX, 4, NULL);
>      if (rc) {
>          goto err_address_space_free;
>      }
> -    for (i = 0; i < msix_num; i++) {
> +    for (i = 0; i < CXL_MSIX_MAX; i++) {
>          msix_vector_use(pci_dev, i);
>      }
>  
>      /* DOE Initialization */
> -    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
> +    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
> +                  CXL_MSIX_PCIE_DOE);
>  
>      cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
>      cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
> @@ -908,7 +908,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>      if (rc) {
>          goto err_release_cdat;
>      }
> -    cxl_event_init(&ct3d->cxl_dstate, 2);
> +    cxl_event_init(&ct3d->cxl_dstate, CXL_MSIX_EVENT_START);
>  
>      /* Set default value for patrol scrub attributes */
>      ct3d->patrol_scrub_attrs.scrub_cycle_cap =
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 561b375dc8..3f89b041ce 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -133,6 +133,13 @@ typedef enum {
>      CXL_MBOX_MAX = 0x20
>  } CXLRetCode;
>  
> +enum {

Maybe worth naming these to be type3 specific.

> +    CXL_MSIX_PCIE_DOE = 0,
Name it to include that this is specifically the DOE for the table access protocol.

   CXL_MSIX_PCIE_DOE_TABLE_ACCESS


This should be private to cxl_type3.c which should be possible by passing
it to a few more calls from there. 

> +    CXL_MSIX_EVENT_START = 2,
> +    CXL_MSIX_MBOX = CXL_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
> +    CXL_MSIX_MAX
> +};
> +
>  typedef struct CXLCCI CXLCCI;
>  typedef struct cxl_device_state CXLDeviceState;
>  struct cxl_cmd;
Re: [PATCH v2] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`
Posted by Zhijian Li (Fujitsu) via 2 months, 3 weeks ago

On 10/01/2025 23:44, Jonathan Cameron wrote:
> On Fri, 13 Dec 2024 17:36:02 +0800
> Li Zhijian <lizhijian@fujitsu.com> wrote:
> 
>> This assertion always happens when we sanitize the CXL memory device.
>> $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize
>>
>> It is incorrect to register an MSIX number beyond the device's capability.
>>
>> Expand the device's MSIX number and use the enum to maintain the *USED*
>> and MAX MSIX number
>>
>> Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
>> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
>> ---
>> V2: just increase msix number and add enum to maintainer their values #
>> Jonathan
> 
> Ah. Sorry I was unclear. Two patches please
> 
> 1. Make the number bigger to fix the bug. Only this one gets a fixes tag and
>     is suitable for backporting.
> 
> 2. Add an enum including all numbers currently used and use that throughout the
>     type3 related code. That will prevent use accidentally introducing the
>     bug in future but doesn't need to be backported.
>   

Understood, it make sense.



> A few other comments inline.
> 
> Thanks
> 
> Jonathan
> 
>> ---
>>   hw/cxl/cxl-device-utils.c   |  6 ++----
>>   hw/mem/cxl_type3.c          | 10 +++++-----
>>   include/hw/cxl/cxl_device.h |  7 +++++++
>>   3 files changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
>> index 035d034f6d..bc2171e3d4 100644
>> --- a/hw/cxl/cxl-device-utils.c
>> +++ b/hw/cxl/cxl-device-utils.c
>> @@ -354,8 +354,6 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
>>   
>>   static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>>   {
>> -    const uint8_t msi_n = 9;
>> -
>>       /* 2048 payload size */
>>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>>                        PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
>> @@ -364,8 +362,8 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>>                        BG_INT_CAP, 1);
>>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>> -                     MSI_N, msi_n);
>> -    cxl_dstate->mbox_msi_n = msi_n;
>> +                     MSI_N, CXL_MSIX_MBOX);
> 
> Should be passed in from the type 3 specific call so add a parameter to this
> function and pass this from cxl_device_register_init_t3.
> Even better pass it into there from ct3d_reset()
> 

At a glance, `ct3d_reset()` has the following prototype: `typedef void (*DeviceReset)(DeviceState *dev)`,
which is inherited from the QEMU device framework. Consequently, it is hard to extend `ct3d_reset()`
to include an additional parameter.



> Will potentially be a different number for the switch CCI passed in from
> the call of cxl_device_register_init_swcci() in switch-mailbox-cci.c

It sounds reasonable, offering a more flexible design for the future.

Currently, mailbox_reg_init_common() will be called from type3 device and swcci,
however, I didn't see any where the swcci itself have setup the msi/msix at all.

Is this expected, feel free to let me know if I'm missing something.

> 
> 
>> +    cxl_dstate->mbox_msi_n = CXL_MSIX_MBOX;
>>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>>                        MBOX_READY_TIME, 0); /* Not reported */
>>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
>> index 5cf754b38f..f2f060ed9e 100644
>> --- a/hw/mem/cxl_type3.c
>> +++ b/hw/mem/cxl_type3.c
>> @@ -843,7 +843,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>>       ComponentRegisters *regs = &cxl_cstate->crb;
>>       MemoryRegion *mr = &regs->component_registers;
>>       uint8_t *pci_conf = pci_dev->config;
>> -    unsigned short msix_num = 6;
>>       int i, rc;
>>       uint16_t count;
>>   
>> @@ -884,16 +883,17 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>>                        &ct3d->cxl_dstate.device_registers);
>>   
>>       /* MSI(-X) Initialization */
>> -    rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
>> +    rc = msix_init_exclusive_bar(pci_dev, CXL_MSIX_MAX, 4, NULL);
>>       if (rc) {
>>           goto err_address_space_free;
>>       }
>> -    for (i = 0; i < msix_num; i++) {
>> +    for (i = 0; i < CXL_MSIX_MAX; i++) {
>>           msix_vector_use(pci_dev, i);
>>       }
>>   
>>       /* DOE Initialization */
>> -    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
>> +    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
>> +                  CXL_MSIX_PCIE_DOE);
>>   
>>       cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
>>       cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
>> @@ -908,7 +908,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>>       if (rc) {
>>           goto err_release_cdat;
>>       }
>> -    cxl_event_init(&ct3d->cxl_dstate, 2);
>> +    cxl_event_init(&ct3d->cxl_dstate, CXL_MSIX_EVENT_START);
>>   
>>       /* Set default value for patrol scrub attributes */
>>       ct3d->patrol_scrub_attrs.scrub_cycle_cap =
>> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
>> index 561b375dc8..3f89b041ce 100644
>> --- a/include/hw/cxl/cxl_device.h
>> +++ b/include/hw/cxl/cxl_device.h
>> @@ -133,6 +133,13 @@ typedef enum {
>>       CXL_MBOX_MAX = 0x20
>>   } CXLRetCode;
>>   
>> +enum {
> 
> Maybe worth naming these to be type3 specific.

It sounds good to me.


> 
>> +    CXL_MSIX_PCIE_DOE = 0,
> Name it to include that this is specifically the DOE for the table access protocol.
> 
>     CXL_MSIX_PCIE_DOE_TABLE_ACCESS

make sense.


> 
> 
> This should be private to cxl_type3.c which should be possible by passing
> it to a few more calls from there.
> 

If we make the entire enumeration `cxl_type3` private, it appears unnecessary to pass it through several more calls.

How about this

+/* type3 device private */
+enum CXL_T3_MSIX_VECTOR {
+    CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,
+    CXL_T3_MSIX_EVENT_START = 2,
+    CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
+    CXL_T3_MSIX_VECTOR_NR
+};
+


Thanks
Zhijian

>> +    CXL_MSIX_EVENT_START = 2,
>> +    CXL_MSIX_MBOX = CXL_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
>> +    CXL_MSIX_MAX
>> +};
>> +
>>   typedef struct CXLCCI CXLCCI;
>>   typedef struct cxl_device_state CXLDeviceState;
>>   struct cxl_cmd;
> 
Re: [PATCH v2] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`
Posted by Jonathan Cameron via 2 months, 3 weeks ago
On Wed, 15 Jan 2025 07:48:59 +0000
"Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> wrote:

> On 10/01/2025 23:44, Jonathan Cameron wrote:
> > On Fri, 13 Dec 2024 17:36:02 +0800
> > Li Zhijian <lizhijian@fujitsu.com> wrote:
> >   
> >> This assertion always happens when we sanitize the CXL memory device.
> >> $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize
> >>
> >> It is incorrect to register an MSIX number beyond the device's capability.
> >>
> >> Expand the device's MSIX number and use the enum to maintain the *USED*
> >> and MAX MSIX number
> >>
> >> Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
> >> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> >> ---
> >> V2: just increase msix number and add enum to maintainer their values #
> >> Jonathan  
> > 
> > Ah. Sorry I was unclear. Two patches please
> > 
> > 1. Make the number bigger to fix the bug. Only this one gets a fixes tag and
> >     is suitable for backporting.
> > 
> > 2. Add an enum including all numbers currently used and use that throughout the
> >     type3 related code. That will prevent use accidentally introducing the
> >     bug in future but doesn't need to be backported.
> >     
> 
> Understood, it make sense.
> 
> 
> 
> > A few other comments inline.
> > 
> > Thanks
> > 
> > Jonathan
> >   
> >> ---
> >>   hw/cxl/cxl-device-utils.c   |  6 ++----
> >>   hw/mem/cxl_type3.c          | 10 +++++-----
> >>   include/hw/cxl/cxl_device.h |  7 +++++++
> >>   3 files changed, 14 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
> >> index 035d034f6d..bc2171e3d4 100644
> >> --- a/hw/cxl/cxl-device-utils.c
> >> +++ b/hw/cxl/cxl-device-utils.c
> >> @@ -354,8 +354,6 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
> >>   
> >>   static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
> >>   {
> >> -    const uint8_t msi_n = 9;
> >> -
> >>       /* 2048 payload size */
> >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> >>                        PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
> >> @@ -364,8 +362,8 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
> >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> >>                        BG_INT_CAP, 1);
> >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> >> -                     MSI_N, msi_n);
> >> -    cxl_dstate->mbox_msi_n = msi_n;
> >> +                     MSI_N, CXL_MSIX_MBOX);  
> > 
> > Should be passed in from the type 3 specific call so add a parameter to this
> > function and pass this from cxl_device_register_init_t3.
> > Even better pass it into there from ct3d_reset()
> >   
> 
> At a glance, `ct3d_reset()` has the following prototype: `typedef void (*DeviceReset)(DeviceState *dev)`,
> which is inherited from the QEMU device framework. Consequently, it is hard to extend `ct3d_reset()`
> to include an additional parameter.

I wasn't clear in that statement.   ct3d_reset() calls cxl_device_register_init_t3(). Change
the signature of cxl_device_register_init_t3() so that takes the appropriate MSI index.
that puts the source of the information in the cxl_type3.c code, not the core support in hw / CXL.
No change to ct3d_reset() signature needed.

> 
> 
> 
> > Will potentially be a different number for the switch CCI passed in from
> > the call of cxl_device_register_init_swcci() in switch-mailbox-cci.c  
> 
> It sounds reasonable, offering a more flexible design for the future.
> 
> Currently, mailbox_reg_init_common() will be called from type3 device and swcci,
> however, I didn't see any where the swcci itself have setup the msi/msix at all.
> 
> Is this expected, feel free to let me know if I'm missing something.
Currently that is in cxl_device_register_init_swcci() via
mailbox_reg_init_common()
Suggestion is to like the above type 3 case move it into the specific driver
by having cxl_device_register_init_swcci() take it as a parameter and
pass that in from cswmbcci_reset() in switch-mailbox-cci.c


> 
> > 
> >   
> >> +    cxl_dstate->mbox_msi_n = CXL_MSIX_MBOX;
> >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> >>                        MBOX_READY_TIME, 0); /* Not reported */
> >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> >> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> >> index 5cf754b38f..f2f060ed9e 100644
> >> --- a/hw/mem/cxl_type3.c
> >> +++ b/hw/mem/cxl_type3.c
> >> @@ -843,7 +843,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
> >>       ComponentRegisters *regs = &cxl_cstate->crb;
> >>       MemoryRegion *mr = &regs->component_registers;
> >>       uint8_t *pci_conf = pci_dev->config;
> >> -    unsigned short msix_num = 6;
> >>       int i, rc;
> >>       uint16_t count;
> >>   
> >> @@ -884,16 +883,17 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
> >>                        &ct3d->cxl_dstate.device_registers);
> >>   
> >>       /* MSI(-X) Initialization */
> >> -    rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
> >> +    rc = msix_init_exclusive_bar(pci_dev, CXL_MSIX_MAX, 4, NULL);
> >>       if (rc) {
> >>           goto err_address_space_free;
> >>       }
> >> -    for (i = 0; i < msix_num; i++) {
> >> +    for (i = 0; i < CXL_MSIX_MAX; i++) {
> >>           msix_vector_use(pci_dev, i);
> >>       }
> >>   
> >>       /* DOE Initialization */
> >> -    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
> >> +    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
> >> +                  CXL_MSIX_PCIE_DOE);
> >>   
> >>       cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
> >>       cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
> >> @@ -908,7 +908,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
> >>       if (rc) {
> >>           goto err_release_cdat;
> >>       }
> >> -    cxl_event_init(&ct3d->cxl_dstate, 2);
> >> +    cxl_event_init(&ct3d->cxl_dstate, CXL_MSIX_EVENT_START);
> >>   
> >>       /* Set default value for patrol scrub attributes */
> >>       ct3d->patrol_scrub_attrs.scrub_cycle_cap =
> >> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> >> index 561b375dc8..3f89b041ce 100644
> >> --- a/include/hw/cxl/cxl_device.h
> >> +++ b/include/hw/cxl/cxl_device.h
> >> @@ -133,6 +133,13 @@ typedef enum {
> >>       CXL_MBOX_MAX = 0x20
> >>   } CXLRetCode;
> >>   
> >> +enum {  
> > 
> > Maybe worth naming these to be type3 specific.  
> 
> It sounds good to me.
> 
> 
> >   
> >> +    CXL_MSIX_PCIE_DOE = 0,  
> > Name it to include that this is specifically the DOE for the table access protocol.
> > 
> >     CXL_MSIX_PCIE_DOE_TABLE_ACCESS  
> 
> make sense.
> 
> 
> > 
> > 
> > This should be private to cxl_type3.c which should be possible by passing
> > it to a few more calls from there.
> >   
> 
> If we make the entire enumeration `cxl_type3` private, it appears unnecessary to pass it through several more calls.

Key is to make sure it is in cxl_type3.c  For that to work you need to pass it as
described above so that we can set the relevant values in various registers
configured by the hw/cxl/* code.

> 
> How about this
> 
> +/* type3 device private */
> +enum CXL_T3_MSIX_VECTOR {
> +    CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,

What is on 1?  We don't need to maintain backwards compatibility (migration
is broken in lots of other ways and not high on priority list to fix) so should
be no need to skip it. If you really want to add a reserved entry and we will
fill it in later.

> +    CXL_T3_MSIX_EVENT_START = 2,
> +    CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
> +    CXL_T3_MSIX_VECTOR_NR
> +};
> +
> 
Hope that helps,

Jonathan

> 
> Thanks
> Zhijian
> 
> >> +    CXL_MSIX_EVENT_START = 2,
> >> +    CXL_MSIX_MBOX = CXL_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
> >> +    CXL_MSIX_MAX
> >> +};
> >> +
> >>   typedef struct CXLCCI CXLCCI;
> >>   typedef struct cxl_device_state CXLDeviceState;
> >>   struct cxl_cmd;  
> >
Re: [PATCH v2] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`
Posted by Jonathan Cameron via 2 months, 3 weeks ago
On Wed, 15 Jan 2025 11:22:14 +0000
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> On Wed, 15 Jan 2025 07:48:59 +0000
> "Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> wrote:
> 
> > On 10/01/2025 23:44, Jonathan Cameron wrote:  
> > > On Fri, 13 Dec 2024 17:36:02 +0800
> > > Li Zhijian <lizhijian@fujitsu.com> wrote:
> > >     
> > >> This assertion always happens when we sanitize the CXL memory device.
> > >> $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize
> > >>
> > >> It is incorrect to register an MSIX number beyond the device's capability.
> > >>
> > >> Expand the device's MSIX number and use the enum to maintain the *USED*
> > >> and MAX MSIX number
> > >>
> > >> Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
> > >> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> > >> ---
> > >> V2: just increase msix number and add enum to maintainer their values #
> > >> Jonathan    
> > > 
> > > Ah. Sorry I was unclear. Two patches please
> > > 
> > > 1. Make the number bigger to fix the bug. Only this one gets a fixes tag and
> > >     is suitable for backporting.
> > > 
> > > 2. Add an enum including all numbers currently used and use that throughout the
> > >     type3 related code. That will prevent use accidentally introducing the
> > >     bug in future but doesn't need to be backported.
> > >       
> > 
> > Understood, it make sense.
> > 
> > 
> >   
> > > A few other comments inline.
> > > 
> > > Thanks
> > > 
> > > Jonathan
> > >     
> > >> ---
> > >>   hw/cxl/cxl-device-utils.c   |  6 ++----
> > >>   hw/mem/cxl_type3.c          | 10 +++++-----
> > >>   include/hw/cxl/cxl_device.h |  7 +++++++
> > >>   3 files changed, 14 insertions(+), 9 deletions(-)
> > >>
> > >> diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
> > >> index 035d034f6d..bc2171e3d4 100644
> > >> --- a/hw/cxl/cxl-device-utils.c
> > >> +++ b/hw/cxl/cxl-device-utils.c
> > >> @@ -354,8 +354,6 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
> > >>   
> > >>   static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
> > >>   {
> > >> -    const uint8_t msi_n = 9;
> > >> -
> > >>       /* 2048 payload size */
> > >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> > >>                        PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
> > >> @@ -364,8 +362,8 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
> > >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> > >>                        BG_INT_CAP, 1);
> > >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> > >> -                     MSI_N, msi_n);
> > >> -    cxl_dstate->mbox_msi_n = msi_n;
> > >> +                     MSI_N, CXL_MSIX_MBOX);    
> > > 
> > > Should be passed in from the type 3 specific call so add a parameter to this
> > > function and pass this from cxl_device_register_init_t3.
> > > Even better pass it into there from ct3d_reset()
> > >     
> > 
> > At a glance, `ct3d_reset()` has the following prototype: `typedef void (*DeviceReset)(DeviceState *dev)`,
> > which is inherited from the QEMU device framework. Consequently, it is hard to extend `ct3d_reset()`
> > to include an additional parameter.  
> 
> I wasn't clear in that statement.   ct3d_reset() calls cxl_device_register_init_t3(). Change
> the signature of cxl_device_register_init_t3() so that takes the appropriate MSI index.
> that puts the source of the information in the cxl_type3.c code, not the core support in hw / CXL.
> No change to ct3d_reset() signature needed.
> 
> > 
> > 
> >   
> > > Will potentially be a different number for the switch CCI passed in from
> > > the call of cxl_device_register_init_swcci() in switch-mailbox-cci.c    
> > 
> > It sounds reasonable, offering a more flexible design for the future.
> > 
> > Currently, mailbox_reg_init_common() will be called from type3 device and swcci,
> > however, I didn't see any where the swcci itself have setup the msi/msix at all.
> > 
> > Is this expected, feel free to let me know if I'm missing something.  
> Currently that is in cxl_device_register_init_swcci() via
> mailbox_reg_init_common()
> Suggestion is to like the above type 3 case move it into the specific driver
> by having cxl_device_register_init_swcci() take it as a parameter and
> pass that in from cswmbcci_reset() in switch-mailbox-cci.c

Ah.  I now see what you mean - no msix bar setup.
I guess the switch cci support has accidentally been falling back to polled mode.  
That needs fixing but is a separate issue.

Jonathan

> 
> 
> >   
> > > 
> > >     
> > >> +    cxl_dstate->mbox_msi_n = CXL_MSIX_MBOX;
> > >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> > >>                        MBOX_READY_TIME, 0); /* Not reported */
> > >>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> > >> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > >> index 5cf754b38f..f2f060ed9e 100644
> > >> --- a/hw/mem/cxl_type3.c
> > >> +++ b/hw/mem/cxl_type3.c
> > >> @@ -843,7 +843,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
> > >>       ComponentRegisters *regs = &cxl_cstate->crb;
> > >>       MemoryRegion *mr = &regs->component_registers;
> > >>       uint8_t *pci_conf = pci_dev->config;
> > >> -    unsigned short msix_num = 6;
> > >>       int i, rc;
> > >>       uint16_t count;
> > >>   
> > >> @@ -884,16 +883,17 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
> > >>                        &ct3d->cxl_dstate.device_registers);
> > >>   
> > >>       /* MSI(-X) Initialization */
> > >> -    rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
> > >> +    rc = msix_init_exclusive_bar(pci_dev, CXL_MSIX_MAX, 4, NULL);
> > >>       if (rc) {
> > >>           goto err_address_space_free;
> > >>       }
> > >> -    for (i = 0; i < msix_num; i++) {
> > >> +    for (i = 0; i < CXL_MSIX_MAX; i++) {
> > >>           msix_vector_use(pci_dev, i);
> > >>       }
> > >>   
> > >>       /* DOE Initialization */
> > >> -    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
> > >> +    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
> > >> +                  CXL_MSIX_PCIE_DOE);
> > >>   
> > >>       cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
> > >>       cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
> > >> @@ -908,7 +908,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
> > >>       if (rc) {
> > >>           goto err_release_cdat;
> > >>       }
> > >> -    cxl_event_init(&ct3d->cxl_dstate, 2);
> > >> +    cxl_event_init(&ct3d->cxl_dstate, CXL_MSIX_EVENT_START);
> > >>   
> > >>       /* Set default value for patrol scrub attributes */
> > >>       ct3d->patrol_scrub_attrs.scrub_cycle_cap =
> > >> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > >> index 561b375dc8..3f89b041ce 100644
> > >> --- a/include/hw/cxl/cxl_device.h
> > >> +++ b/include/hw/cxl/cxl_device.h
> > >> @@ -133,6 +133,13 @@ typedef enum {
> > >>       CXL_MBOX_MAX = 0x20
> > >>   } CXLRetCode;
> > >>   
> > >> +enum {    
> > > 
> > > Maybe worth naming these to be type3 specific.    
> > 
> > It sounds good to me.
> > 
> >   
> > >     
> > >> +    CXL_MSIX_PCIE_DOE = 0,    
> > > Name it to include that this is specifically the DOE for the table access protocol.
> > > 
> > >     CXL_MSIX_PCIE_DOE_TABLE_ACCESS    
> > 
> > make sense.
> > 
> >   
> > > 
> > > 
> > > This should be private to cxl_type3.c which should be possible by passing
> > > it to a few more calls from there.
> > >     
> > 
> > If we make the entire enumeration `cxl_type3` private, it appears unnecessary to pass it through several more calls.  
> 
> Key is to make sure it is in cxl_type3.c  For that to work you need to pass it as
> described above so that we can set the relevant values in various registers
> configured by the hw/cxl/* code.
> 
> > 
> > How about this
> > 
> > +/* type3 device private */
> > +enum CXL_T3_MSIX_VECTOR {
> > +    CXL_T3_MSIX_PCIE_DOE_TABLE_ACCESS = 0,  
> 
> What is on 1?  We don't need to maintain backwards compatibility (migration
> is broken in lots of other ways and not high on priority list to fix) so should
> be no need to skip it. If you really want to add a reserved entry and we will
> fill it in later.
> 
> > +    CXL_T3_MSIX_EVENT_START = 2,
> > +    CXL_T3_MSIX_MBOX = CXL_T3_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
> > +    CXL_T3_MSIX_VECTOR_NR
> > +};
> > +
> >   
> Hope that helps,
> 
> Jonathan
> 
> > 
> > Thanks
> > Zhijian
> >   
> > >> +    CXL_MSIX_EVENT_START = 2,
> > >> +    CXL_MSIX_MBOX = CXL_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
> > >> +    CXL_MSIX_MAX
> > >> +};
> > >> +
> > >>   typedef struct CXLCCI CXLCCI;
> > >>   typedef struct cxl_device_state CXLDeviceState;
> > >>   struct cxl_cmd;    
> > >    
> 
> 
>
Re: [PATCH v2] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr`
Posted by Zhijian Li (Fujitsu) via 3 months ago
Merry Christmas and a Happy New Year!

And kindly ping...


On 13/12/2024 17:36, Li Zhijian wrote:
> This assertion always happens when we sanitize the CXL memory device.
> $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize
> 
> It is incorrect to register an MSIX number beyond the device's capability.
> 
> Expand the device's MSIX number and use the enum to maintain the *USED*
> and MAX MSIX number
> 
> Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion")
> Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
> ---
> V2: just increase msix number and add enum to maintainer their values #
> Jonathan
> ---
>   hw/cxl/cxl-device-utils.c   |  6 ++----
>   hw/mem/cxl_type3.c          | 10 +++++-----
>   include/hw/cxl/cxl_device.h |  7 +++++++
>   3 files changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
> index 035d034f6d..bc2171e3d4 100644
> --- a/hw/cxl/cxl-device-utils.c
> +++ b/hw/cxl/cxl-device-utils.c
> @@ -354,8 +354,6 @@ static void device_reg_init_common(CXLDeviceState *cxl_dstate)
>   
>   static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>   {
> -    const uint8_t msi_n = 9;
> -
>       /* 2048 payload size */
>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                        PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
> @@ -364,8 +362,8 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                        BG_INT_CAP, 1);
>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> -                     MSI_N, msi_n);
> -    cxl_dstate->mbox_msi_n = msi_n;
> +                     MSI_N, CXL_MSIX_MBOX);
> +    cxl_dstate->mbox_msi_n = CXL_MSIX_MBOX;
>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
>                        MBOX_READY_TIME, 0); /* Not reported */
>       ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 5cf754b38f..f2f060ed9e 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -843,7 +843,6 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>       ComponentRegisters *regs = &cxl_cstate->crb;
>       MemoryRegion *mr = &regs->component_registers;
>       uint8_t *pci_conf = pci_dev->config;
> -    unsigned short msix_num = 6;
>       int i, rc;
>       uint16_t count;
>   
> @@ -884,16 +883,17 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>                        &ct3d->cxl_dstate.device_registers);
>   
>       /* MSI(-X) Initialization */
> -    rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
> +    rc = msix_init_exclusive_bar(pci_dev, CXL_MSIX_MAX, 4, NULL);
>       if (rc) {
>           goto err_address_space_free;
>       }
> -    for (i = 0; i < msix_num; i++) {
> +    for (i = 0; i < CXL_MSIX_MAX; i++) {
>           msix_vector_use(pci_dev, i);
>       }
>   
>       /* DOE Initialization */
> -    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
> +    pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true,
> +                  CXL_MSIX_PCIE_DOE);
>   
>       cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
>       cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
> @@ -908,7 +908,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
>       if (rc) {
>           goto err_release_cdat;
>       }
> -    cxl_event_init(&ct3d->cxl_dstate, 2);
> +    cxl_event_init(&ct3d->cxl_dstate, CXL_MSIX_EVENT_START);
>   
>       /* Set default value for patrol scrub attributes */
>       ct3d->patrol_scrub_attrs.scrub_cycle_cap =
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 561b375dc8..3f89b041ce 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -133,6 +133,13 @@ typedef enum {
>       CXL_MBOX_MAX = 0x20
>   } CXLRetCode;
>   
> +enum {
> +    CXL_MSIX_PCIE_DOE = 0,
> +    CXL_MSIX_EVENT_START = 2,
> +    CXL_MSIX_MBOX = CXL_MSIX_EVENT_START + CXL_EVENT_TYPE_MAX,
> +    CXL_MSIX_MAX
> +};
> +
>   typedef struct CXLCCI CXLCCI;
>   typedef struct cxl_device_state CXLDeviceState;
>   struct cxl_cmd;