[PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port

Michael S. Tsirkin posted 10 patches 3 years, 7 months ago
Maintainers: "Gonglei (Arei)" <arei.gonglei@huawei.com>, "Michael S. Tsirkin" <mst@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Ani Sinha <ani@anisinha.ca>, Ben Widawsky <ben.widawsky@intel.com>, Jonathan Cameron <jonathan.cameron@huawei.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Eric Auger <eric.auger@redhat.com>
There is a newer version of this series
[PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Michael S. Tsirkin 3 years, 7 months ago
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Emulation of a simple CXL Switch downstream port.
The Device ID has been allocated for this use.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/cxl/cxl-host.c              |  43 +++++-
 hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
 hw/pci-bridge/meson.build      |   2 +-
 3 files changed, 291 insertions(+), 3 deletions(-)
 create mode 100644 hw/pci-bridge/cxl_downstream.c

diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index efa14908d8..483d8eb13f 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -129,8 +129,9 @@ static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr,
 
 static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
 {
-    CXLComponentState *hb_cstate;
+    CXLComponentState *hb_cstate, *usp_cstate;
     PCIHostState *hb;
+    CXLUpstreamPort *usp;
     int rb_index;
     uint32_t *cache_mem;
     uint8_t target;
@@ -164,8 +165,46 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
     }
 
     d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0];
+    if (!d) {
+        return NULL;
+    }
 
-    if (!d || !object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+    if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+        return d;
+    }
+
+    /*
+     * Could also be a switch.  Note only one level of switching currently
+     * supported.
+     */
+    if (!object_dynamic_cast(OBJECT(d), TYPE_CXL_USP)) {
+        return NULL;
+    }
+    usp = CXL_USP(d);
+
+    usp_cstate = cxl_usp_to_cstate(usp);
+    if (!usp_cstate) {
+        return NULL;
+    }
+
+    cache_mem = usp_cstate->crb.cache_mem_registers;
+
+    target_found = cxl_hdm_find_target(cache_mem, addr, &target);
+    if (!target_found) {
+        return NULL;
+    }
+
+    d = pcie_find_port_by_pn(&PCI_BRIDGE(d)->sec_bus, target);
+    if (!d) {
+        return NULL;
+    }
+
+    d = pci_bridge_get_sec_bus(PCI_BRIDGE(d))->devices[0];
+    if (!d) {
+        return NULL;
+    }
+
+    if (!object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
         return NULL;
     }
 
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
new file mode 100644
index 0000000000..a361e519d0
--- /dev/null
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -0,0 +1,249 @@
+/*
+ * Emulated CXL Switch Downstream Port
+ *
+ * Copyright (c) 2022 Huawei Technologies.
+ *
+ * Based on xio3130_downstream.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/pcie.h"
+#include "hw/pci/pcie_port.h"
+#include "qapi/error.h"
+
+typedef struct CXLDownStreamPort {
+    /*< private >*/
+    PCIESlot parent_obj;
+
+    /*< public >*/
+    CXLComponentState cxl_cstate;
+} CXLDownstreamPort;
+
+#define TYPE_CXL_DSP "cxl-downstream"
+DECLARE_INSTANCE_CHECKER(CXLDownstreamPort, CXL_DSP, TYPE_CXL_DSP)
+
+#define CXL_DOWNSTREAM_PORT_MSI_OFFSET 0x70
+#define CXL_DOWNSTREAM_PORT_MSI_NR_VECTOR 1
+#define CXL_DOWNSTREAM_PORT_EXP_OFFSET 0x90
+#define CXL_DOWNSTREAM_PORT_AER_OFFSET 0x100
+#define CXL_DOWNSTREAM_PORT_DVSEC_OFFSET        \
+    (CXL_DOWNSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
+
+static void latch_registers(CXLDownstreamPort *dsp)
+{
+    uint32_t *reg_state = dsp->cxl_cstate.crb.cache_mem_registers;
+    uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
+
+    cxl_component_register_init_common(reg_state, write_msk,
+                                       CXL2_DOWNSTREAM_PORT);
+}
+
+/* TODO: Look at sharing this code acorss all CXL port types */
+static void cxl_dsp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
+                                      uint32_t val, int len)
+{
+    CXLDownstreamPort *dsp = CXL_DSP(dev);
+    CXLComponentState *cxl_cstate = &dsp->cxl_cstate;
+
+    if (range_contains(&cxl_cstate->dvsecs[EXTENSIONS_PORT_DVSEC], addr)) {
+        uint8_t *reg = &dev->config[addr];
+        addr -= cxl_cstate->dvsecs[EXTENSIONS_PORT_DVSEC].lob;
+        if (addr == PORT_CONTROL_OFFSET) {
+            if (pci_get_word(reg) & PORT_CONTROL_UNMASK_SBR) {
+                /* unmask SBR */
+                qemu_log_mask(LOG_UNIMP, "SBR mask control is not supported\n");
+            }
+            if (pci_get_word(reg) & PORT_CONTROL_ALT_MEMID_EN) {
+                /* Alt Memory & ID Space Enable */
+                qemu_log_mask(LOG_UNIMP,
+                              "Alt Memory & ID space is not supported\n");
+
+            }
+        }
+    }
+}
+
+static void cxl_dsp_config_write(PCIDevice *d, uint32_t address,
+                                 uint32_t val, int len)
+{
+    uint16_t slt_ctl, slt_sta;
+
+    pcie_cap_slot_get(d, &slt_ctl, &slt_sta);
+    pci_bridge_write_config(d, address, val, len);
+    pcie_cap_flr_write_config(d, address, val, len);
+    pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len);
+    pcie_aer_write_config(d, address, val, len);
+
+    cxl_dsp_dvsec_write_config(d, address, val, len);
+}
+
+static void cxl_dsp_reset(DeviceState *qdev)
+{
+    PCIDevice *d = PCI_DEVICE(qdev);
+    CXLDownstreamPort *dsp = CXL_DSP(qdev);
+
+    pcie_cap_deverr_reset(d);
+    pcie_cap_slot_reset(d);
+    pcie_cap_arifwd_reset(d);
+    pci_bridge_reset(qdev);
+
+    latch_registers(dsp);
+}
+
+static void build_dvsecs(CXLComponentState *cxl)
+{
+    uint8_t *dvsec;
+
+    dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 };
+    cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT,
+                               EXTENSIONS_PORT_DVSEC_LENGTH,
+                               EXTENSIONS_PORT_DVSEC,
+                               EXTENSIONS_PORT_DVSEC_REVID, dvsec);
+
+    dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
+        .cap                     = 0x27, /* Cache, IO, Mem, non-MLD */
+        .ctrl                    = 0x02, /* IO always enabled */
+        .status                  = 0x26, /* same */
+        .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */
+    };
+    cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT,
+                               PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0,
+                               PCIE_FLEXBUS_PORT_DVSEC,
+                               PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec);
+
+    dvsec = (uint8_t *)&(CXLDVSECPortGPF){
+        .rsvd        = 0,
+        .phase1_ctrl = 1, /* 1μs timeout */
+        .phase2_ctrl = 1, /* 1μs timeout */
+    };
+    cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT,
+                               GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC,
+                               GPF_PORT_DVSEC_REVID, dvsec);
+
+    dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
+        .rsvd         = 0,
+        .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
+        .reg0_base_hi = 0,
+    };
+    cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT,
+                               REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
+                               REG_LOC_DVSEC_REVID, dvsec);
+}
+
+static void cxl_dsp_realize(PCIDevice *d, Error **errp)
+{
+    PCIEPort *p = PCIE_PORT(d);
+    PCIESlot *s = PCIE_SLOT(d);
+    CXLDownstreamPort *dsp = CXL_DSP(d);
+    CXLComponentState *cxl_cstate = &dsp->cxl_cstate;
+    ComponentRegisters *cregs = &cxl_cstate->crb;
+    MemoryRegion *component_bar = &cregs->component_registers;
+    int rc;
+
+    pci_bridge_initfn(d, TYPE_PCIE_BUS);
+    pcie_port_init_reg(d);
+
+    rc = msi_init(d, CXL_DOWNSTREAM_PORT_MSI_OFFSET,
+                  CXL_DOWNSTREAM_PORT_MSI_NR_VECTOR,
+                  true, true, errp);
+    if (rc) {
+        assert(rc == -ENOTSUP);
+        goto err_bridge;
+    }
+
+    rc = pcie_cap_init(d, CXL_DOWNSTREAM_PORT_EXP_OFFSET,
+                       PCI_EXP_TYPE_DOWNSTREAM, p->port,
+                       errp);
+    if (rc < 0) {
+        goto err_msi;
+    }
+
+    pcie_cap_flr_init(d);
+    pcie_cap_deverr_init(d);
+    pcie_cap_slot_init(d, s);
+    pcie_cap_arifwd_init(d);
+
+    pcie_chassis_create(s->chassis);
+    rc = pcie_chassis_add_slot(s);
+    if (rc < 0) {
+        error_setg(errp, "Can't add chassis slot, error %d", rc);
+        goto err_pcie_cap;
+    }
+
+    rc = pcie_aer_init(d, PCI_ERR_VER, CXL_DOWNSTREAM_PORT_AER_OFFSET,
+                       PCI_ERR_SIZEOF, errp);
+    if (rc < 0) {
+        goto err_chassis;
+    }
+
+    cxl_cstate->dvsec_offset = CXL_DOWNSTREAM_PORT_DVSEC_OFFSET;
+    cxl_cstate->pdev = d;
+    build_dvsecs(cxl_cstate);
+    cxl_component_register_block_init(OBJECT(d), cxl_cstate, TYPE_CXL_DSP);
+    pci_register_bar(d, CXL_COMPONENT_REG_BAR_IDX,
+                     PCI_BASE_ADDRESS_SPACE_MEMORY |
+                         PCI_BASE_ADDRESS_MEM_TYPE_64,
+                     component_bar);
+
+    return;
+
+ err_chassis:
+    pcie_chassis_del_slot(s);
+ err_pcie_cap:
+    pcie_cap_exit(d);
+ err_msi:
+    msi_uninit(d);
+ err_bridge:
+    pci_bridge_exitfn(d);
+}
+
+static void cxl_dsp_exitfn(PCIDevice *d)
+{
+    PCIESlot *s = PCIE_SLOT(d);
+
+    pcie_aer_exit(d);
+    pcie_chassis_del_slot(s);
+    pcie_cap_exit(d);
+    msi_uninit(d);
+    pci_bridge_exitfn(d);
+}
+
+static void cxl_dsp_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(oc);
+
+    k->is_bridge = true;
+    k->config_write = cxl_dsp_config_write;
+    k->realize = cxl_dsp_realize;
+    k->exit = cxl_dsp_exitfn;
+    k->vendor_id = 0x19e5; /* Huawei */
+    k->device_id = 0xa129; /* Emulated CXL Switch Downstream Port */
+    k->revision = 0;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->desc = "CXL Switch Downstream Port";
+    dc->reset = cxl_dsp_reset;
+}
+
+static const TypeInfo cxl_dsp_info = {
+    .name = TYPE_CXL_DSP,
+    .instance_size = sizeof(CXLDownstreamPort),
+    .parent = TYPE_PCIE_SLOT,
+    .class_init = cxl_dsp_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { INTERFACE_PCIE_DEVICE },
+        { INTERFACE_CXL_DEVICE },
+        { }
+    },
+};
+
+static void cxl_dsp_register_type(void)
+{
+    type_register_static(&cxl_dsp_info);
+}
+
+type_init(cxl_dsp_register_type);
diff --git a/hw/pci-bridge/meson.build b/hw/pci-bridge/meson.build
index 6828f0e08d..243ceeda50 100644
--- a/hw/pci-bridge/meson.build
+++ b/hw/pci-bridge/meson.build
@@ -6,7 +6,7 @@ pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pci
 pci_ss.add(when: 'CONFIG_PXB', if_true: files('pci_expander_bridge.c'),
                                if_false: files('pci_expander_bridge_stubs.c'))
 pci_ss.add(when: 'CONFIG_XIO3130', if_true: files('xio3130_upstream.c', 'xio3130_downstream.c'))
-pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c', 'cxl_upstream.c'))
+pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c', 'cxl_upstream.c', 'cxl_downstream.c'))
 
 # NewWorld PowerMac
 pci_ss.add(when: 'CONFIG_DEC_PCI', if_true: files('dec.c'))
-- 
MST


Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Thomas Huth 3 years, 3 months ago
On 16/06/2022 18.57, Michael S. Tsirkin wrote:
> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> Emulation of a simple CXL Switch downstream port.
> The Device ID has been allocated for this use.
> 
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>   hw/cxl/cxl-host.c              |  43 +++++-
>   hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
>   hw/pci-bridge/meson.build      |   2 +-
>   3 files changed, 291 insertions(+), 3 deletions(-)
>   create mode 100644 hw/pci-bridge/cxl_downstream.c

  Hi!

There is a memory problem somewhere in this new device. I can make QEMU 
crash by running something like this:

$ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
     -display none -monitor stdio
QEMU 7.1.50 monitor - type 'help' for more information
(qemu) device_add cxl-downstream
./qemu/qom/object.c:1188:5: runtime error: member access within misaligned 
address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 byte 
alignment
0x3b3b3b3b3b3b3b3b: note: pointer points here
<memory cannot be printed>
Bus error (core dumped)

Could you have a look if you've got some spare minutes?

  Thomas
Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Thomas Huth 3 years, 2 months ago
On 04/11/2022 07.47, Thomas Huth wrote:
> On 16/06/2022 18.57, Michael S. Tsirkin wrote:
>> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>
>> Emulation of a simple CXL Switch downstream port.
>> The Device ID has been allocated for this use.
>>
>> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> ---
>>   hw/cxl/cxl-host.c              |  43 +++++-
>>   hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
>>   hw/pci-bridge/meson.build      |   2 +-
>>   3 files changed, 291 insertions(+), 3 deletions(-)
>>   create mode 100644 hw/pci-bridge/cxl_downstream.c
> 
>   Hi!
> 
> There is a memory problem somewhere in this new device. I can make QEMU 
> crash by running something like this:
> 
> $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
>      -display none -monitor stdio
> QEMU 7.1.50 monitor - type 'help' for more information
> (qemu) device_add cxl-downstream
> ./qemu/qom/object.c:1188:5: runtime error: member access within misaligned 
> address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 byte 
> alignment
> 0x3b3b3b3b3b3b3b3b: note: pointer points here
> <memory cannot be printed>
> Bus error (core dumped)
> 
> Could you have a look if you've got some spare minutes?

Ping! Jonathan, Michael, any news on this bug?

(this breaks one of my local tests, that's why it's annoying for me)

  Thomas


Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Jonathan Cameron via 3 years, 2 months ago
On Sun, 4 Dec 2022 08:23:55 +0100
Thomas Huth <thuth@redhat.com> wrote:

> On 04/11/2022 07.47, Thomas Huth wrote:
> > On 16/06/2022 18.57, Michael S. Tsirkin wrote:  
> >> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >>
> >> Emulation of a simple CXL Switch downstream port.
> >> The Device ID has been allocated for this use.
> >>
> >> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
> >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >> ---
> >>   hw/cxl/cxl-host.c              |  43 +++++-
> >>   hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
> >>   hw/pci-bridge/meson.build      |   2 +-
> >>   3 files changed, 291 insertions(+), 3 deletions(-)
> >>   create mode 100644 hw/pci-bridge/cxl_downstream.c  
> > 
> >   Hi!
> > 
> > There is a memory problem somewhere in this new device. I can make QEMU 
> > crash by running something like this:
> > 
> > $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
> >      -display none -monitor stdio
> > QEMU 7.1.50 monitor - type 'help' for more information
> > (qemu) device_add cxl-downstream
> > ./qemu/qom/object.c:1188:5: runtime error: member access within misaligned 
> > address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 byte 
> > alignment
> > 0x3b3b3b3b3b3b3b3b: note: pointer points here
> > <memory cannot be printed>
> > Bus error (core dumped)
> > 
> > Could you have a look if you've got some spare minutes?  
> 
> Ping! Jonathan, Michael, any news on this bug?
> 
> (this breaks one of my local tests, that's why it's annoying for me)
Sorry, my email filters ate your earlier message.

Looking into this now. I'll note that it also happens on
device_add xio3130-downstream so not specific to this new device.

So far all I've managed to do is track it to something rcu related
as failing in a call to drain_call_rcu() in qmp_device_add()

Will continue digging.

Jonathan


> 
>   Thomas
> 
Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Jonathan Cameron via 3 years, 2 months ago
On Mon, 5 Dec 2022 10:54:03 +0000
Jonathan Cameron via <qemu-devel@nongnu.org> wrote:

> On Sun, 4 Dec 2022 08:23:55 +0100
> Thomas Huth <thuth@redhat.com> wrote:
> 
> > On 04/11/2022 07.47, Thomas Huth wrote:  
> > > On 16/06/2022 18.57, Michael S. Tsirkin wrote:    
> > >> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > >>
> > >> Emulation of a simple CXL Switch downstream port.
> > >> The Device ID has been allocated for this use.
> > >>
> > >> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > >> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
> > >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > >> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > >> ---
> > >>   hw/cxl/cxl-host.c              |  43 +++++-
> > >>   hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
> > >>   hw/pci-bridge/meson.build      |   2 +-
> > >>   3 files changed, 291 insertions(+), 3 deletions(-)
> > >>   create mode 100644 hw/pci-bridge/cxl_downstream.c    
> > > 
> > >   Hi!
> > > 
> > > There is a memory problem somewhere in this new device. I can make QEMU 
> > > crash by running something like this:
> > > 
> > > $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
> > >      -display none -monitor stdio
> > > QEMU 7.1.50 monitor - type 'help' for more information
> > > (qemu) device_add cxl-downstream
> > > ./qemu/qom/object.c:1188:5: runtime error: member access within misaligned 
> > > address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 byte 
> > > alignment
> > > 0x3b3b3b3b3b3b3b3b: note: pointer points here
> > > <memory cannot be printed>
> > > Bus error (core dumped)
> > > 
> > > Could you have a look if you've got some spare minutes?    
> > 
> > Ping! Jonathan, Michael, any news on this bug?
> > 
> > (this breaks one of my local tests, that's why it's annoying for me)  
> Sorry, my email filters ate your earlier message.
> 
> Looking into this now. I'll note that it also happens on
> device_add xio3130-downstream so not specific to this new device.
> 
> So far all I've managed to do is track it to something rcu related
> as failing in a call to drain_call_rcu() in qmp_device_add()
> 
> Will continue digging.

Assuming I'm seeing the same thing...

Problem is g_free() on the PCIBridge windows: 
https://elixir.bootlin.com/qemu/latest/source/hw/pci/pci_bridge.c#L235

Is called before we get an rcu_call() to flatview_destroy() as a
result of the final call of flatview_unref() in address_space_set_flatview()
so we get a use after free.

As to what the fix is...  Suggestions welcome!

> 
> Jonathan
> 
> 
> > 
> >   Thomas
> >   
> 
> 
Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Alex Bennée 3 years, 2 months ago
Jonathan Cameron via <qemu-devel@nongnu.org> writes:

> On Mon, 5 Dec 2022 10:54:03 +0000
> Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
>
>> On Sun, 4 Dec 2022 08:23:55 +0100
>> Thomas Huth <thuth@redhat.com> wrote:
>> 
>> > On 04/11/2022 07.47, Thomas Huth wrote:  
>> > > On 16/06/2022 18.57, Michael S. Tsirkin wrote:    
>> > >> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> > >>
>> > >> Emulation of a simple CXL Switch downstream port.
>> > >> The Device ID has been allocated for this use.
>> > >>
>> > >> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> > >> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
>> > >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> > >> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> > >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> > >> ---
>> > >>   hw/cxl/cxl-host.c              |  43 +++++-
>> > >>   hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
>> > >>   hw/pci-bridge/meson.build      |   2 +-
>> > >>   3 files changed, 291 insertions(+), 3 deletions(-)
>> > >>   create mode 100644 hw/pci-bridge/cxl_downstream.c    
>> > > 
>> > >   Hi!
>> > > 
>> > > There is a memory problem somewhere in this new device. I can make QEMU 
>> > > crash by running something like this:
>> > > 
>> > > $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
>> > >      -display none -monitor stdio
>> > > QEMU 7.1.50 monitor - type 'help' for more information
>> > > (qemu) device_add cxl-downstream
>> > > ./qemu/qom/object.c:1188:5: runtime error: member access within misaligned 
>> > > address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 byte 
>> > > alignment
>> > > 0x3b3b3b3b3b3b3b3b: note: pointer points here
>> > > <memory cannot be printed>
>> > > Bus error (core dumped)
>> > > 
>> > > Could you have a look if you've got some spare minutes?    
>> > 
>> > Ping! Jonathan, Michael, any news on this bug?
>> > 
>> > (this breaks one of my local tests, that's why it's annoying for me)  
>> Sorry, my email filters ate your earlier message.
>> 
>> Looking into this now. I'll note that it also happens on
>> device_add xio3130-downstream so not specific to this new device.
>> 
>> So far all I've managed to do is track it to something rcu related
>> as failing in a call to drain_call_rcu() in qmp_device_add()
>> 
>> Will continue digging.
>
> Assuming I'm seeing the same thing...
>
> Problem is g_free() on the PCIBridge windows: 
> https://elixir.bootlin.com/qemu/latest/source/hw/pci/pci_bridge.c#L235
>
> Is called before we get an rcu_call() to flatview_destroy() as a
> result of the final call of flatview_unref() in address_space_set_flatview()
> so we get a use after free.
>
> As to what the fix is...  Suggestions welcome!

It sounds like this is the wrong place to free the value then. I guess
the PCI aliases into &w->alias_io() don't get dealt with until RCU
clean-up time.

I *think* using g_free_rcu() should be enough to ensure the free occurs
after the rest of the RCU cleanups but maybe you should only be cleaning
up the windows at device unrealize time? Is this a dynamic piece of
memory which gets updated during the lifetime of the device?

If the memory is being cleared with RCU then the access to the base
pointer should be done with the appropriate qatomic_rcu_[set|read]
functions.

-- 
Alex Bennée
Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Jonathan Cameron via 3 years, 2 months ago
On Mon, 05 Dec 2022 14:59:39 +0000
Alex Bennée <alex.bennee@linaro.org> wrote:

> Jonathan Cameron via <qemu-devel@nongnu.org> writes:
> 
> > On Mon, 5 Dec 2022 10:54:03 +0000
> > Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
> >  
> >> On Sun, 4 Dec 2022 08:23:55 +0100
> >> Thomas Huth <thuth@redhat.com> wrote:
> >>   
> >> > On 04/11/2022 07.47, Thomas Huth wrote:    
> >> > > On 16/06/2022 18.57, Michael S. Tsirkin wrote:      
> >> > >> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >> > >>
> >> > >> Emulation of a simple CXL Switch downstream port.
> >> > >> The Device ID has been allocated for this use.
> >> > >>
> >> > >> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >> > >> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
> >> > >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >> > >> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> >> > >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >> > >> ---
> >> > >>   hw/cxl/cxl-host.c              |  43 +++++-
> >> > >>   hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
> >> > >>   hw/pci-bridge/meson.build      |   2 +-
> >> > >>   3 files changed, 291 insertions(+), 3 deletions(-)
> >> > >>   create mode 100644 hw/pci-bridge/cxl_downstream.c      
> >> > > 
> >> > >   Hi!
> >> > > 
> >> > > There is a memory problem somewhere in this new device. I can make QEMU 
> >> > > crash by running something like this:
> >> > > 
> >> > > $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
> >> > >      -display none -monitor stdio
> >> > > QEMU 7.1.50 monitor - type 'help' for more information
> >> > > (qemu) device_add cxl-downstream
> >> > > ./qemu/qom/object.c:1188:5: runtime error: member access within misaligned 
> >> > > address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 byte 
> >> > > alignment
> >> > > 0x3b3b3b3b3b3b3b3b: note: pointer points here
> >> > > <memory cannot be printed>
> >> > > Bus error (core dumped)
> >> > > 
> >> > > Could you have a look if you've got some spare minutes?      
> >> > 
> >> > Ping! Jonathan, Michael, any news on this bug?
> >> > 
> >> > (this breaks one of my local tests, that's why it's annoying for me)    
> >> Sorry, my email filters ate your earlier message.
> >> 
> >> Looking into this now. I'll note that it also happens on
> >> device_add xio3130-downstream so not specific to this new device.
> >> 
> >> So far all I've managed to do is track it to something rcu related
> >> as failing in a call to drain_call_rcu() in qmp_device_add()
> >> 
> >> Will continue digging.  
> >
> > Assuming I'm seeing the same thing...
> >
> > Problem is g_free() on the PCIBridge windows: 
> > https://elixir.bootlin.com/qemu/latest/source/hw/pci/pci_bridge.c#L235
> >
> > Is called before we get an rcu_call() to flatview_destroy() as a
> > result of the final call of flatview_unref() in address_space_set_flatview()
> > so we get a use after free.
> >
> > As to what the fix is...  Suggestions welcome!  
> 
> It sounds like this is the wrong place to free the value then. I guess
> the PCI aliases into &w->alias_io() don't get dealt with until RCU
> clean-up time.
> 
> I *think* using g_free_rcu() should be enough to ensure the free occurs
> after the rest of the RCU cleanups but maybe you should only be cleaning
> up the windows at device unrealize time? Is this a dynamic piece of
> memory which gets updated during the lifetime of the device?

There is unfortunately code that swaps it for an updated structure
in pci_bridge_update_mappings()

> 
> If the memory is being cleared with RCU then the access to the base
> pointer should be done with the appropriate qatomic_rcu_[set|read]
> functions.
>

I'm annoyingly snowed under this week with other things, but hopefully
can get to in a few days.  Note we are looking at an old problem
here, just one that's happening for an additional device, not sure
if that really affects urgency of fix though.

Jonathan
 
Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Thomas Huth 3 years, 2 months ago
On 07/12/2022 14.21, Jonathan Cameron wrote:
> On Mon, 05 Dec 2022 14:59:39 +0000
> Alex Bennée <alex.bennee@linaro.org> wrote:
> 
>> Jonathan Cameron via <qemu-devel@nongnu.org> writes:
>>
>>> On Mon, 5 Dec 2022 10:54:03 +0000
>>> Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
>>>   
>>>> On Sun, 4 Dec 2022 08:23:55 +0100
>>>> Thomas Huth <thuth@redhat.com> wrote:
>>>>    
>>>>> On 04/11/2022 07.47, Thomas Huth wrote:
>>>>>> On 16/06/2022 18.57, Michael S. Tsirkin wrote:
>>>>>>> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>>>>>
>>>>>>> Emulation of a simple CXL Switch downstream port.
>>>>>>> The Device ID has been allocated for this use.
>>>>>>>
>>>>>>> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>>>>> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
>>>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>>> ---
>>>>>>>    hw/cxl/cxl-host.c              |  43 +++++-
>>>>>>>    hw/pci-bridge/cxl_downstream.c | 249 +++++++++++++++++++++++++++++++++
>>>>>>>    hw/pci-bridge/meson.build      |   2 +-
>>>>>>>    3 files changed, 291 insertions(+), 3 deletions(-)
>>>>>>>    create mode 100644 hw/pci-bridge/cxl_downstream.c
>>>>>>
>>>>>>    Hi!
>>>>>>
>>>>>> There is a memory problem somewhere in this new device. I can make QEMU
>>>>>> crash by running something like this:
>>>>>>
>>>>>> $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
>>>>>>       -display none -monitor stdio
>>>>>> QEMU 7.1.50 monitor - type 'help' for more information
>>>>>> (qemu) device_add cxl-downstream
>>>>>> ./qemu/qom/object.c:1188:5: runtime error: member access within misaligned
>>>>>> address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 byte
>>>>>> alignment
>>>>>> 0x3b3b3b3b3b3b3b3b: note: pointer points here
>>>>>> <memory cannot be printed>
>>>>>> Bus error (core dumped)
>>>>>>
>>>>>> Could you have a look if you've got some spare minutes?
>>>>>
>>>>> Ping! Jonathan, Michael, any news on this bug?
>>>>>
>>>>> (this breaks one of my local tests, that's why it's annoying for me)
>>>> Sorry, my email filters ate your earlier message.
>>>>
>>>> Looking into this now. I'll note that it also happens on
>>>> device_add xio3130-downstream so not specific to this new device.
>>>>
>>>> So far all I've managed to do is track it to something rcu related
>>>> as failing in a call to drain_call_rcu() in qmp_device_add()
>>>>
>>>> Will continue digging.
>>>
>>> Assuming I'm seeing the same thing...
>>>
>>> Problem is g_free() on the PCIBridge windows:
>>> https://elixir.bootlin.com/qemu/latest/source/hw/pci/pci_bridge.c#L235
>>>
>>> Is called before we get an rcu_call() to flatview_destroy() as a
>>> result of the final call of flatview_unref() in address_space_set_flatview()
>>> so we get a use after free.
>>>
>>> As to what the fix is...  Suggestions welcome!
>>
>> It sounds like this is the wrong place to free the value then. I guess
>> the PCI aliases into &w->alias_io() don't get dealt with until RCU
>> clean-up time.
>>
>> I *think* using g_free_rcu() should be enough to ensure the free occurs
>> after the rest of the RCU cleanups but maybe you should only be cleaning
>> up the windows at device unrealize time? Is this a dynamic piece of
>> memory which gets updated during the lifetime of the device?
> 
> There is unfortunately code that swaps it for an updated structure
> in pci_bridge_update_mappings()
> 
>>
>> If the memory is being cleared with RCU then the access to the base
>> pointer should be done with the appropriate qatomic_rcu_[set|read]
>> functions.
>>
> 
> I'm annoyingly snowed under this week with other things, but hopefully
> can get to in a few days.  Note we are looking at an old problem
> here, just one that's happening for an additional device, not sure
> if that really affects urgency of fix though.

It's too late now for QEMU 7.2 anyway, so there is no hurry, I think.

  Thomas


Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Thomas Huth 2 years, 10 months ago
On 07/12/2022 14.26, Thomas Huth wrote:
> On 07/12/2022 14.21, Jonathan Cameron wrote:
>> On Mon, 05 Dec 2022 14:59:39 +0000
>> Alex Bennée <alex.bennee@linaro.org> wrote:
>>
>>> Jonathan Cameron via <qemu-devel@nongnu.org> writes:
>>>
>>>> On Mon, 5 Dec 2022 10:54:03 +0000
>>>> Jonathan Cameron via <qemu-devel@nongnu.org> wrote:
>>>>> On Sun, 4 Dec 2022 08:23:55 +0100
>>>>> Thomas Huth <thuth@redhat.com> wrote:
>>>>>> On 04/11/2022 07.47, Thomas Huth wrote:
>>>>>>> On 16/06/2022 18.57, Michael S. Tsirkin wrote:
>>>>>>>> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>>>>>>
>>>>>>>> Emulation of a simple CXL Switch downstream port.
>>>>>>>> The Device ID has been allocated for this use.
>>>>>>>>
>>>>>>>> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>>>>>>> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
>>>>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>>>> ---
>>>>>>>>    hw/cxl/cxl-host.c              |  43 +++++-
>>>>>>>>    hw/pci-bridge/cxl_downstream.c | 249 
>>>>>>>> +++++++++++++++++++++++++++++++++
>>>>>>>>    hw/pci-bridge/meson.build      |   2 +-
>>>>>>>>    3 files changed, 291 insertions(+), 3 deletions(-)
>>>>>>>>    create mode 100644 hw/pci-bridge/cxl_downstream.c
>>>>>>>
>>>>>>>    Hi!
>>>>>>>
>>>>>>> There is a memory problem somewhere in this new device. I can make QEMU
>>>>>>> crash by running something like this:
>>>>>>>
>>>>>>> $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
>>>>>>>       -display none -monitor stdio
>>>>>>> QEMU 7.1.50 monitor - type 'help' for more information
>>>>>>> (qemu) device_add cxl-downstream
>>>>>>> ./qemu/qom/object.c:1188:5: runtime error: member access within 
>>>>>>> misaligned
>>>>>>> address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 
>>>>>>> byte
>>>>>>> alignment
>>>>>>> 0x3b3b3b3b3b3b3b3b: note: pointer points here
>>>>>>> <memory cannot be printed>
>>>>>>> Bus error (core dumped)
>>>>>>>
>>>>>>> Could you have a look if you've got some spare minutes?
>>>>>>
>>>>>> Ping! Jonathan, Michael, any news on this bug?
>>>>>>
>>>>>> (this breaks one of my local tests, that's why it's annoying for me)
>>>>> Sorry, my email filters ate your earlier message.
>>>>>
>>>>> Looking into this now. I'll note that it also happens on
>>>>> device_add xio3130-downstream so not specific to this new device.
>>>>>
>>>>> So far all I've managed to do is track it to something rcu related
>>>>> as failing in a call to drain_call_rcu() in qmp_device_add()
>>>>>
>>>>> Will continue digging.
>>>>
>>>> Assuming I'm seeing the same thing...
>>>>
>>>> Problem is g_free() on the PCIBridge windows:
>>>> https://elixir.bootlin.com/qemu/latest/source/hw/pci/pci_bridge.c#L235
>>>>
>>>> Is called before we get an rcu_call() to flatview_destroy() as a
>>>> result of the final call of flatview_unref() in 
>>>> address_space_set_flatview()
>>>> so we get a use after free.
>>>>
>>>> As to what the fix is...  Suggestions welcome!
>>>
>>> It sounds like this is the wrong place to free the value then. I guess
>>> the PCI aliases into &w->alias_io() don't get dealt with until RCU
>>> clean-up time.
>>>
>>> I *think* using g_free_rcu() should be enough to ensure the free occurs
>>> after the rest of the RCU cleanups but maybe you should only be cleaning
>>> up the windows at device unrealize time? Is this a dynamic piece of
>>> memory which gets updated during the lifetime of the device?
>>
>> There is unfortunately code that swaps it for an updated structure
>> in pci_bridge_update_mappings()
>>
>>>
>>> If the memory is being cleared with RCU then the access to the base
>>> pointer should be done with the appropriate qatomic_rcu_[set|read]
>>> functions.
>>>
>>
>> I'm annoyingly snowed under this week with other things, but hopefully
>> can get to in a few days.  Note we are looking at an old problem
>> here, just one that's happening for an additional device, not sure
>> if that really affects urgency of fix though.
> 
> It's too late now for QEMU 7.2 anyway, so there is no hurry, I think.

I'm still seeing problems with this device, I guess it's still the
same issue:

$ valgrind -q ./qemu-system-x86_64 -M x-remote -monitor stdio
...
QEMU 7.2.91 monitor - type 'help' for more information
(qemu) device_add cxl-downstream,id=c1
==46154== Thread 2:
==46154== Invalid read of size 8
==46154==    at 0x7A0A0B: memory_region_unref (memory.c:1798)
==46154==    by 0x7A0A0B: flatview_destroy (memory.c:298)
==46154==    by 0x9A5E32: call_rcu_thread (rcu.c:284)
==46154==    by 0x99CC19: qemu_thread_start (qemu-thread-posix.c:541)
==46154==    by 0xB6A31C9: start_thread (in /usr/lib64/libpthread-2.28.so)
==46154==    by 0xB8F4E72: clone (in /usr/lib64/libc-2.28.so)
==46154==  Address 0x1a2a95c0 is 64 bytes inside a block of size 1,632 free'd
==46154==    at 0x4C39A93: free (vg_replace_malloc.c:872)
==46154==    by 0x79B62B1: g_free (in /usr/lib64/libglib-2.0.so.0.5600.4)
==46154==    by 0x55E06F: cxl_dsp_realize (cxl_downstream.c:201)
==46154==    by 0x5523AC: pci_qdev_realize (pci.c:2098)
==46154==    by 0x833A1E: device_set_realized (qdev.c:510)
==46154==    by 0x836DC5: property_set_bool (object.c:2285)
==46154==    by 0x838FA2: object_property_set (object.c:1420)
==46154==    by 0x83C01E: object_property_set_qobject (qom-qobject.c:28)
==46154==    by 0x839213: object_property_set_bool (object.c:1489)
==46154==    by 0x5F9787: qdev_device_add_from_qdict (qdev-monitor.c:714)
==46154==    by 0x5F98F0: qdev_device_add (qdev-monitor.c:733)
==46154==    by 0x5F99E1: qmp_device_add (qdev-monitor.c:855)
==46154==  Block was alloc'd at
==46154==    at 0x4C37135: malloc (vg_replace_malloc.c:381)
==46154==    by 0x79B61A5: g_malloc (in /usr/lib64/libglib-2.0.so.0.5600.4)
==46154==    by 0x553072: pci_bridge_region_init (pci_bridge.c:191)
==46154==    by 0x553575: pci_bridge_initfn (pci_bridge.c:388)
==46154==    by 0x55E032: cxl_dsp_realize (cxl_downstream.c:147)
==46154==    by 0x5523AC: pci_qdev_realize (pci.c:2098)
==46154==    by 0x833A1E: device_set_realized (qdev.c:510)
==46154==    by 0x836DC5: property_set_bool (object.c:2285)
==46154==    by 0x838FA2: object_property_set (object.c:1420)
==46154==    by 0x83C01E: object_property_set_qobject (qom-qobject.c:28)
==46154==    by 0x839213: object_property_set_bool (object.c:1489)
==46154==    by 0x5F9787: qdev_device_add_from_qdict (qdev-monitor.c:714)

Could we get a fix for QEMU 8.0 ?

  Thomas


Re: [PULL 02/10] pci-bridge/cxl_downstream: Add a CXL switch downstream port
Posted by Jonathan Cameron via 2 years, 10 months ago
On Fri, 24 Mar 2023 11:17:50 +0100
Thomas Huth <thuth@redhat.com> wrote:

> On 07/12/2022 14.26, Thomas Huth wrote:
> > On 07/12/2022 14.21, Jonathan Cameron wrote:  
> >> On Mon, 05 Dec 2022 14:59:39 +0000
> >> Alex Bennée <alex.bennee@linaro.org> wrote:
> >>  
> >>> Jonathan Cameron via <qemu-devel@nongnu.org> writes:
> >>>  
> >>>> On Mon, 5 Dec 2022 10:54:03 +0000
> >>>> Jonathan Cameron via <qemu-devel@nongnu.org> wrote:  
> >>>>> On Sun, 4 Dec 2022 08:23:55 +0100
> >>>>> Thomas Huth <thuth@redhat.com> wrote:  
> >>>>>> On 04/11/2022 07.47, Thomas Huth wrote:  
> >>>>>>> On 16/06/2022 18.57, Michael S. Tsirkin wrote:  
> >>>>>>>> From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >>>>>>>>
> >>>>>>>> Emulation of a simple CXL Switch downstream port.
> >>>>>>>> The Device ID has been allocated for this use.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >>>>>>>> Message-Id: <20220616145126.8002-3-Jonathan.Cameron@huawei.com>
> >>>>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >>>>>>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> >>>>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >>>>>>>> ---
> >>>>>>>>    hw/cxl/cxl-host.c              |  43 +++++-
> >>>>>>>>    hw/pci-bridge/cxl_downstream.c | 249 
> >>>>>>>> +++++++++++++++++++++++++++++++++
> >>>>>>>>    hw/pci-bridge/meson.build      |   2 +-
> >>>>>>>>    3 files changed, 291 insertions(+), 3 deletions(-)
> >>>>>>>>    create mode 100644 hw/pci-bridge/cxl_downstream.c  
> >>>>>>>
> >>>>>>>    Hi!
> >>>>>>>
> >>>>>>> There is a memory problem somewhere in this new device. I can make QEMU
> >>>>>>> crash by running something like this:
> >>>>>>>
> >>>>>>> $ MALLOC_PERTURB_=59 ./qemu-system-x86_64 -M x-remote \
> >>>>>>>       -display none -monitor stdio
> >>>>>>> QEMU 7.1.50 monitor - type 'help' for more information
> >>>>>>> (qemu) device_add cxl-downstream
> >>>>>>> ./qemu/qom/object.c:1188:5: runtime error: member access within 
> >>>>>>> misaligned
> >>>>>>> address 0x3b3b3b3b3b3b3b3b for type 'struct Object', which requires 8 
> >>>>>>> byte
> >>>>>>> alignment
> >>>>>>> 0x3b3b3b3b3b3b3b3b: note: pointer points here
> >>>>>>> <memory cannot be printed>
> >>>>>>> Bus error (core dumped)
> >>>>>>>
> >>>>>>> Could you have a look if you've got some spare minutes?  
> >>>>>>
> >>>>>> Ping! Jonathan, Michael, any news on this bug?
> >>>>>>
> >>>>>> (this breaks one of my local tests, that's why it's annoying for me)  
> >>>>> Sorry, my email filters ate your earlier message.
> >>>>>
> >>>>> Looking into this now. I'll note that it also happens on
> >>>>> device_add xio3130-downstream so not specific to this new device.
> >>>>>
> >>>>> So far all I've managed to do is track it to something rcu related
> >>>>> as failing in a call to drain_call_rcu() in qmp_device_add()
> >>>>>
> >>>>> Will continue digging.  
> >>>>
> >>>> Assuming I'm seeing the same thing...
> >>>>
> >>>> Problem is g_free() on the PCIBridge windows:
> >>>> https://elixir.bootlin.com/qemu/latest/source/hw/pci/pci_bridge.c#L235
> >>>>
> >>>> Is called before we get an rcu_call() to flatview_destroy() as a
> >>>> result of the final call of flatview_unref() in 
> >>>> address_space_set_flatview()
> >>>> so we get a use after free.
> >>>>
> >>>> As to what the fix is...  Suggestions welcome!  
> >>>
> >>> It sounds like this is the wrong place to free the value then. I guess
> >>> the PCI aliases into &w->alias_io() don't get dealt with until RCU
> >>> clean-up time.
> >>>
> >>> I *think* using g_free_rcu() should be enough to ensure the free occurs
> >>> after the rest of the RCU cleanups but maybe you should only be cleaning
> >>> up the windows at device unrealize time? Is this a dynamic piece of
> >>> memory which gets updated during the lifetime of the device?  
> >>
> >> There is unfortunately code that swaps it for an updated structure
> >> in pci_bridge_update_mappings()
> >>  
> >>>
> >>> If the memory is being cleared with RCU then the access to the base
> >>> pointer should be done with the appropriate qatomic_rcu_[set|read]
> >>> functions.
> >>>  
> >>
> >> I'm annoyingly snowed under this week with other things, but hopefully
> >> can get to in a few days.  Note we are looking at an old problem
> >> here, just one that's happening for an additional device, not sure
> >> if that really affects urgency of fix though.  
> > 
> > It's too late now for QEMU 7.2 anyway, so there is no hurry, I think.  
> 
> I'm still seeing problems with this device, I guess it's still the
> same issue:
> 
> $ valgrind -q ./qemu-system-x86_64 -M x-remote -monitor stdio
> ...
> QEMU 7.2.91 monitor - type 'help' for more information
> (qemu) device_add cxl-downstream,id=c1
> ==46154== Thread 2:
> ==46154== Invalid read of size 8
> ==46154==    at 0x7A0A0B: memory_region_unref (memory.c:1798)
> ==46154==    by 0x7A0A0B: flatview_destroy (memory.c:298)
> ==46154==    by 0x9A5E32: call_rcu_thread (rcu.c:284)
> ==46154==    by 0x99CC19: qemu_thread_start (qemu-thread-posix.c:541)
> ==46154==    by 0xB6A31C9: start_thread (in /usr/lib64/libpthread-2.28.so)
> ==46154==    by 0xB8F4E72: clone (in /usr/lib64/libc-2.28.so)
> ==46154==  Address 0x1a2a95c0 is 64 bytes inside a block of size 1,632 free'd
> ==46154==    at 0x4C39A93: free (vg_replace_malloc.c:872)
> ==46154==    by 0x79B62B1: g_free (in /usr/lib64/libglib-2.0.so.0.5600.4)
> ==46154==    by 0x55E06F: cxl_dsp_realize (cxl_downstream.c:201)
> ==46154==    by 0x5523AC: pci_qdev_realize (pci.c:2098)
> ==46154==    by 0x833A1E: device_set_realized (qdev.c:510)
> ==46154==    by 0x836DC5: property_set_bool (object.c:2285)
> ==46154==    by 0x838FA2: object_property_set (object.c:1420)
> ==46154==    by 0x83C01E: object_property_set_qobject (qom-qobject.c:28)
> ==46154==    by 0x839213: object_property_set_bool (object.c:1489)
> ==46154==    by 0x5F9787: qdev_device_add_from_qdict (qdev-monitor.c:714)
> ==46154==    by 0x5F98F0: qdev_device_add (qdev-monitor.c:733)
> ==46154==    by 0x5F99E1: qmp_device_add (qdev-monitor.c:855)
> ==46154==  Block was alloc'd at
> ==46154==    at 0x4C37135: malloc (vg_replace_malloc.c:381)
> ==46154==    by 0x79B61A5: g_malloc (in /usr/lib64/libglib-2.0.so.0.5600.4)
> ==46154==    by 0x553072: pci_bridge_region_init (pci_bridge.c:191)
> ==46154==    by 0x553575: pci_bridge_initfn (pci_bridge.c:388)
> ==46154==    by 0x55E032: cxl_dsp_realize (cxl_downstream.c:147)
> ==46154==    by 0x5523AC: pci_qdev_realize (pci.c:2098)
> ==46154==    by 0x833A1E: device_set_realized (qdev.c:510)
> ==46154==    by 0x836DC5: property_set_bool (object.c:2285)
> ==46154==    by 0x838FA2: object_property_set (object.c:1420)
> ==46154==    by 0x83C01E: object_property_set_qobject (qom-qobject.c:28)
> ==46154==    by 0x839213: object_property_set_bool (object.c:1489)
> ==46154==    by 0x5F9787: qdev_device_add_from_qdict (qdev-monitor.c:714)
> 
> Could we get a fix for QEMU 8.0 ?

The original option of moving over to g_free_rcu() and need to then protect
all accesses to br->windows was make a mess of things and as far as I can
immediately spot seems to be unnecessary.

Unfortunately I don't understand why the window handling is complex in the
first place.  The following patch just embeds the structure
directly in the PCI Bridge and seems to avoid the issue you have reported.

As the code to update it on the fly is protected anyway by
memory_region_transaction_begin() I don't think we care about ensuring the
exposed windows are consistent whilst we update them.

If someone else could sanity check my logic that would be great.

Thanks,

Jonathan


diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index dd5af508f9..698fd01ae6 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -184,11 +184,11 @@ static void pci_bridge_init_vga_aliases(PCIBridge *br, PCIBus *parent,
     }
 }
 
-static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
+static void pci_bridge_region_init(PCIBridge *br)
 {
     PCIDevice *pd = PCI_DEVICE(br);
     PCIBus *parent = pci_get_bus(pd);
-    PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1);
+    PCIBridgeWindows *w = &br->windows;
     uint16_t cmd = pci_get_word(pd->config + PCI_COMMAND);
 
     pci_bridge_init_alias(br, &w->alias_pref_mem,
@@ -211,8 +211,6 @@ static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
                           cmd & PCI_COMMAND_IO);
 
     pci_bridge_init_vga_aliases(br, parent, w->alias_vga);
-
-    return w;
 }
 
 static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w)
@@ -234,19 +232,17 @@ static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w)
     object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_LO]));
     object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_HI]));
     object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_MEM]));
-    g_free(w);
 }
 
 void pci_bridge_update_mappings(PCIBridge *br)
 {
-    PCIBridgeWindows *w = br->windows;
-
+    PCIBridgeWindows *w = &br->windows;
     /* Make updates atomic to: handle the case of one VCPU updating the bridge
      * while another accesses an unaffected region. */
     memory_region_transaction_begin();
-    pci_bridge_region_del(br, br->windows);
+    pci_bridge_region_del(br, w);
     pci_bridge_region_cleanup(br, w);
-    br->windows = pci_bridge_region_init(br);
+    pci_bridge_region_init(br);
     memory_region_transaction_commit();
 }
 
@@ -385,7 +381,7 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename)
     sec_bus->address_space_io = &br->address_space_io;
     memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io",
                        4 * GiB);
-    br->windows = pci_bridge_region_init(br);
+    pci_bridge_region_init(br);
     QLIST_INIT(&sec_bus->child);
     QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
 }
@@ -396,8 +392,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
     PCIBridge *s = PCI_BRIDGE(pci_dev);
     assert(QLIST_EMPTY(&s->sec_bus.child));
     QLIST_REMOVE(&s->sec_bus, sibling);
-    pci_bridge_region_del(s, s->windows);
-    pci_bridge_region_cleanup(s, s->windows);
+    pci_bridge_region_del(s, &s->windows);
+    pci_bridge_region_cleanup(s, &s->windows);
     /* object_unparent() is called automatically during device deletion */
 }
 
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 81a058bb2c..b650748a39 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -30,6 +30,7 @@
 #include "hw/pci/pci_bus.h"
 #include "hw/cxl/cxl.h"
 #include "qom/object.h"
+#include "qemu/rcu.h"
 
 typedef struct PCIBridgeWindows PCIBridgeWindows;
 
@@ -39,8 +40,11 @@ typedef struct PCIBridgeWindows PCIBridgeWindows;
  * as subregions.
  */
 struct PCIBridgeWindows {
+//    struct rcu_head rcu;
     MemoryRegion alias_pref_mem;
+    //   uint8_t pad;
     MemoryRegion alias_mem;
+    // uint8_t pad2;
     MemoryRegion alias_io;
     /*
      * When bridge control VGA forwarding is enabled, bridges will
@@ -73,7 +77,7 @@ struct PCIBridge {
     MemoryRegion address_space_mem;
     MemoryRegion address_space_io;
 
-    PCIBridgeWindows *windows;
+    PCIBridgeWindows windows;
 
     pci_map_irq_fn map_irq;
     const char *bus_name;

> 
>   Thomas
>