From: Zhi Wang <zhiwang@kernel.org>
Introduce a CXL type-2 device emulation that provides a minimum base for
testing kernel CXL core type-2 support and CXL type-2 virtualization. It
is also a good base for introducing the more emulated features.
Currently, it only supports:
- Emulating component registers with HDM decoders.
- Volatile memory backend and emualtion of region access.
The emulation is aimed to not tightly coupled with the current CXL type-3
emulation since many advanced CXL type-3 emulation features are not
implemented in a CXL type-2 device.
Co-developed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Zhi Wang <zhiwang@kernel.org>
---
MAINTAINERS | 1 +
docs/system/devices/cxl.rst | 11 ++
hw/cxl/cxl-component-utils.c | 2 +
hw/cxl/cxl-host.c | 19 +-
hw/mem/Kconfig | 5 +
hw/mem/cxl_accel.c | 319 +++++++++++++++++++++++++++++++++
hw/mem/meson.build | 1 +
include/hw/cxl/cxl_component.h | 1 +
include/hw/cxl/cxl_device.h | 25 +++
include/hw/pci/pci_ids.h | 1 +
10 files changed, 382 insertions(+), 3 deletions(-)
create mode 100644 hw/mem/cxl_accel.c
diff --git a/MAINTAINERS b/MAINTAINERS
index aaf0505a21..72a6a505eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2914,6 +2914,7 @@ R: Fan Ni <fan.ni@samsung.com>
S: Supported
F: hw/cxl/
F: hw/mem/cxl_type3.c
+F: hw/mem/cxl_accel.c
F: include/hw/cxl/
F: qapi/cxl.json
diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index 882b036f5e..13cc2417f2 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -332,6 +332,17 @@ The same volatile setup may optionally include an LSA region::
-device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \
-M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
+A very simple setup with just one directly attached CXL Type 2 Volatile Memory
+Accelerator device::
+
+ qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
+ ...
+ -object memory-backend-ram,id=vmem0,share=on,size=256M \
+ -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
+ -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
+ -device cxl-accel,bus=root_port13,volatile-memdev=vmem0,id=cxl-accel0 \
+ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
+
A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way
interleave across 2 CXL host bridges. Each host bridge has 2 CXL Root Ports, with
the CXL Type3 device directly attached (no switches).::
diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index 355103d165..717ef117ac 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -262,6 +262,7 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff;
if (type == CXL2_DEVICE ||
type == CXL2_TYPE3_DEVICE ||
+ type == CXL3_TYPE2_DEVICE ||
type == CXL2_LOGICAL_DEVICE) {
write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] =
0xf0000000;
@@ -293,6 +294,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
case CXL2_UPSTREAM_PORT:
case CXL2_TYPE3_DEVICE:
case CXL2_LOGICAL_DEVICE:
+ case CXL3_TYPE2_DEVICE:
/* + HDM */
caps = 3;
break;
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index e9f2543c43..e603a3f2fc 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -201,7 +201,8 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
return NULL;
}
- if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+ if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3) ||
+ object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) {
return d;
}
@@ -256,7 +257,13 @@ static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data,
return MEMTX_ERROR;
}
- return cxl_type3_read(d, addr + fw->base, data, size, attrs);
+ if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+ return cxl_type3_read(d, addr + fw->base, data, size, attrs);
+ } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) {
+ return cxl_accel_read(d, addr + fw->base, data, size, attrs);
+ }
+
+ return MEMTX_ERROR;
}
static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr,
@@ -272,7 +279,13 @@ static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr,
return MEMTX_OK;
}
- return cxl_type3_write(d, addr + fw->base, data, size, attrs);
+ if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+ return cxl_type3_write(d, addr + fw->base, data, size, attrs);
+ } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) {
+ return cxl_accel_write(d, addr + fw->base, data, size, attrs);
+ }
+
+ return MEMTX_ERROR;
}
const MemoryRegionOps cfmws_ops = {
diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig
index 73c5ae8ad9..1f7d08c17d 100644
--- a/hw/mem/Kconfig
+++ b/hw/mem/Kconfig
@@ -16,3 +16,8 @@ config CXL_MEM_DEVICE
bool
default y if CXL
select MEM_DEVICE
+
+config CXL_ACCEL_DEVICE
+ bool
+ default y if CXL
+ select MEM_DEVICE
diff --git a/hw/mem/cxl_accel.c b/hw/mem/cxl_accel.c
new file mode 100644
index 0000000000..770072126d
--- /dev/null
+++ b/hw/mem/cxl_accel.c
@@ -0,0 +1,319 @@
+/*
+ * CXL accel (type-2) device
+ *
+ * Copyright(C) 2024 NVIDIA Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-v2-only
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "hw/mem/memory-device.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/range.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
+#include "hw/cxl/cxl.h"
+#include "hw/pci/msix.h"
+
+static void update_dvsecs(CXLAccelDev *acceld)
+{
+ CXLComponentState *cxl_cstate = &acceld->cxl_cstate;
+ uint8_t *dvsec;
+ uint32_t range1_size_hi = 0, range1_size_lo = 0,
+ range1_base_hi = 0, range1_base_lo = 0;
+
+ if (acceld->hostvmem) {
+ range1_size_hi = acceld->hostvmem->size >> 32;
+ range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+ (acceld->hostvmem->size & 0xF0000000);
+ }
+
+ dvsec = (uint8_t *)&(CXLDVSECDevice){
+ .cap = 0x1e,
+ .ctrl = 0x2,
+ .status2 = 0x2,
+ .range1_size_hi = range1_size_hi,
+ .range1_size_lo = range1_size_lo,
+ .range1_base_hi = range1_base_hi,
+ .range1_base_lo = range1_base_lo,
+ };
+ cxl_component_update_dvsec(cxl_cstate, PCIE_CXL_DEVICE_DVSEC_LENGTH,
+ PCIE_CXL_DEVICE_DVSEC, dvsec);
+
+ dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
+ .rsvd = 0,
+ .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
+ .reg0_base_hi = 0,
+ };
+ cxl_component_update_dvsec(cxl_cstate, REG_LOC_DVSEC_LENGTH,
+ REG_LOC_DVSEC, dvsec);
+
+ dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
+ .cap = 0x26, /* 68B, IO, Mem, non-MLD */
+ .ctrl = 0x02, /* IO always enabled */
+ .status = 0x26, /* same as capabilities */
+ .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */
+ };
+ cxl_component_update_dvsec(cxl_cstate, PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
+ PCIE_FLEXBUS_PORT_DVSEC, dvsec);
+}
+
+static void build_dvsecs(CXLAccelDev *acceld)
+{
+ CXLComponentState *cxl_cstate = &acceld->cxl_cstate;
+
+ cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE,
+ PCIE_CXL_DEVICE_DVSEC_LENGTH,
+ PCIE_CXL_DEVICE_DVSEC,
+ PCIE_CXL31_DEVICE_DVSEC_REVID, NULL);
+
+ cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE,
+ REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
+ REG_LOC_DVSEC_REVID, NULL);
+
+ cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE,
+ PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
+ PCIE_FLEXBUS_PORT_DVSEC,
+ PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, NULL);
+ update_dvsecs(acceld);
+}
+
+static bool cxl_accel_dpa(CXLAccelDev *acceld, hwaddr host_addr, uint64_t *dpa)
+{
+ return cxl_host_addr_to_dpa(&acceld->cxl_cstate, host_addr, dpa);
+}
+
+static int cxl_accel_hpa_to_as_and_dpa(CXLAccelDev *acceld,
+ hwaddr host_addr,
+ unsigned int size,
+ AddressSpace **as,
+ uint64_t *dpa_offset)
+{
+ MemoryRegion *vmr = NULL;
+ uint64_t vmr_size = 0;
+
+ if (!acceld->hostvmem) {
+ return -ENODEV;
+ }
+
+ vmr = host_memory_backend_get_memory(acceld->hostvmem);
+ if (!vmr) {
+ return -ENODEV;
+ }
+
+ vmr_size = memory_region_size(vmr);
+
+ if (!cxl_accel_dpa(acceld, host_addr, dpa_offset)) {
+ return -EINVAL;
+ }
+
+ if (*dpa_offset >= vmr_size) {
+ return -EINVAL;
+ }
+
+ *as = &acceld->hostvmem_as;
+ return 0;
+}
+
+MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+ unsigned size, MemTxAttrs attrs)
+{
+ CXLAccelDev *acceld = CXL_ACCEL(d);
+ uint64_t dpa_offset = 0;
+ AddressSpace *as = NULL;
+ int res;
+
+ res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size,
+ &as, &dpa_offset);
+ if (res) {
+ return MEMTX_ERROR;
+ }
+
+ return address_space_read(as, dpa_offset, attrs, data, size);
+}
+
+MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+ unsigned size, MemTxAttrs attrs)
+{
+ CXLAccelDev *acceld = CXL_ACCEL(d);
+ uint64_t dpa_offset = 0;
+ AddressSpace *as = NULL;
+ int res;
+
+ res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size,
+ &as, &dpa_offset);
+ if (res) {
+ return MEMTX_ERROR;
+ }
+
+ return address_space_write(as, dpa_offset, attrs, &data, size);
+}
+
+static void clean_memory(PCIDevice *pci_dev)
+{
+ CXLAccelDev *acceld = CXL_ACCEL(pci_dev);
+
+ if (acceld->hostvmem) {
+ address_space_destroy(&acceld->hostvmem_as);
+ }
+}
+
+static bool setup_memory(PCIDevice *pci_dev, Error **errp)
+{
+ CXLAccelDev *acceld = CXL_ACCEL(pci_dev);
+
+ if (acceld->hostvmem) {
+ MemoryRegion *vmr;
+ char *v_name;
+
+ vmr = host_memory_backend_get_memory(acceld->hostvmem);
+ if (!vmr) {
+ error_setg(errp, "volatile memdev must have backing device");
+ return false;
+ }
+ if (host_memory_backend_is_mapped(acceld->hostvmem)) {
+ error_setg(errp, "memory backend %s can't be used multiple times.",
+ object_get_canonical_path_component(OBJECT(acceld->hostvmem)));
+ return false;
+ }
+ memory_region_set_nonvolatile(vmr, false);
+ memory_region_set_enabled(vmr, true);
+ host_memory_backend_set_mapped(acceld->hostvmem, true);
+ v_name = g_strdup("cxl-accel-dpa-vmem-space");
+ address_space_init(&acceld->hostvmem_as, vmr, v_name);
+ g_free(v_name);
+ }
+ return true;
+}
+
+static void setup_cxl_regs(PCIDevice *pci_dev)
+{
+ CXLAccelDev *acceld = CXL_ACCEL(pci_dev);
+ CXLComponentState *cxl_cstate = &acceld->cxl_cstate;
+ ComponentRegisters *regs = &cxl_cstate->crb;
+ MemoryRegion *mr = ®s->component_registers;
+
+ cxl_cstate->dvsec_offset = 0x100;
+ cxl_cstate->pdev = pci_dev;
+
+ build_dvsecs(acceld);
+
+ cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
+ TYPE_CXL_ACCEL);
+
+ pci_register_bar(
+ pci_dev, CXL_COMPONENT_REG_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
+}
+
+#define MSIX_NUM 6
+
+static int setup_msix(PCIDevice *pci_dev)
+{
+ int i, rc;
+
+ /* MSI(-X) Initialization */
+ rc = msix_init_exclusive_bar(pci_dev, MSIX_NUM, 4, NULL);
+ if (rc) {
+ return rc;
+ }
+
+ for (i = 0; i < MSIX_NUM; i++) {
+ msix_vector_use(pci_dev, i);
+ }
+ return 0;
+}
+
+static void cxl_accel_realize(PCIDevice *pci_dev, Error **errp)
+{
+ ERRP_GUARD();
+ int rc;
+ uint8_t *pci_conf = pci_dev->config;
+
+ if (!setup_memory(pci_dev, errp)) {
+ return;
+ }
+
+ pci_config_set_prog_interface(pci_conf, 0x10);
+ pcie_endpoint_cap_init(pci_dev, 0x80);
+
+ setup_cxl_regs(pci_dev);
+
+ /* MSI(-X) Initialization */
+ rc = setup_msix(pci_dev);
+ if (rc) {
+ clean_memory(pci_dev);
+ return;
+ }
+}
+
+static void cxl_accel_exit(PCIDevice *pci_dev)
+{
+ clean_memory(pci_dev);
+}
+
+static void cxl_accel_reset(DeviceState *dev)
+{
+ CXLAccelDev *acceld = CXL_ACCEL(dev);
+ CXLComponentState *cxl_cstate = &acceld->cxl_cstate;
+ uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
+ uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
+
+ update_dvsecs(acceld);
+ cxl_component_register_init_common(reg_state, write_msk, CXL3_TYPE2_DEVICE);
+}
+
+static Property cxl_accel_props[] = {
+ DEFINE_PROP_LINK("volatile-memdev", CXLAccelDev, hostvmem,
+ TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void cxl_accel_class_init(ObjectClass *oc, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+ PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
+
+ pc->realize = cxl_accel_realize;
+ pc->exit = cxl_accel_exit;
+
+ pc->class_id = PCI_CLASS_CXL_QEMU_ACCEL;
+ pc->vendor_id = PCI_VENDOR_ID_INTEL;
+ pc->device_id = 0xd94;
+ pc->revision = 1;
+
+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+ dc->desc = "CXL Accelerator Device (Type 2)";
+ device_class_set_legacy_reset(dc, cxl_accel_reset);
+ device_class_set_props(dc, cxl_accel_props);
+}
+
+static const TypeInfo cxl_accel_dev_info = {
+ .name = TYPE_CXL_ACCEL,
+ .parent = TYPE_PCI_DEVICE,
+ .class_size = sizeof(struct CXLAccelClass),
+ .class_init = cxl_accel_class_init,
+ .instance_size = sizeof(CXLAccelDev),
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_CXL_DEVICE },
+ { INTERFACE_PCIE_DEVICE },
+ {}
+ },
+};
+
+static void cxl_accel_dev_registers(void)
+{
+ type_register_static(&cxl_accel_dev_info);
+}
+
+type_init(cxl_accel_dev_registers);
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
index 1c1c6da24b..36a395dbb6 100644
--- a/hw/mem/meson.build
+++ b/hw/mem/meson.build
@@ -4,6 +4,7 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
+mem_ss.add(when: 'CONFIG_CXL_ACCEL_DEVICE', if_true: files('cxl_accel.c'))
system_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
system_ss.add(when: 'CONFIG_MEM_DEVICE', if_false: files('memory-device-stubs.c'))
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
index 30fe4bfa24..0e78db26b8 100644
--- a/include/hw/cxl/cxl_component.h
+++ b/include/hw/cxl/cxl_component.h
@@ -29,6 +29,7 @@ enum reg_type {
CXL2_UPSTREAM_PORT,
CXL2_DOWNSTREAM_PORT,
CXL3_SWITCH_MAILBOX_CCI,
+ CXL3_TYPE2_DEVICE,
};
/*
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 561b375dc8..ac26b264da 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -630,6 +630,26 @@ struct CSWMBCCIDev {
CXLCCI *cci;
};
+struct CXLAccelDev {
+ /* Private */
+ PCIDevice parent_obj;
+
+ /* Properties */
+ HostMemoryBackend *hostvmem;
+
+ /* State */
+ AddressSpace hostvmem_as;
+ CXLComponentState cxl_cstate;
+};
+
+struct CXLAccelClass {
+ /* Private */
+ PCIDeviceClass parent_class;
+};
+
+#define TYPE_CXL_ACCEL "cxl-accel"
+OBJECT_DECLARE_TYPE(CXLAccelDev, CXLAccelClass, CXL_ACCEL)
+
#define TYPE_CXL_SWITCH_MAILBOX_CCI "cxl-switch-mailbox-cci"
OBJECT_DECLARE_TYPE(CSWMBCCIDev, CSWMBCCIClass, CXL_SWITCH_MAILBOX_CCI)
@@ -638,6 +658,11 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
unsigned size, MemTxAttrs attrs);
+MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+ unsigned size, MemTxAttrs attrs);
+MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+ unsigned size, MemTxAttrs attrs);
+
uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num);
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index f1a53fea8d..08bc469316 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -55,6 +55,7 @@
#define PCI_CLASS_MEMORY_RAM 0x0500
#define PCI_CLASS_MEMORY_FLASH 0x0501
#define PCI_CLASS_MEMORY_CXL 0x0502
+#define PCI_CLASS_CXL_QEMU_ACCEL 0x0503
#define PCI_CLASS_MEMORY_OTHER 0x0580
#define PCI_BASE_CLASS_BRIDGE 0x06
--
2.43.5
On Thu, 12 Dec 2024 05:04:22 -0800 Zhi Wang <zhiw@nvidia.com> wrote: > From: Zhi Wang <zhiwang@kernel.org> > > Introduce a CXL type-2 device emulation that provides a minimum base for > testing kernel CXL core type-2 support and CXL type-2 virtualization. It > is also a good base for introducing the more emulated features. > > Currently, it only supports: > > - Emulating component registers with HDM decoders. > - Volatile memory backend and emualtion of region access. > > The emulation is aimed to not tightly coupled with the current CXL type-3 > emulation since many advanced CXL type-3 emulation features are not > implemented in a CXL type-2 device. > > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Zhi Wang <zhiwang@kernel.org> Hi Zhi, A few passing comments. Jonathan > diff --git a/hw/mem/cxl_accel.c b/hw/mem/cxl_accel.c > new file mode 100644 > index 0000000000..770072126d > --- /dev/null > +++ b/hw/mem/cxl_accel.c > @@ -0,0 +1,319 @@ > + > +static void update_dvsecs(CXLAccelDev *acceld) Just to make them easier to search for and void clashes, good to prefix all functions with cxlacc or something like that. > +{ /... > +static Property cxl_accel_props[] = { > + DEFINE_PROP_LINK("volatile-memdev", CXLAccelDev, hostvmem, > + TYPE_MEMORY_BACKEND, HostMemoryBackend *), Does backing a type 2 device with a memdev provide any advantages? I'd have thought a device specific memory allocation would make more sense, like we'd do for a memory BAR on a PCI device. That might complicate the cxl-host handling though so perhaps this is a good way to go for now. > + DEFINE_PROP_END_OF_LIST(), When you get time, rebase as these have gone away recently. I aim to get a fresher staging tree out shortly. > +}; > + > +static void cxl_accel_class_init(ObjectClass *oc, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(oc); > + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); > + > + pc->realize = cxl_accel_realize; > + pc->exit = cxl_accel_exit; > + > + pc->class_id = PCI_CLASS_CXL_QEMU_ACCEL; > + pc->vendor_id = PCI_VENDOR_ID_INTEL; > + pc->device_id = 0xd94; If you are posting these I hope you have those IDs reserved (which seems unlikely ;) We need to be absolutely sure we never hit an existing ID which generally means you need to find whoever controls those allocations in your company and get them to give you an ID for this. > + pc->revision = 1; > + > + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); > + dc->desc = "CXL Accelerator Device (Type 2)"; > + device_class_set_legacy_reset(dc, cxl_accel_reset); > + device_class_set_props(dc, cxl_accel_props); > +} > void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); > diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h > index f1a53fea8d..08bc469316 100644 > --- a/include/hw/pci/pci_ids.h > +++ b/include/hw/pci/pci_ids.h > @@ -55,6 +55,7 @@ > #define PCI_CLASS_MEMORY_RAM 0x0500 > #define PCI_CLASS_MEMORY_FLASH 0x0501 > #define PCI_CLASS_MEMORY_CXL 0x0502 > +#define PCI_CLASS_CXL_QEMU_ACCEL 0x0503 Either this is a real device class (which seems unlikely given the name) or you need to choose something else. PCI maintains a big list of class codes and currently 0x0502 is the highest one define in baseclass 05h (memory controllers) https://members.pcisig.com/wg/PCI-SIG/document/20113 (behind a pcisig login) > #define PCI_CLASS_MEMORY_OTHER 0x0580 > > #define PCI_BASE_CLASS_BRIDGE 0x06
On 21/01/2025 18.16, Jonathan Cameron wrote: > On Thu, 12 Dec 2024 05:04:22 -0800 > Zhi Wang <zhiw@nvidia.com> wrote: > >> From: Zhi Wang <zhiwang@kernel.org> >> >> Introduce a CXL type-2 device emulation that provides a minimum base for >> testing kernel CXL core type-2 support and CXL type-2 virtualization. It >> is also a good base for introducing the more emulated features. >> >> Currently, it only supports: >> >> - Emulating component registers with HDM decoders. >> - Volatile memory backend and emualtion of region access. >> >> The emulation is aimed to not tightly coupled with the current CXL type-3 >> emulation since many advanced CXL type-3 emulation features are not >> implemented in a CXL type-2 device. >> >> Co-developed-by: Ira Weiny <ira.weiny@intel.com> >> Signed-off-by: Zhi Wang <zhiwang@kernel.org> > > Hi Zhi, > > A few passing comments. > > Jonathan > >> diff --git a/hw/mem/cxl_accel.c b/hw/mem/cxl_accel.c >> new file mode 100644 >> index 0000000000..770072126d >> --- /dev/null >> +++ b/hw/mem/cxl_accel.c >> @@ -0,0 +1,319 @@ > >> + >> +static void update_dvsecs(CXLAccelDev *acceld) > > Just to make them easier to search for and void clashes, good to prefix > all functions with cxlacc or something like that. > >> +{ > > /... > > >> +static Property cxl_accel_props[] = { >> + DEFINE_PROP_LINK("volatile-memdev", CXLAccelDev, hostvmem, >> + TYPE_MEMORY_BACKEND, HostMemoryBackend *), > > Does backing a type 2 device with a memdev provide any advantages? > I'd have thought a device specific memory allocation would make more > sense, like we'd do for a memory BAR on a PCI device. That might > complicate the cxl-host handling though so perhaps this is a good > way to go for now. Was thinking the same. As my current idea is for getting a emulated device the people can test CXL T2 core in the kernel and keep things as minimum as they can be in v1, this was the simplest idea I can offer. I am open for suggestions.:) > > >> + DEFINE_PROP_END_OF_LIST(), > > When you get time, rebase as these have gone away recently. > I aim to get a fresher staging tree out shortly. > Will do. >> +}; >> + >> +static void cxl_accel_class_init(ObjectClass *oc, void *data) >> +{ >> + DeviceClass *dc = DEVICE_CLASS(oc); >> + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); >> + >> + pc->realize = cxl_accel_realize; >> + pc->exit = cxl_accel_exit; >> + >> + pc->class_id = PCI_CLASS_CXL_QEMU_ACCEL; >> + pc->vendor_id = PCI_VENDOR_ID_INTEL; >> + pc->device_id = 0xd94; > The IDs are mostly from Ira's original T2 emulated device patches. I will take a look to see if there is a better option for this. > If you are posting these I hope you have those IDs reserved > (which seems unlikely ;) > We need to be absolutely sure we never hit an existing ID which generally > means you need to find whoever controls those allocations in your company > and get them to give you an ID for this. > >> + pc->revision = 1; >> + >> + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); >> + dc->desc = "CXL Accelerator Device (Type 2)"; >> + device_class_set_legacy_reset(dc, cxl_accel_reset); >> + device_class_set_props(dc, cxl_accel_props); >> +} > >> void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); >> diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h >> index f1a53fea8d..08bc469316 100644 >> --- a/include/hw/pci/pci_ids.h >> +++ b/include/hw/pci/pci_ids.h >> @@ -55,6 +55,7 @@ >> #define PCI_CLASS_MEMORY_RAM 0x0500 >> #define PCI_CLASS_MEMORY_FLASH 0x0501 >> #define PCI_CLASS_MEMORY_CXL 0x0502 >> +#define PCI_CLASS_CXL_QEMU_ACCEL 0x0503 > > Either this is a real device class (which seems unlikely given the name) > or you need to choose something else. PCI maintains a big list of > class codes and currently 0x0502 is the highest one define in baseclass 05h > (memory controllers) > > https://members.pcisig.com/wg/PCI-SIG/document/20113 > (behind a pcisig login) > >> #define PCI_CLASS_MEMORY_OTHER 0x0580 >> >> #define PCI_BASE_CLASS_BRIDGE 0x06 >
> >> +static void cxl_accel_class_init(ObjectClass *oc, void *data) > >> +{ > >> + DeviceClass *dc = DEVICE_CLASS(oc); > >> + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); > >> + > >> + pc->realize = cxl_accel_realize; > >> + pc->exit = cxl_accel_exit; > >> + > >> + pc->class_id = PCI_CLASS_CXL_QEMU_ACCEL; > >> + pc->vendor_id = PCI_VENDOR_ID_INTEL; > >> + pc->device_id = 0xd94; > > > > The IDs are mostly from Ira's original T2 emulated device patches. > I will take a look to see if there is a better option for this. I pinged Ira and you on the CXL discord. May be fine to use this and save you figuring out who in holds the magic list at NVidia and persuading them to let you have one ;) > > > If you are posting these I hope you have those IDs reserved > > (which seems unlikely ;) > > We need to be absolutely sure we never hit an existing ID which generally > > means you need to find whoever controls those allocations in your company > > and get them to give you an ID for this. > > > >> + pc->revision = 1; > >> + > >> + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); > >> + dc->desc = "CXL Accelerator Device (Type 2)"; > >> + device_class_set_legacy_reset(dc, cxl_accel_reset); > >> + device_class_set_props(dc, cxl_accel_props); > >> +} > > > >> void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); > >> diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h > >> index f1a53fea8d..08bc469316 100644 > >> --- a/include/hw/pci/pci_ids.h > >> +++ b/include/hw/pci/pci_ids.h > >> @@ -55,6 +55,7 @@ > >> #define PCI_CLASS_MEMORY_RAM 0x0500 > >> #define PCI_CLASS_MEMORY_FLASH 0x0501 > >> #define PCI_CLASS_MEMORY_CXL 0x0502 > >> +#define PCI_CLASS_CXL_QEMU_ACCEL 0x0503 > > > > Either this is a real device class (which seems unlikely given the name) > > or you need to choose something else. PCI maintains a big list of > > class codes and currently 0x0502 is the highest one define in baseclass 05h > > (memory controllers) > > > > https://members.pcisig.com/wg/PCI-SIG/document/20113 > > (behind a pcisig login) > > > >> #define PCI_CLASS_MEMORY_OTHER 0x0580 > >> > >> #define PCI_BASE_CLASS_BRIDGE 0x06 > > >
On 12/12/24 13:04, Zhi Wang wrote: > From: Zhi Wang <zhiwang@kernel.org> > > Introduce a CXL type-2 device emulation that provides a minimum base for > testing kernel CXL core type-2 support and CXL type-2 virtualization. It > is also a good base for introducing the more emulated features. > > Currently, it only supports: > > - Emulating component registers with HDM decoders. > - Volatile memory backend and emualtion of region access. > > The emulation is aimed to not tightly coupled with the current CXL type-3 > emulation since many advanced CXL type-3 emulation features are not > implemented in a CXL type-2 device. > > Co-developed-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Zhi Wang <zhiwang@kernel.org> > --- > MAINTAINERS | 1 + > docs/system/devices/cxl.rst | 11 ++ > hw/cxl/cxl-component-utils.c | 2 + > hw/cxl/cxl-host.c | 19 +- > hw/mem/Kconfig | 5 + > hw/mem/cxl_accel.c | 319 +++++++++++++++++++++++++++++++++ > hw/mem/meson.build | 1 + > include/hw/cxl/cxl_component.h | 1 + > include/hw/cxl/cxl_device.h | 25 +++ > include/hw/pci/pci_ids.h | 1 + > 10 files changed, 382 insertions(+), 3 deletions(-) > create mode 100644 hw/mem/cxl_accel.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index aaf0505a21..72a6a505eb 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -2914,6 +2914,7 @@ R: Fan Ni <fan.ni@samsung.com> > S: Supported > F: hw/cxl/ > F: hw/mem/cxl_type3.c > +F: hw/mem/cxl_accel.c > F: include/hw/cxl/ > F: qapi/cxl.json > > diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst > index 882b036f5e..13cc2417f2 100644 > --- a/docs/system/devices/cxl.rst > +++ b/docs/system/devices/cxl.rst > @@ -332,6 +332,17 @@ The same volatile setup may optionally include an LSA region:: > -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \ > -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G > > +A very simple setup with just one directly attached CXL Type 2 Volatile Memory > +Accelerator device:: > + > + qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \ > + ... > + -object memory-backend-ram,id=vmem0,share=on,size=256M \ > + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ > + -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ > + -device cxl-accel,bus=root_port13,volatile-memdev=vmem0,id=cxl-accel0 \ > + -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G > + > A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way > interleave across 2 CXL host bridges. Each host bridge has 2 CXL Root Ports, with > the CXL Type3 device directly attached (no switches).:: > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c > index 355103d165..717ef117ac 100644 > --- a/hw/cxl/cxl-component-utils.c > +++ b/hw/cxl/cxl-component-utils.c > @@ -262,6 +262,7 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, > write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff; You are not changing this write, but I did, based on Type3 or Type2: - write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; + if (type == CXL2_TYPE2_DEVICE) + /* Bit 12 Target Range Type 0= HDM-D or HDM-DB */ + /* Bit 10 says memory already commited */ + write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x7ff; + else + /* Bit 12 Target Range Type 1= HDM-H aka Host Only Coherent Address Range */ + write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; It has been a while since I did work on this, but I guess I did so because it was needed. But maybe I'm wrong ... Bit 10 was something I needed for emulating what we had in the real device, but bit 12 looks something we should set, although maybe it is only informative. > if (type == CXL2_DEVICE || > type == CXL2_TYPE3_DEVICE || > + type == CXL3_TYPE2_DEVICE || > type == CXL2_LOGICAL_DEVICE) { > write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] = > 0xf0000000; > @@ -293,6 +294,7 @@ void cxl_component_register_init_common(uint32_t *reg_state, > case CXL2_UPSTREAM_PORT: > case CXL2_TYPE3_DEVICE: > case CXL2_LOGICAL_DEVICE: > + case CXL3_TYPE2_DEVICE: > /* + HDM */ > caps = 3; > break; > diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c > index e9f2543c43..e603a3f2fc 100644 > --- a/hw/cxl/cxl-host.c > +++ b/hw/cxl/cxl-host.c > @@ -201,7 +201,8 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) > return NULL; > } > > - if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { > + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3) || > + object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { > return d; > } > > @@ -256,7 +257,13 @@ static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data, > return MEMTX_ERROR; > } > > - return cxl_type3_read(d, addr + fw->base, data, size, attrs); > + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { > + return cxl_type3_read(d, addr + fw->base, data, size, attrs); > + } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { > + return cxl_accel_read(d, addr + fw->base, data, size, attrs); > + } > + > + return MEMTX_ERROR; > } > > static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr, > @@ -272,7 +279,13 @@ static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr, > return MEMTX_OK; > } > > - return cxl_type3_write(d, addr + fw->base, data, size, attrs); > + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { > + return cxl_type3_write(d, addr + fw->base, data, size, attrs); > + } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { > + return cxl_accel_write(d, addr + fw->base, data, size, attrs); > + } > + > + return MEMTX_ERROR; > } > > const MemoryRegionOps cfmws_ops = { > diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig > index 73c5ae8ad9..1f7d08c17d 100644 > --- a/hw/mem/Kconfig > +++ b/hw/mem/Kconfig > @@ -16,3 +16,8 @@ config CXL_MEM_DEVICE > bool > default y if CXL > select MEM_DEVICE > + > +config CXL_ACCEL_DEVICE > + bool > + default y if CXL > + select MEM_DEVICE > diff --git a/hw/mem/cxl_accel.c b/hw/mem/cxl_accel.c > new file mode 100644 > index 0000000000..770072126d > --- /dev/null > +++ b/hw/mem/cxl_accel.c > @@ -0,0 +1,319 @@ > +/* > + * CXL accel (type-2) device > + * > + * Copyright(C) 2024 NVIDIA Corporation. > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See the > + * COPYING file in the top-level directory. > + * > + * SPDX-License-Identifier: GPL-v2-only > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/units.h" > +#include "qemu/error-report.h" > +#include "hw/mem/memory-device.h" > +#include "hw/mem/pc-dimm.h" > +#include "hw/pci/pci.h" > +#include "hw/qdev-properties.h" > +#include "hw/qdev-properties-system.h" > +#include "qemu/log.h" > +#include "qemu/module.h" > +#include "qemu/range.h" > +#include "sysemu/hostmem.h" > +#include "sysemu/numa.h" > +#include "hw/cxl/cxl.h" > +#include "hw/pci/msix.h" > + > +static void update_dvsecs(CXLAccelDev *acceld) > +{ > + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; > + uint8_t *dvsec; > + uint32_t range1_size_hi = 0, range1_size_lo = 0, > + range1_base_hi = 0, range1_base_lo = 0; > + > + if (acceld->hostvmem) { > + range1_size_hi = acceld->hostvmem->size >> 32; > + range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | > + (acceld->hostvmem->size & 0xF0000000); > + } > + > + dvsec = (uint8_t *)&(CXLDVSECDevice){ > + .cap = 0x1e, > + .ctrl = 0x2, > + .status2 = 0x2, > + .range1_size_hi = range1_size_hi, > + .range1_size_lo = range1_size_lo, > + .range1_base_hi = range1_base_hi, > + .range1_base_lo = range1_base_lo, > + }; > + cxl_component_update_dvsec(cxl_cstate, PCIE_CXL_DEVICE_DVSEC_LENGTH, > + PCIE_CXL_DEVICE_DVSEC, dvsec); > + > + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ > + .rsvd = 0, > + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, > + .reg0_base_hi = 0, > + }; > + cxl_component_update_dvsec(cxl_cstate, REG_LOC_DVSEC_LENGTH, > + REG_LOC_DVSEC, dvsec); > + > + dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){ > + .cap = 0x26, /* 68B, IO, Mem, non-MLD */ > + .ctrl = 0x02, /* IO always enabled */ > + .status = 0x26, /* same as capabilities */ > + .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */ > + }; > + cxl_component_update_dvsec(cxl_cstate, PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, > + PCIE_FLEXBUS_PORT_DVSEC, dvsec); > +} > + > +static void build_dvsecs(CXLAccelDev *acceld) > +{ > + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; > + > + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, > + PCIE_CXL_DEVICE_DVSEC_LENGTH, > + PCIE_CXL_DEVICE_DVSEC, > + PCIE_CXL31_DEVICE_DVSEC_REVID, NULL); > + > + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, > + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, > + REG_LOC_DVSEC_REVID, NULL); > + > + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, > + PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, > + PCIE_FLEXBUS_PORT_DVSEC, > + PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, NULL); > + update_dvsecs(acceld); > +} > + > +static bool cxl_accel_dpa(CXLAccelDev *acceld, hwaddr host_addr, uint64_t *dpa) > +{ > + return cxl_host_addr_to_dpa(&acceld->cxl_cstate, host_addr, dpa); > +} > + > +static int cxl_accel_hpa_to_as_and_dpa(CXLAccelDev *acceld, > + hwaddr host_addr, > + unsigned int size, > + AddressSpace **as, > + uint64_t *dpa_offset) > +{ > + MemoryRegion *vmr = NULL; > + uint64_t vmr_size = 0; > + > + if (!acceld->hostvmem) { > + return -ENODEV; > + } > + > + vmr = host_memory_backend_get_memory(acceld->hostvmem); > + if (!vmr) { > + return -ENODEV; > + } > + > + vmr_size = memory_region_size(vmr); > + > + if (!cxl_accel_dpa(acceld, host_addr, dpa_offset)) { > + return -EINVAL; > + } > + > + if (*dpa_offset >= vmr_size) { > + return -EINVAL; > + } > + > + *as = &acceld->hostvmem_as; > + return 0; > +} > + > +MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, > + unsigned size, MemTxAttrs attrs) > +{ > + CXLAccelDev *acceld = CXL_ACCEL(d); > + uint64_t dpa_offset = 0; > + AddressSpace *as = NULL; > + int res; > + > + res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size, > + &as, &dpa_offset); > + if (res) { > + return MEMTX_ERROR; > + } > + > + return address_space_read(as, dpa_offset, attrs, data, size); > +} > + > +MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data, > + unsigned size, MemTxAttrs attrs) > +{ > + CXLAccelDev *acceld = CXL_ACCEL(d); > + uint64_t dpa_offset = 0; > + AddressSpace *as = NULL; > + int res; > + > + res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size, > + &as, &dpa_offset); > + if (res) { > + return MEMTX_ERROR; > + } > + > + return address_space_write(as, dpa_offset, attrs, &data, size); > +} > + > +static void clean_memory(PCIDevice *pci_dev) > +{ > + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); > + > + if (acceld->hostvmem) { > + address_space_destroy(&acceld->hostvmem_as); > + } > +} > + > +static bool setup_memory(PCIDevice *pci_dev, Error **errp) > +{ > + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); > + > + if (acceld->hostvmem) { > + MemoryRegion *vmr; > + char *v_name; > + > + vmr = host_memory_backend_get_memory(acceld->hostvmem); > + if (!vmr) { > + error_setg(errp, "volatile memdev must have backing device"); > + return false; > + } > + if (host_memory_backend_is_mapped(acceld->hostvmem)) { > + error_setg(errp, "memory backend %s can't be used multiple times.", > + object_get_canonical_path_component(OBJECT(acceld->hostvmem))); > + return false; > + } > + memory_region_set_nonvolatile(vmr, false); > + memory_region_set_enabled(vmr, true); > + host_memory_backend_set_mapped(acceld->hostvmem, true); > + v_name = g_strdup("cxl-accel-dpa-vmem-space"); > + address_space_init(&acceld->hostvmem_as, vmr, v_name); > + g_free(v_name); > + } > + return true; > +} > + > +static void setup_cxl_regs(PCIDevice *pci_dev) > +{ > + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); > + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; > + ComponentRegisters *regs = &cxl_cstate->crb; > + MemoryRegion *mr = ®s->component_registers; > + > + cxl_cstate->dvsec_offset = 0x100; > + cxl_cstate->pdev = pci_dev; > + > + build_dvsecs(acceld); > + > + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, > + TYPE_CXL_ACCEL); > + > + pci_register_bar( > + pci_dev, CXL_COMPONENT_REG_BAR_IDX, > + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr); > +} > + > +#define MSIX_NUM 6 > + > +static int setup_msix(PCIDevice *pci_dev) > +{ > + int i, rc; > + > + /* MSI(-X) Initialization */ > + rc = msix_init_exclusive_bar(pci_dev, MSIX_NUM, 4, NULL); > + if (rc) { > + return rc; > + } > + > + for (i = 0; i < MSIX_NUM; i++) { > + msix_vector_use(pci_dev, i); > + } > + return 0; > +} > + > +static void cxl_accel_realize(PCIDevice *pci_dev, Error **errp) > +{ > + ERRP_GUARD(); > + int rc; > + uint8_t *pci_conf = pci_dev->config; > + > + if (!setup_memory(pci_dev, errp)) { > + return; > + } > + > + pci_config_set_prog_interface(pci_conf, 0x10); > + pcie_endpoint_cap_init(pci_dev, 0x80); > + > + setup_cxl_regs(pci_dev); > + > + /* MSI(-X) Initialization */ > + rc = setup_msix(pci_dev); > + if (rc) { > + clean_memory(pci_dev); > + return; > + } > +} > + > +static void cxl_accel_exit(PCIDevice *pci_dev) > +{ > + clean_memory(pci_dev); > +} > + > +static void cxl_accel_reset(DeviceState *dev) > +{ > + CXLAccelDev *acceld = CXL_ACCEL(dev); > + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; > + uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers; > + uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask; > + > + update_dvsecs(acceld); > + cxl_component_register_init_common(reg_state, write_msk, CXL3_TYPE2_DEVICE); > +} > + > +static Property cxl_accel_props[] = { > + DEFINE_PROP_LINK("volatile-memdev", CXLAccelDev, hostvmem, > + TYPE_MEMORY_BACKEND, HostMemoryBackend *), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > +static void cxl_accel_class_init(ObjectClass *oc, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(oc); > + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); > + > + pc->realize = cxl_accel_realize; > + pc->exit = cxl_accel_exit; > + > + pc->class_id = PCI_CLASS_CXL_QEMU_ACCEL; > + pc->vendor_id = PCI_VENDOR_ID_INTEL; > + pc->device_id = 0xd94; > + pc->revision = 1; > + > + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); > + dc->desc = "CXL Accelerator Device (Type 2)"; > + device_class_set_legacy_reset(dc, cxl_accel_reset); > + device_class_set_props(dc, cxl_accel_props); > +} > + > +static const TypeInfo cxl_accel_dev_info = { > + .name = TYPE_CXL_ACCEL, > + .parent = TYPE_PCI_DEVICE, > + .class_size = sizeof(struct CXLAccelClass), > + .class_init = cxl_accel_class_init, > + .instance_size = sizeof(CXLAccelDev), > + .interfaces = (InterfaceInfo[]) { > + { INTERFACE_CXL_DEVICE }, > + { INTERFACE_PCIE_DEVICE }, > + {} > + }, > +}; > + > +static void cxl_accel_dev_registers(void) > +{ > + type_register_static(&cxl_accel_dev_info); > +} > + > +type_init(cxl_accel_dev_registers); > diff --git a/hw/mem/meson.build b/hw/mem/meson.build > index 1c1c6da24b..36a395dbb6 100644 > --- a/hw/mem/meson.build > +++ b/hw/mem/meson.build > @@ -4,6 +4,7 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) > mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) > mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) > mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) > +mem_ss.add(when: 'CONFIG_CXL_ACCEL_DEVICE', if_true: files('cxl_accel.c')) > system_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c')) > > system_ss.add(when: 'CONFIG_MEM_DEVICE', if_false: files('memory-device-stubs.c')) > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h > index 30fe4bfa24..0e78db26b8 100644 > --- a/include/hw/cxl/cxl_component.h > +++ b/include/hw/cxl/cxl_component.h > @@ -29,6 +29,7 @@ enum reg_type { > CXL2_UPSTREAM_PORT, > CXL2_DOWNSTREAM_PORT, > CXL3_SWITCH_MAILBOX_CCI, > + CXL3_TYPE2_DEVICE, > }; > > /* > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h > index 561b375dc8..ac26b264da 100644 > --- a/include/hw/cxl/cxl_device.h > +++ b/include/hw/cxl/cxl_device.h > @@ -630,6 +630,26 @@ struct CSWMBCCIDev { > CXLCCI *cci; > }; > > +struct CXLAccelDev { > + /* Private */ > + PCIDevice parent_obj; > + > + /* Properties */ > + HostMemoryBackend *hostvmem; > + > + /* State */ > + AddressSpace hostvmem_as; > + CXLComponentState cxl_cstate; > +}; > + > +struct CXLAccelClass { > + /* Private */ > + PCIDeviceClass parent_class; > +}; > + > +#define TYPE_CXL_ACCEL "cxl-accel" > +OBJECT_DECLARE_TYPE(CXLAccelDev, CXLAccelClass, CXL_ACCEL) > + > #define TYPE_CXL_SWITCH_MAILBOX_CCI "cxl-switch-mailbox-cci" > OBJECT_DECLARE_TYPE(CSWMBCCIDev, CSWMBCCIClass, CXL_SWITCH_MAILBOX_CCI) > > @@ -638,6 +658,11 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, > MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, > unsigned size, MemTxAttrs attrs); > > +MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, > + unsigned size, MemTxAttrs attrs); > +MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data, > + unsigned size, MemTxAttrs attrs); > + > uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); > > void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); > diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h > index f1a53fea8d..08bc469316 100644 > --- a/include/hw/pci/pci_ids.h > +++ b/include/hw/pci/pci_ids.h > @@ -55,6 +55,7 @@ > #define PCI_CLASS_MEMORY_RAM 0x0500 > #define PCI_CLASS_MEMORY_FLASH 0x0501 > #define PCI_CLASS_MEMORY_CXL 0x0502 > +#define PCI_CLASS_CXL_QEMU_ACCEL 0x0503 > #define PCI_CLASS_MEMORY_OTHER 0x0580 > > #define PCI_BASE_CLASS_BRIDGE 0x06
On 12/12/2024 19.02, Alejandro Lucero Palau wrote: > > On 12/12/24 13:04, Zhi Wang wrote: >> From: Zhi Wang <zhiwang@kernel.org> >> >> Introduce a CXL type-2 device emulation that provides a minimum base for >> testing kernel CXL core type-2 support and CXL type-2 virtualization. It >> is also a good base for introducing the more emulated features. >> >> Currently, it only supports: >> >> - Emulating component registers with HDM decoders. >> - Volatile memory backend and emualtion of region access. >> >> The emulation is aimed to not tightly coupled with the current CXL type-3 >> emulation since many advanced CXL type-3 emulation features are not >> implemented in a CXL type-2 device. >> >> Co-developed-by: Ira Weiny <ira.weiny@intel.com> >> Signed-off-by: Zhi Wang <zhiwang@kernel.org> >> --- >> MAINTAINERS | 1 + >> docs/system/devices/cxl.rst | 11 ++ >> hw/cxl/cxl-component-utils.c | 2 + >> hw/cxl/cxl-host.c | 19 +- >> hw/mem/Kconfig | 5 + >> hw/mem/cxl_accel.c | 319 +++++++++++++++++++++++++++++++++ >> hw/mem/meson.build | 1 + >> include/hw/cxl/cxl_component.h | 1 + >> include/hw/cxl/cxl_device.h | 25 +++ >> include/hw/pci/pci_ids.h | 1 + >> 10 files changed, 382 insertions(+), 3 deletions(-) >> create mode 100644 hw/mem/cxl_accel.c >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index aaf0505a21..72a6a505eb 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -2914,6 +2914,7 @@ R: Fan Ni <fan.ni@samsung.com> >> S: Supported >> F: hw/cxl/ >> F: hw/mem/cxl_type3.c >> +F: hw/mem/cxl_accel.c >> F: include/hw/cxl/ >> F: qapi/cxl.json >> diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst >> index 882b036f5e..13cc2417f2 100644 >> --- a/docs/system/devices/cxl.rst >> +++ b/docs/system/devices/cxl.rst >> @@ -332,6 +332,17 @@ The same volatile setup may optionally include an >> LSA region:: >> -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl- >> lsa0,id=cxl-vmem0 \ >> -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G >> +A very simple setup with just one directly attached CXL Type 2 >> Volatile Memory >> +Accelerator device:: >> + >> + qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \ >> + ... >> + -object memory-backend-ram,id=vmem0,share=on,size=256M \ >> + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ >> + -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ >> + -device cxl-accel,bus=root_port13,volatile-memdev=vmem0,id=cxl- >> accel0 \ >> + -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G >> + >> A setup suitable for 4 way interleave. Only one fixed window >> provided, to enable 2 way >> interleave across 2 CXL host bridges. Each host bridge has 2 CXL >> Root Ports, with >> the CXL Type3 device directly attached (no switches).:: >> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c >> index 355103d165..717ef117ac 100644 >> --- a/hw/cxl/cxl-component-utils.c >> +++ b/hw/cxl/cxl-component-utils.c >> @@ -262,6 +262,7 @@ static void hdm_init_common(uint32_t *reg_state, >> uint32_t *write_msk, >> write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff; > > > You are not changing this write, but I did, based on Type3 or Type2: > > > - write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; > + if (type == CXL2_TYPE2_DEVICE) > + /* Bit 12 Target Range Type 0= HDM-D or HDM-DB */ > + /* Bit 10 says memory already commited */ > + write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x7ff; > + else > + /* Bit 12 Target Range Type 1= HDM-H aka Host Only > Coherent Address Range */ > + write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; > > > It has been a while since I did work on this, but I guess I did so > because it was needed. But maybe I'm wrong ... > > Bit 10 was something I needed for emulating what we had in the real > device, but bit 12 looks something we should set, although maybe it is > only informative. Interesting. We can think about how to custom the mask via params. It might be helpful that you can show a list of the stuff you wish to custom. I understand they are hacks for validation? since Bit 12 is only RWL for HB, USP of switches in the spec. > > >> if (type == CXL2_DEVICE || >> type == CXL2_TYPE3_DEVICE || >> + type == CXL3_TYPE2_DEVICE || >> type == CXL2_LOGICAL_DEVICE) { >> write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * >> hdm_inc] = >> 0xf0000000; >> @@ -293,6 +294,7 @@ void cxl_component_register_init_common(uint32_t >> *reg_state, >> case CXL2_UPSTREAM_PORT: >> case CXL2_TYPE3_DEVICE: >> case CXL2_LOGICAL_DEVICE: >> + case CXL3_TYPE2_DEVICE: >> /* + HDM */ >> caps = 3; >> break; >> diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c >> index e9f2543c43..e603a3f2fc 100644 >> --- a/hw/cxl/cxl-host.c >> +++ b/hw/cxl/cxl-host.c >> @@ -201,7 +201,8 @@ static PCIDevice >> *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) >> return NULL; >> } >> - if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { >> + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3) || >> + object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { >> return d; >> } >> @@ -256,7 +257,13 @@ static MemTxResult cxl_read_cfmws(void *opaque, >> hwaddr addr, uint64_t *data, >> return MEMTX_ERROR; >> } >> - return cxl_type3_read(d, addr + fw->base, data, size, attrs); >> + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { >> + return cxl_type3_read(d, addr + fw->base, data, size, attrs); >> + } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { >> + return cxl_accel_read(d, addr + fw->base, data, size, attrs); >> + } >> + >> + return MEMTX_ERROR; >> } >> static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr, >> @@ -272,7 +279,13 @@ static MemTxResult cxl_write_cfmws(void *opaque, >> hwaddr addr, >> return MEMTX_OK; >> } >> - return cxl_type3_write(d, addr + fw->base, data, size, attrs); >> + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { >> + return cxl_type3_write(d, addr + fw->base, data, size, attrs); >> + } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) { >> + return cxl_accel_write(d, addr + fw->base, data, size, attrs); >> + } >> + >> + return MEMTX_ERROR; >> } >> const MemoryRegionOps cfmws_ops = { >> diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig >> index 73c5ae8ad9..1f7d08c17d 100644 >> --- a/hw/mem/Kconfig >> +++ b/hw/mem/Kconfig >> @@ -16,3 +16,8 @@ config CXL_MEM_DEVICE >> bool >> default y if CXL >> select MEM_DEVICE >> + >> +config CXL_ACCEL_DEVICE >> + bool >> + default y if CXL >> + select MEM_DEVICE >> diff --git a/hw/mem/cxl_accel.c b/hw/mem/cxl_accel.c >> new file mode 100644 >> index 0000000000..770072126d >> --- /dev/null >> +++ b/hw/mem/cxl_accel.c >> @@ -0,0 +1,319 @@ >> +/* >> + * CXL accel (type-2) device >> + * >> + * Copyright(C) 2024 NVIDIA Corporation. >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2. >> See the >> + * COPYING file in the top-level directory. >> + * >> + * SPDX-License-Identifier: GPL-v2-only >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "qemu/units.h" >> +#include "qemu/error-report.h" >> +#include "hw/mem/memory-device.h" >> +#include "hw/mem/pc-dimm.h" >> +#include "hw/pci/pci.h" >> +#include "hw/qdev-properties.h" >> +#include "hw/qdev-properties-system.h" >> +#include "qemu/log.h" >> +#include "qemu/module.h" >> +#include "qemu/range.h" >> +#include "sysemu/hostmem.h" >> +#include "sysemu/numa.h" >> +#include "hw/cxl/cxl.h" >> +#include "hw/pci/msix.h" >> + >> +static void update_dvsecs(CXLAccelDev *acceld) >> +{ >> + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; >> + uint8_t *dvsec; >> + uint32_t range1_size_hi = 0, range1_size_lo = 0, >> + range1_base_hi = 0, range1_base_lo = 0; >> + >> + if (acceld->hostvmem) { >> + range1_size_hi = acceld->hostvmem->size >> 32; >> + range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | >> + (acceld->hostvmem->size & 0xF0000000); >> + } >> + >> + dvsec = (uint8_t *)&(CXLDVSECDevice){ >> + .cap = 0x1e, >> + .ctrl = 0x2, >> + .status2 = 0x2, >> + .range1_size_hi = range1_size_hi, >> + .range1_size_lo = range1_size_lo, >> + .range1_base_hi = range1_base_hi, >> + .range1_base_lo = range1_base_lo, >> + }; >> + cxl_component_update_dvsec(cxl_cstate, PCIE_CXL_DEVICE_DVSEC_LENGTH, >> + PCIE_CXL_DEVICE_DVSEC, dvsec); >> + >> + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ >> + .rsvd = 0, >> + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, >> + .reg0_base_hi = 0, >> + }; >> + cxl_component_update_dvsec(cxl_cstate, REG_LOC_DVSEC_LENGTH, >> + REG_LOC_DVSEC, dvsec); >> + >> + dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){ >> + .cap = 0x26, /* 68B, IO, Mem, non-MLD */ >> + .ctrl = 0x02, /* IO always enabled */ >> + .status = 0x26, /* same as capabilities */ >> + .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? */ >> + }; >> + cxl_component_update_dvsec(cxl_cstate, >> PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, >> + PCIE_FLEXBUS_PORT_DVSEC, dvsec); >> +} >> + >> +static void build_dvsecs(CXLAccelDev *acceld) >> +{ >> + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; >> + >> + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, >> + PCIE_CXL_DEVICE_DVSEC_LENGTH, >> + PCIE_CXL_DEVICE_DVSEC, >> + PCIE_CXL31_DEVICE_DVSEC_REVID, NULL); >> + >> + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, >> + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, >> + REG_LOC_DVSEC_REVID, NULL); >> + >> + cxl_component_create_dvsec(cxl_cstate, CXL3_TYPE2_DEVICE, >> + PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, >> + PCIE_FLEXBUS_PORT_DVSEC, >> + PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID, >> NULL); >> + update_dvsecs(acceld); >> +} >> + >> +static bool cxl_accel_dpa(CXLAccelDev *acceld, hwaddr host_addr, >> uint64_t *dpa) >> +{ >> + return cxl_host_addr_to_dpa(&acceld->cxl_cstate, host_addr, dpa); >> +} >> + >> +static int cxl_accel_hpa_to_as_and_dpa(CXLAccelDev *acceld, >> + hwaddr host_addr, >> + unsigned int size, >> + AddressSpace **as, >> + uint64_t *dpa_offset) >> +{ >> + MemoryRegion *vmr = NULL; >> + uint64_t vmr_size = 0; >> + >> + if (!acceld->hostvmem) { >> + return -ENODEV; >> + } >> + >> + vmr = host_memory_backend_get_memory(acceld->hostvmem); >> + if (!vmr) { >> + return -ENODEV; >> + } >> + >> + vmr_size = memory_region_size(vmr); >> + >> + if (!cxl_accel_dpa(acceld, host_addr, dpa_offset)) { >> + return -EINVAL; >> + } >> + >> + if (*dpa_offset >= vmr_size) { >> + return -EINVAL; >> + } >> + >> + *as = &acceld->hostvmem_as; >> + return 0; >> +} >> + >> +MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t >> *data, >> + unsigned size, MemTxAttrs attrs) >> +{ >> + CXLAccelDev *acceld = CXL_ACCEL(d); >> + uint64_t dpa_offset = 0; >> + AddressSpace *as = NULL; >> + int res; >> + >> + res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size, >> + &as, &dpa_offset); >> + if (res) { >> + return MEMTX_ERROR; >> + } >> + >> + return address_space_read(as, dpa_offset, attrs, data, size); >> +} >> + >> +MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t >> data, >> + unsigned size, MemTxAttrs attrs) >> +{ >> + CXLAccelDev *acceld = CXL_ACCEL(d); >> + uint64_t dpa_offset = 0; >> + AddressSpace *as = NULL; >> + int res; >> + >> + res = cxl_accel_hpa_to_as_and_dpa(acceld, host_addr, size, >> + &as, &dpa_offset); >> + if (res) { >> + return MEMTX_ERROR; >> + } >> + >> + return address_space_write(as, dpa_offset, attrs, &data, size); >> +} >> + >> +static void clean_memory(PCIDevice *pci_dev) >> +{ >> + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); >> + >> + if (acceld->hostvmem) { >> + address_space_destroy(&acceld->hostvmem_as); >> + } >> +} >> + >> +static bool setup_memory(PCIDevice *pci_dev, Error **errp) >> +{ >> + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); >> + >> + if (acceld->hostvmem) { >> + MemoryRegion *vmr; >> + char *v_name; >> + >> + vmr = host_memory_backend_get_memory(acceld->hostvmem); >> + if (!vmr) { >> + error_setg(errp, "volatile memdev must have backing >> device"); >> + return false; >> + } >> + if (host_memory_backend_is_mapped(acceld->hostvmem)) { >> + error_setg(errp, "memory backend %s can't be used >> multiple times.", >> + object_get_canonical_path_component(OBJECT(acceld- >> >hostvmem))); >> + return false; >> + } >> + memory_region_set_nonvolatile(vmr, false); >> + memory_region_set_enabled(vmr, true); >> + host_memory_backend_set_mapped(acceld->hostvmem, true); >> + v_name = g_strdup("cxl-accel-dpa-vmem-space"); >> + address_space_init(&acceld->hostvmem_as, vmr, v_name); >> + g_free(v_name); >> + } >> + return true; >> +} >> + >> +static void setup_cxl_regs(PCIDevice *pci_dev) >> +{ >> + CXLAccelDev *acceld = CXL_ACCEL(pci_dev); >> + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; >> + ComponentRegisters *regs = &cxl_cstate->crb; >> + MemoryRegion *mr = ®s->component_registers; >> + >> + cxl_cstate->dvsec_offset = 0x100; >> + cxl_cstate->pdev = pci_dev; >> + >> + build_dvsecs(acceld); >> + >> + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, >> + TYPE_CXL_ACCEL); >> + >> + pci_register_bar( >> + pci_dev, CXL_COMPONENT_REG_BAR_IDX, >> + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, >> mr); >> +} >> + >> +#define MSIX_NUM 6 >> + >> +static int setup_msix(PCIDevice *pci_dev) >> +{ >> + int i, rc; >> + >> + /* MSI(-X) Initialization */ >> + rc = msix_init_exclusive_bar(pci_dev, MSIX_NUM, 4, NULL); >> + if (rc) { >> + return rc; >> + } >> + >> + for (i = 0; i < MSIX_NUM; i++) { >> + msix_vector_use(pci_dev, i); >> + } >> + return 0; >> +} >> + >> +static void cxl_accel_realize(PCIDevice *pci_dev, Error **errp) >> +{ >> + ERRP_GUARD(); >> + int rc; >> + uint8_t *pci_conf = pci_dev->config; >> + >> + if (!setup_memory(pci_dev, errp)) { >> + return; >> + } >> + >> + pci_config_set_prog_interface(pci_conf, 0x10); >> + pcie_endpoint_cap_init(pci_dev, 0x80); >> + >> + setup_cxl_regs(pci_dev); >> + >> + /* MSI(-X) Initialization */ >> + rc = setup_msix(pci_dev); >> + if (rc) { >> + clean_memory(pci_dev); >> + return; >> + } >> +} >> + >> +static void cxl_accel_exit(PCIDevice *pci_dev) >> +{ >> + clean_memory(pci_dev); >> +} >> + >> +static void cxl_accel_reset(DeviceState *dev) >> +{ >> + CXLAccelDev *acceld = CXL_ACCEL(dev); >> + CXLComponentState *cxl_cstate = &acceld->cxl_cstate; >> + uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers; >> + uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask; >> + >> + update_dvsecs(acceld); >> + cxl_component_register_init_common(reg_state, write_msk, >> CXL3_TYPE2_DEVICE); >> +} >> + >> +static Property cxl_accel_props[] = { >> + DEFINE_PROP_LINK("volatile-memdev", CXLAccelDev, hostvmem, >> + TYPE_MEMORY_BACKEND, HostMemoryBackend *), >> + DEFINE_PROP_END_OF_LIST(), >> +}; >> + >> +static void cxl_accel_class_init(ObjectClass *oc, void *data) >> +{ >> + DeviceClass *dc = DEVICE_CLASS(oc); >> + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); >> + >> + pc->realize = cxl_accel_realize; >> + pc->exit = cxl_accel_exit; >> + >> + pc->class_id = PCI_CLASS_CXL_QEMU_ACCEL; >> + pc->vendor_id = PCI_VENDOR_ID_INTEL; >> + pc->device_id = 0xd94; >> + pc->revision = 1; >> + >> + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); >> + dc->desc = "CXL Accelerator Device (Type 2)"; >> + device_class_set_legacy_reset(dc, cxl_accel_reset); >> + device_class_set_props(dc, cxl_accel_props); >> +} >> + >> +static const TypeInfo cxl_accel_dev_info = { >> + .name = TYPE_CXL_ACCEL, >> + .parent = TYPE_PCI_DEVICE, >> + .class_size = sizeof(struct CXLAccelClass), >> + .class_init = cxl_accel_class_init, >> + .instance_size = sizeof(CXLAccelDev), >> + .interfaces = (InterfaceInfo[]) { >> + { INTERFACE_CXL_DEVICE }, >> + { INTERFACE_PCIE_DEVICE }, >> + {} >> + }, >> +}; >> + >> +static void cxl_accel_dev_registers(void) >> +{ >> + type_register_static(&cxl_accel_dev_info); >> +} >> + >> +type_init(cxl_accel_dev_registers); >> diff --git a/hw/mem/meson.build b/hw/mem/meson.build >> index 1c1c6da24b..36a395dbb6 100644 >> --- a/hw/mem/meson.build >> +++ b/hw/mem/meson.build >> @@ -4,6 +4,7 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc- >> dimm.c')) >> mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) >> mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) >> mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: >> files('cxl_type3.c')) >> +mem_ss.add(when: 'CONFIG_CXL_ACCEL_DEVICE', if_true: >> files('cxl_accel.c')) >> system_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: >> files('cxl_type3_stubs.c')) >> system_ss.add(when: 'CONFIG_MEM_DEVICE', if_false: files('memory- >> device-stubs.c')) >> diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/ >> cxl_component.h >> index 30fe4bfa24..0e78db26b8 100644 >> --- a/include/hw/cxl/cxl_component.h >> +++ b/include/hw/cxl/cxl_component.h >> @@ -29,6 +29,7 @@ enum reg_type { >> CXL2_UPSTREAM_PORT, >> CXL2_DOWNSTREAM_PORT, >> CXL3_SWITCH_MAILBOX_CCI, >> + CXL3_TYPE2_DEVICE, >> }; >> /* >> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h >> index 561b375dc8..ac26b264da 100644 >> --- a/include/hw/cxl/cxl_device.h >> +++ b/include/hw/cxl/cxl_device.h >> @@ -630,6 +630,26 @@ struct CSWMBCCIDev { >> CXLCCI *cci; >> }; >> +struct CXLAccelDev { >> + /* Private */ >> + PCIDevice parent_obj; >> + >> + /* Properties */ >> + HostMemoryBackend *hostvmem; >> + >> + /* State */ >> + AddressSpace hostvmem_as; >> + CXLComponentState cxl_cstate; >> +}; >> + >> +struct CXLAccelClass { >> + /* Private */ >> + PCIDeviceClass parent_class; >> +}; >> + >> +#define TYPE_CXL_ACCEL "cxl-accel" >> +OBJECT_DECLARE_TYPE(CXLAccelDev, CXLAccelClass, CXL_ACCEL) >> + >> #define TYPE_CXL_SWITCH_MAILBOX_CCI "cxl-switch-mailbox-cci" >> OBJECT_DECLARE_TYPE(CSWMBCCIDev, CSWMBCCIClass, CXL_SWITCH_MAILBOX_CCI) >> @@ -638,6 +658,11 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr >> host_addr, uint64_t *data, >> MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t >> data, >> unsigned size, MemTxAttrs attrs); >> +MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t >> *data, >> + unsigned size, MemTxAttrs attrs); >> +MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t >> data, >> + unsigned size, MemTxAttrs attrs); >> + >> uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); >> void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num); >> diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h >> index f1a53fea8d..08bc469316 100644 >> --- a/include/hw/pci/pci_ids.h >> +++ b/include/hw/pci/pci_ids.h >> @@ -55,6 +55,7 @@ >> #define PCI_CLASS_MEMORY_RAM 0x0500 >> #define PCI_CLASS_MEMORY_FLASH 0x0501 >> #define PCI_CLASS_MEMORY_CXL 0x0502 >> +#define PCI_CLASS_CXL_QEMU_ACCEL 0x0503 >> #define PCI_CLASS_MEMORY_OTHER 0x0580 >> #define PCI_BASE_CLASS_BRIDGE 0x06
© 2016 - 2025 Red Hat, Inc.