[PATCH v15 3/4] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl

Jonathan Cameron via posted 4 patches 5 months ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Ani Sinha <anisinha@redhat.com>, Shannon Zhao <shannon.zhaosl@gmail.com>, Peter Maydell <peter.maydell@linaro.org>, Jonathan Cameron <jonathan.cameron@huawei.com>, Fan Ni <fan.ni@samsung.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Fabiano Rosas <farosas@suse.de>, Laurent Vivier <lvivier@redhat.com>
There is a newer version of this series
[PATCH v15 3/4] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl
Posted by Jonathan Cameron via 5 months ago
Code based on i386/pc enablement. The memory layout places space for 16
host bridge register regions after the GIC_REDIST2 in the extended memmap.
The CFMWs are placed above the extended memmap.

Only create the CEDT table if cxl=on set for the machine.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

---
v15: No changes.
---
 include/hw/arm/virt.h    |  4 ++++
 hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++++++++++++++++
 hw/arm/virt.c            | 29 +++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)

diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 9a1b0f53d2..4375819ea0 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -36,6 +36,7 @@
 #include "hw/arm/boot.h"
 #include "hw/arm/bsa.h"
 #include "hw/block/flash.h"
+#include "hw/cxl/cxl.h"
 #include "system/kvm.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "qom/object.h"
@@ -85,6 +86,7 @@ enum {
 /* indices of IO regions located after the RAM */
 enum {
     VIRT_HIGH_GIC_REDIST2 =  VIRT_LOWMEMMAP_LAST,
+    VIRT_CXL_HOST,
     VIRT_HIGH_PCIE_ECAM,
     VIRT_HIGH_PCIE_MMIO,
 };
@@ -140,6 +142,7 @@ struct VirtMachineState {
     bool secure;
     bool highmem;
     bool highmem_compact;
+    bool highmem_cxl;
     bool highmem_ecam;
     bool highmem_mmio;
     bool highmem_redists;
@@ -174,6 +177,7 @@ struct VirtMachineState {
     char *oem_id;
     char *oem_table_id;
     bool ns_el2_virt_timer_irq;
+    CXLState cxl_devices_state;
 };
 
 #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 7e8e0f0298..589e221b89 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -39,10 +39,12 @@
 #include "hw/acpi/aml-build.h"
 #include "hw/acpi/utils.h"
 #include "hw/acpi/pci.h"
+#include "hw/acpi/cxl.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/generic_event_device.h"
 #include "hw/acpi/tpm.h"
 #include "hw/acpi/hmat.h"
+#include "hw/cxl/cxl.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
@@ -119,10 +121,29 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
     aml_append(scope, dev);
 }
 
+static void build_acpi0017(Aml *table)
+{
+    Aml *dev, *scope, *method;
+
+    scope =  aml_scope("_SB");
+    dev = aml_device("CXLM");
+    aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017")));
+
+    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+    aml_append(method, aml_return(aml_int(0x0B)));
+    aml_append(dev, method);
+    build_cxl_dsm_method(dev);
+
+    aml_append(scope, dev);
+    aml_append(table, scope);
+}
+
 static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
                               uint32_t irq, VirtMachineState *vms)
 {
     int ecam_id = VIRT_ECAM_ID(vms->highmem_ecam);
+    bool cxl_present = false;
+    PCIBus *bus = vms->bus;
     struct GPEXConfig cfg = {
         .mmio32 = memmap[VIRT_PCIE_MMIO],
         .pio    = memmap[VIRT_PCIE_PIO],
@@ -136,6 +157,14 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
     }
 
     acpi_dsdt_add_gpex(scope, &cfg);
+    QLIST_FOREACH(bus, &vms->bus->child, sibling) {
+        if (pci_bus_is_cxl(bus)) {
+            cxl_present = true;
+        }
+    }
+    if (cxl_present) {
+        build_acpi0017(scope);
+    }
 }
 
 static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap,
@@ -963,6 +992,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
         }
     }
 
+    if (vms->cxl_devices_state.is_enabled) {
+        cxl_build_cedt(table_offsets, tables_blob, tables->linker,
+                       vms->oem_id, vms->oem_table_id, &vms->cxl_devices_state);
+    }
+
     if (ms->nvdimms_state->is_enabled) {
         nvdimm_build_acpi(table_offsets, tables_blob, tables->linker,
                           ms->nvdimms_state, ms->ram_slots, vms->oem_id,
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9a6cd085a3..e06d293edc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -57,6 +57,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "hw/pci-host/gpex.h"
+#include "hw/pci-bridge/pci_expander_bridge.h"
 #include "hw/virtio/virtio-pci.h"
 #include "hw/core/sysbus-fdt.h"
 #include "hw/platform-bus.h"
@@ -86,6 +87,8 @@
 #include "hw/virtio/virtio-md-pci.h"
 #include "hw/virtio/virtio-iommu.h"
 #include "hw/char/pl011.h"
+#include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_host.h"
 #include "qemu/guest-random.h"
 
 static GlobalProperty arm_virt_compat[] = {
@@ -220,6 +223,7 @@ static const MemMapEntry base_memmap[] = {
 static MemMapEntry extended_memmap[] = {
     /* Additional 64 MB redist region (can contain up to 512 redistributors) */
     [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
+    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */
     [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
     /* Second PCIe window */
     [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },
@@ -1621,6 +1625,17 @@ static void create_pcie(VirtMachineState *vms)
     }
 }
 
+static void create_cxl_host_reg_region(VirtMachineState *vms)
+{
+    MemoryRegion *sysmem = get_system_memory();
+    MemoryRegion *mr = &vms->cxl_devices_state.host_mr;
+
+    memory_region_init(mr, OBJECT(vms), "cxl_host_reg",
+                       vms->memmap[VIRT_CXL_HOST].size);
+    memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr);
+    vms->highmem_cxl = true;
+}
+
 static void create_platform_bus(VirtMachineState *vms)
 {
     DeviceState *dev;
@@ -1737,6 +1752,12 @@ void virt_machine_done(Notifier *notifier, void *data)
     struct arm_boot_info *info = &vms->bootinfo;
     AddressSpace *as = arm_boot_address_space(cpu, info);
 
+    cxl_hook_up_pxb_registers(vms->bus, &vms->cxl_devices_state,
+                              &error_fatal);
+
+    if (vms->cxl_devices_state.is_enabled) {
+        cxl_fmws_link_targets(&error_fatal);
+    }
     /*
      * If the user provided a dtb, we assume the dynamic sysbus nodes
      * already are integrated there. This corresponds to a use case where
@@ -1783,6 +1804,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
 {
     bool *enabled_array[] = {
         &vms->highmem_redists,
+        &vms->highmem_cxl,
         &vms->highmem_ecam,
         &vms->highmem_mmio,
     };
@@ -1890,6 +1912,9 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
     if (device_memory_size > 0) {
         machine_memory_devices_init(ms, device_memory_base, device_memory_size);
     }
+
+    cxl_fmws_set_memmap(ROUND_UP(vms->highest_gpa + 1, 256 * MiB),
+                        BIT_ULL(pa_bits));
 }
 
 static VirtGICType finalize_gic_version_do(const char *accel_name,
@@ -2340,6 +2365,8 @@ static void machvirt_init(MachineState *machine)
     memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base,
                                 machine->ram);
 
+    cxl_fmws_update_mmio();
+
     virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
 
     create_gic(vms, sysmem);
@@ -2395,6 +2422,7 @@ static void machvirt_init(MachineState *machine)
     create_rtc(vms);
 
     create_pcie(vms);
+    create_cxl_host_reg_region(vms);
 
     if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
         vms->acpi_dev = create_acpi_ged(vms);
@@ -3365,6 +3393,7 @@ static void virt_instance_init(Object *obj)
 
     vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
     vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
+    cxl_machine_init(obj, &vms->cxl_devices_state);
 }
 
 static const TypeInfo virt_machine_info = {
-- 
2.48.1
Re: [PATCH v15 3/4] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl
Posted by Peter Maydell 5 months ago
On Thu, 12 Jun 2025 at 14:45, Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> Code based on i386/pc enablement. The memory layout places space for 16
> host bridge register regions after the GIC_REDIST2 in the extended memmap.
> The CFMWs are placed above the extended memmap.
>
> Only create the CEDT table if cxl=on set for the machine.
>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>
> ---
> v15: No changes.
> ---
>  include/hw/arm/virt.h    |  4 ++++
>  hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++++++++++++++++
>  hw/arm/virt.c            | 29 +++++++++++++++++++++++++++++
>  3 files changed, 67 insertions(+)

Can we have some user-facing documentation, please?
(docs/system/arm/virt.rst -- can just be a brief mention
and link to docs/system/devices/cxl.rst if you want to put the
examples of aarch64 use in there.)

> @@ -220,6 +223,7 @@ static const MemMapEntry base_memmap[] = {
>  static MemMapEntry extended_memmap[] = {
>      /* Additional 64 MB redist region (can contain up to 512 redistributors) */
>      [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
> +    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */

This is going to shuffle the memory map around, even if CXL
isn't enabled, which will break migration compatibility.
You need to do something to ensure that the CXL region isn't
included in the calculations of the base addresses for these
regions if CXL isn't enabled.

>      [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
>      /* Second PCIe window */
>      [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },

If you're OK with having the CXL host region at the end of the
list then that's a simpler way to avoid its presence disturbing
the layout of the existing regions, but you might not like it
being at such a high physaddr.

> @@ -1621,6 +1625,17 @@ static void create_pcie(VirtMachineState *vms)
>      }
>  }
>
> +static void create_cxl_host_reg_region(VirtMachineState *vms)
> +{
> +    MemoryRegion *sysmem = get_system_memory();
> +    MemoryRegion *mr = &vms->cxl_devices_state.host_mr;

This looks odd -- why are we reaching directly into the cxl_devices_state
to fish out a MemoryRegion and init it?

> +    memory_region_init(mr, OBJECT(vms), "cxl_host_reg",
> +                       vms->memmap[VIRT_CXL_HOST].size);
> +    memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr);
> +    vms->highmem_cxl = true;
> +}
> +
>  static void create_platform_bus(VirtMachineState *vms)
>  {
>      DeviceState *dev;
> @@ -1737,6 +1752,12 @@ void virt_machine_done(Notifier *notifier, void *data)
>      struct arm_boot_info *info = &vms->bootinfo;
>      AddressSpace *as = arm_boot_address_space(cpu, info);
>
> +    cxl_hook_up_pxb_registers(vms->bus, &vms->cxl_devices_state,
> +                              &error_fatal);
> +
> +    if (vms->cxl_devices_state.is_enabled) {
> +        cxl_fmws_link_targets(&error_fatal);
> +    }
>      /*
>       * If the user provided a dtb, we assume the dynamic sysbus nodes
>       * already are integrated there. This corresponds to a use case where
> @@ -1783,6 +1804,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
>  {
>      bool *enabled_array[] = {
>          &vms->highmem_redists,
> +        &vms->highmem_cxl,
>          &vms->highmem_ecam,
>          &vms->highmem_mmio,
>      };
> @@ -1890,6 +1912,9 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
>      if (device_memory_size > 0) {
>          machine_memory_devices_init(ms, device_memory_base, device_memory_size);
>      }
> +
> +    cxl_fmws_set_memmap(ROUND_UP(vms->highest_gpa + 1, 256 * MiB),
> +                        BIT_ULL(pa_bits));

Isn't this stomping over the HIGH_PCIE memory window (or
whatever else happens to be at the top end of memory) ?

Also taking highest_gpa and then rounding it up looks suspicious:
if it's the highest GPA then anything larger than that is off
the end of the physical address space.

Plus cxl_fmws_set_memmap() names its arguments base, max_addr:
"highest gpa, rounded up" doesn't sound like a base address.

(Looking at our current code that sets and adjusts highest_gpa,
it looks a bit weird: maybe we're setting it to values that aren't
what the variable name claims it's doing, and that's why this
code happens to work ?)

>  }
>
>  static VirtGICType finalize_gic_version_do(const char *accel_name,
> @@ -2340,6 +2365,8 @@ static void machvirt_init(MachineState *machine)
>      memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base,
>                                  machine->ram);
>
> +    cxl_fmws_update_mmio();
> +
>      virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
>
>      create_gic(vms, sysmem);
> @@ -2395,6 +2422,7 @@ static void machvirt_init(MachineState *machine)
>      create_rtc(vms);
>
>      create_pcie(vms);
> +    create_cxl_host_reg_region(vms);
>
>      if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
>          vms->acpi_dev = create_acpi_ged(vms);
> @@ -3365,6 +3393,7 @@ static void virt_instance_init(Object *obj)
>
>      vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
>      vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
> +    cxl_machine_init(obj, &vms->cxl_devices_state);
>  }

cxl defaults to disabled, right? (i.e. we don't need the
machine-version specific stuff to keep it from being enabled
on old versioned machine types).

thanks
-- PMM
Re: [PATCH v15 3/4] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl
Posted by Jonathan Cameron via 5 months ago
On Fri, 13 Jun 2025 13:57:39 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Thu, 12 Jun 2025 at 14:45, Jonathan Cameron
> <Jonathan.Cameron@huawei.com> wrote:
> >
> > Code based on i386/pc enablement. The memory layout places space for 16
> > host bridge register regions after the GIC_REDIST2 in the extended memmap.
> > The CFMWs are placed above the extended memmap.
> >
> > Only create the CEDT table if cxl=on set for the machine.
> >
> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> >
> > ---
> > v15: No changes.
> > ---
> >  include/hw/arm/virt.h    |  4 ++++
> >  hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++++++++++++++++
> >  hw/arm/virt.c            | 29 +++++++++++++++++++++++++++++
> >  3 files changed, 67 insertions(+)  
> 
Hi Peter,

Thanks for reviewing.

> Can we have some user-facing documentation, please?
> (docs/system/arm/virt.rst -- can just be a brief mention
> and link to docs/system/devices/cxl.rst if you want to put the
> examples of aarch64 use in there.)

Given the examples should look exactly like those for x86/pc, do we need
extra examples in cxl.rst? I guess I can add one simple arm/virt example
in a follow up patch without bloating that file too badly..

Is the following sufficient for the board specific docs?

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 6a719b9586..10cbffc8a7 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -31,6 +31,7 @@ Supported devices
 The virt board supports:

 - PCI/PCIe devices
+- CXL Fixed memory windows, root bridges and devices.
 - Flash memory
 - Either one or two PL011 UARTs for the NonSecure World
 - An RTC
@@ -189,6 +190,14 @@ ras
 acpi
   Set ``on``/``off``/``auto`` to enable/disable ACPI.

+cxl
+  Set  ``on``/``off`` to enable/disable CXL. More details in
+  :doc:`../devices/cxl`. The default is off.
+
+cxl-fmw
+  Array of CXL fixed memory windows describing fixed address routing to
+  target CXL host bridges. See :doc:`../devices/cxl`.
+
 dtb-randomness
   Set ``on``/``off`` to pass random seeds via the guest DTB
   rng-seed and kaslr-seed nodes (in both "/chosen" and

> 
> > @@ -220,6 +223,7 @@ static const MemMapEntry base_memmap[] = {
> >  static MemMapEntry extended_memmap[] = {
> >      /* Additional 64 MB redist region (can contain up to 512 redistributors) */
> >      [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
> > +    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */  
> 
> This is going to shuffle the memory map around, even if CXL
> isn't enabled, which will break migration compatibility.
> You need to do something to ensure that the CXL region isn't
> included in the calculations of the base addresses for these
> regions if CXL isn't enabled.
> 

It doesn't move any existing stuff because these are naturally aligned
regions so this is in a gap before the PCIE ECAM region.

> >      [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
> >      /* Second PCIe window */
> >      [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },  
> 
> If you're OK with having the CXL host region at the end of the
> list then that's a simpler way to avoid its presence disturbing
> the layout of the existing regions, but you might not like it
> being at such a high physaddr.

From what I recall a higher address isn't a problem I just wanted to not waste any
PA space at all so used the gap.

Chunk of /proc/iomem with a random test case (in first case with the cxl bits
removed as obvious that doesn't start until this patch is in place).
Need more than 123 cpus to make the second gicv3 redist appear
(I've no idea why that number I just printed the threshold where
it was calculated to find out what I needed to wait for boot on).

before this patch.

13ffe0000-13fffffff : System RAM
  13ffe0000-13ffe1fff : reserved
  13ffe2000-13ffe2fff : reserved
  13ffe3000-13fffffff : reserved
4000000000-4003ffffff : GICR
4010000000-401fffffff : PCI ECAM
8000000000-ffffffffff : PCI Bus 0000:00
  8000000000-80001fffff : PCI Bus 0000:01
  8000200000-8000203fff : 0000:00:02.0
    8000200000-8000203fff : virtio-pci-modern
  8000204000-8000207fff : 0000:00:03.0
    8000204000-8000207fff : virtio-pci-modern

after:

13ffe0000-13fffffff : System RAM
  13ffe0000-13ffe2fff : reserved
  13ffe3000-13fff3fff : reserved
  13fff4000-13fffffff : reserved
4000000000-4003ffffff : GICR
4004001128-40040011b7 : port1
4010000000-4010bfffff : PCI ECAM
4010c00000-4010efffff : PCI ECAM
8000000000-80000fffff : PCI Bus 0000:00
  8000000000-8000003fff : 0000:00:02.0
    8000000000-8000003fff : virtio-pci-modern
  8000004000-8000007fff : 0000:00:03.0
    8000004000-8000007fff : virtio-pci-modern

The extra ECAM is the PXB stuff kicking in and stealing part of the ECAM
region of the main host bridge.

The CXL bit we care about here is port1. Despite the name which is
an artifact of how linux represents the topology, that is
the registers for the CXL PXB root bridge.


> 
> > @@ -1621,6 +1625,17 @@ static void create_pcie(VirtMachineState *vms)
> >      }
> >  }
> >
> > +static void create_cxl_host_reg_region(VirtMachineState *vms)
> > +{
> > +    MemoryRegion *sysmem = get_system_memory();
> > +    MemoryRegion *mr = &vms->cxl_devices_state.host_mr;  
> 
> This looks odd -- why are we reaching directly into the cxl_devices_state
> to fish out a MemoryRegion and init it?

cxl_devices_state state is really just grouping the CXL host stuff that
every cxl host needs so this perhaps looks like more than it actually is.
Maybe that needs a rename. It does less than used to after patch 2 took
some of the Fixed memory stuff out of there.

typedef struct CXLState {
    bool is_enabled;
    MemoryRegion host_mr;
    unsigned int next_mr_idx;
    CXLFixedMemoryWindowOptionsList *cfmw_list;
} CXLState;

It has two purposes.  The cfmw_list and is_enabled are so that
we can use cxl_host_init() to unify setting up the CXL specific machine
properties.

host_mr and next_mr_idx are for a very simple fixed chunk address space
allocator for the PXB-CXL base registers. This is a little bit nasty but
it allows the PXBs to be initialized with -device and yet have mmio
address ranges. Bit similar to the runtime instantiation of SMMUs
problem Shameer was running into, but a simpler one.  I couldn't
find a way to use sysbus or similar here because we also need this
PXB device to exist on the main PCI bus like any other PCI expander bridge
- it just needs these extra registers in the main memory map that are then
pointed to by ACPI table entries. These are what is known as RCRB in PCI
terms (Root complex register base).

See implementation of pxb_cxl_hook_up_registers() call in
virt_machine_done() that stitches this together once we know what
pxb-cxl devices are present and maps sub regions into this container.

I'd be happy to have suggestions on how to do this better as I've never
been fond of this bit.

One option might be to just make this more explicit by putting host_mr and
next_mr_idx directly in the virt machine state and passing them
as separate parameters to pxb_cxl_hook_up_registers(). I'm not sure
that gains much though.  Another path would be to wrap the region init
and add subregion up in a helper function called from here and i386/pc.c
That would mean the name at least was explicitly shared. Not sure it would
bring much other benefit.


> 
> > +    memory_region_init(mr, OBJECT(vms), "cxl_host_reg",
> > +                       vms->memmap[VIRT_CXL_HOST].size);
> > +    memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr);
> > +    vms->highmem_cxl = true;
> > +}
> > +
> >  static void create_platform_bus(VirtMachineState *vms)
> >  {
> >      DeviceState *dev;
> > @@ -1737,6 +1752,12 @@ void virt_machine_done(Notifier *notifier, void *data)
> >      struct arm_boot_info *info = &vms->bootinfo;
> >      AddressSpace *as = arm_boot_address_space(cpu, info);
> >
> > +    cxl_hook_up_pxb_registers(vms->bus, &vms->cxl_devices_state,
> > +                              &error_fatal);
> > +
> > +    if (vms->cxl_devices_state.is_enabled) {
> > +        cxl_fmws_link_targets(&error_fatal);
> > +    }
> >      /*
> >       * If the user provided a dtb, we assume the dynamic sysbus nodes
> >       * already are integrated there. This corresponds to a use case where
> > @@ -1783,6 +1804,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
> >  {
> >      bool *enabled_array[] = {
> >          &vms->highmem_redists,
> > +        &vms->highmem_cxl,
> >          &vms->highmem_ecam,
> >          &vms->highmem_mmio,
> >      };
> > @@ -1890,6 +1912,9 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
> >      if (device_memory_size > 0) {
> >          machine_memory_devices_init(ms, device_memory_base, device_memory_size);
> >      }
> > +
> > +    cxl_fmws_set_memmap(ROUND_UP(vms->highest_gpa + 1, 256 * MiB),
> > +                        BIT_ULL(pa_bits));  
> 
> Isn't this stomping over the HIGH_PCIE memory window (or
> whatever else happens to be at the top end of memory) ?

I'll admit the way that these address maps are calculated has caused
me several headaches.  The intention was to go above everything so as to
avoid any changes to the existing map.

> 
> Also taking highest_gpa and then rounding it up looks suspicious:
> if it's the highest GPA then anything larger than that is off
> the end of the physical address space.
> 
> Plus cxl_fmws_set_memmap() names its arguments base, max_addr:
> "highest gpa, rounded up" doesn't sound like a base address.
> 
> (Looking at our current code that sets and adjusts highest_gpa,
> it looks a bit weird: maybe we're setting it to values that aren't
> what the variable name claims it's doing, and that's why this
> code happens to work ?)

I think highest_gpa is a running value of how high we have allocated
to something, not a maximum that is supported.  E.g. what is going on
in virt_set_high_memmap()

I guess I should update vms->highest_gpa.  I think nothing uses it after this
but they might in future.

+    vms->highest_gpa = cxl_fmws_set_memmap(ROUND_UP(vms->highest_gpa + 1, 256 * MiB),
+                                           BIT_ULL(pa_bits)) - 1;

should update it correctly. In i386/pc.c we already updated the memory map top address
tracking so that function already returns the start of the next region (hence the - 1
is needed - that is similar to the call for virt_set_high_memmap() a few lines up.

I think that will also result in more elegant failure on CPUs with limited
address space than we had before as that is checked in virt_cpu_post_init()

Using an a55 for instance fails (Fujitsu folk ran into this a while back)
as we need more than 40 bits.

Just for reference here's the relevant bit of /proc/iomem
for a test setup intended to poke some of these corners.

8000000000-80000fffff : PCI Bus 0000:00
  8000000000-8000003fff : 0000:00:02.0
    8000000000-8000003fff : virtio-pci-modern
  8000004000-8000007fff : 0000:00:03.0
    8000004000-8000007fff : virtio-pci-modern
8000100000-800011ffff : PCI Bus 0000:0c
  8000100000-800010ffff : 0000:0c:00.0
    8000101080-80001010d7 : mem0
  8000110000-800011ffff : 0000:0c:01.0
    8000111080-80001110d7 : mem1
8000120000-ffffffffff : PCI Bus 0000:00
  8000200000-80003fffff : PCI Bus 0000:01
10000000000-101ffffffff : CXL Window 0
  10000000000-1000fffffff : region0
    10000000000-1000fffffff : dax0.0
      10000000000-1000fffffff : System RAM (kmem)

So the windows sits above the PCI region.

> 
> >  }
> >
> >  static VirtGICType finalize_gic_version_do(const char *accel_name,
> > @@ -2340,6 +2365,8 @@ static void machvirt_init(MachineState *machine)
> >      memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base,
> >                                  machine->ram);
> >
> > +    cxl_fmws_update_mmio();
> > +
> >      virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
> >
> >      create_gic(vms, sysmem);
> > @@ -2395,6 +2422,7 @@ static void machvirt_init(MachineState *machine)
> >      create_rtc(vms);
> >
> >      create_pcie(vms);
> > +    create_cxl_host_reg_region(vms);
> >
> >      if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
> >          vms->acpi_dev = create_acpi_ged(vms);
> > @@ -3365,6 +3393,7 @@ static void virt_instance_init(Object *obj)
> >
> >      vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
> >      vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
> > +    cxl_machine_init(obj, &vms->cxl_devices_state);
> >  }  
> 
> cxl defaults to disabled, right? (i.e. we don't need the
> machine-version specific stuff to keep it from being enabled
> on old versioned machine types).

It is indeed defaulting to disabled.

J

> 
> thanks
> -- PMM
Re: [PATCH v15 3/4] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl
Posted by Peter Maydell 5 months ago
On Fri, 13 Jun 2025 at 16:20, Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Fri, 13 Jun 2025 13:57:39 +0100
> Peter Maydell <peter.maydell@linaro.org> wrote:
>
> > On Thu, 12 Jun 2025 at 14:45, Jonathan Cameron
> > <Jonathan.Cameron@huawei.com> wrote:
> > >
> > > Code based on i386/pc enablement. The memory layout places space for 16
> > > host bridge register regions after the GIC_REDIST2 in the extended memmap.
> > > The CFMWs are placed above the extended memmap.
> > >
> > > Only create the CEDT table if cxl=on set for the machine.
> > >
> > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > >
> > > ---
> > > v15: No changes.
> > > ---
> > >  include/hw/arm/virt.h    |  4 ++++
> > >  hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++++++++++++++++
> > >  hw/arm/virt.c            | 29 +++++++++++++++++++++++++++++
> > >  3 files changed, 67 insertions(+)
> >
> Hi Peter,
>
> Thanks for reviewing.
>
> > Can we have some user-facing documentation, please?
> > (docs/system/arm/virt.rst -- can just be a brief mention
> > and link to docs/system/devices/cxl.rst if you want to put the
> > examples of aarch64 use in there.)
>
> Given the examples should look exactly like those for x86/pc, do we need
> extra examples in cxl.rst? I guess I can add one simple arm/virt example
> in a follow up patch without bloating that file too badly..

That's fine too -- if the answer is "all these command lines work
for aarch64 too", then you can just say that in cxl.rst.

> Is the following sufficient for the board specific docs?
>
> diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
> index 6a719b9586..10cbffc8a7 100644
> --- a/docs/system/arm/virt.rst
> +++ b/docs/system/arm/virt.rst
> @@ -31,6 +31,7 @@ Supported devices
>  The virt board supports:
>
>  - PCI/PCIe devices
> +- CXL Fixed memory windows, root bridges and devices.
>  - Flash memory
>  - Either one or two PL011 UARTs for the NonSecure World
>  - An RTC
> @@ -189,6 +190,14 @@ ras
>  acpi
>    Set ``on``/``off``/``auto`` to enable/disable ACPI.
>
> +cxl
> +  Set  ``on``/``off`` to enable/disable CXL. More details in
> +  :doc:`../devices/cxl`. The default is off.
> +
> +cxl-fmw
> +  Array of CXL fixed memory windows describing fixed address routing to
> +  target CXL host bridges. See :doc:`../devices/cxl`.
> +
>  dtb-randomness
>    Set ``on``/``off`` to pass random seeds via the guest DTB
>    rng-seed and kaslr-seed nodes (in both "/chosen" and

Looks OK.

> >
> > > @@ -220,6 +223,7 @@ static const MemMapEntry base_memmap[] = {
> > >  static MemMapEntry extended_memmap[] = {
> > >      /* Additional 64 MB redist region (can contain up to 512 redistributors) */
> > >      [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
> > > +    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */
> >
> > This is going to shuffle the memory map around, even if CXL
> > isn't enabled, which will break migration compatibility.
> > You need to do something to ensure that the CXL region isn't
> > included in the calculations of the base addresses for these
> > regions if CXL isn't enabled.
> >
>
> It doesn't move any existing stuff because these are naturally aligned
> regions so this is in a gap before the PCIE ECAM region.

Is that true even when we have the maximum number of CPUs and
so the max number of redistributors in that VIRT_HIGH_GIC_REDIST2
region ?

> > >      [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
> > >      /* Second PCIe window */
> > >      [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },
> >
> > If you're OK with having the CXL host region at the end of the
> > list then that's a simpler way to avoid its presence disturbing
> > the layout of the existing regions, but you might not like it
> > being at such a high physaddr.
>
> From what I recall a higher address isn't a problem I just wanted to not waste any
> PA space at all so used the gap.
>
> Chunk of /proc/iomem with a random test case (in first case with the cxl bits
> removed as obvious that doesn't start until this patch is in place).
> Need more than 123 cpus to make the second gicv3 redist appear
> (I've no idea why that number I just printed the threshold where
> it was calculated to find out what I needed to wait for boot on).

It's 123 because that's the most redistributors we can fit into
the lower redistributor region. (We didn't really allow enough
space in the lower memory map, which is why we need this awkward
split setup.

(I have to run now, will look at the rest of the email next week.)

-- PMM
Re: [PATCH v15 3/4] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl
Posted by Jonathan Cameron via 5 months ago
On Fri, 13 Jun 2025 17:07:24 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Fri, 13 Jun 2025 at 16:20, Jonathan Cameron
> <Jonathan.Cameron@huawei.com> wrote:
> >
> > On Fri, 13 Jun 2025 13:57:39 +0100
> > Peter Maydell <peter.maydell@linaro.org> wrote:
> >  
> > > On Thu, 12 Jun 2025 at 14:45, Jonathan Cameron
> > > <Jonathan.Cameron@huawei.com> wrote:  
> > > >
> > > > Code based on i386/pc enablement. The memory layout places space for 16
> > > > host bridge register regions after the GIC_REDIST2 in the extended memmap.
> > > > The CFMWs are placed above the extended memmap.
> > > >
> > > > Only create the CEDT table if cxl=on set for the machine.
> > > >
> > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > >
> > > > ---
> > > > v15: No changes.
> > > > ---
> > > >  include/hw/arm/virt.h    |  4 ++++
> > > >  hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++++++++++++++++
> > > >  hw/arm/virt.c            | 29 +++++++++++++++++++++++++++++
> > > >  3 files changed, 67 insertions(+)  
> > >  
> > Hi Peter,
> >
> > Thanks for reviewing.
> >  
> > > Can we have some user-facing documentation, please?
> > > (docs/system/arm/virt.rst -- can just be a brief mention
> > > and link to docs/system/devices/cxl.rst if you want to put the
> > > examples of aarch64 use in there.)  
> >
> > Given the examples should look exactly like those for x86/pc, do we need
> > extra examples in cxl.rst? I guess I can add one simple arm/virt example
> > in a follow up patch without bloating that file too badly..  
> 
> That's fine too -- if the answer is "all these command lines work
> for aarch64 too", then you can just say that in cxl.rst.

I'll put an example in just to avoid people using a default
command line and getting an a55 with too small a PA range.

> 
> > Is the following sufficient for the board specific docs?
> >
> > diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
> > index 6a719b9586..10cbffc8a7 100644
> > --- a/docs/system/arm/virt.rst
> > +++ b/docs/system/arm/virt.rst
> > @@ -31,6 +31,7 @@ Supported devices
> >  The virt board supports:
> >
> >  - PCI/PCIe devices
> > +- CXL Fixed memory windows, root bridges and devices.
> >  - Flash memory
> >  - Either one or two PL011 UARTs for the NonSecure World
> >  - An RTC
> > @@ -189,6 +190,14 @@ ras
> >  acpi
> >    Set ``on``/``off``/``auto`` to enable/disable ACPI.
> >
> > +cxl
> > +  Set  ``on``/``off`` to enable/disable CXL. More details in
> > +  :doc:`../devices/cxl`. The default is off.
> > +
> > +cxl-fmw
> > +  Array of CXL fixed memory windows describing fixed address routing to
> > +  target CXL host bridges. See :doc:`../devices/cxl`.
> > +
> >  dtb-randomness
> >    Set ``on``/``off`` to pass random seeds via the guest DTB
> >    rng-seed and kaslr-seed nodes (in both "/chosen" and  
> 
> Looks OK.
> 
> > >  
> > > > @@ -220,6 +223,7 @@ static const MemMapEntry base_memmap[] = {
> > > >  static MemMapEntry extended_memmap[] = {
> > > >      /* Additional 64 MB redist region (can contain up to 512 redistributors) */
> > > >      [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
> > > > +    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */  
> > >
> > > This is going to shuffle the memory map around, even if CXL
> > > isn't enabled, which will break migration compatibility.
> > > You need to do something to ensure that the CXL region isn't
> > > included in the calculations of the base addresses for these
> > > regions if CXL isn't enabled.
> > >  
> >
> > It doesn't move any existing stuff because these are naturally aligned
> > regions so this is in a gap before the PCIE ECAM region.  
> 
> Is that true even when we have the maximum number of CPUs and
> so the max number of redistributors in that VIRT_HIGH_GIC_REDIST2
> region ?

Yes.   The gap is between that and the ECAM window.  The CXL RCRB
stuff doesn't move whether or not that is there.  Making sure it
is present does make it easier to see the gap though - hence example below.


> 
> > > >      [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
> > > >      /* Second PCIe window */
> > > >      [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },  
> > >
> > > If you're OK with having the CXL host region at the end of the
> > > list then that's a simpler way to avoid its presence disturbing
> > > the layout of the existing regions, but you might not like it
> > > being at such a high physaddr.  
> >
> > From what I recall a higher address isn't a problem I just wanted to not waste any
> > PA space at all so used the gap.
> >
> > Chunk of /proc/iomem with a random test case (in first case with the cxl bits
> > removed as obvious that doesn't start until this patch is in place).
> > Need more than 123 cpus to make the second gicv3 redist appear
> > (I've no idea why that number I just printed the threshold where
> > it was calculated to find out what I needed to wait for boot on).  
> 
> It's 123 because that's the most redistributors we can fit into
> the lower redistributor region. (We didn't really allow enough
> space in the lower memory map, which is why we need this awkward
> split setup.
> 
> (I have to run now, will look at the rest of the email next week.)
Thanks and have a good weekend.

J
> 
> -- PMM
Re: [PATCH v15 3/4] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl
Posted by Jonathan Cameron via 4 months, 3 weeks ago
On Fri, 13 Jun 2025 18:21:25 +0100
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> On Fri, 13 Jun 2025 17:07:24 +0100
> Peter Maydell <peter.maydell@linaro.org> wrote:
> 
> > On Fri, 13 Jun 2025 at 16:20, Jonathan Cameron
> > <Jonathan.Cameron@huawei.com> wrote:  
> > >
> > > On Fri, 13 Jun 2025 13:57:39 +0100
> > > Peter Maydell <peter.maydell@linaro.org> wrote:
> > >    
> > > > On Thu, 12 Jun 2025 at 14:45, Jonathan Cameron
> > > > <Jonathan.Cameron@huawei.com> wrote:    
> > > > >
> > > > > Code based on i386/pc enablement. The memory layout places space for 16
> > > > > host bridge register regions after the GIC_REDIST2 in the extended memmap.
> > > > > The CFMWs are placed above the extended memmap.
> > > > >
> > > > > Only create the CEDT table if cxl=on set for the machine.
> > > > >
> > > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > > >
> > > > > ---
> > > > > v15: No changes.
> > > > > ---
> > > > >  include/hw/arm/virt.h    |  4 ++++
> > > > >  hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++++++++++++++++
> > > > >  hw/arm/virt.c            | 29 +++++++++++++++++++++++++++++
> > > > >  3 files changed, 67 insertions(+)    
> > > >    
> > > Hi Peter,
> > >
> > > Thanks for reviewing.
> > >    
> > > > Can we have some user-facing documentation, please?
> > > > (docs/system/arm/virt.rst -- can just be a brief mention
> > > > and link to docs/system/devices/cxl.rst if you want to put the
> > > > examples of aarch64 use in there.)    
> > >
> > > Given the examples should look exactly like those for x86/pc, do we need
> > > extra examples in cxl.rst? I guess I can add one simple arm/virt example
> > > in a follow up patch without bloating that file too badly..    
> > 
> > That's fine too -- if the answer is "all these command lines work
> > for aarch64 too", then you can just say that in cxl.rst.  
> 
> I'll put an example in just to avoid people using a default
> command line and getting an a55 with too small a PA range.
> 
> >   
> > > Is the following sufficient for the board specific docs?
> > >
> > > diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
> > > index 6a719b9586..10cbffc8a7 100644
> > > --- a/docs/system/arm/virt.rst
> > > +++ b/docs/system/arm/virt.rst
> > > @@ -31,6 +31,7 @@ Supported devices
> > >  The virt board supports:
> > >
> > >  - PCI/PCIe devices
> > > +- CXL Fixed memory windows, root bridges and devices.
> > >  - Flash memory
> > >  - Either one or two PL011 UARTs for the NonSecure World
> > >  - An RTC
> > > @@ -189,6 +190,14 @@ ras
> > >  acpi
> > >    Set ``on``/``off``/``auto`` to enable/disable ACPI.
> > >
> > > +cxl
> > > +  Set  ``on``/``off`` to enable/disable CXL. More details in
> > > +  :doc:`../devices/cxl`. The default is off.
> > > +
> > > +cxl-fmw
> > > +  Array of CXL fixed memory windows describing fixed address routing to
> > > +  target CXL host bridges. See :doc:`../devices/cxl`.
> > > +
> > >  dtb-randomness
> > >    Set ``on``/``off`` to pass random seeds via the guest DTB
> > >    rng-seed and kaslr-seed nodes (in both "/chosen" and    
> > 
> > Looks OK.
> >   
> > > >    
> > > > > @@ -220,6 +223,7 @@ static const MemMapEntry base_memmap[] = {
> > > > >  static MemMapEntry extended_memmap[] = {
> > > > >      /* Additional 64 MB redist region (can contain up to 512 redistributors) */
> > > > >      [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
> > > > > +    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */    
> > > >
> > > > This is going to shuffle the memory map around, even if CXL
> > > > isn't enabled, which will break migration compatibility.
> > > > You need to do something to ensure that the CXL region isn't
> > > > included in the calculations of the base addresses for these
> > > > regions if CXL isn't enabled.
> > > >    
> > >
> > > It doesn't move any existing stuff because these are naturally aligned
> > > regions so this is in a gap before the PCIE ECAM region.    
> > 
> > Is that true even when we have the maximum number of CPUs and
> > so the max number of redistributors in that VIRT_HIGH_GIC_REDIST2
> > region ?  
> 
> Yes.   The gap is between that and the ECAM window.  The CXL RCRB
> stuff doesn't move whether or not that is there.  Making sure it
> is present does make it easier to see the gap though - hence example below.
> 
> 
> >   
> > > > >      [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
> > > > >      /* Second PCIe window */
> > > > >      [VIRT_HIGH_PCIE_MMIO] =     { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },    
> > > >
> > > > If you're OK with having the CXL host region at the end of the
> > > > list then that's a simpler way to avoid its presence disturbing
> > > > the layout of the existing regions, but you might not like it
> > > > being at such a high physaddr.    
> > >
> > > From what I recall a higher address isn't a problem I just wanted to not waste any
> > > PA space at all so used the gap.
> > >
> > > Chunk of /proc/iomem with a random test case (in first case with the cxl bits
> > > removed as obvious that doesn't start until this patch is in place).
> > > Need more than 123 cpus to make the second gicv3 redist appear
> > > (I've no idea why that number I just printed the threshold where
> > > it was calculated to find out what I needed to wait for boot on).    
> > 
> > It's 123 because that's the most redistributors we can fit into
> > the lower redistributor region. (We didn't really allow enough
> > space in the lower memory map, which is why we need this awkward
> > split setup.
> > 
> > (I have to run now, will look at the rest of the email next week.)  
> Thanks and have a good weekend.

Hi Peter,

I'll post a v16 with the changes discussed and for this patch include a few
comments on what we didn't yet resolve in this thread.

Thanks,

Jonathan

> 
> J
> > 
> > -- PMM  
> 
>