[RFC qemu 0/6] hw/cxl: Link speed and width control

Jonathan Cameron posted 6 patches 4 months, 2 weeks ago
git fetch https://github.com/patchew-project/qemu tags/patchew/20240712122414.1448284-1-Jonathan.Cameron@huawei.com
Maintainers: Jonathan Cameron <jonathan.cameron@huawei.com>, Fan Ni <fan.ni@samsung.com>, "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
There is a newer version of this series
[RFC qemu 0/6] hw/cxl: Link speed and width control
Posted by Jonathan Cameron 4 months, 2 weeks ago
Based-on: [PATCH v5 00/13] acpi: NUMA nodes for CXL HB as GP + complex NUMA test
Based-on: Message-ID: 20240712110837.1439736-1-Jonathan.Cameron@huawei.com

The Generic Ports support added the ability to describe the bandwidth and
latency within a host to a CXL host bridge.  To be able to test the
discovery path used by Linux [1] we also need to be able to create
bottlenecks at different places in the topology. There are three parts to
this:
* CXL link characteristics as described by PCI Express Capability Link
  status etc.
* Bandwidth and latency across CXL Switches (via CDAT data from the switch
  USP)
* Bandwidth and latency from the CXL type 3 device port to the actual
  memory (via CDAT data from the EP).

Currently we have fixed values for the CXL CDAT tables, and to test this
I recommend changing those as per the patch at the end of this cover letter
(so they aren't always the bottleneck). Making those configurable will be
handled in a future patch set.

RFC question:
- I could enable this for all PCIe devices (including ports).
  Does that make sense, or is it better to limit this to my cases?
  It is quite easy to build broken setups (downstream device reports a
  faster link than the port etc.) because QEMU 'link training' is
  simplistic.  I'm not sure it is worth making it more clever.
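  For example (values purely illustrative, not one of the test cases below),
  a mismatched pair such as
    -device cxl-rp,port=0,bus=cxl.1,id=cxl_rp_port0,chassis=0,slot=2,x-speed=8,x-width=4 \
    -device cxl-type3,bus=cxl_rp_port0,volatile-memdev=cxl-mem1,id=cxl-pmem1,lsa=cxl-lsa1,sn=3,x-speed=64,x-width=16 \
  leaves the endpoint reporting a faster link than its root port advertises.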

Test case snippets I promised Dave Jiang. These rely on the tweaks to
CDAT at the end of this cover letter.

Case 1:  Direct connected pair of type 3 nodes on same HB - type 3 limited
==========================================================================

 -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/t3_cxl1.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/t3_cxl2.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/t3_lsa1.raw,size=1M,align=1M \
 -object memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/t3_cxl3.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-mem4,share=on,mem-path=/tmp/t3_cxl4.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/t3_lsa2.raw,size=1M,align=1M \
 -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1,hdm_for_passthrough=true,numa_node=0 \
 -device cxl-rp,port=0,bus=cxl.1,id=cxl_rp_port0,chassis=0,slot=2 \
 -device cxl-rp,port=1,bus=cxl.1,id=cxl_rp_port1,chassis=0,slot=3 \
 -device cxl-type3,bus=cxl_rp_port0,volatile-memdev=cxl-mem1,id=cxl-pmem1,lsa=cxl-lsa1,sn=3,x-speed=32,x-width=16 \
 -device cxl-type3,bus=cxl_rp_port1,volatile-memdev=cxl-mem3,id=cxl-pmem2,lsa=cxl-lsa2,sn=5,x-speed=32,x-width=16 \
 -machine cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=8G,cxl-fmw.0.interleave-granularity=1k \
 -numa node,nodeid=0,cpus=0-3,memdev=mem0 \
 -numa node,nodeid=1 \
 -object acpi-generic-initiator,id=bob2,pci-dev=bob,node=1 \
 -numa node,nodeid=2 \
 -object acpi-generic-port,id=bob11,pci-bus=cxl.1,node=2 \
 -numa dist,src=0,dst=0,val=10 -numa dist,src=0,dst=1,val=21 -numa dist,src=0,dst=2,val=21 \
 -numa dist,src=1,dst=0,val=21 -numa dist,src=1,dst=1,val=10 -numa dist,src=1,dst=2,val=21 \
 -numa dist,src=2,dst=0,val=21 -numa dist,src=2,dst=1,val=21 -numa dist,src=2,dst=2,val=10 \
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=10 \
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=3G \
 -numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,latency=100 \
 -numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=100G \
 -numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,latency=500 \
 -numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
 -numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,latency=50 \
 -numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=100G

Set x-width to 2 and it will be link limited.

 -device cxl-type3,bus=cxl_rp_port0,volatile-memdev=cxl-mem1,id=cxl-pmem1,lsa=cxl-lsa1,sn=3,x-speed=32,x-width=2 \
 -device cxl-type3,bus=cxl_rp_port1,volatile-memdev=cxl-mem3,id=cxl-pmem2,lsa=cxl-lsa2,sn=5,x-speed=32,x-width=2 \
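
(Rough numbers: a x2 link at 32 GT/s is only about 8 GB/s, well below the
~48 GB/s per-device bandwidth from the CDAT tweak at the end of this letter,
so the link becomes the bottleneck; at x16 the link is roughly 63 GB/s and
the device bandwidth limits instead.)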

Case 2: Switch connected type 3.
================================

 -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/t3_cxl1.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/t3_cxl3.raw,size=1G,align=256M \
 -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1,hdm_for_passthrough=true \
 -device cxl-rp,port=0,bus=cxl.1,id=cxl_rp_port0,chassis=0,slot=2,mem-reserve=128M,hotplug=true \
 -device cxl-upstream,port=33,bus=cxl_rp_port0,id=us0,multifunction=on,addr=0.0,sn=33,x-speed=64,x-width=16 \
 -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4,hotplug=true \
 -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5,hotplug=true \
 -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem1,id=cxl-pmem1,sn=3,x-speed=32,x-width=16 \
 -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem3,id=cxl-pmem2,sn=5,x-speed=32,x-width=16 \
 -machine cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=1k \
 -numa node,nodeid=0,cpus=0-3,memdev=mem0 \
 -numa node,nodeid=1 \
 -object acpi-generic-initiator,id=bob2,pci-dev=bob,node=1 \
 -numa node,nodeid=2 \
 -object acpi-generic-port,id=bob11,pci-bus=cxl.1,node=2 \
 -numa dist,src=0,dst=0,val=10 -numa dist,src=0,dst=1,val=21 -numa dist,src=0,dst=2,val=21 \
 -numa dist,src=1,dst=0,val=21 -numa dist,src=1,dst=1,val=10 -numa dist,src=1,dst=2,val=21 \
 -numa dist,src=2,dst=0,val=21 -numa dist,src=2,dst=1,val=21 -numa dist,src=2,dst=2,val=10 \
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=10 \
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=800M \
 -numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,latency=100 \
 -numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=200G \
 -numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,latency=500 \
 -numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
 -numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,latency=50 \
 -numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=200G

Starting from this... (modified device lines are shown below)
* Changing the links for the two type 3 devices to x-speed=16,x-width=4 will
  make it leaf link limited.
* Changing the link for the USP to x-speed=32,x-width=8 will make it head
  link limited.
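
Only the x-speed/x-width values change relative to the Case 2 command line
above.  Rough numbers: 16 GT/s x4 is about 8 GB/s per leaf link and 32 GT/s
x8 is about 32 GB/s on the shared head link, so each becomes the bottleneck
given the tweaked CDAT values at the end of this letter.

Leaf link limited:

 -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem1,id=cxl-pmem1,sn=3,x-speed=16,x-width=4 \
 -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem3,id=cxl-pmem2,sn=5,x-speed=16,x-width=4 \

Head link limited:

 -device cxl-upstream,port=33,bus=cxl_rp_port0,id=us0,multifunction=on,addr=0.0,sn=33,x-speed=32,x-width=8 \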

Case 3: 2 GP, 2 HB, direct connected type 3.
============================================

GP limited configuration (found bug in v6 of kernel series)

 -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/t3_cxl1.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/t3_cxl2.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/t3_lsa1.raw,size=1M,align=1M \
 -object memory-backend-file,id=cxl-mem3,share=on,mem-path=/tmp/t3_cxl3.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-mem4,share=on,mem-path=/tmp/t3_cxl4.raw,size=1G,align=256M \
 -object memory-backend-file,id=cxl-lsa2,share=on,mem-path=/tmp/t3_lsa2.raw,size=1M,align=1M \
 -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1,hdm_for_passthrough=true,numa_node=2 \
 -device pxb-cxl,bus_nr=22,bus=pcie.0,id=cxl.2,hdm_for_passthrough=true,numa_node=3 \
 -device cxl-rp,port=0,bus=cxl.1,id=cxl_rp_port0,chassis=0,slot=2 \
 -device cxl-rp,port=1,bus=cxl.2,id=cxl_rp_port1,chassis=0,slot=3 \
 -device cxl-type3,bus=cxl_rp_port0,volatile-memdev=cxl-mem1,id=cxl-pmem1,lsa=cxl-lsa1,sn=3,x-speed=32,x-width=16 \
 -device cxl-type3,bus=cxl_rp_port1,volatile-memdev=cxl-mem3,id=cxl-pmem2,lsa=cxl-lsa2,sn=5,x-speed=32,x-width=16 \
 -machine cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.targets.1=cxl.2,cxl-fmw.0.size=8G,cxl-fmw.0.interleave-granularity=1k \
 -numa node,nodeid=0,cpus=0-3,memdev=mem0 \
 -numa node,nodeid=1 \
 -object acpi-generic-initiator,id=bob2,pci-dev=bob,node=1 \
 -numa node,nodeid=2 \
 -object acpi-generic-port,id=bob11,pci-bus=cxl.1,node=2 \
 -numa node,nodeid=3 \
 -object acpi-generic-port,id=bob12,pci-bus=cxl.2,node=3 \
 -numa dist,src=0,dst=0,val=10 -numa dist,src=0,dst=1,val=21 -numa dist,src=0,dst=2,val=21 -numa dist,src=0,dst=3,val=21 \
 -numa dist,src=1,dst=0,val=21 -numa dist,src=1,dst=1,val=10 -numa dist,src=1,dst=2,val=21 -numa dist,src=1,dst=3,val=21 \
 -numa dist,src=2,dst=0,val=21 -numa dist,src=2,dst=1,val=21 -numa dist,src=2,dst=2,val=10 -numa dist,src=2,dst=3,val=21 \
 -numa dist,src=3,dst=0,val=21 -numa dist,src=3,dst=1,val=21 -numa dist,src=3,dst=2,val=21 -numa dist,src=3,dst=3,val=10 \
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=10 \
 -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=3G \
 -numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,latency=100 \
 -numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=13G \
 -numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,latency=100 \
 -numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,bandwidth=13G \
 -numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,latency=500 \
 -numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
 -numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,latency=50 \
 -numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=20G \
 -numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,latency=50 \
 -numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,bandwidth=20G


[1] https://lore.kernel.org/linux-cxl/20240710222716.797267-1-dave.jiang@intel.com

Suggested tweaks to the CDAT values.

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 88f117576d..fe5fc8cb6c 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -109,7 +109,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
         .flags = HMAT_LB_MEM_MEMORY,
         .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
         .entry_base_unit = 1000, /* GB/s */
-        .entry[0] = 16,
+        .entry[0] = 48,
     };

     dslbis3 = g_malloc(sizeof(*dslbis3));
@@ -122,7 +122,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
         .flags = HMAT_LB_MEM_MEMORY,
         .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
         .entry_base_unit = 1000, /* GB/s */
-        .entry[0] = 16,
+        .entry[0] = 48,
     };

     dsemts = g_malloc(sizeof(*dsemts));
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index 70cb06436d..85b114d3ce 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -319,5 +319,5 @@ static int build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
         sslbis_bandwidth->sslbe[i] = (CDATSslbe) {
             .port_x_id = CDAT_PORT_ID_USP,
             .port_y_id = port_ids[i],
-            .latency_bandwidth = 16, /* 16 GB/s */
+            .latency_bandwidth = 128, /* 128 GB/s */
         };

Jonathan Cameron (6):
  hw/pci-bridge/cxl_root_port: Provide x-speed and x-width properties.
  hw/pci-bridge/cxl_downstream: Provide x-speed and x-width properties.
  hw/pcie: Factor out PCI Express link register filling common to EP.
  hw/pcie: Provide a utility function for control of EP / SW USP link
  hw/mem/cxl-type3: Add properties to control link speed and width
  hw/pci-bridge/cxl-upstream: Add properties to control link speed and
    width

 include/hw/cxl/cxl_device.h               |   4 +
 include/hw/pci-bridge/cxl_upstream_port.h |   4 +
 include/hw/pci/pcie.h                     |   2 +
 hw/mem/cxl_type3.c                        |   6 ++
 hw/pci-bridge/cxl_downstream.c            |  23 +++--
 hw/pci-bridge/cxl_root_port.c             |   5 ++
 hw/pci-bridge/cxl_upstream.c              |   6 ++
 hw/pci/pcie.c                             | 105 ++++++++++++++--------
 8 files changed, 103 insertions(+), 52 deletions(-)

-- 
2.43.0
Re: [RFC qemu 0/6] hw/cxl: Link speed and width control
Posted by Jonathan Cameron 2 months, 3 weeks ago
On Fri, 12 Jul 2024 13:24:08 +0100
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> Based-on: [PATCH v5 00/13] acpi: NUMA nodes for CXL HB as GP + complex NUMA test
> Based-on: Message-ID: 20240712110837.1439736-1-Jonathan.Cameron@huawei.com

Hi All,

I'd like to get this missing piece into 9.2.  So if anyone has time to give
it a quick look, that would be much appreciated, as then it may be in a good
state nice and early in the cycle.  In particular, see the RFC question in
the cover letter above.

If not, I'll send a non-RFC version early in the 9.2 cycle and see if that
gets more reaction.  I fully appreciate people are busy!

This support is proving helpful for testing the improved kernel handling of
bottlenecks on shared links, and I'd like to have an upstream QEMU path for
testing the full 'discoverable latency + bandwidth from CPUs (and similar
initiators)' part of CXL that's needed for tiered memory management.

Jonathan
[RFC qemu 1/6] hw/pci-bridge/cxl_root_port: Provide x-speed and x-width properties.
Posted by Jonathan Cameron 4 months, 2 weeks ago
Approach copied from gen_pcie_root_port.c.
Previously the link defaulted to a maximum of 2.5GT/s and x1.  Enable setting
its maximum values.  The actual value after 'training' will depend on the
downstream device configuration.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 hw/pci-bridge/cxl_root_port.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
index 2dd10239bd..5e2156d7ba 100644
--- a/hw/pci-bridge/cxl_root_port.c
+++ b/hw/pci-bridge/cxl_root_port.c
@@ -24,6 +24,7 @@
 #include "hw/pci/pcie_port.h"
 #include "hw/pci/msi.h"
 #include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "hw/sysbus.h"
 #include "qapi/error.h"
 #include "hw/cxl/cxl.h"
@@ -206,6 +207,10 @@ static Property gen_rp_props[] = {
                      -1),
     DEFINE_PROP_SIZE("pref64-reserve", CXLRootPort, res_reserve.mem_pref_64,
                      -1),
+    DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot,
+                                speed, PCIE_LINK_SPEED_64),
+    DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot,
+                                width, PCIE_LINK_WIDTH_32),
     DEFINE_PROP_END_OF_LIST()
 };
 
-- 
2.43.0
[RFC qemu 2/6] hw/pci-bridge/cxl_downstream: Provide x-speed and x-width properties.
Posted by Jonathan Cameron 4 months, 2 weeks ago
Copied from gen_pcie_root_port.c.
Drop the previous code that ensured a valid value in s->width and s->speed,
as a default is now provided so these will always be set.

Note this changes the default settings, but it is unlikely to have a negative
effect on software as it will only affect ports with no downstream device.
All other ports will use the settings from that device.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 hw/pci-bridge/cxl_downstream.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index 742da07a01..c0b4fac735 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -13,6 +13,8 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/pcie.h"
 #include "hw/pci/pcie_port.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "hw/cxl/cxl.h"
 #include "qapi/error.h"
 
@@ -210,24 +212,20 @@ static void cxl_dsp_exitfn(PCIDevice *d)
     pci_bridge_exitfn(d);
 }
 
-static void cxl_dsp_instance_post_init(Object *obj)
-{
-    PCIESlot *s = PCIE_SLOT(obj);
-
-    if (!s->speed) {
-        s->speed = QEMU_PCI_EXP_LNK_2_5GT;
-    }
-
-    if (!s->width) {
-        s->width = QEMU_PCI_EXP_LNK_X1;
-    }
-}
+static Property cxl_dsp_props[] = {
+    DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot,
+                                speed, PCIE_LINK_SPEED_64),
+    DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot,
+                                width, PCIE_LINK_WIDTH_16),
+    DEFINE_PROP_END_OF_LIST()
+};
 
 static void cxl_dsp_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(oc);
 
+    device_class_set_props(dc, cxl_dsp_props);
     k->config_write = cxl_dsp_config_write;
     k->realize = cxl_dsp_realize;
     k->exit = cxl_dsp_exitfn;
@@ -243,7 +241,6 @@ static const TypeInfo cxl_dsp_info = {
     .name = TYPE_CXL_DSP,
     .instance_size = sizeof(CXLDownstreamPort),
     .parent = TYPE_PCIE_SLOT,
-    .instance_post_init = cxl_dsp_instance_post_init,
     .class_init = cxl_dsp_class_init,
     .interfaces = (InterfaceInfo[]) {
         { INTERFACE_PCIE_DEVICE },
-- 
2.43.0
[RFC qemu 3/6] hw/pcie: Factor out PCI Express link register filling common to EP.
Posted by Jonathan Cameron 4 months, 2 weeks ago
Whilst not all link related registers are common between RP / Switch DSP
and EP / Switch USP, many of them are.  Factor that group out to save
on duplication when adding EP / Switch USP configurability.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 hw/pci/pcie.c | 87 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 4b2f0805c6..b14d59573e 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -105,46 +105,18 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
     pci_set_word(cmask + PCI_EXP_LNKSTA, 0);
 }
 
-static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
+/* Includes setting the target speed default */
+static void pcie_cap_fill_lnk(uint8_t *exp_cap, PCIExpLinkWidth width,
+                              PCIExpLinkSpeed speed)
 {
-    PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
-    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
-
-    /* Skip anything that isn't a PCIESlot */
-    if (!s) {
-        return;
-    }
-
     /* Clear and fill LNKCAP from what was configured above */
     pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP,
                                  PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
     pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
-                               QEMU_PCI_EXP_LNKCAP_MLW(s->width) |
-                               QEMU_PCI_EXP_LNKCAP_MLS(s->speed));
-
-    /*
-     * Link bandwidth notification is required for all root ports and
-     * downstream ports supporting links wider than x1 or multiple link
-     * speeds.
-     */
-    if (s->width > QEMU_PCI_EXP_LNK_X1 ||
-        s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
-        pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
-                                   PCI_EXP_LNKCAP_LBNC);
-    }
-
-    if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
-        /*
-         * Hot-plug capable downstream ports and downstream ports supporting
-         * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC
-         * to 1b.  PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which
-         * we also hardwire to 1b here.  2.5GT/s hot-plug slots should also
-         * technically implement this, but it's not done here for compatibility.
-         */
-        pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
-                                   PCI_EXP_LNKCAP_DLLLARC);
-        /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */
+                               QEMU_PCI_EXP_LNKCAP_MLW(width) |
+                               QEMU_PCI_EXP_LNKCAP_MLS(speed));
 
+    if (speed > QEMU_PCI_EXP_LNK_2_5GT) {
         /*
          * Target Link Speed defaults to the highest link speed supported by
          * the component.  2.5GT/s devices are permitted to hardwire to zero.
@@ -152,7 +124,7 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
         pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2,
                                      PCI_EXP_LNKCTL2_TLS);
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2,
-                                   QEMU_PCI_EXP_LNKCAP_MLS(s->speed) &
+                                   QEMU_PCI_EXP_LNKCAP_MLS(speed) &
                                    PCI_EXP_LNKCTL2_TLS);
     }
 
@@ -161,25 +133,62 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
      * actually a reference to the highest bit supported in this register.
      * We assume the device supports all link speeds.
      */
-    if (s->speed > QEMU_PCI_EXP_LNK_5GT) {
+    if (speed > QEMU_PCI_EXP_LNK_5GT) {
         pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U);
         pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
                                    PCI_EXP_LNKCAP2_SLS_2_5GB |
                                    PCI_EXP_LNKCAP2_SLS_5_0GB |
                                    PCI_EXP_LNKCAP2_SLS_8_0GB);
-        if (s->speed > QEMU_PCI_EXP_LNK_8GT) {
+        if (speed > QEMU_PCI_EXP_LNK_8GT) {
             pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
                                        PCI_EXP_LNKCAP2_SLS_16_0GB);
         }
-        if (s->speed > QEMU_PCI_EXP_LNK_16GT) {
+        if (speed > QEMU_PCI_EXP_LNK_16GT) {
             pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
                                        PCI_EXP_LNKCAP2_SLS_32_0GB);
         }
-        if (s->speed > QEMU_PCI_EXP_LNK_32GT) {
+        if (speed > QEMU_PCI_EXP_LNK_32GT) {
             pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
                                        PCI_EXP_LNKCAP2_SLS_64_0GB);
         }
+    }
+}
+
+static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
+{
+    PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
+    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+    /* Skip anything that isn't a PCIESlot */
+    if (!s) {
+        return;
     }
+
+    /*
+     * Link bandwidth notification is required for all root ports and
+     * downstream ports supporting links wider than x1 or multiple link
+     * speeds.
+     */
+    if (s->width > QEMU_PCI_EXP_LNK_X1 ||
+        s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+        pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+                                   PCI_EXP_LNKCAP_LBNC);
+    }
+
+    if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+        /*
+         * Hot-plug capable downstream ports and downstream ports supporting
+         * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC
+         * to 1b.  PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which
+         * we also hardwire to 1b here.  2.5GT/s hot-plug slots should also
+         * technically implement this, but it's not done here for compatibility.
+         */
+        pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+                                   PCI_EXP_LNKCAP_DLLLARC);
+        /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */
+    }
+
+    pcie_cap_fill_lnk(exp_cap, s->width, s->speed);
 }
 
 int pcie_cap_init(PCIDevice *dev, uint8_t offset,
-- 
2.43.0
[RFC qemu 4/6] hw/pcie: Provide a utility function for control of EP / SW USP link
Posted by Jonathan Cameron 4 months, 2 weeks ago
Whilst similar to the existing PCIESlot link configuration, a few registers
need to be set differently so that the downstream device presents
a 'configured' state that is then used to 'train' the upstream port
on the link.  Basically that means setting the status register to
reflect successful training up to the target settings.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/hw/pci/pcie.h |  2 ++
 hw/pci/pcie.c         | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 5eddb90976..b8d59732bc 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -141,6 +141,8 @@ void pcie_acs_reset(PCIDevice *dev);
 void pcie_ari_init(PCIDevice *dev, uint16_t offset);
 void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
 void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned);
+void pcie_cap_fill_link_ep_usp(PCIDevice *dev, PCIExpLinkWidth width,
+                               PCIExpLinkSpeed speed);
 
 void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
                                Error **errp);
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index b14d59573e..89734b50a2 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -154,6 +154,24 @@ static void pcie_cap_fill_lnk(uint8_t *exp_cap, PCIExpLinkWidth width,
     }    
 }
 
+void pcie_cap_fill_link_ep_usp(PCIDevice *dev, PCIExpLinkWidth width,
+                               PCIExpLinkSpeed speed)
+{
+    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+    /*
+     * For an end point or USP need to set the current status as well
+     * as the capabilities.
+     */
+    pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+                                 PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
+    pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+                               QEMU_PCI_EXP_LNKSTA_NLW(width) |
+                               QEMU_PCI_EXP_LNKSTA_CLS(speed));
+
+    pcie_cap_fill_lnk(exp_cap, width, speed);
+}
+
 static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
 {
     PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
-- 
2.43.0
[RFC qemu 5/6] hw/mem/cxl-type3: Add properties to control link speed and width
Posted by Jonathan Cameron 4 months, 2 weeks ago
To establish performance characteristics of a CXL device when used via a
particular CXL topology (root ports, switches, end points) it is necessary
to set the appropriate link speed and width in the PCI Express capability
structure.  Provide x-speed and x-width properties for this.
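
For example, as used in the cover letter test cases:

 -device cxl-type3,bus=cxl_rp_port0,volatile-memdev=cxl-mem1,id=cxl-pmem1,lsa=cxl-lsa1,sn=3,x-speed=32,x-width=16 \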

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/hw/cxl/cxl_device.h | 4 ++++
 hw/mem/cxl_type3.c          | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index fdd0f4e62b..e14e56ae4b 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -549,6 +549,10 @@ struct CXLType3Dev {
     CXLCCI vdm_fm_owned_ld_mctp_cci;
     CXLCCI ld0_cci;
 
+    /* PCIe link characteristics */
+    PCIExpLinkSpeed speed;
+    PCIExpLinkWidth width;
+
     /* DOE */
     DOECap doe_cdat;
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b3a401bc6d..adfcc28a6e 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -17,6 +17,7 @@
 #include "hw/mem/pc-dimm.h"
 #include "hw/pci/pci.h"
 #include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "qapi/error.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
@@ -1200,6 +1201,7 @@ static void ct3d_reset(DeviceState *dev)
     uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
     uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
 
+    pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed);
     cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
     cxl_device_register_init_t3(ct3d);
 
@@ -1229,6 +1231,10 @@ static Property ct3_props[] = {
     DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
     DEFINE_PROP_LINK("volatile-dc-memdev", CXLType3Dev, dc.host_dc,
                      TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+    DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLType3Dev,
+                                speed, PCIE_LINK_SPEED_32),
+    DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
+                                width, PCIE_LINK_WIDTH_16),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.43.0
[RFC qemu 6/6] hw/pci-bridge/cxl-upstream: Add properties to control link speed and width
Posted by Jonathan Cameron 4 months, 2 weeks ago
To establish performance characteristics of a CXL device when used via a
particular CXL topology (root ports, switches, end points) it is necessary
to set the appropriate link speed and width in the PCI Express capability
structure.  Provide x-speed and x-width properties for this.
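
For example, as used in the cover letter test cases:

 -device cxl-upstream,port=33,bus=cxl_rp_port0,id=us0,multifunction=on,addr=0.0,sn=33,x-speed=64,x-width=16 \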

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/hw/pci-bridge/cxl_upstream_port.h | 4 ++++
 hw/pci-bridge/cxl_upstream.c              | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/include/hw/pci-bridge/cxl_upstream_port.h b/include/hw/pci-bridge/cxl_upstream_port.h
index 12635139f6..f208397ffe 100644
--- a/include/hw/pci-bridge/cxl_upstream_port.h
+++ b/include/hw/pci-bridge/cxl_upstream_port.h
@@ -12,6 +12,10 @@ typedef struct CXLUpstreamPort {
     /*< public >*/
     CXLComponentState cxl_cstate;
     CXLCCI swcci;
+
+    PCIExpLinkSpeed speed;
+    PCIExpLinkWidth width;
+
     DOECap doe_cdat;
     uint64_t sn;
 } CXLUpstreamPort;
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index e51221a5f3..e673d69220 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -11,6 +11,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/pcie.h"
 #include "hw/pci/pcie_port.h"
@@ -100,6 +101,7 @@ static void cxl_usp_reset(DeviceState *qdev)
 
     pci_bridge_reset(qdev);
     pcie_cap_deverr_reset(d);
+    pcie_cap_fill_link_ep_usp(d, usp->width, usp->speed);
     latch_registers(usp);
 }
 
@@ -363,6 +365,10 @@ static void cxl_usp_exitfn(PCIDevice *d)
 static Property cxl_upstream_props[] = {
     DEFINE_PROP_UINT64("sn", CXLUpstreamPort, sn, UI64_NULL),
     DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename),
+    DEFINE_PROP_PCIE_LINK_SPEED("x-speed", CXLUpstreamPort,
+                                speed, PCIE_LINK_SPEED_32),
+    DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLUpstreamPort,
+                                width, PCIE_LINK_WIDTH_16),
     DEFINE_PROP_END_OF_LIST()
 };
 
-- 
2.43.0