[PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB

Jonathan Cameron via qemu development posted 7 patches 2 days, 17 hours ago
There is a newer version of this series
[PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB
Posted by Jonathan Cameron via qemu development 2 days, 17 hours ago
From: Davidlohr Bueso <dave@stgolabs.net>

Add basic plumbing for memory expander devices that support Back
Invalidation. This introduces a 'hdm-db=on|off' parameter and
exposes the relevant BI RT/Decoder component cachemem registers.

Some noteworthy properties:
 - Devices require enabling Flit mode across the CXL topology.
 - Explicit BI-ID commit is required.
 - HDM decoders support both host and device coherency models.

Tested-by: Dongjoo Seo <dongjoo.seo1@samsung.com>
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
v6: Initialize type variable to avoid a false compiler warning
    from some versions of gcc. (Michael Tsirkin)
---
 docs/system/devices/cxl.rst         |  23 +++++
 include/hw/cxl/cxl_component.h      |  54 ++++++++++-
 include/hw/cxl/cxl_device.h         |   3 +
 hw/cxl/cxl-component-utils.c        | 142 ++++++++++++++++++++++++++--
 hw/mem/cxl_type3.c                  |   9 +-
 hw/pci-bridge/cxl_downstream.c      |   2 +-
 hw/pci-bridge/cxl_root_port.c       |   3 +-
 hw/pci-bridge/cxl_upstream.c        |   2 +-
 hw/pci-bridge/pci_expander_bridge.c |   2 +-
 9 files changed, 225 insertions(+), 15 deletions(-)

diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index ca15a0da1c1d..9d0771cdfd73 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -384,6 +384,29 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
   -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
   -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
 
+An example of 4 type3 devices with volatile memory below a switch. Two of the devices
+use HDM-DB for coherence, which requires operating in Flit mode::
+
+  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
+  ...
+  -object memory-backend-ram,id=cxl-mem0,share=on,size=256M \
+  -object memory-backend-ram,id=cxl-mem1,share=on,size=256M \
+  -object memory-backend-ram,id=cxl-mem2,share=on,size=256M \
+  -object memory-backend-ram,id=cxl-mem3,share=on,size=256M \
+  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
+  -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
+  -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
+  -device cxl-upstream,bus=root_port0,id=us0,x-256b-flit=on \
+  -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
+  -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-mem0,sn=0x1,x-256b-flit=on,hdm-db=on \
+  -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
+  -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-mem1,sn=0x2,x-256b-flit=on,hdm-db=on \
+  -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
+  -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-mem2,sn=0x3 \
+  -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
+  -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-mem3,sn=0x4 \
+  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
+
 A simple arm/virt example featuring a single direct connected CXL Type 3
 Volatile Memory device::
 
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
index 1f167d3ef79b..ffc82202206c 100644
--- a/include/hw/cxl/cxl_component.h
+++ b/include/hw/cxl/cxl_component.h
@@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
 CXLx_CAPABILITY_HEADER(HDM, 3)
 CXLx_CAPABILITY_HEADER(EXTSEC, 4)
 CXLx_CAPABILITY_HEADER(SNOOP, 5)
+CXLx_CAPABILITY_HEADER(BI_RT, 6)
+CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
 
 /*
  * Capability structures contain the actual registers that the CXL component
@@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
     (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
 #define CXL_SNOOP_REGISTERS_SIZE   0x8
 
-QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
-                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
+#define CXL_BI_RT_CAP_VERSION 1
+#define CXL_BI_RT_REGISTERS_OFFSET \
+    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
+#define CXL_BI_RT_REGISTERS_SIZE   0xC
+
+REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
+    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
+REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
+    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
+REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
+    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
+    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
+    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
+    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
+
+/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
+#define CXL_BI_DECODER_CAP_VERSION 1
+#define CXL_BI_DECODER_REGISTERS_OFFSET \
+    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
+#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
+
+REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
+    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
+    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
+REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
+    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
+    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
+    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
+REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
+    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
+    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
+    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
+    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
+
+QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
+                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
                    "No space for registers");
 
+/* track BI explicit commit handling for route table and decoder */
+enum {
+    CXL_BISTATE_RT = 0,
+    CXL_BISTATE_DECODER,
+    CXL_BISTATE_MAX
+};
+
+typedef struct bi_state {
+    uint64_t last_commit;  /* virtual-clock ms of last COMMIT write, 0 = none */
+} BIState;
+
 typedef struct component_registers {
     /*
      * Main memory region to be registered with QEMU core.
@@ -259,6 +306,7 @@ typedef struct cxl_component {
     };
 
     CDATObject cdat;
+    BIState bi_state[CXL_BISTATE_MAX];
 } CXLComponentState;
 
 void cxl_component_register_block_init(Object *obj,
@@ -266,7 +314,7 @@ void cxl_component_register_block_init(Object *obj,
                                        const char *type);
 void cxl_component_register_init_common(uint32_t *reg_state,
                                         uint32_t *write_msk,
-                                        enum reg_type type);
+                                        enum reg_type type, bool bi);
 
 void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
                                 enum reg_type cxl_dev_type, uint16_t length,
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 7d9236db8c85..393f3122173b 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -770,6 +770,9 @@ struct CXLType3Dev {
     CXLMemSparingReadAttrs rank_sparing_attrs;
     CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
 
+    /* BI flows */
+    bool hdmdb;
+
     struct dynamic_capacity {
         HostMemoryBackend *host_dc;
         AddressSpace host_dc_as;
diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index 91770f103a85..a7d36e1128c2 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -71,10 +71,40 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
     case 4:
         if (cregs->special_ops && cregs->special_ops->read) {
             return cregs->special_ops->read(cxl_cstate, offset, 4);
-        } else {
-            QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
-            return cregs->cache_mem_registers[offset / 4];
         }
+
+        QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
+
+        if (offset == A_CXL_BI_RT_STATUS ||
+            offset == A_CXL_BI_DECODER_STATUS) {
+            int type;
+            uint64_t started;
+
+            type = (offset == A_CXL_BI_RT_STATUS) ?
+                    CXL_BISTATE_RT : CXL_BISTATE_DECODER;
+            started = cxl_cstate->bi_state[type].last_commit;
+
+            if (started) {
+                uint32_t *cache_mem = cregs->cache_mem_registers;
+                uint32_t val = cache_mem[offset / 4];
+                uint64_t now;
+                int set;
+
+                now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+                /* arbitrary 100 ms to do the commit */
+                set = !!(now >= started + 100);
+
+                if (offset == A_CXL_BI_RT_STATUS) {
+                    val = FIELD_DP32(val, CXL_BI_RT_STATUS, COMMITTED, set);
+                } else {
+                    val = FIELD_DP32(val, CXL_BI_DECODER_STATUS, COMMITTED,
+                                     set);
+                }
+                stl_le_p((uint8_t *)cache_mem + offset, val);
+            }
+        }
+
+        return cregs->cache_mem_registers[offset / 4];
     case 8:
         qemu_log_mask(LOG_UNIMP,
                       "CXL 8 byte cache mem registers not implemented\n");
@@ -118,6 +148,47 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
     stl_le_p((uint8_t *)cache_mem + offset, value);
 }
 
+/* CTRL write hook: a COMMIT request clears COMMITTED and timestamps it */
+static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
+                       uint32_t value)
+{
+    ComponentRegisters *cregs = &cxl_cstate->crb;
+    uint32_t sts, *cache_mem = cregs->cache_mem_registers;
+    bool to_commit = false;
+    int type = 0; /* Unused value - work around for compiler warning */
+
+    switch (offset) {
+    case A_CXL_BI_RT_CTRL:
+        to_commit = FIELD_EX32(value, CXL_BI_RT_CTRL, COMMIT);
+        if (to_commit) {
+            sts = cxl_cache_mem_read_reg(cxl_cstate, A_CXL_BI_RT_STATUS, 4);
+            sts = FIELD_DP32(sts, CXL_BI_RT_STATUS, COMMITTED, 0);
+            stl_le_p((uint8_t *)cache_mem + A_CXL_BI_RT_STATUS, sts);
+            type = CXL_BISTATE_RT;
+        }
+        break;
+    case A_CXL_BI_DECODER_CTRL:
+        to_commit = FIELD_EX32(value, CXL_BI_DECODER_CTRL, COMMIT);
+        if (to_commit) {
+            sts = cxl_cache_mem_read_reg(cxl_cstate,
+                                         A_CXL_BI_DECODER_STATUS, 4);
+            sts = FIELD_DP32(sts, CXL_BI_DECODER_STATUS, COMMITTED, 0);
+            stl_le_p((uint8_t *)cache_mem + A_CXL_BI_DECODER_STATUS, sts);
+            type = CXL_BISTATE_DECODER;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (to_commit) {
+        cxl_cstate->bi_state[type].last_commit =
+                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+    }
+
+    stl_le_p((uint8_t *)cache_mem + offset, value);
+}
+
 static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
                                     unsigned size)
 {
@@ -141,6 +212,9 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
         if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
             offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) {
             dumb_hdm_handler(cxl_cstate, offset, value);
+        } else if (offset == A_CXL_BI_RT_CTRL ||
+                   offset == A_CXL_BI_DECODER_CTRL) {
+            bi_handler(cxl_cstate, offset, value);
         } else {
             cregs->cache_mem_registers[offset / 4] = value;
         }
@@ -230,7 +304,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
 }
 
 static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
-                            enum reg_type type)
+                            enum reg_type type, bool bi)
 {
     int decoder_count = CXL_HDM_DECODER_COUNT;
     int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
@@ -255,7 +329,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
                      UIO_DECODER_COUNT, 0);
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0);
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
-                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
+                     SUPPORTED_COHERENCY_MODEL,
+                     /* host+dev or Unknown */
+                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
                      HDM_DECODER_ENABLE, 0);
     write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
@@ -278,9 +354,43 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
     }
 }
 
+static void bi_rt_init_common(uint32_t *reg_state, uint32_t *write_msk)
+{
+    /* switch usp must commit the new BI-ID, timeout of 2secs */
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 1);
+
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CTRL, COMMIT, 0);
+    write_msk[R_CXL_BI_RT_CTRL] = 0x1;
+
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMITTED, 0);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 0);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 0x6);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 0x2);
+}
+
+static void bi_decoder_init_common(uint32_t *reg_state, uint32_t *write_msk,
+                                   enum reg_type type)
+{
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, HDM_D, 0);
+    /* switch dsp must commit the new BI-ID, timeout of 2secs */
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT,
+                     (type != CXL2_ROOT_PORT && type != CXL2_TYPE3_DEVICE));
+
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_FW, 0);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_ENABLE, 0);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, COMMIT, 0);
+    write_msk[R_CXL_BI_DECODER_CTRL] = 0x7;
+
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMITTED, 0);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 0);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 0x6);
+    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 0x2);
+}
+
 void cxl_component_register_init_common(uint32_t *reg_state,
                                         uint32_t *write_msk,
-                                        enum reg_type type)
+                                        enum reg_type type,
+                                        bool bi)
 {
     int caps = 0;
 
@@ -320,7 +430,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
     case CXL2_LOGICAL_DEVICE:
         /* + HDM */
         init_cap_reg(HDM, 5, 1);
-        hdm_init_common(reg_state, write_msk, type);
+        hdm_init_common(reg_state, write_msk, type, bi);
         /* fallthrough */
     case CXL2_DOWNSTREAM_PORT:
     case CXL2_DEVICE:
@@ -335,6 +445,24 @@ void cxl_component_register_init_common(uint32_t *reg_state,
         abort();
     }
 
+    /* back invalidate */
+    if (bi) {
+        switch (type) {
+        case CXL2_UPSTREAM_PORT:
+            init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
+            bi_rt_init_common(reg_state, write_msk);
+            break;
+        case CXL2_ROOT_PORT:
+        case CXL2_DOWNSTREAM_PORT:
+        case CXL2_TYPE3_DEVICE:
+            init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
+            bi_decoder_init_common(reg_state, write_msk, type);
+            break;
+        default:
+            break;
+        }
+    }
+
     ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
 #undef init_cap_reg
 }
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 3c7ecd8c48bc..3f09c589ae58 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -748,6 +748,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
         return false;
     }
 
+    if (!ct3d->flitmode && ct3d->hdmdb) {
+        error_setg(errp, "hdm-db requires operating in 256b flit");
+        return false;
+    }
+
     if (ct3d->hostvmem) {
         MemoryRegion *vmr;
         char *v_name;
@@ -1317,7 +1322,8 @@ static void ct3d_reset(DeviceState *dev)
 
     pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed,
                               ct3d->flitmode);
-    cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
+    cxl_component_register_init_common(reg_state, write_msk,
+                                       CXL2_TYPE3_DEVICE, ct3d->hdmdb);
     cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
 
     /*
@@ -1356,6 +1362,7 @@ static const Property ct3_props[] = {
     DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
                                 width, PCIE_LINK_WIDTH_16),
     DEFINE_PROP_BOOL("x-256b-flit", CXLType3Dev, flitmode, false),
+    DEFINE_PROP_BOOL("hdm-db", CXLType3Dev, hdmdb, false),
 };
 
 static uint64_t get_lsa_size(CXLType3Dev *ct3d)
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index 64086d8ec2f2..320818a8f1ce 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -39,7 +39,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
     uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
 
     cxl_component_register_init_common(reg_state, write_msk,
-                                       CXL2_DOWNSTREAM_PORT);
+                                       CXL2_DOWNSTREAM_PORT, true);
 }
 
 /* TODO: Look at sharing this code across all CXL port types */
diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
index 5641048084a4..e2093ac39ee6 100644
--- a/hw/pci-bridge/cxl_root_port.c
+++ b/hw/pci-bridge/cxl_root_port.c
@@ -101,7 +101,8 @@ static void latch_registers(CXLRootPort *crp)
     uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
     uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
 
-    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
+    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT,
+                                       true);
 }
 
 static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index c352d11dc7b7..fb8d19539c9f 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -90,7 +90,7 @@ static void latch_registers(CXLUpstreamPort *usp)
     uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
 
     cxl_component_register_init_common(reg_state, write_msk,
-                                       CXL2_UPSTREAM_PORT);
+                                       CXL2_UPSTREAM_PORT, usp->flitmode);
     ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
 }
 
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index b6e2eb796951..11623a5666f6 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -300,7 +300,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
     uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
     int dsp_count = 0;
 
-    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC);
+    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC, false);
     /*
      * The CXL specification allows for host bridges with no HDM decoders
      * if they only have a single root port.
-- 
2.51.0
Re: [PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB
Posted by Michael S. Tsirkin 2 days, 16 hours ago
On Wed, Feb 04, 2026 at 12:12:14PM +0000, Jonathan Cameron wrote:
> From: Davidlohr Bueso <dave@stgolabs.net>
> 
> Add basic plumbing for memory expander devices that support Back
> Invalidation. This introduces a 'hdm-db=on|off' parameter and
> exposes the relevant BI RT/Decoder component cachemem registers.
> 
> Some noteworthy properties:
>  - Devices require enabling Flit mode across the CXL topology.
>  - Explicit BI-ID commit is required.
>  - HDM decoders support both host and device coherency models.
> 
> Tested-by: Dongjoo Seo <dongjoo.seo1@samsung.com>
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

more troubles:
https://gitlab.com/mstredhat/qemu/-/jobs/12984846000


> ---
> v6: Initialize type variable to avoid a false compiler warning
>     from some versions of gcc. (Michael Tsirkin)
> ---
>  docs/system/devices/cxl.rst         |  23 +++++
>  include/hw/cxl/cxl_component.h      |  54 ++++++++++-
>  include/hw/cxl/cxl_device.h         |   3 +
>  hw/cxl/cxl-component-utils.c        | 142 ++++++++++++++++++++++++++--
>  hw/mem/cxl_type3.c                  |   9 +-
>  hw/pci-bridge/cxl_downstream.c      |   2 +-
>  hw/pci-bridge/cxl_root_port.c       |   3 +-
>  hw/pci-bridge/cxl_upstream.c        |   2 +-
>  hw/pci-bridge/pci_expander_bridge.c |   2 +-
>  9 files changed, 225 insertions(+), 15 deletions(-)
> 
> diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> index ca15a0da1c1d..9d0771cdfd73 100644
> --- a/docs/system/devices/cxl.rst
> +++ b/docs/system/devices/cxl.rst
> @@ -384,6 +384,29 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
>    -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
>    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
>  
> +An example of 4 type3 devices with volatile memory below a switch. Two of the devices
> +use HDM-DB for coherence, which requires operating in Flit mode::
> +
> +  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
> +  ...
> +  -object memory-backend-ram,id=cxl-mem0,share=on,size=256M \
> +  -object memory-backend-ram,id=cxl-mem1,share=on,size=256M \
> +  -object memory-backend-ram,id=cxl-mem2,share=on,size=256M \
> +  -object memory-backend-ram,id=cxl-mem3,share=on,size=256M \
> +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> +  -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
> +  -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
> +  -device cxl-upstream,bus=root_port0,id=us0,x-256b-flit=on \
> +  -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> +  -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-mem0,sn=0x1,x-256b-flit=on,hdm-db=on \
> +  -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> +  -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-mem1,sn=0x2,x-256b-flit=on,hdm-db=on \
> +  -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> +  -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-mem2,sn=0x3 \
> +  -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> +  -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-mem3,sn=0x4 \
> +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> +
>  A simple arm/virt example featuring a single direct connected CXL Type 3
>  Volatile Memory device::
>  
> diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> index 1f167d3ef79b..ffc82202206c 100644
> --- a/include/hw/cxl/cxl_component.h
> +++ b/include/hw/cxl/cxl_component.h
> @@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
>  CXLx_CAPABILITY_HEADER(HDM, 3)
>  CXLx_CAPABILITY_HEADER(EXTSEC, 4)
>  CXLx_CAPABILITY_HEADER(SNOOP, 5)
> +CXLx_CAPABILITY_HEADER(BI_RT, 6)
> +CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
>  
>  /*
>   * Capability structures contain the actual registers that the CXL component
> @@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
>      (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
>  #define CXL_SNOOP_REGISTERS_SIZE   0x8
>  
> -QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
> -                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
> +#define CXL_BI_RT_CAP_VERSION 1
> +#define CXL_BI_RT_REGISTERS_OFFSET \
> +    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
> +#define CXL_BI_RT_REGISTERS_SIZE   0xC
> +
> +REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
> +    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
> +REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
> +    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
> +REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
> +    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
> +    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
> +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
> +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
> +
> +/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
> +#define CXL_BI_DECODER_CAP_VERSION 1
> +#define CXL_BI_DECODER_REGISTERS_OFFSET \
> +    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
> +#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
> +
> +REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
> +    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
> +    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
> +REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
> +    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
> +    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
> +    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
> +REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
> +    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
> +    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
> +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
> +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
> +
> +QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
> +                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
>                     "No space for registers");
>  
> +/* track BI explicit commit handling for route table and decoder */
> +enum {
> +    CXL_BISTATE_RT = 0,
> +    CXL_BISTATE_DECODER,
> +    CXL_BISTATE_MAX
> +};
> +
> +typedef struct bi_state {
> +    uint64_t last_commit;  /* last 0->1 transition */
> +} BIState;
> +
>  typedef struct component_registers {
>      /*
>       * Main memory region to be registered with QEMU core.
> @@ -259,6 +306,7 @@ typedef struct cxl_component {
>      };
>  
>      CDATObject cdat;
> +    BIState bi_state[CXL_BISTATE_MAX];
>  } CXLComponentState;
>  
>  void cxl_component_register_block_init(Object *obj,
> @@ -266,7 +314,7 @@ void cxl_component_register_block_init(Object *obj,
>                                         const char *type);
>  void cxl_component_register_init_common(uint32_t *reg_state,
>                                          uint32_t *write_msk,
> -                                        enum reg_type type);
> +                                        enum reg_type type, bool bi);
>  
>  void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
>                                  enum reg_type cxl_dev_type, uint16_t length,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 7d9236db8c85..393f3122173b 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -770,6 +770,9 @@ struct CXLType3Dev {
>      CXLMemSparingReadAttrs rank_sparing_attrs;
>      CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
>  
> +    /* BI flows */
> +    bool hdmdb;
> +
>      struct dynamic_capacity {
>          HostMemoryBackend *host_dc;
>          AddressSpace host_dc_as;
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index 91770f103a85..a7d36e1128c2 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -71,10 +71,40 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
>      case 4:
>          if (cregs->special_ops && cregs->special_ops->read) {
>              return cregs->special_ops->read(cxl_cstate, offset, 4);
> -        } else {
> -            QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> -            return cregs->cache_mem_registers[offset / 4];
>          }
> +
> +        QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> +
> +        if (offset == A_CXL_BI_RT_STATUS ||
> +            offset == A_CXL_BI_DECODER_STATUS) {
> +            int type;
> +            uint64_t started;
> +
> +            type = (offset == A_CXL_BI_RT_STATUS) ?
> +                    CXL_BISTATE_RT : CXL_BISTATE_DECODER;
> +            started = cxl_cstate->bi_state[type].last_commit;
> +
> +            if (started) {
> +                uint32_t *cache_mem = cregs->cache_mem_registers;
> +                uint32_t val = cache_mem[offset / 4];
> +                uint64_t now;
> +                int set;
> +
> +                now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> +                /* arbitrary 100 ms to do the commit */
> +                set = !!(now >= started + 100);
> +
> +                if (offset == A_CXL_BI_RT_STATUS) {
> +                    val = FIELD_DP32(val, CXL_BI_RT_STATUS, COMMITTED, set);
> +                } else {
> +                    val = FIELD_DP32(val, CXL_BI_DECODER_STATUS, COMMITTED,
> +                                     set);
> +                }
> +                stl_le_p((uint8_t *)cache_mem + offset, val);
> +            }
> +        }
> +
> +        return cregs->cache_mem_registers[offset / 4];
>      case 8:
>          qemu_log_mask(LOG_UNIMP,
>                        "CXL 8 byte cache mem registers not implemented\n");
> @@ -118,6 +148,47 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
>      stl_le_p((uint8_t *)cache_mem + offset, value);
>  }
>  
> +/* CTRL write hook: a COMMIT request clears COMMITTED and timestamps it */
> +static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> +                       uint32_t value)
> +{
> +    ComponentRegisters *cregs = &cxl_cstate->crb;
> +    uint32_t sts, *cache_mem = cregs->cache_mem_registers;
> +    bool to_commit = false;
> +    int type = 0; /* Unused value - work around for compiler warning */
> +
> +    switch (offset) {
> +    case A_CXL_BI_RT_CTRL:
> +        to_commit = FIELD_EX32(value, CXL_BI_RT_CTRL, COMMIT);
> +        if (to_commit) {
> +            sts = cxl_cache_mem_read_reg(cxl_cstate, A_CXL_BI_RT_STATUS, 4);
> +            sts = FIELD_DP32(sts, CXL_BI_RT_STATUS, COMMITTED, 0);
> +            stl_le_p((uint8_t *)cache_mem + A_CXL_BI_RT_STATUS, sts);
> +            type = CXL_BISTATE_RT;
> +        }
> +        break;
> +    case A_CXL_BI_DECODER_CTRL:
> +        to_commit = FIELD_EX32(value, CXL_BI_DECODER_CTRL, COMMIT);
> +        if (to_commit) {
> +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> +                                         A_CXL_BI_DECODER_STATUS, 4);
> +            sts = FIELD_DP32(sts, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> +            stl_le_p((uint8_t *)cache_mem + A_CXL_BI_DECODER_STATUS, sts);
> +            type = CXL_BISTATE_DECODER;
> +        }
> +        break;
> +    default:
> +        break;
> +    }
> +
> +    if (to_commit) {
> +        cxl_cstate->bi_state[type].last_commit =
> +                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> +    }
> +
> +    stl_le_p((uint8_t *)cache_mem + offset, value);
> +}
> +
>  static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
>                                      unsigned size)
>  {
> @@ -141,6 +212,9 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
>          if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
>              offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) {
>              dumb_hdm_handler(cxl_cstate, offset, value);
> +        } else if (offset == A_CXL_BI_RT_CTRL ||
> +                   offset == A_CXL_BI_DECODER_CTRL) {
> +            bi_handler(cxl_cstate, offset, value);
>          } else {
>              cregs->cache_mem_registers[offset / 4] = value;
>          }
> @@ -230,7 +304,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
>  }
>  
>  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> -                            enum reg_type type)
> +                            enum reg_type type, bool bi)
>  {
>      int decoder_count = CXL_HDM_DECODER_COUNT;
>      int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
> @@ -255,7 +329,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
>                       UIO_DECODER_COUNT, 0);
>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0);
>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
> -                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
> +                     SUPPORTED_COHERENCY_MODEL,
> +                     /* host+dev or Unknown */
> +                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
>                       HDM_DECODER_ENABLE, 0);
>      write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
> @@ -278,9 +354,43 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
>      }
>  }
>  
> +static void bi_rt_init_common(uint32_t *reg_state, uint32_t *write_msk)
> +{
> +    /* switch usp must commit the new BI-ID, timeout of 2secs */
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 1);
> +
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CTRL, COMMIT, 0);
> +    write_msk[R_CXL_BI_RT_CTRL] = 0x1;
> +
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMITTED, 0);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 0);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 0x6);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 0x2);
> +}
> +
> +static void bi_decoder_init_common(uint32_t *reg_state, uint32_t *write_msk,
> +                                   enum reg_type type)
> +{
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, HDM_D, 0);
> +    /* switch dsp must commit the new BI-ID, timeout of 2secs */
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT,
> +                     (type != CXL2_ROOT_PORT && type != CXL2_TYPE3_DEVICE));
> +
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_FW, 0);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_ENABLE, 0);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, COMMIT, 0);
> +    write_msk[R_CXL_BI_DECODER_CTRL] = 0x7;
> +
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 0);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 0x6);
> +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 0x2);
> +}
> +
>  void cxl_component_register_init_common(uint32_t *reg_state,
>                                          uint32_t *write_msk,
> -                                        enum reg_type type)
> +                                        enum reg_type type,
> +                                        bool bi)
>  {
>      int caps = 0;
>  
> @@ -320,7 +430,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
>      case CXL2_LOGICAL_DEVICE:
>          /* + HDM */
>          init_cap_reg(HDM, 5, 1);
> -        hdm_init_common(reg_state, write_msk, type);
> +        hdm_init_common(reg_state, write_msk, type, bi);
>          /* fallthrough */
>      case CXL2_DOWNSTREAM_PORT:
>      case CXL2_DEVICE:
> @@ -335,6 +445,24 @@ void cxl_component_register_init_common(uint32_t *reg_state,
>          abort();
>      }
>  
> +    /* back invalidate */
> +    if (bi) {
> +        switch (type) {
> +        case CXL2_UPSTREAM_PORT:
> +            init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
> +            bi_rt_init_common(reg_state, write_msk);
> +            break;
> +        case CXL2_ROOT_PORT:
> +        case CXL2_DOWNSTREAM_PORT:
> +        case CXL2_TYPE3_DEVICE:
> +            init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
> +            bi_decoder_init_common(reg_state, write_msk, type);
> +            break;
> +        default:
> +            break;
> +        }
> +    }
> +
>      ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
>  #undef init_cap_reg
>  }
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 3c7ecd8c48bc..3f09c589ae58 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -748,6 +748,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
>          return false;
>      }
>  
> +    if (!ct3d->flitmode && ct3d->hdmdb) {
> +        error_setg(errp, "hdm-db requires operating in 256b flit");
> +        return false;
> +    }
> +
>      if (ct3d->hostvmem) {
>          MemoryRegion *vmr;
>          char *v_name;
> @@ -1317,7 +1322,8 @@ static void ct3d_reset(DeviceState *dev)
>  
>      pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed,
>                                ct3d->flitmode);
> -    cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
> +    cxl_component_register_init_common(reg_state, write_msk,
> +                                       CXL2_TYPE3_DEVICE, ct3d->hdmdb);
>      cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
>  
>      /*
> @@ -1356,6 +1362,7 @@ static const Property ct3_props[] = {
>      DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
>                                  width, PCIE_LINK_WIDTH_16),
>      DEFINE_PROP_BOOL("x-256b-flit", CXLType3Dev, flitmode, false),
> +    DEFINE_PROP_BOOL("hdm-db", CXLType3Dev, hdmdb, false),
>  };
>  
>  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
> diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
> index 64086d8ec2f2..320818a8f1ce 100644
> --- a/hw/pci-bridge/cxl_downstream.c
> +++ b/hw/pci-bridge/cxl_downstream.c
> @@ -39,7 +39,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
>      uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
>      cxl_component_register_init_common(reg_state, write_msk,
> -                                       CXL2_DOWNSTREAM_PORT);
> +                                       CXL2_DOWNSTREAM_PORT, true);
>  }
>  
>  /* TODO: Look at sharing this code across all CXL port types */
> diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
> index 5641048084a4..e2093ac39ee6 100644
> --- a/hw/pci-bridge/cxl_root_port.c
> +++ b/hw/pci-bridge/cxl_root_port.c
> @@ -101,7 +101,8 @@ static void latch_registers(CXLRootPort *crp)
>      uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
>      uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
> -    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT,
> +                                       true);
>  }
>  
>  static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
> diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> index c352d11dc7b7..fb8d19539c9f 100644
> --- a/hw/pci-bridge/cxl_upstream.c
> +++ b/hw/pci-bridge/cxl_upstream.c
> @@ -90,7 +90,7 @@ static void latch_registers(CXLUpstreamPort *usp)
>      uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
>      cxl_component_register_init_common(reg_state, write_msk,
> -                                       CXL2_UPSTREAM_PORT);
> +                                       CXL2_UPSTREAM_PORT, usp->flitmode);
>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
>  }
>  
> diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> index b6e2eb796951..11623a5666f6 100644
> --- a/hw/pci-bridge/pci_expander_bridge.c
> +++ b/hw/pci-bridge/pci_expander_bridge.c
> @@ -300,7 +300,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
>      uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
>      int dsp_count = 0;
>  
> -    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC);
> +    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC, false);
>      /*
>       * The CXL specification allows for host bridges with no HDM decoders
>       * if they only have a single root port.
> -- 
> 2.51.0
Re: [PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB
Posted by Michael S. Tsirkin 2 days, 16 hours ago
On Wed, Feb 04, 2026 at 08:24:55AM -0500, Michael S. Tsirkin wrote:
> On Wed, Feb 04, 2026 at 12:12:14PM +0000, Jonathan Cameron wrote:
> > From: Davidlohr Bueso <dave@stgolabs.net>
> > 
> > Add basic plumbing for memory expander devices that support Back
> > Invalidation. This introduces a 'hdm-db=on|off' parameter and
> > exposes the relevant BI RT/Decoder component cachemem registers.
> > 
> > Some noteworthy properties:
> >  - Devices require enabling Flit mode across the CXL topology.
> >  - Explicit BI-ID commit is required.
> >  - HDM decoder support both host and dev coherency models.
> > 
> > Tested-by: Dongjoo Seo <dongjoo.seo1@samsung.com>
> > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> more troubles:
> https://gitlab.com/mstredhat/qemu/-/jobs/12984846000


clang just as unhappy:

https://gitlab.com/mstredhat/qemu/-/jobs/12984846060

> 
> > ---
> > v6: Initialize type variable to avoid a false compiler warning
> >     from some versions of gcc. (Michael Tsirkin)
> > ---
> >  docs/system/devices/cxl.rst         |  23 +++++
> >  include/hw/cxl/cxl_component.h      |  54 ++++++++++-
> >  include/hw/cxl/cxl_device.h         |   3 +
> >  hw/cxl/cxl-component-utils.c        | 142 ++++++++++++++++++++++++++--
> >  hw/mem/cxl_type3.c                  |   9 +-
> >  hw/pci-bridge/cxl_downstream.c      |   2 +-
> >  hw/pci-bridge/cxl_root_port.c       |   3 +-
> >  hw/pci-bridge/cxl_upstream.c        |   2 +-
> >  hw/pci-bridge/pci_expander_bridge.c |   2 +-
> >  9 files changed, 225 insertions(+), 15 deletions(-)
> > 
> > diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> > index ca15a0da1c1d..9d0771cdfd73 100644
> > --- a/docs/system/devices/cxl.rst
> > +++ b/docs/system/devices/cxl.rst
> > @@ -384,6 +384,29 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
> >    -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
> >    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> >  
> > +An example of 4 type3 devices with volatile memory below a switch. Two of the devices
> > +use HDM-DB for coherence, which requires operating in Flit mode::
> > +
> > +  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
> > +  ...
> > +  -object memory-backend-ram,id=cxl-mem0,share=on,size=256M \
> > +  -object memory-backend-ram,id=cxl-mem1,share=on,size=256M \
> > +  -object memory-backend-ram,id=cxl-mem2,share=on,size=256M \
> > +  -object memory-backend-ram,id=cxl-mem3,share=on,size=256M \
> > +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> > +  -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
> > +  -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
> > +  -device cxl-upstream,bus=root_port0,id=us0,x-256b-flit=on \
> > +  -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> > +  -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-mem0,sn=0x1,x-256b-flit=on,hdm-db=on \
> > +  -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> > +  -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-mem1,sn=0x2,x-256b-flit=on,hdm-db=on \
> > +  -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> > +  -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-mem2,sn=0x3 \
> > +  -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> > +  -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-mem3,sn=0x4 \
> > +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > +
> >  A simple arm/virt example featuring a single direct connected CXL Type 3
> >  Volatile Memory device::
> >  
> > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> > index 1f167d3ef79b..ffc82202206c 100644
> > --- a/include/hw/cxl/cxl_component.h
> > +++ b/include/hw/cxl/cxl_component.h
> > @@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
> >  CXLx_CAPABILITY_HEADER(HDM, 3)
> >  CXLx_CAPABILITY_HEADER(EXTSEC, 4)
> >  CXLx_CAPABILITY_HEADER(SNOOP, 5)
> > +CXLx_CAPABILITY_HEADER(BI_RT, 6)
> > +CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
> >  
> >  /*
> >   * Capability structures contain the actual registers that the CXL component
> > @@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
> >      (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
> >  #define CXL_SNOOP_REGISTERS_SIZE   0x8
> >  
> > -QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
> > -                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
> > +#define CXL_BI_RT_CAP_VERSION 1
> > +#define CXL_BI_RT_REGISTERS_OFFSET \
> > +    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
> > +#define CXL_BI_RT_REGISTERS_SIZE   0xC
> > +
> > +REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
> > +    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
> > +REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
> > +    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
> > +REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
> > +    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
> > +    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
> > +
> > +/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
> > +#define CXL_BI_DECODER_CAP_VERSION 1
> > +#define CXL_BI_DECODER_REGISTERS_OFFSET \
> > +    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
> > +#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
> > +
> > +REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
> > +    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
> > +    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
> > +REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
> > +    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
> > +    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
> > +    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
> > +REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
> > +    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
> > +    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
> > +
> > +QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
> > +                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
> >                     "No space for registers");
> >  
> > +/* track BI explicit commit handling for route table and decoder */
> > +enum {
> > +    CXL_BISTATE_RT = 0,
> > +    CXL_BISTATE_DECODER,
> > +    CXL_BISTATE_MAX
> > +};
> > +
> > +typedef struct bi_state {
> > +    uint64_t last_commit;  /* last 0->1 transition */
> > +} BIState;
> > +
> >  typedef struct component_registers {
> >      /*
> >       * Main memory region to be registered with QEMU core.
> > @@ -259,6 +306,7 @@ typedef struct cxl_component {
> >      };
> >  
> >      CDATObject cdat;
> > +    BIState bi_state[CXL_BISTATE_MAX];
> >  } CXLComponentState;
> >  
> >  void cxl_component_register_block_init(Object *obj,
> > @@ -266,7 +314,7 @@ void cxl_component_register_block_init(Object *obj,
> >                                         const char *type);
> >  void cxl_component_register_init_common(uint32_t *reg_state,
> >                                          uint32_t *write_msk,
> > -                                        enum reg_type type);
> > +                                        enum reg_type type, bool bi);
> >  
> >  void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
> >                                  enum reg_type cxl_dev_type, uint16_t length,
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > index 7d9236db8c85..393f3122173b 100644
> > --- a/include/hw/cxl/cxl_device.h
> > +++ b/include/hw/cxl/cxl_device.h
> > @@ -770,6 +770,9 @@ struct CXLType3Dev {
> >      CXLMemSparingReadAttrs rank_sparing_attrs;
> >      CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
> >  
> > +    /* BI flows */
> > +    bool hdmdb;
> > +
> >      struct dynamic_capacity {
> >          HostMemoryBackend *host_dc;
> >          AddressSpace host_dc_as;
> > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> > index 91770f103a85..a7d36e1128c2 100644
> > --- a/hw/cxl/cxl-component-utils.c
> > +++ b/hw/cxl/cxl-component-utils.c
> > @@ -71,10 +71,40 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
> >      case 4:
> >          if (cregs->special_ops && cregs->special_ops->read) {
> >              return cregs->special_ops->read(cxl_cstate, offset, 4);
> > -        } else {
> > -            QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > -            return cregs->cache_mem_registers[offset / 4];
> >          }
> > +
> > +        QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > +
> > +        if (offset == A_CXL_BI_RT_STATUS ||
> > +            offset == A_CXL_BI_DECODER_STATUS) {
> > +            int type;
> > +            uint64_t started;
> > +
> > +            type = (offset == A_CXL_BI_RT_STATUS) ?
> > +                    CXL_BISTATE_RT : CXL_BISTATE_DECODER;
> > +            started = cxl_cstate->bi_state[type].last_commit;
> > +
> > +            if (started) {
> > +                uint32_t *cache_mem = cregs->cache_mem_registers;
> > +                uint32_t val = cache_mem[offset / 4];
> > +                uint64_t now;
> > +                int set;
> > +
> > +                now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > +                /* arbitrary 100 ms to do the commit */
> > +                set = !!(now >= started + 100);
> > +
> > +                if (offset == A_CXL_BI_RT_STATUS) {
> > +                    val = FIELD_DP32(val, CXL_BI_RT_STATUS, COMMITTED, set);
> > +                } else {
> > +                    val = FIELD_DP32(val, CXL_BI_DECODER_STATUS, COMMITTED,
> > +                                     set);
> > +                }
> > +                stl_le_p((uint8_t *)cache_mem + offset, val);
> > +            }
> > +        }
> > +
> > +        return cregs->cache_mem_registers[offset / 4];
> >      case 8:
> >          qemu_log_mask(LOG_UNIMP,
> >                        "CXL 8 byte cache mem registers not implemented\n");
> > @@ -118,6 +148,47 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> >      stl_le_p((uint8_t *)cache_mem + offset, value);
> >  }
> >  
> > +static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > +                            uint32_t value)
> > +{
> > +    ComponentRegisters *cregs = &cxl_cstate->crb;
> > +    uint32_t sts, *cache_mem = cregs->cache_mem_registers;
> > +    bool to_commit = false;
> > +    int type = 0; /* Unused value - work around for compiler warning */
> > +
> > +    switch (offset) {
> > +    case A_CXL_BI_RT_CTRL:
> > +        to_commit = FIELD_EX32(value, CXL_BI_RT_CTRL, COMMIT);
> > +        if (to_commit) {
> > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > +                                         R_CXL_BI_RT_STATUS, 4);
> > +            sts = FIELD_DP32(sts, CXL_BI_RT_STATUS, COMMITTED, 0);
> > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_RT_STATUS, sts);
> > +            type = CXL_BISTATE_RT;
> > +        }
> > +        break;
> > +    case A_CXL_BI_DECODER_CTRL:
> > +        to_commit = FIELD_EX32(value, CXL_BI_DECODER_CTRL, COMMIT);
> > +        if (to_commit) {
> > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > +                                         R_CXL_BI_DECODER_STATUS, 4);
> > +            sts = FIELD_DP32(sts, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_DECODER_STATUS, sts);
> > +            type = CXL_BISTATE_DECODER;
> > +        }
> > +        break;
> > +    default:
> > +        break;
> > +    }
> > +
> > +    if (to_commit) {
> > +        cxl_cstate->bi_state[type].last_commit =
> > +                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > +    }
> > +
> > +    stl_le_p((uint8_t *)cache_mem + offset, value);
> > +}
> > +
> >  static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> >                                      unsigned size)
> >  {
> > @@ -141,6 +212,9 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> >          if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
> >              offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) {
> >              dumb_hdm_handler(cxl_cstate, offset, value);
> > +        } else if (offset == A_CXL_BI_RT_CTRL ||
> > +                   offset == A_CXL_BI_DECODER_CTRL) {
> > +            bi_handler(cxl_cstate, offset, value);
> >          } else {
> >              cregs->cache_mem_registers[offset / 4] = value;
> >          }
> > @@ -230,7 +304,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
> >  }
> >  
> >  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > -                            enum reg_type type)
> > +                            enum reg_type type, bool bi)
> >  {
> >      int decoder_count = CXL_HDM_DECODER_COUNT;
> >      int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
> > @@ -255,7 +329,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> >                       UIO_DECODER_COUNT, 0);
> >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0);
> >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
> > -                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
> > +                     SUPPORTED_COHERENCY_MODEL,
> > +                     /* host+dev or Unknown */
> > +                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
> >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
> >                       HDM_DECODER_ENABLE, 0);
> >      write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
> > @@ -278,9 +354,43 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> >      }
> >  }
> >  
> > +static void bi_rt_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > +{
> > +    /* switch usp must commit the new BI-ID, timeout of 2secs */
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 1);
> > +
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CTRL, COMMIT, 0);
> > +    write_msk[R_CXL_BI_RT_CTRL] = 0x1;
> > +
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMITTED, 0);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 0);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 0x6);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 0x2);
> > +}
> > +
> > +static void bi_decoder_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > +                                   enum reg_type type)
> > +{
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, HDM_D, 0);
> > +    /* switch dsp must commit the new BI-ID, timeout of 2secs */
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT,
> > +                     (type != CXL2_ROOT_PORT && type != CXL2_TYPE3_DEVICE));
> > +
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_FW, 0);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_ENABLE, 0);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, COMMIT, 0);
> > +    write_msk[R_CXL_BI_DECODER_CTRL] = 0x7;
> > +
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 0);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 0x6);
> > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 0x2);
> > +}
> > +
> >  void cxl_component_register_init_common(uint32_t *reg_state,
> >                                          uint32_t *write_msk,
> > -                                        enum reg_type type)
> > +                                        enum reg_type type,
> > +                                        bool bi)
> >  {
> >      int caps = 0;
> >  
> > @@ -320,7 +430,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> >      case CXL2_LOGICAL_DEVICE:
> >          /* + HDM */
> >          init_cap_reg(HDM, 5, 1);
> > -        hdm_init_common(reg_state, write_msk, type);
> > +        hdm_init_common(reg_state, write_msk, type, bi);
> >          /* fallthrough */
> >      case CXL2_DOWNSTREAM_PORT:
> >      case CXL2_DEVICE:
> > @@ -335,6 +445,24 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> >          abort();
> >      }
> >  
> > +    /* back invalidate */
> > +    if (bi) {
> > +        switch (type) {
> > +        case CXL2_UPSTREAM_PORT:
> > +            init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
> > +            bi_rt_init_common(reg_state, write_msk);
> > +            break;
> > +        case CXL2_ROOT_PORT:
> > +        case CXL2_DOWNSTREAM_PORT:
> > +        case CXL2_TYPE3_DEVICE:
> > +            init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
> > +            bi_decoder_init_common(reg_state, write_msk, type);
> > +            break;
> > +        default:
> > +            break;
> > +        }
> > +    }
> > +
> >      ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
> >  #undef init_cap_reg
> >  }
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 3c7ecd8c48bc..3f09c589ae58 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -748,6 +748,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> >          return false;
> >      }
> >  
> > +    if (!ct3d->flitmode && ct3d->hdmdb) {
> > +        error_setg(errp, "hdm-db requires operating in 256b flit");
> > +        return false;
> > +    }
> > +
> >      if (ct3d->hostvmem) {
> >          MemoryRegion *vmr;
> >          char *v_name;
> > @@ -1317,7 +1322,8 @@ static void ct3d_reset(DeviceState *dev)
> >  
> >      pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed,
> >                                ct3d->flitmode);
> > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
> > +    cxl_component_register_init_common(reg_state, write_msk,
> > +                                       CXL2_TYPE3_DEVICE, ct3d->hdmdb);
> >      cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
> >  
> >      /*
> > @@ -1356,6 +1362,7 @@ static const Property ct3_props[] = {
> >      DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
> >                                  width, PCIE_LINK_WIDTH_16),
> >      DEFINE_PROP_BOOL("x-256b-flit", CXLType3Dev, flitmode, false),
> > +    DEFINE_PROP_BOOL("hdm-db", CXLType3Dev, hdmdb, false),
> >  };
> >  
> >  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
> > diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
> > index 64086d8ec2f2..320818a8f1ce 100644
> > --- a/hw/pci-bridge/cxl_downstream.c
> > +++ b/hw/pci-bridge/cxl_downstream.c
> > @@ -39,7 +39,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
> >      uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
> >  
> >      cxl_component_register_init_common(reg_state, write_msk,
> > -                                       CXL2_DOWNSTREAM_PORT);
> > +                                       CXL2_DOWNSTREAM_PORT, true);
> >  }
> >  
> >  /* TODO: Look at sharing this code across all CXL port types */
> > diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
> > index 5641048084a4..e2093ac39ee6 100644
> > --- a/hw/pci-bridge/cxl_root_port.c
> > +++ b/hw/pci-bridge/cxl_root_port.c
> > @@ -101,7 +101,8 @@ static void latch_registers(CXLRootPort *crp)
> >      uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
> >      uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
> >  
> > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT,
> > +                                       true);
> >  }
> >  
> >  static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
> > diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> > index c352d11dc7b7..fb8d19539c9f 100644
> > --- a/hw/pci-bridge/cxl_upstream.c
> > +++ b/hw/pci-bridge/cxl_upstream.c
> > @@ -90,7 +90,7 @@ static void latch_registers(CXLUpstreamPort *usp)
> >      uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
> >  
> >      cxl_component_register_init_common(reg_state, write_msk,
> > -                                       CXL2_UPSTREAM_PORT);
> > +                                       CXL2_UPSTREAM_PORT, usp->flitmode);
> >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
> >  }
> >  
> > diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> > index b6e2eb796951..11623a5666f6 100644
> > --- a/hw/pci-bridge/pci_expander_bridge.c
> > +++ b/hw/pci-bridge/pci_expander_bridge.c
> > @@ -300,7 +300,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
> >      uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
> >      int dsp_count = 0;
> >  
> > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC);
> > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC, false);
> >      /*
> >       * The CXL specification allows for host bridges with no HDM decoders
> >       * if they only have a single root port.
> > -- 
> > 2.51.0
Re: [PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB
Posted by Jonathan Cameron via qemu development 2 days, 14 hours ago
On Wed, 4 Feb 2026 08:44:42 -0500
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Wed, Feb 04, 2026 at 08:24:55AM -0500, Michael S. Tsirkin wrote:
> > On Wed, Feb 04, 2026 at 12:12:14PM +0000, Jonathan Cameron wrote:  
> > > From: Davidlohr Bueso <dave@stgolabs.net>
> > > 
> > > Add basic plumbing for memory expander devices that support Back
> > > Invalidation. This introduces a 'hdm-db=on|off' parameter and
> > > exposes the relevant BI RT/Decoder component cachemem registers.
> > > 
> > > Some noteworthy properties:
> > >  - Devices require enabling Flit mode across the CXL topology.
> > >  - Explicit BI-ID commit is required.
> > >  - HDM decoder support both host and dev coherency models.
> > > 
> > > Tested-by: Dongjoo Seo <dongjoo.seo1@samsung.com>
> > > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  
> > 
> > more troubles:
> > https://gitlab.com/mstredhat/qemu/-/jobs/12984846000  
> 
> 
> clang just as unhappy:
> 
> https://gitlab.com/mstredhat/qemu/-/jobs/12984846060
They both seem to be clang.

Replicated.

Ah.  This old gotcha - at one point I got one of my compiler colleagues to report
it as a clang bug, but he lost the argument against the spec.  I believe it is
being fixed in the C spec longer term, though.  I can't find the thread
now, but we did discuss adding a local scope in the macro a while back.

Anyhow, the workaround is to add a scope so that the case label is not
directly followed by _Static_assert() (which is what QEMU_BUILD_BUG_ON()
expands to).  Despite smelling like a function call, it is a declaration
rather than a statement, and so has special rules...

Would you like a v7, or would you prefer to squash the following in?
Thanks

Jonathan


diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index a3077184e230..07aabe331c44 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -68,7 +68,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
     ComponentRegisters *cregs = &cxl_cstate->crb;

     switch (size) {
-    case 4:
+    case 4: {
         QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);

         if (offset == A_CXL_BI_RT_STATUS ||
@@ -101,6 +101,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
         }

         return cregs->cache_mem_registers[offset / 4];
+    }
     case 8:
         qemu_log_mask(LOG_UNIMP,
                       "CXL 8 byte cache mem registers not implemented\n");


> 
> >   
> > > ---
> > > v6: Initialize type variable to avoid a false compiler warning
> > >     from some versions of gcc. (Michael Tsirkin)
> > > ---
> > >  docs/system/devices/cxl.rst         |  23 +++++
> > >  include/hw/cxl/cxl_component.h      |  54 ++++++++++-
> > >  include/hw/cxl/cxl_device.h         |   3 +
> > >  hw/cxl/cxl-component-utils.c        | 142 ++++++++++++++++++++++++++--
> > >  hw/mem/cxl_type3.c                  |   9 +-
> > >  hw/pci-bridge/cxl_downstream.c      |   2 +-
> > >  hw/pci-bridge/cxl_root_port.c       |   3 +-
> > >  hw/pci-bridge/cxl_upstream.c        |   2 +-
> > >  hw/pci-bridge/pci_expander_bridge.c |   2 +-
> > >  9 files changed, 225 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> > > index ca15a0da1c1d..9d0771cdfd73 100644
> > > --- a/docs/system/devices/cxl.rst
> > > +++ b/docs/system/devices/cxl.rst
> > > @@ -384,6 +384,29 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
> > >    -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
> > >    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > >  
> > > +An example of 4 type3 devices with volatile memory below a switch. Two of the devices
> > > +use HDM-DB for coherence, which requires operating in Flit mode::
> > > +
> > > +  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
> > > +  ...
> > > +  -object memory-backend-ram,id=cxl-mem0,share=on,size=256M \
> > > +  -object memory-backend-ram,id=cxl-mem1,share=on,size=256M \
> > > +  -object memory-backend-ram,id=cxl-mem2,share=on,size=256M \
> > > +  -object memory-backend-ram,id=cxl-mem3,share=on,size=256M \
> > > +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> > > +  -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
> > > +  -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
> > > +  -device cxl-upstream,bus=root_port0,id=us0,x-256b-flit=on \
> > > +  -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> > > +  -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-mem0,sn=0x1,x-256b-flit=on,hdm-db=on \
> > > +  -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> > > +  -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-mem1,sn=0x2,x-256b-flit=on,hdm-db=on \
> > > +  -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> > > +  -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-mem2,sn=0x3 \
> > > +  -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> > > +  -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-mem3,sn=0x4 \
> > > +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > > +
> > >  A simple arm/virt example featuring a single direct connected CXL Type 3
> > >  Volatile Memory device::
> > >  
> > > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> > > index 1f167d3ef79b..ffc82202206c 100644
> > > --- a/include/hw/cxl/cxl_component.h
> > > +++ b/include/hw/cxl/cxl_component.h
> > > @@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
> > >  CXLx_CAPABILITY_HEADER(HDM, 3)
> > >  CXLx_CAPABILITY_HEADER(EXTSEC, 4)
> > >  CXLx_CAPABILITY_HEADER(SNOOP, 5)
> > > +CXLx_CAPABILITY_HEADER(BI_RT, 6)
> > > +CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
> > >  
> > >  /*
> > >   * Capability structures contain the actual registers that the CXL component
> > > @@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
> > >      (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
> > >  #define CXL_SNOOP_REGISTERS_SIZE   0x8
> > >  
> > > -QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
> > > -                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
> > > +#define CXL_BI_RT_CAP_VERSION 1
> > > +#define CXL_BI_RT_REGISTERS_OFFSET \
> > > +    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
> > > +#define CXL_BI_RT_REGISTERS_SIZE   0xC
> > > +
> > > +REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
> > > +    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
> > > +REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
> > > +    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
> > > +REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
> > > +    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
> > > +    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > +
> > > +/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
> > > +#define CXL_BI_DECODER_CAP_VERSION 1
> > > +#define CXL_BI_DECODER_REGISTERS_OFFSET \
> > > +    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
> > > +#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
> > > +
> > > +REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
> > > +    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
> > > +    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
> > > +REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
> > > +    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
> > > +    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
> > > +    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
> > > +REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
> > > +    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
> > > +    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > +
> > > +QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
> > > +                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
> > >                     "No space for registers");
> > >  
> > > +/* track BI explicit commit handling for route table and decoder */
> > > +enum {
> > > +    CXL_BISTATE_RT = 0,
> > > +    CXL_BISTATE_DECODER,
> > > +    CXL_BISTATE_MAX
> > > +};
> > > +
> > > +typedef struct bi_state {
> > > +    uint64_t last_commit;  /* last 0->1 transition */
> > > +} BIState;
> > > +
> > >  typedef struct component_registers {
> > >      /*
> > >       * Main memory region to be registered with QEMU core.
> > > @@ -259,6 +306,7 @@ typedef struct cxl_component {
> > >      };
> > >  
> > >      CDATObject cdat;
> > > +    BIState bi_state[CXL_BISTATE_MAX];
> > >  } CXLComponentState;
> > >  
> > >  void cxl_component_register_block_init(Object *obj,
> > > @@ -266,7 +314,7 @@ void cxl_component_register_block_init(Object *obj,
> > >                                         const char *type);
> > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > >                                          uint32_t *write_msk,
> > > -                                        enum reg_type type);
> > > +                                        enum reg_type type, bool bi);
> > >  
> > >  void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
> > >                                  enum reg_type cxl_dev_type, uint16_t length,
> > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > index 7d9236db8c85..393f3122173b 100644
> > > --- a/include/hw/cxl/cxl_device.h
> > > +++ b/include/hw/cxl/cxl_device.h
> > > @@ -770,6 +770,9 @@ struct CXLType3Dev {
> > >      CXLMemSparingReadAttrs rank_sparing_attrs;
> > >      CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
> > >  
> > > +    /* BI flows */
> > > +    bool hdmdb;
> > > +
> > >      struct dynamic_capacity {
> > >          HostMemoryBackend *host_dc;
> > >          AddressSpace host_dc_as;
> > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> > > index 91770f103a85..a7d36e1128c2 100644
> > > --- a/hw/cxl/cxl-component-utils.c
> > > +++ b/hw/cxl/cxl-component-utils.c
> > > @@ -71,10 +71,40 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
> > >      case 4:
> > >          if (cregs->special_ops && cregs->special_ops->read) {
> > >              return cregs->special_ops->read(cxl_cstate, offset, 4);
> > > -        } else {
> > > -            QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > -            return cregs->cache_mem_registers[offset / 4];
> > >          }
> > > +
> > > +        QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > +
> > > +        if (offset == A_CXL_BI_RT_STATUS ||
> > > +            offset == A_CXL_BI_DECODER_STATUS) {
> > > +            int type;
> > > +            uint64_t started;
> > > +
> > > +            type = (offset == A_CXL_BI_RT_STATUS) ?
> > > +                    CXL_BISTATE_RT : CXL_BISTATE_DECODER;
> > > +            started = cxl_cstate->bi_state[type].last_commit;
> > > +
> > > +            if (started) {
> > > +                uint32_t *cache_mem = cregs->cache_mem_registers;
> > > +                uint32_t val = cache_mem[offset / 4];
> > > +                uint64_t now;
> > > +                int set;
> > > +
> > > +                now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > +                /* arbitrary 100 ms to do the commit */
> > > +                set = !!(now >= started + 100);
> > > +
> > > +                if (offset == A_CXL_BI_RT_STATUS) {
> > > +                    val = FIELD_DP32(val, CXL_BI_RT_STATUS, COMMITTED, set);
> > > +                } else {
> > > +                    val = FIELD_DP32(val, CXL_BI_DECODER_STATUS, COMMITTED,
> > > +                                     set);
> > > +                }
> > > +                stl_le_p((uint8_t *)cache_mem + offset, val);
> > > +            }
> > > +        }
> > > +
> > > +        return cregs->cache_mem_registers[offset / 4];
> > >      case 8:
> > >          qemu_log_mask(LOG_UNIMP,
> > >                        "CXL 8 byte cache mem registers not implemented\n");
> > > @@ -118,6 +148,47 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > >      stl_le_p((uint8_t *)cache_mem + offset, value);
> > >  }
> > >  
> > > +static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > > +                            uint32_t value)
> > > +{
> > > +    ComponentRegisters *cregs = &cxl_cstate->crb;
> > > +    uint32_t sts, *cache_mem = cregs->cache_mem_registers;
> > > +    bool to_commit = false;
> > > +    int type = 0; /* Unused value - work around for compiler warning */
> > > +
> > > +    switch (offset) {
> > > +    case A_CXL_BI_RT_CTRL:
> > > +        to_commit = FIELD_EX32(value, CXL_BI_RT_CTRL, COMMIT);
> > > +        if (to_commit) {
> > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > +                                         R_CXL_BI_RT_STATUS, 4);
> > > +            sts = FIELD_DP32(sts, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_RT_STATUS, sts);
> > > +            type = CXL_BISTATE_RT;
> > > +        }
> > > +        break;
> > > +    case A_CXL_BI_DECODER_CTRL:
> > > +        to_commit = FIELD_EX32(value, CXL_BI_DECODER_CTRL, COMMIT);
> > > +        if (to_commit) {
> > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > +                                         R_CXL_BI_DECODER_STATUS, 4);
> > > +            sts = FIELD_DP32(sts, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_DECODER_STATUS, sts);
> > > +            type = CXL_BISTATE_DECODER;
> > > +        }
> > > +        break;
> > > +    default:
> > > +        break;
> > > +    }
> > > +
> > > +    if (to_commit) {
> > > +        cxl_cstate->bi_state[type].last_commit =
> > > +                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > +    }
> > > +
> > > +    stl_le_p((uint8_t *)cache_mem + offset, value);
> > > +}
> > > +
> > >  static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > >                                      unsigned size)
> > >  {
> > > @@ -141,6 +212,9 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > >          if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
> > >              offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) {
> > >              dumb_hdm_handler(cxl_cstate, offset, value);
> > > +        } else if (offset == A_CXL_BI_RT_CTRL ||
> > > +                   offset == A_CXL_BI_DECODER_CTRL) {
> > > +            bi_handler(cxl_cstate, offset, value);
> > >          } else {
> > >              cregs->cache_mem_registers[offset / 4] = value;
> > >          }
> > > @@ -230,7 +304,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > >  }
> > >  
> > >  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > -                            enum reg_type type)
> > > +                            enum reg_type type, bool bi)
> > >  {
> > >      int decoder_count = CXL_HDM_DECODER_COUNT;
> > >      int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
> > > @@ -255,7 +329,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > >                       UIO_DECODER_COUNT, 0);
> > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0);
> > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
> > > -                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
> > > +                     SUPPORTED_COHERENCY_MODEL,
> > > +                     /* host+dev or Unknown */
> > > +                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
> > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
> > >                       HDM_DECODER_ENABLE, 0);
> > >      write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
> > > @@ -278,9 +354,43 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > >      }
> > >  }
> > >  
> > > +static void bi_rt_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > > +{
> > > +    /* switch usp must commit the new BI-ID, timeout of 2secs */
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 1);
> > > +
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CTRL, COMMIT, 0);
> > > +    write_msk[R_CXL_BI_RT_CTRL] = 0x1;
> > > +
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 0);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 0x2);
> > > +}
> > > +
> > > +static void bi_decoder_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > +                                   enum reg_type type)
> > > +{
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, HDM_D, 0);
> > > +    /* switch dsp must commit the new BI-ID, timeout of 2secs */
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT,
> > > +                     (type != CXL2_ROOT_PORT && type != CXL2_TYPE3_DEVICE));
> > > +
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_FW, 0);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_ENABLE, 0);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, COMMIT, 0);
> > > +    write_msk[R_CXL_BI_DECODER_CTRL] = 0x7;
> > > +
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 0);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 0x2);
> > > +}
> > > +
> > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > >                                          uint32_t *write_msk,
> > > -                                        enum reg_type type)
> > > +                                        enum reg_type type,
> > > +                                        bool bi)
> > >  {
> > >      int caps = 0;
> > >  
> > > @@ -320,7 +430,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > >      case CXL2_LOGICAL_DEVICE:
> > >          /* + HDM */
> > >          init_cap_reg(HDM, 5, 1);
> > > -        hdm_init_common(reg_state, write_msk, type);
> > > +        hdm_init_common(reg_state, write_msk, type, bi);
> > >          /* fallthrough */
> > >      case CXL2_DOWNSTREAM_PORT:
> > >      case CXL2_DEVICE:
> > > @@ -335,6 +445,24 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > >          abort();
> > >      }
> > >  
> > > +    /* back invalidate */
> > > +    if (bi) {
> > > +        switch (type) {
> > > +        case CXL2_UPSTREAM_PORT:
> > > +            init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
> > > +            bi_rt_init_common(reg_state, write_msk);
> > > +            break;
> > > +        case CXL2_ROOT_PORT:
> > > +        case CXL2_DOWNSTREAM_PORT:
> > > +        case CXL2_TYPE3_DEVICE:
> > > +            init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
> > > +            bi_decoder_init_common(reg_state, write_msk, type);
> > > +            break;
> > > +        default:
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > >      ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
> > >  #undef init_cap_reg
> > >  }
> > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > index 3c7ecd8c48bc..3f09c589ae58 100644
> > > --- a/hw/mem/cxl_type3.c
> > > +++ b/hw/mem/cxl_type3.c
> > > @@ -748,6 +748,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > >          return false;
> > >      }
> > >  
> > > +    if (!ct3d->flitmode && ct3d->hdmdb) {
> > > +        error_setg(errp, "hdm-db requires operating in 256b flit");
> > > +        return false;
> > > +    }
> > > +
> > >      if (ct3d->hostvmem) {
> > >          MemoryRegion *vmr;
> > >          char *v_name;
> > > @@ -1317,7 +1322,8 @@ static void ct3d_reset(DeviceState *dev)
> > >  
> > >      pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed,
> > >                                ct3d->flitmode);
> > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
> > > +    cxl_component_register_init_common(reg_state, write_msk,
> > > +                                       CXL2_TYPE3_DEVICE, ct3d->hdmdb);
> > >      cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
> > >  
> > >      /*
> > > @@ -1356,6 +1362,7 @@ static const Property ct3_props[] = {
> > >      DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
> > >                                  width, PCIE_LINK_WIDTH_16),
> > >      DEFINE_PROP_BOOL("x-256b-flit", CXLType3Dev, flitmode, false),
> > > +    DEFINE_PROP_BOOL("hdm-db", CXLType3Dev, hdmdb, false),
> > >  };
> > >  
> > >  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
> > > diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
> > > index 64086d8ec2f2..320818a8f1ce 100644
> > > --- a/hw/pci-bridge/cxl_downstream.c
> > > +++ b/hw/pci-bridge/cxl_downstream.c
> > > @@ -39,7 +39,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
> > >      uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > >  
> > >      cxl_component_register_init_common(reg_state, write_msk,
> > > -                                       CXL2_DOWNSTREAM_PORT);
> > > +                                       CXL2_DOWNSTREAM_PORT, true);
> > >  }
> > >  
> > >  /* TODO: Look at sharing this code across all CXL port types */
> > > diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
> > > index 5641048084a4..e2093ac39ee6 100644
> > > --- a/hw/pci-bridge/cxl_root_port.c
> > > +++ b/hw/pci-bridge/cxl_root_port.c
> > > @@ -101,7 +101,8 @@ static void latch_registers(CXLRootPort *crp)
> > >      uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
> > >      uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > >  
> > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT,
> > > +                                       true);
> > >  }
> > >  
> > >  static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
> > > diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> > > index c352d11dc7b7..fb8d19539c9f 100644
> > > --- a/hw/pci-bridge/cxl_upstream.c
> > > +++ b/hw/pci-bridge/cxl_upstream.c
> > > @@ -90,7 +90,7 @@ static void latch_registers(CXLUpstreamPort *usp)
> > >      uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > >  
> > >      cxl_component_register_init_common(reg_state, write_msk,
> > > -                                       CXL2_UPSTREAM_PORT);
> > > +                                       CXL2_UPSTREAM_PORT, usp->flitmode);
> > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
> > >  }
> > >  
> > > diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> > > index b6e2eb796951..11623a5666f6 100644
> > > --- a/hw/pci-bridge/pci_expander_bridge.c
> > > +++ b/hw/pci-bridge/pci_expander_bridge.c
> > > @@ -300,7 +300,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
> > >      uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
> > >      int dsp_count = 0;
> > >  
> > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC);
> > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC, false);
> > >      /*
> > >       * The CXL specification allows for host bridges with no HDM decoders
> > >       * if they only have a single root port.
> > > -- 
> > > 2.51.0  
> 
>
Re: [PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB
Posted by Michael S. Tsirkin 2 days, 13 hours ago
On Wed, Feb 04, 2026 at 03:21:02PM +0000, Jonathan Cameron wrote:
> On Wed, 4 Feb 2026 08:44:42 -0500
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Wed, Feb 04, 2026 at 08:24:55AM -0500, Michael S. Tsirkin wrote:
> > > On Wed, Feb 04, 2026 at 12:12:14PM +0000, Jonathan Cameron wrote:  
> > > > From: Davidlohr Bueso <dave@stgolabs.net>
> > > > 
> > > > Add basic plumbing for memory expander devices that support Back
> > > > Invalidation. This introduces a 'hdm-db=on|off' parameter and
> > > > exposes the relevant BI RT/Decoder component cachemem registers.
> > > > 
> > > > Some noteworthy properties:
> > > >  - Devices require enabling Flit mode across the CXL topology.
> > > >  - Explicit BI-ID commit is required.
> > > >  - HDM decoder support both host and dev coherency models.
> > > > 
> > > > Tested-by: Dongjoo Seo <dongjoo.seo1@samsung.com>
> > > > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  
> > > 
> > > more troubles:
> > > https://gitlab.com/mstredhat/qemu/-/jobs/12984846000  
> > 
> > 
> > clang just as unhappy:
> > 
> > https://gitlab.com/mstredhat/qemu/-/jobs/12984846060
> They both seem to be clang.
> 
> Replicated.
> 
> Ah.  This old gotcha - at one point I got one of my compiler colleagues to report
> it as a clang bug but he lost the argument vs the spec.  Though I believe
> it is being fixed in the C spec longer term.

Yes, the compiler actually says it's been fixed in C23.

>  I can't find the thread
> now but we did discuss adding local scope in the macro a while back.
> 
> Anyhow work around is to add scope to avoid label for the case statement
> being followed by _Static_assert()
> Despite smelling like a function, it's not and has special rules...
> 
> Would you like a v7 or squash the following in?
> Thanks
> 
> Jonathan

v7 pls.

> 
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index a3077184e230..07aabe331c44 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -68,7 +68,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
>      ComponentRegisters *cregs = &cxl_cstate->crb;
> 
>      switch (size) {
> -    case 4:
> +    case 4: {
>          QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> 
>          if (offset == A_CXL_BI_RT_STATUS ||
> @@ -101,6 +101,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
>          }
> 
>          return cregs->cache_mem_registers[offset / 4];
> +    }
>      case 8:
>          qemu_log_mask(LOG_UNIMP,
>                        "CXL 8 byte cache mem registers not implemented\n");
> 
> 
> > 
> > >   
> > > > ---
> > > > v6: Initialize type variable to avoid a false compiler warning
> > > >     from some versions of gcc. (Michael Tsirkin)
> > > > ---
> > > >  docs/system/devices/cxl.rst         |  23 +++++
> > > >  include/hw/cxl/cxl_component.h      |  54 ++++++++++-
> > > >  include/hw/cxl/cxl_device.h         |   3 +
> > > >  hw/cxl/cxl-component-utils.c        | 142 ++++++++++++++++++++++++++--
> > > >  hw/mem/cxl_type3.c                  |   9 +-
> > > >  hw/pci-bridge/cxl_downstream.c      |   2 +-
> > > >  hw/pci-bridge/cxl_root_port.c       |   3 +-
> > > >  hw/pci-bridge/cxl_upstream.c        |   2 +-
> > > >  hw/pci-bridge/pci_expander_bridge.c |   2 +-
> > > >  9 files changed, 225 insertions(+), 15 deletions(-)
> > > > 
> > > > diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> > > > index ca15a0da1c1d..9d0771cdfd73 100644
> > > > --- a/docs/system/devices/cxl.rst
> > > > +++ b/docs/system/devices/cxl.rst
> > > > @@ -384,6 +384,29 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
> > > >    -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
> > > >    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > > >  
> > > > +An example of 4 type3 devices with volatile memory below a switch. Two of the devices
> > > > +use HDM-DB for coherence, which requires operating in Flit mode::
> > > > +
> > > > +  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
> > > > +  ...
> > > > +  -object memory-backend-ram,id=cxl-mem0,share=on,size=256M \
> > > > +  -object memory-backend-ram,id=cxl-mem1,share=on,size=256M \
> > > > +  -object memory-backend-ram,id=cxl-mem2,share=on,size=256M \
> > > > +  -object memory-backend-ram,id=cxl-mem3,share=on,size=256M \
> > > > +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> > > > +  -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
> > > > +  -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
> > > > +  -device cxl-upstream,bus=root_port0,id=us0,x-256b-flit=on \
> > > > +  -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> > > > +  -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-mem0,sn=0x1,x-256b-flit=on,hdm-db=on \
> > > > +  -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> > > > +  -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-mem1,sn=0x2,x-256b-flit=on,hdm-db=on \
> > > > +  -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> > > > +  -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-mem2,sn=0x3 \
> > > > +  -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> > > > +  -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-mem3,sn=0x4 \
> > > > +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > > > +
> > > >  A simple arm/virt example featuring a single direct connected CXL Type 3
> > > >  Volatile Memory device::
> > > >  
> > > > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> > > > index 1f167d3ef79b..ffc82202206c 100644
> > > > --- a/include/hw/cxl/cxl_component.h
> > > > +++ b/include/hw/cxl/cxl_component.h
> > > > @@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
> > > >  CXLx_CAPABILITY_HEADER(HDM, 3)
> > > >  CXLx_CAPABILITY_HEADER(EXTSEC, 4)
> > > >  CXLx_CAPABILITY_HEADER(SNOOP, 5)
> > > > +CXLx_CAPABILITY_HEADER(BI_RT, 6)
> > > > +CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
> > > >  
> > > >  /*
> > > >   * Capability structures contain the actual registers that the CXL component
> > > > @@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
> > > >      (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
> > > >  #define CXL_SNOOP_REGISTERS_SIZE   0x8
> > > >  
> > > > -QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
> > > > -                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
> > > > +#define CXL_BI_RT_CAP_VERSION 1
> > > > +#define CXL_BI_RT_REGISTERS_OFFSET \
> > > > +    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
> > > > +#define CXL_BI_RT_REGISTERS_SIZE   0xC
> > > > +
> > > > +REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
> > > > +    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
> > > > +REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
> > > > +    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
> > > > +REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
> > > > +    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
> > > > +    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > > +
> > > > +/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
> > > > +#define CXL_BI_DECODER_CAP_VERSION 1
> > > > +#define CXL_BI_DECODER_REGISTERS_OFFSET \
> > > > +    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
> > > > +#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
> > > > +
> > > > +REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
> > > > +    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
> > > > +    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
> > > > +REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
> > > > +    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
> > > > +    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
> > > > +    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
> > > > +REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > > +
> > > > +QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
> > > > +                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
> > > >                     "No space for registers");
> > > >  
> > > > +/* track BI explicit commit handling for route table and decoder */
> > > > +enum {
> > > > +    CXL_BISTATE_RT = 0,
> > > > +    CXL_BISTATE_DECODER,
> > > > +    CXL_BISTATE_MAX
> > > > +};
> > > > +
> > > > +typedef struct bi_state {
> > > > +    uint64_t last_commit;  /* last 0->1 transition */
> > > > +} BIState;
> > > > +
> > > >  typedef struct component_registers {
> > > >      /*
> > > >       * Main memory region to be registered with QEMU core.
> > > > @@ -259,6 +306,7 @@ typedef struct cxl_component {
> > > >      };
> > > >  
> > > >      CDATObject cdat;
> > > > +    BIState bi_state[CXL_BISTATE_MAX];
> > > >  } CXLComponentState;
> > > >  
> > > >  void cxl_component_register_block_init(Object *obj,
> > > > @@ -266,7 +314,7 @@ void cxl_component_register_block_init(Object *obj,
> > > >                                         const char *type);
> > > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > > >                                          uint32_t *write_msk,
> > > > -                                        enum reg_type type);
> > > > +                                        enum reg_type type, bool bi);
> > > >  
> > > >  void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
> > > >                                  enum reg_type cxl_dev_type, uint16_t length,
> > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > index 7d9236db8c85..393f3122173b 100644
> > > > --- a/include/hw/cxl/cxl_device.h
> > > > +++ b/include/hw/cxl/cxl_device.h
> > > > @@ -770,6 +770,9 @@ struct CXLType3Dev {
> > > >      CXLMemSparingReadAttrs rank_sparing_attrs;
> > > >      CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
> > > >  
> > > > +    /* BI flows */
> > > > +    bool hdmdb;
> > > > +
> > > >      struct dynamic_capacity {
> > > >          HostMemoryBackend *host_dc;
> > > >          AddressSpace host_dc_as;
> > > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> > > > index 91770f103a85..a7d36e1128c2 100644
> > > > --- a/hw/cxl/cxl-component-utils.c
> > > > +++ b/hw/cxl/cxl-component-utils.c
> > > > @@ -71,10 +71,40 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
> > > >      case 4:
> > > >          if (cregs->special_ops && cregs->special_ops->read) {
> > > >              return cregs->special_ops->read(cxl_cstate, offset, 4);
> > > > -        } else {
> > > > -            QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > > -            return cregs->cache_mem_registers[offset / 4];
> > > >          }
> > > > +
> > > > +        QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > > +
> > > > +        if (offset == A_CXL_BI_RT_STATUS ||
> > > > +            offset == A_CXL_BI_DECODER_STATUS) {
> > > > +            int type;
> > > > +            uint64_t started;
> > > > +
> > > > +            type = (offset == A_CXL_BI_RT_STATUS) ?
> > > > +                    CXL_BISTATE_RT : CXL_BISTATE_DECODER;
> > > > +            started = cxl_cstate->bi_state[type].last_commit;
> > > > +
> > > > +            if (started) {
> > > > +                uint32_t *cache_mem = cregs->cache_mem_registers;
> > > > +                uint32_t val = cache_mem[offset / 4];
> > > > +                uint64_t now;
> > > > +                int set;
> > > > +
> > > > +                now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > > +                /* arbitrary 100 ms to do the commit */
> > > > +                set = !!(now >= started + 100);
> > > > +
> > > > +                if (offset == A_CXL_BI_RT_STATUS) {
> > > > +                    val = FIELD_DP32(val, CXL_BI_RT_STATUS, COMMITTED, set);
> > > > +                } else {
> > > > +                    val = FIELD_DP32(val, CXL_BI_DECODER_STATUS, COMMITTED,
> > > > +                                     set);
> > > > +                }
> > > > +                stl_le_p((uint8_t *)cache_mem + offset, val);
> > > > +            }
> > > > +        }
> > > > +
> > > > +        return cregs->cache_mem_registers[offset / 4];
> > > >      case 8:
> > > >          qemu_log_mask(LOG_UNIMP,
> > > >                        "CXL 8 byte cache mem registers not implemented\n");
> > > > @@ -118,6 +148,47 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > > >      stl_le_p((uint8_t *)cache_mem + offset, value);
> > > >  }
> > > >  
> > > > +static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > > > +                            uint32_t value)
> > > > +{
> > > > +    ComponentRegisters *cregs = &cxl_cstate->crb;
> > > > +    uint32_t sts, *cache_mem = cregs->cache_mem_registers;
> > > > +    bool to_commit = false;
> > > > +    int type = 0; /* Unused value - work around for compiler warning */
> > > > +
> > > > +    switch (offset) {
> > > > +    case A_CXL_BI_RT_CTRL:
> > > > +        to_commit = FIELD_EX32(value, CXL_BI_RT_CTRL, COMMIT);
> > > > +        if (to_commit) {
> > > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > > +                                         R_CXL_BI_RT_STATUS, 4);
> > > > +            sts = FIELD_DP32(sts, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_RT_STATUS, sts);
> > > > +            type = CXL_BISTATE_RT;
> > > > +        }
> > > > +        break;
> > > > +    case A_CXL_BI_DECODER_CTRL:
> > > > +        to_commit = FIELD_EX32(value, CXL_BI_DECODER_CTRL, COMMIT);
> > > > +        if (to_commit) {
> > > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > > +                                         R_CXL_BI_DECODER_STATUS, 4);
> > > > +            sts = FIELD_DP32(sts, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_DECODER_STATUS, sts);
> > > > +            type = CXL_BISTATE_DECODER;
> > > > +        }
> > > > +        break;
> > > > +    default:
> > > > +        break;
> > > > +    }
> > > > +
> > > > +    if (to_commit) {
> > > > +        cxl_cstate->bi_state[type].last_commit =
> > > > +                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > > +    }
> > > > +
> > > > +    stl_le_p((uint8_t *)cache_mem + offset, value);
> > > > +}
> > > > +
> > > >  static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > > >                                      unsigned size)
> > > >  {
> > > > @@ -141,6 +212,9 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > > >          if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
> > > >              offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) {
> > > >              dumb_hdm_handler(cxl_cstate, offset, value);
> > > > +        } else if (offset == A_CXL_BI_RT_CTRL ||
> > > > +                   offset == A_CXL_BI_DECODER_CTRL) {
> > > > +            bi_handler(cxl_cstate, offset, value);
> > > >          } else {
> > > >              cregs->cache_mem_registers[offset / 4] = value;
> > > >          }
> > > > @@ -230,7 +304,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > > >  }
> > > >  
> > > >  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > -                            enum reg_type type)
> > > > +                            enum reg_type type, bool bi)
> > > >  {
> > > >      int decoder_count = CXL_HDM_DECODER_COUNT;
> > > >      int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
> > > > @@ -255,7 +329,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > >                       UIO_DECODER_COUNT, 0);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
> > > > -                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
> > > > +                     SUPPORTED_COHERENCY_MODEL,
> > > > +                     /* host+dev or Unknown */
> > > > +                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
> > > >                       HDM_DECODER_ENABLE, 0);
> > > >      write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
> > > > @@ -278,9 +354,43 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > >      }
> > > >  }
> > > >  
> > > > +static void bi_rt_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > > > +{
> > > > +    /* switch usp must commit the new BI-ID, timeout of 2secs */
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 1);
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CTRL, COMMIT, 0);
> > > > +    write_msk[R_CXL_BI_RT_CTRL] = 0x1;
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 0x2);
> > > > +}
> > > > +
> > > > +static void bi_decoder_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > +                                   enum reg_type type)
> > > > +{
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, HDM_D, 0);
> > > > +    /* switch dsp must commit the new BI-ID, timeout of 2secs */
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT,
> > > > +                     (type != CXL2_ROOT_PORT && type != CXL2_TYPE3_DEVICE));
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_FW, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_ENABLE, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, COMMIT, 0);
> > > > +    write_msk[R_CXL_BI_DECODER_CTRL] = 0x7;
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 0x2);
> > > > +}
> > > > +
> > > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > > >                                          uint32_t *write_msk,
> > > > -                                        enum reg_type type)
> > > > +                                        enum reg_type type,
> > > > +                                        bool bi)
> > > >  {
> > > >      int caps = 0;
> > > >  
> > > > @@ -320,7 +430,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > > >      case CXL2_LOGICAL_DEVICE:
> > > >          /* + HDM */
> > > >          init_cap_reg(HDM, 5, 1);
> > > > -        hdm_init_common(reg_state, write_msk, type);
> > > > +        hdm_init_common(reg_state, write_msk, type, bi);
> > > >          /* fallthrough */
> > > >      case CXL2_DOWNSTREAM_PORT:
> > > >      case CXL2_DEVICE:
> > > > @@ -335,6 +445,24 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > > >          abort();
> > > >      }
> > > >  
> > > > +    /* back invalidate */
> > > > +    if (bi) {
> > > > +        switch (type) {
> > > > +        case CXL2_UPSTREAM_PORT:
> > > > +            init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
> > > > +            bi_rt_init_common(reg_state, write_msk);
> > > > +            break;
> > > > +        case CXL2_ROOT_PORT:
> > > > +        case CXL2_DOWNSTREAM_PORT:
> > > > +        case CXL2_TYPE3_DEVICE:
> > > > +            init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
> > > > +            bi_decoder_init_common(reg_state, write_msk, type);
> > > > +            break;
> > > > +        default:
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
> > > >  #undef init_cap_reg
> > > >  }
> > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > index 3c7ecd8c48bc..3f09c589ae58 100644
> > > > --- a/hw/mem/cxl_type3.c
> > > > +++ b/hw/mem/cxl_type3.c
> > > > @@ -748,6 +748,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > >          return false;
> > > >      }
> > > >  
> > > > +    if (!ct3d->flitmode && ct3d->hdmdb) {
> > > > +        error_setg(errp, "hdm-db requires operating in 256b flit");
> > > > +        return false;
> > > > +    }
> > > > +
> > > >      if (ct3d->hostvmem) {
> > > >          MemoryRegion *vmr;
> > > >          char *v_name;
> > > > @@ -1317,7 +1322,8 @@ static void ct3d_reset(DeviceState *dev)
> > > >  
> > > >      pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed,
> > > >                                ct3d->flitmode);
> > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
> > > > +    cxl_component_register_init_common(reg_state, write_msk,
> > > > +                                       CXL2_TYPE3_DEVICE, ct3d->hdmdb);
> > > >      cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
> > > >  
> > > >      /*
> > > > @@ -1356,6 +1362,7 @@ static const Property ct3_props[] = {
> > > >      DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
> > > >                                  width, PCIE_LINK_WIDTH_16),
> > > >      DEFINE_PROP_BOOL("x-256b-flit", CXLType3Dev, flitmode, false),
> > > > +    DEFINE_PROP_BOOL("hdm-db", CXLType3Dev, hdmdb, false),
> > > >  };
> > > >  
> > > >  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
> > > > diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
> > > > index 64086d8ec2f2..320818a8f1ce 100644
> > > > --- a/hw/pci-bridge/cxl_downstream.c
> > > > +++ b/hw/pci-bridge/cxl_downstream.c
> > > > @@ -39,7 +39,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
> > > >      uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > >  
> > > >      cxl_component_register_init_common(reg_state, write_msk,
> > > > -                                       CXL2_DOWNSTREAM_PORT);
> > > > +                                       CXL2_DOWNSTREAM_PORT, true);
> > > >  }
> > > >  
> > > >  /* TODO: Look at sharing this code across all CXL port types */
> > > > diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
> > > > index 5641048084a4..e2093ac39ee6 100644
> > > > --- a/hw/pci-bridge/cxl_root_port.c
> > > > +++ b/hw/pci-bridge/cxl_root_port.c
> > > > @@ -101,7 +101,8 @@ static void latch_registers(CXLRootPort *crp)
> > > >      uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
> > > >      uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > >  
> > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> > > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT,
> > > > +                                       true);
> > > >  }
> > > >  
> > > >  static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
> > > > diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> > > > index c352d11dc7b7..fb8d19539c9f 100644
> > > > --- a/hw/pci-bridge/cxl_upstream.c
> > > > +++ b/hw/pci-bridge/cxl_upstream.c
> > > > @@ -90,7 +90,7 @@ static void latch_registers(CXLUpstreamPort *usp)
> > > >      uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > >  
> > > >      cxl_component_register_init_common(reg_state, write_msk,
> > > > -                                       CXL2_UPSTREAM_PORT);
> > > > +                                       CXL2_UPSTREAM_PORT, usp->flitmode);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
> > > >  }
> > > >  
> > > > diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> > > > index b6e2eb796951..11623a5666f6 100644
> > > > --- a/hw/pci-bridge/pci_expander_bridge.c
> > > > +++ b/hw/pci-bridge/pci_expander_bridge.c
> > > > @@ -300,7 +300,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
> > > >      uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
> > > >      int dsp_count = 0;
> > > >  
> > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC);
> > > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC, false);
> > > >      /*
> > > >       * The CXL specification allows for host bridges with no HDM decoders
> > > >       * if they only have a single root port.
> > > > -- 
> > > > 2.51.0  
> > 
> >
Re: [PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB
Posted by Jonathan Cameron via qemu development 2 days, 14 hours ago
On Wed, 4 Feb 2026 15:21:02 +0000
Jonathan Cameron via qemu development <qemu-devel@nongnu.org> wrote:

> On Wed, 4 Feb 2026 08:44:42 -0500
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Wed, Feb 04, 2026 at 08:24:55AM -0500, Michael S. Tsirkin wrote:  
> > > On Wed, Feb 04, 2026 at 12:12:14PM +0000, Jonathan Cameron wrote:    
> > > > From: Davidlohr Bueso <dave@stgolabs.net>
> > > > 
> > > > Add basic plumbing for memory expander devices that support Back
> > > > Invalidation. This introduces a 'hdm-db=on|off' parameter and
> > > > exposes the relevant BI RT/Decoder component cachemem registers.
> > > > 
> > > > Some noteworthy properties:
> > > >  - Devices require enabling Flit mode across the CXL topology.
> > > >  - Explicit BI-ID commit is required.
> > > > >  - HDM decoders support both host and dev coherency models.
> > > > 
> > > > Tested-by: Dongjoo Seo <dongjoo.seo1@samsung.com>
> > > > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>    
> > > 
> > > more troubles:
> > > https://gitlab.com/mstredhat/qemu/-/jobs/12984846000    
> > 
> > 
> > clang just as unhappy:
> > 
> > https://gitlab.com/mstredhat/qemu/-/jobs/12984846060  
> They both seem to be clang.
> 
> Replicated.
> 
> Ah.  This old gotcha - at one point I got one of my compiler colleagues to report
> it as a clang bug but he lost the argument vs the spec.  Though I believe
> it is being fixed in the C spec longer term.  I can't find the thread
> now but we did discuss adding local scope in the macro a while back.
> 
> Anyhow, the workaround is to add a scope so that the label for the case
> statement is not immediately followed by _Static_assert().
> Despite smelling like a function call, it's not one: it is a declaration,
> and before C23 a label has to be followed by a statement...
> 
> Would you like a v7 or squash the following in?

Or, alternatively: the problem only actually occurs after the next patch
(which happens to bring the _Static_assert to the top of the case statement).
So drop patch 7 and I'll revisit just that little one at a later date.

> Thanks
> 
> Jonathan
> 
> 
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index a3077184e230..07aabe331c44 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -68,7 +68,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
>      ComponentRegisters *cregs = &cxl_cstate->crb;
> 
>      switch (size) {
> -    case 4:
> +    case 4: {
>          QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> 
>          if (offset == A_CXL_BI_RT_STATUS ||
> @@ -101,6 +101,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
>          }
> 
>          return cregs->cache_mem_registers[offset / 4];
> +    }
>      case 8:
>          qemu_log_mask(LOG_UNIMP,
>                        "CXL 8 byte cache mem registers not implemented\n");
> 
> 
> >   
> > >     
> > > > ---
> > > > v6: Initialize type variable to avoid a false compiler warning
> > > >     from some versions of gcc. (Michael Tsirkin)
> > > > ---
> > > >  docs/system/devices/cxl.rst         |  23 +++++
> > > >  include/hw/cxl/cxl_component.h      |  54 ++++++++++-
> > > >  include/hw/cxl/cxl_device.h         |   3 +
> > > >  hw/cxl/cxl-component-utils.c        | 142 ++++++++++++++++++++++++++--
> > > >  hw/mem/cxl_type3.c                  |   9 +-
> > > >  hw/pci-bridge/cxl_downstream.c      |   2 +-
> > > >  hw/pci-bridge/cxl_root_port.c       |   3 +-
> > > >  hw/pci-bridge/cxl_upstream.c        |   2 +-
> > > >  hw/pci-bridge/pci_expander_bridge.c |   2 +-
> > > >  9 files changed, 225 insertions(+), 15 deletions(-)
> > > > 
> > > > diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> > > > index ca15a0da1c1d..9d0771cdfd73 100644
> > > > --- a/docs/system/devices/cxl.rst
> > > > +++ b/docs/system/devices/cxl.rst
> > > > @@ -384,6 +384,29 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
> > > >    -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
> > > >    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > > >  
> > > > +An example of 4 type3 devices with volatile memory below a switch. Two of the devices
> > > > +use HDM-DB for coherence, which requires operating in Flit mode::
> > > > +
> > > > +  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
> > > > +  ...
> > > > +  -object memory-backend-ram,id=cxl-mem0,share=on,size=256M \
> > > > +  -object memory-backend-ram,id=cxl-mem1,share=on,size=256M \
> > > > +  -object memory-backend-ram,id=cxl-mem2,share=on,size=256M \
> > > > +  -object memory-backend-ram,id=cxl-mem3,share=on,size=256M \
> > > > +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> > > > +  -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
> > > > +  -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
> > > > +  -device cxl-upstream,bus=root_port0,id=us0,x-256b-flit=on \
> > > > +  -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> > > > +  -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-mem0,sn=0x1,x-256b-flit=on,hdm-db=on \
> > > > +  -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> > > > +  -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-mem1,sn=0x2,x-256b-flit=on,hdm-db=on \
> > > > +  -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> > > > +  -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-mem2,sn=0x3 \
> > > > +  -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> > > > +  -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-mem3,sn=0x4 \
> > > > +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > > > +
> > > >  A simple arm/virt example featuring a single direct connected CXL Type 3
> > > >  Volatile Memory device::
> > > >  
> > > > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> > > > index 1f167d3ef79b..ffc82202206c 100644
> > > > --- a/include/hw/cxl/cxl_component.h
> > > > +++ b/include/hw/cxl/cxl_component.h
> > > > @@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
> > > >  CXLx_CAPABILITY_HEADER(HDM, 3)
> > > >  CXLx_CAPABILITY_HEADER(EXTSEC, 4)
> > > >  CXLx_CAPABILITY_HEADER(SNOOP, 5)
> > > > +CXLx_CAPABILITY_HEADER(BI_RT, 6)
> > > > +CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
> > > >  
> > > >  /*
> > > >   * Capability structures contain the actual registers that the CXL component
> > > > @@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
> > > >      (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
> > > >  #define CXL_SNOOP_REGISTERS_SIZE   0x8
> > > >  
> > > > -QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
> > > > -                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
> > > > +#define CXL_BI_RT_CAP_VERSION 1
> > > > +#define CXL_BI_RT_REGISTERS_OFFSET \
> > > > +    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
> > > > +#define CXL_BI_RT_REGISTERS_SIZE   0xC
> > > > +
> > > > +REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
> > > > +    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
> > > > +REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
> > > > +    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
> > > > +REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
> > > > +    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
> > > > +    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > > +
> > > > +/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
> > > > +#define CXL_BI_DECODER_CAP_VERSION 1
> > > > +#define CXL_BI_DECODER_REGISTERS_OFFSET \
> > > > +    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
> > > > +#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
> > > > +
> > > > +REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
> > > > +    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
> > > > +    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
> > > > +REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
> > > > +    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
> > > > +    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
> > > > +    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
> > > > +REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > > +
> > > > +QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
> > > > +                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
> > > >                     "No space for registers");
> > > >  
> > > > +/* track BI explicit commit handling for route table and decoder */
> > > > +enum {
> > > > +    CXL_BISTATE_RT = 0,
> > > > +    CXL_BISTATE_DECODER,
> > > > +    CXL_BISTATE_MAX
> > > > +};
> > > > +
> > > > +typedef struct bi_state {
> > > > +    uint64_t last_commit;  /* last 0->1 transition */
> > > > +} BIState;
> > > > +
> > > >  typedef struct component_registers {
> > > >      /*
> > > >       * Main memory region to be registered with QEMU core.
> > > > @@ -259,6 +306,7 @@ typedef struct cxl_component {
> > > >      };
> > > >  
> > > >      CDATObject cdat;
> > > > +    BIState bi_state[CXL_BISTATE_MAX];
> > > >  } CXLComponentState;
> > > >  
> > > >  void cxl_component_register_block_init(Object *obj,
> > > > @@ -266,7 +314,7 @@ void cxl_component_register_block_init(Object *obj,
> > > >                                         const char *type);
> > > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > > >                                          uint32_t *write_msk,
> > > > -                                        enum reg_type type);
> > > > +                                        enum reg_type type, bool bi);
> > > >  
> > > >  void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
> > > >                                  enum reg_type cxl_dev_type, uint16_t length,
> > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > index 7d9236db8c85..393f3122173b 100644
> > > > --- a/include/hw/cxl/cxl_device.h
> > > > +++ b/include/hw/cxl/cxl_device.h
> > > > @@ -770,6 +770,9 @@ struct CXLType3Dev {
> > > >      CXLMemSparingReadAttrs rank_sparing_attrs;
> > > >      CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
> > > >  
> > > > +    /* BI flows */
> > > > +    bool hdmdb;
> > > > +
> > > >      struct dynamic_capacity {
> > > >          HostMemoryBackend *host_dc;
> > > >          AddressSpace host_dc_as;
> > > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> > > > index 91770f103a85..a7d36e1128c2 100644
> > > > --- a/hw/cxl/cxl-component-utils.c
> > > > +++ b/hw/cxl/cxl-component-utils.c
> > > > @@ -71,10 +71,40 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
> > > >      case 4:
> > > >          if (cregs->special_ops && cregs->special_ops->read) {
> > > >              return cregs->special_ops->read(cxl_cstate, offset, 4);
> > > > -        } else {
> > > > -            QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > > -            return cregs->cache_mem_registers[offset / 4];
> > > >          }
> > > > +
> > > > +        QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > > +
> > > > +        if (offset == A_CXL_BI_RT_STATUS ||
> > > > +            offset == A_CXL_BI_DECODER_STATUS) {
> > > > +            int type;
> > > > +            uint64_t started;
> > > > +
> > > > +            type = (offset == A_CXL_BI_RT_STATUS) ?
> > > > +                    CXL_BISTATE_RT : CXL_BISTATE_DECODER;
> > > > +            started = cxl_cstate->bi_state[type].last_commit;
> > > > +
> > > > +            if (started) {
> > > > +                uint32_t *cache_mem = cregs->cache_mem_registers;
> > > > +                uint32_t val = cache_mem[offset / 4];
> > > > +                uint64_t now;
> > > > +                int set;
> > > > +
> > > > +                now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > > +                /* arbitrary 100 ms to do the commit */
> > > > +                set = !!(now >= started + 100);
> > > > +
> > > > +                if (offset == A_CXL_BI_RT_STATUS) {
> > > > +                    val = FIELD_DP32(val, CXL_BI_RT_STATUS, COMMITTED, set);
> > > > +                } else {
> > > > +                    val = FIELD_DP32(val, CXL_BI_DECODER_STATUS, COMMITTED,
> > > > +                                     set);
> > > > +                }
> > > > +                stl_le_p((uint8_t *)cache_mem + offset, val);
> > > > +            }
> > > > +        }
> > > > +
> > > > +        return cregs->cache_mem_registers[offset / 4];
> > > >      case 8:
> > > >          qemu_log_mask(LOG_UNIMP,
> > > >                        "CXL 8 byte cache mem registers not implemented\n");
> > > > @@ -118,6 +148,47 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > > >      stl_le_p((uint8_t *)cache_mem + offset, value);
> > > >  }
> > > >  
> > > > +static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > > > +                            uint32_t value)
> > > > +{
> > > > +    ComponentRegisters *cregs = &cxl_cstate->crb;
> > > > +    uint32_t sts, *cache_mem = cregs->cache_mem_registers;
> > > > +    bool to_commit = false;
> > > > +    int type = 0; /* Unused value - work around for compiler warning */
> > > > +
> > > > +    switch (offset) {
> > > > +    case A_CXL_BI_RT_CTRL:
> > > > +        to_commit = FIELD_EX32(value, CXL_BI_RT_CTRL, COMMIT);
> > > > +        if (to_commit) {
> > > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > > +                                         R_CXL_BI_RT_STATUS, 4);
> > > > +            sts = FIELD_DP32(sts, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_RT_STATUS, sts);
> > > > +            type = CXL_BISTATE_RT;
> > > > +        }
> > > > +        break;
> > > > +    case A_CXL_BI_DECODER_CTRL:
> > > > +        to_commit = FIELD_EX32(value, CXL_BI_DECODER_CTRL, COMMIT);
> > > > +        if (to_commit) {
> > > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > > +                                         R_CXL_BI_DECODER_STATUS, 4);
> > > > +            sts = FIELD_DP32(sts, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_DECODER_STATUS, sts);
> > > > +            type = CXL_BISTATE_DECODER;
> > > > +        }
> > > > +        break;
> > > > +    default:
> > > > +        break;
> > > > +    }
> > > > +
> > > > +    if (to_commit) {
> > > > +        cxl_cstate->bi_state[type].last_commit =
> > > > +                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > > +    }
> > > > +
> > > > +    stl_le_p((uint8_t *)cache_mem + offset, value);
> > > > +}
> > > > +
> > > >  static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > > >                                      unsigned size)
> > > >  {
> > > > @@ -141,6 +212,9 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > > >          if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
> > > >              offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) {
> > > >              dumb_hdm_handler(cxl_cstate, offset, value);
> > > > +        } else if (offset == A_CXL_BI_RT_CTRL ||
> > > > +                   offset == A_CXL_BI_DECODER_CTRL) {
> > > > +            bi_handler(cxl_cstate, offset, value);
> > > >          } else {
> > > >              cregs->cache_mem_registers[offset / 4] = value;
> > > >          }
> > > > @@ -230,7 +304,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > > >  }
> > > >  
> > > >  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > -                            enum reg_type type)
> > > > +                            enum reg_type type, bool bi)
> > > >  {
> > > >      int decoder_count = CXL_HDM_DECODER_COUNT;
> > > >      int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
> > > > @@ -255,7 +329,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > >                       UIO_DECODER_COUNT, 0);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
> > > > -                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
> > > > +                     SUPPORTED_COHERENCY_MODEL,
> > > > +                     /* host+dev or Unknown */
> > > > +                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
> > > >                       HDM_DECODER_ENABLE, 0);
> > > >      write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
> > > > @@ -278,9 +354,43 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > >      }
> > > >  }
> > > >  
> > > > +static void bi_rt_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > > > +{
> > > > +    /* switch usp must commit the new BI-ID, timeout of 2secs */
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 1);
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CTRL, COMMIT, 0);
> > > > +    write_msk[R_CXL_BI_RT_CTRL] = 0x1;
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 0x2);
> > > > +}
> > > > +
> > > > +static void bi_decoder_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > +                                   enum reg_type type)
> > > > +{
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, HDM_D, 0);
> > > > +    /* switch dsp must commit the new BI-ID, timeout of 2secs */
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT,
> > > > +                     (type != CXL2_ROOT_PORT && type != CXL2_TYPE3_DEVICE));
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_FW, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_ENABLE, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, COMMIT, 0);
> > > > +    write_msk[R_CXL_BI_DECODER_CTRL] = 0x7;
> > > > +
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 0);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 0x2);
> > > > +}
> > > > +
> > > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > > >                                          uint32_t *write_msk,
> > > > -                                        enum reg_type type)
> > > > +                                        enum reg_type type,
> > > > +                                        bool bi)
> > > >  {
> > > >      int caps = 0;
> > > >  
> > > > @@ -320,7 +430,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > > >      case CXL2_LOGICAL_DEVICE:
> > > >          /* + HDM */
> > > >          init_cap_reg(HDM, 5, 1);
> > > > -        hdm_init_common(reg_state, write_msk, type);
> > > > +        hdm_init_common(reg_state, write_msk, type, bi);
> > > >          /* fallthrough */
> > > >      case CXL2_DOWNSTREAM_PORT:
> > > >      case CXL2_DEVICE:
> > > > @@ -335,6 +445,24 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > > >          abort();
> > > >      }
> > > >  
> > > > +    /* back invalidate */
> > > > +    if (bi) {
> > > > +        switch (type) {
> > > > +        case CXL2_UPSTREAM_PORT:
> > > > +            init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
> > > > +            bi_rt_init_common(reg_state, write_msk);
> > > > +            break;
> > > > +        case CXL2_ROOT_PORT:
> > > > +        case CXL2_DOWNSTREAM_PORT:
> > > > +        case CXL2_TYPE3_DEVICE:
> > > > +            init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
> > > > +            bi_decoder_init_common(reg_state, write_msk, type);
> > > > +            break;
> > > > +        default:
> > > > +            break;
> > > > +        }
> > > > +    }
> > > > +
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
> > > >  #undef init_cap_reg
> > > >  }
> > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > index 3c7ecd8c48bc..3f09c589ae58 100644
> > > > --- a/hw/mem/cxl_type3.c
> > > > +++ b/hw/mem/cxl_type3.c
> > > > @@ -748,6 +748,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > >          return false;
> > > >      }
> > > >  
> > > > +    if (!ct3d->flitmode && ct3d->hdmdb) {
> > > > +        error_setg(errp, "hdm-db requires operating in 256b flit");
> > > > +        return false;
> > > > +    }
> > > > +
> > > >      if (ct3d->hostvmem) {
> > > >          MemoryRegion *vmr;
> > > >          char *v_name;
> > > > @@ -1317,7 +1322,8 @@ static void ct3d_reset(DeviceState *dev)
> > > >  
> > > >      pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed,
> > > >                                ct3d->flitmode);
> > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
> > > > +    cxl_component_register_init_common(reg_state, write_msk,
> > > > +                                       CXL2_TYPE3_DEVICE, ct3d->hdmdb);
> > > >      cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
> > > >  
> > > >      /*
> > > > @@ -1356,6 +1362,7 @@ static const Property ct3_props[] = {
> > > >      DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
> > > >                                  width, PCIE_LINK_WIDTH_16),
> > > >      DEFINE_PROP_BOOL("x-256b-flit", CXLType3Dev, flitmode, false),
> > > > +    DEFINE_PROP_BOOL("hdm-db", CXLType3Dev, hdmdb, false),
> > > >  };
> > > >  
> > > >  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
> > > > diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
> > > > index 64086d8ec2f2..320818a8f1ce 100644
> > > > --- a/hw/pci-bridge/cxl_downstream.c
> > > > +++ b/hw/pci-bridge/cxl_downstream.c
> > > > @@ -39,7 +39,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
> > > >      uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > >  
> > > >      cxl_component_register_init_common(reg_state, write_msk,
> > > > -                                       CXL2_DOWNSTREAM_PORT);
> > > > +                                       CXL2_DOWNSTREAM_PORT, true);
> > > >  }
> > > >  
> > > >  /* TODO: Look at sharing this code across all CXL port types */
> > > > diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
> > > > index 5641048084a4..e2093ac39ee6 100644
> > > > --- a/hw/pci-bridge/cxl_root_port.c
> > > > +++ b/hw/pci-bridge/cxl_root_port.c
> > > > @@ -101,7 +101,8 @@ static void latch_registers(CXLRootPort *crp)
> > > >      uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
> > > >      uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > >  
> > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> > > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT,
> > > > +                                       true);
> > > >  }
> > > >  
> > > >  static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
> > > > diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> > > > index c352d11dc7b7..fb8d19539c9f 100644
> > > > --- a/hw/pci-bridge/cxl_upstream.c
> > > > +++ b/hw/pci-bridge/cxl_upstream.c
> > > > @@ -90,7 +90,7 @@ static void latch_registers(CXLUpstreamPort *usp)
> > > >      uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > >  
> > > >      cxl_component_register_init_common(reg_state, write_msk,
> > > > -                                       CXL2_UPSTREAM_PORT);
> > > > +                                       CXL2_UPSTREAM_PORT, usp->flitmode);
> > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
> > > >  }
> > > >  
> > > > diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> > > > index b6e2eb796951..11623a5666f6 100644
> > > > --- a/hw/pci-bridge/pci_expander_bridge.c
> > > > +++ b/hw/pci-bridge/pci_expander_bridge.c
> > > > @@ -300,7 +300,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
> > > >      uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
> > > >      int dsp_count = 0;
> > > >  
> > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC);
> > > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC, false);
> > > >      /*
> > > >       * The CXL specification allows for host bridges with no HDM decoders
> > > >       * if they only have a single root port.
> > > > -- 
> > > > 2.51.0    
> > 
> >   
> 
>
Re: [PATCH qemu v6 6/7] hw/cxl: Support type3 HDM-DB
Posted by Jonathan Cameron via qemu development 2 days, 14 hours ago
On Wed, 4 Feb 2026 15:40:36 +0000
Jonathan Cameron <jonathan.cameron@huawei.com> wrote:

> On Wed, 4 Feb 2026 15:21:02 +0000
> Jonathan Cameron via qemu development <qemu-devel@nongnu.org> wrote:
> 
> > On Wed, 4 Feb 2026 08:44:42 -0500
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >   
> > > On Wed, Feb 04, 2026 at 08:24:55AM -0500, Michael S. Tsirkin wrote:    
> > > > On Wed, Feb 04, 2026 at 12:12:14PM +0000, Jonathan Cameron wrote:      
> > > > > From: Davidlohr Bueso <dave@stgolabs.net>
> > > > > 
> > > > > Add basic plumbing for memory expander devices that support Back
> > > > > Invalidation. This introduces a 'hdm-db=on|off' parameter and
> > > > > exposes the relevant BI RT/Decoder component cachemem registers.
> > > > > 
> > > > > Some noteworthy properties:
> > > > >  - Devices require enabling Flit mode across the CXL topology.
> > > > >  - Explicit BI-ID commit is required.
> > > > > >  - HDM decoders support both host and dev coherency models.
> > > > > 
> > > > > Tested-by: Dongjoo Seo <dongjoo.seo1@samsung.com>
> > > > > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> > > > > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>      
> > > > 
> > > > more troubles:
> > > > https://gitlab.com/mstredhat/qemu/-/jobs/12984846000      
> > > 
> > > 
> > > clang just as unhappy:
> > > 
> > > https://gitlab.com/mstredhat/qemu/-/jobs/12984846060    
> > They both seem to be clang.
> > 
> > Replicated.
> > 
> > Ah.  This old gotcha - at one point I got one of my compiler colleagues to report
> > it as a clang bug but he lost the argument vs the spec.  Though I believe
> > it is being fixed in the C spec longer term.  I can't find the thread
> > now but we did discuss adding local scope in the macro a while back.
Did some archaeology:
https://lore.kernel.org/qemu-devel/20231019124819.00005f6a@Huawei.com/

We can't add scope inside the macro as it's sometimes invoked outside
of functions.  So it's a case of papering over this whenever we happen to
hit it.

Jonathan


> > 
> > Anyhow, the workaround is to add scope so that the case label is not
> > immediately followed by _Static_assert().
> > Despite smelling like a function call, it is not one and has special rules...
> > 
> > Would you like a v7 or squash the following in?  
> 
> Or: the problem only actually occurs after the next patch (which happens
> to bring the _Static_assert to the top of the case statement).
> So drop patch 7 and I'll revisit just that little one at a later date.

> 
> > Thanks
> > 
> > Jonathan
> > 
> > 
> > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> > index a3077184e230..07aabe331c44 100644
> > --- a/hw/cxl/cxl-component-utils.c
> > +++ b/hw/cxl/cxl-component-utils.c
> > @@ -68,7 +68,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
> >      ComponentRegisters *cregs = &cxl_cstate->crb;
> > 
> >      switch (size) {
> > -    case 4:
> > +    case 4: {
> >          QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > 
> >          if (offset == A_CXL_BI_RT_STATUS ||
> > @@ -101,6 +101,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
> >          }
> > 
> >          return cregs->cache_mem_registers[offset / 4];
> > +    }
> >      case 8:
> >          qemu_log_mask(LOG_UNIMP,
> >                        "CXL 8 byte cache mem registers not implemented\n");
> > 
> >   
> > >     
> > > >       
> > > > > ---
> > > > > v6: Initialize type variable to avoid a false compiler warning
> > > > >     from some versions of gcc. (Michael Tsirkin)
> > > > > ---
> > > > >  docs/system/devices/cxl.rst         |  23 +++++
> > > > >  include/hw/cxl/cxl_component.h      |  54 ++++++++++-
> > > > >  include/hw/cxl/cxl_device.h         |   3 +
> > > > >  hw/cxl/cxl-component-utils.c        | 142 ++++++++++++++++++++++++++--
> > > > >  hw/mem/cxl_type3.c                  |   9 +-
> > > > >  hw/pci-bridge/cxl_downstream.c      |   2 +-
> > > > >  hw/pci-bridge/cxl_root_port.c       |   3 +-
> > > > >  hw/pci-bridge/cxl_upstream.c        |   2 +-
> > > > >  hw/pci-bridge/pci_expander_bridge.c |   2 +-
> > > > >  9 files changed, 225 insertions(+), 15 deletions(-)
> > > > > 
> > > > > diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
> > > > > index ca15a0da1c1d..9d0771cdfd73 100644
> > > > > --- a/docs/system/devices/cxl.rst
> > > > > +++ b/docs/system/devices/cxl.rst
> > > > > @@ -384,6 +384,29 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave::
> > > > >    -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,sn=0x4 \
> > > > >    -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > > > >  
> > > > > +An example of 4 type3 devices with volatile memory below a switch. Two of the devices
> > > > > +use HDM-DB for coherence, which requires operating in Flit mode::
> > > > > +
> > > > > +  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
> > > > > +  ...
> > > > > +  -object memory-backend-ram,id=cxl-mem0,share=on,size=256M \
> > > > > +  -object memory-backend-ram,id=cxl-mem1,share=on,size=256M \
> > > > > +  -object memory-backend-ram,id=cxl-mem2,share=on,size=256M \
> > > > > +  -object memory-backend-ram,id=cxl-mem3,share=on,size=256M \
> > > > > +  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
> > > > > +  -device cxl-rp,port=0,bus=cxl.1,id=root_port0,chassis=0,slot=0 \
> > > > > +  -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \
> > > > > +  -device cxl-upstream,bus=root_port0,id=us0,x-256b-flit=on \
> > > > > +  -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \
> > > > > +  -device cxl-type3,bus=swport0,volatile-memdev=cxl-mem0,id=cxl-mem0,sn=0x1,x-256b-flit=on,hdm-db=on \
> > > > > +  -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \
> > > > > +  -device cxl-type3,bus=swport1,volatile-memdev=cxl-mem1,id=cxl-mem1,sn=0x2,x-256b-flit=on,hdm-db=on \
> > > > > +  -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \
> > > > > +  -device cxl-type3,bus=swport2,volatile-memdev=cxl-mem2,id=cxl-mem2,sn=0x3 \
> > > > > +  -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \
> > > > > +  -device cxl-type3,bus=swport3,volatile-memdev=cxl-mem3,id=cxl-mem3,sn=0x4 \
> > > > > +  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k
> > > > > +
> > > > >  A simple arm/virt example featuring a single direct connected CXL Type 3
> > > > >  Volatile Memory device::
> > > > >  
> > > > > diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> > > > > index 1f167d3ef79b..ffc82202206c 100644
> > > > > --- a/include/hw/cxl/cxl_component.h
> > > > > +++ b/include/hw/cxl/cxl_component.h
> > > > > @@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
> > > > >  CXLx_CAPABILITY_HEADER(HDM, 3)
> > > > >  CXLx_CAPABILITY_HEADER(EXTSEC, 4)
> > > > >  CXLx_CAPABILITY_HEADER(SNOOP, 5)
> > > > > +CXLx_CAPABILITY_HEADER(BI_RT, 6)
> > > > > +CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
> > > > >  
> > > > >  /*
> > > > >   * Capability structures contain the actual registers that the CXL component
> > > > > @@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
> > > > >      (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
> > > > >  #define CXL_SNOOP_REGISTERS_SIZE   0x8
> > > > >  
> > > > > -QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
> > > > > -                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
> > > > > +#define CXL_BI_RT_CAP_VERSION 1
> > > > > +#define CXL_BI_RT_REGISTERS_OFFSET \
> > > > > +    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
> > > > > +#define CXL_BI_RT_REGISTERS_SIZE   0xC
> > > > > +
> > > > > +REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
> > > > > +    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
> > > > > +REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
> > > > > +    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
> > > > > +REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
> > > > > +    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
> > > > > +    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > > > +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > > > +
> > > > > +/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
> > > > > +#define CXL_BI_DECODER_CAP_VERSION 1
> > > > > +#define CXL_BI_DECODER_REGISTERS_OFFSET \
> > > > > +    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
> > > > > +#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
> > > > > +
> > > > > +REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
> > > > > +    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
> > > > > +    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
> > > > > +REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
> > > > > +    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
> > > > > +    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
> > > > > +    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
> > > > > +REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
> > > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
> > > > > +    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
> > > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
> > > > > +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
> > > > > +
> > > > > +QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
> > > > > +                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
> > > > >                     "No space for registers");
> > > > >  
> > > > > +/* track BI explicit commit handling for route table and decoder */
> > > > > +enum {
> > > > > +    CXL_BISTATE_RT = 0,
> > > > > +    CXL_BISTATE_DECODER,
> > > > > +    CXL_BISTATE_MAX
> > > > > +};
> > > > > +
> > > > > +typedef struct bi_state {
> > > > > +    uint64_t last_commit;  /* last 0->1 transition */
> > > > > +} BIState;
> > > > > +
> > > > >  typedef struct component_registers {
> > > > >      /*
> > > > >       * Main memory region to be registered with QEMU core.
> > > > > @@ -259,6 +306,7 @@ typedef struct cxl_component {
> > > > >      };
> > > > >  
> > > > >      CDATObject cdat;
> > > > > +    BIState bi_state[CXL_BISTATE_MAX];
> > > > >  } CXLComponentState;
> > > > >  
> > > > >  void cxl_component_register_block_init(Object *obj,
> > > > > @@ -266,7 +314,7 @@ void cxl_component_register_block_init(Object *obj,
> > > > >                                         const char *type);
> > > > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > > > >                                          uint32_t *write_msk,
> > > > > -                                        enum reg_type type);
> > > > > +                                        enum reg_type type, bool bi);
> > > > >  
> > > > >  void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
> > > > >                                  enum reg_type cxl_dev_type, uint16_t length,
> > > > > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> > > > > index 7d9236db8c85..393f3122173b 100644
> > > > > --- a/include/hw/cxl/cxl_device.h
> > > > > +++ b/include/hw/cxl/cxl_device.h
> > > > > @@ -770,6 +770,9 @@ struct CXLType3Dev {
> > > > >      CXLMemSparingReadAttrs rank_sparing_attrs;
> > > > >      CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
> > > > >  
> > > > > +    /* BI flows */
> > > > > +    bool hdmdb;
> > > > > +
> > > > >      struct dynamic_capacity {
> > > > >          HostMemoryBackend *host_dc;
> > > > >          AddressSpace host_dc_as;
> > > > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> > > > > index 91770f103a85..a7d36e1128c2 100644
> > > > > --- a/hw/cxl/cxl-component-utils.c
> > > > > +++ b/hw/cxl/cxl-component-utils.c
> > > > > @@ -71,10 +71,40 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
> > > > >      case 4:
> > > > >          if (cregs->special_ops && cregs->special_ops->read) {
> > > > >              return cregs->special_ops->read(cxl_cstate, offset, 4);
> > > > > -        } else {
> > > > > -            QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > > > -            return cregs->cache_mem_registers[offset / 4];
> > > > >          }
> > > > > +
> > > > > +        QEMU_BUILD_BUG_ON(sizeof(*cregs->cache_mem_registers) != 4);
> > > > > +
> > > > > +        if (offset == A_CXL_BI_RT_STATUS ||
> > > > > +            offset == A_CXL_BI_DECODER_STATUS) {
> > > > > +            int type;
> > > > > +            uint64_t started;
> > > > > +
> > > > > +            type = (offset == A_CXL_BI_RT_STATUS) ?
> > > > > +                    CXL_BISTATE_RT : CXL_BISTATE_DECODER;
> > > > > +            started = cxl_cstate->bi_state[type].last_commit;
> > > > > +
> > > > > +            if (started) {
> > > > > +                uint32_t *cache_mem = cregs->cache_mem_registers;
> > > > > +                uint32_t val = cache_mem[offset / 4];
> > > > > +                uint64_t now;
> > > > > +                int set;
> > > > > +
> > > > > +                now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > > > +                /* arbitrary 100 ms to do the commit */
> > > > > +                set = !!(now >= started + 100);
> > > > > +
> > > > > +                if (offset == A_CXL_BI_RT_STATUS) {
> > > > > +                    val = FIELD_DP32(val, CXL_BI_RT_STATUS, COMMITTED, set);
> > > > > +                } else {
> > > > > +                    val = FIELD_DP32(val, CXL_BI_DECODER_STATUS, COMMITTED,
> > > > > +                                     set);
> > > > > +                }
> > > > > +                stl_le_p((uint8_t *)cache_mem + offset, val);
> > > > > +            }
> > > > > +        }
> > > > > +
> > > > > +        return cregs->cache_mem_registers[offset / 4];
> > > > >      case 8:
> > > > >          qemu_log_mask(LOG_UNIMP,
> > > > >                        "CXL 8 byte cache mem registers not implemented\n");
> > > > > @@ -118,6 +148,47 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > > > >      stl_le_p((uint8_t *)cache_mem + offset, value);
> > > > >  }
> > > > >  
> > > > > +static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> > > > > +                            uint32_t value)
> > > > > +{
> > > > > +    ComponentRegisters *cregs = &cxl_cstate->crb;
> > > > > +    uint32_t sts, *cache_mem = cregs->cache_mem_registers;
> > > > > +    bool to_commit = false;
> > > > > +    int type = 0; /* Unused value - work around for compiler warning */
> > > > > +
> > > > > +    switch (offset) {
> > > > > +    case A_CXL_BI_RT_CTRL:
> > > > > +        to_commit = FIELD_EX32(value, CXL_BI_RT_CTRL, COMMIT);
> > > > > +        if (to_commit) {
> > > > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > > > +                                         R_CXL_BI_RT_STATUS, 4);
> > > > > +            sts = FIELD_DP32(sts, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_RT_STATUS, sts);
> > > > > +            type = CXL_BISTATE_RT;
> > > > > +        }
> > > > > +        break;
> > > > > +    case A_CXL_BI_DECODER_CTRL:
> > > > > +        to_commit = FIELD_EX32(value, CXL_BI_DECODER_CTRL, COMMIT);
> > > > > +        if (to_commit) {
> > > > > +            sts = cxl_cache_mem_read_reg(cxl_cstate,
> > > > > +                                         R_CXL_BI_DECODER_STATUS, 4);
> > > > > +            sts = FIELD_DP32(sts, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > > > +            stl_le_p((uint8_t *)cache_mem + R_CXL_BI_DECODER_STATUS, sts);
> > > > > +            type = CXL_BISTATE_DECODER;
> > > > > +        }
> > > > > +        break;
> > > > > +    default:
> > > > > +        break;
> > > > > +    }
> > > > > +
> > > > > +    if (to_commit) {
> > > > > +        cxl_cstate->bi_state[type].last_commit =
> > > > > +                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > > > > +    }
> > > > > +
> > > > > +    stl_le_p((uint8_t *)cache_mem + offset, value);
> > > > > +}
> > > > > +
> > > > >  static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > > > >                                      unsigned size)
> > > > >  {
> > > > > @@ -141,6 +212,9 @@ static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
> > > > >          if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
> > > > >              offset <= A_CXL_HDM_DECODER3_TARGET_LIST_HI) {
> > > > >              dumb_hdm_handler(cxl_cstate, offset, value);
> > > > > +        } else if (offset == A_CXL_BI_RT_CTRL ||
> > > > > +                   offset == A_CXL_BI_DECODER_CTRL) {
> > > > > +            bi_handler(cxl_cstate, offset, value);
> > > > >          } else {
> > > > >              cregs->cache_mem_registers[offset / 4] = value;
> > > > >          }
> > > > > @@ -230,7 +304,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > > > >  }
> > > > >  
> > > > >  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > > -                            enum reg_type type)
> > > > > +                            enum reg_type type, bool bi)
> > > > >  {
> > > > >      int decoder_count = CXL_HDM_DECODER_COUNT;
> > > > >      int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
> > > > > @@ -255,7 +329,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > >                       UIO_DECODER_COUNT, 0);
> > > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 0);
> > > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
> > > > > -                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
> > > > > +                     SUPPORTED_COHERENCY_MODEL,
> > > > > +                     /* host+dev or Unknown */
> > > > > +                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
> > > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
> > > > >                       HDM_DECODER_ENABLE, 0);
> > > > >      write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
> > > > > @@ -278,9 +354,43 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > >      }
> > > > >  }
> > > > >  
> > > > > +static void bi_rt_init_common(uint32_t *reg_state, uint32_t *write_msk)
> > > > > +{
> > > > > +    /* switch usp must commit the new BI-ID, timeout of 2secs */
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 1);
> > > > > +
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_CTRL, COMMIT, 0);
> > > > > +    write_msk[R_CXL_BI_RT_CTRL] = 0x1;
> > > > > +
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMITTED, 0);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 0);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 0x2);
> > > > > +}
> > > > > +
> > > > > +static void bi_decoder_init_common(uint32_t *reg_state, uint32_t *write_msk,
> > > > > +                                   enum reg_type type)
> > > > > +{
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, HDM_D, 0);
> > > > > +    /* switch dsp must commit the new BI-ID, timeout of 2secs */
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT,
> > > > > +                     (type != CXL2_ROOT_PORT && type != CXL2_TYPE3_DEVICE));
> > > > > +
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_FW, 0);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, BI_ENABLE, 0);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_CTRL, COMMIT, 0);
> > > > > +    write_msk[R_CXL_BI_DECODER_CTRL] = 0x7;
> > > > > +
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMITTED, 0);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 0);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 0x6);
> > > > > +    ARRAY_FIELD_DP32(reg_state, CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 0x2);
> > > > > +}
> > > > > +
> > > > >  void cxl_component_register_init_common(uint32_t *reg_state,
> > > > >                                          uint32_t *write_msk,
> > > > > -                                        enum reg_type type)
> > > > > +                                        enum reg_type type,
> > > > > +                                        bool bi)
> > > > >  {
> > > > >      int caps = 0;
> > > > >  
> > > > > @@ -320,7 +430,7 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > > > >      case CXL2_LOGICAL_DEVICE:
> > > > >          /* + HDM */
> > > > >          init_cap_reg(HDM, 5, 1);
> > > > > -        hdm_init_common(reg_state, write_msk, type);
> > > > > +        hdm_init_common(reg_state, write_msk, type, bi);
> > > > >          /* fallthrough */
> > > > >      case CXL2_DOWNSTREAM_PORT:
> > > > >      case CXL2_DEVICE:
> > > > > @@ -335,6 +445,24 @@ void cxl_component_register_init_common(uint32_t *reg_state,
> > > > >          abort();
> > > > >      }
> > > > >  
> > > > > +    /* back invalidate */
> > > > > +    if (bi) {
> > > > > +        switch (type) {
> > > > > +        case CXL2_UPSTREAM_PORT:
> > > > > +            init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
> > > > > +            bi_rt_init_common(reg_state, write_msk);
> > > > > +            break;
> > > > > +        case CXL2_ROOT_PORT:
> > > > > +        case CXL2_DOWNSTREAM_PORT:
> > > > > +        case CXL2_TYPE3_DEVICE:
> > > > > +            init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
> > > > > +            bi_decoder_init_common(reg_state, write_msk, type);
> > > > > +            break;
> > > > > +        default:
> > > > > +            break;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > >      ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
> > > > >  #undef init_cap_reg
> > > > >  }
> > > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > > index 3c7ecd8c48bc..3f09c589ae58 100644
> > > > > --- a/hw/mem/cxl_type3.c
> > > > > +++ b/hw/mem/cxl_type3.c
> > > > > @@ -748,6 +748,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > > >          return false;
> > > > >      }
> > > > >  
> > > > > +    if (!ct3d->flitmode && ct3d->hdmdb) {
> > > > > +        error_setg(errp, "hdm-db requires operating in 256b flit");
> > > > > +        return false;
> > > > > +    }
> > > > > +
> > > > >      if (ct3d->hostvmem) {
> > > > >          MemoryRegion *vmr;
> > > > >          char *v_name;
> > > > > @@ -1317,7 +1322,8 @@ static void ct3d_reset(DeviceState *dev)
> > > > >  
> > > > >      pcie_cap_fill_link_ep_usp(PCI_DEVICE(dev), ct3d->width, ct3d->speed,
> > > > >                                ct3d->flitmode);
> > > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
> > > > > +    cxl_component_register_init_common(reg_state, write_msk,
> > > > > +                                       CXL2_TYPE3_DEVICE, ct3d->hdmdb);
> > > > >      cxl_device_register_init_t3(ct3d, CXL_T3_MSIX_MBOX);
> > > > >  
> > > > >      /*
> > > > > @@ -1356,6 +1362,7 @@ static const Property ct3_props[] = {
> > > > >      DEFINE_PROP_PCIE_LINK_WIDTH("x-width", CXLType3Dev,
> > > > >                                  width, PCIE_LINK_WIDTH_16),
> > > > >      DEFINE_PROP_BOOL("x-256b-flit", CXLType3Dev, flitmode, false),
> > > > > +    DEFINE_PROP_BOOL("hdm-db", CXLType3Dev, hdmdb, false),
> > > > >  };
> > > > >  
> > > > >  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
> > > > > diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
> > > > > index 64086d8ec2f2..320818a8f1ce 100644
> > > > > --- a/hw/pci-bridge/cxl_downstream.c
> > > > > +++ b/hw/pci-bridge/cxl_downstream.c
> > > > > @@ -39,7 +39,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
> > > > >      uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > > >  
> > > > >      cxl_component_register_init_common(reg_state, write_msk,
> > > > > -                                       CXL2_DOWNSTREAM_PORT);
> > > > > +                                       CXL2_DOWNSTREAM_PORT, true);
> > > > >  }
> > > > >  
> > > > >  /* TODO: Look at sharing this code across all CXL port types */
> > > > > diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
> > > > > index 5641048084a4..e2093ac39ee6 100644
> > > > > --- a/hw/pci-bridge/cxl_root_port.c
> > > > > +++ b/hw/pci-bridge/cxl_root_port.c
> > > > > @@ -101,7 +101,8 @@ static void latch_registers(CXLRootPort *crp)
> > > > >      uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
> > > > >      uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > > >  
> > > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> > > > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT,
> > > > > +                                       true);
> > > > >  }
> > > > >  
> > > > >  static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
> > > > > diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> > > > > index c352d11dc7b7..fb8d19539c9f 100644
> > > > > --- a/hw/pci-bridge/cxl_upstream.c
> > > > > +++ b/hw/pci-bridge/cxl_upstream.c
> > > > > @@ -90,7 +90,7 @@ static void latch_registers(CXLUpstreamPort *usp)
> > > > >      uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
> > > > >  
> > > > >      cxl_component_register_init_common(reg_state, write_msk,
> > > > > -                                       CXL2_UPSTREAM_PORT);
> > > > > +                                       CXL2_UPSTREAM_PORT, usp->flitmode);
> > > > >      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
> > > > >  }
> > > > >  
> > > > > diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
> > > > > index b6e2eb796951..11623a5666f6 100644
> > > > > --- a/hw/pci-bridge/pci_expander_bridge.c
> > > > > +++ b/hw/pci-bridge/pci_expander_bridge.c
> > > > > @@ -300,7 +300,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev)
> > > > >      uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
> > > > >      int dsp_count = 0;
> > > > >  
> > > > > -    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC);
> > > > > +    cxl_component_register_init_common(reg_state, write_msk, CXL2_RC, false);
> > > > >      /*
> > > > >       * The CXL specification allows for host bridges with no HDM decoders
> > > > >       * if they only have a single root port.
> > > > > -- 
> > > > > 2.51.0      
> > > 
> > >     
> > 
> >   
>