[PATCH v2 11/20] vfio/cxl: Introduce HDM decoder register emulation framework

mhonap@nvidia.com posted 20 patches 6 hours ago
[PATCH v2 11/20] vfio/cxl: Introduce HDM decoder register emulation framework
Posted by mhonap@nvidia.com 5 hours ago
From: Manish Honap <mhonap@nvidia.com>

Add HDM decoder register emulation for CXL devices assigned to a guest.

New file vfio_cxl_emu.c allocates comp_reg_virt[] covering the CXL.mem
register area (from CXL_CM_OFFSET through the end of the HDM decoder
block), snapshots it from MMIO after probe, and registers a VFIO device
region (VFIO_REGION_SUBTYPE_CXL_COMP_REGS) with read/write ops but no
mmap, so every access hits the emulated buffer and write dispatchers.

vfio_cxl_setup_virt_regs() is called from the tail of
vfio_cxl_setup_regs(); vfio_cxl_clean_virt_regs() runs on cleanup.

HDM decoder register defines come from include/uapi/cxl/cxl_regs.h.
Emulation-policy masks and offsets stay in vfio_cxl_priv.h.

hdm_decoder_n_ctrl_write() allows the guest to clear the LOCK bit.
A firmware-committed decoder arrives with LOCK=1; the guest driver
must clear it before reprogramming BASE and SIZE with the VM's GPA.
Such a write clears the bit in the shadow while preserving all other
fields.

Co-developed-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
 drivers/vfio/pci/Makefile            |   2 +-
 drivers/vfio/pci/cxl/vfio_cxl_core.c |   5 +
 drivers/vfio/pci/cxl/vfio_cxl_emu.c  | 433 +++++++++++++++++++++++++++
 drivers/vfio/pci/cxl/vfio_cxl_priv.h |  47 +++
 include/uapi/cxl/cxl_regs.h          |   5 +
 5 files changed, 491 insertions(+), 1 deletion(-)
 create mode 100644 drivers/vfio/pci/cxl/vfio_cxl_emu.c

diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index ecb0eacbc089..bef916495eae 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
-vfio-pci-core-$(CONFIG_VFIO_CXL_CORE) += cxl/vfio_cxl_core.o
+vfio-pci-core-$(CONFIG_VFIO_CXL_CORE) += cxl/vfio_cxl_core.o cxl/vfio_cxl_emu.o
 vfio-pci-core-$(CONFIG_VFIO_PCI_ZDEV_KVM) += vfio_pci_zdev.o
 vfio-pci-core-$(CONFIG_VFIO_PCI_DMABUF) += vfio_pci_dmabuf.o
 obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
diff --git a/drivers/vfio/pci/cxl/vfio_cxl_core.c b/drivers/vfio/pci/cxl/vfio_cxl_core.c
index b1c7603590b5..0b9e4419cd47 100644
--- a/drivers/vfio/pci/cxl/vfio_cxl_core.c
+++ b/drivers/vfio/pci/cxl/vfio_cxl_core.c
@@ -149,8 +149,11 @@ static int vfio_cxl_setup_regs(struct vfio_pci_core_device *vdev,
 	cxl->comp_reg_offset = bar_offset;
 	cxl->comp_reg_size = CXL_COMPONENT_REG_BLOCK_SIZE;
 
+	ret = vfio_cxl_setup_virt_regs(vdev, cxl, base);
 	iounmap(base);
 	release_mem_region(map->resource, map->max_size);
+	if (ret)
+		return ret;
 
 	return 0;
 
@@ -253,6 +256,8 @@ void vfio_pci_cxl_cleanup(struct vfio_pci_core_device *vdev)
 
 	if (!cxl)
 		return;
+
+	vfio_cxl_clean_virt_regs(cxl);
 }
 
 MODULE_IMPORT_NS("CXL");
diff --git a/drivers/vfio/pci/cxl/vfio_cxl_emu.c b/drivers/vfio/pci/cxl/vfio_cxl_emu.c
new file mode 100644
index 000000000000..6fb02253e631
--- /dev/null
+++ b/drivers/vfio/pci/cxl/vfio_cxl_emu.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/bitops.h>
+#include <linux/vfio_pci_core.h>
+
+#include "../vfio_pci_priv.h"
+#include "vfio_cxl_priv.h"
+
+/*
+ * comp_reg_virt[] shadow layout:
+ *   Covers the full CXL.mem register area (starting at CXL_CM_OFFSET
+ *   within the component register block).  Index 0 is the CXL Capability
+ *   Array Header; the HDM decoder block starts at index
+ *   hdm_reg_offset / sizeof(__le32).
+ *
+ * Register layout within the HDM block (CXL spec 4.0 8.2.4.20 CXL HDM Decoder
+ * Capability Structure):
+ *   0x00: HDM Decoder Capability
+ *   0x04: HDM Decoder Global Control
+ *   0x08: (reserved)
+ *   0x0c: (reserved)
+ *   For each decoder N (N=0..hdm_count-1), at base 0x10 + N*0x20:
+ *     +0x00: BASE_LO
+ *     +0x04: BASE_HI
+ *     +0x08: SIZE_LO
+ *     +0x0c: SIZE_HI
+ *     +0x10: CTRL
+ *     +0x14: TARGET_LIST_LO
+ *     +0x18: TARGET_LIST_HI
+ *     +0x1c: (reserved)
+ */
+
+static inline __le32 *hdm_reg_ptr(struct vfio_pci_cxl_state *cxl, u32 hdm_off)
+{
+	/*
+	 * hdm_off is a byte offset within the HDM decoder block.
+	 * comp_reg_virt covers the CXL.mem register area starting at
+	 * CXL_CM_OFFSET within the component register block.
+	 * hdm_reg_offset is CXL.mem-relative, so adding hdm_reg_offset
+	 * gives the correct index into comp_reg_virt[].
+	 */
+	return &cxl->comp_reg_virt[(cxl->hdm_reg_offset + hdm_off) /
+				   sizeof(__le32)];
+}
+
+static ssize_t virt_hdm_rev_reg_write(struct vfio_pci_core_device *vdev,
+				      const __le32 *val32, u64 offset, u64 size)
+{
+	/* Discard writes on reserved registers. */
+	return size;
+}
+
+static ssize_t hdm_decoder_n_lo_write(struct vfio_pci_core_device *vdev,
+				      const __le32 *val32, u64 offset, u64 size)
+{
+	u32 new_val = le32_to_cpu(*val32);
+
+	if (WARN_ON_ONCE(size != CXL_REG_SIZE_DWORD))
+		return -EINVAL;
+
+	/* Bits [27:0] are reserved. */
+	new_val &= ~CXL_HDM_DECODER_BASE_LO_RESERVED_MASK;
+
+	*hdm_reg_ptr(vdev->cxl, offset) = cpu_to_le32(new_val);
+
+	return size;
+}
+
+static ssize_t hdm_decoder_global_ctrl_write(struct vfio_pci_core_device *vdev,
+					     const __le32 *val32, u64 size)
+{
+	u32 hdm_gcap;
+	u32 new_val = le32_to_cpu(*val32);
+
+	if (WARN_ON_ONCE(size != CXL_REG_SIZE_DWORD))
+		return -EINVAL;
+
+	/* Bits [31:2] are reserved. */
+	new_val &= ~CXL_HDM_DECODER_GLOBAL_CTRL_RESERVED_MASK;
+
+	/* Poison On Decode Error Enable (bit 0) is RO=0 if not supported. */
+	hdm_gcap = le32_to_cpu(*hdm_reg_ptr(vdev->cxl,
+					    CXL_HDM_DECODER_CAP_OFFSET));
+	if (!(hdm_gcap & CXL_HDM_DECODER_POISON_ON_DECODE_ERR))
+		new_val &= ~CXL_HDM_DECODER_GLOBAL_CTRL_POISON_EN_BIT;
+
+	*hdm_reg_ptr(vdev->cxl, CXL_HDM_DECODER_CTRL_OFFSET) =
+		cpu_to_le32(new_val);
+
+	return size;
+}
+
+/**
+ * hdm_decoder_n_ctrl_write - Write handler for HDM decoder CTRL register.
+ * @vdev:   VFIO PCI core device
+ * @val32:  New register value supplied by userspace (little-endian)
+ * @offset: Byte offset within the HDM block for this decoder's CTRL register
+ * @size:   Access size in bytes; must equal CXL_REG_SIZE_DWORD
+ *
+ * The COMMIT bit (bit 9) is the key: setting it requests the hardware to
+ * lock the decoder.  The emulated COMMITTED bit (bit 10) mirrors COMMIT
+ * immediately to allow QEMU's notify_change to detect the transition and
+ * map/unmap the DPA MemoryRegion in the guest address space.
+ *
+ * Note: the actual hardware HDM decoder programming (writing the real
+ * BASE/SIZE with host physical addresses) happens in the QEMU notify_change
+ * callback BEFORE this write reaches the hardware.  This ordering is
+ * correct because vfio_region_write() calls notify_change() first.
+ *
+ * Return: @size on success, %-EINVAL if @size is not %CXL_REG_SIZE_DWORD.
+ */
+static ssize_t hdm_decoder_n_ctrl_write(struct vfio_pci_core_device *vdev,
+					const __le32 *val32, u64 offset, u64 size)
+{
+	u32 hdm_gcap;
+	u32 ro_mask = CXL_HDM_DECODER_CTRL_RO_BITS_MASK;
+	u32 rev_mask = CXL_HDM_DECODER_CTRL_RESERVED_MASK;
+	u32 new_val = le32_to_cpu(*val32);
+	u32 cur_val;
+
+	if (WARN_ON_ONCE(size != CXL_REG_SIZE_DWORD))
+		return -EINVAL;
+
+	cur_val = le32_to_cpu(*hdm_reg_ptr(vdev->cxl, offset));
+	if (cur_val & CXL_HDM_DECODER0_CTRL_LOCK) {
+		if (new_val & CXL_HDM_DECODER0_CTRL_LOCK)
+			return size;
+
+		/* LOCK_0 only: preserve all other bits, clear LOCK */
+		*hdm_reg_ptr(vdev->cxl, offset) = cpu_to_le32(
+			cur_val & ~CXL_HDM_DECODER0_CTRL_LOCK);
+		return size;
+	}
+
+	hdm_gcap = le32_to_cpu(*hdm_reg_ptr(vdev->cxl,
+					    CXL_HDM_DECODER_CAP_OFFSET));
+	ro_mask |= CXL_HDM_DECODER_CTRL_DEVICE_BITS_RO;
+	rev_mask |= CXL_HDM_DECODER_CTRL_DEVICE_RESERVED;
+
+	if (!(hdm_gcap & CXL_HDM_DECODER_UIO_CAPABLE))
+		rev_mask |= CXL_HDM_DECODER_CTRL_UIO_RESERVED;
+
+	new_val &= ~rev_mask;
+	cur_val &= ro_mask;
+	new_val = (new_val & ~ro_mask) | cur_val;
+
+	/*
+	 * Mirror COMMIT to COMMITTED immediately in the emulated state.
+	 */
+	if (new_val & CXL_HDM_DECODER0_CTRL_COMMIT)
+		new_val |= CXL_HDM_DECODER0_CTRL_COMMITTED;
+	else
+		new_val &= ~CXL_HDM_DECODER0_CTRL_COMMITTED;
+
+	*hdm_reg_ptr(vdev->cxl, offset) = cpu_to_le32(new_val);
+
+	return size;
+}
+
+/*
+ * Dispatch table for COMP_REGS region writes. Indexed by byte offset within
+ * the HDM decoder block. Returns the appropriate write handler.
+ *
+ * Layout:
+ *   0x00	  HDM Decoder Capability  (RO)
+ *   0x04	  HDM Global Control	  (RW with reserved masking)
+ *   0x08-0x0f	  (reserved)		  (ignored)
+ *   Per decoder N, base = 0x10 + N*0x20:
+ *     base+0x00  BASE_LO  (RW, [27:0] reserved)
+ *     base+0x04  BASE_HI  (RW)
+ *     base+0x08  SIZE_LO  (RW, [27:0] reserved)
+ *     base+0x0c  SIZE_HI  (RW)
+ *     base+0x10  CTRL	   (RW, complex rules)
+ *     base+0x14  TARGET_LIST_LO  (ignored for Type-2)
+ *     base+0x18  TARGET_LIST_HI  (ignored for Type-2)
+ *     base+0x1c  (reserved)	 (ignored)
+ */
+static ssize_t comp_regs_dispatch_write(struct vfio_pci_core_device *vdev,
+					u32 off, const __le32 *val32, u32 size)
+{
+	struct vfio_pci_cxl_state *cxl = vdev->cxl;
+	u32 dec_base, dec_off;
+
+	/* HDM Decoder Capability (0x00): RO */
+	if (off == CXL_HDM_DECODER_CAP_OFFSET)
+		return size;
+
+	/* HDM Global Control (0x04) */
+	if (off == CXL_HDM_DECODER_CTRL_OFFSET)
+		return hdm_decoder_global_ctrl_write(vdev, val32, size);
+
+	/*
+	 * Offsets 0x08-0x0f are reserved per CXL 4.0 Table 8-115.
+	 * Per-decoder registers start at 0x10, stride 0x20
+	 */
+	if (off < CXL_HDM_DECODER_FIRST_BLOCK_OFFSET)
+		return size; /* reserved gap */
+
+	dec_base = CXL_HDM_DECODER_FIRST_BLOCK_OFFSET;
+	/*
+	 * Reject accesses beyond the last implemented HDM decoder.
+	 * Without this check an out-of-bounds offset would silently
+	 * corrupt comp_reg_virt[] memory past the end of the allocation.
+	 */
+	if ((off - dec_base) / CXL_HDM_DECODER_BLOCK_STRIDE >= cxl->hdm_count)
+		return size;
+
+	dec_off = (off - dec_base) % CXL_HDM_DECODER_BLOCK_STRIDE;
+
+	switch (dec_off) {
+	case CXL_HDM_DECODER_N_BASE_LOW_OFFSET:	 /* BASE_LO */
+	case CXL_HDM_DECODER_N_SIZE_LOW_OFFSET:	 /* SIZE_LO */
+		return hdm_decoder_n_lo_write(vdev, val32, off, size);
+	case CXL_HDM_DECODER_N_BASE_HIGH_OFFSET: /* BASE_HI */
+	case CXL_HDM_DECODER_N_SIZE_HIGH_OFFSET: /* SIZE_HI */
+	{
+		/* Full 32-bit write, no reserved bits; frozen when LOCK set */
+		u32 ctrl_off = off - dec_off + CXL_HDM_DECODER_N_CTRL_OFFSET;
+		u32 ctrl = le32_to_cpu(*hdm_reg_ptr(cxl, ctrl_off));
+
+		if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
+			return size;
+		*hdm_reg_ptr(cxl, off) = *val32;
+		return size;
+	}
+	case CXL_HDM_DECODER_N_CTRL_OFFSET:	  /* CTRL */
+		return hdm_decoder_n_ctrl_write(vdev, val32, off, size);
+	case CXL_HDM_DECODER_N_TARGET_LIST_LOW_OFFSET:
+	case CXL_HDM_DECODER_N_TARGET_LIST_HIGH_OFFSET:
+	case CXL_HDM_DECODER_N_REV_OFFSET:
+		return virt_hdm_rev_reg_write(vdev, val32, off, size);
+	default:
+		return size;
+	}
+}
+
+/*
+ * vfio_cxl_comp_regs_rw - regops rw handler for
+ * VFIO_REGION_SUBTYPE_CXL_COMP_REGS.
+ *
+ * Reads return the emulated HDM state (comp_reg_virt[]).
+ * Writes go through comp_regs_dispatch_write() for bit-field enforcement.
+ * Only 4-byte aligned 4-byte accesses are supported (hardware requirement).
+ */
+static ssize_t vfio_cxl_comp_regs_rw(struct vfio_pci_core_device *vdev,
+				     char __user *buf, size_t count,
+				     loff_t *ppos, bool iswrite)
+{
+	struct vfio_pci_cxl_state *cxl = vdev->cxl;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	size_t done = 0;
+
+	if (!count)
+		return 0;
+
+	/* Clamp to total region size: cap array prefix + HDM block */
+	if (pos >= cxl->hdm_reg_offset + cxl->hdm_reg_size)
+		return -EINVAL;
+	count = min(count,
+		    (size_t)(cxl->hdm_reg_offset + cxl->hdm_reg_size - pos));
+
+	while (done < count) {
+		u32 sz	 = count - done;
+		u32 off	 = pos + done;
+		__le32 v;
+
+		/* Enforce exactly 4-byte, 4-byte-aligned accesses */
+		if (sz != CXL_REG_SIZE_DWORD || (off & 0x3))
+			return done ? (ssize_t)done : -EINVAL;
+
+		if (iswrite) {
+			if (off < cxl->hdm_reg_offset) {
+				/* Cap array area is read-only; discard writes */
+				done += sizeof(v);
+				continue;
+			}
+			if (copy_from_user(&v, buf + done, sizeof(v)))
+				return done ? (ssize_t)done : -EFAULT;
+			comp_regs_dispatch_write(vdev,
+						 off - cxl->hdm_reg_offset,
+						 &v, sizeof(v));
+		} else {
+			/* Read from extended buffer - covers cap array and HDM */
+			v = cxl->comp_reg_virt[off / sizeof(__le32)];
+			if (copy_to_user(buf + done, &v, sizeof(v)))
+				return done ? (ssize_t)done : -EFAULT;
+		}
+		done += sizeof(v);
+	}
+
+	*ppos += done;
+	return done;
+}
+
+static void vfio_cxl_comp_regs_release(struct vfio_pci_core_device *vdev,
+				       struct vfio_pci_region *region)
+{
+	/* comp_reg_virt is freed in vfio_cxl_clean_virt_regs() */
+}
+
+static const struct vfio_pci_regops vfio_cxl_comp_regs_ops = {
+	.rw	 = vfio_cxl_comp_regs_rw,
+	.release = vfio_cxl_comp_regs_release,
+};
+
+/*
+ * vfio_cxl_setup_virt_regs - Allocate emulated HDM register state.
+ *
+ * Allocates comp_reg_virt as a __le32 array covering hdm_reg_offset +
+ * hdm_reg_size bytes of CXL.mem registers. The initial values
+ * are read from hardware via the BAR ioremap established by the caller.
+ *
+ * DVSEC state is accessed via vdev->vconfig (see the following patch).
+ */
+int vfio_cxl_setup_virt_regs(struct vfio_pci_core_device *vdev,
+			     struct vfio_pci_cxl_state *cxl,
+			     void __iomem *cap_base)
+{
+	size_t total_size, nregs, i;
+
+	if (WARN_ON(!cxl->hdm_reg_size))
+		return -EINVAL;
+
+	total_size = cxl->hdm_reg_offset + cxl->hdm_reg_size;
+
+	if (pci_resource_len(vdev->pdev, cxl->comp_reg_bar) <
+	    cxl->comp_reg_offset + CXL_CM_OFFSET + total_size)
+		return -ENODEV;
+
+	nregs = total_size / sizeof(__le32);
+	cxl->comp_reg_virt = kcalloc(nregs, sizeof(__le32), GFP_KERNEL);
+	if (!cxl->comp_reg_virt)
+		return -ENOMEM;
+
+	/*
+	 * Snapshot the CXL.mem register area from the caller's mapping.
+	 * cap_base maps the component register block from comp_reg_offset.
+	 * The CXL.mem registers start at CXL_CM_OFFSET (= 0x1000) within that
+	 * block; reading from cap_base + CXL_CM_OFFSET ensures comp_reg_virt[0]
+	 * holds the CXL Capability Array Header required by guest drivers.
+	 */
+	for (i = 0; i < nregs; i++)
+		cxl->comp_reg_virt[i] =
+			cpu_to_le32(readl(cap_base + CXL_CM_OFFSET +
+					  i * sizeof(__le32)));
+
+	/*
+	 * Establish persistent mapping; kept alive until
+	 * vfio_cxl_clean_virt_regs().
+	 */
+	cxl->hdm_iobase = ioremap(pci_resource_start(vdev->pdev,
+						     cxl->comp_reg_bar) +
+				  cxl->comp_reg_offset + CXL_CM_OFFSET +
+				  cxl->hdm_reg_offset,
+				  cxl->hdm_reg_size);
+	if (!cxl->hdm_iobase) {
+		kfree(cxl->comp_reg_virt);
+		cxl->comp_reg_virt = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Called with memory_lock write side held (from vfio_cxl_reactivate_region).
+ * Uses the pre-established hdm_iobase so no ioremap() happens under
+ * the lock; ioremap() can sleep, which would deadlock on PREEMPT_RT.
+ */
+void vfio_cxl_reinit_comp_regs(struct vfio_pci_cxl_state *cxl)
+{
+	size_t i, nregs;
+	u32 n;
+
+	if (!cxl || !cxl->comp_reg_virt || !cxl->hdm_iobase)
+		return;
+
+	nregs = cxl->hdm_reg_size / sizeof(__le32);
+
+	for (i = 0; i < nregs; i++)
+		*hdm_reg_ptr(cxl, i * sizeof(__le32)) =
+			cpu_to_le32(readl(cxl->hdm_iobase +
+					  i * sizeof(__le32)));
+
+	/*
+	 * For firmware-committed decoders, clear COMMIT_LOCK (bit 8) and zero
+	 * BASE in comp_reg_virt[] so QEMU can write the correct guest GPA via
+	 * setup_locked_hdm() before guest DPA access begins.
+	 *
+	 * Check the COMMITTED bit (bit 10) directly from the freshly-snapshotted
+	 * ctrl register rather than relying on cxl->precommitted.  At probe time
+	 * this function is called before cxl->precommitted is set (it is set
+	 * after vfio_cxl_read_committed_decoder_size() succeeds), so using
+	 * cxl->precommitted here would silently skip the LOCK clearing and leave
+	 * the hardware HPA in comp_reg_virt[].
+	 */
+	for (n = 0; n < cxl->hdm_count; n++) {
+		u32 ctrl_off = CXL_HDM_DECODER_FIRST_BLOCK_OFFSET +
+			n * CXL_HDM_DECODER_BLOCK_STRIDE +
+			CXL_HDM_DECODER_N_CTRL_OFFSET;
+		u32 base_lo_off = CXL_HDM_DECODER_FIRST_BLOCK_OFFSET +
+			n * CXL_HDM_DECODER_BLOCK_STRIDE +
+			CXL_HDM_DECODER_N_BASE_LOW_OFFSET;
+		u32 base_hi_off = CXL_HDM_DECODER_FIRST_BLOCK_OFFSET +
+			n * CXL_HDM_DECODER_BLOCK_STRIDE +
+			CXL_HDM_DECODER_N_BASE_HIGH_OFFSET;
+		u32 ctrl = le32_to_cpu(*hdm_reg_ptr(cxl, ctrl_off));
+
+		if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED))
+			continue;
+
+		if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK) {
+			*hdm_reg_ptr(cxl, ctrl_off) =
+				cpu_to_le32(ctrl &
+					    ~CXL_HDM_DECODER0_CTRL_LOCK);
+			*hdm_reg_ptr(cxl, base_lo_off) = 0;
+			*hdm_reg_ptr(cxl, base_hi_off) = 0;
+		}
+	}
+}
+
+void vfio_cxl_clean_virt_regs(struct vfio_pci_cxl_state *cxl)
+{
+	if (cxl->hdm_iobase) {
+		iounmap(cxl->hdm_iobase);
+		cxl->hdm_iobase = NULL;
+	}
+	kfree(cxl->comp_reg_virt);
+	cxl->comp_reg_virt = NULL;
+}
diff --git a/drivers/vfio/pci/cxl/vfio_cxl_priv.h b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
index 54b1f6d885aa..463a55062144 100644
--- a/drivers/vfio/pci/cxl/vfio_cxl_priv.h
+++ b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
@@ -21,12 +21,53 @@ struct vfio_pci_cxl_state {
 	size_t                       hdm_reg_size;
 	resource_size_t              comp_reg_offset;
 	size_t                       comp_reg_size;
+	__le32                      *comp_reg_virt;
+	void __iomem                *hdm_iobase;
 	u16                          dvsec_len;
 	u8                           hdm_count;
 	u8                           comp_reg_bar;
 	bool                         cache_capable;
 };
 
+/* Register access sizes */
+#define CXL_REG_SIZE_WORD  2
+#define CXL_REG_SIZE_DWORD 4
+
+/* HDM Decoder - register offsets (CXL 4.0 Table 8-115) */
+#define CXL_HDM_DECODER_GLOBAL_CTRL_OFFSET        0x4
+#define CXL_HDM_DECODER_FIRST_BLOCK_OFFSET        0x10
+#define CXL_HDM_DECODER_BLOCK_STRIDE              0x20
+#define CXL_HDM_DECODER_N_BASE_LOW_OFFSET         0x0
+#define CXL_HDM_DECODER_N_BASE_HIGH_OFFSET        0x4
+#define CXL_HDM_DECODER_N_SIZE_LOW_OFFSET         0x8
+#define CXL_HDM_DECODER_N_SIZE_HIGH_OFFSET        0xc
+#define CXL_HDM_DECODER_N_CTRL_OFFSET             0x10
+#define CXL_HDM_DECODER_N_TARGET_LIST_LOW_OFFSET  0x14
+#define CXL_HDM_DECODER_N_TARGET_LIST_HIGH_OFFSET 0x18
+#define CXL_HDM_DECODER_N_REV_OFFSET              0x1c
+
+/*
+ * HDM Decoder N Control emulation masks.
+ *
+ * Single-bit hardware definitions are in <uapi/cxl/cxl_regs.h> as
+ * CXL_HDM_DECODER0_CTRL_* (bits 0-14) and CXL_HDM_DECODER_*_CAP.
+ * The masks below express emulation policy for a CXL.mem device.
+ */
+#define CXL_HDM_DECODER_CTRL_RO_BITS_MASK    (BIT(10) | BIT(11))
+#define CXL_HDM_DECODER_CTRL_RESERVED_MASK   (BIT(15) | GENMASK(31, 28))
+#define CXL_HDM_DECODER_CTRL_DEVICE_BITS_RO  BIT(12)
+#define CXL_HDM_DECODER_CTRL_DEVICE_RESERVED (GENMASK(19, 16) | GENMASK(23, 20))
+#define CXL_HDM_DECODER_CTRL_UIO_RESERVED    (BIT(14) | GENMASK(27, 24))
+/*
+ * bit 13 (BI) is RsvdP for devices without CXL.cache (Cache_Capable=0).
+ * HDM-D (CXL.mem only) decoders must not have BI set by the guest.
+ */
+#define CXL_HDM_DECODER_CTRL_BI_RESERVED          BIT(13)
+#define CXL_HDM_DECODER_BASE_LO_RESERVED_MASK     GENMASK(27, 0)
+
+#define CXL_HDM_DECODER_GLOBAL_CTRL_RESERVED_MASK GENMASK(31, 2)
+#define CXL_HDM_DECODER_GLOBAL_CTRL_POISON_EN_BIT BIT(0)
+
 /*
  * CXL DVSEC for CXL Devices - register offsets within the DVSEC
  * (CXL 4.0 8.1.3).
@@ -37,4 +78,10 @@ struct vfio_pci_cxl_state {
 /* CXL DVSEC Capability register bit 0: device supports CXL.cache (HDM-DB) */
 #define CXL_DVSEC_CACHE_CAPABLE	    BIT(0)
 
+int vfio_cxl_setup_virt_regs(struct vfio_pci_core_device *vdev,
+			     struct vfio_pci_cxl_state *cxl,
+			     void __iomem *cap_base);
+void vfio_cxl_clean_virt_regs(struct vfio_pci_cxl_state *cxl);
+void vfio_cxl_reinit_comp_regs(struct vfio_pci_cxl_state *cxl);
+
 #endif /* __LINUX_VFIO_CXL_PRIV_H */
diff --git a/include/uapi/cxl/cxl_regs.h b/include/uapi/cxl/cxl_regs.h
index 1a48a3805f52..b6fcae91d216 100644
--- a/include/uapi/cxl/cxl_regs.h
+++ b/include/uapi/cxl/cxl_regs.h
@@ -33,8 +33,13 @@
 #define   CXL_HDM_DECODER_TARGET_COUNT_MASK __GENMASK(7, 4)
 #define   CXL_HDM_DECODER_INTERLEAVE_11_8 _BITUL(8)
 #define   CXL_HDM_DECODER_INTERLEAVE_14_12 _BITUL(9)
+#define   CXL_HDM_DECODER_POISON_ON_DECODE_ERR _BITUL(10)
 #define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY _BITUL(11)
 #define   CXL_HDM_DECODER_INTERLEAVE_16_WAY _BITUL(12)
+#define   CXL_HDM_DECODER_UIO_CAPABLE _BITUL(13)
+#define   CXL_HDM_DECODER_UIO_COUNT_MASK __GENMASK(19, 16)
+#define   CXL_HDM_DECODER_MEMDATA_NXM _BITUL(20)
+#define   CXL_HDM_DECODER_COHERENCY_MODELS_MASK    __GENMASK(22, 21)
 #define CXL_HDM_DECODER_CTRL_OFFSET 0x4
 #define   CXL_HDM_DECODER_ENABLE _BITUL(1)
 #define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
-- 
2.25.1