[PATCH v2 16/20] vfio/cxl: Register regions with VFIO layer

mhonap@nvidia.com posted 20 patches 6 hours ago
[PATCH v2 16/20] vfio/cxl: Register regions with VFIO layer
Posted by mhonap@nvidia.com 6 hours ago
From: Manish Honap <mhonap@nvidia.com>

Register the DPA and component register regions with the VFIO layer.
Region indices for both these regions are cached for quick lookup.

vfio_cxl_register_cxl_region()
- Maps the region HPA with memremap(WB) (treating CXL.mem as RAM, not MMIO)
- Registers VFIO_REGION_SUBTYPE_CXL
- Records dpa_region_idx.

vfio_cxl_register_comp_regs_region()
- Registers VFIO_REGION_SUBTYPE_CXL_COMP_REGS with size
  hdm_reg_offset + hdm_reg_size
- Records comp_reg_region_idx.

Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
 drivers/vfio/pci/cxl/vfio_cxl_core.c | 98 +++++++++++++++++++++++++++-
 drivers/vfio/pci/cxl/vfio_cxl_emu.c  | 34 ++++++++++
 drivers/vfio/pci/cxl/vfio_cxl_priv.h |  2 +
 drivers/vfio/pci/vfio_pci.c          | 23 +++++++
 drivers/vfio/pci/vfio_pci_priv.h     | 11 ++++
 5 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/cxl/vfio_cxl_core.c b/drivers/vfio/pci/cxl/vfio_cxl_core.c
index a3ff90b7a22c..b38a04301660 100644
--- a/drivers/vfio/pci/cxl/vfio_cxl_core.c
+++ b/drivers/vfio/pci/cxl/vfio_cxl_core.c
@@ -75,6 +75,8 @@ vfio_cxl_create_device_state(struct pci_dev *pdev, u16 dvsec)
 	}
 
 	cxl->cache_capable = FIELD_GET(CXL_DVSEC_CAP_CACHE_CAPABLE, cap_word);
+	cxl->dpa_region_idx = -1;
+	cxl->comp_reg_region_idx = -1;
 
 	return cxl;
 }
@@ -509,14 +511,19 @@ static int vfio_cxl_region_mmap(struct vfio_pci_core_device *vdev,
  */
 void vfio_cxl_zap_region_locked(struct vfio_pci_core_device *vdev)
 {
+	struct vfio_device *core_vdev = &vdev->vdev;
 	struct vfio_pci_cxl_state *cxl = vdev->cxl;
 
 	lockdep_assert_held_write(&vdev->memory_lock);
 
-	if (!cxl)
+	if (!cxl || cxl->dpa_region_idx < 0)
 		return;
 
 	WRITE_ONCE(cxl->region_active, false);
+	unmap_mapping_range(core_vdev->inode->i_mapping,
+			    VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_NUM_REGIONS +
+						     cxl->dpa_region_idx),
+			    cxl->region_size, true);
 }
 
 /*
@@ -601,6 +608,7 @@ static ssize_t vfio_cxl_region_rw(struct vfio_pci_core_device *core_dev,
 static void vfio_cxl_region_release(struct vfio_pci_core_device *vdev,
 				    struct vfio_pci_region *region)
 {
+	struct vfio_device *core_vdev = &vdev->vdev;
 	struct vfio_pci_cxl_state *cxl = region->data;
 
 	/*
@@ -610,6 +618,16 @@ static void vfio_cxl_region_release(struct vfio_pci_core_device *vdev,
 	 */
 	WRITE_ONCE(cxl->region_active, false);
 
+	/*
+	 * Remove all user mappings of the DPA region while the device is
+	 * still alive.
+	 */
+	if (cxl->dpa_region_idx >= 0)
+		unmap_mapping_range(core_vdev->inode->i_mapping,
+			    VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_NUM_REGIONS +
+						     cxl->dpa_region_idx),
+				    cxl->region_size, true);
+
 	if (cxl->region_vaddr) {
 		memunmap(cxl->region_vaddr);
 		cxl->region_vaddr = NULL;
@@ -622,4 +640,82 @@ static const struct vfio_pci_regops vfio_cxl_regops = {
 	.release	= vfio_cxl_region_release,
 };
 
+int vfio_cxl_register_cxl_region(struct vfio_pci_core_device *vdev)
+{
+	struct vfio_pci_cxl_state *cxl = vdev->cxl;
+	u32 flags;
+	int ret;
+
+	if (!cxl)
+		return -ENODEV;
+
+	if (!cxl->region || cxl->region_vaddr)
+		return -ENODEV;
+
+	/*
+	 * CXL device memory is RAM, not MMIO.  Use memremap() rather than
+	 * ioremap_cache() so the correct memory-mapping API is used.
+	 * The WB attribute matches the cache-coherent nature of CXL.mem.
+	 */
+	cxl->region_vaddr = memremap(cxl->region_hpa, cxl->region_size,
+				     MEMREMAP_WB);
+	if (!cxl->region_vaddr)
+		return -ENOMEM;
+
+	flags = VFIO_REGION_INFO_FLAG_READ |
+		VFIO_REGION_INFO_FLAG_WRITE |
+		VFIO_REGION_INFO_FLAG_MMAP;
+
+	ret = vfio_pci_core_register_dev_region(vdev,
+						PCI_VENDOR_ID_CXL |
+						VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+						VFIO_REGION_SUBTYPE_CXL,
+						&vfio_cxl_regops,
+						cxl->region_size, flags,
+						cxl);
+	if (ret) {
+		memunmap(cxl->region_vaddr);
+		cxl->region_vaddr = NULL;
+		return ret;
+	}
+
+	/*
+	 * Cache the vdev->region[] index before activating the region.
+	 * vfio_pci_core_register_dev_region() placed the new entry at
+	 * vdev->region[num_regions - 1] and incremented num_regions.
+	 * vfio_cxl_zap_region_locked() uses this to avoid scanning
+	 * vdev->region[] on every FLR.
+	 */
+	cxl->dpa_region_idx = vdev->num_regions - 1;
+
+	vfio_cxl_reinit_comp_regs(cxl);
+
+	WRITE_ONCE(cxl->region_active, true);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_register_cxl_region);
+
+/**
+ * vfio_cxl_unregister_cxl_region - Undo vfio_cxl_register_cxl_region()
+ * @vdev: VFIO PCI device
+ *
+ * Marks the DPA region inactive and resets dpa_region_idx.
+ * Does NOT touch CXL subsystem state (cxl->region, cxl->cxled, cxl->cxlrd).
+ * The caller must call vfio_cxl_destroy_cxl_region() separately to release
+ * those objects.
+ */
+void vfio_cxl_unregister_cxl_region(struct vfio_pci_core_device *vdev)
+{
+	struct vfio_pci_cxl_state *cxl = vdev->cxl;
+
+	if (!cxl || cxl->dpa_region_idx < 0)
+		return;
+
+	WRITE_ONCE(cxl->region_active, false);
+
+	cxl->dpa_region_idx = -1;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_unregister_cxl_region);
+
 MODULE_IMPORT_NS("CXL");
diff --git a/drivers/vfio/pci/cxl/vfio_cxl_emu.c b/drivers/vfio/pci/cxl/vfio_cxl_emu.c
index 781328a79b43..50d3718b101d 100644
--- a/drivers/vfio/pci/cxl/vfio_cxl_emu.c
+++ b/drivers/vfio/pci/cxl/vfio_cxl_emu.c
@@ -473,3 +473,37 @@ void vfio_cxl_clean_virt_regs(struct vfio_pci_cxl_state *cxl)
 	kfree(cxl->comp_reg_virt);
 	cxl->comp_reg_virt = NULL;
 }
+
+/*
+ * vfio_cxl_register_comp_regs_region - Register the COMP_REGS device region.
+ *
+ * Exposes the emulated HDM decoder register state as a VFIO device region
+ * with type VFIO_REGION_SUBTYPE_CXL_COMP_REGS.	 QEMU attaches a
+ * notify_change callback to this region to intercept HDM COMMIT writes
+ * and map the DPA MemoryRegion at the appropriate GPA.
+ *
+ * The region is read+write only (no mmap) to ensure all accesses pass
+ * through comp_regs_dispatch_write() for proper bit-field enforcement.
+ */
+int vfio_cxl_register_comp_regs_region(struct vfio_pci_core_device *vdev)
+{
+	struct vfio_pci_cxl_state *cxl = vdev->cxl;
+	u32 flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE;
+	int ret;
+
+	if (!cxl || !cxl->comp_reg_virt)
+		return -ENODEV;
+
+	ret = vfio_pci_core_register_dev_region(vdev,
+						PCI_VENDOR_ID_CXL |
+						VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+						VFIO_REGION_SUBTYPE_CXL_COMP_REGS,
+						&vfio_cxl_comp_regs_ops,
+						cxl->hdm_reg_offset +
+						cxl->hdm_reg_size, flags, cxl);
+	if (!ret)
+		cxl->comp_reg_region_idx = vdev->num_regions - 1;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_cxl_register_comp_regs_region);
diff --git a/drivers/vfio/pci/cxl/vfio_cxl_priv.h b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
index b86ee691d050..b884689a1226 100644
--- a/drivers/vfio/pci/cxl/vfio_cxl_priv.h
+++ b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
@@ -28,6 +28,8 @@ struct vfio_pci_cxl_state {
 	__le32                      *comp_reg_virt;
 	size_t                       dpa_size;
 	void __iomem                *hdm_iobase;
+	int                          dpa_region_idx;
+	int                          comp_reg_region_idx;
 	u16                          dvsec_len;
 	u8                           hdm_count;
 	u8                           comp_reg_bar;
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 0c771064c0b8..22cf9ea831f9 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -120,6 +120,29 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev)
 		}
 	}
 
+	if (vdev->cxl) {
+		/*
+		 * pci_config_map and vconfig are valid now (allocated by
+		 * vfio_config_init() inside vfio_pci_core_enable() above).
+		 */
+		vfio_cxl_setup_dvsec_perms(vdev);
+
+		ret = vfio_cxl_register_cxl_region(vdev);
+		if (ret) {
+			pci_warn(pdev, "Failed to setup CXL region\n");
+			vfio_pci_core_disable(vdev);
+			return ret;
+		}
+
+		ret = vfio_cxl_register_comp_regs_region(vdev);
+		if (ret) {
+			pci_warn(pdev, "Failed to register COMP_REGS region\n");
+			vfio_cxl_unregister_cxl_region(vdev);
+			vfio_pci_core_disable(vdev);
+			return ret;
+		}
+	}
+
 	vfio_pci_core_finish_enable(vdev);
 
 	return 0;
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 96f8361ce6f3..ae0091d5096c 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -148,6 +148,9 @@ void vfio_pci_cxl_cleanup(struct vfio_pci_core_device *vdev);
 void vfio_cxl_zap_region_locked(struct vfio_pci_core_device *vdev);
 void vfio_cxl_reactivate_region(struct vfio_pci_core_device *vdev);
 void vfio_cxl_setup_dvsec_perms(struct vfio_pci_core_device *vdev);
+int vfio_cxl_register_cxl_region(struct vfio_pci_core_device *vdev);
+void vfio_cxl_unregister_cxl_region(struct vfio_pci_core_device *vdev);
+int  vfio_cxl_register_comp_regs_region(struct vfio_pci_core_device *vdev);
 
 #else
 
@@ -161,6 +164,14 @@ static inline void
 vfio_cxl_reactivate_region(struct vfio_pci_core_device *vdev) { }
 static inline void
 vfio_cxl_setup_dvsec_perms(struct vfio_pci_core_device *vdev) { }
+static inline int
+vfio_cxl_register_cxl_region(struct vfio_pci_core_device *vdev)
+{ return 0; }
+static inline void
+vfio_cxl_unregister_cxl_region(struct vfio_pci_core_device *vdev) { }
+static inline int
+vfio_cxl_register_comp_regs_region(struct vfio_pci_core_device *vdev)
+{ return 0; }
 
 #endif /* CONFIG_VFIO_CXL_CORE */
 
-- 
2.25.1