[PATCH 16/20] vfio/pci: Expose CXL device and region info via VFIO ioctl

mhonap@nvidia.com posted 20 patches 3 weeks, 5 days ago
There is a newer version of this series
[PATCH 16/20] vfio/pci: Expose CXL device and region info via VFIO ioctl
Posted by mhonap@nvidia.com 3 weeks, 5 days ago
From: Manish Honap <mhonap@nvidia.com>

Expose CXL device capability information through the VFIO device info
ioctl and hide the CXL component BAR from direct userspace access via
the standard region info path.

Add vfio_cxl_get_info() which fills a VFIO_DEVICE_INFO_CAP_CXL
capability structure with HDM register location, DPA size, commit
flags, and the region indices of the two CXL VFIO device regions (DPA
and COMP_REGS) so userspace does not need to scan all regions.

Add vfio_cxl_get_region_info() which intercepts BAR queries for the
component register BAR and returns size=0 to hide it, directing
userspace to use VFIO_REGION_SUBTYPE_CXL_COMP_REGS instead.

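Hook both helpers into vfio_pci_ioctl_get_info() and
vfio_pci_ioctl_get_region_info() in vfio_pci_core.c. For a CXL device,
VFIO_DEVICE_GET_INFO now also sets VFIO_DEVICE_FLAGS_CXL, and fails with
-EAGAIN while the two CXL device regions have not been registered yet.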

The CXL component register BAR contains the HDM decoder MMIO registers.
Userspace must use the VFIO_REGION_SUBTYPE_CXL_COMP_REGS emulated region
instead of directly mapping or reading/writing this BAR, to ensure that
all accesses go through the emulation layer for correct bit-field
enforcement.

Reject mmap(), barmap setup, and BAR r/w for the CXL component BAR
index in vfio_pci_core_mmap(), vfio_pci_core_setup_barmap(), and
vfio_pci_bar_rw() respectively.

Signed-off-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Manish Honap <mhonap@nvidia.com>
---
 drivers/vfio/pci/cxl/vfio_cxl_core.c | 84 ++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_core.c     | 16 ++++++
 drivers/vfio/pci/vfio_pci_priv.h     | 19 +++++++
 drivers/vfio/pci/vfio_pci_rdwr.c     |  8 +++
 4 files changed, 127 insertions(+)

diff --git a/drivers/vfio/pci/cxl/vfio_cxl_core.c b/drivers/vfio/pci/cxl/vfio_cxl_core.c
index e18e992800f6..bda11f99746f 100644
--- a/drivers/vfio/pci/cxl/vfio_cxl_core.c
+++ b/drivers/vfio/pci/cxl/vfio_cxl_core.c
@@ -18,6 +18,90 @@
 
 MODULE_IMPORT_NS("CXL");
 
+u8 vfio_cxl_get_component_reg_bar(struct vfio_pci_core_device *vdev)
+{
+	return vdev->cxl ? vdev->cxl->comp_reg_bar : U8_MAX; /* U8_MAX: no CXL state */
+}
+
+int vfio_cxl_get_region_info(struct vfio_pci_core_device *vdev,
+			     struct vfio_region_info *info,
+			     struct vfio_info_cap *caps)
+{
+	struct vfio_pci_cxl_state *state = vdev->cxl;
+
+	/*
+	 * -ENOTTY means "not handled here": the caller then falls back to
+	 * the generic PCI region info path.
+	 */
+	if (!state || !info)
+		return -ENOTTY;
+
+	/* The caller's buffer must at least reach the end of 'offset'. */
+	if (info->argsz < offsetofend(struct vfio_region_info, offset))
+		return -EINVAL;
+
+	/* Only the component register BAR is intercepted. */
+	if (info->index != state->comp_reg_bar)
+		return -ENOTTY;
+
+	/*
+	 * Report the component BAR as empty; HDM MMIO emulation is reached
+	 * only through the VFIO_REGION_SUBTYPE_CXL_COMP_REGS device region.
+	 */
+	info->cap_offset = 0;
+	info->flags = 0;
+	info->size = 0;
+	info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
+	info->argsz = sizeof(*info);
+	return 0;
+}
+
+int vfio_cxl_get_info(struct vfio_pci_core_device *vdev,
+		      struct vfio_info_cap *caps)
+{
+	struct vfio_pci_cxl_state *cxl = vdev->cxl;
+	struct vfio_device_info_cap_cxl cxl_cap = {0};
+
+	if (!cxl)
+		return 0;
+
+	/*
+	 * Region indices are set at open time after
+	 * vfio_pci_core_register_dev_region() succeeds.  If either is still
+	 * negative, the device is not yet fully initialised; return -EAGAIN
+	 * so userspace retries instead of being handed a bogus region index.
+	 */
+	if (cxl->dpa_region_idx < 0 || cxl->comp_reg_region_idx < 0)
+		return -EAGAIN;
+
+	/* Describe the HDM decoder registers and the device's DPA size. */
+	cxl_cap.header.id = VFIO_DEVICE_INFO_CAP_CXL;
+	cxl_cap.header.version = 1;
+	cxl_cap.hdm_count = cxl->hdm_count;
+	cxl_cap.hdm_regs_offset = cxl->comp_reg_offset + cxl->hdm_reg_offset;
+	cxl_cap.hdm_regs_size = cxl->hdm_reg_size;
+	cxl_cap.hdm_regs_bar_index = cxl->comp_reg_bar;
+	cxl_cap.dpa_size = cxl->dpa_size;
+
+	if (cxl->precommitted) {
+		cxl_cap.flags |= VFIO_CXL_CAP_COMMITTED |
+			VFIO_CXL_CAP_PRECOMMITTED;
+	}
+
+	/*
+	 * Report absolute VFIO region indices so userspace can pass them
+	 * straight to VFIO_DEVICE_GET_REGION_INFO.  dpa_region_idx and
+	 * comp_reg_region_idx are 0-based indices into the device-specific
+	 * regions, which are numbered after the VFIO_PCI_NUM_REGIONS fixed
+	 * PCI regions (see info.num_regions in vfio_pci_ioctl_get_info()).
+	 */
+	cxl_cap.dpa_region_index = VFIO_PCI_NUM_REGIONS + cxl->dpa_region_idx;
+	cxl_cap.comp_regs_region_index =
+		VFIO_PCI_NUM_REGIONS + cxl->comp_reg_region_idx;
+
+	return vfio_info_add_capability(caps, &cxl_cap.header, sizeof(cxl_cap));
+}
+
 static int vfio_cxl_create_device_state(struct vfio_pci_core_device *vdev,
 					u16 dvsec)
 {
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 48e0274c19aa..5352e7810fed 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -989,6 +989,13 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 	if (vdev->reset_works)
 		info.flags |= VFIO_DEVICE_FLAGS_RESET;
 
+	if (vdev->cxl) {
+		ret = vfio_cxl_get_info(vdev, &caps);
+		if (ret)
+			return ret;
+		info.flags |= VFIO_DEVICE_FLAGS_CXL;
+	}
+
 	info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
 	info.num_irqs = VFIO_PCI_NUM_IRQS;
 
@@ -1034,6 +1041,12 @@ int vfio_pci_ioctl_get_region_info(struct vfio_device *core_vdev,
 	struct pci_dev *pdev = vdev->pdev;
 	int i, ret;
 
+	if (vdev->cxl) {
+		ret = vfio_cxl_get_region_info(vdev, info, caps);
+		if (ret != -ENOTTY)
+			return ret;
+	}
+
 	switch (info->index) {
 	case VFIO_PCI_CONFIG_REGION_INDEX:
 		info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
@@ -1756,6 +1769,9 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
 	}
 	if (index >= VFIO_PCI_ROM_REGION_INDEX)
 		return -EINVAL;
+	/* Reject mmap of CXL component BAR; use COMP_REGS region only. */
+	if (vdev->cxl && index == vfio_cxl_get_component_reg_bar(vdev))
+		return -EINVAL;
 	if (!vdev->bar_mmap_supported[index])
 		return -EINVAL;
 
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index d778107fa908..c1befe7d028d 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -156,6 +156,13 @@ int  vfio_cxl_register_comp_regs_region(struct vfio_pci_core_device *vdev);
 void vfio_cxl_reinit_comp_regs(struct vfio_pci_core_device *vdev);
 void vfio_cxl_setup_dvsec_perms(struct vfio_pci_core_device *vdev);
 
+int vfio_cxl_get_info(struct vfio_pci_core_device *vdev,
+		      struct vfio_info_cap *caps);
+int vfio_cxl_get_region_info(struct vfio_pci_core_device *vdev,
+			     struct vfio_region_info *info,
+			     struct vfio_info_cap *caps);
+u8 vfio_cxl_get_component_reg_bar(struct vfio_pci_core_device *vdev);
+
 #else
 
 static inline void
@@ -183,6 +190,18 @@ static inline void
 vfio_cxl_reinit_comp_regs(struct vfio_pci_core_device *vdev) { }
 static inline void
 vfio_cxl_setup_dvsec_perms(struct vfio_pci_core_device *vdev) { }
+static inline int
+vfio_cxl_get_info(struct vfio_pci_core_device *vdev,
+		  struct vfio_info_cap *caps)
+{ return 0; } /* no CXL capability to add, same as when vdev->cxl is NULL */
+static inline int
+vfio_cxl_get_region_info(struct vfio_pci_core_device *vdev,
+			 struct vfio_region_info *info,
+			 struct vfio_info_cap *caps)
+{ return -ENOTTY; } /* -ENOTTY: not handled, caller uses the generic path */
+static inline u8
+vfio_cxl_get_component_reg_bar(struct vfio_pci_core_device *vdev)
+{ return U8_MAX; } /* placeholder index for the #else (no CXL core) build */
 
 #endif /* CONFIG_VFIO_CXL_CORE */
 
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index b38627b35c35..4f1f4882265a 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -207,6 +207,10 @@ int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
 	if (vdev->barmap[bar])
 		return 0;
 
+	/* Do not map the CXL component BAR; use COMP_REGS region only. */
+	if (vdev->cxl && bar == vfio_cxl_get_component_reg_bar(vdev))
+		return -EINVAL;
+
 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
 	if (ret)
 		return ret;
@@ -236,6 +240,10 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 	ssize_t done;
 	enum vfio_pci_io_width max_width = VFIO_PCI_IO_WIDTH_8;
 
+	/* Reject BAR r/w for CXL component BAR; use COMP_REGS region only. */
+	if (vdev->cxl && bar == vfio_cxl_get_component_reg_bar(vdev))
+		return -EINVAL;
+
 	if (pci_resource_start(pdev, bar))
 		end = pci_resource_len(pdev, bar);
 	else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
-- 
2.25.1