[RFC PATCH 3/7] vfio/pci: add RCU locking for regions access

Mahmoud Adam posted 7 patches 1 week ago
[RFC PATCH 3/7] vfio/pci: add RCU locking for regions access
Posted by Mahmoud Adam 1 week ago
Since we could request to add more regions after initialization. We
would need locking to avoid racing with readers and cause UAF. use RCU
for read-write synchronization. And region_lock mutex is used to
synchronize the write section.

Changing the value of num_regions is done under the mutex. Since the
num_regions can only increase, using READ_ONCE and WRITE_ONCE should
be enough to make sure we have a valid value. On the write section,
synchronize_rcu() is run before incrementing num_regions. Doing that
makes sure read sections are passed before increasing num_regions to
avoid causing out-of-bound access.

Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
---
 drivers/vfio/pci/vfio_pci_core.c | 59 +++++++++++++++++++++++---------
 drivers/vfio/pci/vfio_pci_igd.c  | 16 ++++++---
 include/linux/vfio_pci_core.h    |  1 +
 3 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 6629490c0e46f..78e18bfd973e5 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -882,7 +882,8 @@ static int msix_mmappable_cap(struct vfio_pci_core_device *vdev,
 }
 
 /*
- * Registers a new region to vfio_pci_core_device.
+ * Registers a new region to vfio_pci_core_device. region_lock should
+ * be held when multiple registers could happen.
  * Returns region index on success or a negative errno.
  */
 int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
@@ -890,15 +891,20 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
 				      const struct vfio_pci_regops *ops,
 				      size_t size, u32 flags, void *data)
 {
-	int num_regions = vdev->num_regions;
 	struct vfio_pci_region *region, *old_region;
+	int num_regions;
+
+	mutex_lock(&vdev->region_lock);
+	num_regions = READ_ONCE(vdev->num_regions);
 
 	region = kmalloc((num_regions + 1) * sizeof(*region),
 			 GFP_KERNEL_ACCOUNT);
 	if (!region)
 		return -ENOMEM;
 
-	old_region = vdev->region;
+	old_region =
+		rcu_dereference_protected(vdev->region,
+					  lockdep_is_held(&vdev->region_lock));
 	if (old_region)
 		memcpy(region, old_region, num_regions * sizeof(*region));
 
@@ -909,8 +915,10 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
 	region[num_regions].flags = flags;
 	region[num_regions].data = data;
 
-	vdev->region = region;
-	vdev->num_regions++;
+	rcu_assign_pointer(vdev->region, region);
+	synchronize_rcu();
+	WRITE_ONCE(vdev->num_regions, READ_ONCE(vdev->num_regions) + 1);
+	mutex_unlock(&vdev->region_lock);
 	kfree(old_region);
 	return num_regions;
 }
@@ -968,7 +976,7 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 	if (vdev->reset_works)
 		info.flags |= VFIO_DEVICE_FLAGS_RESET;
 
-	info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
+	info.num_regions = VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions);
 	info.num_irqs = VFIO_PCI_NUM_IRQS;
 
 	ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
@@ -1094,13 +1102,16 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
 			.header.version = 1
 		};
 
-		if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+		if (info.index >= VFIO_PCI_NUM_REGIONS +
+					  READ_ONCE(vdev->num_regions))
 			return -EINVAL;
-		info.index = array_index_nospec(
-			info.index, VFIO_PCI_NUM_REGIONS + vdev->num_regions);
+		info.index = array_index_nospec(info.index,
+						VFIO_PCI_NUM_REGIONS +
+						READ_ONCE(vdev->num_regions));
 
 		i = info.index - VFIO_PCI_NUM_REGIONS;
-		region = &vdev->region[i];
+		rcu_read_lock();
+		region = &rcu_dereference(vdev->region)[i];
 
 		info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
 		info.size = region->size;
@@ -1111,15 +1122,20 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
 
 		ret = vfio_info_add_capability(&caps, &cap_type.header,
 					       sizeof(cap_type));
-		if (ret)
+		if (ret) {
+			rcu_read_unlock();
 			return ret;
+		}
 
 		if (region->ops->add_capability) {
 			ret = region->ops->add_capability(
 				vdev, region, &caps);
-			if (ret)
+			if (ret) {
+				rcu_read_unlock();
 				return ret;
+			}
 		}
+		rcu_read_unlock();
 	}
 	}
 
@@ -1536,7 +1552,7 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 	int ret;
 
-	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+	if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions))
 		return -EINVAL;
 
 	ret = pm_runtime_resume_and_get(&vdev->pdev->dev);
@@ -1568,8 +1584,11 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 
 	default:
 		index -= VFIO_PCI_NUM_REGIONS;
-		ret = vdev->region[index].ops->rw(vdev, buf,
-						   count, ppos, iswrite);
+		rcu_read_lock();
+		ret = rcu_dereference(vdev->region)[index].ops->rw(vdev, buf,
+								   count, ppos,
+								   iswrite);
+		rcu_read_unlock();
 		break;
 	}
 
@@ -1726,7 +1745,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
 
 	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
 
-	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+	if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions))
 		return -EINVAL;
 	if (vma->vm_end < vma->vm_start)
 		return -EINVAL;
@@ -1734,12 +1753,16 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
 		return -EINVAL;
 	if (index >= VFIO_PCI_NUM_REGIONS) {
 		int regnum = index - VFIO_PCI_NUM_REGIONS;
-		struct vfio_pci_region *region = vdev->region + regnum;
+		struct vfio_pci_region *region;
+
+		rcu_read_lock();
+		region = rcu_dereference(vdev->region) + regnum;
 
 		ret = -EINVAL;
 		if (region->ops && region->ops->mmap &&
 		    (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
 			ret = region->ops->mmap(vdev, region, vma);
+		rcu_read_unlock();
 		return ret;
 	}
 	if (index >= VFIO_PCI_ROM_REGION_INDEX)
@@ -2107,6 +2130,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
 	INIT_LIST_HEAD(&vdev->sriov_pfs_item);
 	init_rwsem(&vdev->memory_lock);
 	xa_init(&vdev->ctx);
+	mutex_init(&vdev->region_lock);
 
 	return 0;
 }
@@ -2119,6 +2143,7 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev)
 
 	mutex_destroy(&vdev->igate);
 	mutex_destroy(&vdev->ioeventfds_lock);
+	mutex_destroy(&vdev->region_lock);
 	kfree(vdev->region);
 	kfree(vdev->pm_save);
 }
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 93ddef48e4e4c..1f7e9e82ac08c 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -71,13 +71,17 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
 	struct vfio_pci_region *region;
 	struct igd_opregion_vbt *opregionvbt;
 
-	region = &vdev->region[i];
+	rcu_read_lock();
+	region = &rcu_dereference(vdev->region)[i];
 	opregionvbt = region->data;
 
-	if (pos >= region->size || iswrite)
+	if (pos >= region->size || iswrite) {
+		rcu_read_unlock();
 		return -EINVAL;
+	}
 
 	count = min_t(size_t, count, region->size - pos);
+	rcu_read_unlock();
 	remaining = count;
 
 	/* Copy until OpRegion version */
@@ -293,13 +297,17 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
 	struct vfio_pci_region *region;
 	struct pci_dev *pdev;
 
-	region = &vdev->region[i];
+	rcu_read_lock();
+	region = &rcu_dereference(vdev->region)[i];
 	pdev = region->data;
 
-	if (pos >= region->size || iswrite)
+	if (pos >= region->size || iswrite) {
+		rcu_read_unlock();
 		return -EINVAL;
+	}
 
 	size = count = min(count, (size_t)(region->size - pos));
+	rcu_read_unlock();
 
 	if ((pos & 1) && size) {
 		u8 val;
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2a..e106e58f297e9 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -63,6 +63,7 @@ struct vfio_pci_core_device {
 	int			irq_type;
 	int			num_regions;
 	struct vfio_pci_region	*region;
+	struct mutex		region_lock;
 	u8			msi_qmax;
 	u8			msix_bar;
 	u16			msix_size;
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
Re: [RFC PATCH 3/7] vfio/pci: add RCU locking for regions access
Posted by Mahmoud Nagy Adam 1 week ago
..Having a second look on the rcu read sections. Some of these read
sections could sleep/block. simple RCU with these sections will not
work. Need to fix this on the next send.

-MNAdam



Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597