Since we could request to add more regions after initialization. We
would need locking to avoid racing with readers and cause UAF. use RCU
for read-write synchronization. And region_lock mutex is used to
synchronize the write section.
Changing the value of num_regions is done under the mutex. Since the
num_regions can only increase, using READ_ONCE and WRITE_ONCE should
be enough to make sure we have a valid value. On the write section,
synchronize_rcu() is run before incrementing num_regions. Doing that
makes sure read sections are passed before increasing num_regions to
avoid causing out-of-bound access.
Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
---
drivers/vfio/pci/vfio_pci_core.c | 59 +++++++++++++++++++++++---------
drivers/vfio/pci/vfio_pci_igd.c | 16 ++++++---
include/linux/vfio_pci_core.h | 1 +
3 files changed, 55 insertions(+), 21 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 6629490c0e46f..78e18bfd973e5 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -882,7 +882,8 @@ static int msix_mmappable_cap(struct vfio_pci_core_device *vdev,
}
/*
- * Registers a new region to vfio_pci_core_device.
+ * Registers a new region to vfio_pci_core_device. region_lock should
+ * be held when multiple registers could happen.
* Returns region index on success or a negative errno.
*/
int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
@@ -890,15 +891,20 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
const struct vfio_pci_regops *ops,
size_t size, u32 flags, void *data)
{
- int num_regions = vdev->num_regions;
struct vfio_pci_region *region, *old_region;
+ int num_regions;
+
+ mutex_lock(&vdev->region_lock);
+ num_regions = READ_ONCE(vdev->num_regions);
region = kmalloc((num_regions + 1) * sizeof(*region),
GFP_KERNEL_ACCOUNT);
if (!region)
return -ENOMEM;
- old_region = vdev->region;
+ old_region =
+ rcu_dereference_protected(vdev->region,
+ lockdep_is_held(&vdev->region_lock));
if (old_region)
memcpy(region, old_region, num_regions * sizeof(*region));
@@ -909,8 +915,10 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
region[num_regions].flags = flags;
region[num_regions].data = data;
- vdev->region = region;
- vdev->num_regions++;
+ rcu_assign_pointer(vdev->region, region);
+ synchronize_rcu();
+ WRITE_ONCE(vdev->num_regions, READ_ONCE(vdev->num_regions) + 1);
+ mutex_unlock(&vdev->region_lock);
kfree(old_region);
return num_regions;
}
@@ -968,7 +976,7 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
if (vdev->reset_works)
info.flags |= VFIO_DEVICE_FLAGS_RESET;
- info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
+ info.num_regions = VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions);
info.num_irqs = VFIO_PCI_NUM_IRQS;
ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
@@ -1094,13 +1102,16 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
.header.version = 1
};
- if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+ if (info.index >= VFIO_PCI_NUM_REGIONS +
+ READ_ONCE(vdev->num_regions))
return -EINVAL;
- info.index = array_index_nospec(
- info.index, VFIO_PCI_NUM_REGIONS + vdev->num_regions);
+ info.index = array_index_nospec(info.index,
+ VFIO_PCI_NUM_REGIONS +
+ READ_ONCE(vdev->num_regions));
i = info.index - VFIO_PCI_NUM_REGIONS;
- region = &vdev->region[i];
+ rcu_read_lock();
+ region = &rcu_dereference(vdev->region)[i];
info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
info.size = region->size;
@@ -1111,15 +1122,20 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
ret = vfio_info_add_capability(&caps, &cap_type.header,
sizeof(cap_type));
- if (ret)
+ if (ret) {
+ rcu_read_unlock();
return ret;
+ }
if (region->ops->add_capability) {
ret = region->ops->add_capability(
vdev, region, &caps);
- if (ret)
+ if (ret) {
+ rcu_read_unlock();
return ret;
+ }
}
+ rcu_read_unlock();
}
}
@@ -1536,7 +1552,7 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
int ret;
- if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+ if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions))
return -EINVAL;
ret = pm_runtime_resume_and_get(&vdev->pdev->dev);
@@ -1568,8 +1584,11 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
default:
index -= VFIO_PCI_NUM_REGIONS;
- ret = vdev->region[index].ops->rw(vdev, buf,
- count, ppos, iswrite);
+ rcu_read_lock();
+ ret = rcu_dereference(vdev->region)[index].ops->rw(vdev, buf,
+ count, ppos,
+ iswrite);
+ rcu_read_unlock();
break;
}
@@ -1726,7 +1745,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
- if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+ if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions))
return -EINVAL;
if (vma->vm_end < vma->vm_start)
return -EINVAL;
@@ -1734,12 +1753,16 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
return -EINVAL;
if (index >= VFIO_PCI_NUM_REGIONS) {
int regnum = index - VFIO_PCI_NUM_REGIONS;
- struct vfio_pci_region *region = vdev->region + regnum;
+ struct vfio_pci_region *region;
+
+ rcu_read_lock();
+ region = rcu_dereference(vdev->region) + regnum;
ret = -EINVAL;
if (region->ops && region->ops->mmap &&
(region->flags & VFIO_REGION_INFO_FLAG_MMAP))
ret = region->ops->mmap(vdev, region, vma);
+ rcu_read_unlock();
return ret;
}
if (index >= VFIO_PCI_ROM_REGION_INDEX)
@@ -2107,6 +2130,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
init_rwsem(&vdev->memory_lock);
xa_init(&vdev->ctx);
+ mutex_init(&vdev->region_lock);
return 0;
}
@@ -2119,6 +2143,7 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev)
mutex_destroy(&vdev->igate);
mutex_destroy(&vdev->ioeventfds_lock);
+ mutex_destroy(&vdev->region_lock);
kfree(vdev->region);
kfree(vdev->pm_save);
}
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 93ddef48e4e4c..1f7e9e82ac08c 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -71,13 +71,17 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
struct vfio_pci_region *region;
struct igd_opregion_vbt *opregionvbt;
- region = &vdev->region[i];
+ rcu_read_lock();
+ region = &rcu_dereference(vdev->region)[i];
opregionvbt = region->data;
- if (pos >= region->size || iswrite)
+ if (pos >= region->size || iswrite) {
+ rcu_read_unlock();
return -EINVAL;
+ }
count = min_t(size_t, count, region->size - pos);
+ rcu_read_unlock();
remaining = count;
/* Copy until OpRegion version */
@@ -293,13 +297,17 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
struct vfio_pci_region *region;
struct pci_dev *pdev;
- region = &vdev->region[i];
+ rcu_read_lock();
+ region = &rcu_dereference(vdev->region)[i];
pdev = region->data;
- if (pos >= region->size || iswrite)
+ if (pos >= region->size || iswrite) {
+ rcu_read_unlock();
return -EINVAL;
+ }
size = count = min(count, (size_t)(region->size - pos));
+ rcu_read_unlock();
if ((pos & 1) && size) {
u8 val;
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2a..e106e58f297e9 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -63,6 +63,7 @@ struct vfio_pci_core_device {
int irq_type;
int num_regions;
struct vfio_pci_region *region;
+ struct mutex region_lock;
u8 msi_qmax;
u8 msix_bar;
u16 msix_size;
--
2.47.3
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
..Having a second look on the rcu read sections. Some of these read sections could sleep/block. simple RCU with these sections will not work. Need to fix this on the next send. -MNAdam Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597
© 2016 - 2025 Red Hat, Inc.