From: Ankit Agrawal <ankita@nvidia.com>

Refactor vfio_pci_mmap_huge_fault to take out the implementation
to map the VMA to the PTE/PMD/PUD as a separate function.

Export the new function to be used by nvgrace-gpu module.

No functional change is intended.

Cc: Shameer Kolothum <skolothumtho@nvidia.com>
Cc: Alex Williamson <alex@shazbot.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
drivers/vfio/pci/vfio_pci_core.c | 54 +++++++++++++++++---------------
include/linux/vfio_pci_core.h | 16 ++++++++++
2 files changed, 45 insertions(+), 25 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 7dcf5439dedc..52e3a10d776b 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1640,48 +1640,52 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
}
-static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
- unsigned int order)
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+ struct vm_fault *vmf,
+ unsigned long pfn,
+ unsigned int order)
{
- struct vm_area_struct *vma = vmf->vma;
- struct vfio_pci_core_device *vdev = vma->vm_private_data;
- unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
- unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
- unsigned long pfn = vma_to_pfn(vma) + pgoff;
- vm_fault_t ret = VM_FAULT_SIGBUS;
-
- if (order && (addr < vma->vm_start ||
- addr + (PAGE_SIZE << order) > vma->vm_end ||
- pfn & ((1 << order) - 1))) {
- ret = VM_FAULT_FALLBACK;
- goto out;
- }
-
- down_read(&vdev->memory_lock);
+ lockdep_assert_held_read(&vdev->memory_lock);
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
- goto out_unlock;
+ return VM_FAULT_SIGBUS;
switch (order) {
case 0:
- ret = vmf_insert_pfn(vma, vmf->address, pfn);
- break;
+ return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
case PMD_ORDER:
- ret = vmf_insert_pfn_pmd(vmf, pfn, false);
- break;
+ return vmf_insert_pfn_pmd(vmf, pfn, false);
#endif
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
case PUD_ORDER:
- ret = vmf_insert_pfn_pud(vmf, pfn, false);
+ return vmf_insert_pfn_pud(vmf, pfn, false);
break;
#endif
default:
+ return VM_FAULT_FALLBACK;
+ }
+}
+EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
+
+static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
+ unsigned int order)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vfio_pci_core_device *vdev = vma->vm_private_data;
+ unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
+ unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long pfn = vma_to_pfn(vma) + pgoff;
+ vm_fault_t ret;
+
+ if (unmappable_for_order(vma, addr, pfn, order)) {
ret = VM_FAULT_FALLBACK;
+ goto out;
}
-out_unlock:
- up_read(&vdev->memory_lock);
+ scoped_guard(rwsem_read, &vdev->memory_lock)
+ ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
+
out:
dev_dbg_ratelimited(&vdev->pdev->dev,
"%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..1d457216ce4d 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
size_t count, loff_t *ppos);
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+ struct vm_fault *vmf, unsigned long pfn,
+ unsigned int order);
int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
@@ -161,4 +164,17 @@ VFIO_IOREAD_DECLARATION(32)
VFIO_IOREAD_DECLARATION(64)
#endif
+static inline bool unmappable_for_order(struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long pfn,
+ unsigned int order)
+{
+ if (order && (addr < vma->vm_start ||
+ addr + (PAGE_SIZE << order) > vma->vm_end ||
+ !IS_ALIGNED(pfn, 1 << order)))
+ return true;
+
+ return false;
+}
+
#endif /* VFIO_PCI_CORE_H */
--
2.34.1
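
For illustration of how the export is meant to be consumed, a variant driver
such as nvgrace-gpu could call vfio_pci_vmf_insert_pfn() from its own
huge_fault handler roughly as sketched below. This is only a hedged sketch:
the handler name nvgrace_gpu_mmap_huge_fault() and the PFN lookup helper
nvgrace_gpu_vma_to_pfn() are assumptions for the example, not taken from the
posted series.

	/*
	 * Hedged sketch only: reuses the exported vfio_pci_vmf_insert_pfn()
	 * and the unmappable_for_order() helper added by this patch. The
	 * handler name and nvgrace_gpu_vma_to_pfn() are hypothetical.
	 */
	static vm_fault_t nvgrace_gpu_mmap_huge_fault(struct vm_fault *vmf,
						      unsigned int order)
	{
		struct vm_area_struct *vma = vmf->vma;
		struct vfio_pci_core_device *vdev = vma->vm_private_data;
		unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
		unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
		unsigned long pfn;
		vm_fault_t ret = VM_FAULT_FALLBACK;

		/* Resolve the device PFN backing this VMA (assumed helper). */
		pfn = nvgrace_gpu_vma_to_pfn(vma) + pgoff;

		/* Fall back to smaller mappings when a huge page can't fit. */
		if (!unmappable_for_order(vma, addr, pfn, order)) {
			scoped_guard(rwsem_read, &vdev->memory_lock)
				ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn,
							      order);
		}

		return ret;
	}
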
On Wed, 26 Nov 2025 05:26:22 +0000
<ankita@nvidia.com> wrote:
> From: Ankit Agrawal <ankita@nvidia.com>
>
> Refactor vfio_pci_mmap_huge_fault to take out the implementation
> to map the VMA to the PTE/PMD/PUD as a separate function.
>
> Export the new function to be used by nvgrace-gpu module.
>
> No functional change is intended.
>
> Cc: Shameer Kolothum <skolothumtho@nvidia.com>
> Cc: Alex Williamson <alex@shazbot.org>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
> drivers/vfio/pci/vfio_pci_core.c | 54 +++++++++++++++++---------------
> include/linux/vfio_pci_core.h | 16 ++++++++++
> 2 files changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 7dcf5439dedc..52e3a10d776b 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1640,48 +1640,52 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
> return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
> }
>
> -static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> - unsigned int order)
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> + struct vm_fault *vmf,
> + unsigned long pfn,
> + unsigned int order)
> {
> - struct vm_area_struct *vma = vmf->vma;
> - struct vfio_pci_core_device *vdev = vma->vm_private_data;
> - unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> - unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> - unsigned long pfn = vma_to_pfn(vma) + pgoff;
> - vm_fault_t ret = VM_FAULT_SIGBUS;
> -
> - if (order && (addr < vma->vm_start ||
> - addr + (PAGE_SIZE << order) > vma->vm_end ||
> - pfn & ((1 << order) - 1))) {
> - ret = VM_FAULT_FALLBACK;
> - goto out;
> - }
> -
> - down_read(&vdev->memory_lock);
> + lockdep_assert_held_read(&vdev->memory_lock);
>
> if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
> - goto out_unlock;
> + return VM_FAULT_SIGBUS;
>
> switch (order) {
> case 0:
> - ret = vmf_insert_pfn(vma, vmf->address, pfn);
> - break;
> + return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
> #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
> case PMD_ORDER:
> - ret = vmf_insert_pfn_pmd(vmf, pfn, false);
> - break;
> + return vmf_insert_pfn_pmd(vmf, pfn, false);
> #endif
> #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
> case PUD_ORDER:
> - ret = vmf_insert_pfn_pud(vmf, pfn, false);
> + return vmf_insert_pfn_pud(vmf, pfn, false);
> break;
> #endif
> default:
> + return VM_FAULT_FALLBACK;
> + }
> +}
> +EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
> +
> +static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> + unsigned int order)
> +{
> + struct vm_area_struct *vma = vmf->vma;
> + struct vfio_pci_core_device *vdev = vma->vm_private_data;
> + unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> + unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> + unsigned long pfn = vma_to_pfn(vma) + pgoff;
> + vm_fault_t ret;
> +
> + if (unmappable_for_order(vma, addr, pfn, order)) {
> ret = VM_FAULT_FALLBACK;
> + goto out;
> }
>
> -out_unlock:
> - up_read(&vdev->memory_lock);
> + scoped_guard(rwsem_read, &vdev->memory_lock)
> + ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
> +
> out:
We really don't need a goto to jump over this tiny section of code.
With the naming/polarity change below this can just be:

	vm_fault_t ret = VM_FAULT_FALLBACK;

	if (is_aligned_for_order(vma, addr, pfn, order)) {
		scoped_guard(rwsem_read, &vdev->memory_lock)
			ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
	}

> dev_dbg_ratelimited(&vdev->pdev->dev,
> "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index f541044e42a2..1d457216ce4d 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
> size_t count, loff_t *ppos);
> ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
> size_t count, loff_t *ppos);
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> + struct vm_fault *vmf, unsigned long pfn,
> + unsigned int order);
> int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
> void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
> int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
> @@ -161,4 +164,17 @@ VFIO_IOREAD_DECLARATION(32)
> VFIO_IOREAD_DECLARATION(64)
> #endif
>
> +static inline bool unmappable_for_order(struct vm_area_struct *vma,
> + unsigned long addr,
> + unsigned long pfn,
> + unsigned int order)
> +{
> + if (order && (addr < vma->vm_start ||
> + addr + (PAGE_SIZE << order) > vma->vm_end ||
> + !IS_ALIGNED(pfn, 1 << order)))
> + return true;
> +
> + return false;
> +}
Change polarity and rename to is_aligned_for_order()? No need for
branched return.

	return !(order && (addr < vma->vm_start ||
			   addr + (PAGE_SIZE << order) > vma->vm_end ||
			   !IS_ALIGNED(pfn, 1 << order)));

Describe this change in the commit log. Thanks,
Alex
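
Taken together, the two suggestions above would reshape the fault path
roughly as follows. This is only a sketch of the suggested shape (the
existing dev_dbg_ratelimited() reporting at the end of the handler is
unchanged and elided here), not a posted revision:

	static inline bool is_aligned_for_order(struct vm_area_struct *vma,
						unsigned long addr,
						unsigned long pfn,
						unsigned int order)
	{
		/* Order 0 always fits; larger orders must stay inside the
		 * VMA and map a naturally aligned pfn. */
		return !(order && (addr < vma->vm_start ||
				   addr + (PAGE_SIZE << order) > vma->vm_end ||
				   !IS_ALIGNED(pfn, 1 << order)));
	}

	static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
						   unsigned int order)
	{
		struct vm_area_struct *vma = vmf->vma;
		struct vfio_pci_core_device *vdev = vma->vm_private_data;
		unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
		unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
		unsigned long pfn = vma_to_pfn(vma) + pgoff;
		vm_fault_t ret = VM_FAULT_FALLBACK;

		if (is_aligned_for_order(vma, addr, pfn, order)) {
			scoped_guard(rwsem_read, &vdev->memory_lock)
				ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn,
							      order);
		}

		/* existing dev_dbg_ratelimited() call goes here, unchanged */
		return ret;
	}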