[PATCH v7 1/6] vfio: refactor vfio_pci_mmap_huge_fault function

ankita@nvidia.com posted 6 patches 5 days, 18 hours ago
There is a newer version of this series
[PATCH v7 1/6] vfio: refactor vfio_pci_mmap_huge_fault function
Posted by ankita@nvidia.com 5 days, 18 hours ago
From: Ankit Agrawal <ankita@nvidia.com>

Refactor vfio_pci_mmap_huge_fault by splitting out the implementation
that maps the VMA to the PTE/PMD/PUD into a separate function.

Export the new function to be used by nvgrace-gpu module.

No functional change is intended.

Cc: Shameer Kolothum <skolothumtho@nvidia.com>
Cc: Alex Williamson <alex@shazbot.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 drivers/vfio/pci/vfio_pci_core.c | 54 +++++++++++++++++---------------
 include/linux/vfio_pci_core.h    | 16 ++++++++++
 2 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 7dcf5439dedc..52e3a10d776b 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1640,48 +1640,52 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
 	return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
 }
 
-static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
-					   unsigned int order)
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+				   struct vm_fault *vmf,
+				   unsigned long pfn,
+				   unsigned int order)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct vfio_pci_core_device *vdev = vma->vm_private_data;
-	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
-	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
-	unsigned long pfn = vma_to_pfn(vma) + pgoff;
-	vm_fault_t ret = VM_FAULT_SIGBUS;
-
-	if (order && (addr < vma->vm_start ||
-		      addr + (PAGE_SIZE << order) > vma->vm_end ||
-		      pfn & ((1 << order) - 1))) {
-		ret = VM_FAULT_FALLBACK;
-		goto out;
-	}
-
-	down_read(&vdev->memory_lock);
+	lockdep_assert_held_read(&vdev->memory_lock);
 
 	if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
-		goto out_unlock;
+		return VM_FAULT_SIGBUS;
 
 	switch (order) {
 	case 0:
-		ret = vmf_insert_pfn(vma, vmf->address, pfn);
-		break;
+		return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
 #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
 	case PMD_ORDER:
-		ret = vmf_insert_pfn_pmd(vmf, pfn, false);
-		break;
+		return vmf_insert_pfn_pmd(vmf, pfn, false);
 #endif
 #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
 	case PUD_ORDER:
-		ret = vmf_insert_pfn_pud(vmf, pfn, false);
+		return vmf_insert_pfn_pud(vmf, pfn, false);
 		break;
 #endif
 	default:
+		return VM_FAULT_FALLBACK;
+	}
+}
+EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
+
+static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
+					   unsigned int order)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct vfio_pci_core_device *vdev = vma->vm_private_data;
+	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
+	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long pfn = vma_to_pfn(vma) + pgoff;
+	vm_fault_t ret;
+
+	if (unmappable_for_order(vma, addr, pfn, order)) {
 		ret = VM_FAULT_FALLBACK;
+		goto out;
 	}
 
-out_unlock:
-	up_read(&vdev->memory_lock);
+	scoped_guard(rwsem_read, &vdev->memory_lock)
+		ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
+
 out:
 	dev_dbg_ratelimited(&vdev->pdev->dev,
 			   "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..1d457216ce4d 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
 		size_t count, loff_t *ppos);
 ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
 		size_t count, loff_t *ppos);
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+				   struct vm_fault *vmf, unsigned long pfn,
+				   unsigned int order);
 int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
 void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
 int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
@@ -161,4 +164,17 @@ VFIO_IOREAD_DECLARATION(32)
 VFIO_IOREAD_DECLARATION(64)
 #endif
 
+static inline bool unmappable_for_order(struct vm_area_struct *vma,
+					unsigned long addr,
+					unsigned long pfn,
+					unsigned int order)
+{
+	if (order && (addr < vma->vm_start ||
+		      addr + (PAGE_SIZE << order) > vma->vm_end ||
+		      !IS_ALIGNED(pfn, 1 << order)))
+		return true;
+
+	return false;
+}
+
 #endif /* VFIO_PCI_CORE_H */
-- 
2.34.1
Re: [PATCH v7 1/6] vfio: refactor vfio_pci_mmap_huge_fault function
Posted by Alex Williamson 5 days, 8 hours ago
On Wed, 26 Nov 2025 05:26:22 +0000
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> Refactor vfio_pci_mmap_huge_fault by splitting out the implementation
> that maps the VMA to the PTE/PMD/PUD into a separate function.
> 
> Export the new function to be used by nvgrace-gpu module.
> 
> No functional change is intended.
> 
> Cc: Shameer Kolothum <skolothumtho@nvidia.com>
> Cc: Alex Williamson <alex@shazbot.org>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 54 +++++++++++++++++---------------
>  include/linux/vfio_pci_core.h    | 16 ++++++++++
>  2 files changed, 45 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 7dcf5439dedc..52e3a10d776b 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1640,48 +1640,52 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
>  	return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
>  }
>  
> -static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> -					   unsigned int order)
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> +				   struct vm_fault *vmf,
> +				   unsigned long pfn,
> +				   unsigned int order)
>  {
> -	struct vm_area_struct *vma = vmf->vma;
> -	struct vfio_pci_core_device *vdev = vma->vm_private_data;
> -	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> -	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> -	unsigned long pfn = vma_to_pfn(vma) + pgoff;
> -	vm_fault_t ret = VM_FAULT_SIGBUS;
> -
> -	if (order && (addr < vma->vm_start ||
> -		      addr + (PAGE_SIZE << order) > vma->vm_end ||
> -		      pfn & ((1 << order) - 1))) {
> -		ret = VM_FAULT_FALLBACK;
> -		goto out;
> -	}
> -
> -	down_read(&vdev->memory_lock);
> +	lockdep_assert_held_read(&vdev->memory_lock);
>  
>  	if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
> -		goto out_unlock;
> +		return VM_FAULT_SIGBUS;
>  
>  	switch (order) {
>  	case 0:
> -		ret = vmf_insert_pfn(vma, vmf->address, pfn);
> -		break;
> +		return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
>  #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
>  	case PMD_ORDER:
> -		ret = vmf_insert_pfn_pmd(vmf, pfn, false);
> -		break;
> +		return vmf_insert_pfn_pmd(vmf, pfn, false);
>  #endif
>  #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
>  	case PUD_ORDER:
> -		ret = vmf_insert_pfn_pud(vmf, pfn, false);
> +		return vmf_insert_pfn_pud(vmf, pfn, false);
>  		break;
>  #endif
>  	default:
> +		return VM_FAULT_FALLBACK;
> +	}
> +}
> +EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
> +
> +static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> +					   unsigned int order)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct vfio_pci_core_device *vdev = vma->vm_private_data;
> +	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> +	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> +	unsigned long pfn = vma_to_pfn(vma) + pgoff;
> +	vm_fault_t ret;
> +
> +	if (unmappable_for_order(vma, addr, pfn, order)) {
>  		ret = VM_FAULT_FALLBACK;
> +		goto out;
>  	}
>  
> -out_unlock:
> -	up_read(&vdev->memory_lock);
> +	scoped_guard(rwsem_read, &vdev->memory_lock)
> +		ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
> +
>  out:

We really don't need a goto to jump over this tiny section of code.
With the naming/polarity change below this can just be:

	vm_fault_t ret = VM_FAULT_FALLBACK;

	if (is_aligned_for_order(vma, addr, pfn, order)) {
		scoped_guard(rwsem_read, &vdev->memory_lock)
			ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
	}


>  	dev_dbg_ratelimited(&vdev->pdev->dev,
>  			   "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index f541044e42a2..1d457216ce4d 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
>  		size_t count, loff_t *ppos);
>  ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
>  		size_t count, loff_t *ppos);
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> +				   struct vm_fault *vmf, unsigned long pfn,
> +				   unsigned int order);
>  int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
>  void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
>  int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
> @@ -161,4 +164,17 @@ VFIO_IOREAD_DECLARATION(32)
>  VFIO_IOREAD_DECLARATION(64)
>  #endif
>  
> +static inline bool unmappable_for_order(struct vm_area_struct *vma,
> +					unsigned long addr,
> +					unsigned long pfn,
> +					unsigned int order)
> +{
> +	if (order && (addr < vma->vm_start ||
> +		      addr + (PAGE_SIZE << order) > vma->vm_end ||
> +		      !IS_ALIGNED(pfn, 1 << order)))
> +		return true;
> +
> +	return false;
> +}


Change polarity and rename to is_aligned_for_order()?  No need for
branched return.

	return !(order && (addr < vma->vm_start ||
			   addr + (PAGE_SIZE << order) > vma->vm_end ||
			   !IS_ALIGNED(pfn, 1 << order)));

Describe this change in the commit log.  Thanks,

Alex