[PATCH v6 1/6] vfio: export function to map the VMA

ankita@nvidia.com posted 6 patches 6 days, 6 hours ago
There is a newer version of this series
[PATCH v6 1/6] vfio: export function to map the VMA
Posted by ankita@nvidia.com 6 days, 6 hours ago
From: Ankit Agrawal <ankita@nvidia.com>

Take out the implementation to map the VMA to the PTE/PMD/PUD
as a separate function.

Export the function to be used by nvgrace-gpu module.

cc: Shameer Kolothum <skolothumtho@nvidia.com>
cc: Alex Williamson <alex@shazbot.org>
cc: Jason Gunthorpe <jgg@ziepe.ca>
Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 drivers/vfio/pci/vfio_pci_core.c | 50 ++++++++++++++++++++------------
 include/linux/vfio_pci_core.h    |  3 ++
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 7dcf5439dedc..c445a53ee12e 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1640,31 +1640,21 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
 	return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
 }
 
-static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
-					   unsigned int order)
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+				   struct vm_fault *vmf,
+				   unsigned long pfn,
+				   unsigned int order)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct vfio_pci_core_device *vdev = vma->vm_private_data;
-	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
-	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
-	unsigned long pfn = vma_to_pfn(vma) + pgoff;
-	vm_fault_t ret = VM_FAULT_SIGBUS;
+	vm_fault_t ret;
 
-	if (order && (addr < vma->vm_start ||
-		      addr + (PAGE_SIZE << order) > vma->vm_end ||
-		      pfn & ((1 << order) - 1))) {
-		ret = VM_FAULT_FALLBACK;
-		goto out;
-	}
-
-	down_read(&vdev->memory_lock);
+	lockdep_assert_held_read(&vdev->memory_lock);
 
 	if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
-		goto out_unlock;
+		return VM_FAULT_SIGBUS;
 
 	switch (order) {
 	case 0:
-		ret = vmf_insert_pfn(vma, vmf->address, pfn);
+		ret = vmf_insert_pfn(vmf->vma, vmf->address, pfn);
 		break;
 #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
 	case PMD_ORDER:
@@ -1680,7 +1670,29 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
 		ret = VM_FAULT_FALLBACK;
 	}
 
-out_unlock:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
+
+static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
+					   unsigned int order)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct vfio_pci_core_device *vdev = vma->vm_private_data;
+	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
+	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long pfn = vma_to_pfn(vma) + pgoff;
+	vm_fault_t ret = VM_FAULT_SIGBUS;
+
+	if (order && (addr < vma->vm_start ||
+		      addr + (PAGE_SIZE << order) > vma->vm_end ||
+		      pfn & ((1 << order) - 1))) {
+		ret = VM_FAULT_FALLBACK;
+		goto out;
+	}
+
+	down_read(&vdev->memory_lock);
+	ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
 	up_read(&vdev->memory_lock);
 out:
 	dev_dbg_ratelimited(&vdev->pdev->dev,
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2..6f7c6c0d4278 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
 		size_t count, loff_t *ppos);
 ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
 		size_t count, loff_t *ppos);
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+				   struct vm_fault *vmf, unsigned long pfn,
+				   unsigned int order);
 int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
 void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
 int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
-- 
2.34.1
Re: [PATCH v6 1/6] vfio: export function to map the VMA
Posted by Alex Williamson 6 days, 3 hours ago
On Tue, 25 Nov 2025 17:30:08 +0000
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> Take out the implementation to map the VMA to the PTE/PMD/PUD
> as a separate function.
> 
> Export the function to be used by nvgrace-gpu module.
> 
> cc: Shameer Kolothum <skolothumtho@nvidia.com>
> cc: Alex Williamson <alex@shazbot.org>
> cc: Jason Gunthorpe <jgg@ziepe.ca>
> Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 50 ++++++++++++++++++++------------
>  include/linux/vfio_pci_core.h    |  3 ++
>  2 files changed, 34 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 7dcf5439dedc..c445a53ee12e 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1640,31 +1640,21 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
>  	return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
>  }
>  
> -static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> -					   unsigned int order)
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> +				   struct vm_fault *vmf,
> +				   unsigned long pfn,
> +				   unsigned int order)
>  {
> -	struct vm_area_struct *vma = vmf->vma;
> -	struct vfio_pci_core_device *vdev = vma->vm_private_data;
> -	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> -	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> -	unsigned long pfn = vma_to_pfn(vma) + pgoff;
> -	vm_fault_t ret = VM_FAULT_SIGBUS;
> +	vm_fault_t ret;
>  
> -	if (order && (addr < vma->vm_start ||
> -		      addr + (PAGE_SIZE << order) > vma->vm_end ||
> -		      pfn & ((1 << order) - 1))) {
> -		ret = VM_FAULT_FALLBACK;
> -		goto out;
> -	}
> -
> -	down_read(&vdev->memory_lock);
> +	lockdep_assert_held_read(&vdev->memory_lock);
>  
>  	if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
> -		goto out_unlock;
> +		return VM_FAULT_SIGBUS;
>  
>  	switch (order) {
>  	case 0:
> -		ret = vmf_insert_pfn(vma, vmf->address, pfn);
> +		ret = vmf_insert_pfn(vmf->vma, vmf->address, pfn);
>  		break;
>  #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
>  	case PMD_ORDER:
> @@ -1680,7 +1670,29 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
>  		ret = VM_FAULT_FALLBACK;
>  	}
>  
> -out_unlock:
> +	return ret;
> +}

At this point we no longer need @ret, we can return directly in all
cases.

> +EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
> +
> +static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> +					   unsigned int order)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct vfio_pci_core_device *vdev = vma->vm_private_data;
> +	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
> +	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> +	unsigned long pfn = vma_to_pfn(vma) + pgoff;
> +	vm_fault_t ret = VM_FAULT_SIGBUS;

The only use case of this initialization is now in the new function.

> +
> +	if (order && (addr < vma->vm_start ||
> +		      addr + (PAGE_SIZE << order) > vma->vm_end ||
> +		      pfn & ((1 << order) - 1))) {
> +		ret = VM_FAULT_FALLBACK;
> +		goto out;
> +	}

Should we make a static inline in a vfio header for the above to avoid
the duplicate implementation in the next patch?  Also we might as well
use an else branch rather than goto with the bulk of the code moved
now.  Maybe also just convert to a scoped_guard as well.  Thanks,

Alex

> +
> +	down_read(&vdev->memory_lock);
> +	ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
>  	up_read(&vdev->memory_lock);
>  out:
>  	dev_dbg_ratelimited(&vdev->pdev->dev,
> diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
> index f541044e42a2..6f7c6c0d4278 100644
> --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
>  		size_t count, loff_t *ppos);
>  ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
>  		size_t count, loff_t *ppos);
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> +				   struct vm_fault *vmf, unsigned long pfn,
> +				   unsigned int order);
>  int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma);
>  void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count);
>  int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
Re: [PATCH v6 1/6] vfio: export function to map the VMA
Posted by Zhi Wang 6 days, 3 hours ago
On Tue, 25 Nov 2025 17:30:08 +0000
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> Take out the implementation to map the VMA to the PTE/PMD/PUD
> as a separate function.
> 
> Export the function to be used by nvgrace-gpu module.
> 

This looks more like a refactor than a simple symbol export. Let's add:

No functional change is intended. 

> cc: Shameer Kolothum <skolothumtho@nvidia.com>
> cc: Alex Williamson <alex@shazbot.org>
> cc: Jason Gunthorpe <jgg@ziepe.ca>

Nit: I saw that the "cc" tag is also used elsewhere in the git log. I was
surprised that checkpatch.pl doesn't complain about it. I did test it.

In VFIO, people tend to use "Cc:" according to a search of the git log.
Let's keep using "Cc:" in VFIO.

> Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 50 ++++++++++++++++++++------------
>  include/linux/vfio_pci_core.h    |  3 ++
>  2 files changed, 34 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c
> b/drivers/vfio/pci/vfio_pci_core.c index 7dcf5439dedc..c445a53ee12e
> 100644 --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1640,31 +1640,21 @@ static unsigned long vma_to_pfn(struct
> vm_area_struct *vma) return (pci_resource_start(vdev->pdev, index) >>
> PAGE_SHIFT) + pgoff; }
>  
> -static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> -					   unsigned int order)
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> +				   struct vm_fault *vmf,
> +				   unsigned long pfn,
> +				   unsigned int order)
>  {
> -	struct vm_area_struct *vma = vmf->vma;
> -	struct vfio_pci_core_device *vdev = vma->vm_private_data;
> -	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) -
> 1);
> -	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> -	unsigned long pfn = vma_to_pfn(vma) + pgoff;
> -	vm_fault_t ret = VM_FAULT_SIGBUS;
> +	vm_fault_t ret;
>  
> -	if (order && (addr < vma->vm_start ||
> -		      addr + (PAGE_SIZE << order) > vma->vm_end ||
> -		      pfn & ((1 << order) - 1))) {
> -		ret = VM_FAULT_FALLBACK;
> -		goto out;
> -	}
> -
> -	down_read(&vdev->memory_lock);
> +	lockdep_assert_held_read(&vdev->memory_lock);
>  
>  	if (vdev->pm_runtime_engaged ||
> !__vfio_pci_memory_enabled(vdev))
> -		goto out_unlock;
> +		return VM_FAULT_SIGBUS;
>  
>  	switch (order) {
>  	case 0:
> -		ret = vmf_insert_pfn(vma, vmf->address, pfn);
> +		ret = vmf_insert_pfn(vmf->vma, vmf->address, pfn);
>  		break;
>  #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
>  	case PMD_ORDER:
> @@ -1680,7 +1670,29 @@ static vm_fault_t
> vfio_pci_mmap_huge_fault(struct vm_fault *vmf, ret =
> VM_FAULT_FALLBACK; }
>  
> -out_unlock:
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
> +
> +static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
> +					   unsigned int order)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct vfio_pci_core_device *vdev = vma->vm_private_data;
> +	unsigned long addr = vmf->address & ~((PAGE_SIZE << order) -
> 1);
> +	unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
> +	unsigned long pfn = vma_to_pfn(vma) + pgoff;
> +	vm_fault_t ret = VM_FAULT_SIGBUS;
> +
> +	if (order && (addr < vma->vm_start ||
> +		      addr + (PAGE_SIZE << order) > vma->vm_end ||
> +		      pfn & ((1 << order) - 1))) {
> +		ret = VM_FAULT_FALLBACK;
> +		goto out;
> +	}
> +
> +	down_read(&vdev->memory_lock);
> +	ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
>  	up_read(&vdev->memory_lock);
>  out:
>  	dev_dbg_ratelimited(&vdev->pdev->dev,
> diff --git a/include/linux/vfio_pci_core.h
> b/include/linux/vfio_pci_core.h index f541044e42a2..6f7c6c0d4278
> 100644 --- a/include/linux/vfio_pci_core.h
> +++ b/include/linux/vfio_pci_core.h
> @@ -119,6 +119,9 @@ ssize_t vfio_pci_core_read(struct vfio_device
> *core_vdev, char __user *buf, size_t count, loff_t *ppos);
>  ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const
> char __user *buf, size_t count, loff_t *ppos);
> +vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
> +				   struct vm_fault *vmf, unsigned
> long pfn,
> +				   unsigned int order);
>  int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct
> vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device
> *core_vdev, unsigned int count); int vfio_pci_core_match(struct
> vfio_device *core_vdev, char *buf);