Add a hook to vfio_device_ops to allow sub-modules provide virtual
addresses for an mmap() request.
Note that the fallback will be mm_get_unmapped_area(), which should
maintain the old behavior of generic VA allocation (__get_unmapped_area).
It's a bit unfortunate that is needed, as the current get_unmapped_area()
file ops cannot support a retval which fallbacks to the default. So that
is needed both here and whenever sub-module will opt-in with its own.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
drivers/vfio/vfio_main.c | 18 ++++++++++++++++++
include/linux/vfio.h | 7 +++++++
2 files changed, 25 insertions(+)
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 1fd261efc582..19db8e58d223 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1354,6 +1354,23 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
return device->ops->mmap(device, vma);
}
+static unsigned long vfio_device_get_unmapped_area(struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags)
+{
+ struct vfio_device_file *df = file->private_data;
+ struct vfio_device *device = df->device;
+
+ if (!device->ops->get_unmapped_area)
+ return mm_get_unmapped_area(current->mm, file, addr,
+ len, pgoff, flags);
+
+ return device->ops->get_unmapped_area(device, file, addr, len,
+ pgoff, flags);
+}
+
const struct file_operations vfio_device_fops = {
.owner = THIS_MODULE,
.open = vfio_device_fops_cdev_open,
@@ -1363,6 +1380,7 @@ const struct file_operations vfio_device_fops = {
.unlocked_ioctl = vfio_device_fops_unl_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.mmap = vfio_device_fops_mmap,
+ .get_unmapped_area = vfio_device_get_unmapped_area,
};
static struct vfio_device *vfio_device_from_file(struct file *file)
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 707b00772ce1..48fe71c61ed2 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -108,6 +108,7 @@ struct vfio_device {
* @dma_unmap: Called when userspace unmaps IOVA from the container
* this device is attached to.
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
+ * @get_unmapped_area: Optional, provide virtual address hint for mmap()
*/
struct vfio_device_ops {
char *name;
@@ -135,6 +136,12 @@ struct vfio_device_ops {
void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
int (*device_feature)(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
+ unsigned long (*get_unmapped_area)(struct vfio_device *device,
+ struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags);
};
#if IS_ENABLED(CONFIG_IOMMUFD)
--
2.49.0
Hi Peter, kernel test robot noticed the following build errors: [auto build test ERROR on akpm-mm/mm-everything] url: https://github.com/intel-lab-lkp/linux/commits/Peter-Xu/mm-Deduplicate-mm_get_unmapped_area/20250613-214307 base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything patch link: https://lore.kernel.org/r/20250613134111.469884-5-peterx%40redhat.com patch subject: [PATCH 4/5] vfio: Introduce vfio_device_ops.get_unmapped_area hook config: sh-randconfig-002-20250614 (https://download.01.org/0day-ci/archive/20250614/202506142215.koMEU2rT-lkp@intel.com/config) compiler: sh4-linux-gcc (GCC) 12.4.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250614/202506142215.koMEU2rT-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202506142215.koMEU2rT-lkp@intel.com/ All errors (new ones prefixed by >>): drivers/vfio/vfio_main.c: In function 'vfio_device_get_unmapped_area': >> drivers/vfio/vfio_main.c:1367:24: error: implicit declaration of function 'mm_get_unmapped_area'; did you mean 'get_unmapped_area'? [-Werror=implicit-function-declaration] 1367 | return mm_get_unmapped_area(current->mm, file, addr, | ^~~~~~~~~~~~~~~~~~~~ | get_unmapped_area cc1: some warnings being treated as errors vim +1367 drivers/vfio/vfio_main.c 1356 1357 static unsigned long vfio_device_get_unmapped_area(struct file *file, 1358 unsigned long addr, 1359 unsigned long len, 1360 unsigned long pgoff, 1361 unsigned long flags) 1362 { 1363 struct vfio_device_file *df = file->private_data; 1364 struct vfio_device *device = df->device; 1365 1366 if (!device->ops->get_unmapped_area) > 1367 return mm_get_unmapped_area(current->mm, file, addr, 1368 len, pgoff, flags); 1369 1370 return device->ops->get_unmapped_area(device, file, addr, len, 1371 pgoff, flags); 1372 } 1373 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Sat, Jun 14, 2025 at 10:46:45PM +0800, kernel test robot wrote: > Hi Peter, > > kernel test robot noticed the following build errors: > > [auto build test ERROR on akpm-mm/mm-everything] > > url: https://github.com/intel-lab-lkp/linux/commits/Peter-Xu/mm-Deduplicate-mm_get_unmapped_area/20250613-214307 > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything > patch link: https://lore.kernel.org/r/20250613134111.469884-5-peterx%40redhat.com > patch subject: [PATCH 4/5] vfio: Introduce vfio_device_ops.get_unmapped_area hook > config: sh-randconfig-002-20250614 (https://download.01.org/0day-ci/archive/20250614/202506142215.koMEU2rT-lkp@intel.com/config) > compiler: sh4-linux-gcc (GCC) 12.4.0 > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250614/202506142215.koMEU2rT-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202506142215.koMEU2rT-lkp@intel.com/ > > All errors (new ones prefixed by >>): > > drivers/vfio/vfio_main.c: In function 'vfio_device_get_unmapped_area': > >> drivers/vfio/vfio_main.c:1367:24: error: implicit declaration of function 'mm_get_unmapped_area'; did you mean 'get_unmapped_area'? [-Werror=implicit-function-declaration] > 1367 | return mm_get_unmapped_area(current->mm, file, addr, > | ^~~~~~~~~~~~~~~~~~~~ > | get_unmapped_area > cc1: some warnings being treated as errors > > > vim +1367 drivers/vfio/vfio_main.c > > 1356 > 1357 static unsigned long vfio_device_get_unmapped_area(struct file *file, > 1358 unsigned long addr, > 1359 unsigned long len, > 1360 unsigned long pgoff, > 1361 unsigned long flags) > 1362 { > 1363 struct vfio_device_file *df = file->private_data; > 1364 struct vfio_device *device = df->device; > 1365 > 1366 if (!device->ops->get_unmapped_area) > > 1367 return mm_get_unmapped_area(current->mm, file, addr, > 1368 len, pgoff, flags); > 1369 > 1370 return device->ops->get_unmapped_area(device, file, addr, len, > 1371 pgoff, flags); > 1372 } > 1373 This is "ARCH_SH + VFIO + !MMU".. I'll make sure to cover this config when repost. I'll squash below into the patch: diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 19db8e58d223..cc14884d282f 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1354,6 +1354,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) return device->ops->mmap(device, vma); } +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP static unsigned long vfio_device_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, @@ -1370,6 +1371,7 @@ static unsigned long vfio_device_get_unmapped_area(struct file *file, return device->ops->get_unmapped_area(device, file, addr, len, pgoff, flags); } +#endif const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, @@ -1380,7 +1382,9 @@ const struct file_operations vfio_device_fops = { .unlocked_ioctl = vfio_device_fops_unl_ioctl, .compat_ioctl = compat_ptr_ioctl, .mmap = vfio_device_fops_mmap, +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP .get_unmapped_area = vfio_device_get_unmapped_area, +#endif }; static struct vfio_device *vfio_device_from_file(struct file *file) -- Peter Xu
On Tue, Jun 17, 2025 at 11:39:07AM -0400, Peter Xu wrote: > > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > static unsigned long vfio_device_get_unmapped_area(struct file *file, > unsigned long addr, > unsigned long len, > @@ -1370,6 +1371,7 @@ static unsigned long vfio_device_get_unmapped_area(struct file *file, > return device->ops->get_unmapped_area(device, file, addr, len, > pgoff, flags); > } > +#endif > > const struct file_operations vfio_device_fops = { > .owner = THIS_MODULE, > @@ -1380,7 +1382,9 @@ const struct file_operations vfio_device_fops = { > .unlocked_ioctl = vfio_device_fops_unl_ioctl, > .compat_ioctl = compat_ptr_ioctl, > .mmap = vfio_device_fops_mmap, > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > .get_unmapped_area = vfio_device_get_unmapped_area, > +#endif > }; IMHO this also seems like something the core code should be dealing with and not putting weird ifdefs in drivers. Jason
On Tue, Jun 17, 2025 at 12:41:57PM -0300, Jason Gunthorpe wrote: > On Tue, Jun 17, 2025 at 11:39:07AM -0400, Peter Xu wrote: > > > > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > > static unsigned long vfio_device_get_unmapped_area(struct file *file, > > unsigned long addr, > > unsigned long len, > > @@ -1370,6 +1371,7 @@ static unsigned long vfio_device_get_unmapped_area(struct file *file, > > return device->ops->get_unmapped_area(device, file, addr, len, > > pgoff, flags); > > } > > +#endif > > > > const struct file_operations vfio_device_fops = { > > .owner = THIS_MODULE, > > @@ -1380,7 +1382,9 @@ const struct file_operations vfio_device_fops = { > > .unlocked_ioctl = vfio_device_fops_unl_ioctl, > > .compat_ioctl = compat_ptr_ioctl, > > .mmap = vfio_device_fops_mmap, > > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > > .get_unmapped_area = vfio_device_get_unmapped_area, > > +#endif > > }; > > IMHO this also seems like something the core code should be dealing > with and not putting weird ifdefs in drivers. It may depend on whether we want to still do the fallbacks to mm_get_unmapped_area(). I get your point in the other email but not yet get a chance to reply. I'll try that out to see how it looks and reply there. -- Peter Xu
On Tue, Jun 17, 2025 at 12:47:35PM -0400, Peter Xu wrote: > On Tue, Jun 17, 2025 at 12:41:57PM -0300, Jason Gunthorpe wrote: > > On Tue, Jun 17, 2025 at 11:39:07AM -0400, Peter Xu wrote: > > > > > > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > > > static unsigned long vfio_device_get_unmapped_area(struct file *file, > > > unsigned long addr, > > > unsigned long len, > > > @@ -1370,6 +1371,7 @@ static unsigned long vfio_device_get_unmapped_area(struct file *file, > > > return device->ops->get_unmapped_area(device, file, addr, len, > > > pgoff, flags); > > > } > > > +#endif > > > > > > const struct file_operations vfio_device_fops = { > > > .owner = THIS_MODULE, > > > @@ -1380,7 +1382,9 @@ const struct file_operations vfio_device_fops = { > > > .unlocked_ioctl = vfio_device_fops_unl_ioctl, > > > .compat_ioctl = compat_ptr_ioctl, > > > .mmap = vfio_device_fops_mmap, > > > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > > > .get_unmapped_area = vfio_device_get_unmapped_area, > > > +#endif > > > }; > > > > IMHO this also seems like something the core code should be dealing > > with and not putting weird ifdefs in drivers. > > It may depend on whether we want to still do the fallbacks to > mm_get_unmapped_area(). I get your point in the other email but not yet > get a chance to reply. I'll try that out to see how it looks and reply > there. I just noticed this is unfortunate and special; I yet don't see a way to avoid the fallback here. Note that this is the vfio_device's fallback, even if the new helper (whatever we name it..) could do fallback internally, vfio_device still would need to be accessible to mm_get_unmapped_area() to make this config build pass. So I think I'll need my fixup here.. -- Peter Xu
On Tue, Jun 17, 2025 at 03:39:19PM -0400, Peter Xu wrote: > On Tue, Jun 17, 2025 at 12:47:35PM -0400, Peter Xu wrote: > > On Tue, Jun 17, 2025 at 12:41:57PM -0300, Jason Gunthorpe wrote: > > > On Tue, Jun 17, 2025 at 11:39:07AM -0400, Peter Xu wrote: > > > > > > > > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > > > > static unsigned long vfio_device_get_unmapped_area(struct file *file, > > > > unsigned long addr, > > > > unsigned long len, > > > > @@ -1370,6 +1371,7 @@ static unsigned long vfio_device_get_unmapped_area(struct file *file, > > > > return device->ops->get_unmapped_area(device, file, addr, len, > > > > pgoff, flags); > > > > } > > > > +#endif > > > > > > > > const struct file_operations vfio_device_fops = { > > > > .owner = THIS_MODULE, > > > > @@ -1380,7 +1382,9 @@ const struct file_operations vfio_device_fops = { > > > > .unlocked_ioctl = vfio_device_fops_unl_ioctl, > > > > .compat_ioctl = compat_ptr_ioctl, > > > > .mmap = vfio_device_fops_mmap, > > > > +#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP > > > > .get_unmapped_area = vfio_device_get_unmapped_area, > > > > +#endif > > > > }; > > > > > > IMHO this also seems like something the core code should be dealing > > > with and not putting weird ifdefs in drivers. > > > > It may depend on whether we want to still do the fallbacks to > > mm_get_unmapped_area(). I get your point in the other email but not yet > > get a chance to reply. I'll try that out to see how it looks and reply > > there. > > I just noticed this is unfortunate and special; I yet don't see a way to > avoid the fallback here. > > Note that this is the vfio_device's fallback, even if the new helper > (whatever we name it..) could do fallback internally, vfio_device still > would need to be accessible to mm_get_unmapped_area() to make this config > build pass. I don't understand this remark? get_unmapped_area is not conditional on CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? Some new mm_get_unmapped_area_aligned() should not be conditional on CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? (This is Lorenzo's and Liam's remark) So what is VFIO doing that requires CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? Jason
On Tue, Jun 17, 2025 at 04:46:21PM -0300, Jason Gunthorpe wrote: > > I just noticed this is unfortunate and special; I yet don't see a way to > > avoid the fallback here. > > > > Note that this is the vfio_device's fallback, even if the new helper > > (whatever we name it..) could do fallback internally, vfio_device still > > would need to be accessible to mm_get_unmapped_area() to make this config > > build pass. > > I don't understand this remark? > > get_unmapped_area is not conditional on CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? > > Some new mm_get_unmapped_area_aligned() should not be conditional on > CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? (This is Lorenzo's and Liam's remark) Yes, this will be addressed. > > So what is VFIO doing that requires CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? It's the fallback part for vfio device, not vfio_pci device. vfio_pci device doesn't need this special treatment after moving to the new helper because that hides everything. vfio_device still needs it. So, we have two ops that need to be touched to support this: vfio_device_fops vfio_pci_ops For the 1st one's vfio_device_fops.get_unmapped_area(), it'll need its own fallback which must be mm_get_unmapped_area() to keep the old behavior, and that was defined only if CONFIG_MMU. IOW, if one day file_operations.get_unmapped_area() would allow some other retval to be able to fallback to the default (mm_get_unmapped_area()), then we don't need this special ifdef. But now it's not ready for that.. -- Peter Xu
On Tue, Jun 17, 2025 at 04:01:11PM -0400, Peter Xu wrote: > > So what is VFIO doing that requires CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? > > It's the fallback part for vfio device, not vfio_pci device. vfio_pci > device doesn't need this special treatment after moving to the new helper > because that hides everything. vfio_device still needs it. > > So, we have two ops that need to be touched to support this: > > vfio_device_fops > vfio_pci_ops > > For the 1st one's vfio_device_fops.get_unmapped_area(), it'll need its own > fallback which must be mm_get_unmapped_area() to keep the old behavior, and > that was defined only if CONFIG_MMU. OK, CONFIG_MMU makes a little bit of sense > IOW, if one day file_operations.get_unmapped_area() would allow some other > retval to be able to fallback to the default (mm_get_unmapped_area()), then > we don't need this special ifdef. But now it's not ready for that.. That can't be fixed with a config, the logic in vfio_device_fops has to be if (!device->ops->get_unmapped_area() return .. do_default thing.. return device->ops->get_unmapped() Has nothing to do with CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP, there are more device->ops that just PCI. If you do the API with an align/order argument then the default behavior should happen when passing PAGE_SIZE. Jason
On Tue, Jun 17, 2025 at 08:00:30PM -0300, Jason Gunthorpe wrote: > On Tue, Jun 17, 2025 at 04:01:11PM -0400, Peter Xu wrote: > > > > So what is VFIO doing that requires CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP? > > > > It's the fallback part for vfio device, not vfio_pci device. vfio_pci > > device doesn't need this special treatment after moving to the new helper > > because that hides everything. vfio_device still needs it. > > > > So, we have two ops that need to be touched to support this: > > > > vfio_device_fops > > vfio_pci_ops > > > > For the 1st one's vfio_device_fops.get_unmapped_area(), it'll need its own > > fallback which must be mm_get_unmapped_area() to keep the old behavior, and > > that was defined only if CONFIG_MMU. > > OK, CONFIG_MMU makes a little bit of sense > > > IOW, if one day file_operations.get_unmapped_area() would allow some other > > retval to be able to fallback to the default (mm_get_unmapped_area()), then > > we don't need this special ifdef. But now it's not ready for that.. > > That can't be fixed with a config, the logic in vfio_device_fops has > to be > > if (!device->ops->get_unmapped_area() > return .. do_default thing.. > > return device->ops->get_unmapped() > > Has nothing to do with CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP, there are > more device->ops that just PCI. IMHO CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP doesn't imply anything PCI specific either, it only says an arch supports PFNMAP in larger than PAGE_SIZE. IIUC it doesn't necessarily need to be PCI. So here in this case, get_unmapped_area() will only be customized if the kernel is compiled with any possible huge mapping on pfnmaps. Otherwise the customized hook isn't needed. > > If you do the API with an align/order argument then the default > behavior should happen when passing PAGE_SIZE. This should indeed also work. I'll wait for comments in the other threads. So far I didn't yet add the "order" parameter or anything like it. If we would like to have the parameter, I can use it here to avoid the ifdef with PAGE_SIZE / PAGE_SHIFT / .... when repost. Said that, I don't think I understand at all the use of get_unmapped_area() for !MMU use case. Thanks, -- Peter Xu
On 13.06.25 15:41, Peter Xu wrote: > Add a hook to vfio_device_ops to allow sub-modules provide virtual > addresses for an mmap() request. > > Note that the fallback will be mm_get_unmapped_area(), which should > maintain the old behavior of generic VA allocation (__get_unmapped_area). > It's a bit unfortunate that is needed, as the current get_unmapped_area() > file ops cannot support a retval which fallbacks to the default. So that > is needed both here and whenever sub-module will opt-in with its own. > > Signed-off-by: Peter Xu <peterx@redhat.com> > --- Reviewed-by: David Hildenbrand <david@redhat.com> -- Cheers, David / dhildenb
On Fri, Jun 13, 2025 at 09:41:10AM -0400, Peter Xu wrote: > Add a hook to vfio_device_ops to allow sub-modules provide virtual > addresses for an mmap() request. > > Note that the fallback will be mm_get_unmapped_area(), which should > maintain the old behavior of generic VA allocation (__get_unmapped_area). > It's a bit unfortunate that is needed, as the current get_unmapped_area() > file ops cannot support a retval which fallbacks to the default. So that > is needed both here and whenever sub-module will opt-in with its own. > > Signed-off-by: Peter Xu <peterx@redhat.com> > --- > drivers/vfio/vfio_main.c | 18 ++++++++++++++++++ > include/linux/vfio.h | 7 +++++++ > 2 files changed, 25 insertions(+) Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Jason
© 2016 - 2025 Red Hat, Inc.