Check whether the starting address in the fault-around handler or the
faulty address in the fault handler is part of a huge page in order to
attempt a PMD-sized PFN insertion into the VMA.
On builds with CONFIG_TRANSPARENT_HUGEPAGE enabled, if the mmap() user
address is PMD-size aligned, if the GEM object is backed by shmem
buffers on mount points setting the 'huge=' option, and if the shmem
backing store manages to allocate a huge folio, CPU mappings then
benefit from significantly increased memcpy() performance. When these
conditions are met on a system with 2 MiB huge pages and 4 KiB base
pages, an aligned copy of 2 MiB raises a single page fault instead
of 512.
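For illustration only (not part of the patch), a minimal userspace
sketch of one way to get a PMD-size aligned CPU mapping, assuming 2 MiB
huge pages and a page-aligned buffer size; 'fd' and 'mmap_offset' (e.g.
the fake offset returned by DRM_IOCTL_MODE_MAP_DUMB) are placeholders:

#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

#define PMD_SIZE (2UL * 1024 * 1024)

/* Reserve an over-sized PROT_NONE window, then map the buffer with
 * MAP_FIXED at the first PMD-aligned address inside it, so the user
 * virtual address and a huge shmem folio can share a PMD entry. */
static void *map_pmd_aligned(int fd, off_t mmap_offset, size_t size)
{
	size_t span = size + PMD_SIZE;
	unsigned char *res, *end;
	uintptr_t aligned;
	void *ptr;

	res = mmap(NULL, span, PROT_NONE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (res == MAP_FAILED)
		return NULL;

	aligned = ((uintptr_t)res + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
	ptr = mmap((void *)aligned, size, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_FIXED, fd, mmap_offset);
	if (ptr == MAP_FAILED) {
		munmap(res, span);
		return NULL;
	}

	/* Trim the unused slack around the aligned window. */
	if (aligned > (uintptr_t)res)
		munmap(res, aligned - (uintptr_t)res);
	end = (unsigned char *)aligned + size;
	if (end < res + span)
		munmap(end, res + span - end);

	return ptr;
}

The over-map-and-trim trick avoids relying on the kernel happening to
pick a PMD-aligned address on its own.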
v4:
- Implement map_pages instead of huge_fault
Signed-off-by: Loïc Molinari <loic.molinari@collabora.com>
---
drivers/gpu/drm/drm_gem_shmem_helper.c | 57 +++++++++++++++++++++-----
1 file changed, 46 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index e151262332f9..5d11bea573fe 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -580,6 +580,26 @@ static bool drm_gem_shmem_fault_is_valid(struct drm_gem_object *obj,
return true;
}
+static bool drm_gem_shmem_map_pmd(struct vm_fault *vmf, unsigned long addr,
+ struct page *page)
+{
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long paddr = pfn << PAGE_SHIFT;
+ bool aligned = (addr & ~PMD_MASK) == (paddr & ~PMD_MASK);
+
+ if (aligned &&
+ pmd_none(*vmf->pmd) &&
+ folio_test_pmd_mappable(page_folio(page))) {
+ pfn &= PMD_MASK >> PAGE_SHIFT;
+ if (vmf_insert_pfn_pmd(vmf, pfn, false) == VM_FAULT_NOPAGE)
+ return true;
+ }
+#endif
+
+ return false;
+}
+
static vm_fault_t drm_gem_shmem_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff,
pgoff_t end_pgoff)
@@ -587,6 +607,7 @@ static vm_fault_t drm_gem_shmem_map_pages(struct vm_fault *vmf,
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+ struct page **pages = shmem->pages;
unsigned long addr, pfn;
vm_fault_t ret;
@@ -598,15 +619,22 @@ static vm_fault_t drm_gem_shmem_map_pages(struct vm_fault *vmf,
if (unlikely(!drm_gem_shmem_fault_is_valid(obj, start_pgoff))) {
ret = VM_FAULT_SIGBUS;
- } else {
- /* Map a range of pages around the faulty address. */
- do {
- pfn = page_to_pfn(shmem->pages[start_pgoff]);
- ret = vmf_insert_pfn(vma, addr, pfn);
- addr += PAGE_SIZE;
- } while (++start_pgoff <= end_pgoff && ret == VM_FAULT_NOPAGE);
+ goto out;
}
+ if (drm_gem_shmem_map_pmd(vmf, addr, pages[start_pgoff])) {
+ ret = VM_FAULT_NOPAGE;
+ goto out;
+ }
+
+ /* Map a range of pages around the faulty address. */
+ do {
+ pfn = page_to_pfn(pages[start_pgoff]);
+ ret = vmf_insert_pfn(vma, addr, pfn);
+ addr += PAGE_SIZE;
+ } while (++start_pgoff <= end_pgoff && ret == VM_FAULT_NOPAGE);
+
+ out:
dma_resv_unlock(shmem->base.resv);
return ret;
@@ -617,8 +645,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
- struct page *page;
+ struct page **pages = shmem->pages;
pgoff_t page_offset;
+ unsigned long pfn;
vm_fault_t ret;
/* Offset to faulty address in the VMA (without the fake offset). */
@@ -628,12 +657,18 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
if (unlikely(!drm_gem_shmem_fault_is_valid(obj, page_offset))) {
ret = VM_FAULT_SIGBUS;
- } else {
- page = shmem->pages[page_offset];
+ goto out;
+ }
- ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
+ if (drm_gem_shmem_map_pmd(vmf, vmf->address, pages[page_offset])) {
+ ret = VM_FAULT_NOPAGE;
+ goto out;
}
+ pfn = page_to_pfn(pages[page_offset]);
+ ret = vmf_insert_pfn(vma, vmf->address, pfn);
+
+ out:
dma_resv_unlock(shmem->base.resv);
return ret;
--
2.47.3
On Wed, Oct 15, 2025 at 05:30:07PM +0200, Loïc Molinari wrote:
This looks fine, no need to resend to fix this, but if you'd written
the previous patch slightly differently, you'd've reduced the amount of
code you moved around in this patch, which would have made it easier to
review.
> + /* Map a range of pages around the faulty address. */
> + do {
> + pfn = page_to_pfn(pages[start_pgoff]);
> + ret = vmf_insert_pfn(vma, addr, pfn);
> + addr += PAGE_SIZE;
> + } while (++start_pgoff <= end_pgoff && ret == VM_FAULT_NOPAGE);
It looks to me like we have an opportunity to do better here by
adding a vmf_insert_pfns() interface. I don't think we should delay
your patch series to add it, but let's not forget to do that; it can
have very good performance effects on ARM to use contptes.
> @@ -617,8 +645,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
[...]
>
> - ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
> + if (drm_gem_shmem_map_pmd(vmf, vmf->address, pages[page_offset])) {
> + ret = VM_FAULT_NOPAGE;
> + goto out;
> }
Does this actually work?
Hi Matthew,
On 15/10/2025 19:27, Matthew Wilcox wrote:
> On Wed, Oct 15, 2025 at 05:30:07PM +0200, Loïc Molinari wrote:
>
> This looks fine, no need to resend to fix this, but if you'd written
> the previous patch slightly differently, you'd've reduced the amount of
> code you moved around in this patch, which would have made it easier to
> review.
>
>> + /* Map a range of pages around the faulty address. */
>> + do {
>> + pfn = page_to_pfn(pages[start_pgoff]);
>> + ret = vmf_insert_pfn(vma, addr, pfn);
>> + addr += PAGE_SIZE;
>> + } while (++start_pgoff <= end_pgoff && ret == VM_FAULT_NOPAGE);
>
> It looks to me like we have an opportunity to do better here by
> adding a vmf_insert_pfns() interface. I don't think we should delay
> your patch series to add it, but let's not forget to do that; it can
> have very good performance effects on ARM to use contptes.
Agreed. I initially wanted to provide such an interface based on
set_ptes() to benefit from arm64 contptes but thought it'd better be a
distinct patch series.
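For illustration, a rough sketch of how the call site could look with
such a batched helper, assuming the drm_gem_shmem_helper.c context from
the diff above; vmf_insert_pfns() is hypothetical here, so its name,
signature and semantics are assumptions, and the fixed batch size is
only a simplification of the sketch (pages beyond it simply fault
later):

/* Hypothetical sketch: vmf_insert_pfns() does not exist today; its
 * name, signature and semantics are assumptions. A batched insertion
 * built on set_ptes() would let arm64 use contiguous PTEs (contptes). */
static vm_fault_t drm_gem_shmem_map_pte_range(struct vm_fault *vmf,
					      struct page **pages,
					      unsigned long addr,
					      pgoff_t start_pgoff,
					      pgoff_t end_pgoff)
{
	unsigned long pfns[16];
	unsigned int i, nr;

	/* Clamp to the sketch's batch size; the remainder faults later. */
	nr = min_t(unsigned int, end_pgoff - start_pgoff + 1,
		   ARRAY_SIZE(pfns));
	for (i = 0; i < nr; i++)
		pfns[i] = page_to_pfn(pages[start_pgoff + i]);

	return vmf_insert_pfns(vmf->vma, addr, pfns, nr);
}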
>
>> @@ -617,8 +645,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
> [...]
>>
>> - ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
>> + if (drm_gem_shmem_map_pmd(vmf, vmf->address, pages[page_offset])) {
>> + ret = VM_FAULT_NOPAGE;
>> + goto out;
>> }
>
> Does this actually work?
Yes, it does. Huge pages are successfully mapped from both map_pages and
fault handlers. Anything wrong with it?
There seems to be another issue, though. There are failures [1], all
looking like that one [2]. I think it's because map_pages is called with
the RCU read lock taken and the DRM GEM map_pages handler must lock the
GEM object before accessing pages with dma_resv_lock(). The locking doc
says: "If it's not possible to reach a page without blocking, filesystem
should skip it.". Unlocking the RCU read lock in the handler seems wrong
and doing without a map_pages implementation would be unfortunate. What
would you recommend here?
Loïc
[1] https://patchwork.freedesktop.org/series/156001/
[2]
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_156001v1/bat-dg1-7/igt@vgem_basic@dmabuf-mmap.html
On Thu, Oct 16, 2025 at 01:17:07PM +0200, Loïc Molinari wrote:
> > It looks to me like we have an opportunity to do better here by
> > adding a vmf_insert_pfns() interface. I don't think we should delay
> > your patch series to add it, but let's not forget to do that; it can
> > have very good performance effects on ARM to use contptes.
>
> Agreed. I initially wanted to provide such an interface based on set_ptes()
> to benefit from arm64 contptes but thought it'd better be a distinct patch
> series.
Agreed.
> >
> > > @@ -617,8 +645,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
> > [...]
> > > - ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
> > > + if (drm_gem_shmem_map_pmd(vmf, vmf->address, pages[page_offset])) {
> > > + ret = VM_FAULT_NOPAGE;
> > > + goto out;
> > > }
> >
> > Does this actually work?
>
> Yes, it does. Huge pages are successfully mapped from both map_pages and
> fault handlers. Anything wrong with it?
No, I just wasn't sure that this would work correctly.
> There seems to be another issue, though. There are failures [1], all
> looking like that one [2]. I think it's because map_pages is called with the
> RCU read lock taken and the DRM GEM map_pages handler must lock the GEM
> object before accessing pages with dma_resv_lock(). The locking doc says:
> "If it's not possible to reach a page without blocking, filesystem should
> skip it.". Unlocking the RCU read lock in the handler seems wrong and doing
> without a map_pages implementation would be unfortunate. What would you
> recommend here?
I'm not familiar with GEM locking, so let me describe briefly how
pagecache locking works.
Calling mmap bumps the refcount on the inode. That keeps the inode
around while the page fault handler runs. For each folio, we
get a refcount on it, then we trylock it. Then we map each page in the
folio.
So maybe you can trylock the GEM object? It isn't clear to me whether
you want finer grained locking than that. If the trylock fails, no big
deal, you just fall through to the fault path (with the slightly more
heavy-weight locking that allows you to sleep).
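For reference, a minimal sketch of the suggested trylock shape, assuming
the drm_gem_shmem_map_pages() layout from the diff above; as the
follow-up below notes, this alone turned out to be insufficient because
the vmf_insert_pfn*() calls can still block:

/* Sketch only. map_pages runs under the RCU read lock, so the handler
 * must not sleep; if the reservation can't be taken without blocking,
 * skip and let drm_gem_shmem_fault() (which may sleep) map the page. */
static vm_fault_t drm_gem_shmem_map_pages(struct vm_fault *vmf,
					  pgoff_t start_pgoff,
					  pgoff_t end_pgoff)
{
	struct drm_gem_object *obj = vmf->vma->vm_private_data;
	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
	vm_fault_t ret = 0;

	if (!dma_resv_trylock(shmem->base.resv))
		return 0;	/* skipped, the fault handler takes over */

	/* ... validity check, PMD attempt and PTE loop as in the diff ... */

	dma_resv_unlock(shmem->base.resv);
	return ret;
}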
Hi Matthew,
On 17/10/2025 23:42, Matthew Wilcox wrote:
> On Thu, Oct 16, 2025 at 01:17:07PM +0200, Loïc Molinari wrote:
>>> It looks to me like we have an opportunity to do better here by
>>> adding a vmf_insert_pfns() interface. I don't think we should delay
>>> your patch series to add it, but let's not forget to do that; it can
>>> have very good performance effects on ARM to use contptes.
>>
>> Agreed. I initially wanted to provide such an interface based on set_ptes()
>> to benefit from arm64 contptes but thought it'd better be a distinct patch
>> series.
>
> Agreed.
>
>>>
>>>> @@ -617,8 +645,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
>>> [...]
>>>> - ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
>>>> + if (drm_gem_shmem_map_pmd(vmf, vmf->address, pages[page_offset])) {
>>>> + ret = VM_FAULT_NOPAGE;
>>>> + goto out;
>>>> }
>>>
>>> Does this actually work?
>>
>> Yes, it does. Huge pages are successfully mapped from both map_pages and
>> fault handlers. Anything wrong with it?
>
> No, I just wasn't sure that this would work correctly.
>
>> There seems to be another issue, though. There are failures [1], all
>> looking like that one [2]. I think it's because map_pages is called with the
>> RCU read lock taken and the DRM GEM map_pages handler must lock the GEM
>> object before accessing pages with dma_resv_lock(). The locking doc says:
>> "If it's not possible to reach a page without blocking, filesystem should
>> skip it.". Unlocking the RCU read lock in the handler seems wrong and doing
>> without a map_pages implementation would be unfortunate. What would you
>> recommend here?
>
> I'm not familiar with GEM locking, so let me describe briefly how
> pagecache locking works.
>
> Calling mmap bumps the refcount on the inode. That keeps the inode
> around while the page fault handler runs. For each folio, we
> get a refcount on it, then we trylock it. Then we map each page in the
> folio.
>
> So maybe you can trylock the GEM object? It isn't clear to me whether
> you want finer grained locking than that. If the trylock fails, no big
> deal, you just fall through to the fault path (with the slightly more
> heavy-weight locking that allows you to sleep).
I proposed a series v5 using dma_resv_trylock(). This actually fails
later because the vmf_insert_pfn*() functions end up taking locks too.
I'm not sure how to fix that yet, so I proposed a v6 with no
fault-around path and will get back to it (along with contptes) later.
Loïc