For private mappings, attempt to map aligned to the huge page size,
which can yield performance gains. Average execution time of libMicro's
mprot_tw4m test case on arm64:
- Test case: mprot_tw4m
- Before the patch: 22 us
- After the patch: 17 us
If CONFIG_TRANSPARENT_HUGEPAGE is not set, we fall back to system page
size mappings.
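
As a quick illustration (not part of the patch): a minimal userspace
sketch that checks whether a private /dev/zero mapping comes back
PMD-aligned. The 2 MiB PMD size and the 20 MiB length are assumptions
for arm64/x86-64 with 4K pages, not values taken from the patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
    	const size_t len = 20UL << 20;	/* 20 MiB */
    	const size_t pmd = 2UL << 20;	/* assumed PMD size */
    	int fd = open("/dev/zero", O_RDONLY);

    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}
    	/* MAP_PRIVATE is the case this patch makes huge-page-aligned. */
    	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    	if (p == MAP_FAILED) {
    		perror("mmap");
    		return 1;
    	}
    	printf("addr %p PMD-aligned: %s\n", p,
    	       ((unsigned long)p & (pmd - 1)) ? "no" : "yes");
    	munmap(p, len);
    	close(fd);
    	return 0;
    }

With the patch applied and THP enabled, the returned address should be
2 MiB aligned; on an unpatched kernel it is only page aligned.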
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
---
v3:
- collect Acked-by
- factor out the #ifdef CONFIG_MMU in get_unmapped_area_zero(), per Lorenzo
- explicitly use #ifdef CONFIG_TRANSPARENT_HUGEPAGE, per Lorenzo and Matthew
v2:
- add comments on code suggested by Lorenzo
- use IS_ENABLED to check THP config (see the hypothetical sketch below)
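
For context, inside get_unmapped_area_zero() the v2 approach to the
MAP_PRIVATE tail would have read roughly as follows. This is a
hypothetical reconstruction (the v2 hunk is not shown here); v3
switched to an explicit #ifdef per Lorenzo and Matthew:

    	/*
    	 * v2-style (hypothetical sketch): IS_ENABLED() keeps both
    	 * branches visible to the compiler instead of hiding one
    	 * behind an #ifdef.
    	 */
    	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
    		return thp_get_unmapped_area(file, addr, len, pgoff, flags);
    	return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);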
drivers/char/mem.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 48839958b0b1..b7c4dbe4d2c1 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -510,31 +510,44 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
 		return shmem_zero_setup(vma);
 	vma_set_anonymous(vma);
 	return 0;
 }
 
+#ifndef CONFIG_MMU
+static unsigned long get_unmapped_area_zero(struct file *file,
+			unsigned long addr, unsigned long len,
+			unsigned long pgoff, unsigned long flags)
+{
+	return -ENOSYS;
+}
+#else
 static unsigned long get_unmapped_area_zero(struct file *file,
 			unsigned long addr, unsigned long len,
 			unsigned long pgoff, unsigned long flags)
 {
-#ifdef CONFIG_MMU
 	if (flags & MAP_SHARED) {
 		/*
 		 * mmap_zero() will call shmem_zero_setup() to create a file,
 		 * so use shmem's get_unmapped_area in case it can be huge;
 		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
 		 * so as not to confuse shmem with our handle on "/dev/zero".
 		 */
 		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
 	}
 
-	/* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
-	return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
+	/*
+	 * Otherwise flags & MAP_PRIVATE: with no shmem object beneath it,
+	 * attempt to map aligned to huge page size if possible, otherwise we
+	 * fall back to system page size mappings.
+	 */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	return thp_get_unmapped_area(file, addr, len, pgoff, flags);
 #else
-	return -ENOSYS;
+	return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
 #endif
 }
+#endif
 
 static ssize_t write_full(struct file *file, const char __user *buf,
 			size_t count, loff_t *ppos)
 {
 	return -ENOSPC;
--
2.43.0
On Thu, Jul 31, 2025 at 04:36:55PM +0800, Zhang Qilong wrote:
> For private mappings, attempt to map aligned to the huge page size,
> which can yield performance gains. Average execution time of libMicro's
> mprot_tw4m test case on arm64:
>
> - Test case: mprot_tw4m
> - Before the patch: 22 us
> - After the patch: 17 us
>
> If CONFIG_TRANSPARENT_HUGEPAGE is not set, we fall back to system page
> size mappings.
>
> Acked-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>

LGTM, so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

I did some rudimentary testing on this with a simple MAP_PRIVATE thing
which caused no issues, and I observed a 20 MB mapping getting aligned
and thus benefitting from 10 x huge PMD mappings vs. mainline
benefitting from 9 due to misalignment.

So feel free to add:

Tested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Cheers, Lorenzo

> ---
> v3:
> - collect Acked-by
> - factor out the #ifdef CONFIG_MMU in get_unmapped_area_zero(), per Lorenzo
> - explicitly use #ifdef CONFIG_TRANSPARENT_HUGEPAGE, per Lorenzo and Matthew
>
> v2:
> - add comments on code suggested by Lorenzo
> - use IS_ENABLED to check THP config
>
> drivers/char/mem.c | 21 +++++++++++++++++----
> 1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/char/mem.c b/drivers/char/mem.c
> index 48839958b0b1..b7c4dbe4d2c1 100644
> --- a/drivers/char/mem.c
> +++ b/drivers/char/mem.c
> @@ -510,31 +510,44 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
>  		return shmem_zero_setup(vma);
>  	vma_set_anonymous(vma);
>  	return 0;
>  }
>
> +#ifndef CONFIG_MMU

OK, the convention in this file is to invert this check so this is fine.

> +static unsigned long get_unmapped_area_zero(struct file *file,
> +			unsigned long addr, unsigned long len,
> +			unsigned long pgoff, unsigned long flags)
> +{
> +	return -ENOSYS;
> +}
> +#else
>  static unsigned long get_unmapped_area_zero(struct file *file,
>  			unsigned long addr, unsigned long len,
>  			unsigned long pgoff, unsigned long flags)
>  {
> -#ifdef CONFIG_MMU
>  	if (flags & MAP_SHARED) {
>  		/*
>  		 * mmap_zero() will call shmem_zero_setup() to create a file,
>  		 * so use shmem's get_unmapped_area in case it can be huge;
>  		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
>  		 * so as not to confuse shmem with our handle on "/dev/zero".
>  		 */
>  		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
>  	}
>
> -	/* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
> -	return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
> +	/*
> +	 * Otherwise flags & MAP_PRIVATE: with no shmem object beneath it,
> +	 * attempt to map aligned to huge page size if possible, otherwise we
> +	 * fall back to system page size mappings.
> +	 */
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	return thp_get_unmapped_area(file, addr, len, pgoff, flags);
>  #else
> -	return -ENOSYS;
> +	return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
>  #endif
>  }
> +#endif

Nit, but can we add a /* CONFIG_MMU */ here please since we have a bunch
of ifdefs and it's noisy.

>
>  static ssize_t write_full(struct file *file, const char __user *buf,
>  			size_t count, loff_t *ppos)
>  {
>  	return -ENOSPC;
> --
> 2.43.0
>
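
To make the 10-vs-9 observation above concrete: a 20 MiB range contains
ten full 2 MiB PMD blocks only when its start address is PMD-aligned;
shift it by even one page and only nine blocks fit entirely inside. A
tiny sketch of that arithmetic (the 2 MiB PMD size is an assumption,
and this is not Lorenzo's actual test program, which was not posted):

    /* Count the PMD-sized (assumed 2 MiB) blocks fully inside [addr, addr + len). */
    unsigned long full_pmds(unsigned long addr, unsigned long len)
    {
    	const unsigned long pmd = 2UL << 20;
    	unsigned long start = (addr + pmd - 1) & ~(pmd - 1);	/* round up */
    	unsigned long end = (addr + len) & ~(pmd - 1);		/* round down */

    	return end > start ? (end - start) / pmd : 0;
    }

    /*
     * full_pmds(0x40000000, 20UL << 20) == 10	(PMD-aligned start)
     * full_pmds(0x40001000, 20UL << 20) == 9	(misaligned by one 4K page)
     */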