This is so that flag setting can be resused later in other functions,
to reduce code duplication (including the s390 exception).
No functional change intended with this patch.
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
---
include/linux/huge_mm.h | 1 +
mm/khugepaged.c | 26 +++++++++++++++++---------
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2f190c90192d..23580a43787c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -431,6 +431,7 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
__split_huge_pud(__vma, __pud, __address); \
} while (0)
+int hugepage_set_vmflags(unsigned long *vm_flags, int advice);
int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
int advice);
int madvise_collapse(struct vm_area_struct *vma,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b04b6a770afe..ab3427c87422 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -346,8 +346,7 @@ struct attribute_group khugepaged_attr_group = {
};
#endif /* CONFIG_SYSFS */
-int hugepage_madvise(struct vm_area_struct *vma,
- unsigned long *vm_flags, int advice)
+int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
{
switch (advice) {
case MADV_HUGEPAGE:
@@ -358,16 +357,10 @@ int hugepage_madvise(struct vm_area_struct *vma,
* ignore the madvise to prevent qemu from causing a SIGSEGV.
*/
if (mm_has_pgste(vma->vm_mm))
- return 0;
+ return -EPERM;
#endif
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
- /*
- * If the vma become good for khugepaged to scan,
- * register it here without waiting a page fault that
- * may not happen any time soon.
- */
- khugepaged_enter_vma(vma, *vm_flags);
break;
case MADV_NOHUGEPAGE:
*vm_flags &= ~VM_HUGEPAGE;
@@ -383,6 +376,21 @@ int hugepage_madvise(struct vm_area_struct *vma,
return 0;
}
+int hugepage_madvise(struct vm_area_struct *vma,
+ unsigned long *vm_flags, int advice)
+{
+ if (advice == MADV_HUGEPAGE && !hugepage_set_vmflags(vm_flags, advice)) {
+ /*
+ * If the vma become good for khugepaged to scan,
+ * register it here without waiting a page fault that
+ * may not happen any time soon.
+ */
+ khugepaged_enter_vma(vma, *vm_flags);
+ }
+
+ return 0;
+}
+
int __init khugepaged_init(void)
{
mm_slot_cache = KMEM_CACHE(khugepaged_mm_slot, 0);
--
2.47.1
This commit message is really poor. You're also not mentioning that you're
changing s390 behaviour?
On Mon, May 19, 2025 at 11:29:53PM +0100, Usama Arif wrote:
> This is so that flag setting can be resused later in other functions,
Typo.
> to reduce code duplication (including the s390 exception).
>
> No functional change intended with this patch.
I'm pretty sure somebody reviewed that this should just be merged with whatever
uses this? I'm not sure this is all that valuable as you're not really changing
this structurally very much.
>
> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Yeah I'm not a fan of this patch, it's buggy and really unclear what the
purpose is here.
> ---
> include/linux/huge_mm.h | 1 +
> mm/khugepaged.c | 26 +++++++++++++++++---------
> 2 files changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 2f190c90192d..23580a43787c 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -431,6 +431,7 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
> __split_huge_pud(__vma, __pud, __address); \
> } while (0)
>
> +int hugepage_set_vmflags(unsigned long *vm_flags, int advice);
> int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
> int advice);
> int madvise_collapse(struct vm_area_struct *vma,
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index b04b6a770afe..ab3427c87422 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -346,8 +346,7 @@ struct attribute_group khugepaged_attr_group = {
> };
> #endif /* CONFIG_SYSFS */
>
> -int hugepage_madvise(struct vm_area_struct *vma,
> - unsigned long *vm_flags, int advice)
> +int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
> {
> switch (advice) {
> case MADV_HUGEPAGE:
> @@ -358,16 +357,10 @@ int hugepage_madvise(struct vm_area_struct *vma,
> * ignore the madvise to prevent qemu from causing a SIGSEGV.
> */
> if (mm_has_pgste(vma->vm_mm))
This is broken, you refer to vma which doesn't exist.
As the kernel bots are telling you...
> - return 0;
> + return -EPERM;
Why are you now returning an error?
This seems like a super broken way of making the caller return 0. Just make this
whole thing a bool return if you're going to treat it like a boolean function.
> #endif
> *vm_flags &= ~VM_NOHUGEPAGE;
> *vm_flags |= VM_HUGEPAGE;
> - /*
> - * If the vma become good for khugepaged to scan,
> - * register it here without waiting a page fault that
> - * may not happen any time soon.
> - */
> - khugepaged_enter_vma(vma, *vm_flags);
> break;
> case MADV_NOHUGEPAGE:
> *vm_flags &= ~VM_HUGEPAGE;
> @@ -383,6 +376,21 @@ int hugepage_madvise(struct vm_area_struct *vma,
> return 0;
> }
>
> +int hugepage_madvise(struct vm_area_struct *vma,
> + unsigned long *vm_flags, int advice)
> +{
> + if (advice == MADV_HUGEPAGE && !hugepage_set_vmflags(vm_flags, advice)) {
So now you've completely broken MADV_NOHUGEPAGE haven't you?
> + /*
> + * If the vma become good for khugepaged to scan,
> + * register it here without waiting a page fault that
> + * may not happen any time soon.
> + */
> + khugepaged_enter_vma(vma, *vm_flags);
> + }
> +
> + return 0;
> +}
> +
> int __init khugepaged_init(void)
> {
> mm_slot_cache = KMEM_CACHE(khugepaged_mm_slot, 0);
> --
> 2.47.1
>
On 20/05/2025 15:43, Lorenzo Stoakes wrote:
> This commit message is really poor. You're also not mentioning that you're
> changing s390 behaviour?
>
> On Mon, May 19, 2025 at 11:29:53PM +0100, Usama Arif wrote:
>> This is so that flag setting can be resused later in other functions,
>
> Typo.
>
>> to reduce code duplication (including the s390 exception).
>>
>> No functional change intended with this patch.
>
> I'm pretty sure somebody reviewed that this should just be merged with whatever
> uses this? I'm not sure this is all that valuable as you're not really changing
> this structurally very much.
>
Please see patch 2 where hugepage_set_vmflags is reused.
I was just trying to follow your feedback from previous revision that the flag
setting and s390 code part is duplicate code and should be common in the prctl
and madvise function.
I realize I messed up the arg not having vma and the order of the if statement.
>>
>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
>
> Yeah I'm not a fan of this patch, it's buggy and really unclear what the
> purpose is here.
No functional change was intended (I realized the order below broke it but can be fixed).
In the previous revision it was:
+ case PR_SET_THP_POLICY:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ if (mmap_write_lock_killable(me->mm))
+ return -EINTR;
+ switch (arg2) {
+ case PR_DEFAULT_MADV_HUGEPAGE:
+ if (!hugepage_global_enabled())
+ error = -EPERM;
+#ifdef CONFIG_S390
+ /*
+ * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390
+ * can't handle this properly after s390_enable_sie, so we simply
+ * ignore the madvise to prevent qemu from causing a SIGSEGV.
+ */
+ else if (mm_has_pgste(vma->vm_mm))
+ error = -EPERM;
+#endif
+ else {
+ me->mm->def_flags &= ~VM_NOHUGEPAGE;
+ me->mm->def_flags |= VM_HUGEPAGE;
+ process_default_madv_hugepage(me->mm, MADV_HUGEPAGE);
+ }
+ break;
...
Now with this hugepage_set_vmflags, it would be
+ case PR_SET_THP_POLICY:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+ switch (arg2) {
+ case PR_DEFAULT_MADV_HUGEPAGE:
+ if (!hugepage_global_enabled())
+ error = -EPERM;
+ error = hugepage_set_vmflags(&mm->def_flags, MADV_HUGEPAGE);
+ if (!error)
+ process_default_madv_hugepage(mm, MADV_HUGEPAGE);
+ break;
I am happy to go with either of the methods above, but was just trying to
incorporate your feedback :)
Would you like the method from previous version?
>
>> ---
>> include/linux/huge_mm.h | 1 +
>> mm/khugepaged.c | 26 +++++++++++++++++---------
>> 2 files changed, 18 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 2f190c90192d..23580a43787c 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -431,6 +431,7 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
>> __split_huge_pud(__vma, __pud, __address); \
>> } while (0)
>>
>> +int hugepage_set_vmflags(unsigned long *vm_flags, int advice);
>> int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
>> int advice);
>> int madvise_collapse(struct vm_area_struct *vma,
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index b04b6a770afe..ab3427c87422 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -346,8 +346,7 @@ struct attribute_group khugepaged_attr_group = {
>> };
>> #endif /* CONFIG_SYSFS */
>>
>> -int hugepage_madvise(struct vm_area_struct *vma,
>> - unsigned long *vm_flags, int advice)
>> +int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
>
>
>> {
>> switch (advice) {
>> case MADV_HUGEPAGE:
>> @@ -358,16 +357,10 @@ int hugepage_madvise(struct vm_area_struct *vma,
>> * ignore the madvise to prevent qemu from causing a SIGSEGV.
>> */
>> if (mm_has_pgste(vma->vm_mm))
>
> This is broken, you refer to vma which doesn't exist.
>
> As the kernel bots are telling you...
>
>> - return 0;
>> + return -EPERM;
>
> Why are you now returning an error?
>
> This seems like a super broken way of making the caller return 0. Just make this
> whole thing a bool return if you're going to treat it like a boolean function.
>
>> #endif
>> *vm_flags &= ~VM_NOHUGEPAGE;
>> *vm_flags |= VM_HUGEPAGE;
>> - /*
>> - * If the vma become good for khugepaged to scan,
>> - * register it here without waiting a page fault that
>> - * may not happen any time soon.
>> - */
>> - khugepaged_enter_vma(vma, *vm_flags);
>> break;
>> case MADV_NOHUGEPAGE:
>> *vm_flags &= ~VM_HUGEPAGE;
>> @@ -383,6 +376,21 @@ int hugepage_madvise(struct vm_area_struct *vma,
>> return 0;
>> }
>>
>> +int hugepage_madvise(struct vm_area_struct *vma,
>> + unsigned long *vm_flags, int advice)
>> +{
>> + if (advice == MADV_HUGEPAGE && !hugepage_set_vmflags(vm_flags, advice)) {
>
> So now you've completely broken MADV_NOHUGEPAGE haven't you?
>
Yeah order needs to be reversed.
>> + /*
>> + * If the vma become good for khugepaged to scan,
>> + * register it here without waiting a page fault that
>> + * may not happen any time soon.
>> + */
>> + khugepaged_enter_vma(vma, *vm_flags);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> int __init khugepaged_init(void)
>> {
>> mm_slot_cache = KMEM_CACHE(khugepaged_mm_slot, 0);
>> --
>> 2.47.1
>>
On Tue, May 20, 2025 at 03:57:35PM +0100, Usama Arif wrote:
>
>
> On 20/05/2025 15:43, Lorenzo Stoakes wrote:
> > This commit message is really poor. You're also not mentioning that you're
> > changing s390 behaviour?
> >
> > On Mon, May 19, 2025 at 11:29:53PM +0100, Usama Arif wrote:
> >> This is so that flag setting can be resused later in other functions,
> >
> > Typo.
> >
> >> to reduce code duplication (including the s390 exception).
> >>
> >> No functional change intended with this patch.
> >
> > I'm pretty sure somebody reviewed that this should just be merged with whatever
> > uses this? I'm not sure this is all that valuable as you're not really changing
> > this structurally very much.
> >
>
> Please see patch 2 where hugepage_set_vmflags is reused.
> I was just trying to follow your feedback from previous revision that the flag
> setting and s390 code part is duplicate code and should be common in the prctl
> and madvise function.
Sure, but I think it'd be better as part of that patch probably. Perhaps I
was thinking of another comment in reference to a 'no function change'
remark.
>
> I realize I messed up the arg not having vma and the order of the if statement.
I am getting the strong impression here that you're rushing :)
I strongly suggest slowing thing down here. We're in RC7, this is (or
should be) an RFC for us to explore concepts. There's no need for it.
I appreciate your input and enthusiasm, but clearly rushing is causing you
to make mistakes. I get it, we've all been there.
But right now we have what 5 maybe? THP series in-flight at the same time,
all touching similar stuff, and it'll make everybody's lives easier and
less chaotic if we take a little more time to assess.
We are ultimately going to choose what's best for the kernel, there's no
'race' as to which series is 'ready' first.
>
> >>
> >> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> >
> > Yeah I'm not a fan of this patch, it's buggy and really unclear what the
> > purpose is here.
>
> No functional change was intended (I realized the order below broke it but can be fixed).
>
> In the previous revision it was:
> + case PR_SET_THP_POLICY:
> + if (arg3 || arg4 || arg5)
> + return -EINVAL;
> + if (mmap_write_lock_killable(me->mm))
> + return -EINTR;
> + switch (arg2) {
> + case PR_DEFAULT_MADV_HUGEPAGE:
> + if (!hugepage_global_enabled())
> + error = -EPERM;
> +#ifdef CONFIG_S390
> + /*
> + * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390
> + * can't handle this properly after s390_enable_sie, so we simply
> + * ignore the madvise to prevent qemu from causing a SIGSEGV.
> + */
> + else if (mm_has_pgste(vma->vm_mm))
> + error = -EPERM;
> +#endif
> + else {
> + me->mm->def_flags &= ~VM_NOHUGEPAGE;
> + me->mm->def_flags |= VM_HUGEPAGE;
> + process_default_madv_hugepage(me->mm, MADV_HUGEPAGE);
> + }
> + break;
> ...
>
> Now with this hugepage_set_vmflags, it would be
>
> + case PR_SET_THP_POLICY:
> + if (arg3 || arg4 || arg5)
> + return -EINVAL;
> + if (mmap_write_lock_killable(mm))
> + return -EINTR;
> + switch (arg2) {
> + case PR_DEFAULT_MADV_HUGEPAGE:
> + if (!hugepage_global_enabled())
> + error = -EPERM;
> + error = hugepage_set_vmflags(&mm->def_flags, MADV_HUGEPAGE);
> + if (!error)
> + process_default_madv_hugepage(mm, MADV_HUGEPAGE);
> + break;
>
>
> I am happy to go with either of the methods above, but was just trying to
> incorporate your feedback :)
>
> Would you like the method from previous version?
I'm going to go ahead and overlook what would be in the UK 100% a
deployment of the finest British sarcasm here, and assume not intended :)
Very obviously we do not want to duplicate architecture-specific code. I'm
a little concerned you're ok with both (imagine if one changed but not the
other for instance), but clearly this series is unmergeable without
de-duplicating this.
My objections here are that you submitted a totally broken patch with a
poor commit message that seems that it could well be merged with the
subsequent patch.
I also have concerns about your levels of testing here - you completely
broken MADV_NOHUGEPAGE but didn't notice? Are you running self-tests? Do we
have one that'd pick that up? If not, can we have one like that?
Thanks!
>
> >
> >> ---
> >> include/linux/huge_mm.h | 1 +
> >> mm/khugepaged.c | 26 +++++++++++++++++---------
> >> 2 files changed, 18 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> >> index 2f190c90192d..23580a43787c 100644
> >> --- a/include/linux/huge_mm.h
> >> +++ b/include/linux/huge_mm.h
> >> @@ -431,6 +431,7 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >> __split_huge_pud(__vma, __pud, __address); \
> >> } while (0)
> >>
> >> +int hugepage_set_vmflags(unsigned long *vm_flags, int advice);
> >> int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
> >> int advice);
> >> int madvise_collapse(struct vm_area_struct *vma,
> >> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> >> index b04b6a770afe..ab3427c87422 100644
> >> --- a/mm/khugepaged.c
> >> +++ b/mm/khugepaged.c
> >> @@ -346,8 +346,7 @@ struct attribute_group khugepaged_attr_group = {
> >> };
> >> #endif /* CONFIG_SYSFS */
> >>
> >> -int hugepage_madvise(struct vm_area_struct *vma,
> >> - unsigned long *vm_flags, int advice)
> >> +int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
> >
> >
> >> {
> >> switch (advice) {
> >> case MADV_HUGEPAGE:
> >> @@ -358,16 +357,10 @@ int hugepage_madvise(struct vm_area_struct *vma,
> >> * ignore the madvise to prevent qemu from causing a SIGSEGV.
> >> */
> >> if (mm_has_pgste(vma->vm_mm))
> >
> > This is broken, you refer to vma which doesn't exist.
> >
> > As the kernel bots are telling you...
> >
> >> - return 0;
> >> + return -EPERM;
> >
> > Why are you now returning an error?
> >
> > This seems like a super broken way of making the caller return 0. Just make this
> > whole thing a bool return if you're going to treat it like a boolean function.
> >
> >> #endif
> >> *vm_flags &= ~VM_NOHUGEPAGE;
> >> *vm_flags |= VM_HUGEPAGE;
> >> - /*
> >> - * If the vma become good for khugepaged to scan,
> >> - * register it here without waiting a page fault that
> >> - * may not happen any time soon.
> >> - */
> >> - khugepaged_enter_vma(vma, *vm_flags);
> >> break;
> >> case MADV_NOHUGEPAGE:
> >> *vm_flags &= ~VM_HUGEPAGE;
> >> @@ -383,6 +376,21 @@ int hugepage_madvise(struct vm_area_struct *vma,
> >> return 0;
> >> }
> >>
> >> +int hugepage_madvise(struct vm_area_struct *vma,
> >> + unsigned long *vm_flags, int advice)
> >> +{
> >> + if (advice == MADV_HUGEPAGE && !hugepage_set_vmflags(vm_flags, advice)) {
> >
> > So now you've completely broken MADV_NOHUGEPAGE haven't you?
> >
>
> Yeah order needs to be reversed.
>
> >> + /*
> >> + * If the vma become good for khugepaged to scan,
> >> + * register it here without waiting a page fault that
> >> + * may not happen any time soon.
> >> + */
> >> + khugepaged_enter_vma(vma, *vm_flags);
> >> + }
> >> +
> >> + return 0;
> >> +}
> >> +
> >> int __init khugepaged_init(void)
> >> {
> >> mm_slot_cache = KMEM_CACHE(khugepaged_mm_slot, 0);
> >> --
> >> 2.47.1
> >>
>
On 20/05/2025 15:57, Usama Arif wrote:
>
>
> On 20/05/2025 15:43, Lorenzo Stoakes wrote:
>> This commit message is really poor. You're also not mentioning that you're
>> changing s390 behaviour?
>>
>> On Mon, May 19, 2025 at 11:29:53PM +0100, Usama Arif wrote:
>>> This is so that flag setting can be resused later in other functions,
>>
>> Typo.
>>
>>> to reduce code duplication (including the s390 exception).
>>>
>>> No functional change intended with this patch.
>>
>> I'm pretty sure somebody reviewed that this should just be merged with whatever
>> uses this? I'm not sure this is all that valuable as you're not really changing
>> this structurally very much.
>>
>
So I unfortunately never tested s390 build which the kernel bot is complaining.
So If I want to reuse hugepage_set_vmflags in patch 2 and 3 for the prctls,
the fix over here would be at the end.
If you don't like the approach of trying to abstract the flag setting away
and reusing it in prctl in this patch I can change it to the way in previous
revision and just do something like below. Happy with either approach and
can drop patch 1 if you prefer.
+ case PR_SET_THP_POLICY:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ if (mmap_write_lock_killable(me->mm))
+ return -EINTR;
+ switch (arg2) {
+ case PR_DEFAULT_MADV_HUGEPAGE:
+ if (!hugepage_global_enabled())
+ error = -EPERM;
+#ifdef CONFIG_S390
+ /*
+ * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390
+ * can't handle this properly after s390_enable_sie, so we simply
+ * ignore the madvise to prevent qemu from causing a SIGSEGV.
+ */
+ else if (mm_has_pgste(vma->vm_mm))
+ error = -EPERM;
+#endif
+ else {
+ me->mm->def_flags &= ~VM_NOHUGEPAGE;
+ me->mm->def_flags |= VM_HUGEPAGE;
+ process_default_madv_hugepage(me->mm, MADV_HUGEPAGE);
+ }
+ break;
+ default:
+ error = -EINVAL;
+ }
+ mmap_write_unlock(me->mm);
+ break;
Thanks!
diff for fixing this patch:
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b24a2e0ae642..e5176afaaffe 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -432,7 +432,7 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
} while (0)
void process_default_madv_hugepage(struct mm_struct *mm, int advice);
-int hugepage_set_vmflags(unsigned long *vm_flags, int advice);
+int hugepage_set_vmflags(struct mm_struct* mm, unsigned long *vm_flags, int advice);
int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
int advice);
int madvise_collapse(struct vm_area_struct *vma,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ab3427c87422..b6c9ed6bb442 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -346,7 +346,7 @@ struct attribute_group khugepaged_attr_group = {
};
#endif /* CONFIG_SYSFS */
-int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
+int hugepage_set_vmflags(struct mm_struct * mm, unsigned long *vm_flags, int advice)
{
switch (advice) {
case MADV_HUGEPAGE:
@@ -356,8 +356,8 @@ int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
* can't handle this properly after s390_enable_sie, so we simply
* ignore the madvise to prevent qemu from causing a SIGSEGV.
*/
- if (mm_has_pgste(vma->vm_mm))
- return -EPERM;
+ if (mm_has_pgste(mm))
+ return 0;
#endif
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
@@ -373,13 +373,14 @@ int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
break;
}
- return 0;
+ return 1;
}
int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
- if (advice == MADV_HUGEPAGE && !hugepage_set_vmflags(vm_flags, advice)) {
+ if (hugepage_set_vmflags(vma->vm_mm, vm_flags, advice)
+ && advice == MADV_HUGEPAGE) {
/*
* If the vma become good for khugepaged to scan,
* register it here without waiting a page fault that
Hi Usama,
kernel test robot noticed the following build errors:
[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on perf-tools-next/perf-tools-next tip/perf/core perf-tools/perf-tools linus/master acme/perf/core v6.15-rc7 next-20250516]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Usama-Arif/mm-khugepaged-extract-vm-flag-setting-outside-of-hugepage_madvise/20250520-063452
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20250519223307.3601786-2-usamaarif642%40gmail.com
patch subject: [PATCH v3 1/7] mm: khugepaged: extract vm flag setting outside of hugepage_madvise
config: s390-randconfig-002-20250520 (https://download.01.org/0day-ci/archive/20250520/202505201734.8Fyk3qKi-lkp@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project f819f46284f2a79790038e1f6649172789734ae8)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250520/202505201734.8Fyk3qKi-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202505201734.8Fyk3qKi-lkp@intel.com/
All errors (new ones prefixed by >>):
>> mm/khugepaged.c:359:20: error: use of undeclared identifier 'vma'
359 | if (mm_has_pgste(vma->vm_mm))
| ^
1 error generated.
vim +/vma +359 mm/khugepaged.c
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 348
d2a8f83f11a4ba Usama Arif 2025-05-19 349 int hugepage_set_vmflags(unsigned long *vm_flags, int advice)
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 350 {
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 351 switch (advice) {
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 352 case MADV_HUGEPAGE:
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 353 #ifdef CONFIG_S390
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 354 /*
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 355 * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 356 * can't handle this properly after s390_enable_sie, so we simply
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 357 * ignore the madvise to prevent qemu from causing a SIGSEGV.
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 358 */
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 @359 if (mm_has_pgste(vma->vm_mm))
d2a8f83f11a4ba Usama Arif 2025-05-19 360 return -EPERM;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 361 #endif
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 362 *vm_flags &= ~VM_NOHUGEPAGE;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 363 *vm_flags |= VM_HUGEPAGE;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 364 break;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 365 case MADV_NOHUGEPAGE:
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 366 *vm_flags &= ~VM_HUGEPAGE;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 367 *vm_flags |= VM_NOHUGEPAGE;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 368 /*
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 369 * Setting VM_NOHUGEPAGE will prevent khugepaged from scanning
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 370 * this vma even if we leave the mm registered in khugepaged if
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 371 * it got registered before VM_NOHUGEPAGE was set.
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 372 */
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 373 break;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 374 }
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 375
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 376 return 0;
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 377 }
b46e756f5e4703 Kirill A. Shutemov 2016-07-26 378
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.