Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
anon_enabled_store() into a new set_anon_enabled_mode() helper that
loops over an enabled_orders[] array, setting the bit for the selected
mode and clearing the others.
Introduce enum anon_enabled_mode and anon_enabled_mode_strings[]
for the per-order anon THP setting.
Use sysfs_match_string() with the anon_enabled_mode_strings[] table
to replace the if/else chain of sysfs_streq() calls.
The helper uses __test_and_set_bit()/__test_and_clear_bit() (non-atomic
variants are safe here since huge_anon_orders_lock is held) to track
whether the state actually changed, so start_stop_khugepaged() is
only called when needed. When the mode is unchanged,
set_recommended_min_free_kbytes() is called directly to preserve
the watermark recalculation behavior of the original code.
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 32 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8e2746ea74adf..e19dda5aaf195 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
return sysfs_emit(buf, "%s\n", output);
}
+enum anon_enabled_mode {
+ ANON_ENABLED_ALWAYS = 0,
+ ANON_ENABLED_MADVISE = 1,
+ ANON_ENABLED_INHERIT = 2,
+ ANON_ENABLED_NEVER = 3,
+};
+
+static const char * const anon_enabled_mode_strings[] = {
+ [ANON_ENABLED_ALWAYS] = "always",
+ [ANON_ENABLED_MADVISE] = "madvise",
+ [ANON_ENABLED_INHERIT] = "inherit",
+ [ANON_ENABLED_NEVER] = "never",
+};
+
static ssize_t enabled_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
@@ -515,48 +529,54 @@ static ssize_t anon_enabled_show(struct kobject *kobj,
return sysfs_emit(buf, "%s\n", output);
}
+static bool set_anon_enabled_mode(int order, enum anon_enabled_mode mode)
+{
+ static unsigned long *enabled_orders[] = {
+ &huge_anon_orders_always,
+ &huge_anon_orders_madvise,
+ &huge_anon_orders_inherit,
+ };
+ enum anon_enabled_mode m;
+ bool changed = false;
+
+ spin_lock(&huge_anon_orders_lock);
+ for (m = 0; m < ARRAY_SIZE(enabled_orders); m++) {
+ if (m == mode)
+ changed |= !__test_and_set_bit(order, enabled_orders[m]);
+ else
+ changed |= __test_and_clear_bit(order, enabled_orders[m]);
+ }
+ spin_unlock(&huge_anon_orders_lock);
+
+ return changed;
+}
+
static ssize_t anon_enabled_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int order = to_thpsize(kobj)->order;
- ssize_t ret = count;
+ int mode;
- if (sysfs_streq(buf, "always")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_inherit);
- clear_bit(order, &huge_anon_orders_madvise);
- set_bit(order, &huge_anon_orders_always);
- spin_unlock(&huge_anon_orders_lock);
- } else if (sysfs_streq(buf, "inherit")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_always);
- clear_bit(order, &huge_anon_orders_madvise);
- set_bit(order, &huge_anon_orders_inherit);
- spin_unlock(&huge_anon_orders_lock);
- } else if (sysfs_streq(buf, "madvise")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_always);
- clear_bit(order, &huge_anon_orders_inherit);
- set_bit(order, &huge_anon_orders_madvise);
- spin_unlock(&huge_anon_orders_lock);
- } else if (sysfs_streq(buf, "never")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_always);
- clear_bit(order, &huge_anon_orders_inherit);
- clear_bit(order, &huge_anon_orders_madvise);
- spin_unlock(&huge_anon_orders_lock);
- } else
- ret = -EINVAL;
+ mode = sysfs_match_string(anon_enabled_mode_strings, buf);
+ if (mode < 0)
+ return -EINVAL;
- if (ret > 0) {
- int err;
+ if (set_anon_enabled_mode(order, mode)) {
+ int err = start_stop_khugepaged();
- err = start_stop_khugepaged();
if (err)
- ret = err;
+ return err;
+ } else {
+ /*
+ * Recalculate watermarks even when the mode didn't
+ * change, as the previous code always called
+ * start_stop_khugepaged() which does this internally.
+ */
+ set_recommended_min_free_kbytes();
}
- return ret;
+
+ return count;
}
static struct kobj_attribute anon_enabled_attr =
--
2.52.0
On Tue, Mar 10, 2026 at 10:57:08AM -0700, Breno Leitao wrote:
>Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
>anon_enabled_store() into a new change_anon_orders() helper that
>loops over an orders[] array, setting the bit for the selected mode
>and clearing the others.
>
>Introduce enum anon_enabled_mode and anon_enabled_mode_strings[]
>for the per-order anon THP setting.
>
>Use sysfs_match_string() with the anon_enabled_mode_strings[] table
>to replace the if/else chain of sysfs_streq() calls.
>
>The helper uses test_and_set_bit()/test_and_clear_bit() to track
>whether the state actually changed, so start_stop_khugepaged() is
>only called when needed. When the mode is unchanged,
>set_recommended_min_free_kbytes() is called directly to preserve
>the watermark recalculation behavior of the original code.
>
>Signed-off-by: Breno Leitao <leitao@debian.org>
>Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
>---
> mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 52 insertions(+), 32 deletions(-)
>
>diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>index 8e2746ea74adf..e19dda5aaf195 100644
>--- a/mm/huge_memory.c
>+++ b/mm/huge_memory.c
>@@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
>+enum anon_enabled_mode {
>+ ANON_ENABLED_ALWAYS = 0,
>+ ANON_ENABLED_MADVISE = 1,
>+ ANON_ENABLED_INHERIT = 2,
>+ ANON_ENABLED_NEVER = 3,
>+};
>+
>+static const char * const anon_enabled_mode_strings[] = {
>+ [ANON_ENABLED_ALWAYS] = "always",
>+ [ANON_ENABLED_MADVISE] = "madvise",
>+ [ANON_ENABLED_INHERIT] = "inherit",
>+ [ANON_ENABLED_NEVER] = "never",
>+};
>+
Just one trivial thing, maybe keep the sequence as the sysfs output?
Currently the output of /sys/kernel/transparent_hugepage/hugepages-xxxkB is
always inherit madvise [never]
But no strong opinion on this.
> static ssize_t enabled_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
>@@ -515,48 +529,54 @@ static ssize_t anon_enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
>+static bool set_anon_enabled_mode(int order, enum anon_enabled_mode mode)
>+{
>+ static unsigned long *enabled_orders[] = {
>+ &huge_anon_orders_always,
>+ &huge_anon_orders_madvise,
>+ &huge_anon_orders_inherit,
>+ };
>+ enum anon_enabled_mode m;
>+ bool changed = false;
>+
>+ spin_lock(&huge_anon_orders_lock);
>+ for (m = 0; m < ARRAY_SIZE(enabled_orders); m++) {
>+ if (m == mode)
>+ changed |= !__test_and_set_bit(order, enabled_orders[m]);
>+ else
>+ changed |= __test_and_clear_bit(order, enabled_orders[m]);
>+ }
>+ spin_unlock(&huge_anon_orders_lock);
>+
>+ return changed;
>+}
>+
Nice cleanup, so
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
> static ssize_t anon_enabled_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
> {
> int order = to_thpsize(kobj)->order;
>- ssize_t ret = count;
>+ int mode;
>
>- if (sysfs_streq(buf, "always")) {
>- spin_lock(&huge_anon_orders_lock);
>- clear_bit(order, &huge_anon_orders_inherit);
>- clear_bit(order, &huge_anon_orders_madvise);
>- set_bit(order, &huge_anon_orders_always);
>- spin_unlock(&huge_anon_orders_lock);
>- } else if (sysfs_streq(buf, "inherit")) {
>- spin_lock(&huge_anon_orders_lock);
>- clear_bit(order, &huge_anon_orders_always);
>- clear_bit(order, &huge_anon_orders_madvise);
>- set_bit(order, &huge_anon_orders_inherit);
>- spin_unlock(&huge_anon_orders_lock);
>- } else if (sysfs_streq(buf, "madvise")) {
>- spin_lock(&huge_anon_orders_lock);
>- clear_bit(order, &huge_anon_orders_always);
>- clear_bit(order, &huge_anon_orders_inherit);
>- set_bit(order, &huge_anon_orders_madvise);
>- spin_unlock(&huge_anon_orders_lock);
>- } else if (sysfs_streq(buf, "never")) {
>- spin_lock(&huge_anon_orders_lock);
>- clear_bit(order, &huge_anon_orders_always);
>- clear_bit(order, &huge_anon_orders_inherit);
>- clear_bit(order, &huge_anon_orders_madvise);
>- spin_unlock(&huge_anon_orders_lock);
>- } else
>- ret = -EINVAL;
>+ mode = sysfs_match_string(anon_enabled_mode_strings, buf);
>+ if (mode < 0)
>+ return -EINVAL;
>
>- if (ret > 0) {
>- int err;
>+ if (set_anon_enabled_mode(order, mode)) {
>+ int err = start_stop_khugepaged();
>
>- err = start_stop_khugepaged();
> if (err)
>- ret = err;
>+ return err;
>+ } else {
>+ /*
>+ * Recalculate watermarks even when the mode didn't
>+ * change, as the previous code always called
>+ * start_stop_khugepaged() which does this internally.
>+ */
>+ set_recommended_min_free_kbytes();
> }
>- return ret;
>+
>+ return count;
> }
>
> static struct kobj_attribute anon_enabled_attr =
>
>--
>2.52.0
>
--
Wei Yang
Help you, Help me
On 2026/3/11 11:12, Wei Yang wrote:
> On Tue, Mar 10, 2026 at 10:57:08AM -0700, Breno Leitao wrote:
>> Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
>> anon_enabled_store() into a new change_anon_orders() helper that
>> loops over an orders[] array, setting the bit for the selected mode
>> and clearing the others.
>>
>> Introduce enum anon_enabled_mode and anon_enabled_mode_strings[]
>> for the per-order anon THP setting.
>>
>> Use sysfs_match_string() with the anon_enabled_mode_strings[] table
>> to replace the if/else chain of sysfs_streq() calls.
>>
>> The helper uses test_and_set_bit()/test_and_clear_bit() to track
>> whether the state actually changed, so start_stop_khugepaged() is
>> only called when needed. When the mode is unchanged,
>> set_recommended_min_free_kbytes() is called directly to preserve
>> the watermark recalculation behavior of the original code.
>>
>> Signed-off-by: Breno Leitao <leitao@debian.org>
>> Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
>> ---
>> mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
>> 1 file changed, 52 insertions(+), 32 deletions(-)
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 8e2746ea74adf..e19dda5aaf195 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
>> return sysfs_emit(buf, "%s\n", output);
>> }
>>
>> +enum anon_enabled_mode {
>> + ANON_ENABLED_ALWAYS = 0,
>> + ANON_ENABLED_MADVISE = 1,
>> + ANON_ENABLED_INHERIT = 2,
>> + ANON_ENABLED_NEVER = 3,
>> +};
>> +
>> +static const char * const anon_enabled_mode_strings[] = {
>> + [ANON_ENABLED_ALWAYS] = "always",
>> + [ANON_ENABLED_MADVISE] = "madvise",
>> + [ANON_ENABLED_INHERIT] = "inherit",
>> + [ANON_ENABLED_NEVER] = "never",
>> +};
>> +
>
> Just one trivial thing, maybe keep the sequence as the sysfs output?
>
> Currently the output of /sys/kernel/transparent_hugepage/hugepages-xxxkB is
>
> always inherit madvise [never]
>
> But no strong opinion on this.
Yeah, keeping the enum order consistent with the sysfs output looks
sensible :)
Apart from that, LGTM.
Reviewed-by: Lance Yang <lance.yang@linux.dev>
On Wed, Mar 11, 2026 at 12:52:02PM +0800, Lance Yang wrote:
> On 2026/3/11 11:12, Wei Yang wrote:
> > On Tue, Mar 10, 2026 at 10:57:08AM -0700, Breno Leitao wrote:
> > > +enum anon_enabled_mode {
> > > + ANON_ENABLED_ALWAYS = 0,
> > > + ANON_ENABLED_MADVISE = 1,
> > > + ANON_ENABLED_INHERIT = 2,
> > > + ANON_ENABLED_NEVER = 3,
> > > +};
> > > +
> > > +static const char * const anon_enabled_mode_strings[] = {
> > > + [ANON_ENABLED_ALWAYS] = "always",
> > > + [ANON_ENABLED_MADVISE] = "madvise",
> > > + [ANON_ENABLED_INHERIT] = "inherit",
> > > + [ANON_ENABLED_NEVER] = "never",
> > > +};
> > > +
> >
> > Just one trivial thing, maybe keep the sequence as the sysfs output?
> >
> > Currently the output of /sys/kernel/transparent_hugepage/hugepages-xxxkB is
> >
> > always inherit madvise [never]
> >
> > But no strong opinion on this.
>
> Yeah, keeping the enum order consistent with the sysfs output looks sensible
> :)
Good point, I'll adjust the order to match the sysfs output and send out
a new version.
Thanks for the feedback!
--breno
On 3/11/26 1:57 AM, Breno Leitao wrote: > Consolidate the repeated spin_lock/set_bit/clear_bit pattern in > anon_enabled_store() into a new change_anon_orders() helper that > loops over an orders[] array, setting the bit for the selected mode > and clearing the others. > > Introduce enum anon_enabled_mode and anon_enabled_mode_strings[] > for the per-order anon THP setting. > > Use sysfs_match_string() with the anon_enabled_mode_strings[] table > to replace the if/else chain of sysfs_streq() calls. > > The helper uses test_and_set_bit()/test_and_clear_bit() to track > whether the state actually changed, so start_stop_khugepaged() is > only called when needed. When the mode is unchanged, > set_recommended_min_free_kbytes() is called directly to preserve > the watermark recalculation behavior of the original code. > > Signed-off-by: Breno Leitao <leitao@debian.org> > Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org> > --- LGTM. Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
On Wed, Mar 11, 2026 at 1:58 AM Breno Leitao <leitao@debian.org> wrote:
>
> Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
> anon_enabled_store() into a new change_anon_orders() helper that
> loops over an orders[] array, setting the bit for the selected mode
> and clearing the others.
>
> Introduce enum anon_enabled_mode and anon_enabled_mode_strings[]
> for the per-order anon THP setting.
>
> Use sysfs_match_string() with the anon_enabled_mode_strings[] table
> to replace the if/else chain of sysfs_streq() calls.
>
> The helper uses test_and_set_bit()/test_and_clear_bit() to track
> whether the state actually changed, so start_stop_khugepaged() is
> only called when needed. When the mode is unchanged,
> set_recommended_min_free_kbytes() is called directly to preserve
> the watermark recalculation behavior of the original code.
>
> Signed-off-by: Breno Leitao <leitao@debian.org>
> Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
> ---
> mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 52 insertions(+), 32 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 8e2746ea74adf..e19dda5aaf195 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
> +enum anon_enabled_mode {
> + ANON_ENABLED_ALWAYS = 0,
> + ANON_ENABLED_MADVISE = 1,
> + ANON_ENABLED_INHERIT = 2,
> + ANON_ENABLED_NEVER = 3,
> +};
> +
> +static const char * const anon_enabled_mode_strings[] = {
> + [ANON_ENABLED_ALWAYS] = "always",
> + [ANON_ENABLED_MADVISE] = "madvise",
> + [ANON_ENABLED_INHERIT] = "inherit",
> + [ANON_ENABLED_NEVER] = "never",
> +};
Do we really need anon_enabled_mode here? Would it be ok
to just use ...
static const char * const anon_enabled_mode_strings[] = {
"always", "madvise", "inherit", "never",
};
Nobody else cares about the values like ANON_ENABLED_ALWAYS
except the sysfs interface, right? Maybe we don't need to
force people to understand the enum.
> +
> static ssize_t enabled_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
> @@ -515,48 +529,54 @@ static ssize_t anon_enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
> +static bool set_anon_enabled_mode(int order, enum anon_enabled_mode mode)
> +{
> + static unsigned long *enabled_orders[] = {
> + &huge_anon_orders_always,
> + &huge_anon_orders_madvise,
> + &huge_anon_orders_inherit,
> + };
Is there a reason enabled_orders needs to be static?
Could it instead be allocated on the stack?
Thanks
Barry
On Wed, 11 Mar 2026 04:11:36 +0800 Barry Song <21cnbao@gmail.com> wrote:
> > +static bool set_anon_enabled_mode(int order, enum anon_enabled_mode mode)
> > +{
> > + static unsigned long *enabled_orders[] = {
> > + &huge_anon_orders_always,
> > + &huge_anon_orders_madvise,
> > + &huge_anon_orders_inherit,
> > + };
>
> Is there a reason enabled_orders needs to be static?
> Could it instead be allocated on the stack?
Could, but then the array would be rebuilt each time the function is called.
The idea here is to save a bit of runtime cost.
On Wed, Mar 11, 2026 at 4:17 AM Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Wed, 11 Mar 2026 04:11:36 +0800 Barry Song <21cnbao@gmail.com> wrote:
>
> > > +static bool set_anon_enabled_mode(int order, enum anon_enabled_mode mode)
> > > +{
> > > + static unsigned long *enabled_orders[] = {
> > > + &huge_anon_orders_always,
> > > + &huge_anon_orders_madvise,
> > > + &huge_anon_orders_inherit,
> > > + };
> >
> > Is there a reason enabled_orders needs to be static?
> > Could it instead be allocated on the stack?
>
> Could, but then the array would be rebuilt each time the function is called.
> The idea here is to save a bit of runtime cost.
I see. My understanding is that this function is very cold,
so we don't really care about its performance. But I'm fine
with keeping it static if you prefer.
Thanks
Barry
On 10 Mar 2026, at 13:57, Breno Leitao wrote: > Consolidate the repeated spin_lock/set_bit/clear_bit pattern in > anon_enabled_store() into a new change_anon_orders() helper that > loops over an orders[] array, setting the bit for the selected mode > and clearing the others. > > Introduce enum anon_enabled_mode and anon_enabled_mode_strings[] > for the per-order anon THP setting. > > Use sysfs_match_string() with the anon_enabled_mode_strings[] table > to replace the if/else chain of sysfs_streq() calls. > > The helper uses test_and_set_bit()/test_and_clear_bit() to track > whether the state actually changed, so start_stop_khugepaged() is > only called when needed. When the mode is unchanged, > set_recommended_min_free_kbytes() is called directly to preserve > the watermark recalculation behavior of the original code. > > Signed-off-by: Breno Leitao <leitao@debian.org> > Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org> > --- > mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++--------------------- > 1 file changed, 52 insertions(+), 32 deletions(-) > Nice cleanup. Reviewed-by: Zi Yan <ziy@nvidia.com> Best Regards, Yan, Zi
© 2016 - 2026 Red Hat, Inc.