Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
anon_enabled_store() into a new change_anon_orders() helper that
loops over an orders[] array, setting the bit for the selected mode
and clearing the others.
Introduce enum enabled_mode and enabled_mode_strings[] to be shared
with enabled_store() in a subsequent patch.
Use sysfs_match_string() with the enabled_mode_strings[] table to
replace the if/else chain of sysfs_streq() calls.
The helper uses test_and_set_bit()/test_and_clear_bit() to track
whether the state actually changed, so start_stop_khugepaged() is
only called when needed. When the mode is unchanged,
set_recommended_min_free_kbytes() is called directly to preserve
the watermark recalculation behavior of the original code.
Signed-off-by: Breno Leitao <leitao@debian.org>
---
mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 32 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8e2746ea74adf..19619213f54d1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
return sysfs_emit(buf, "%s\n", output);
}
+enum enabled_mode {
+ ENABLED_ALWAYS,
+ ENABLED_MADVISE,
+ ENABLED_INHERIT,
+ ENABLED_NEVER,
+};
+
+static const char * const enabled_mode_strings[] = {
+ [ENABLED_ALWAYS] = "always",
+ [ENABLED_MADVISE] = "madvise",
+ [ENABLED_INHERIT] = "inherit",
+ [ENABLED_NEVER] = "never",
+};
+
static ssize_t enabled_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
@@ -515,48 +529,54 @@ static ssize_t anon_enabled_show(struct kobject *kobj,
return sysfs_emit(buf, "%s\n", output);
}
+static bool change_anon_orders(int order, enum enabled_mode mode)
+{
+ static unsigned long *orders[] = {
+ &huge_anon_orders_always,
+ &huge_anon_orders_madvise,
+ &huge_anon_orders_inherit,
+ };
+ bool changed = false;
+ int i;
+
+ spin_lock(&huge_anon_orders_lock);
+ for (i = 0; i < ARRAY_SIZE(orders); i++) {
+ if (i == mode)
+ changed |= !test_and_set_bit(order, orders[i]);
+ else
+ changed |= test_and_clear_bit(order, orders[i]);
+ }
+ spin_unlock(&huge_anon_orders_lock);
+
+ return changed;
+}
+
static ssize_t anon_enabled_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int order = to_thpsize(kobj)->order;
- ssize_t ret = count;
+ int mode;
- if (sysfs_streq(buf, "always")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_inherit);
- clear_bit(order, &huge_anon_orders_madvise);
- set_bit(order, &huge_anon_orders_always);
- spin_unlock(&huge_anon_orders_lock);
- } else if (sysfs_streq(buf, "inherit")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_always);
- clear_bit(order, &huge_anon_orders_madvise);
- set_bit(order, &huge_anon_orders_inherit);
- spin_unlock(&huge_anon_orders_lock);
- } else if (sysfs_streq(buf, "madvise")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_always);
- clear_bit(order, &huge_anon_orders_inherit);
- set_bit(order, &huge_anon_orders_madvise);
- spin_unlock(&huge_anon_orders_lock);
- } else if (sysfs_streq(buf, "never")) {
- spin_lock(&huge_anon_orders_lock);
- clear_bit(order, &huge_anon_orders_always);
- clear_bit(order, &huge_anon_orders_inherit);
- clear_bit(order, &huge_anon_orders_madvise);
- spin_unlock(&huge_anon_orders_lock);
- } else
- ret = -EINVAL;
+ mode = sysfs_match_string(enabled_mode_strings, buf);
+ if (mode < 0)
+ return -EINVAL;
- if (ret > 0) {
- int err;
+ if (change_anon_orders(order, mode)) {
+ int err = start_stop_khugepaged();
- err = start_stop_khugepaged();
if (err)
- ret = err;
+ return err;
+ } else {
+ /*
+ * Recalculate watermarks even when the mode didn't
+ * change, as the previous code always called
+ * start_stop_khugepaged() which does this internally.
+ */
+ set_recommended_min_free_kbytes();
}
- return ret;
+
+ return count;
}
static struct kobj_attribute anon_enabled_attr =
--
2.47.3
On Thu, Mar 05, 2026 at 06:04:54AM -0800, Breno Leitao wrote:
> Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
> anon_enabled_store() into a new change_anon_orders() helper that
> loops over an orders[] array, setting the bit for the selected mode
> and clearing the others.
>
> Introduce enum enabled_mode and enabled_mode_strings[] to be shared
> with enabled_store() in a subsequent patch.
>
> Use sysfs_match_string() with the enabled_mode_strings[] table to
> replace the if/else chain of sysfs_streq() calls.
>
> The helper uses test_and_set_bit()/test_and_clear_bit() to track
> whether the state actually changed, so start_stop_khugepaged() is
> only called when needed. When the mode is unchanged,
> set_recommended_min_free_kbytes() is called directly to preserve
> the watermark recalculation behavior of the original code.
>
> Signed-off-by: Breno Leitao <leitao@debian.org>
This is nice, you've improved my idea :)
I know you change another instance of this in 3/3, but I wonder if there are
other cases like this in the THP code we can fixup too?
Anyway, passing tests locally and LGTM so:
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
> ---
> mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 52 insertions(+), 32 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 8e2746ea74adf..19619213f54d1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
> +enum enabled_mode {
> + ENABLED_ALWAYS,
> + ENABLED_MADVISE,
> + ENABLED_INHERIT,
> + ENABLED_NEVER,
> +};
> +
> +static const char * const enabled_mode_strings[] = {
> + [ENABLED_ALWAYS] = "always",
> + [ENABLED_MADVISE] = "madvise",
> + [ENABLED_INHERIT] = "inherit",
> + [ENABLED_NEVER] = "never",
> +};
> +
> static ssize_t enabled_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
> @@ -515,48 +529,54 @@ static ssize_t anon_enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
> +static bool change_anon_orders(int order, enum enabled_mode mode)
> +{
> + static unsigned long *orders[] = {
> + &huge_anon_orders_always,
> + &huge_anon_orders_madvise,
> + &huge_anon_orders_inherit,
> + };
> + bool changed = false;
> + int i;
> +
> + spin_lock(&huge_anon_orders_lock);
> + for (i = 0; i < ARRAY_SIZE(orders); i++) {
> + if (i == mode)
> + changed |= !test_and_set_bit(order, orders[i]);
> + else
> + changed |= test_and_clear_bit(order, orders[i]);
> + }
> + spin_unlock(&huge_anon_orders_lock);
> +
> + return changed;
> +}
> +
> static ssize_t anon_enabled_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
> {
> int order = to_thpsize(kobj)->order;
> - ssize_t ret = count;
> + int mode;
>
> - if (sysfs_streq(buf, "always")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_inherit);
> - clear_bit(order, &huge_anon_orders_madvise);
> - set_bit(order, &huge_anon_orders_always);
> - spin_unlock(&huge_anon_orders_lock);
> - } else if (sysfs_streq(buf, "inherit")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_always);
> - clear_bit(order, &huge_anon_orders_madvise);
> - set_bit(order, &huge_anon_orders_inherit);
> - spin_unlock(&huge_anon_orders_lock);
> - } else if (sysfs_streq(buf, "madvise")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_always);
> - clear_bit(order, &huge_anon_orders_inherit);
> - set_bit(order, &huge_anon_orders_madvise);
> - spin_unlock(&huge_anon_orders_lock);
> - } else if (sysfs_streq(buf, "never")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_always);
> - clear_bit(order, &huge_anon_orders_inherit);
> - clear_bit(order, &huge_anon_orders_madvise);
> - spin_unlock(&huge_anon_orders_lock);
> - } else
> - ret = -EINVAL;
> + mode = sysfs_match_string(enabled_mode_strings, buf);
> + if (mode < 0)
> + return -EINVAL;
>
> - if (ret > 0) {
> - int err;
> + if (change_anon_orders(order, mode)) {
> + int err = start_stop_khugepaged();
>
> - err = start_stop_khugepaged();
> if (err)
> - ret = err;
> + return err;
> + } else {
> + /*
> + * Recalculate watermarks even when the mode didn't
> + * change, as the previous code always called
> + * start_stop_khugepaged() which does this internally.
> + */
> + set_recommended_min_free_kbytes();
> }
> - return ret;
> +
> + return count;
> }
>
> static struct kobj_attribute anon_enabled_attr =
>
> --
> 2.47.3
>
On Fri, Mar 06, 2026 at 11:32:44AM +0000, Lorenzo Stoakes (Oracle) wrote:
> On Thu, Mar 05, 2026 at 06:04:54AM -0800, Breno Leitao wrote:
> > Signed-off-by: Breno Leitao <leitao@debian.org>
>
> This is nice, you've improved my idea :)
Thanks! Your idea was a good starting point, and digging deeper led me to
sysfs_match_string(), which I wasn't familiar with before — it turned out
to be exactly what we needed here.
Having the modes in anon_enabled_mode_strings[] also opens the door to
further simplification. I haven't thought it through completely yet, but I
suspect we could consolidate quite a few of the _store()/_show() helpers
as a follow-up.
> I know you change another instance of this in 3/3, but I wonder if there are
> other cases like this in the THP code we can fixup too?
Yes, there are several other candidates — defrag_store, the ones Baolin
Wang reported, shmem_enabled_store(), and thpsize_shmem_enabled_store()
among them. I'd prefer to address those in a separate patch series once
this one lands, if that works for everyone.
On 3/5/26 10:04 PM, Breno Leitao wrote:
> Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
> anon_enabled_store() into a new change_anon_orders() helper that
> loops over an orders[] array, setting the bit for the selected mode
> and clearing the others.
>
> Introduce enum enabled_mode and enabled_mode_strings[] to be shared
> with enabled_store() in a subsequent patch.
>
> Use sysfs_match_string() with the enabled_mode_strings[] table to
> replace the if/else chain of sysfs_streq() calls.
>
> The helper uses test_and_set_bit()/test_and_clear_bit() to track
> whether the state actually changed, so start_stop_khugepaged() is
> only called when needed. When the mode is unchanged,
> set_recommended_min_free_kbytes() is called directly to preserve
> the watermark recalculation behavior of the original code.
>
> Signed-off-by: Breno Leitao <leitao@debian.org>
> ---
> mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 52 insertions(+), 32 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 8e2746ea74adf..19619213f54d1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
> +enum enabled_mode {
> + ENABLED_ALWAYS,
> + ENABLED_MADVISE,
> + ENABLED_INHERIT,
> + ENABLED_NEVER,
> +};
> +
> +static const char * const enabled_mode_strings[] = {
> + [ENABLED_ALWAYS] = "always",
> + [ENABLED_MADVISE] = "madvise",
> + [ENABLED_INHERIT] = "inherit",
> + [ENABLED_NEVER] = "never",
> +};
> +
> static ssize_t enabled_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
> @@ -515,48 +529,54 @@ static ssize_t anon_enabled_show(struct kobject *kobj,
> return sysfs_emit(buf, "%s\n", output);
> }
>
> +static bool change_anon_orders(int order, enum enabled_mode mode)
> +{
> + static unsigned long *orders[] = {
> + &huge_anon_orders_always,
> + &huge_anon_orders_madvise,
> + &huge_anon_orders_inherit,
> + };
> + bool changed = false;
> + int i;
> +
> + spin_lock(&huge_anon_orders_lock);
> + for (i = 0; i < ARRAY_SIZE(orders); i++) {
> + if (i == mode)
> + changed |= !test_and_set_bit(order, orders[i]);
> + else
> + changed |= test_and_clear_bit(order, orders[i]);
> + }
> + spin_unlock(&huge_anon_orders_lock);
> +
> + return changed;
> +}
> +
> static ssize_t anon_enabled_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
> {
> int order = to_thpsize(kobj)->order;
> - ssize_t ret = count;
> + int mode;
>
> - if (sysfs_streq(buf, "always")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_inherit);
> - clear_bit(order, &huge_anon_orders_madvise);
> - set_bit(order, &huge_anon_orders_always);
> - spin_unlock(&huge_anon_orders_lock);
> - } else if (sysfs_streq(buf, "inherit")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_always);
> - clear_bit(order, &huge_anon_orders_madvise);
> - set_bit(order, &huge_anon_orders_inherit);
> - spin_unlock(&huge_anon_orders_lock);
> - } else if (sysfs_streq(buf, "madvise")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_always);
> - clear_bit(order, &huge_anon_orders_inherit);
> - set_bit(order, &huge_anon_orders_madvise);
> - spin_unlock(&huge_anon_orders_lock);
> - } else if (sysfs_streq(buf, "never")) {
> - spin_lock(&huge_anon_orders_lock);
> - clear_bit(order, &huge_anon_orders_always);
> - clear_bit(order, &huge_anon_orders_inherit);
> - clear_bit(order, &huge_anon_orders_madvise);
> - spin_unlock(&huge_anon_orders_lock);
> - } else
> - ret = -EINVAL;
> + mode = sysfs_match_string(enabled_mode_strings, buf);
> + if (mode < 0)
> + return -EINVAL;
>
> - if (ret > 0) {
> - int err;
> + if (change_anon_orders(order, mode)) {
> + int err = start_stop_khugepaged();
Thanks for the cleanup, and the code looks better.
>
> - err = start_stop_khugepaged();
> if (err)
> - ret = err;
> + return err;
> + } else {
> + /*
> + * Recalculate watermarks even when the mode didn't
> + * change, as the previous code always called
> + * start_stop_khugepaged() which does this internally.
> + */
> + set_recommended_min_free_kbytes();
However, this won’t fix your issue. You will still get lots of warning
messages even if no hugepage options are changed.
On Fri, Mar 06, 2026 at 02:07:50PM +0800, Baolin Wang wrote:
> > + mode = sysfs_match_string(enabled_mode_strings, buf);
> > + if (mode < 0)
> > + return -EINVAL;
> > - if (ret > 0) {
> > - int err;
> > + if (change_anon_orders(order, mode)) {
> > + int err = start_stop_khugepaged();
>
> Thanks for the cleanup, and the code looks better.
>
> > - err = start_stop_khugepaged();
> > if (err)
> > - ret = err;
> > + return err;
> > + } else {
> > + /*
> > + * Recalculate watermarks even when the mode didn't
> > + * change, as the previous code always called
> > + * start_stop_khugepaged() which does this internally.
> > + */
> > + set_recommended_min_free_kbytes();
>
> However, this won't fix your issue. You will still get lots of warning
> messages even if no hugepage options are changed.
Correct — this was part of the earlier discussion, where the suggestion
was to retain the set_recommended_min_free_kbytes() call even when it
is a no-op.
As for the warnings, I see a few possible approaches:
* Remove them entirely — are they providing any real value?
* Apply rate limiting to reduce the noise
* Something else?
Thanks for bringing this up,
--breno
On Fri, Mar 06, 2026 at 02:30:22AM -0800, Breno Leitao wrote:
> On Fri, Mar 06, 2026 at 02:07:50PM +0800, Baolin Wang wrote:
> > > + mode = sysfs_match_string(enabled_mode_strings, buf);
> > > + if (mode < 0)
> > > + return -EINVAL;
> > > - if (ret > 0) {
> > > - int err;
> > > + if (change_anon_orders(order, mode)) {
> > > + int err = start_stop_khugepaged();
> >
> > Thanks for the cleanup, and the code looks better.
> >
> > > - err = start_stop_khugepaged();
> > > if (err)
> > > - ret = err;
> > > + return err;
> > > + } else {
> > > + /*
> > > + * Recalculate watermarks even when the mode didn't
> > > + * change, as the previous code always called
> > > + * start_stop_khugepaged() which does this internally.
> > > + */
> > > + set_recommended_min_free_kbytes();
> >
> > However, this won't fix your issue. You will still get lots of warning
> > messages even if no hugepage options are changed.
>
> Correct — this was part of the earlier discussion, where the suggestion
> was to retain the set_recommended_min_free_kbytes() call even when it
> is a no-op.
>
> As for the warnings, I see a few possible approaches:
>
> * Remove them entirely — are they providing any real value?
> * Apply rate limiting to reduce the noise
I think rate limiting is the sensible approach.
> * Something else?
>
> Thanks for bringing this up,
> --breno
>
Cheers, Lorenzo
On 3/6/26 7:22 PM, Lorenzo Stoakes (Oracle) wrote:
> On Fri, Mar 06, 2026 at 02:30:22AM -0800, Breno Leitao wrote:
>> On Fri, Mar 06, 2026 at 02:07:50PM +0800, Baolin Wang wrote:
>>>> + mode = sysfs_match_string(enabled_mode_strings, buf);
>>>> + if (mode < 0)
>>>> + return -EINVAL;
>>>> - if (ret > 0) {
>>>> - int err;
>>>> + if (change_anon_orders(order, mode)) {
>>>> + int err = start_stop_khugepaged();
>>>
>>> Thanks for the cleanup, and the code looks better.
>>>
>>>> - err = start_stop_khugepaged();
>>>> if (err)
>>>> - ret = err;
>>>> + return err;
>>>> + } else {
>>>> + /*
>>>> + * Recalculate watermarks even when the mode didn't
>>>> + * change, as the previous code always called
>>>> + * start_stop_khugepaged() which does this internally.
>>>> + */
>>>> + set_recommended_min_free_kbytes();
>>>
>>> However, this won't fix your issue. You will still get lots of warning
>>> messages even if no hugepage options are changed.
>>
>> Correct — this was part of the earlier discussion, where the suggestion
>> was to retain the set_recommended_min_free_kbytes() call even when it
>> is a no-op.
>>
>> As for the warnings, I see a few possible approaches:
>>
>> * Remove them entirely — are they providing any real value?
>> * Apply rate limiting to reduce the noise
>
> I think rate limiting is the sensible approach.
+1. Make sense to me.
© 2016 - 2026 Red Hat, Inc.