[PATCH] erofs: introduce nolargefolio mount option

Chao Yu posted 1 patch 1 month ago
Documentation/filesystems/erofs.rst | 1 +
fs/erofs/inode.c                    | 3 ++-
fs/erofs/internal.h                 | 1 +
fs/erofs/super.c                    | 8 +++++++-
4 files changed, 11 insertions(+), 2 deletions(-)
[PATCH] erofs: introduce nolargefolio mount option
Posted by Chao Yu 1 month ago
This patch introduces a new mount option 'nolargefolio' for EROFS.
When this option is specified, large folios are disabled by default
for all inodes.  This option can be used in environments where large
folio resources are limited and it is necessary to let only specified
users allocate large folios on demand.

Signed-off-by: Chao Yu <chao@kernel.org>
---
 Documentation/filesystems/erofs.rst | 1 +
 fs/erofs/inode.c                    | 3 ++-
 fs/erofs/internal.h                 | 1 +
 fs/erofs/super.c                    | 8 +++++++-
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index fe06308e546c..d692a1d9f32c 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -137,6 +137,7 @@ fsoffset=%llu          Specify block-aligned filesystem offset for the primary d
 inode_share            Enable inode page sharing for this filesystem.  Inodes with
                        identical content within the same domain ID can share the
                        page cache.
+nolargefolio           Disable large folio support for all files.
 ===================    =========================================================
 
 Sysfs Entries
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 4b3d21402e10..26361e86a354 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -254,7 +254,8 @@ static int erofs_fill_inode(struct inode *inode)
 		return 0;
 	}
 
-	mapping_set_large_folios(inode->i_mapping);
+	if (!test_opt(&EROFS_SB(inode->i_sb)->opt, NO_LARGE_FOLIO))
+		mapping_set_large_folios(inode->i_mapping);
 	aops = erofs_get_aops(inode, false);
 	if (IS_ERR(aops))
 		return PTR_ERR(aops);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index a4f0a42cf8c3..b5d98410c699 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -177,6 +177,7 @@ struct erofs_sb_info {
 #define EROFS_MOUNT_DAX_NEVER		0x00000080
 #define EROFS_MOUNT_DIRECT_IO		0x00000100
 #define EROFS_MOUNT_INODE_SHARE		0x00000200
+#define EROFS_MOUNT_NO_LARGE_FOLIO	0x00000400
 
 #define clear_opt(opt, option)	((opt)->mount_opt &= ~EROFS_MOUNT_##option)
 #define set_opt(opt, option)	((opt)->mount_opt |= EROFS_MOUNT_##option)
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 972a0c82198d..a353369d4db8 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -390,7 +390,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
 enum {
 	Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
 	Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
-	Opt_inode_share,
+	Opt_inode_share, Opt_nolargefolio,
 };
 
 static const struct constant_table erofs_param_cache_strategy[] = {
@@ -419,6 +419,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
 	fsparam_flag_no("directio",	Opt_directio),
 	fsparam_u64("fsoffset",		Opt_fsoffset),
 	fsparam_flag("inode_share",	Opt_inode_share),
+	fsparam_flag("nolargefolio",	Opt_nolargefolio),
 	{}
 };
 
@@ -541,6 +542,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
 		else
 			set_opt(&sbi->opt, INODE_SHARE);
 		break;
+	case Opt_nolargefolio:
+		set_opt(&sbi->opt, NO_LARGE_FOLIO);
+		break;
 	}
 	return 0;
 }
@@ -1105,6 +1109,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
 	if (test_opt(opt, INODE_SHARE))
 		seq_puts(seq, ",inode_share");
+	if (test_opt(opt, NO_LARGE_FOLIO))
+		seq_puts(seq, ",nolargefolio");
 	return 0;
 }
 
-- 
2.49.0
Re: [PATCH] erofs: introduce nolargefolio mount option
Posted by Gao Xiang 1 month ago
Hi Chao,

(+cc -fsdevel, willy, Jan Kara)

On 2026/3/9 10:30, Chao Yu wrote:
> This patch introduces a new mount option 'nolargefolio' for EROFS.
> When this option is specified, large folio will be disabled by
> default for all inodes, this option can be used for environments
> where large folio resources are limited, it's necessary to only
> let specified user to allocate large folios on demand.

For this kind of option, I think more real background
about avoiding high-order allocations is needed in the
commit message (at least for later reference), like
what I observed in:
https://android-review.googlesource.com/c/kernel/common/+/3877981

because the entire community tends to enable large folios
unconditionally if possible.  Without enough clarification,
even if I merge this, there will be endless questions again
and again about this.

And Jan once raised the question of whether it should be
a user interface or an auto-tuning one:
https://lore.kernel.org/r/z2ule3ilnnpoevo5mvt3intvjtuyud7vg3pbfauon47fhr4owa@giaehpbie4a5

My question is: if the needs are real, should it be a
generic VFS decision instead (because it's not due to
a filesystem restriction but due to real system memory
pressure or a heavy workload, for example)?  However,
if the answer is that others don't really care about
this, I'm fine to leave it as an erofs-specific option
as long as the actual use case is clear in the commit
message.

Thanks,
Gao Xiang


> 
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
>   Documentation/filesystems/erofs.rst | 1 +
>   fs/erofs/inode.c                    | 3 ++-
>   fs/erofs/internal.h                 | 1 +
>   fs/erofs/super.c                    | 8 +++++++-
>   4 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
> index fe06308e546c..d692a1d9f32c 100644
> --- a/Documentation/filesystems/erofs.rst
> +++ b/Documentation/filesystems/erofs.rst
> @@ -137,6 +137,7 @@ fsoffset=%llu          Specify block-aligned filesystem offset for the primary d
>   inode_share            Enable inode page sharing for this filesystem.  Inodes with
>                          identical content within the same domain ID can share the
>                          page cache.
> +nolargefolio           Disable large folio support for all files.
>   ===================    =========================================================
>   
>   Sysfs Entries
> diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> index 4b3d21402e10..26361e86a354 100644
> --- a/fs/erofs/inode.c
> +++ b/fs/erofs/inode.c
> @@ -254,7 +254,8 @@ static int erofs_fill_inode(struct inode *inode)
>   		return 0;
>   	}
>   
> -	mapping_set_large_folios(inode->i_mapping);
> +	if (!test_opt(&EROFS_SB(inode->i_sb)->opt, NO_LARGE_FOLIO))
> +		mapping_set_large_folios(inode->i_mapping);
>   	aops = erofs_get_aops(inode, false);
>   	if (IS_ERR(aops))
>   		return PTR_ERR(aops);
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index a4f0a42cf8c3..b5d98410c699 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -177,6 +177,7 @@ struct erofs_sb_info {
>   #define EROFS_MOUNT_DAX_NEVER		0x00000080
>   #define EROFS_MOUNT_DIRECT_IO		0x00000100
>   #define EROFS_MOUNT_INODE_SHARE		0x00000200
> +#define EROFS_MOUNT_NO_LARGE_FOLIO	0x00000400
>   
>   #define clear_opt(opt, option)	((opt)->mount_opt &= ~EROFS_MOUNT_##option)
>   #define set_opt(opt, option)	((opt)->mount_opt |= EROFS_MOUNT_##option)
> diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> index 972a0c82198d..a353369d4db8 100644
> --- a/fs/erofs/super.c
> +++ b/fs/erofs/super.c
> @@ -390,7 +390,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
>   enum {
>   	Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
>   	Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
> -	Opt_inode_share,
> +	Opt_inode_share, Opt_nolargefolio,
>   };
>   
>   static const struct constant_table erofs_param_cache_strategy[] = {
> @@ -419,6 +419,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
>   	fsparam_flag_no("directio",	Opt_directio),
>   	fsparam_u64("fsoffset",		Opt_fsoffset),
>   	fsparam_flag("inode_share",	Opt_inode_share),
> +	fsparam_flag("nolargefolio",	Opt_nolargefolio),
>   	{}
>   };
>   
> @@ -541,6 +542,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
>   		else
>   			set_opt(&sbi->opt, INODE_SHARE);
>   		break;
> +	case Opt_nolargefolio:
> +		set_opt(&sbi->opt, NO_LARGE_FOLIO);
> +		break;
>   	}
>   	return 0;
>   }
> @@ -1105,6 +1109,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
>   		seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
>   	if (test_opt(opt, INODE_SHARE))
>   		seq_puts(seq, ",inode_share");
> +	if (test_opt(opt, NO_LARGE_FOLIO))
> +		seq_puts(seq, ",nolargefolio");
>   	return 0;
>   }
>
Re: [PATCH] erofs: introduce nolargefolio mount option
Posted by Chao Yu 1 month ago
Xiang,

On 3/9/26 11:03, Gao Xiang wrote:
> Hi Chao,
> 
> (+cc -fsdevel, willy, Jan kara)
> 
> On 2026/3/9 10:30, Chao Yu wrote:
>> This patch introduces a new mount option 'nolargefolio' for EROFS.
>> When this option is specified, large folio will be disabled by
>> default for all inodes, this option can be used for environments
>> where large folio resources are limited, it's necessary to only
>> let specified user to allocate large folios on demand.
> 
> For this kind of options, I think more real backgrounds
> about avoiding high-order allocations are needed in the
> commit message (at least for later reference) also like
> what I observed in:
> https://android-review.googlesource.com/c/kernel/common/+/3877981

Basically, the background is a contention scenario on large folio allocation
among multiple users, including EROFS, in the Android system.  As it's related
to an internal product scenario, I cannot provide more details now; I'm sorry
about that.  I'd be glad to discuss the background and pain points once
I can share more, let's see. :)

> 
> because the entire community tends to enable large folios
> unconditionally if possible.  Without enough clarification,
> even I merge this, there will be endless questions again
> and again about this.
> 
> And Jan once raised up if it should be a user interface
> or auto-tuning one:
> https://lore.kernel.org/r/z2ule3ilnnpoevo5mvt3intvjtuyud7vg3pbfauon47fhr4owa@giaehpbie4a5

Thanks for sharing this anyway, I didn't notice this previously...

Thanks,

> 
> My question is that if the needs are real, I wonder if
> it should be a vfs generic decision instead (because
> it's not due to the filesystem restriction but due to
> real system memory pressure or heavy workload for
> example).  However, if the answer is that others don't
> really care about this, I'm fine to leave it as an
> erofs-specific option as long as the actual case is
> clear in the commit message.
> 
> Thanks,
> Gao Xiang
> 
> 
>>
>> Signed-off-by: Chao Yu <chao@kernel.org>
>> ---
>>   Documentation/filesystems/erofs.rst | 1 +
>>   fs/erofs/inode.c                    | 3 ++-
>>   fs/erofs/internal.h                 | 1 +
>>   fs/erofs/super.c                    | 8 +++++++-
>>   4 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
>> index fe06308e546c..d692a1d9f32c 100644
>> --- a/Documentation/filesystems/erofs.rst
>> +++ b/Documentation/filesystems/erofs.rst
>> @@ -137,6 +137,7 @@ fsoffset=%llu          Specify block-aligned filesystem offset for the primary d
>>   inode_share            Enable inode page sharing for this filesystem.  Inodes with
>>                          identical content within the same domain ID can share the
>>                          page cache.
>> +nolargefolio           Disable large folio support for all files.
>>   ===================    =========================================================
>>     Sysfs Entries
>> diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
>> index 4b3d21402e10..26361e86a354 100644
>> --- a/fs/erofs/inode.c
>> +++ b/fs/erofs/inode.c
>> @@ -254,7 +254,8 @@ static int erofs_fill_inode(struct inode *inode)
>>           return 0;
>>       }
>>   -    mapping_set_large_folios(inode->i_mapping);
>> +    if (!test_opt(&EROFS_SB(inode->i_sb)->opt, NO_LARGE_FOLIO))
>> +        mapping_set_large_folios(inode->i_mapping);
>>       aops = erofs_get_aops(inode, false);
>>       if (IS_ERR(aops))
>>           return PTR_ERR(aops);
>> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
>> index a4f0a42cf8c3..b5d98410c699 100644
>> --- a/fs/erofs/internal.h
>> +++ b/fs/erofs/internal.h
>> @@ -177,6 +177,7 @@ struct erofs_sb_info {
>>   #define EROFS_MOUNT_DAX_NEVER        0x00000080
>>   #define EROFS_MOUNT_DIRECT_IO        0x00000100
>>   #define EROFS_MOUNT_INODE_SHARE        0x00000200
>> +#define EROFS_MOUNT_NO_LARGE_FOLIO    0x00000400
>>     #define clear_opt(opt, option)    ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
>>   #define set_opt(opt, option)    ((opt)->mount_opt |= EROFS_MOUNT_##option)
>> diff --git a/fs/erofs/super.c b/fs/erofs/super.c
>> index 972a0c82198d..a353369d4db8 100644
>> --- a/fs/erofs/super.c
>> +++ b/fs/erofs/super.c
>> @@ -390,7 +390,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
>>   enum {
>>       Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
>>       Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
>> -    Opt_inode_share,
>> +    Opt_inode_share, Opt_nolargefolio,
>>   };
>>     static const struct constant_table erofs_param_cache_strategy[] = {
>> @@ -419,6 +419,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
>>       fsparam_flag_no("directio",    Opt_directio),
>>       fsparam_u64("fsoffset",        Opt_fsoffset),
>>       fsparam_flag("inode_share",    Opt_inode_share),
>> +    fsparam_flag("nolargefolio",    Opt_nolargefolio),
>>       {}
>>   };
>>   @@ -541,6 +542,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
>>           else
>>               set_opt(&sbi->opt, INODE_SHARE);
>>           break;
>> +    case Opt_nolargefolio:
>> +        set_opt(&sbi->opt, NO_LARGE_FOLIO);
>> +        break;
>>       }
>>       return 0;
>>   }
>> @@ -1105,6 +1109,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
>>           seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
>>       if (test_opt(opt, INODE_SHARE))
>>           seq_puts(seq, ",inode_share");
>> +    if (test_opt(opt, NO_LARGE_FOLIO))
>> +        seq_puts(seq, ",nolargefolio");
>>       return 0;
>>   }
>>   
> 

Re: [PATCH] erofs: introduce nolargefolio mount option
Posted by Gao Xiang 1 month ago

On 2026/3/10 14:43, Chao Yu wrote:
> Xiang,
> 
> On 3/9/26 11:03, Gao Xiang wrote:
>> Hi Chao,
>>
>> (+cc -fsdevel, willy, Jan kara)
>>
>> On 2026/3/9 10:30, Chao Yu wrote:
>>> This patch introduces a new mount option 'nolargefolio' for EROFS.
>>> When this option is specified, large folio will be disabled by
>>> default for all inodes, this option can be used for environments
>>> where large folio resources are limited, it's necessary to only
>>> let specified user to allocate large folios on demand.
>>
>> For this kind of options, I think more real backgrounds
>> about avoiding high-order allocations are needed in the
>> commit message (at least for later reference) also like
>> what I observed in:
>> https://android-review.googlesource.com/c/kernel/common/+/3877981
> 
> Basically, the background is about contention scenario on large folio allocation,
> it's among multiple users including EROFS in Android-system, as it's related to
> internal scene of product, so I can not provide more details now, I'm sorry
> about that, but I'm glad to discuss based on the background and pain point once
> if I can share more, let's see. :)

Understood, but I think it's hard to justify an upstream
solution without a public workload for discussion.  Anyway,
I can imagine some real workloads where large folios could
cause unnecessary pressure, since I once worked for Android,
but I think others need an explicit one anyway to justify
this.

As Matthew and Jan mentioned, it's hard to add a per-fs
knob like this.  If it's Android-specific and no public
information can be shared, I suggest leaving the changes in
Android downstream for now, until the workloads can be made public.

Thanks,
Gao Xiang
Re: [PATCH] erofs: introduce nolargefolio mount option
Posted by Chao Yu 1 month ago
On 3/10/26 15:02, Gao Xiang wrote:
> 
> 
> On 2026/3/10 14:43, Chao Yu wrote:
>> Xiang,
>>
>> On 3/9/26 11:03, Gao Xiang wrote:
>>> Hi Chao,
>>>
>>> (+cc -fsdevel, willy, Jan kara)
>>>
>>> On 2026/3/9 10:30, Chao Yu wrote:
>>>> This patch introduces a new mount option 'nolargefolio' for EROFS.
>>>> When this option is specified, large folio will be disabled by
>>>> default for all inodes, this option can be used for environments
>>>> where large folio resources are limited, it's necessary to only
>>>> let specified user to allocate large folios on demand.
>>>
>>> For this kind of options, I think more real backgrounds
>>> about avoiding high-order allocations are needed in the
>>> commit message (at least for later reference) also like
>>> what I observed in:
>>> https://android-review.googlesource.com/c/kernel/common/+/3877981
>>
>> Basically, the background is about contention scenario on large folio allocation,
>> it's among multiple users including EROFS in Android-system, as it's related to
>> internal scene of product, so I can not provide more details now, I'm sorry
>> about that, but I'm glad to discuss based on the background and pain point once
>> if I can share more, let's see. :)
> 
> Understood, but I think it's hard to justify an upstream
> solution without a public load for discussion.  Anyway,
> I can imagine some real workloads which large folios could
> cause unnecessary pressure since I once worked for Android,
> but I think others need an explicit one anyway to justify
> this.

Yes,

> 
> As Matthew and Jan mentioned, it's hard to add a per-fs
> knob like this.  If it's Android-specific and no possible
> public infos, I suggest leaving the changes Android
> downstream for now, until the workloads can be made public.

Sure, I can understand that we're not going to accept per-fs changes
to the large folio policy, given that there is already a conclusion or
agreement about this from the community's previous discussion.

Thanks for the suggestion, I can take a look from downstream side.

Thanks,

> 
> Thanks,
> Gao Xiang

Re: [PATCH] erofs: introduce nolargefolio mount option
Posted by Matthew Wilcox 1 month ago
On Mon, Mar 09, 2026 at 11:03:43AM +0800, Gao Xiang wrote:
> Hi Chao,
> 
> (+cc -fsdevel, willy, Jan kara)
> 
> On 2026/3/9 10:30, Chao Yu wrote:
> > This patch introduces a new mount option 'nolargefolio' for EROFS.
> > When this option is specified, large folio will be disabled by
> > default for all inodes, this option can be used for environments
> > where large folio resources are limited, it's necessary to only
> > let specified user to allocate large folios on demand.
> 
> For this kind of options, I think more real backgrounds
> about avoiding high-order allocations are needed in the
> commit message (at least for later reference) also like
> what I observed in:
> https://android-review.googlesource.com/c/kernel/common/+/3877981
> 
> because the entire community tends to enable large folios
> unconditionally if possible.  Without enough clarification,
> even I merge this, there will be endless questions again
> and again about this.

This was a decision made early on.  If the heuristics are wrong, they
need to be fixed.  It's very disappointing to see people try to sneak
these changes into individual filesystems.  Thanks for catching it and
preventing it from sneaking in.  Chao is not a new contributor; he
should know better than this by now.

> And Jan once raised up if it should be a user interface
> or auto-tuning one:
> https://lore.kernel.org/r/z2ule3ilnnpoevo5mvt3intvjtuyud7vg3pbfauon47fhr4owa@giaehpbie4a5
> 
> My question is that if the needs are real, I wonder if
> it should be a vfs generic decision instead (because
> it's not due to the filesystem restriction but due to
> real system memory pressure or heavy workload for
> example).  However, if the answer is that others don't
> really care about this, I'm fine to leave it as an
> erofs-specific option as long as the actual case is
> clear in the commit message.
> 
> Thanks,
> Gao Xiang
> 
> 
> > 
> > Signed-off-by: Chao Yu <chao@kernel.org>
> > ---
> >   Documentation/filesystems/erofs.rst | 1 +
> >   fs/erofs/inode.c                    | 3 ++-
> >   fs/erofs/internal.h                 | 1 +
> >   fs/erofs/super.c                    | 8 +++++++-
> >   4 files changed, 11 insertions(+), 2 deletions(-)
> > 
> > diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
> > index fe06308e546c..d692a1d9f32c 100644
> > --- a/Documentation/filesystems/erofs.rst
> > +++ b/Documentation/filesystems/erofs.rst
> > @@ -137,6 +137,7 @@ fsoffset=%llu          Specify block-aligned filesystem offset for the primary d
> >   inode_share            Enable inode page sharing for this filesystem.  Inodes with
> >                          identical content within the same domain ID can share the
> >                          page cache.
> > +nolargefolio           Disable large folio support for all files.
> >   ===================    =========================================================
> >   Sysfs Entries
> > diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> > index 4b3d21402e10..26361e86a354 100644
> > --- a/fs/erofs/inode.c
> > +++ b/fs/erofs/inode.c
> > @@ -254,7 +254,8 @@ static int erofs_fill_inode(struct inode *inode)
> >   		return 0;
> >   	}
> > -	mapping_set_large_folios(inode->i_mapping);
> > +	if (!test_opt(&EROFS_SB(inode->i_sb)->opt, NO_LARGE_FOLIO))
> > +		mapping_set_large_folios(inode->i_mapping);
> >   	aops = erofs_get_aops(inode, false);
> >   	if (IS_ERR(aops))
> >   		return PTR_ERR(aops);
> > diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> > index a4f0a42cf8c3..b5d98410c699 100644
> > --- a/fs/erofs/internal.h
> > +++ b/fs/erofs/internal.h
> > @@ -177,6 +177,7 @@ struct erofs_sb_info {
> >   #define EROFS_MOUNT_DAX_NEVER		0x00000080
> >   #define EROFS_MOUNT_DIRECT_IO		0x00000100
> >   #define EROFS_MOUNT_INODE_SHARE		0x00000200
> > +#define EROFS_MOUNT_NO_LARGE_FOLIO	0x00000400
> >   #define clear_opt(opt, option)	((opt)->mount_opt &= ~EROFS_MOUNT_##option)
> >   #define set_opt(opt, option)	((opt)->mount_opt |= EROFS_MOUNT_##option)
> > diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> > index 972a0c82198d..a353369d4db8 100644
> > --- a/fs/erofs/super.c
> > +++ b/fs/erofs/super.c
> > @@ -390,7 +390,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
> >   enum {
> >   	Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
> >   	Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
> > -	Opt_inode_share,
> > +	Opt_inode_share, Opt_nolargefolio,
> >   };
> >   static const struct constant_table erofs_param_cache_strategy[] = {
> > @@ -419,6 +419,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
> >   	fsparam_flag_no("directio",	Opt_directio),
> >   	fsparam_u64("fsoffset",		Opt_fsoffset),
> >   	fsparam_flag("inode_share",	Opt_inode_share),
> > +	fsparam_flag("nolargefolio",	Opt_nolargefolio),
> >   	{}
> >   };
> > @@ -541,6 +542,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
> >   		else
> >   			set_opt(&sbi->opt, INODE_SHARE);
> >   		break;
> > +	case Opt_nolargefolio:
> > +		set_opt(&sbi->opt, NO_LARGE_FOLIO);
> > +		break;
> >   	}
> >   	return 0;
> >   }
> > @@ -1105,6 +1109,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
> >   		seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
> >   	if (test_opt(opt, INODE_SHARE))
> >   		seq_puts(seq, ",inode_share");
> > +	if (test_opt(opt, NO_LARGE_FOLIO))
> > +		seq_puts(seq, ",nolargefolio");
> >   	return 0;
> >   }
>
Re: [PATCH] erofs: introduce nolargefolio mount option
Posted by Jan Kara 1 month ago
On Mon 09-03-26 13:03:33, Matthew Wilcox wrote:
> On Mon, Mar 09, 2026 at 11:03:43AM +0800, Gao Xiang wrote:
> > Hi Chao,
> > 
> > (+cc -fsdevel, willy, Jan kara)
> > 
> > On 2026/3/9 10:30, Chao Yu wrote:
> > > This patch introduces a new mount option 'nolargefolio' for EROFS.
> > > When this option is specified, large folio will be disabled by
> > > default for all inodes, this option can be used for environments
> > > where large folio resources are limited, it's necessary to only
> > > let specified user to allocate large folios on demand.
> > 
> > For this kind of options, I think more real backgrounds
> > about avoiding high-order allocations are needed in the
> > commit message (at least for later reference) also like
> > what I observed in:
> > https://android-review.googlesource.com/c/kernel/common/+/3877981
> > 
> > because the entire community tends to enable large folios
> > unconditionally if possible.  Without enough clarification,
> > even I merge this, there will be endless questions again
> > and again about this.
> 
> This was a decision made early on.  If the heuristics are wrong, they
> need to be fixed.  It's very disappointing to see people try to sneak
> these changes into individual filesystems.  Thanks for catching it and
> preventing it from sneaking in.  Chao is not a new contributor; he
> should know better than this by now.

I agree improving the heuristics is much better (fixes the problem for
everyone) than disabling large folios (even more so on a per-fs basis). As
I'm rereading the old thread Gao referenced, the concern here is about small
devices (as in below 1 GB of memory) where apparently the memory overhead of
large folios hurts significantly. Perhaps we could tune the folio order we
allocate based on the current size of the page cache on the device or
something like that?

								Honza

> > And Jan once raised up if it should be a user interface
> > or auto-tuning one:
> > https://lore.kernel.org/r/z2ule3ilnnpoevo5mvt3intvjtuyud7vg3pbfauon47fhr4owa@giaehpbie4a5
> > 
> > My question is that if the needs are real, I wonder if
> > it should be a vfs generic decision instead (because
> > it's not due to the filesystem restriction but due to
> > real system memory pressure or heavy workload for
> > example).  However, if the answer is that others don't
> > really care about this, I'm fine to leave it as an
> > erofs-specific option as long as the actual case is
> > clear in the commit message.
> > 
> > Thanks,
> > Gao Xiang
> > 
> > 
> > > 
> > > Signed-off-by: Chao Yu <chao@kernel.org>
> > > ---
> > >   Documentation/filesystems/erofs.rst | 1 +
> > >   fs/erofs/inode.c                    | 3 ++-
> > >   fs/erofs/internal.h                 | 1 +
> > >   fs/erofs/super.c                    | 8 +++++++-
> > >   4 files changed, 11 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
> > > index fe06308e546c..d692a1d9f32c 100644
> > > --- a/Documentation/filesystems/erofs.rst
> > > +++ b/Documentation/filesystems/erofs.rst
> > > @@ -137,6 +137,7 @@ fsoffset=%llu          Specify block-aligned filesystem offset for the primary d
> > >   inode_share            Enable inode page sharing for this filesystem.  Inodes with
> > >                          identical content within the same domain ID can share the
> > >                          page cache.
> > > +nolargefolio           Disable large folio support for all files.
> > >   ===================    =========================================================
> > >   Sysfs Entries
> > > diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> > > index 4b3d21402e10..26361e86a354 100644
> > > --- a/fs/erofs/inode.c
> > > +++ b/fs/erofs/inode.c
> > > @@ -254,7 +254,8 @@ static int erofs_fill_inode(struct inode *inode)
> > >   		return 0;
> > >   	}
> > > -	mapping_set_large_folios(inode->i_mapping);
> > > +	if (!test_opt(&EROFS_SB(inode->i_sb)->opt, NO_LARGE_FOLIO))
> > > +		mapping_set_large_folios(inode->i_mapping);
> > >   	aops = erofs_get_aops(inode, false);
> > >   	if (IS_ERR(aops))
> > >   		return PTR_ERR(aops);
> > > diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> > > index a4f0a42cf8c3..b5d98410c699 100644
> > > --- a/fs/erofs/internal.h
> > > +++ b/fs/erofs/internal.h
> > > @@ -177,6 +177,7 @@ struct erofs_sb_info {
> > >   #define EROFS_MOUNT_DAX_NEVER		0x00000080
> > >   #define EROFS_MOUNT_DIRECT_IO		0x00000100
> > >   #define EROFS_MOUNT_INODE_SHARE		0x00000200
> > > +#define EROFS_MOUNT_NO_LARGE_FOLIO	0x00000400
> > >   #define clear_opt(opt, option)	((opt)->mount_opt &= ~EROFS_MOUNT_##option)
> > >   #define set_opt(opt, option)	((opt)->mount_opt |= EROFS_MOUNT_##option)
> > > diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> > > index 972a0c82198d..a353369d4db8 100644
> > > --- a/fs/erofs/super.c
> > > +++ b/fs/erofs/super.c
> > > @@ -390,7 +390,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
> > >   enum {
> > >   	Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
> > >   	Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
> > > -	Opt_inode_share,
> > > +	Opt_inode_share, Opt_nolargefolio,
> > >   };
> > >   static const struct constant_table erofs_param_cache_strategy[] = {
> > > @@ -419,6 +419,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
> > >   	fsparam_flag_no("directio",	Opt_directio),
> > >   	fsparam_u64("fsoffset",		Opt_fsoffset),
> > >   	fsparam_flag("inode_share",	Opt_inode_share),
> > > +	fsparam_flag("nolargefolio",	Opt_nolargefolio),
> > >   	{}
> > >   };
> > > @@ -541,6 +542,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
> > >   		else
> > >   			set_opt(&sbi->opt, INODE_SHARE);
> > >   		break;
> > > +	case Opt_nolargefolio:
> > > +		set_opt(&sbi->opt, NO_LARGE_FOLIO);
> > > +		break;
> > >   	}
> > >   	return 0;
> > >   }
> > > @@ -1105,6 +1109,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
> > >   		seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
> > >   	if (test_opt(opt, INODE_SHARE))
> > >   		seq_puts(seq, ",inode_share");
> > > +	if (test_opt(opt, NO_LARGE_FOLIO))
> > > +		seq_puts(seq, ",nolargefolio");
> > >   	return 0;
> > >   }
> > 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR
Re: [PATCH] erofs: introduce nolargefolio mount option
Posted by Gao Xiang 1 month ago
Hi Chao,

(+cc -fsdevel, willy, Jan kara)

On 2026/3/9 10:30, Chao Yu wrote:
> This patch introduces a new mount option 'nolargefolio' for EROFS.
> When this option is specified, large folio will be disabled by
> default for all inodes, this option can be used for environments
> where large folio resources are limited, it's necessary to only
> let specified user to allocate large folios on demand.

For this kind of options, I think more real backgrounds
about avoiding high-order allocations are needed in the
commit message (at least for later reference) also like
what I observed in:
https://android-review.googlesource.com/c/kernel/common/+/3877981

because the entire community tends to enable large folios
unconditionally if possible.  Without enough clarification,
even I merge this, there will be endless questions again
and again about this.

And Jan once raised up if it should be a user interface
or auto-tuning one:
https://lore.kernel.org/r/z2ule3ilnnpoevo5mvt3intvjtuyud7vg3pbfauon47fhr4owa@giaehpbie4a5

My question is that if the needs are real, I wonder if
it should be a vfs generic decision instead (because
it's not due to the filesystem restriction but due to
real system memory pressure or heavy workload for
example).  However, if the answer is that others don't
really care about this, I'm fine to leave it as an
erofs-specific option as long as the actual case is
clear in the commit message.

Thanks,
Gao Xiang


> 
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
>   Documentation/filesystems/erofs.rst | 1 +
>   fs/erofs/inode.c                    | 3 ++-
>   fs/erofs/internal.h                 | 1 +
>   fs/erofs/super.c                    | 8 +++++++-
>   4 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
> index fe06308e546c..d692a1d9f32c 100644
> --- a/Documentation/filesystems/erofs.rst
> +++ b/Documentation/filesystems/erofs.rst
> @@ -137,6 +137,7 @@ fsoffset=%llu          Specify block-aligned filesystem offset for the primary d
>   inode_share            Enable inode page sharing for this filesystem.  Inodes with
>                          identical content within the same domain ID can share the
>                          page cache.
> +nolargefolio           Disable large folio support for all files.
>   ===================    =========================================================
>   
>   Sysfs Entries
> diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> index 4b3d21402e10..26361e86a354 100644
> --- a/fs/erofs/inode.c
> +++ b/fs/erofs/inode.c
> @@ -254,7 +254,8 @@ static int erofs_fill_inode(struct inode *inode)
>   		return 0;
>   	}
>   
> -	mapping_set_large_folios(inode->i_mapping);
> +	if (!test_opt(&EROFS_SB(inode->i_sb)->opt, NO_LARGE_FOLIO))
> +		mapping_set_large_folios(inode->i_mapping);
>   	aops = erofs_get_aops(inode, false);
>   	if (IS_ERR(aops))
>   		return PTR_ERR(aops);
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index a4f0a42cf8c3..b5d98410c699 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -177,6 +177,7 @@ struct erofs_sb_info {
>   #define EROFS_MOUNT_DAX_NEVER		0x00000080
>   #define EROFS_MOUNT_DIRECT_IO		0x00000100
>   #define EROFS_MOUNT_INODE_SHARE		0x00000200
> +#define EROFS_MOUNT_NO_LARGE_FOLIO	0x00000400
>   
>   #define clear_opt(opt, option)	((opt)->mount_opt &= ~EROFS_MOUNT_##option)
>   #define set_opt(opt, option)	((opt)->mount_opt |= EROFS_MOUNT_##option)
> diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> index 972a0c82198d..a353369d4db8 100644
> --- a/fs/erofs/super.c
> +++ b/fs/erofs/super.c
> @@ -390,7 +390,7 @@ static void erofs_default_options(struct erofs_sb_info *sbi)
>   enum {
>   	Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum,
>   	Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, Opt_fsoffset,
> -	Opt_inode_share,
> +	Opt_inode_share, Opt_nolargefolio,
>   };
>   
>   static const struct constant_table erofs_param_cache_strategy[] = {
> @@ -419,6 +419,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
>   	fsparam_flag_no("directio",	Opt_directio),
>   	fsparam_u64("fsoffset",		Opt_fsoffset),
>   	fsparam_flag("inode_share",	Opt_inode_share),
> +	fsparam_flag("nolargefolio",	Opt_nolargefolio),
>   	{}
>   };
>   
> @@ -541,6 +542,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
>   		else
>   			set_opt(&sbi->opt, INODE_SHARE);
>   		break;
> +	case Opt_nolargefolio:
> +		set_opt(&sbi->opt, NO_LARGE_FOLIO);
> +		break;
>   	}
>   	return 0;
>   }
> @@ -1105,6 +1109,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
>   		seq_printf(seq, ",fsoffset=%llu", sbi->dif0.fsoff);
>   	if (test_opt(opt, INODE_SHARE))
>   		seq_puts(seq, ",inode_share");
> +	if (test_opt(opt, NO_LARGE_FOLIO))
> +		seq_puts(seq, ",nolargefolio");
>   	return 0;
>   }
>