 fs/nfs/fs_context.c |  8 ++++++--
 fs/super.c          | 11 ++++++-----
 include/linux/fs.h  |  1 +
 3 files changed, 13 insertions(+), 7 deletions(-)
Commit e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems
without FS_USERNS_MOUNT") prevents mounting, inside a container, any
filesystem that doesn't have FS_USERNS_MOUNT set.
This broke NFS mounts in our containerized environment. We have a daemon
somewhat like systemd-mountfsd running in the init_ns. A process calls
fsopen() inside the container and passes the resulting fd to the daemon
via a unix socket.
The daemon then vets that the request is for an allowed NFS server and
performs the mount. This now fails because fc->user_ns is set to the
container's user namespace and NFS doesn't set FS_USERNS_MOUNT. We don't
want to add FS_USERNS_MOUNT to NFS since that would allow the container
to mount any NFS server (even malicious ones).
Add a new FS_USERNS_DELEGATABLE flag that exempts a filesystem from this
check in sget_fc() without setting FS_USERNS_MOUNT, and enable it on NFS.
Fixes: e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems without FS_USERNS_MOUNT")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
fs/nfs/fs_context.c | 8 ++++++--
fs/super.c | 11 ++++++-----
include/linux/fs.h | 1 +
3 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index b4679b7161b0968810e13f57c889052ea015bf56..128ebd48b4f4ba1c17e8b5b1b9dcefbd7a97db1a 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -1768,7 +1768,9 @@ struct file_system_type nfs_fs_type = {
.init_fs_context = nfs_init_fs_context,
.parameters = nfs_fs_parameters,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE |
+ FS_BINARY_MOUNTDATA |
+ FS_USERNS_DELEGATABLE,
};
MODULE_ALIAS_FS("nfs");
EXPORT_SYMBOL_GPL(nfs_fs_type);
@@ -1780,7 +1782,9 @@ struct file_system_type nfs4_fs_type = {
.init_fs_context = nfs_init_fs_context,
.parameters = nfs_fs_parameters,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE |
+ FS_BINARY_MOUNTDATA |
+ FS_USERNS_DELEGATABLE,
};
MODULE_ALIAS_FS("nfs4");
MODULE_ALIAS("nfs4");
diff --git a/fs/super.c b/fs/super.c
index 3d85265d14001d51524dbaec0778af8f12c048ac..b7f1bb2b679b43261fbdcd586971c551b85e8372 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -738,12 +738,13 @@ struct super_block *sget_fc(struct fs_context *fc,
int err;
/*
- * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT is
- * not set, as the filesystem is likely unprepared to handle it.
- * This can happen when fsconfig() is called from init_user_ns with
- * an fs_fd opened in another user namespace.
+ * Never allow s_user_ns != &init_user_ns when neither FS_USERNS_MOUNT
+ * nor FS_USERNS_DELEGATABLE is set, as the filesystem is likely
+ * unprepared to handle it. This can happen when fsconfig() is called
+ * from init_user_ns with an fs_fd opened in another user namespace.
*/
- if (user_ns != &init_user_ns && !(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) {
+ if (user_ns != &init_user_ns &&
+ !(fc->fs_type->fs_flags & (FS_USERNS_MOUNT | FS_USERNS_DELEGATABLE))) {
errorfc(fc, "VFS: Mounting from non-initial user namespace is not allowed");
return ERR_PTR(-EPERM);
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a01621fa636a60764e1dfe83f2260caf50c4037e..94695ce5e25b5fbe4f321d5478172b8cb24e00d1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2273,6 +2273,7 @@ struct file_system_type {
#define FS_MGTIME 64 /* FS uses multigrain timestamps */
#define FS_LBS 128 /* FS supports LBS */
#define FS_POWER_FREEZE 256 /* Always freeze on suspend/hibernate */
+#define FS_USERNS_DELEGATABLE 512 /* Can be mounted inside userns from outside */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
---
base-commit: 8dfce8991b95d8625d0a1d2896e42f93b9d7f68d
change-id: 20260129-twmount-114ddfd43420
Best regards,
--
Jeff Layton <jlayton@kernel.org>
Am Do., 29. Jan. 2026 um 22:48 Uhr schrieb Jeff Layton <jlayton@kernel.org>:
>
> Commit e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems
> without FS_USERNS_MOUNT") prevents the mount of any filesystem inside a
> container that doesn't have FS_USERNS_MOUNT set.
>
Hi Jeff,
> This broke NFS mounts in our containerized environment. We have a daemon
> somewhat like systemd-mountfsd running in the init_ns. A process does a
> fsopen() inside the container and passes it to the daemon via unix
> socket.
>
> The daemon then vets that the request is for an allowed NFS server and
> performs the mount. This now fails because the fc->user_ns is set to the
> value in the container and NFS doesn't set FS_USERNS_MOUNT. We don't
> want to add FS_USERNS_MOUNT to NFS since that would allow the container
> to mount any NFS server (even malicious ones).
>
> Add a new FS_USERNS_DELEGATABLE flag, and enable it on NFS.
Great idea, very similar to what we have with BPFFS/BPF Tokens.
Taking this patch into account, shouldn't we drop FS_USERNS_MOUNT and
replace it with FS_USERNS_DELEGATABLE for bpffs too?
I mean something like:
======================
$ git diff
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 9f866a010dad..d8dfdc846bd0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -1009,10 +1009,6 @@ static int bpf_fill_super(struct super_block
*sb, struct fs_context *fc)
struct inode *inode;
int ret;
- /* Mounting an instance of BPF FS requires privileges */
- if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
if (ret)
return ret;
@@ -1085,7 +1081,7 @@ static struct file_system_type bpf_fs_type = {
.init_fs_context = bpf_init_fs_context,
.parameters = bpf_fs_parameters,
.kill_sb = bpf_kill_super,
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_DELEGATABLE,
};
static int __init bpf_init(void)
======================
Because it feels like we were basically implementing this FS_USERNS_DELEGATABLE
flag implicitly for BPFFS before. I can submit a patch for BPFFS later
after testing.
>
> Fixes: e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems without FS_USERNS_MOUNT")
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
Kind regards,
Alex
> ---
> fs/nfs/fs_context.c | 8 ++++++--
> fs/super.c | 11 ++++++-----
> include/linux/fs.h | 1 +
> 3 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> index b4679b7161b0968810e13f57c889052ea015bf56..128ebd48b4f4ba1c17e8b5b1b9dcefbd7a97db1a 100644
> --- a/fs/nfs/fs_context.c
> +++ b/fs/nfs/fs_context.c
> @@ -1768,7 +1768,9 @@ struct file_system_type nfs_fs_type = {
> .init_fs_context = nfs_init_fs_context,
> .parameters = nfs_fs_parameters,
> .kill_sb = nfs_kill_super,
> - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
> + .fs_flags = FS_RENAME_DOES_D_MOVE |
> + FS_BINARY_MOUNTDATA |
> + FS_USERNS_DELEGATABLE,
> };
> MODULE_ALIAS_FS("nfs");
> EXPORT_SYMBOL_GPL(nfs_fs_type);
> @@ -1780,7 +1782,9 @@ struct file_system_type nfs4_fs_type = {
> .init_fs_context = nfs_init_fs_context,
> .parameters = nfs_fs_parameters,
> .kill_sb = nfs_kill_super,
> - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
> + .fs_flags = FS_RENAME_DOES_D_MOVE |
> + FS_BINARY_MOUNTDATA |
> + FS_USERNS_DELEGATABLE,
> };
> MODULE_ALIAS_FS("nfs4");
> MODULE_ALIAS("nfs4");
> diff --git a/fs/super.c b/fs/super.c
> index 3d85265d14001d51524dbaec0778af8f12c048ac..b7f1bb2b679b43261fbdcd586971c551b85e8372 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -738,12 +738,13 @@ struct super_block *sget_fc(struct fs_context *fc,
> int err;
>
> /*
> - * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT is
> - * not set, as the filesystem is likely unprepared to handle it.
> - * This can happen when fsconfig() is called from init_user_ns with
> - * an fs_fd opened in another user namespace.
> + * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT or
> + * FS_USERNS_DELEGATABLE is not set, as the filesystem is likely
> + * unprepared to handle it. This can happen when fsconfig() is called
> + * from init_user_ns with an fs_fd opened in another user namespace.
> */
> - if (user_ns != &init_user_ns && !(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) {
> + if (user_ns != &init_user_ns &&
> + !(fc->fs_type->fs_flags & (FS_USERNS_MOUNT | FS_USERNS_DELEGATABLE))) {
> errorfc(fc, "VFS: Mounting from non-initial user namespace is not allowed");
> return ERR_PTR(-EPERM);
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a01621fa636a60764e1dfe83f2260caf50c4037e..94695ce5e25b5fbe4f321d5478172b8cb24e00d1 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2273,6 +2273,7 @@ struct file_system_type {
> #define FS_MGTIME 64 /* FS uses multigrain timestamps */
> #define FS_LBS 128 /* FS supports LBS */
> #define FS_POWER_FREEZE 256 /* Always freeze on suspend/hibernate */
> +#define FS_USERNS_DELEGATABLE 512 /* Can be mounted inside userns from outside */
> #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
> int (*init_fs_context)(struct fs_context *);
> const struct fs_parameter_spec *parameters;
>
> ---
> base-commit: 8dfce8991b95d8625d0a1d2896e42f93b9d7f68d
> change-id: 20260129-twmount-114ddfd43420
>
> Best regards,
> --
> Jeff Layton <jlayton@kernel.org>
>
On Tue, 2026-02-03 at 17:11 +0100, Alexander Mikhalitsyn wrote:
> Am Do., 29. Jan. 2026 um 22:48 Uhr schrieb Jeff Layton <jlayton@kernel.org>:
> >
> > Commit e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems
> > without FS_USERNS_MOUNT") prevents the mount of any filesystem inside a
> > container that doesn't have FS_USERNS_MOUNT set.
> >
>
> Hi Jeff,
>
> > This broke NFS mounts in our containerized environment. We have a daemon
> > somewhat like systemd-mountfsd running in the init_ns. A process does a
> > fsopen() inside the container and passes it to the daemon via unix
> > socket.
> >
> > The daemon then vets that the request is for an allowed NFS server and
> > performs the mount. This now fails because the fc->user_ns is set to the
> > value in the container and NFS doesn't set FS_USERNS_MOUNT. We don't
> > want to add FS_USERNS_MOUNT to NFS since that would allow the container
> > to mount any NFS server (even malicious ones).
> >
> > Add a new FS_USERNS_DELEGATABLE flag, and enable it on NFS.
>
> Great idea, very similar to what we have with BPFFS/BPF Tokens.
>
> Taking into account this patch, shouldn't we drop FS_USERNS_MOUNT and
> replace it with
> FS_USERNS_DELEGATABLE for bpffs too?
>
> I mean something like:
>
> ======================
> $ git diff
> diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
> index 9f866a010dad..d8dfdc846bd0 100644
> --- a/kernel/bpf/inode.c
> +++ b/kernel/bpf/inode.c
> @@ -1009,10 +1009,6 @@ static int bpf_fill_super(struct super_block
> *sb, struct fs_context *fc)
> struct inode *inode;
> int ret;
>
> - /* Mounting an instance of BPF FS requires privileges */
> - if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
> - return -EPERM;
> -
> ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
> if (ret)
> return ret;
> @@ -1085,7 +1081,7 @@ static struct file_system_type bpf_fs_type = {
> .init_fs_context = bpf_init_fs_context,
> .parameters = bpf_fs_parameters,
> .kill_sb = bpf_kill_super,
> - .fs_flags = FS_USERNS_MOUNT,
> + .fs_flags = FS_USERNS_DELEGATABLE,
> };
>
> static int __init bpf_init(void)
> ======================
>
> Because it feels like we were basically implementing this FS_USERNS_DELEGATABLE
> flag implicitly for BPFFS before. I can submit a patch for BPFFS later
> after testing.
>
Yeah, looks like a good thing to change, since it should have the same
effect. Assuming that works the way it should:
Reviewed-by: Jeff Layton <jlayton@kernel.org>
On Tue, Feb 03, 2026 at 11:21:25AM -0500, Jeff Layton wrote:
> On Tue, 2026-02-03 at 17:11 +0100, Alexander Mikhalitsyn wrote:
> > Am Do., 29. Jan. 2026 um 22:48 Uhr schrieb Jeff Layton <jlayton@kernel.org>:
> > >
> > > Commit e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems
> > > without FS_USERNS_MOUNT") prevents the mount of any filesystem inside a
> > > container that doesn't have FS_USERNS_MOUNT set.
> > >
> >
> > Hi Jeff,
> >
> > > This broke NFS mounts in our containerized environment. We have a daemon
> > > somewhat like systemd-mountfsd running in the init_ns. A process does a
> > > fsopen() inside the container and passes it to the daemon via unix
> > > socket.
> > >
> > > The daemon then vets that the request is for an allowed NFS server and
> > > performs the mount. This now fails because the fc->user_ns is set to the
> > > value in the container and NFS doesn't set FS_USERNS_MOUNT. We don't
> > > want to add FS_USERNS_MOUNT to NFS since that would allow the container
> > > to mount any NFS server (even malicious ones).
> > >
> > > Add a new FS_USERNS_DELEGATABLE flag, and enable it on NFS.
> >
> > Great idea, very similar to what we have with BPFFS/BPF Tokens.
> >
> > Taking into account this patch, shouldn't we drop FS_USERNS_MOUNT and
> > replace it with
> > FS_USERNS_DELEGATABLE for bpffs too?
> >
> > I mean something like:
> >
> > ======================
> > $ git diff
> > diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
> > index 9f866a010dad..d8dfdc846bd0 100644
> > --- a/kernel/bpf/inode.c
> > +++ b/kernel/bpf/inode.c
> > @@ -1009,10 +1009,6 @@ static int bpf_fill_super(struct super_block
> > *sb, struct fs_context *fc)
> > struct inode *inode;
> > int ret;
> >
> > - /* Mounting an instance of BPF FS requires privileges */
> > - if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
> > - return -EPERM;
> > -
> > ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
> > if (ret)
> > return ret;
> > @@ -1085,7 +1081,7 @@ static struct file_system_type bpf_fs_type = {
> > .init_fs_context = bpf_init_fs_context,
> > .parameters = bpf_fs_parameters,
> > .kill_sb = bpf_kill_super,
> > - .fs_flags = FS_USERNS_MOUNT,
> > + .fs_flags = FS_USERNS_DELEGATABLE,
> > };
> >
> > static int __init bpf_init(void)
> > ======================
> >
> > Because it feels like we were basically implementing this FS_USERNS_DELEGATABLE
> > flag implicitly for BPFFS before. I can submit a patch for BPFFS later
> > after testing.
Can you send that to the list, please?
Thanks!
Am Di., 3. Feb. 2026 um 17:41 Uhr schrieb Christian Brauner
<brauner@kernel.org>:
>
> On Tue, Feb 03, 2026 at 11:21:25AM -0500, Jeff Layton wrote:
> > On Tue, 2026-02-03 at 17:11 +0100, Alexander Mikhalitsyn wrote:
> > > Am Do., 29. Jan. 2026 um 22:48 Uhr schrieb Jeff Layton <jlayton@kernel.org>:
> > > >
> > > > Commit e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems
> > > > without FS_USERNS_MOUNT") prevents the mount of any filesystem inside a
> > > > container that doesn't have FS_USERNS_MOUNT set.
> > > >
> > >
> > > Hi Jeff,
> > >
> > > > This broke NFS mounts in our containerized environment. We have a daemon
> > > > somewhat like systemd-mountfsd running in the init_ns. A process does a
> > > > fsopen() inside the container and passes it to the daemon via unix
> > > > socket.
> > > >
> > > > The daemon then vets that the request is for an allowed NFS server and
> > > > performs the mount. This now fails because the fc->user_ns is set to the
> > > > value in the container and NFS doesn't set FS_USERNS_MOUNT. We don't
> > > > want to add FS_USERNS_MOUNT to NFS since that would allow the container
> > > > to mount any NFS server (even malicious ones).
> > > >
> > > > Add a new FS_USERNS_DELEGATABLE flag, and enable it on NFS.
> > >
> > > Great idea, very similar to what we have with BPFFS/BPF Tokens.
> > >
> > > Taking into account this patch, shouldn't we drop FS_USERNS_MOUNT and
> > > replace it with
> > > FS_USERNS_DELEGATABLE for bpffs too?
> > >
> > > I mean something like:
> > >
> > > ======================
> > > $ git diff
> > > diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
> > > index 9f866a010dad..d8dfdc846bd0 100644
> > > --- a/kernel/bpf/inode.c
> > > +++ b/kernel/bpf/inode.c
> > > @@ -1009,10 +1009,6 @@ static int bpf_fill_super(struct super_block
> > > *sb, struct fs_context *fc)
> > > struct inode *inode;
> > > int ret;
> > >
> > > - /* Mounting an instance of BPF FS requires privileges */
> > > - if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
> > > - return -EPERM;
> > > -
> > > ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
> > > if (ret)
> > > return ret;
> > > @@ -1085,7 +1081,7 @@ static struct file_system_type bpf_fs_type = {
> > > .init_fs_context = bpf_init_fs_context,
> > > .parameters = bpf_fs_parameters,
> > > .kill_sb = bpf_kill_super,
> > > - .fs_flags = FS_USERNS_MOUNT,
> > > + .fs_flags = FS_USERNS_DELEGATABLE,
> > > };
> > >
> > > static int __init bpf_init(void)
> > > ======================
> > >
> > > Because it feels like we were basically implementing this FS_USERNS_DELEGATABLE
> > > flag implicitly for BPFFS before. I can submit a patch for BPFFS later
> > > after testing.
>
> Can you send that to the list, please?
Sure, I'll do that a bit later! I'm still in Brussels ;-)
> Thanks!
On Thu, 29 Jan 2026 16:47:43 -0500, Jeff Layton wrote:
> Commit e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems
> without FS_USERNS_MOUNT") prevents the mount of any filesystem inside a
> container that doesn't have FS_USERNS_MOUNT set.
>
> This broke NFS mounts in our containerized environment. We have a daemon
> somewhat like systemd-mountfsd running in the init_ns. A process does a
> fsopen() inside the container and passes it to the daemon via unix
> socket.
>
> [...]
Applied to the vfs-7.0.misc branch of the vfs/vfs.git tree.
Patches in the vfs-7.0.misc branch should appear in linux-next soon.
Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.
It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.
Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.
tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-7.0.misc
[1/1] vfs: add FS_USERNS_DELEGATABLE flag and set it for NFS
https://git.kernel.org/vfs/vfs/c/269c46e936f3
On Tue, Feb 3, 2026, at 10:08 AM, Christian Brauner wrote:
> On Thu, 29 Jan 2026 16:47:43 -0500, Jeff Layton wrote:
>> Commit e1c5ae59c0f2 ("fs: don't allow non-init s_user_ns for filesystems
>> without FS_USERNS_MOUNT") prevents the mount of any filesystem inside a
>> container that doesn't have FS_USERNS_MOUNT set.
>>
>> This broke NFS mounts in our containerized environment. We have a daemon
>> somewhat like systemd-mountfsd running in the init_ns. A process does a
>> fsopen() inside the container and passes it to the daemon via unix
>> socket.
>>
>> [...]
>
> Applied to the vfs-7.0.misc branch of the vfs/vfs.git tree.
> Patches in the vfs-7.0.misc branch should appear in linux-next soon.
>
> Please report any outstanding bugs that were missed during review in a
> new review to the original patch series allowing us to drop it.
>
> It's encouraged to provide Acked-bys and Reviewed-bys even though the
> patch has now been applied. If possible patch trailers will be updated.
Acked-by: Anna Schumaker <anna.schumaker@oracle.com>
>
> Note that commit hashes shown below are subject to change due to rebase,
> trailer updates or similar. If in doubt, please check the listed branch.
>
> tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
> branch: vfs-7.0.misc
>
> [1/1] vfs: add FS_USERNS_DELEGATABLE flag and set it for NFS
> https://git.kernel.org/vfs/vfs/c/269c46e936f3
© 2016 - 2026 Red Hat, Inc.