If a mount namespace contains autofs mounts, and they are propagation
private, and there is no namespace specific automount daemon to handle
possible automounting then attempted path resolution will loop until
MAXSYMLINKS is reached before failing causing quite a bit of noise in
the log.
Add a check for this in autofs ->d_automount() so that the VFS can
immediately return an error in this case. Since the mount is propagation
private an EPERM return seems most appropriate.
Signed-off-by: Ian Kent <raven@themaw.net>
---
fs/autofs/autofs_i.h | 4 ++++
fs/autofs/inode.c | 1 +
fs/autofs/root.c | 8 ++++++++
fs/namespace.c | 6 ++++++
include/linux/fs.h | 1 +
5 files changed, 20 insertions(+)
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 23cea74f9933..34533587c66b 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -16,6 +16,7 @@
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
+#include <uapi/linux/mount.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/uaccess.h>
@@ -109,11 +110,14 @@ struct autofs_wait_queue {
#define AUTOFS_SBI_STRICTEXPIRE 0x0002
#define AUTOFS_SBI_IGNORE 0x0004
+struct mnt_namespace;
+
struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
struct pid *oz_pgrp;
+ struct mnt_namespace *owner;
int version;
int sub_version;
int min_proto;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f5c16ffba013..0a29761f39c0 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
sbi->pipefd = -1;
+ sbi->owner = current->nsproxy->mnt_ns;
set_autofs_type_indirect(&sbi->type);
mutex_init(&sbi->wq_mutex);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 174c7205fee4..8cce86158f20 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
if (autofs_oz_mode(sbi))
return NULL;
+ /* Refuse to trigger mount if current namespace is not the owner
+ * and the mount is propagation private.
+ */
+ if (sbi->owner != current->nsproxy->mnt_ns) {
+ if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
+ return ERR_PTR(-EPERM);
+ }
+
/*
* If an expire request is pending everyone must wait.
* If the expire fails we're still mounted so continue
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..27bb12693cba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
return propagation;
}
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
+{
+ return mnt_to_propagation_flags(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
+
static void statmount_sb_basic(struct kstatmount *s)
{
struct super_block *sb = s->mnt->mnt_sb;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..a5c2077ce6ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
extern bool path_is_under(const struct path *, const struct path *);
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
extern char *file_path(struct file *, char *, int);
--
2.51.1
On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
> If a mount namespace contains autofs mounts, and they are propagation
> private, and there is no namespace specific automount daemon to handle
> possible automounting then attempted path resolution will loop until
> MAXSYMLINKS is reached before failing causing quite a bit of noise in
> the log.
>
> Add a check for this in autofs ->d_automount() so that the VFS can
> immediately return an error in this case. Since the mount is propagation
> private an EPERM return seems most appropriate.
>
> Signed-off-by: Ian Kent <raven@themaw.net>
> ---
> fs/autofs/autofs_i.h | 4 ++++
> fs/autofs/inode.c | 1 +
> fs/autofs/root.c | 8 ++++++++
> fs/namespace.c | 6 ++++++
> include/linux/fs.h | 1 +
> 5 files changed, 20 insertions(+)
>
> diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
> index 23cea74f9933..34533587c66b 100644
> --- a/fs/autofs/autofs_i.h
> +++ b/fs/autofs/autofs_i.h
> @@ -16,6 +16,7 @@
> #include <linux/wait.h>
> #include <linux/sched.h>
> #include <linux/sched/signal.h>
> +#include <uapi/linux/mount.h>
> #include <linux/mount.h>
> #include <linux/namei.h>
> #include <linux/uaccess.h>
> @@ -109,11 +110,14 @@ struct autofs_wait_queue {
> #define AUTOFS_SBI_STRICTEXPIRE 0x0002
> #define AUTOFS_SBI_IGNORE 0x0004
>
> +struct mnt_namespace;
> +
> struct autofs_sb_info {
> u32 magic;
> int pipefd;
> struct file *pipe;
> struct pid *oz_pgrp;
> + struct mnt_namespace *owner;
> int version;
> int sub_version;
> int min_proto;
> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..0a29761f39c0 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
> sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
> sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
> sbi->pipefd = -1;
> + sbi->owner = current->nsproxy->mnt_ns;
ns_ref_get()
Can be called directly on the mount namespace.
On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: > > + sbi->owner = current->nsproxy->mnt_ns; > > ns_ref_get() > Can be called directly on the mount namespace. ... and would leak all mounts in the mount tree, unless I'm missing something subtle.
On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>
> > > + sbi->owner = current->nsproxy->mnt_ns;
> >
> > ns_ref_get()
> > Can be called directly on the mount namespace.
>
> ... and would leak all mounts in the mount tree, unless I'm missing
> something subtle.
Right, I thought you actually wanted to pin it.
Anyway, you could take a passive reference but I think that's nonsense
as well. The following should do it:
UNTESTED, UNCOMPILED
---
fs/autofs/autofs_i.h | 4 ++++
fs/autofs/inode.c | 3 +++
fs/autofs/root.c | 10 ++++++++++
fs/namespace.c | 6 ++++++
include/linux/fs.h | 1 +
5 files changed, 24 insertions(+)
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 23cea74f9933..2b9d2300d351 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -16,6 +16,7 @@
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
+#include <uapi/linux/mount.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/uaccess.h>
@@ -109,11 +110,14 @@ struct autofs_wait_queue {
#define AUTOFS_SBI_STRICTEXPIRE 0x0002
#define AUTOFS_SBI_IGNORE 0x0004
struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
struct pid *oz_pgrp;
+ u64 mnt_ns_id;
int version;
int sub_version;
int min_proto;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f5c16ffba013..247a5784d192 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -6,8 +6,10 @@
#include <linux/seq_file.h>
#include <linux/pagemap.h>
+#include <linux/ns_common.h>
#include "autofs_i.h"
+#include "../mount.h"
struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
{
@@ -251,6 +253,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
sbi->pipefd = -1;
+ sbi->mnt_ns_id = to_ns_common(current->nsproxy->mnt_ns)->ns_id;
set_autofs_type_indirect(&sbi->type);
mutex_init(&sbi->wq_mutex);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 174c7205fee4..f06f62d23e76 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -7,8 +7,10 @@
#include <linux/capability.h>
#include <linux/compat.h>
+#include <linux/ns_common.h>
#include "autofs_i.h"
+#include "../mount.h"
static int autofs_dir_permission(struct mnt_idmap *, struct inode *, int);
static int autofs_dir_symlink(struct mnt_idmap *, struct inode *,
@@ -341,6 +343,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
if (autofs_oz_mode(sbi))
return NULL;
+ /* Refuse to trigger mount if current namespace is not the owner
+ * and the mount is propagation private.
+ */
+ if (sbi->mnt_ns_id != to_ns_common(current->nsproxy->mnt_ns)->ns_id) {
+ if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
+ return ERR_PTR(-EPERM);
+ }
+
/*
* If an expire request is pending everyone must wait.
* If the expire fails we're still mounted so continue
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..27bb12693cba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
return propagation;
}
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
+{
+ return mnt_to_propagation_flags(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
+
static void statmount_sb_basic(struct kstatmount *s)
{
struct super_block *sb = s->mnt->mnt_sb;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..a5c2077ce6ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
extern bool path_is_under(const struct path *, const struct path *);
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
extern char *file_path(struct file *, char *, int);
On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..0a29761f39c0 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
> sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
> sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
> sbi->pipefd = -1;
> + sbi->owner = current->nsproxy->mnt_ns;
>
> set_autofs_type_indirect(&sbi->type);
> mutex_init(&sbi->wq_mutex);
> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
> index 174c7205fee4..8cce86158f20 100644
> --- a/fs/autofs/root.c
> +++ b/fs/autofs/root.c
> @@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
> if (autofs_oz_mode(sbi))
> return NULL;
>
> + /* Refuse to trigger mount if current namespace is not the owner
> + * and the mount is propagation private.
> + */
> + if (sbi->owner != current->nsproxy->mnt_ns) {
> + if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
> + return ERR_PTR(-EPERM);
> + }
> +
Huh? What's to guarantee that superblock won't outlive the namespace?
That looks seriously bogus.
On 11/11/25 14:59, Al Viro wrote:
> On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
>
>> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
>> index f5c16ffba013..0a29761f39c0 100644
>> --- a/fs/autofs/inode.c
>> +++ b/fs/autofs/inode.c
>> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
>> sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
>> sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
>> sbi->pipefd = -1;
>> + sbi->owner = current->nsproxy->mnt_ns;
>>
>> set_autofs_type_indirect(&sbi->type);
>> mutex_init(&sbi->wq_mutex);
>> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
>> index 174c7205fee4..8cce86158f20 100644
>> --- a/fs/autofs/root.c
>> +++ b/fs/autofs/root.c
>> @@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
>> if (autofs_oz_mode(sbi))
>> return NULL;
>>
>> + /* Refuse to trigger mount if current namespace is not the owner
>> + * and the mount is propagation private.
>> + */
>> + if (sbi->owner != current->nsproxy->mnt_ns) {
>> + if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
>> + return ERR_PTR(-EPERM);
>> + }
>> +
> Huh? What's to guarantee that superblock won't outlive the namespace?
Not 30 minutes after I posted these I was thinking about the case the daemon
(that mounted this) going away, very loosely similar I think. Setting the
mounting process's namespace when it mounts it is straight forward but what
can I do if the process crashes ...
I did think that if the namespace is saved away by the process that mounts
it that the mount namespace would be valid at least until it umounts it but
yes there are a few things that can go wrong ...
Any ideas how I can identify this case?
Ian
On Tue, Nov 11, 2025 at 04:25:29PM +0800, Ian Kent wrote: > > Huh? What's to guarantee that superblock won't outlive the namespace? > > Not 30 minutes after I posted these I was thinking about the case the daemon > > (that mounted this) going away, very loosely similar I think. Setting the > > mounting process's namespace when it mounts it is straight forward but what > > can I do if the process crashes ... > > > I did think that if the namespace is saved away by the process that mounts > > it that the mount namespace would be valid at least until it umounts it but > > yes there are a few things that can go wrong ... Umm... 1) super_block != mount; unshare(CLONE_NEWNS) by anyone in the namespace of that mount *will* create a clone of that mount, with exact same ->mnt_sb and yes, in a separate namespace. 2) mount does not pin a namespace. chdir into it, umount -l and there you go... 3) mount(2) can bloody well create more than one struct mount, all with the same ->mnt_sb. So I'd say there's more than a few things that can go wrong here. Said that, this "we need a daemon in that namespace" has been a source of assorted headaches for decades now; could we do anything about that? After all, a thread can switch from one namespace to another these days (setns(2))...
On 11/11/25 17:04, Al Viro wrote: > On Tue, Nov 11, 2025 at 04:25:29PM +0800, Ian Kent wrote: > >>> Huh? What's to guarantee that superblock won't outlive the namespace? >> Not 30 minutes after I posted these I was thinking about the case the daemon >> >> (that mounted this) going away, very loosely similar I think. Setting the >> >> mounting process's namespace when it mounts it is straight forward but what >> >> can I do if the process crashes ... >> >> >> I did think that if the namespace is saved away by the process that mounts >> >> it that the mount namespace would be valid at least until it umounts it but >> >> yes there are a few things that can go wrong ... > Umm... > > 1) super_block != mount; unshare(CLONE_NEWNS) by anyone in the namespace of > that mount *will* create a clone of that mount, with exact same ->mnt_sb > and yes, in a separate namespace. > > 2) mount does not pin a namespace. chdir into it, umount -l and there you go... > > 3) mount(2) can bloody well create more than one struct mount, all with the > same ->mnt_sb. > > So I'd say there's more than a few things that can go wrong here. > > Said that, this "we need a daemon in that namespace" has been a source of > assorted headaches for decades now; could we do anything about that? > After all, a thread can switch from one namespace to another these > days (setns(2))... Not sure the motivation here is "we need a daemon in that namespace", it's more my struggling to find a suitable check for the problem case even though it does look a lot like what you say. Thinking back the problem has always been the amount of stuff that's not appropriate for the kernel, automount map parsing is pretty ugly, for example. A better split of duties is probably what we would need to do. But if we did that then we'd probably want to re-define/re-write the user space <-> kernel space communications as well to assist with the division of labour. In the beginning there was a proposal to do just that, not sure I still have that stuff, I'll have a look around ... or do you have something else specific in mind? Anyway, for the moment, I think your saying just taking an ns_common reference will be problematic as well. Ian
On Tue, Nov 11, 2025 at 06:13:19PM +0800, Ian Kent wrote: > Anyway, for the moment, I think your saying just taking an ns_common > reference > > will be problematic as well. If it is persistent - definitely. We'd get a massive leak, since namespace is torn apart when the refcount drops to zero.
© 2016 - 2025 Red Hat, Inc.