If a mount namespace contains autofs mounts, and they are propagation
private, and there is no namespace specific automount daemon to handle
possible automounting then attempted path resolution will loop until
MAXSYMLINKS is reached before failing causing quite a bit of noise in
the log.
Add a check for this in autofs ->d_automount() so that the VFS can
immediately return an error in this case. Since the mount is propagation
private an EPERM return seems most appropriate.
Signed-off-by: Ian Kent <raven@themaw.net>
---
fs/autofs/autofs_i.h | 4 ++++
fs/autofs/inode.c | 1 +
fs/autofs/root.c | 8 ++++++++
fs/namespace.c | 6 ++++++
include/linux/fs.h | 1 +
5 files changed, 20 insertions(+)
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 23cea74f9933..34533587c66b 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -16,6 +16,7 @@
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
+#include <uapi/linux/mount.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/uaccess.h>
@@ -109,11 +110,14 @@ struct autofs_wait_queue {
#define AUTOFS_SBI_STRICTEXPIRE 0x0002
#define AUTOFS_SBI_IGNORE 0x0004
+struct mnt_namespace;
+
struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
struct pid *oz_pgrp;
+ struct mnt_namespace *owner;
int version;
int sub_version;
int min_proto;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f5c16ffba013..0a29761f39c0 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
sbi->pipefd = -1;
+ sbi->owner = current->nsproxy->mnt_ns;
set_autofs_type_indirect(&sbi->type);
mutex_init(&sbi->wq_mutex);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 174c7205fee4..8cce86158f20 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
if (autofs_oz_mode(sbi))
return NULL;
+ /* Refuse to trigger mount if current namespace is not the owner
+ * and the mount is propagation private.
+ */
+ if (sbi->owner != current->nsproxy->mnt_ns) {
+ if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
+ return ERR_PTR(-EPERM);
+ }
+
/*
* If an expire request is pending everyone must wait.
* If the expire fails we're still mounted so continue
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..27bb12693cba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
return propagation;
}
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
+{
+ return mnt_to_propagation_flags(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
+
static void statmount_sb_basic(struct kstatmount *s)
{
struct super_block *sb = s->mnt->mnt_sb;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..a5c2077ce6ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
extern bool path_is_under(const struct path *, const struct path *);
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
extern char *file_path(struct file *, char *, int);
--
2.51.1
On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
> If a mount namespace contains autofs mounts, and they are propagation
> private, and there is no namespace specific automount daemon to handle
> possible automounting then attempted path resolution will loop until
> MAXSYMLINKS is reached before failing causing quite a bit of noise in
> the log.
>
> Add a check for this in autofs ->d_automount() so that the VFS can
> immediately return an error in this case. Since the mount is propagation
> private an EPERM return seems most appropriate.
>
> Signed-off-by: Ian Kent <raven@themaw.net>
> ---
> fs/autofs/autofs_i.h | 4 ++++
> fs/autofs/inode.c | 1 +
> fs/autofs/root.c | 8 ++++++++
> fs/namespace.c | 6 ++++++
> include/linux/fs.h | 1 +
> 5 files changed, 20 insertions(+)
>
> diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
> index 23cea74f9933..34533587c66b 100644
> --- a/fs/autofs/autofs_i.h
> +++ b/fs/autofs/autofs_i.h
> @@ -16,6 +16,7 @@
> #include <linux/wait.h>
> #include <linux/sched.h>
> #include <linux/sched/signal.h>
> +#include <uapi/linux/mount.h>
> #include <linux/mount.h>
> #include <linux/namei.h>
> #include <linux/uaccess.h>
> @@ -109,11 +110,14 @@ struct autofs_wait_queue {
> #define AUTOFS_SBI_STRICTEXPIRE 0x0002
> #define AUTOFS_SBI_IGNORE 0x0004
>
> +struct mnt_namespace;
> +
> struct autofs_sb_info {
> u32 magic;
> int pipefd;
> struct file *pipe;
> struct pid *oz_pgrp;
> + struct mnt_namespace *owner;
> int version;
> int sub_version;
> int min_proto;
> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..0a29761f39c0 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
> sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
> sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
> sbi->pipefd = -1;
> + sbi->owner = current->nsproxy->mnt_ns;
ns_ref_get()
Can be called directly on the mount namespace.
On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: > > + sbi->owner = current->nsproxy->mnt_ns; > > ns_ref_get() > Can be called directly on the mount namespace. ... and would leak all mounts in the mount tree, unless I'm missing something subtle.
On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>
> > > + sbi->owner = current->nsproxy->mnt_ns;
> >
> > ns_ref_get()
> > Can be called directly on the mount namespace.
>
> ... and would leak all mounts in the mount tree, unless I'm missing
> something subtle.
Right, I thought you actually wanted to pin it.
Anyway, you could take a passive reference but I think that's nonsense
as well. The following should do it:
UNTESTED, UNCOMPILED
---
fs/autofs/autofs_i.h | 4 ++++
fs/autofs/inode.c | 3 +++
fs/autofs/root.c | 10 ++++++++++
fs/namespace.c | 6 ++++++
include/linux/fs.h | 1 +
5 files changed, 24 insertions(+)
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 23cea74f9933..2b9d2300d351 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -16,6 +16,7 @@
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
+#include <uapi/linux/mount.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/uaccess.h>
@@ -109,11 +110,14 @@ struct autofs_wait_queue {
#define AUTOFS_SBI_STRICTEXPIRE 0x0002
#define AUTOFS_SBI_IGNORE 0x0004
struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
struct pid *oz_pgrp;
+ u64 mnt_ns_id;
int version;
int sub_version;
int min_proto;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f5c16ffba013..247a5784d192 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -6,8 +6,10 @@
#include <linux/seq_file.h>
#include <linux/pagemap.h>
+#include <linux/ns_common.h>
#include "autofs_i.h"
+#include "../mount.h"
struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
{
@@ -251,6 +253,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
sbi->pipefd = -1;
+ sbi->mnt_ns_id = to_ns_common(current->nsproxy->mnt_ns)->ns_id;
set_autofs_type_indirect(&sbi->type);
mutex_init(&sbi->wq_mutex);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 174c7205fee4..f06f62d23e76 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -7,8 +7,10 @@
#include <linux/capability.h>
#include <linux/compat.h>
+#include <linux/ns_common.h>
#include "autofs_i.h"
+#include "../mount.h"
static int autofs_dir_permission(struct mnt_idmap *, struct inode *, int);
static int autofs_dir_symlink(struct mnt_idmap *, struct inode *,
@@ -341,6 +343,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
if (autofs_oz_mode(sbi))
return NULL;
+ /* Refuse to trigger mount if current namespace is not the owner
+ * and the mount is propagation private.
+ */
+ if (sbi->mnt_ns_id != to_ns_common(current->nsproxy->mnt_ns)->ns_id) {
+ if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
+ return ERR_PTR(-EPERM);
+ }
+
/*
* If an expire request is pending everyone must wait.
* If the expire fails we're still mounted so continue
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..27bb12693cba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
return propagation;
}
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
+{
+ return mnt_to_propagation_flags(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
+
static void statmount_sb_basic(struct kstatmount *s)
{
struct super_block *sb = s->mnt->mnt_sb;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..a5c2077ce6ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
extern bool path_is_under(const struct path *, const struct path *);
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
extern char *file_path(struct file *, char *, int);
On 11/11/25 18:55, Christian Brauner wrote:
> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>>
>>>> + sbi->owner = current->nsproxy->mnt_ns;
>>> ns_ref_get()
>>> Can be called directly on the mount namespace.
>> ... and would leak all mounts in the mount tree, unless I'm missing
>> something subtle.
> Right, I thought you actually wanted to pin it.
> Anyway, you could take a passive reference but I think that's nonsense
> as well. The following should do it:
Right, I'll need to think about this for a little while, I did think
of using an id for the comparison but I diverged down the wrong path so
this is a very welcome suggestion. There's still the handling of where
the daemon goes away (crash or SIGKILL, yes people deliberately do this
at times, think simulated disaster recovery) which I've missed in this
revision.
Al, thoughts please?
>
> UNTESTED, UNCOMPILED
I'll give it a whirl, ;)
>
> ---
> fs/autofs/autofs_i.h | 4 ++++
> fs/autofs/inode.c | 3 +++
> fs/autofs/root.c | 10 ++++++++++
> fs/namespace.c | 6 ++++++
> include/linux/fs.h | 1 +
> 5 files changed, 24 insertions(+)
>
> diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
> index 23cea74f9933..2b9d2300d351 100644
> --- a/fs/autofs/autofs_i.h
> +++ b/fs/autofs/autofs_i.h
> @@ -16,6 +16,7 @@
> #include <linux/wait.h>
> #include <linux/sched.h>
> #include <linux/sched/signal.h>
> +#include <uapi/linux/mount.h>
> #include <linux/mount.h>
> #include <linux/namei.h>
> #include <linux/uaccess.h>
> @@ -109,11 +110,14 @@ struct autofs_wait_queue {
> #define AUTOFS_SBI_STRICTEXPIRE 0x0002
> #define AUTOFS_SBI_IGNORE 0x0004
>
> struct autofs_sb_info {
> u32 magic;
> int pipefd;
> struct file *pipe;
> struct pid *oz_pgrp;
> + u64 mnt_ns_id;
> int version;
> int sub_version;
> int min_proto;
> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..247a5784d192 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -6,8 +6,10 @@
>
> #include <linux/seq_file.h>
> #include <linux/pagemap.h>
> +#include <linux/ns_common.h>
>
> #include "autofs_i.h"
> +#include "../mount.h"
As a module I try really hard to avoid use of non-public definitions.
But if everyone is happy I'm happy too!
>
> struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
> {
> @@ -251,6 +253,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
> sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
> sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
> sbi->pipefd = -1;
> + sbi->mnt_ns_id = to_ns_common(current->nsproxy->mnt_ns)->ns_id;
>
> set_autofs_type_indirect(&sbi->type);
> mutex_init(&sbi->wq_mutex);
> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
> index 174c7205fee4..f06f62d23e76 100644
> --- a/fs/autofs/root.c
> +++ b/fs/autofs/root.c
> @@ -7,8 +7,10 @@
>
> #include <linux/capability.h>
> #include <linux/compat.h>
> +#include <linux/ns_common.h>
>
> #include "autofs_i.h"
> +#include "../mount.h"
>
> static int autofs_dir_permission(struct mnt_idmap *, struct inode *, int);
> static int autofs_dir_symlink(struct mnt_idmap *, struct inode *,
> @@ -341,6 +343,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
> if (autofs_oz_mode(sbi))
> return NULL;
>
> + /* Refuse to trigger mount if current namespace is not the owner
> + * and the mount is propagation private.
> + */
> + if (sbi->mnt_ns_id != to_ns_common(current->nsproxy->mnt_ns)->ns_id) {
> + if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
> + return ERR_PTR(-EPERM);
> + }
> +
> /*
> * If an expire request is pending everyone must wait.
> * If the expire fails we're still mounted so continue
> diff --git a/fs/namespace.c b/fs/namespace.c
> index d82910f33dc4..27bb12693cba 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
> return propagation;
> }
>
> +u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
> +{
> + return mnt_to_propagation_flags(real_mount(mnt));
> +}
> +EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
> +
> static void statmount_sb_basic(struct kstatmount *s)
> {
> struct super_block *sb = s->mnt->mnt_sb;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index c895146c1444..a5c2077ce6ed 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
> /* fs/dcache.c -- generic fs support functions */
> extern bool is_subdir(struct dentry *, struct dentry *);
> extern bool path_is_under(const struct path *, const struct path *);
> +u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
>
> extern char *file_path(struct file *, char *, int);
>
On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote: > On 11/11/25 18:55, Christian Brauner wrote: > > On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote: > > > On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: > > > > > > > > + sbi->owner = current->nsproxy->mnt_ns; > > > > ns_ref_get() > > > > Can be called directly on the mount namespace. > > > ... and would leak all mounts in the mount tree, unless I'm missing > > > something subtle. > > Right, I thought you actually wanted to pin it. > > Anyway, you could take a passive reference but I think that's nonsense > > as well. The following should do it: > > Right, I'll need to think about this for a little while, I did think > > of using an id for the comparison but I diverged down the wrong path so > > this is a very welcome suggestion. There's still the handling of where > > the daemon goes away (crash or SIGKILL, yes people deliberately do this > > at times, think simulated disaster recovery) which I've missed in this Can you describe the problem in more detail and I'm happy to help you out here. I don't yet understand what the issue is.
On 12/11/25 19:01, Christian Brauner wrote: > On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote: >> On 11/11/25 18:55, Christian Brauner wrote: >>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote: >>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: >>>> >>>>>> + sbi->owner = current->nsproxy->mnt_ns; >>>>> ns_ref_get() >>>>> Can be called directly on the mount namespace. >>>> ... and would leak all mounts in the mount tree, unless I'm missing >>>> something subtle. >>> Right, I thought you actually wanted to pin it. >>> Anyway, you could take a passive reference but I think that's nonsense >>> as well. The following should do it: >> Right, I'll need to think about this for a little while, I did think >> >> of using an id for the comparison but I diverged down the wrong path so >> >> this is a very welcome suggestion. There's still the handling of where >> >> the daemon goes away (crash or SIGKILL, yes people deliberately do this >> >> at times, think simulated disaster recovery) which I've missed in this > Can you describe the problem in more detail and I'm happy to help you > out here. I don't yet understand what the issue is. I thought the patch description was ok but I'll certainly try. Consider using automount in a container. For people to use autofs in a container either automount(8) in the init mount namespace or an independently running automount(8) entirely within the container can be used. The later is done by adding a volume option (or options) to the container to essentially bind mount the autofs mount into the container and the option syntax allows the volume to be set propagation slave if it is not already set by default (shared is bad, the automounts must not propagate back to where they came from). If the automount(8) instance is entirely within the container that also works fine as everything is isolated within the container (no volume options are needed). Now with unshare(1) (and there are other problematic cases, I think systemd private temp gets caught here too) where using something like "unshare -Urm" will create a mount namespace that includes any autofs mounts and sets them propagation private. These mounts cannot be unmounted within the mount namepsace by the namespace creator and accessing a directory within the autofs mount will trigger a callback to automount(8) in the init namespace which mounts the requested mount. But the newly created mount namespace is propagation private so the process in the new mount namespace loops around sending mount requests that cannot be satisfied. The odd thing is that on the second callback to automount(8) returns an error which does complete the ->d_automount() call but doesn't seem to result in breaking the loop in __traverse_mounts() for some unknown reason. One way to resolve this is to check if the mount can be satisfied and if not bail out immediately and returning an error in this case does work. I was tempted to work out how to not include the autofs mounts in the cloned namespace but that's file system specific code in the VFS which is not ok and it (should) also be possible for the namespace creator to "mount --make-shared" in the case the creator wants the mount to function and this would prevent that. So I don't think this is the right thing to do. There's also the inability of the mount namespace creator to umount the autofs mount which could also resolve the problem which I haven't looked into yet. Have I made sense? Clearly there's nothing on autofs itself and why one would want to use it but I don't think that matters for the description. Ian
On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote: > On 12/11/25 19:01, Christian Brauner wrote: > > On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote: > > > On 11/11/25 18:55, Christian Brauner wrote: > > > > On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote: > > > > > On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: > > > > > > > > > > > > + sbi->owner = current->nsproxy->mnt_ns; > > > > > > ns_ref_get() > > > > > > Can be called directly on the mount namespace. > > > > > ... and would leak all mounts in the mount tree, unless I'm missing > > > > > something subtle. > > > > Right, I thought you actually wanted to pin it. > > > > Anyway, you could take a passive reference but I think that's nonsense > > > > as well. The following should do it: > > > Right, I'll need to think about this for a little while, I did think > > > > > > of using an id for the comparison but I diverged down the wrong path so > > > > > > this is a very welcome suggestion. There's still the handling of where > > > > > > the daemon goes away (crash or SIGKILL, yes people deliberately do this > > > > > > at times, think simulated disaster recovery) which I've missed in this > > Can you describe the problem in more detail and I'm happy to help you > > out here. I don't yet understand what the issue is. > > I thought the patch description was ok but I'll certainly try. I'm sorry, we're talking past each other: I was interested in your SIGKILL problem when the daemon crashes. You seemed to say that you needed additional changes for that case. So I'm trying to understand what the fundamental additional problem is with a crashing daemon that would require additional changes here. > > > Consider using automount in a container. > > > For people to use autofs in a container either automount(8) in the init > > mount namespace or an independently running automount(8) entirely within > > the container can be used. The later is done by adding a volume option > > (or options) to the container to essentially bind mount the autofs mount > > into the container and the option syntax allows the volume to be set > > propagation slave if it is not already set by default (shared is bad, > > the automounts must not propagate back to where they came from). If the > > automount(8) instance is entirely within the container that also works > > fine as everything is isolated within the container (no volume options > > are needed). > > > Now with unshare(1) (and there are other problematic cases, I think systemd > > private temp gets caught here too) where using something like "unshare -Urm" > > will create a mount namespace that includes any autofs mounts and sets them > > propagation private. These mounts cannot be unmounted within the mount > > namepsace by the namespace creator and accessing a directory within the Right, but that should only be true for unprivileged containers where we lock mounts at copy_mnt_ns(). > > autofs mount will trigger a callback to automount(8) in the init namespace > > which mounts the requested mount. But the newly created mount namespace is > > propagation private so the process in the new mount namespace loops around > > sending mount requests that cannot be satisfied. The odd thing is that on > the > > second callback to automount(8) returns an error which does complete the > > ->d_automount() call but doesn't seem to result in breaking the loop in > > __traverse_mounts() for some unknown reason. One way to resolve this is to > > check if the mount can be satisfied and if not bail out immediately and > > returning an error in this case does work. Yes, that's sensible. And fwiw, I think for private mounts that's the semantics you want. You have disconnected from the "managing" mount namespace - for lack of a better phrase - so you shouldn't get the mount events. > I was tempted to work out how to not include the autofs mounts in the cloned > > namespace but that's file system specific code in the VFS which is not ok > and > > it (should) also be possible for the namespace creator to "mount > --make-shared" > > in the case the creator wants the mount to function and this would prevent > that. > > So I don't think this is the right thing to do. > > > There's also the inability of the mount namespace creator to umount the > autofs > > mount which could also resolve the problem which I haven't looked into yet. Ok, again, that should only be an issue with unprivileged mount namespaces, i.e., owned by another user namespace. This isn't easily doable. If the unprivileged mount namespaces can unmount the automount it might reveal hidden/overmounted directories that weren't supposed to be exposed to the container - I hate these semantics btw. > > > Have I made sense? Yes, though that's not the question I tried to ask you. :) > > > Clearly there's nothing on autofs itself and why one would want to use it > > but I don't think that matters for the description. > > > Ian >
On 13/11/25 21:19, Christian Brauner wrote: > On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote: >> On 12/11/25 19:01, Christian Brauner wrote: >>> On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote: >>>> On 11/11/25 18:55, Christian Brauner wrote: >>>>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote: >>>>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: >>>>>> >>>>>>>> + sbi->owner = current->nsproxy->mnt_ns; >>>>>>> ns_ref_get() >>>>>>> Can be called directly on the mount namespace. >>>>>> ... and would leak all mounts in the mount tree, unless I'm missing >>>>>> something subtle. >>>>> Right, I thought you actually wanted to pin it. >>>>> Anyway, you could take a passive reference but I think that's nonsense >>>>> as well. The following should do it: >>>> Right, I'll need to think about this for a little while, I did think >>>> >>>> of using an id for the comparison but I diverged down the wrong path so >>>> >>>> this is a very welcome suggestion. There's still the handling of where >>>> >>>> the daemon goes away (crash or SIGKILL, yes people deliberately do this >>>> >>>> at times, think simulated disaster recovery) which I've missed in this >>> Can you describe the problem in more detail and I'm happy to help you >>> out here. I don't yet understand what the issue is. >> I thought the patch description was ok but I'll certainly try. > I'm sorry, we're talking past each other: I was interested in your > SIGKILL problem when the daemon crashes. You seemed to say that you > needed additional changes for that case. So I'm trying to understand > what the fundamental additional problem is with a crashing daemon that > would require additional changes here. Right, sorry. It's pretty straight forward. If the daemon is shutdown (or killed summarily) and there are busy mounts left mounted then when started again they are "re-connected to" by the newly running daemon. So there's a need to update the mnt_ns_id in the ioctl that is used to set the new pipefd. I can't provide a patch fragment because I didn't realise the id in ns_common was added a your recent patch series and I briefly went down a path trying to compile against 6.16.7 before I realised I hadn't been paying attention. The setting needs to be put in fs/autofs/dev-ioctl.c:autofs_dev_ioctl_setpipefd(). Ian > >> >> Consider using automount in a container. >> >> >> For people to use autofs in a container either automount(8) in the init >> >> mount namespace or an independently running automount(8) entirely within >> >> the container can be used. The later is done by adding a volume option >> >> (or options) to the container to essentially bind mount the autofs mount >> >> into the container and the option syntax allows the volume to be set >> >> propagation slave if it is not already set by default (shared is bad, >> >> the automounts must not propagate back to where they came from). If the >> >> automount(8) instance is entirely within the container that also works >> >> fine as everything is isolated within the container (no volume options >> >> are needed). >> >> >> Now with unshare(1) (and there are other problematic cases, I think systemd >> >> private temp gets caught here too) where using something like "unshare -Urm" >> >> will create a mount namespace that includes any autofs mounts and sets them >> >> propagation private. These mounts cannot be unmounted within the mount >> >> namepsace by the namespace creator and accessing a directory within the > Right, but that should only be true for unprivileged containers where we > lock mounts at copy_mnt_ns(). > >> autofs mount will trigger a callback to automount(8) in the init namespace >> >> which mounts the requested mount. But the newly created mount namespace is >> >> propagation private so the process in the new mount namespace loops around >> >> sending mount requests that cannot be satisfied. The odd thing is that on >> the >> >> second callback to automount(8) returns an error which does complete the >> >> ->d_automount() call but doesn't seem to result in breaking the loop in >> >> __traverse_mounts() for some unknown reason. One way to resolve this is to >> >> check if the mount can be satisfied and if not bail out immediately and >> >> returning an error in this case does work. > Yes, that's sensible. And fwiw, I think for private mounts that's the > semantics you want. You have disconnected from the "managing" mount > namespace - for lack of a better phrase - so you shouldn't get the mount > events. > >> I was tempted to work out how to not include the autofs mounts in the cloned >> >> namespace but that's file system specific code in the VFS which is not ok >> and >> >> it (should) also be possible for the namespace creator to "mount >> --make-shared" >> >> in the case the creator wants the mount to function and this would prevent >> that. >> >> So I don't think this is the right thing to do. >> >> >> There's also the inability of the mount namespace creator to umount the >> autofs >> >> mount which could also resolve the problem which I haven't looked into yet. > Ok, again, that should only be an issue with unprivileged mount > namespaces, i.e., owned by another user namespace. This isn't easily > doable. If the unprivileged mount namespaces can unmount the automount > it might reveal hidden/overmounted directories that weren't supposed to > be exposed to the container - I hate these semantics btw. > >> >> Have I made sense? > Yes, though that's not the question I tried to ask you. :) > >> >> Clearly there's nothing on autofs itself and why one would want to use it >> >> but I don't think that matters for the description. >> >> >> Ian >>
On Fri, Nov 14, 2025 at 07:49:53AM +0800, Ian Kent wrote: > On 13/11/25 21:19, Christian Brauner wrote: > > On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote: > > > On 12/11/25 19:01, Christian Brauner wrote: > > > > On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote: > > > > > On 11/11/25 18:55, Christian Brauner wrote: > > > > > > On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote: > > > > > > > On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: > > > > > > > > > > > > > > > > + sbi->owner = current->nsproxy->mnt_ns; > > > > > > > > ns_ref_get() > > > > > > > > Can be called directly on the mount namespace. > > > > > > > ... and would leak all mounts in the mount tree, unless I'm missing > > > > > > > something subtle. > > > > > > Right, I thought you actually wanted to pin it. > > > > > > Anyway, you could take a passive reference but I think that's nonsense > > > > > > as well. The following should do it: > > > > > Right, I'll need to think about this for a little while, I did think > > > > > > > > > > of using an id for the comparison but I diverged down the wrong path so > > > > > > > > > > this is a very welcome suggestion. There's still the handling of where > > > > > > > > > > the daemon goes away (crash or SIGKILL, yes people deliberately do this > > > > > > > > > > at times, think simulated disaster recovery) which I've missed in this > > > > Can you describe the problem in more detail and I'm happy to help you > > > > out here. I don't yet understand what the issue is. > > > I thought the patch description was ok but I'll certainly try. > > I'm sorry, we're talking past each other: I was interested in your > > SIGKILL problem when the daemon crashes. You seemed to say that you > > needed additional changes for that case. So I'm trying to understand > > what the fundamental additional problem is with a crashing daemon that > > would require additional changes here. > > Right, sorry. > > It's pretty straight forward. > > > If the daemon is shutdown (or killed summarily) and there are busy > > mounts left mounted then when started again they are "re-connected to" > > by the newly running daemon. So there's a need to update the mnt_ns_id in > > the ioctl that is used to set the new pipefd. > > > I can't provide a patch fragment because I didn't realise the id in > ns_common Before that you can grab it from the mount namespace directly from the mntns->seq field.
On 14/11/25 19:44, Christian Brauner wrote: > On Fri, Nov 14, 2025 at 07:49:53AM +0800, Ian Kent wrote: >> On 13/11/25 21:19, Christian Brauner wrote: >>> On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote: >>>> On 12/11/25 19:01, Christian Brauner wrote: >>>>> On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote: >>>>>> On 11/11/25 18:55, Christian Brauner wrote: >>>>>>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote: >>>>>>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: >>>>>>>> >>>>>>>>>> + sbi->owner = current->nsproxy->mnt_ns; >>>>>>>>> ns_ref_get() >>>>>>>>> Can be called directly on the mount namespace. >>>>>>>> ... and would leak all mounts in the mount tree, unless I'm missing >>>>>>>> something subtle. >>>>>>> Right, I thought you actually wanted to pin it. >>>>>>> Anyway, you could take a passive reference but I think that's nonsense >>>>>>> as well. The following should do it: >>>>>> Right, I'll need to think about this for a little while, I did think >>>>>> >>>>>> of using an id for the comparison but I diverged down the wrong path so >>>>>> >>>>>> this is a very welcome suggestion. There's still the handling of where >>>>>> >>>>>> the daemon goes away (crash or SIGKILL, yes people deliberately do this >>>>>> >>>>>> at times, think simulated disaster recovery) which I've missed in this >>>>> Can you describe the problem in more detail and I'm happy to help you >>>>> out here. I don't yet understand what the issue is. >>>> I thought the patch description was ok but I'll certainly try. >>> I'm sorry, we're talking past each other: I was interested in your >>> SIGKILL problem when the daemon crashes. You seemed to say that you >>> needed additional changes for that case. So I'm trying to understand >>> what the fundamental additional problem is with a crashing daemon that >>> would require additional changes here. >> Right, sorry. >> >> It's pretty straight forward. >> >> >> If the daemon is shutdown (or killed summarily) and there are busy >> >> mounts left mounted then when started again they are "re-connected to" >> >> by the newly running daemon. So there's a need to update the mnt_ns_id in >> >> the ioctl that is used to set the new pipefd. >> >> >> I can't provide a patch fragment because I didn't realise the id in >> ns_common > Before that you can grab it from the mount namespace directly from the > mntns->seq field. I'd noticed some of that type of usage, using that will certainly help with the backports I need to do too but I've started looking at the series so I'll probably back port it for most recent kernel. I'll send my current path once I get a kernel built that boots in my VM ... Thanks for your help. Ian
On 14/11/25 07:49, Ian Kent wrote: > On 13/11/25 21:19, Christian Brauner wrote: >> On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote: >>> On 12/11/25 19:01, Christian Brauner wrote: >>>> On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote: >>>>> On 11/11/25 18:55, Christian Brauner wrote: >>>>>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote: >>>>>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote: >>>>>>> >>>>>>>>> + sbi->owner = current->nsproxy->mnt_ns; >>>>>>>> ns_ref_get() >>>>>>>> Can be called directly on the mount namespace. >>>>>>> ... and would leak all mounts in the mount tree, unless I'm missing >>>>>>> something subtle. >>>>>> Right, I thought you actually wanted to pin it. >>>>>> Anyway, you could take a passive reference but I think that's >>>>>> nonsense >>>>>> as well. The following should do it: >>>>> Right, I'll need to think about this for a little while, I did think >>>>> >>>>> of using an id for the comparison but I diverged down the wrong >>>>> path so >>>>> >>>>> this is a very welcome suggestion. There's still the handling of >>>>> where >>>>> >>>>> the daemon goes away (crash or SIGKILL, yes people deliberately do >>>>> this >>>>> >>>>> at times, think simulated disaster recovery) which I've missed in >>>>> this >>>> Can you describe the problem in more detail and I'm happy to help you >>>> out here. I don't yet understand what the issue is. >>> I thought the patch description was ok but I'll certainly try. >> I'm sorry, we're talking past each other: I was interested in your >> SIGKILL problem when the daemon crashes. You seemed to say that you >> needed additional changes for that case. So I'm trying to understand >> what the fundamental additional problem is with a crashing daemon that >> would require additional changes here. > > Right, sorry. > > It's pretty straight forward. > > > If the daemon is shutdown (or killed summarily) and there are busy > > mounts left mounted then when started again they are "re-connected to" > > by the newly running daemon. So there's a need to update the mnt_ns_id in > > the ioctl that is used to set the new pipefd. > > > I can't provide a patch fragment because I didn't realise the id in > ns_common > > was added a your recent patch series and I briefly went down a path > trying to > > compile against 6.16.7 before I realised I hadn't been paying attention. > > > The setting needs to be put in > fs/autofs/dev-ioctl.c:autofs_dev_ioctl_setpipefd(). As an fyi, unrelated to what I'm trying to fix here but related to SIGKILL behaviour. I have a user that uses a custom disaster recovery procedure that essentially does a SIGKILL and expects automount(8) to startup and continue using changed servers which it doesn't. I'm pretty sure I just need to re-queue or discard any entries in the wait queue but haven't got it quite right yet (I think I'm forgetting to update the flags). So eventually there'll be another autofs patch which might look like it belongs with this one but is actually quite different. > > Ian > >> >>> >>> Consider using automount in a container. >>> >>> >>> For people to use autofs in a container either automount(8) in the init >>> >>> mount namespace or an independently running automount(8) entirely >>> within >>> >>> the container can be used. The later is done by adding a volume option >>> >>> (or options) to the container to essentially bind mount the autofs >>> mount >>> >>> into the container and the option syntax allows the volume to be set >>> >>> propagation slave if it is not already set by default (shared is bad, >>> >>> the automounts must not propagate back to where they came from). If the >>> >>> automount(8) instance is entirely within the container that also works >>> >>> fine as everything is isolated within the container (no volume options >>> >>> are needed). >>> >>> >>> Now with unshare(1) (and there are other problematic cases, I think >>> systemd >>> >>> private temp gets caught here too) where using something like >>> "unshare -Urm" >>> >>> will create a mount namespace that includes any autofs mounts and >>> sets them >>> >>> propagation private. These mounts cannot be unmounted within the mount >>> >>> namepsace by the namespace creator and accessing a directory within the >> Right, but that should only be true for unprivileged containers where we >> lock mounts at copy_mnt_ns(). >> >>> autofs mount will trigger a callback to automount(8) in the init >>> namespace >>> >>> which mounts the requested mount. But the newly created mount >>> namespace is >>> >>> propagation private so the process in the new mount namespace loops >>> around >>> >>> sending mount requests that cannot be satisfied. The odd thing is >>> that on >>> the >>> >>> second callback to automount(8) returns an error which does complete >>> the >>> >>> ->d_automount() call but doesn't seem to result in breaking the loop in >>> >>> __traverse_mounts() for some unknown reason. One way to resolve this >>> is to >>> >>> check if the mount can be satisfied and if not bail out immediately and >>> >>> returning an error in this case does work. >> Yes, that's sensible. And fwiw, I think for private mounts that's the >> semantics you want. You have disconnected from the "managing" mount >> namespace - for lack of a better phrase - so you shouldn't get the mount >> events. >> >>> I was tempted to work out how to not include the autofs mounts in >>> the cloned >>> >>> namespace but that's file system specific code in the VFS which is >>> not ok >>> and >>> >>> it (should) also be possible for the namespace creator to "mount >>> --make-shared" >>> >>> in the case the creator wants the mount to function and this would >>> prevent >>> that. >>> >>> So I don't think this is the right thing to do. >>> >>> >>> There's also the inability of the mount namespace creator to umount the >>> autofs >>> >>> mount which could also resolve the problem which I haven't looked >>> into yet. >> Ok, again, that should only be an issue with unprivileged mount >> namespaces, i.e., owned by another user namespace. This isn't easily >> doable. If the unprivileged mount namespaces can unmount the automount >> it might reveal hidden/overmounted directories that weren't supposed to >> be exposed to the container - I hate these semantics btw. >> >>> >>> Have I made sense? >> Yes, though that's not the question I tried to ask you. :) >> >>> >>> Clearly there's nothing on autofs itself and why one would want to >>> use it >>> >>> but I don't think that matters for the description. >>> >>> >>> Ian >>> >
On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..0a29761f39c0 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
> sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
> sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
> sbi->pipefd = -1;
> + sbi->owner = current->nsproxy->mnt_ns;
>
> set_autofs_type_indirect(&sbi->type);
> mutex_init(&sbi->wq_mutex);
> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
> index 174c7205fee4..8cce86158f20 100644
> --- a/fs/autofs/root.c
> +++ b/fs/autofs/root.c
> @@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
> if (autofs_oz_mode(sbi))
> return NULL;
>
> + /* Refuse to trigger mount if current namespace is not the owner
> + * and the mount is propagation private.
> + */
> + if (sbi->owner != current->nsproxy->mnt_ns) {
> + if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
> + return ERR_PTR(-EPERM);
> + }
> +
Huh? What's to guarantee that superblock won't outlive the namespace?
That looks seriously bogus.
On 11/11/25 14:59, Al Viro wrote:
> On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
>
>> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
>> index f5c16ffba013..0a29761f39c0 100644
>> --- a/fs/autofs/inode.c
>> +++ b/fs/autofs/inode.c
>> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
>> sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
>> sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
>> sbi->pipefd = -1;
>> + sbi->owner = current->nsproxy->mnt_ns;
>>
>> set_autofs_type_indirect(&sbi->type);
>> mutex_init(&sbi->wq_mutex);
>> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
>> index 174c7205fee4..8cce86158f20 100644
>> --- a/fs/autofs/root.c
>> +++ b/fs/autofs/root.c
>> @@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
>> if (autofs_oz_mode(sbi))
>> return NULL;
>>
>> + /* Refuse to trigger mount if current namespace is not the owner
>> + * and the mount is propagation private.
>> + */
>> + if (sbi->owner != current->nsproxy->mnt_ns) {
>> + if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
>> + return ERR_PTR(-EPERM);
>> + }
>> +
> Huh? What's to guarantee that superblock won't outlive the namespace?
Not 30 minutes after I posted these I was thinking about the case the daemon
(that mounted this) going away, very loosely similar I think. Setting the
mounting process's namespace when it mounts it is straight forward but what
can I do if the process crashes ...
I did think that if the namespace is saved away by the process that mounts
it that the mount namespace would be valid at least until it umounts it but
yes there are a few things that can go wrong ...
Any ideas how I can identify this case?
Ian
On Tue, Nov 11, 2025 at 04:25:29PM +0800, Ian Kent wrote: > > Huh? What's to guarantee that superblock won't outlive the namespace? > > Not 30 minutes after I posted these I was thinking about the case the daemon > > (that mounted this) going away, very loosely similar I think. Setting the > > mounting process's namespace when it mounts it is straight forward but what > > can I do if the process crashes ... > > > I did think that if the namespace is saved away by the process that mounts > > it that the mount namespace would be valid at least until it umounts it but > > yes there are a few things that can go wrong ... Umm... 1) super_block != mount; unshare(CLONE_NEWNS) by anyone in the namespace of that mount *will* create a clone of that mount, with exact same ->mnt_sb and yes, in a separate namespace. 2) mount does not pin a namespace. chdir into it, umount -l and there you go... 3) mount(2) can bloody well create more than one struct mount, all with the same ->mnt_sb. So I'd say there's more than a few things that can go wrong here. Said that, this "we need a daemon in that namespace" has been a source of assorted headaches for decades now; could we do anything about that? After all, a thread can switch from one namespace to another these days (setns(2))...
On 11/11/25 17:04, Al Viro wrote: > On Tue, Nov 11, 2025 at 04:25:29PM +0800, Ian Kent wrote: > >>> Huh? What's to guarantee that superblock won't outlive the namespace? >> Not 30 minutes after I posted these I was thinking about the case the daemon >> >> (that mounted this) going away, very loosely similar I think. Setting the >> >> mounting process's namespace when it mounts it is straight forward but what >> >> can I do if the process crashes ... >> >> >> I did think that if the namespace is saved away by the process that mounts >> >> it that the mount namespace would be valid at least until it umounts it but >> >> yes there are a few things that can go wrong ... > Umm... > > 1) super_block != mount; unshare(CLONE_NEWNS) by anyone in the namespace of > that mount *will* create a clone of that mount, with exact same ->mnt_sb > and yes, in a separate namespace. > > 2) mount does not pin a namespace. chdir into it, umount -l and there you go... > > 3) mount(2) can bloody well create more than one struct mount, all with the > same ->mnt_sb. > > So I'd say there's more than a few things that can go wrong here. > > Said that, this "we need a daemon in that namespace" has been a source of > assorted headaches for decades now; could we do anything about that? > After all, a thread can switch from one namespace to another these > days (setns(2))... Not sure the motivation here is "we need a daemon in that namespace", it's more my struggling to find a suitable check for the problem case even though it does look a lot like what you say. Thinking back the problem has always been the amount of stuff that's not appropriate for the kernel, automount map parsing is pretty ugly, for example. A better split of duties is probably what we would need to do. But if we did that then we'd probably want to re-define/re-write the user space <-> kernel space communications as well to assist with the division of labour. In the beginning there was a proposal to do just that, not sure I still have that stuff, I'll have a look around ... or do you have something else specific in mind? Anyway, for the moment, I think your saying just taking an ns_common reference will be problematic as well. Ian
On Tue, Nov 11, 2025 at 06:13:19PM +0800, Ian Kent wrote: > Anyway, for the moment, I think your saying just taking an ns_common > reference > > will be problematic as well. If it is persistent - definitely. We'd get a massive leak, since namespace is torn apart when the refcount drops to zero.
© 2016 - 2026 Red Hat, Inc.