autofs: fairly minor fixes

[PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 3 months ago

If a mount namespace contains autofs mounts, and they are propagation
private, and there is no namespace specific automount daemon to handle
possible automounting then attempted path resolution will loop until
MAXSYMLINKS is reached before failing causing quite a bit of noise in
the log.

Add a check for this in autofs ->d_automount() so that the VFS can
immediately return an error in this case. Since the mount is propagation
private an EPERM return seems most appropriate.

Signed-off-by: Ian Kent <raven@themaw.net>
---
 fs/autofs/autofs_i.h | 4 ++++
 fs/autofs/inode.c    | 1 +
 fs/autofs/root.c     | 8 ++++++++
 fs/namespace.c       | 6 ++++++
 include/linux/fs.h   | 1 +
 5 files changed, 20 insertions(+)

diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 23cea74f9933..34533587c66b 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -16,6 +16,7 @@
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
+#include <uapi/linux/mount.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/uaccess.h>
@@ -109,11 +110,14 @@ struct autofs_wait_queue {
 #define AUTOFS_SBI_STRICTEXPIRE 0x0002
 #define AUTOFS_SBI_IGNORE	0x0004
 
+struct mnt_namespace;
+
 struct autofs_sb_info {
 	u32 magic;
 	int pipefd;
 	struct file *pipe;
 	struct pid *oz_pgrp;
+	struct mnt_namespace *owner;
 	int version;
 	int sub_version;
 	int min_proto;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f5c16ffba013..0a29761f39c0 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
 	sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
 	sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
 	sbi->pipefd = -1;
+	sbi->owner = current->nsproxy->mnt_ns;
 
 	set_autofs_type_indirect(&sbi->type);
 	mutex_init(&sbi->wq_mutex);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 174c7205fee4..8cce86158f20 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
 	if (autofs_oz_mode(sbi))
 		return NULL;
 
+	/* Refuse to trigger mount if current namespace is not the owner
+	 * and the mount is propagation private.
+	 */
+	if (sbi->owner != current->nsproxy->mnt_ns) {
+		if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
+			return ERR_PTR(-EPERM);
+	}
+
 	/*
 	 * If an expire request is pending everyone must wait.
 	 * If the expire fails we're still mounted so continue
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..27bb12693cba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
 	return propagation;
 }
 
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
+{
+	return mnt_to_propagation_flags(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
+
 static void statmount_sb_basic(struct kstatmount *s)
 {
 	struct super_block *sb = s->mnt->mnt_sb;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..a5c2077ce6ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
 /* fs/dcache.c -- generic fs support functions */
 extern bool is_subdir(struct dentry *, struct dentry *);
 extern bool path_is_under(const struct path *, const struct path *);
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
 
 extern char *file_path(struct file *, char *, int);
 
-- 
2.51.1

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Christian Brauner 3 months ago

On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
> If a mount namespace contains autofs mounts, and they are propagation
> private, and there is no namespace specific automount daemon to handle
> possible automounting then attempted path resolution will loop until
> MAXSYMLINKS is reached before failing causing quite a bit of noise in
> the log.
> 
> Add a check for this in autofs ->d_automount() so that the VFS can
> immediately return an error in this case. Since the mount is propagation
> private an EPERM return seems most appropriate.
> 
> Signed-off-by: Ian Kent <raven@themaw.net>
> ---
>  fs/autofs/autofs_i.h | 4 ++++
>  fs/autofs/inode.c    | 1 +
>  fs/autofs/root.c     | 8 ++++++++
>  fs/namespace.c       | 6 ++++++
>  include/linux/fs.h   | 1 +
>  5 files changed, 20 insertions(+)
> 
> diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
> index 23cea74f9933..34533587c66b 100644
> --- a/fs/autofs/autofs_i.h
> +++ b/fs/autofs/autofs_i.h
> @@ -16,6 +16,7 @@
>  #include <linux/wait.h>
>  #include <linux/sched.h>
>  #include <linux/sched/signal.h>
> +#include <uapi/linux/mount.h>
>  #include <linux/mount.h>
>  #include <linux/namei.h>
>  #include <linux/uaccess.h>
> @@ -109,11 +110,14 @@ struct autofs_wait_queue {
>  #define AUTOFS_SBI_STRICTEXPIRE 0x0002
>  #define AUTOFS_SBI_IGNORE	0x0004
>  
> +struct mnt_namespace;
> +
>  struct autofs_sb_info {
>  	u32 magic;
>  	int pipefd;
>  	struct file *pipe;
>  	struct pid *oz_pgrp;
> +	struct mnt_namespace *owner;
>  	int version;
>  	int sub_version;
>  	int min_proto;
> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..0a29761f39c0 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
>  	sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
>  	sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
>  	sbi->pipefd = -1;
> +	sbi->owner = current->nsproxy->mnt_ns;

ns_ref_get()
Can be called directly on the mount namespace.

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Al Viro 3 months ago

On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:

> > +	sbi->owner = current->nsproxy->mnt_ns;
> 
> ns_ref_get()
> Can be called directly on the mount namespace.

... and would leak all mounts in the mount tree, unless I'm missing
something subtle.

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Christian Brauner 3 months ago

On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
> 
> > > +	sbi->owner = current->nsproxy->mnt_ns;
> > 
> > ns_ref_get()
> > Can be called directly on the mount namespace.
> 
> ... and would leak all mounts in the mount tree, unless I'm missing
> something subtle.

Right, I thought you actually wanted to pin it.
Anyway, you could take a passive reference but I think that's nonsense
as well. The following should do it:

UNTESTED, UNCOMPILED

---
 fs/autofs/autofs_i.h |  4 ++++
 fs/autofs/inode.c    |  3 +++
 fs/autofs/root.c     | 10 ++++++++++
 fs/namespace.c       |  6 ++++++
 include/linux/fs.h   |  1 +
 5 files changed, 24 insertions(+)

diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 23cea74f9933..2b9d2300d351 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -16,6 +16,7 @@
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
+#include <uapi/linux/mount.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/uaccess.h>
@@ -109,11 +110,14 @@ struct autofs_wait_queue {
 #define AUTOFS_SBI_STRICTEXPIRE 0x0002
 #define AUTOFS_SBI_IGNORE	0x0004
 
 struct autofs_sb_info {
 	u32 magic;
 	int pipefd;
 	struct file *pipe;
 	struct pid *oz_pgrp;
+	u64 mnt_ns_id;
 	int version;
 	int sub_version;
 	int min_proto;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f5c16ffba013..247a5784d192 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -6,8 +6,10 @@
 
 #include <linux/seq_file.h>
 #include <linux/pagemap.h>
+#include <linux/ns_common.h>
 
 #include "autofs_i.h"
+#include "../mount.h"
 
 struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
 {
@@ -251,6 +253,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
 	sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
 	sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
 	sbi->pipefd = -1;
+	sbi->mnt_ns_id = to_ns_common(current->nsproxy->mnt_ns)->ns_id;
 
 	set_autofs_type_indirect(&sbi->type);
 	mutex_init(&sbi->wq_mutex);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 174c7205fee4..f06f62d23e76 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -7,8 +7,10 @@
 
 #include <linux/capability.h>
 #include <linux/compat.h>
+#include <linux/ns_common.h>
 
 #include "autofs_i.h"
+#include "../mount.h"
 
 static int autofs_dir_permission(struct mnt_idmap *, struct inode *, int);
 static int autofs_dir_symlink(struct mnt_idmap *, struct inode *,
@@ -341,6 +343,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
 	if (autofs_oz_mode(sbi))
 		return NULL;
 
+	/* Refuse to trigger mount if current namespace is not the owner
+	 * and the mount is propagation private.
+	 */
+	if (sbi->mnt_ns_id != to_ns_common(current->nsproxy->mnt_ns)->ns_id) {
+		if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
+			return ERR_PTR(-EPERM);
+	}
+
 	/*
 	 * If an expire request is pending everyone must wait.
 	 * If the expire fails we're still mounted so continue
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..27bb12693cba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
 	return propagation;
 }
 
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
+{
+	return mnt_to_propagation_flags(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
+
 static void statmount_sb_basic(struct kstatmount *s)
 {
 	struct super_block *sb = s->mnt->mnt_sb;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..a5c2077ce6ed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
 /* fs/dcache.c -- generic fs support functions */
 extern bool is_subdir(struct dentry *, struct dentry *);
 extern bool path_is_under(const struct path *, const struct path *);
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
 
 extern char *file_path(struct file *, char *, int);

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 3 months ago

On 11/11/25 18:55, Christian Brauner wrote:
> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>>
>>>> +	sbi->owner = current->nsproxy->mnt_ns;
>>> ns_ref_get()
>>> Can be called directly on the mount namespace.
>> ... and would leak all mounts in the mount tree, unless I'm missing
>> something subtle.
> Right, I thought you actually wanted to pin it.
> Anyway, you could take a passive reference but I think that's nonsense
> as well. The following should do it:

Right, I'll need to think about this for a little while, I did think

of using an id for the comparison but I diverged down the wrong path so

this is a very welcome suggestion. There's still the handling of where

the daemon goes away (crash or SIGKILL, yes people deliberately do this

at times, think simulated disaster recovery) which I've missed in this

revision.


Al, thoughts please?


>
> UNTESTED, UNCOMPILED

I'll give it a whirl, ;)


>
> ---
>   fs/autofs/autofs_i.h |  4 ++++
>   fs/autofs/inode.c    |  3 +++
>   fs/autofs/root.c     | 10 ++++++++++
>   fs/namespace.c       |  6 ++++++
>   include/linux/fs.h   |  1 +
>   5 files changed, 24 insertions(+)
>
> diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
> index 23cea74f9933..2b9d2300d351 100644
> --- a/fs/autofs/autofs_i.h
> +++ b/fs/autofs/autofs_i.h
> @@ -16,6 +16,7 @@
>   #include <linux/wait.h>
>   #include <linux/sched.h>
>   #include <linux/sched/signal.h>
> +#include <uapi/linux/mount.h>
>   #include <linux/mount.h>
>   #include <linux/namei.h>
>   #include <linux/uaccess.h>
> @@ -109,11 +110,14 @@ struct autofs_wait_queue {
>   #define AUTOFS_SBI_STRICTEXPIRE 0x0002
>   #define AUTOFS_SBI_IGNORE	0x0004
>   
>   struct autofs_sb_info {
>   	u32 magic;
>   	int pipefd;
>   	struct file *pipe;
>   	struct pid *oz_pgrp;
> +	u64 mnt_ns_id;
>   	int version;
>   	int sub_version;
>   	int min_proto;
> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..247a5784d192 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -6,8 +6,10 @@
>   
>   #include <linux/seq_file.h>
>   #include <linux/pagemap.h>
> +#include <linux/ns_common.h>
>   
>   #include "autofs_i.h"
> +#include "../mount.h"

As a module I try really hard to avoid use of non-public definitions.

But if everyone is happy I'm happy too!


>   
>   struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
>   {
> @@ -251,6 +253,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
>   	sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
>   	sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
>   	sbi->pipefd = -1;
> +	sbi->mnt_ns_id = to_ns_common(current->nsproxy->mnt_ns)->ns_id;
>   
>   	set_autofs_type_indirect(&sbi->type);
>   	mutex_init(&sbi->wq_mutex);
> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
> index 174c7205fee4..f06f62d23e76 100644
> --- a/fs/autofs/root.c
> +++ b/fs/autofs/root.c
> @@ -7,8 +7,10 @@
>   
>   #include <linux/capability.h>
>   #include <linux/compat.h>
> +#include <linux/ns_common.h>
>   
>   #include "autofs_i.h"
> +#include "../mount.h"
>   
>   static int autofs_dir_permission(struct mnt_idmap *, struct inode *, int);
>   static int autofs_dir_symlink(struct mnt_idmap *, struct inode *,
> @@ -341,6 +343,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
>   	if (autofs_oz_mode(sbi))
>   		return NULL;
>   
> +	/* Refuse to trigger mount if current namespace is not the owner
> +	 * and the mount is propagation private.
> +	 */
> +	if (sbi->mnt_ns_id != to_ns_common(current->nsproxy->mnt_ns)->ns_id) {
> +		if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
> +			return ERR_PTR(-EPERM);
> +	}
> +
>   	/*
>   	 * If an expire request is pending everyone must wait.
>   	 * If the expire fails we're still mounted so continue
> diff --git a/fs/namespace.c b/fs/namespace.c
> index d82910f33dc4..27bb12693cba 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -5150,6 +5150,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
>   	return propagation;
>   }
>   
> +u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
> +{
> +	return mnt_to_propagation_flags(real_mount(mnt));
> +}
> +EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
> +
>   static void statmount_sb_basic(struct kstatmount *s)
>   {
>   	struct super_block *sb = s->mnt->mnt_sb;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index c895146c1444..a5c2077ce6ed 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3269,6 +3269,7 @@ extern struct file * open_exec(const char *);
>   /* fs/dcache.c -- generic fs support functions */
>   extern bool is_subdir(struct dentry *, struct dentry *);
>   extern bool path_is_under(const struct path *, const struct path *);
> +u64 vfsmount_to_propagation_flags(struct vfsmount *mnt);
>   
>   extern char *file_path(struct file *, char *, int);
>

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Christian Brauner 2 months, 4 weeks ago

On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote:
> On 11/11/25 18:55, Christian Brauner wrote:
> > On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
> > > On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
> > > 
> > > > > +	sbi->owner = current->nsproxy->mnt_ns;
> > > > ns_ref_get()
> > > > Can be called directly on the mount namespace.
> > > ... and would leak all mounts in the mount tree, unless I'm missing
> > > something subtle.
> > Right, I thought you actually wanted to pin it.
> > Anyway, you could take a passive reference but I think that's nonsense
> > as well. The following should do it:
> 
> Right, I'll need to think about this for a little while, I did think
> 
> of using an id for the comparison but I diverged down the wrong path so
> 
> this is a very welcome suggestion. There's still the handling of where
> 
> the daemon goes away (crash or SIGKILL, yes people deliberately do this
> 
> at times, think simulated disaster recovery) which I've missed in this

Can you describe the problem in more detail and I'm happy to help you
out here. I don't yet understand what the issue is.

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 2 months, 4 weeks ago

On 12/11/25 19:01, Christian Brauner wrote:
> On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote:
>> On 11/11/25 18:55, Christian Brauner wrote:
>>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
>>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>>>>
>>>>>> +	sbi->owner = current->nsproxy->mnt_ns;
>>>>> ns_ref_get()
>>>>> Can be called directly on the mount namespace.
>>>> ... and would leak all mounts in the mount tree, unless I'm missing
>>>> something subtle.
>>> Right, I thought you actually wanted to pin it.
>>> Anyway, you could take a passive reference but I think that's nonsense
>>> as well. The following should do it:
>> Right, I'll need to think about this for a little while, I did think
>>
>> of using an id for the comparison but I diverged down the wrong path so
>>
>> this is a very welcome suggestion. There's still the handling of where
>>
>> the daemon goes away (crash or SIGKILL, yes people deliberately do this
>>
>> at times, think simulated disaster recovery) which I've missed in this
> Can you describe the problem in more detail and I'm happy to help you
> out here. I don't yet understand what the issue is.

I thought the patch description was ok but I'll certainly try.


Consider using automount in a container.


For people to use autofs in a container either automount(8) in the init

mount namespace or an independently running automount(8) entirely within

the container can be used. The later is done by adding a volume option

(or options) to the container to essentially bind mount the autofs mount

into the container and the option syntax allows the volume to be set

propagation slave if it is not already set by default (shared is bad,

the automounts must not propagate back to where they came from). If the

automount(8) instance is entirely within the container that also works

fine as everything is isolated within the container (no volume options

are needed).


Now with unshare(1) (and there are other problematic cases, I think systemd

private temp gets caught here too) where using something like "unshare -Urm"

will create a mount namespace that includes any autofs mounts and sets them

propagation private. These mounts cannot be unmounted within the mount

namepsace by the namespace creator and accessing a directory within the

autofs mount will trigger a callback to automount(8) in the init namespace

which mounts the requested mount. But the newly created mount namespace is

propagation private so the process in the new mount namespace loops around

sending mount requests that cannot be satisfied. The odd thing is that 
on the

second callback to automount(8) returns an error which does complete the

->d_automount() call but doesn't seem to result in breaking the loop in

__traverse_mounts() for some unknown reason. One way to resolve this is to

check if the mount can be satisfied and if not bail out immediately and

returning an error in this case does work.


I was tempted to work out how to not include the autofs mounts in the cloned

namespace but that's file system specific code in the VFS which is not 
ok and

it (should) also be possible for the namespace creator to "mount 
--make-shared"

in the case the creator wants the mount to function and this would 
prevent that.

So I don't think this is the right thing to do.


There's also the inability of the mount namespace creator to umount the 
autofs

mount which could also resolve the problem which I haven't looked into yet.


Have I made sense?


Clearly there's nothing on autofs itself and why one would want to use it

but I don't think that matters for the description.


Ian

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Christian Brauner 2 months, 4 weeks ago

On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote:
> On 12/11/25 19:01, Christian Brauner wrote:
> > On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote:
> > > On 11/11/25 18:55, Christian Brauner wrote:
> > > > On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
> > > > > On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
> > > > > 
> > > > > > > +	sbi->owner = current->nsproxy->mnt_ns;
> > > > > > ns_ref_get()
> > > > > > Can be called directly on the mount namespace.
> > > > > ... and would leak all mounts in the mount tree, unless I'm missing
> > > > > something subtle.
> > > > Right, I thought you actually wanted to pin it.
> > > > Anyway, you could take a passive reference but I think that's nonsense
> > > > as well. The following should do it:
> > > Right, I'll need to think about this for a little while, I did think
> > > 
> > > of using an id for the comparison but I diverged down the wrong path so
> > > 
> > > this is a very welcome suggestion. There's still the handling of where
> > > 
> > > the daemon goes away (crash or SIGKILL, yes people deliberately do this
> > > 
> > > at times, think simulated disaster recovery) which I've missed in this
> > Can you describe the problem in more detail and I'm happy to help you
> > out here. I don't yet understand what the issue is.
> 
> I thought the patch description was ok but I'll certainly try.

I'm sorry, we're talking past each other: I was interested in your
SIGKILL problem when the daemon crashes. You seemed to say that you
needed additional changes for that case. So I'm trying to understand
what the fundamental additional problem is with a crashing daemon that
would require additional changes here.

> 
> 
> Consider using automount in a container.
> 
> 
> For people to use autofs in a container either automount(8) in the init
> 
> mount namespace or an independently running automount(8) entirely within
> 
> the container can be used. The later is done by adding a volume option
> 
> (or options) to the container to essentially bind mount the autofs mount
> 
> into the container and the option syntax allows the volume to be set
> 
> propagation slave if it is not already set by default (shared is bad,
> 
> the automounts must not propagate back to where they came from). If the
> 
> automount(8) instance is entirely within the container that also works
> 
> fine as everything is isolated within the container (no volume options
> 
> are needed).
> 
> 
> Now with unshare(1) (and there are other problematic cases, I think systemd
> 
> private temp gets caught here too) where using something like "unshare -Urm"
> 
> will create a mount namespace that includes any autofs mounts and sets them
> 
> propagation private. These mounts cannot be unmounted within the mount
> 
> namepsace by the namespace creator and accessing a directory within the

Right, but that should only be true for unprivileged containers where we
lock mounts at copy_mnt_ns().

> 
> autofs mount will trigger a callback to automount(8) in the init namespace
> 
> which mounts the requested mount. But the newly created mount namespace is
> 
> propagation private so the process in the new mount namespace loops around
> 
> sending mount requests that cannot be satisfied. The odd thing is that on
> the
> 
> second callback to automount(8) returns an error which does complete the
> 
> ->d_automount() call but doesn't seem to result in breaking the loop in
> 
> __traverse_mounts() for some unknown reason. One way to resolve this is to
> 
> check if the mount can be satisfied and if not bail out immediately and
> 
> returning an error in this case does work.

Yes, that's sensible. And fwiw, I think for private mounts that's the
semantics you want. You have disconnected from the "managing" mount
namespace - for lack of a better phrase - so you shouldn't get the mount
events.

> I was tempted to work out how to not include the autofs mounts in the cloned
> 
> namespace but that's file system specific code in the VFS which is not ok
> and
> 
> it (should) also be possible for the namespace creator to "mount
> --make-shared"
> 
> in the case the creator wants the mount to function and this would prevent
> that.
> 
> So I don't think this is the right thing to do.
> 
> 
> There's also the inability of the mount namespace creator to umount the
> autofs
> 
> mount which could also resolve the problem which I haven't looked into yet.

Ok, again, that should only be an issue with unprivileged mount
namespaces, i.e., owned by another user namespace. This isn't easily
doable. If the unprivileged mount namespaces can unmount the automount
it might reveal hidden/overmounted directories that weren't supposed to
be exposed to the container - I hate these semantics btw.

> 
> 
> Have I made sense?

Yes, though that's not the question I tried to ask you. :)

> 
> 
> Clearly there's nothing on autofs itself and why one would want to use it
> 
> but I don't think that matters for the description.
> 
> 
> Ian
>

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 2 months, 3 weeks ago

On 13/11/25 21:19, Christian Brauner wrote:
> On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote:
>> On 12/11/25 19:01, Christian Brauner wrote:
>>> On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote:
>>>> On 11/11/25 18:55, Christian Brauner wrote:
>>>>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
>>>>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>>>>>>
>>>>>>>> +	sbi->owner = current->nsproxy->mnt_ns;
>>>>>>> ns_ref_get()
>>>>>>> Can be called directly on the mount namespace.
>>>>>> ... and would leak all mounts in the mount tree, unless I'm missing
>>>>>> something subtle.
>>>>> Right, I thought you actually wanted to pin it.
>>>>> Anyway, you could take a passive reference but I think that's nonsense
>>>>> as well. The following should do it:
>>>> Right, I'll need to think about this for a little while, I did think
>>>>
>>>> of using an id for the comparison but I diverged down the wrong path so
>>>>
>>>> this is a very welcome suggestion. There's still the handling of where
>>>>
>>>> the daemon goes away (crash or SIGKILL, yes people deliberately do this
>>>>
>>>> at times, think simulated disaster recovery) which I've missed in this
>>> Can you describe the problem in more detail and I'm happy to help you
>>> out here. I don't yet understand what the issue is.
>> I thought the patch description was ok but I'll certainly try.
> I'm sorry, we're talking past each other: I was interested in your
> SIGKILL problem when the daemon crashes. You seemed to say that you
> needed additional changes for that case. So I'm trying to understand
> what the fundamental additional problem is with a crashing daemon that
> would require additional changes here.

Right, sorry.

It's pretty straight forward.


If the daemon is shutdown (or killed summarily) and there are busy

mounts left mounted then when started again they are "re-connected to"

by the newly running daemon. So there's a need to update the mnt_ns_id in

the ioctl that is used to set the new pipefd.


I can't provide a patch fragment because I didn't realise the id in 
ns_common

was added a your recent patch series and I briefly went down a path 
trying to

compile against 6.16.7 before I realised I hadn't been paying attention.


The setting needs to be put in 
fs/autofs/dev-ioctl.c:autofs_dev_ioctl_setpipefd().


Ian

>
>>
>> Consider using automount in a container.
>>
>>
>> For people to use autofs in a container either automount(8) in the init
>>
>> mount namespace or an independently running automount(8) entirely within
>>
>> the container can be used. The later is done by adding a volume option
>>
>> (or options) to the container to essentially bind mount the autofs mount
>>
>> into the container and the option syntax allows the volume to be set
>>
>> propagation slave if it is not already set by default (shared is bad,
>>
>> the automounts must not propagate back to where they came from). If the
>>
>> automount(8) instance is entirely within the container that also works
>>
>> fine as everything is isolated within the container (no volume options
>>
>> are needed).
>>
>>
>> Now with unshare(1) (and there are other problematic cases, I think systemd
>>
>> private temp gets caught here too) where using something like "unshare -Urm"
>>
>> will create a mount namespace that includes any autofs mounts and sets them
>>
>> propagation private. These mounts cannot be unmounted within the mount
>>
>> namepsace by the namespace creator and accessing a directory within the
> Right, but that should only be true for unprivileged containers where we
> lock mounts at copy_mnt_ns().
>
>> autofs mount will trigger a callback to automount(8) in the init namespace
>>
>> which mounts the requested mount. But the newly created mount namespace is
>>
>> propagation private so the process in the new mount namespace loops around
>>
>> sending mount requests that cannot be satisfied. The odd thing is that on
>> the
>>
>> second callback to automount(8) returns an error which does complete the
>>
>> ->d_automount() call but doesn't seem to result in breaking the loop in
>>
>> __traverse_mounts() for some unknown reason. One way to resolve this is to
>>
>> check if the mount can be satisfied and if not bail out immediately and
>>
>> returning an error in this case does work.
> Yes, that's sensible. And fwiw, I think for private mounts that's the
> semantics you want. You have disconnected from the "managing" mount
> namespace - for lack of a better phrase - so you shouldn't get the mount
> events.
>
>> I was tempted to work out how to not include the autofs mounts in the cloned
>>
>> namespace but that's file system specific code in the VFS which is not ok
>> and
>>
>> it (should) also be possible for the namespace creator to "mount
>> --make-shared"
>>
>> in the case the creator wants the mount to function and this would prevent
>> that.
>>
>> So I don't think this is the right thing to do.
>>
>>
>> There's also the inability of the mount namespace creator to umount the
>> autofs
>>
>> mount which could also resolve the problem which I haven't looked into yet.
> Ok, again, that should only be an issue with unprivileged mount
> namespaces, i.e., owned by another user namespace. This isn't easily
> doable. If the unprivileged mount namespaces can unmount the automount
> it might reveal hidden/overmounted directories that weren't supposed to
> be exposed to the container - I hate these semantics btw.
>
>>
>> Have I made sense?
> Yes, though that's not the question I tried to ask you. :)
>
>>
>> Clearly there's nothing on autofs itself and why one would want to use it
>>
>> but I don't think that matters for the description.
>>
>>
>> Ian
>>

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Christian Brauner 2 months, 3 weeks ago

On Fri, Nov 14, 2025 at 07:49:53AM +0800, Ian Kent wrote:
> On 13/11/25 21:19, Christian Brauner wrote:
> > On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote:
> > > On 12/11/25 19:01, Christian Brauner wrote:
> > > > On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote:
> > > > > On 11/11/25 18:55, Christian Brauner wrote:
> > > > > > On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
> > > > > > > On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
> > > > > > > 
> > > > > > > > > +	sbi->owner = current->nsproxy->mnt_ns;
> > > > > > > > ns_ref_get()
> > > > > > > > Can be called directly on the mount namespace.
> > > > > > > ... and would leak all mounts in the mount tree, unless I'm missing
> > > > > > > something subtle.
> > > > > > Right, I thought you actually wanted to pin it.
> > > > > > Anyway, you could take a passive reference but I think that's nonsense
> > > > > > as well. The following should do it:
> > > > > Right, I'll need to think about this for a little while, I did think
> > > > > 
> > > > > of using an id for the comparison but I diverged down the wrong path so
> > > > > 
> > > > > this is a very welcome suggestion. There's still the handling of where
> > > > > 
> > > > > the daemon goes away (crash or SIGKILL, yes people deliberately do this
> > > > > 
> > > > > at times, think simulated disaster recovery) which I've missed in this
> > > > Can you describe the problem in more detail and I'm happy to help you
> > > > out here. I don't yet understand what the issue is.
> > > I thought the patch description was ok but I'll certainly try.
> > I'm sorry, we're talking past each other: I was interested in your
> > SIGKILL problem when the daemon crashes. You seemed to say that you
> > needed additional changes for that case. So I'm trying to understand
> > what the fundamental additional problem is with a crashing daemon that
> > would require additional changes here.
> 
> Right, sorry.
> 
> It's pretty straight forward.
> 
> 
> If the daemon is shutdown (or killed summarily) and there are busy
> 
> mounts left mounted then when started again they are "re-connected to"
> 
> by the newly running daemon. So there's a need to update the mnt_ns_id in
> 
> the ioctl that is used to set the new pipefd.
> 
> 
> I can't provide a patch fragment because I didn't realise the id in
> ns_common

Before that you can grab it from the mount namespace directly from the
mntns->seq field.

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 2 months, 3 weeks ago

On 14/11/25 19:44, Christian Brauner wrote:
> On Fri, Nov 14, 2025 at 07:49:53AM +0800, Ian Kent wrote:
>> On 13/11/25 21:19, Christian Brauner wrote:
>>> On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote:
>>>> On 12/11/25 19:01, Christian Brauner wrote:
>>>>> On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote:
>>>>>> On 11/11/25 18:55, Christian Brauner wrote:
>>>>>>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
>>>>>>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>>>>>>>>
>>>>>>>>>> +	sbi->owner = current->nsproxy->mnt_ns;
>>>>>>>>> ns_ref_get()
>>>>>>>>> Can be called directly on the mount namespace.
>>>>>>>> ... and would leak all mounts in the mount tree, unless I'm missing
>>>>>>>> something subtle.
>>>>>>> Right, I thought you actually wanted to pin it.
>>>>>>> Anyway, you could take a passive reference but I think that's nonsense
>>>>>>> as well. The following should do it:
>>>>>> Right, I'll need to think about this for a little while, I did think
>>>>>>
>>>>>> of using an id for the comparison but I diverged down the wrong path so
>>>>>>
>>>>>> this is a very welcome suggestion. There's still the handling of where
>>>>>>
>>>>>> the daemon goes away (crash or SIGKILL, yes people deliberately do this
>>>>>>
>>>>>> at times, think simulated disaster recovery) which I've missed in this
>>>>> Can you describe the problem in more detail and I'm happy to help you
>>>>> out here. I don't yet understand what the issue is.
>>>> I thought the patch description was ok but I'll certainly try.
>>> I'm sorry, we're talking past each other: I was interested in your
>>> SIGKILL problem when the daemon crashes. You seemed to say that you
>>> needed additional changes for that case. So I'm trying to understand
>>> what the fundamental additional problem is with a crashing daemon that
>>> would require additional changes here.
>> Right, sorry.
>>
>> It's pretty straight forward.
>>
>>
>> If the daemon is shutdown (or killed summarily) and there are busy
>>
>> mounts left mounted then when started again they are "re-connected to"
>>
>> by the newly running daemon. So there's a need to update the mnt_ns_id in
>>
>> the ioctl that is used to set the new pipefd.
>>
>>
>> I can't provide a patch fragment because I didn't realise the id in
>> ns_common
> Before that you can grab it from the mount namespace directly from the
> mntns->seq field.

I'd noticed some of that type of usage, using that will certainly help

with the backports I need to do too but I've started looking at the series

so I'll probably back port it for most recent kernel.


I'll send my current path once I get a kernel built that boots in my

VM ...


Thanks for your help.

Ian

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 2 months, 3 weeks ago

On 14/11/25 07:49, Ian Kent wrote:
> On 13/11/25 21:19, Christian Brauner wrote:
>> On Thu, Nov 13, 2025 at 08:14:36AM +0800, Ian Kent wrote:
>>> On 12/11/25 19:01, Christian Brauner wrote:
>>>> On Tue, Nov 11, 2025 at 08:27:42PM +0800, Ian Kent wrote:
>>>>> On 11/11/25 18:55, Christian Brauner wrote:
>>>>>> On Tue, Nov 11, 2025 at 10:24:35AM +0000, Al Viro wrote:
>>>>>>> On Tue, Nov 11, 2025 at 11:19:59AM +0100, Christian Brauner wrote:
>>>>>>>
>>>>>>>>> +    sbi->owner = current->nsproxy->mnt_ns;
>>>>>>>> ns_ref_get()
>>>>>>>> Can be called directly on the mount namespace.
>>>>>>> ... and would leak all mounts in the mount tree, unless I'm missing
>>>>>>> something subtle.
>>>>>> Right, I thought you actually wanted to pin it.
>>>>>> Anyway, you could take a passive reference but I think that's 
>>>>>> nonsense
>>>>>> as well. The following should do it:
>>>>> Right, I'll need to think about this for a little while, I did think
>>>>>
>>>>> of using an id for the comparison but I diverged down the wrong 
>>>>> path so
>>>>>
>>>>> this is a very welcome suggestion. There's still the handling of 
>>>>> where
>>>>>
>>>>> the daemon goes away (crash or SIGKILL, yes people deliberately do 
>>>>> this
>>>>>
>>>>> at times, think simulated disaster recovery) which I've missed in 
>>>>> this
>>>> Can you describe the problem in more detail and I'm happy to help you
>>>> out here. I don't yet understand what the issue is.
>>> I thought the patch description was ok but I'll certainly try.
>> I'm sorry, we're talking past each other: I was interested in your
>> SIGKILL problem when the daemon crashes. You seemed to say that you
>> needed additional changes for that case. So I'm trying to understand
>> what the fundamental additional problem is with a crashing daemon that
>> would require additional changes here.
>
> Right, sorry.
>
> It's pretty straight forward.
>
>
> If the daemon is shutdown (or killed summarily) and there are busy
>
> mounts left mounted then when started again they are "re-connected to"
>
> by the newly running daemon. So there's a need to update the mnt_ns_id in
>
> the ioctl that is used to set the new pipefd.
>
>
> I can't provide a patch fragment because I didn't realise the id in 
> ns_common
>
> was added a your recent patch series and I briefly went down a path 
> trying to
>
> compile against 6.16.7 before I realised I hadn't been paying attention.
>
>
> The setting needs to be put in 
> fs/autofs/dev-ioctl.c:autofs_dev_ioctl_setpipefd().

As an fyi, unrelated to what I'm trying to fix here but related

to SIGKILL behaviour. I have a user that uses a custom disaster

recovery procedure that essentially does a SIGKILL and expects

automount(8) to startup and continue using changed servers which

it doesn't. I'm pretty sure I just need to re-queue or discard

any entries in the wait queue but haven't got it quite right yet

(I think I'm forgetting to update the flags). So eventually

there'll be another autofs patch which might look like it belongs

with this one but is actually quite different.


>
> Ian
>
>>
>>>
>>> Consider using automount in a container.
>>>
>>>
>>> For people to use autofs in a container either automount(8) in the init
>>>
>>> mount namespace or an independently running automount(8) entirely 
>>> within
>>>
>>> the container can be used. The later is done by adding a volume option
>>>
>>> (or options) to the container to essentially bind mount the autofs 
>>> mount
>>>
>>> into the container and the option syntax allows the volume to be set
>>>
>>> propagation slave if it is not already set by default (shared is bad,
>>>
>>> the automounts must not propagate back to where they came from). If the
>>>
>>> automount(8) instance is entirely within the container that also works
>>>
>>> fine as everything is isolated within the container (no volume options
>>>
>>> are needed).
>>>
>>>
>>> Now with unshare(1) (and there are other problematic cases, I think 
>>> systemd
>>>
>>> private temp gets caught here too) where using something like 
>>> "unshare -Urm"
>>>
>>> will create a mount namespace that includes any autofs mounts and 
>>> sets them
>>>
>>> propagation private. These mounts cannot be unmounted within the mount
>>>
>>> namepsace by the namespace creator and accessing a directory within the
>> Right, but that should only be true for unprivileged containers where we
>> lock mounts at copy_mnt_ns().
>>
>>> autofs mount will trigger a callback to automount(8) in the init 
>>> namespace
>>>
>>> which mounts the requested mount. But the newly created mount 
>>> namespace is
>>>
>>> propagation private so the process in the new mount namespace loops 
>>> around
>>>
>>> sending mount requests that cannot be satisfied. The odd thing is 
>>> that on
>>> the
>>>
>>> second callback to automount(8) returns an error which does complete 
>>> the
>>>
>>> ->d_automount() call but doesn't seem to result in breaking the loop in
>>>
>>> __traverse_mounts() for some unknown reason. One way to resolve this 
>>> is to
>>>
>>> check if the mount can be satisfied and if not bail out immediately and
>>>
>>> returning an error in this case does work.
>> Yes, that's sensible. And fwiw, I think for private mounts that's the
>> semantics you want. You have disconnected from the "managing" mount
>> namespace - for lack of a better phrase - so you shouldn't get the mount
>> events.
>>
>>> I was tempted to work out how to not include the autofs mounts in 
>>> the cloned
>>>
>>> namespace but that's file system specific code in the VFS which is 
>>> not ok
>>> and
>>>
>>> it (should) also be possible for the namespace creator to "mount
>>> --make-shared"
>>>
>>> in the case the creator wants the mount to function and this would 
>>> prevent
>>> that.
>>>
>>> So I don't think this is the right thing to do.
>>>
>>>
>>> There's also the inability of the mount namespace creator to umount the
>>> autofs
>>>
>>> mount which could also resolve the problem which I haven't looked 
>>> into yet.
>> Ok, again, that should only be an issue with unprivileged mount
>> namespaces, i.e., owned by another user namespace. This isn't easily
>> doable. If the unprivileged mount namespaces can unmount the automount
>> it might reveal hidden/overmounted directories that weren't supposed to
>> be exposed to the container - I hate these semantics btw.
>>
>>>
>>> Have I made sense?
>> Yes, though that's not the question I tried to ask you. :)
>>
>>>
>>> Clearly there's nothing on autofs itself and why one would want to 
>>> use it
>>>
>>> but I don't think that matters for the description.
>>>
>>>
>>> Ian
>>>
>

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Al Viro 3 months ago

On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:

> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
> index f5c16ffba013..0a29761f39c0 100644
> --- a/fs/autofs/inode.c
> +++ b/fs/autofs/inode.c
> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
>  	sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
>  	sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
>  	sbi->pipefd = -1;
> +	sbi->owner = current->nsproxy->mnt_ns;
>  
>  	set_autofs_type_indirect(&sbi->type);
>  	mutex_init(&sbi->wq_mutex);
> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
> index 174c7205fee4..8cce86158f20 100644
> --- a/fs/autofs/root.c
> +++ b/fs/autofs/root.c
> @@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
>  	if (autofs_oz_mode(sbi))
>  		return NULL;
>  
> +	/* Refuse to trigger mount if current namespace is not the owner
> +	 * and the mount is propagation private.
> +	 */
> +	if (sbi->owner != current->nsproxy->mnt_ns) {
> +		if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
> +			return ERR_PTR(-EPERM);
> +	}
> +

Huh?  What's to guarantee that superblock won't outlive the namespace?

That looks seriously bogus.

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 3 months ago

On 11/11/25 14:59, Al Viro wrote:
> On Tue, Nov 11, 2025 at 02:04:39PM +0800, Ian Kent wrote:
>
>> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
>> index f5c16ffba013..0a29761f39c0 100644
>> --- a/fs/autofs/inode.c
>> +++ b/fs/autofs/inode.c
>> @@ -251,6 +251,7 @@ static struct autofs_sb_info *autofs_alloc_sbi(void)
>>   	sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
>>   	sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
>>   	sbi->pipefd = -1;
>> +	sbi->owner = current->nsproxy->mnt_ns;
>>   
>>   	set_autofs_type_indirect(&sbi->type);
>>   	mutex_init(&sbi->wq_mutex);
>> diff --git a/fs/autofs/root.c b/fs/autofs/root.c
>> index 174c7205fee4..8cce86158f20 100644
>> --- a/fs/autofs/root.c
>> +++ b/fs/autofs/root.c
>> @@ -341,6 +341,14 @@ static struct vfsmount *autofs_d_automount(struct path *path)
>>   	if (autofs_oz_mode(sbi))
>>   		return NULL;
>>   
>> +	/* Refuse to trigger mount if current namespace is not the owner
>> +	 * and the mount is propagation private.
>> +	 */
>> +	if (sbi->owner != current->nsproxy->mnt_ns) {
>> +		if (vfsmount_to_propagation_flags(path->mnt) & MS_PRIVATE)
>> +			return ERR_PTR(-EPERM);
>> +	}
>> +
> Huh?  What's to guarantee that superblock won't outlive the namespace?

Not 30 minutes after I posted these I was thinking about the case the daemon

(that mounted this) going away, very loosely similar I think. Setting the

mounting process's namespace when it mounts it is straight forward but what

can I do if the process crashes ...


I did think that if the namespace is saved away by the process that mounts

it that the mount namespace would be valid at least until it umounts it but

yes there are a few things that can go wrong ...


Any ideas how I can identify this case?

Ian

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Al Viro 3 months ago

On Tue, Nov 11, 2025 at 04:25:29PM +0800, Ian Kent wrote:

> > Huh?  What's to guarantee that superblock won't outlive the namespace?
> 
> Not 30 minutes after I posted these I was thinking about the case the daemon
> 
> (that mounted this) going away, very loosely similar I think. Setting the
> 
> mounting process's namespace when it mounts it is straight forward but what
> 
> can I do if the process crashes ...
> 
> 
> I did think that if the namespace is saved away by the process that mounts
> 
> it that the mount namespace would be valid at least until it umounts it but
> 
> yes there are a few things that can go wrong ...

Umm...

1) super_block != mount; unshare(CLONE_NEWNS) by anyone in the namespace of
that mount *will* create a clone of that mount, with exact same ->mnt_sb
and yes, in a separate namespace.

2) mount does not pin a namespace.  chdir into it, umount -l and there you go...

3) mount(2) can bloody well create more than one struct mount, all with the
same ->mnt_sb.

So I'd say there's more than a few things that can go wrong here.

Said that, this "we need a daemon in that namespace" has been a source of
assorted headaches for decades now; could we do anything about that?
After all, a thread can switch from one namespace to another these
days (setns(2))...

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Ian Kent 3 months ago

On 11/11/25 17:04, Al Viro wrote:
> On Tue, Nov 11, 2025 at 04:25:29PM +0800, Ian Kent wrote:
>
>>> Huh?  What's to guarantee that superblock won't outlive the namespace?
>> Not 30 minutes after I posted these I was thinking about the case the daemon
>>
>> (that mounted this) going away, very loosely similar I think. Setting the
>>
>> mounting process's namespace when it mounts it is straight forward but what
>>
>> can I do if the process crashes ...
>>
>>
>> I did think that if the namespace is saved away by the process that mounts
>>
>> it that the mount namespace would be valid at least until it umounts it but
>>
>> yes there are a few things that can go wrong ...
> Umm...
>
> 1) super_block != mount; unshare(CLONE_NEWNS) by anyone in the namespace of
> that mount *will* create a clone of that mount, with exact same ->mnt_sb
> and yes, in a separate namespace.
>
> 2) mount does not pin a namespace.  chdir into it, umount -l and there you go...
>
> 3) mount(2) can bloody well create more than one struct mount, all with the
> same ->mnt_sb.
>
> So I'd say there's more than a few things that can go wrong here.
>
> Said that, this "we need a daemon in that namespace" has been a source of
> assorted headaches for decades now; could we do anything about that?
> After all, a thread can switch from one namespace to another these
> days (setns(2))...

Not sure the motivation here is "we need a daemon in that namespace", it's

more my struggling to find a suitable check for the problem case even though

it does look a lot like what you say.


Thinking back the problem has always been the amount of stuff that's not

appropriate for the kernel, automount map parsing is pretty ugly, for 
example.


A better split of duties is probably what we would need to do.


But if we did that then we'd probably want to re-define/re-write the user

space <-> kernel space communications as well to assist with the division

of labour. In the beginning there was a proposal to do just that, not sure

I still have that stuff, I'll have a look around ... or do you have 
something

else specific in mind?


Anyway, for the moment, I think your saying just taking an ns_common 
reference

will be problematic as well.


Ian

Re: [PATCH 2/2] autofs: dont trigger mount if it cant succeed

Posted by Al Viro 3 months ago

On Tue, Nov 11, 2025 at 06:13:19PM +0800, Ian Kent wrote:

> Anyway, for the moment, I think your saying just taking an ns_common
> reference
> 
> will be problematic as well.

If it is persistent - definitely.  We'd get a massive leak, since
namespace is torn apart when the refcount drops to zero.