virtio-fs: Query rootmode during mount

[PATCH] virtio-fs: Query rootmode during mount

Posted by Hanna Czenczek 1 month ago

During mount, receive the root inode's mode (e.g. whether it is a
directory or a regular file) via GETATTR.

Currently, the only way to set this mode to anything but S_IFDIR is by
passing the mode's numerical value via the 'rootmode' mount option.
This is done automatically by libfuse, so users generally do not need to
worry about it.

For virtio-fs, no such option exists.  We could add it, but then users
would need to manually set it: In case of virtio-fs, the server runs on
a separate system, so it cannot issue mount() itself, leaving it a
manual operation for the user.  So instead, Miklos suggested simply
asking the virtio-fs server (virtiofsd) for this rootmode.

We can do this via a GETATTR call during mount(): virtio_fs_fill_super()
currently submits INIT in the background.  To do a GETATTR, we need to
instead await the INIT reply (i.e. do a synchronous request), and then
do GETATTR afterwards on the root inode.

To be able to issue INIT (and GETATTR), we need to at least partially
initialize the super_block structure, which is currently done via
fuse_fill_super_common().  However, the last part of that function sets
up the root inode, which we will now have to delay until we have the
GETATTR result.  Therefore, we split fuse_fill_super_common() in two:
One part (initializing the super_block) we run before INIT+GETATTR, the
other (setting up the root inode) we run afterwards.

Note that this patch only changes virtio-fs's behavior.  For other FUSE
filesystems, we will continue to rely on the 'rootmode' mount option
instead of issuing GETATTR during mount(): It is reasonable for a FUSE
server to issue the mount() system call in the same thread that will
later process FUSE requests.  Blocking on a FUSE request (INIT and
GETATTR) during mount() would then cause a deadlock.

Also, using the 'rootmode' mount option in case of non-virtio-fs FUSE
filesystems just works well, so there is no need to change it.

Suggested-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
Sorry for the delay, I wanted to find out whether I could come up with a
good way of testing that all non-virtio-fs filesystems will be
unaffected.  Unfortunately I didn't, so I really only tested manually
that e.g. sshfs works as before (also qemu's FUSE export as an example
for an S_IFREG root node).
---
 fs/fuse/fuse_i.h    | 35 +++++++++++++++++++++--
 fs/fuse/dir.c       | 28 ++++++++++++++++++
 fs/fuse/inode.c     | 70 ++++++++++++++++++++++++++++++++++-----------
 fs/fuse/virtio_fs.c | 38 +++++++++++++++++++++---
 4 files changed, 148 insertions(+), 23 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e6cc3d552b13..1312705ecb16 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -936,6 +936,9 @@ struct fuse_mount {
 	/* Entry on fc->mounts */
 	struct list_head fc_entry;
 	struct rcu_head rcu;
+
+	/* Whether this is a submount */
+	bool submount;
 };
 
 static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
@@ -1219,15 +1222,43 @@ struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
 struct fuse_dev *fuse_dev_alloc(void);
 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
 void fuse_dev_free(struct fuse_dev *fud);
-void fuse_send_init(struct fuse_mount *fm);
 
 /**
- * Fill in superblock and initialize fuse connection
+ * Send INIT request
+ * @fm: FUSE mount
+ * @await_reply: If true, send a synchronous request, awaiting the server's
+ *		 reply.  Otherwise, just submit the request, not awaiting a
+ *		 reply; it will then be processed in the background once it
+ *		 arrives.
+ */
+void fuse_send_init(struct fuse_mount *fm, bool await_reply);
+
+/**
+ * Query the root inode's mode via GETATTR from the server; for use with
+ * fuse_make_root_inode()
+ * @fm: FUSE mount
+ * @rootmode: on success, receives the root inode's mode
+ */
+int fuse_get_rootmode(struct fuse_mount *fm, unsigned int *rootmode);
+
+/**
+ * Fill in superblock, without creating the root inode
  * @sb: partially-initialized superblock to fill in
  * @ctx: mount context
  */
 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx);
 
+/**
+ * Create the root inode and initialize fuse connection, completing
+ * initialization of the superblock
+ * @sb: almost-initialized superblock (from fuse_fill_super_common()) to
+ *	complete
+ * @ctx: mount context
+ * @mode: inode mode of the root inode
+ */
+int fuse_make_root_inode(struct super_block *sb, struct fuse_fs_context *ctx,
+			 unsigned int mode);
+
 /*
  * Remove the mount from the connection
  *
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 54104dd48af7..36fc899c5acc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1297,6 +1297,34 @@ static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
 	return err;
 }
 
+int fuse_get_rootmode(struct fuse_mount *fm, unsigned int *rootmode)
+{
+	int res;
+	struct fuse_getattr_in inarg;
+	struct fuse_attr_out outarg;
+	FUSE_ARGS(args);
+
+	memset(&inarg, 0, sizeof(inarg));
+	memset(&outarg, 0, sizeof(outarg));
+	args.opcode = FUSE_GETATTR;
+	args.nodeid = FUSE_ROOT_ID;
+	args.in_numargs = 1;
+	args.in_args[0].size = sizeof(inarg);
+	args.in_args[0].value = &inarg;
+	args.out_numargs = 1;
+	args.out_args[0].size = sizeof(outarg);
+	args.out_args[0].value = &outarg;
+	res = fuse_simple_request(fm, &args);
+	if (!res) {
+		if (fuse_invalid_attr(&outarg.attr))
+			res = -EIO;
+		else
+			*rootmode = outarg.attr.mode;
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(fuse_get_rootmode);
+
 static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
 				struct file *file, struct kstat *stat,
 				u32 request_mask, unsigned int flags)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd3321e29a3e..3e19b92928ad 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1378,10 +1378,11 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
 	wake_up_all(&fc->blocked_waitq);
 }
 
-void fuse_send_init(struct fuse_mount *fm)
+void fuse_send_init(struct fuse_mount *fm, bool await_reply)
 {
 	struct fuse_init_args *ia;
 	u64 flags;
+	ssize_t ret;
 
 	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);
 
@@ -1431,7 +1432,18 @@ void fuse_send_init(struct fuse_mount *fm)
 	ia->args.nocreds = true;
 	ia->args.end = process_init_reply;
 
-	if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
+	if (await_reply)
+		ret = fuse_simple_request(fm, &ia->args);
+	else
+		ret = fuse_simple_background(fm, &ia->args, GFP_KERNEL);
+
+	/*
+	 * Errors from fuse_simple_request() may mean that we failed to submit
+	 * the request, or may be error codes from the server, which would have
+	 * been processed already.  Either way, ensure that we always call
+	 * process_init_reply() at least once.
+	 */
+	if (ret < 0)
 		process_init_reply(fm, &ia->args, -ENOTCONN);
 }
 EXPORT_SYMBOL_GPL(fuse_send_init);
@@ -1653,6 +1665,7 @@ static int fuse_get_tree_submount(struct fs_context *fsc)
 	if (!fm)
 		return -ENOMEM;
 
+	fm->submount = true;
 	fm->fc = fuse_conn_get(fc);
 	fsc->s_fs_info = fm;
 	sb = sget_fc(fsc, NULL, set_anon_super_fc);
@@ -1694,8 +1707,6 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	struct fuse_dev *fud = NULL;
 	struct fuse_mount *fm = get_fuse_mount_super(sb);
 	struct fuse_conn *fc = fm->fc;
-	struct inode *root;
-	struct dentry *root_dentry;
 	int err;
 
 	err = -EINVAL;
@@ -1752,8 +1763,36 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	fc->no_control = ctx->no_control;
 	fc->no_force_umount = ctx->no_force_umount;
 
+	err = -EINVAL;
+	if (ctx->fudptr) {
+		if (*ctx->fudptr)
+			goto err_dev_free;
+		*ctx->fudptr = fud;
+	}
+	return 0;
+
+ err_dev_free:
+	if (fud)
+		fuse_dev_free(fud);
+ err_free_dax:
+	if (IS_ENABLED(CONFIG_FUSE_DAX))
+		fuse_dax_conn_free(fc);
+ err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(fuse_fill_super_common);
+
+int fuse_make_root_inode(struct super_block *sb, struct fuse_fs_context *ctx,
+			 unsigned int mode)
+{
+	struct fuse_mount *fm = get_fuse_mount_super(sb);
+	struct fuse_conn *fc = fm->fc;
+	struct inode *root;
+	struct dentry *root_dentry;
+	int err;
+
 	err = -ENOMEM;
-	root = fuse_get_root_inode(sb, ctx->rootmode);
+	root = fuse_get_root_inode(sb, mode);
 	sb->s_d_op = &fuse_root_dentry_operations;
 	root_dentry = d_make_root(root);
 	if (!root_dentry)
@@ -1762,18 +1801,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	sb->s_d_op = &fuse_dentry_operations;
 
 	mutex_lock(&fuse_mutex);
-	err = -EINVAL;
-	if (ctx->fudptr && *ctx->fudptr)
-		goto err_unlock;
-
 	err = fuse_ctl_add_conn(fc);
 	if (err)
 		goto err_unlock;
 
 	list_add_tail(&fc->entry, &fuse_conn_list);
 	sb->s_root = root_dentry;
-	if (ctx->fudptr)
-		*ctx->fudptr = fud;
 	mutex_unlock(&fuse_mutex);
 	return 0;
 
@@ -1781,15 +1814,15 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	mutex_unlock(&fuse_mutex);
 	dput(root_dentry);
  err_dev_free:
-	if (fud)
-		fuse_dev_free(fud);
- err_free_dax:
+	if (ctx->fudptr && *ctx->fudptr) {
+		fuse_dev_free(*ctx->fudptr);
+		*ctx->fudptr = NULL;
+	}
 	if (IS_ENABLED(CONFIG_FUSE_DAX))
 		fuse_dax_conn_free(fc);
- err:
 	return err;
 }
-EXPORT_SYMBOL_GPL(fuse_fill_super_common);
+EXPORT_SYMBOL_GPL(fuse_make_root_inode);
 
 static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 {
@@ -1810,11 +1843,14 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 	ctx->fudptr = &ctx->file->private_data;
 
 	err = fuse_fill_super_common(sb, ctx);
+	if (err)
+		return err;
+	err = fuse_make_root_inode(sb, ctx, ctx->rootmode);
 	if (err)
 		return err;
 	/* file->private_data shall be visible on all CPUs after this */
 	smp_mb();
-	fuse_send_init(get_fuse_mount_super(sb));
+	fuse_send_init(get_fuse_mount_super(sb), false);
 	return 0;
 }
 
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 6404a189e989..ae6e9ee15556 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1541,6 +1541,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
 	struct virtio_fs *fs = fc->iq.priv;
 	struct fuse_fs_context *ctx = fsc->fs_private;
 	unsigned int i;
+	unsigned int rootmode;
 	int err;
 
 	virtio_fs_ctx_set_defaults(ctx);
@@ -1577,6 +1578,12 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
 		}
 		ctx->dax_dev = fs->dax_dev;
 	}
+
+	/*
+	 * Begin sb initialization, fuse_send_init() and process_init_reply()
+	 * need it.  Cannot create the root inode yet, we need to wait for the
+	 * INIT reply to know its inode mode.
+	 */
 	err = fuse_fill_super_common(sb, ctx);
 	if (err < 0)
 		goto err_free_fuse_devs;
@@ -1589,12 +1596,27 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
 
 	/* Previous unmount will stop all queues. Start these again */
 	virtio_fs_start_all_queues(fs);
-	fuse_send_init(fm);
+	fuse_send_init(fm, true);
+
+	err = fuse_get_rootmode(fm, &rootmode);
+	/* On error, fall back to the default (probably S_IFDIR) */
+	if (err < 0)
+		rootmode = ctx->rootmode;
+
+	err = fuse_make_root_inode(sb, ctx, rootmode);
+	if (err < 0)
+		goto err_free_fuse_devs;
+
 	mutex_unlock(&virtio_fs_mutex);
 	return 0;
 
 err_free_fuse_devs:
-	virtio_fs_free_devs(fs);
+	/*
+	 * After INIT, virtio_fs_conn_destroy() will be called by
+	 * virtio_kill_sb(), so there is no need to clean up here
+	 */
+	if (!fc->initialized)
+		virtio_fs_free_devs(fs);
 err:
 	mutex_unlock(&virtio_fs_mutex);
 	return err;
@@ -1635,8 +1657,16 @@ static void virtio_kill_sb(struct super_block *sb)
 	struct fuse_mount *fm = get_fuse_mount_super(sb);
 	bool last;
 
-	/* If mount failed, we can still be called without any fc */
-	if (sb->s_root) {
+	/*
+	 * Only destroy the connection after full initialization, i.e.
+	 * once s_root is set (see commit d534d31d6a45d).
+	 * One exception: For virtio-fs, we call INIT before s_root is
+	 * set so we can determine the root node's mode.  We must call
+	 * DESTROY after INIT.  So if an error occurs during that time
+	 * window (specifically in fuse_make_root_inode()), we still
+	 * need to call virtio_fs_conn_destroy() here.
+	 */
+	if (sb->s_root || (fm->fc && fm->fc->initialized && !fm->submount)) {
 		last = fuse_mount_remove(fm);
 		if (last)
 			virtio_fs_conn_destroy(fm);
-- 
2.47.0

Re: [PATCH] virtio-fs: Query rootmode during mount

Posted by Miklos Szeredi 2 weeks, 4 days ago

On Thu, 24 Oct 2024 at 18:47, Hanna Czenczek <hreitz@redhat.com> wrote:

> To be able to issue INIT (and GETATTR), we need to at least partially
> initialize the super_block structure, which is currently done via
> fuse_fill_super_common().

What exactly is needed to be initialized?

> @@ -1762,18 +1801,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>         sb->s_d_op = &fuse_dentry_operations;
>
>         mutex_lock(&fuse_mutex);
> -       err = -EINVAL;
> -       if (ctx->fudptr && *ctx->fudptr)
> -               goto err_unlock;
> -
>         err = fuse_ctl_add_conn(fc);
>         if (err)
>                 goto err_unlock;
>
>         list_add_tail(&fc->entry, &fuse_conn_list);
>         sb->s_root = root_dentry;
> -       if (ctx->fudptr)
> -               *ctx->fudptr = fud;

This is wrong, because we need the fuse_mutex protection for checking
and setting the private_data on the fuse device file.

If this split is needed (which I'm not sure) then fud allocation
should probably be moved to part2 instead of moving the *ctx->fudptr
setup to part1.


> @@ -1635,8 +1657,16 @@ static void virtio_kill_sb(struct super_block *sb)
>         struct fuse_mount *fm = get_fuse_mount_super(sb);
>         bool last;
>
> -       /* If mount failed, we can still be called without any fc */
> -       if (sb->s_root) {
> +       /*
> +        * Only destroy the connection after full initialization, i.e.
> +        * once s_root is set (see commit d534d31d6a45d).
> +        * One exception: For virtio-fs, we call INIT before s_root is
> +        * set so we can determine the root node's mode.  We must call
> +        * DESTROY after INIT.  So if an error occurs during that time
> +        * window (specifically in fuse_make_root_inode()), we still
> +        * need to call virtio_fs_conn_destroy() here.
> +        */
> +       if (sb->s_root || (fm->fc && fm->fc->initialized && !fm->submount)) {

How could fm->submount be set if sb->s_root isn't?  Or sb->s_root set
and fc->initialized isn't?

Seems it would be sufficient to check fm->fc->initialized, no?

Thanks,
Miklos

Re: [PATCH] virtio-fs: Query rootmode during mount

Posted by Hanna Czenczek 2 weeks, 4 days ago

On 07.11.24 09:58, Miklos Szeredi wrote:
> On Thu, 24 Oct 2024 at 18:47, Hanna Czenczek <hreitz@redhat.com> wrote:
>
>> To be able to issue INIT (and GETATTR), we need to at least partially
>> initialize the super_block structure, which is currently done via
>> fuse_fill_super_common().
> What exactly is needed to be initialized?

It isn’t much, but I believe it’s most of fuse_fill_super_common() 
(without restructuring the code so flags returned by INIT are put into a 
separate structure and then re-joined into sb and fc later).

fuse_send_init() reads sb->s_bdi->ra_pages, process_init_reply() writes 
it and sb->s_time_gran, ->s_flags, ->s_stack_depth, ->s_export_op, and 
->s_iflags.  In addition, process_init_reply() depends on several flags 
and objects in fc being set up (among those are fc->dax and 
fc->default_permissions), which is done by fuse_fill_super_common().

So I think what we need from fuse_fill_super_common() is:
- fuse_sb_defaults() (so these values can then be overwritten by 
process_init_reply()),
- fuse_dax_conn_alloc(),
- fuse_bdi_init(),
- fc->default_permissions at least, but I’d just take the fc->[flag] 
setting block as a whole then.

I assume we’ll also want the SB_MANDLOCK check then, and 
rcu_assign_pointer().  Then we might as well also set the block sizes 
and the subtype.

The problem is that I don’t know the order things in 
fuse_fill_super_common() need to be in, and fuse_dev_alloc_install() is 
called before fuse_bdi_init(), so I didn’t want to move that.

So what I understand is that calling fuse_dev_alloc_install() there 
isn’t necessary?  I’m happy to move that to part 2, as you suggest, but 
I’m not sure we can really omit much from part 1 without changing how 
process_init_reply() operates.  We could in theory delay 
process_init_reply() until after GETATTR (and thus after setting 
s_root), but that seems kind of wrong, and would still require setting 
up BDI and DAX for fuse_send_init().

>> @@ -1762,18 +1801,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>>          sb->s_d_op = &fuse_dentry_operations;
>>
>>          mutex_lock(&fuse_mutex);
>> -       err = -EINVAL;
>> -       if (ctx->fudptr && *ctx->fudptr)
>> -               goto err_unlock;
>> -
>>          err = fuse_ctl_add_conn(fc);
>>          if (err)
>>                  goto err_unlock;
>>
>>          list_add_tail(&fc->entry, &fuse_conn_list);
>>          sb->s_root = root_dentry;
>> -       if (ctx->fudptr)
>> -               *ctx->fudptr = fud;
> This is wrong, because we need the fuse_mutex protection for checking
> and setting the private_data on the fuse device file.
>
> If this split is needed (which I'm not sure) then fud allocation
> should probably be moved to part2 instead of moving the *ctx->fudptr
> setup to part1.
>
>
>> @@ -1635,8 +1657,16 @@ static void virtio_kill_sb(struct super_block *sb)
>>          struct fuse_mount *fm = get_fuse_mount_super(sb);
>>          bool last;
>>
>> -       /* If mount failed, we can still be called without any fc */
>> -       if (sb->s_root) {
>> +       /*
>> +        * Only destroy the connection after full initialization, i.e.
>> +        * once s_root is set (see commit d534d31d6a45d).
>> +        * One exception: For virtio-fs, we call INIT before s_root is
>> +        * set so we can determine the root node's mode.  We must call
>> +        * DESTROY after INIT.  So if an error occurs during that time
>> +        * window (specifically in fuse_make_root_inode()), we still
>> +        * need to call virtio_fs_conn_destroy() here.
>> +        */
>> +       if (sb->s_root || (fm->fc && fm->fc->initialized && !fm->submount)) {
> How could fm->submount be set if sb->s_root isn't?

fuse_get_tree_submount(), specifically fuse_fill_super_submount() whose 
error path leads to deactivate_locked_super(), can fail before 
sb->s_root is set.

Still, the idea was rather to make it clear that this condition (INIT 
sent but s_root not set) is unique to non-submounts, so as not to mess 
with the submount code unintentionally.

> Or sb->s_root set
> and fc->initialized isn't?

That would be the non-virtio-fs non-submount case (fuse_fill_super()), 
where s_root is set first and INIT sent after.

Hanna

> Seems it would be sufficient to check fm->fc->initialized, no?
>
> Thanks,
> Miklos
>

Re: [PATCH] virtio-fs: Query rootmode during mount

Posted by Miklos Szeredi 2 weeks, 4 days ago

On Thu, 7 Nov 2024 at 11:00, Hanna Czenczek <hreitz@redhat.com> wrote:
> It isn’t much, but I believe it’s most of fuse_fill_super_common()
> (without restructuring the code so flags returned by INIT are put into a
> separate structure and then re-joined into sb and fc later).

Probably not worth it.

> fuse_send_init() reads sb->s_bdi->ra_pages, process_init_reply() writes
> it and sb->s_time_gran, ->s_flags, ->s_stack_depth, ->s_export_op, and
> ->s_iflags.  In addition, process_init_reply() depends on several flags
> and objects in fc being set up (among those are fc->dax and
> fc->default_permissions), which is done by fuse_fill_super_common().

Okay, got it.

> So I think what we need from fuse_fill_super_common() is:
> - fuse_sb_defaults() (so these values can then be overwritten by
> process_init_reply()),
> - fuse_dax_conn_alloc(),
> - fuse_bdi_init(),
> - fc->default_permissions at least, but I’d just take the fc->[flag]
> setting block as a whole then.
>
> I assume we’ll also want the SB_MANDLOCK check then, and
> rcu_assign_pointer().  Then we might as well also set the block sizes
> and the subtype.
>
> The problem is that I don’t know the order things in
> fuse_fill_super_common() need to be in, and fuse_dev_alloc_install() is
> called before fuse_bdi_init(), so I didn’t want to move that.
>
> So what I understand is that calling fuse_dev_alloc_install() there
> isn’t necessary?  I’m happy to move that to part 2, as you suggest, but

Hmm, fuse_dev_install() chains the fud onto fc->devices.  This is used
by fuse_resend() and fuse_abort_conn().  Resending isn't really
interesting at this point, but aborting should work from the start, so
this should not be moved after sending requests.

> I’m not sure we can really omit much from part 1 without changing how
> process_init_reply() operates.  We could in theory delay
> process_init_reply() until after GETATTR (and thus after setting
> s_root), but that seems kind of wrong, and would still require setting
> up BDI and DAX for fuse_send_init().

Agree, let's keep the split as is, but store the fud temporarily in
fuse_fs_context and leave setting *ctx->fudptr to part2.

> >> +       if (sb->s_root || (fm->fc && fm->fc->initialized && !fm->submount)) {
> > How could fm->submount be set if sb->s_root isn't?
>
> fuse_get_tree_submount(), specifically fuse_fill_super_submount() whose
> error path leads to deactivate_locked_super(), can fail before
> sb->s_root is set.

Right.

> Still, the idea was rather to make it clear that this condition (INIT
> sent but s_root not set) is unique to non-submounts, so as not to mess
> with the submount code unintentionally.
>
> > Or sb->s_root set
> > and fc->initialized isn't?
>
> That would be the non-virtio-fs non-submount case (fuse_fill_super()),
> where s_root is set first and INIT sent after.

But this is virtiofs specific code.

Regardless, something smells here: fuse_mount_remove() is only called
if sb->s_root is set (both plain fuse and virtiofs).  The top level
fuse_mount is added to fc->mounts in fuse_conn_init(), way before
sb->s_root is set...

Will look into this.

Thanks,
Miklos

Re: [PATCH] virtio-fs: Query rootmode during mount

Posted by Hanna Czenczek 2 weeks, 4 days ago

On 07.11.24 13:18, Miklos Szeredi wrote:
> On Thu, 7 Nov 2024 at 11:00, Hanna Czenczek <hreitz@redhat.com> wrote:
>> It isn’t much, but I believe it’s most of fuse_fill_super_common()
>> (without restructuring the code so flags returned by INIT are put into a
>> separate structure and then re-joined into sb and fc later).
> Probably not worth it.
>
>> fuse_send_init() reads sb->s_bdi->ra_pages, process_init_reply() writes
>> it and sb->s_time_gran, ->s_flags, ->s_stack_depth, ->s_export_op, and
>> ->s_iflags.  In addition, process_init_reply() depends on several flags
>> and objects in fc being set up (among those are fc->dax and
>> fc->default_permissions), which is done by fuse_fill_super_common().
> Okay, got it.
>
>> So I think what we need from fuse_fill_super_common() is:
>> - fuse_sb_defaults() (so these values can then be overwritten by
>> process_init_reply()),
>> - fuse_dax_conn_alloc(),
>> - fuse_bdi_init(),
>> - fc->default_permissions at least, but I’d just take the fc->[flag]
>> setting block as a whole then.
>>
>> I assume we’ll also want the SB_MANDLOCK check then, and
>> rcu_assign_pointer().  Then we might as well also set the block sizes
>> and the subtype.
>>
>> The problem is that I don’t know the order things in
>> fuse_fill_super_common() need to be in, and fuse_dev_alloc_install() is
>> called before fuse_bdi_init(), so I didn’t want to move that.
>>
>> So what I understand is that calling fuse_dev_alloc_install() there
>> isn’t necessary?  I’m happy to move that to part 2, as you suggest, but
> Hmm, fuse_dev_install() chains the fud onto fc->devices.  This is used
> by fuse_resend() and fuse_abort_conn().  Resending isn't really
> interesting at this point, but aborting should work from the start, so
> this should not be moved after sending requests.
>
>> I’m not sure we can really omit much from part 1 without changing how
>> process_init_reply() operates.  We could in theory delay
>> process_init_reply() until after GETATTR (and thus after setting
>> s_root), but that seems kind of wrong, and would still require setting
>> up BDI and DAX for fuse_send_init().
> Agree, let's keep the split as is, but store the fud temporarily in
> fuse_fs_context and leave setting *ctx->fudptr to part2.

Sure!

>>>> +       if (sb->s_root || (fm->fc && fm->fc->initialized && !fm->submount)) {
>>> How could fm->submount be set if sb->s_root isn't?
>> fuse_get_tree_submount(), specifically fuse_fill_super_submount() whose
>> error path leads to deactivate_locked_super(), can fail before
>> sb->s_root is set.
> Right.
>
>> Still, the idea was rather to make it clear that this condition (INIT
>> sent but s_root not set) is unique to non-submounts, so as not to mess
>> with the submount code unintentionally.
>>
>>> Or sb->s_root set
>>> and fc->initialized isn't?
>> That would be the non-virtio-fs non-submount case (fuse_fill_super()),
>> where s_root is set first and INIT sent after.
> But this is virtiofs specific code.

Ah, right…  I must have forgotten that at some point.

> Regardless, something smells here: fuse_mount_remove() is only called
> if sb->s_root is set (both plain fuse and virtiofs).  The top level
> fuse_mount is added to fc->mounts in fuse_conn_init(), way before
> sb->s_root is set...
>
> Will look into this.

Thanks!

Hanna

>
> Thanks,
> Miklos
>

Re: [PATCH] virtio-fs: Query rootmode during mount

Posted by Miklos Szeredi 2 weeks, 3 days ago

On Thu, 7 Nov 2024 at 18:59, Hanna Czenczek <hreitz@redhat.com> wrote:

> > Regardless, something smells here: fuse_mount_remove() is only called
> > if sb->s_root is set (both plain fuse and virtiofs).  The top level
> > fuse_mount is added to fc->mounts in fuse_conn_init(), way before
> > sb->s_root is set...
> >
> > Will look into this.

This is the deal:

1) When the top sb is created the fm->fc_entry is chained on
fc->mounts in fuse_conn_init() which is called near the top of
fuse_get_tree() and virtio_fs_get_tree(). If things fail during
->get_tree() or an existing fc is used instead of the newly allocated
one, then fuse_mount_destroy() is called, which frees the fm and
fm->fc, ignoring the mounts list.  This is okay, though a bit
confusing.

2) When a submount is created, then fm is only chained onto fc->mounts
towards the end of fuse_get_tree_submount() in the success case.

There should be no way for fuse_mount_destroy() to see fm chained onto
fc yet fc having other fuse mounts.  The below patch reflects this
with a WARN_ON().  Not tested yet, but there would be memory
corruption if it wasn't the case.

As for your patch, the condition (sb->s_root || (fm &&
fm->fc->initialized)) should work AFAICS.

Thanks,
Miklos

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd3321e29a3e..0c4eb5b89e71 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1653,6 +1653,7 @@ static int fuse_get_tree_submount(struct fs_context *fsc)
        if (!fm)
                return -ENOMEM;

+       INIT_LIST_HEAD(&fm->fc_entry);
        fm->fc = fuse_conn_get(fc);
        fsc->s_fs_info = fm;
        sb = sget_fc(fsc, NULL, set_anon_super_fc);
@@ -1976,6 +1977,13 @@ static void fuse_sb_destroy(struct super_block *sb)

 void fuse_mount_destroy(struct fuse_mount *fm)
 {
+       /*
+        * We can get here in case of an error before the top sb is fully set
+        * up.  The sole reference to the fc must come from fm in that case;
+        * otherwise we may end up with corruption on the fc->mounts list.
+        */
+       WARN_ON(!list_empty(&fm->fc_entry) && refcount_read(&fm->fc->count) != 1);
+
        fuse_conn_put(fm->fc);
        kfree_rcu(fm, rcu);
 }