In order to add directory delegation support, we need to break
delegations on the parent whenever there is going to be a change in the
directory.
Add a delegated_inode field to struct createdata. Most callers just
leave that as a NULL pointer, but do_mknodat() is changed to wait for a
delegation break if there is one.
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
fs/namei.c | 26 +++++++++++++++++---------
include/linux/fs.h | 2 +-
2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index fdf4e78cd041de8c564b7d1d89a46ba2aaf79d53..e8973000a312fb05ebb63a0d9bd83b9a5f8f805d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3487,6 +3487,9 @@ int vfs_create(struct createdata *args)
mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG);
error = security_inode_create(dir, dentry, mode);
+ if (error)
+ return error;
+ error = try_break_deleg(dir, args->delegated_inode);
if (error)
return error;
error = dir->i_op->create(idmap, dir, dentry, mode, args->excl);
@@ -4359,6 +4362,8 @@ static int may_mknod(umode_t mode)
static int do_mknodat(int dfd, struct filename *name, umode_t mode,
unsigned int dev)
{
+ struct delegated_inode delegated_inode = { };
+ struct createdata cargs = { };
struct mnt_idmap *idmap;
struct dentry *dentry;
struct path path;
@@ -4383,18 +4388,16 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
switch (mode & S_IFMT) {
case 0:
case S_IFREG:
- {
- struct createdata args = { .idmap = idmap,
- .dir = path.dentry->d_inode,
- .dentry = dentry,
- .mode = mode,
- .excl = true };
-
- error = vfs_create(&args);
+ cargs.idmap = idmap,
+ cargs.dir = path.dentry->d_inode,
+ cargs.dentry = dentry,
+ cargs.delegated_inode = &delegated_inode;
+ cargs.mode = mode,
+ cargs.excl = true,
+ error = vfs_create(&cargs);
if (!error)
security_path_post_mknod(idmap, dentry);
break;
- }
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(idmap, path.dentry->d_inode,
dentry, mode, new_decode_dev(dev));
@@ -4406,6 +4409,11 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
}
out2:
end_creating_path(&path, dentry);
+ if (is_delegated(&delegated_inode)) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry;
+ }
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b61873767b37591aecadd147623d7dfc866bef82..cfcb20a7c4ce4b6dcec98b3eccbdb5ec8bab6fa9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2116,12 +2116,12 @@ struct createdata {
struct mnt_idmap *idmap; // idmap of the mount the inode was found from
struct inode *dir; // inode of parent directory
struct dentry *dentry; // dentry of the child file
+ struct delegated_inode *delegated_inode; // returns parent inode, if delegated
umode_t mode; // mode of the child file
bool excl; // whether the file must not yet exist
};
int vfs_create(struct createdata *);
-
struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, struct delegated_inode *);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
--
2.51.1
On Mon, Nov 03, 2025 at 07:52:38AM -0500, Jeff Layton wrote:
> In order to add directory delegation support, we need to break
> delegations on the parent whenever there is going to be a change in the
> directory.
>
> Add a delegated_inode parameter to struct createdata. Most callers just
> leave that as a NULL pointer, but do_mknodat() is changed to wait for a
> delegation break if there is one.
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
> fs/namei.c | 26 +++++++++++++++++---------
> include/linux/fs.h | 2 +-
> 2 files changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
...
> @@ -4359,6 +4362,8 @@ static int may_mknod(umode_t mode)
> static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> unsigned int dev)
> {
> + struct delegated_inode delegated_inode = { };
> + struct createdata cargs = { };
> struct mnt_idmap *idmap;
> struct dentry *dentry;
> struct path path;
> @@ -4383,18 +4388,16 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> switch (mode & S_IFMT) {
> case 0:
> case S_IFREG:
> - {
> - struct createdata args = { .idmap = idmap,
> - .dir = path.dentry->d_inode,
> - .dentry = dentry,
> - .mode = mode,
> - .excl = true };
> -
> - error = vfs_create(&args);
> + cargs.idmap = idmap,
> + cargs.dir = path.dentry->d_inode,
> + cargs.dentry = dentry,
> + cargs.delegated_inode = &delegated_inode;
> + cargs.mode = mode,
> + cargs.excl = true,
Hi Jeff,
I don't think it makes any difference to the generated code.
But I think it would be more intuitive to use ';' rather than ','
at the end of the lines immediately above.
> + error = vfs_create(&cargs);
> if (!error)
> security_path_post_mknod(idmap, dentry);
> break;
> - }
> case S_IFCHR: case S_IFBLK:
> error = vfs_mknod(idmap, path.dentry->d_inode,
> dentry, mode, new_decode_dev(dev));
...
On Tue, 2025-11-04 at 17:38 +0000, Simon Horman wrote:
> On Mon, Nov 03, 2025 at 07:52:38AM -0500, Jeff Layton wrote:
> > In order to add directory delegation support, we need to break
> > delegations on the parent whenever there is going to be a change in the
> > directory.
> >
> > Add a delegated_inode parameter to struct createdata. Most callers just
> > leave that as a NULL pointer, but do_mknodat() is changed to wait for a
> > delegation break if there is one.
> >
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> > fs/namei.c | 26 +++++++++++++++++---------
> > include/linux/fs.h | 2 +-
> > 2 files changed, 18 insertions(+), 10 deletions(-)
> >
> > diff --git a/fs/namei.c b/fs/namei.c
>
> ...
>
> > @@ -4359,6 +4362,8 @@ static int may_mknod(umode_t mode)
> > static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > unsigned int dev)
> > {
> > + struct delegated_inode delegated_inode = { };
> > + struct createdata cargs = { };
> > struct mnt_idmap *idmap;
> > struct dentry *dentry;
> > struct path path;
> > @@ -4383,18 +4388,16 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > switch (mode & S_IFMT) {
> > case 0:
> > case S_IFREG:
> > - {
> > - struct createdata args = { .idmap = idmap,
> > - .dir = path.dentry->d_inode,
> > - .dentry = dentry,
> > - .mode = mode,
> > - .excl = true };
> > -
> > - error = vfs_create(&args);
> > + cargs.idmap = idmap,
> > + cargs.dir = path.dentry->d_inode,
> > + cargs.dentry = dentry,
> > + cargs.delegated_inode = &delegated_inode;
> > + cargs.mode = mode,
> > + cargs.excl = true,
>
> Hi Jeff,
>
> I don't think it makes any difference to the generated code.
> But I think it would be more intuitive to use ';' rather than ','
> at the end of the lines immediately above.
>
Definitely. Will fix.
> > + error = vfs_create(&cargs);
> > if (!error)
> > security_path_post_mknod(idmap, dentry);
> > break;
> > - }
> > case S_IFCHR: case S_IFBLK:
> > error = vfs_mknod(idmap, path.dentry->d_inode,
> > dentry, mode, new_decode_dev(dev));
>
> ...
Thanks!
--
Jeff Layton <jlayton@kernel.org>
On Mon, 03 Nov 2025, Jeff Layton wrote:
> In order to add directory delegation support, we need to break
> delegations on the parent whenever there is going to be a change in the
> directory.
>
> Add a delegated_inode parameter to struct createdata. Most callers just
> leave that as a NULL pointer, but do_mknodat() is changed to wait for a
> delegation break if there is one.
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
> fs/namei.c | 26 +++++++++++++++++---------
> include/linux/fs.h | 2 +-
> 2 files changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index fdf4e78cd041de8c564b7d1d89a46ba2aaf79d53..e8973000a312fb05ebb63a0d9bd83b9a5f8f805d 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3487,6 +3487,9 @@ int vfs_create(struct createdata *args)
>
> mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG);
> error = security_inode_create(dir, dentry, mode);
> + if (error)
> + return error;
> + error = try_break_deleg(dir, args->delegated_inode);
> if (error)
> return error;
> error = dir->i_op->create(idmap, dir, dentry, mode, args->excl);
> @@ -4359,6 +4362,8 @@ static int may_mknod(umode_t mode)
> static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> unsigned int dev)
> {
> + struct delegated_inode delegated_inode = { };
> + struct createdata cargs = { };
If we must have 'createdata', can it have a 'struct delegated_inode'
rather than a pointer to it?
NeilBrown
> struct mnt_idmap *idmap;
> struct dentry *dentry;
> struct path path;
> @@ -4383,18 +4388,16 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> switch (mode & S_IFMT) {
> case 0:
> case S_IFREG:
> - {
> - struct createdata args = { .idmap = idmap,
> - .dir = path.dentry->d_inode,
> - .dentry = dentry,
> - .mode = mode,
> - .excl = true };
> -
> - error = vfs_create(&args);
> + cargs.idmap = idmap,
> + cargs.dir = path.dentry->d_inode,
> + cargs.dentry = dentry,
> + cargs.delegated_inode = &delegated_inode;
> + cargs.mode = mode,
> + cargs.excl = true,
> + error = vfs_create(&cargs);
> if (!error)
> security_path_post_mknod(idmap, dentry);
> break;
> - }
> case S_IFCHR: case S_IFBLK:
> error = vfs_mknod(idmap, path.dentry->d_inode,
> dentry, mode, new_decode_dev(dev));
> @@ -4406,6 +4409,11 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> }
> out2:
> end_creating_path(&path, dentry);
> + if (is_delegated(&delegated_inode)) {
> + error = break_deleg_wait(&delegated_inode);
> + if (!error)
> + goto retry;
> + }
> if (retry_estale(error, lookup_flags)) {
> lookup_flags |= LOOKUP_REVAL;
> goto retry;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index b61873767b37591aecadd147623d7dfc866bef82..cfcb20a7c4ce4b6dcec98b3eccbdb5ec8bab6fa9 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2116,12 +2116,12 @@ struct createdata {
> struct mnt_idmap *idmap; // idmap of the mount the inode was found from
> struct inode *dir; // inode of parent directory
> struct dentry *dentry; // dentry of the child file
> + struct delegated_inode *delegated_inode; // returns parent inode, if delegated
> umode_t mode; // mode of the child file
> bool excl; // whether the file must not yet exist
> };
>
> int vfs_create(struct createdata *);
> -
> struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
> struct dentry *, umode_t, struct delegated_inode *);
> int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
>
> --
> 2.51.1
>
>
On Tue, 2025-11-04 at 11:14 +1100, NeilBrown wrote:
> On Mon, 03 Nov 2025, Jeff Layton wrote:
> > In order to add directory delegation support, we need to break
> > delegations on the parent whenever there is going to be a change in the
> > directory.
> >
> > Add a delegated_inode parameter to struct createdata. Most callers just
> > leave that as a NULL pointer, but do_mknodat() is changed to wait for a
> > delegation break if there is one.
> >
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> > fs/namei.c | 26 +++++++++++++++++---------
> > include/linux/fs.h | 2 +-
> > 2 files changed, 18 insertions(+), 10 deletions(-)
> >
> > diff --git a/fs/namei.c b/fs/namei.c
> > index fdf4e78cd041de8c564b7d1d89a46ba2aaf79d53..e8973000a312fb05ebb63a0d9bd83b9a5f8f805d 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -3487,6 +3487,9 @@ int vfs_create(struct createdata *args)
> >
> > mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG);
> > error = security_inode_create(dir, dentry, mode);
> > + if (error)
> > + return error;
> > + error = try_break_deleg(dir, args->delegated_inode);
> > if (error)
> > return error;
> > error = dir->i_op->create(idmap, dir, dentry, mode, args->excl);
> > @@ -4359,6 +4362,8 @@ static int may_mknod(umode_t mode)
> > static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > unsigned int dev)
> > {
> > + struct delegated_inode delegated_inode = { };
> > + struct createdata cargs = { };
>
> If we must have 'createdata', can it have a 'struct delegated_inode'
> rather than a pointer to it?
>
If we do that, then we'd need some way to signal that the caller
doesn't want to wait on the delegation break. Currently that's
indicated by setting cargs.delegated_inode to NULL. I suppose we could
add a bool for this or something.
I confess that I too am lukewarm on struct createdata. I can live with
it, but it's not clearly a win to me either.
Christian, thoughts?
>
> > struct mnt_idmap *idmap;
> > struct dentry *dentry;
> > struct path path;
> > @@ -4383,18 +4388,16 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > switch (mode & S_IFMT) {
> > case 0:
> > case S_IFREG:
> > - {
> > - struct createdata args = { .idmap = idmap,
> > - .dir = path.dentry->d_inode,
> > - .dentry = dentry,
> > - .mode = mode,
> > - .excl = true };
> > -
> > - error = vfs_create(&args);
> > + cargs.idmap = idmap,
> > + cargs.dir = path.dentry->d_inode,
> > + cargs.dentry = dentry,
> > + cargs.delegated_inode = &delegated_inode;
> > + cargs.mode = mode,
> > + cargs.excl = true,
> > + error = vfs_create(&cargs);
> > if (!error)
> > security_path_post_mknod(idmap, dentry);
> > break;
> > - }
> > case S_IFCHR: case S_IFBLK:
> > error = vfs_mknod(idmap, path.dentry->d_inode,
> > dentry, mode, new_decode_dev(dev));
> > @@ -4406,6 +4409,11 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > }
> > out2:
> > end_creating_path(&path, dentry);
> > + if (is_delegated(&delegated_inode)) {
> > + error = break_deleg_wait(&delegated_inode);
> > + if (!error)
> > + goto retry;
> > + }
> > if (retry_estale(error, lookup_flags)) {
> > lookup_flags |= LOOKUP_REVAL;
> > goto retry;
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index b61873767b37591aecadd147623d7dfc866bef82..cfcb20a7c4ce4b6dcec98b3eccbdb5ec8bab6fa9 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -2116,12 +2116,12 @@ struct createdata {
> > struct mnt_idmap *idmap; // idmap of the mount the inode was found from
> > struct inode *dir; // inode of parent directory
> > struct dentry *dentry; // dentry of the child file
> > + struct delegated_inode *delegated_inode; // returns parent inode, if delegated
> > umode_t mode; // mode of the child file
> > bool excl; // whether the file must not yet exist
> > };
> >
> > int vfs_create(struct createdata *);
> > -
> > struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
> > struct dentry *, umode_t, struct delegated_inode *);
> > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
> >
> > --
> > 2.51.1
> >
> >
--
Jeff Layton <jlayton@kernel.org>
On Mon, Nov 03, 2025 at 07:30:57PM -0500, Jeff Layton wrote:
> On Tue, 2025-11-04 at 11:14 +1100, NeilBrown wrote:
> > On Mon, 03 Nov 2025, Jeff Layton wrote:
> > > In order to add directory delegation support, we need to break
> > > delegations on the parent whenever there is going to be a change in the
> > > directory.
> > >
> > > Add a delegated_inode parameter to struct createdata. Most callers just
> > > leave that as a NULL pointer, but do_mknodat() is changed to wait for a
> > > delegation break if there is one.
> > >
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > > fs/namei.c | 26 +++++++++++++++++---------
> > > include/linux/fs.h | 2 +-
> > > 2 files changed, 18 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/fs/namei.c b/fs/namei.c
> > > index fdf4e78cd041de8c564b7d1d89a46ba2aaf79d53..e8973000a312fb05ebb63a0d9bd83b9a5f8f805d 100644
> > > --- a/fs/namei.c
> > > +++ b/fs/namei.c
> > > @@ -3487,6 +3487,9 @@ int vfs_create(struct createdata *args)
> > >
> > > mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG);
> > > error = security_inode_create(dir, dentry, mode);
> > > + if (error)
> > > + return error;
> > > + error = try_break_deleg(dir, args->delegated_inode);
> > > if (error)
> > > return error;
> > > error = dir->i_op->create(idmap, dir, dentry, mode, args->excl);
> > > @@ -4359,6 +4362,8 @@ static int may_mknod(umode_t mode)
> > > static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > > unsigned int dev)
> > > {
> > > + struct delegated_inode delegated_inode = { };
> > > + struct createdata cargs = { };
> >
> > If we must have 'createdata', can it have a 'struct delegated_inode'
> > rather than a pointer to it?
> >
>
> If we do that, then we'd need some way to signal that the caller
> doesn't want to wait on the delegation break. Currently that's
> indicated by setting cargs.delegated_inode to NULL. I suppose we could
> add a bool for this or something.
>
> I confess that I too am lukewarm on struct createdata. I can live with
> it, but it's not clearly a win to me either.
>
> Christian, thoughts?
If two stable voices of the community seem to have consensus that this
isn't worth it, then it's obviously fine to not do it.
>
> >
> > > struct mnt_idmap *idmap;
> > > struct dentry *dentry;
> > > struct path path;
> > > @@ -4383,18 +4388,16 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > > switch (mode & S_IFMT) {
> > > case 0:
> > > case S_IFREG:
> > > - {
> > > - struct createdata args = { .idmap = idmap,
> > > - .dir = path.dentry->d_inode,
> > > - .dentry = dentry,
> > > - .mode = mode,
> > > - .excl = true };
> > > -
> > > - error = vfs_create(&args);
> > > + cargs.idmap = idmap,
> > > + cargs.dir = path.dentry->d_inode,
> > > + cargs.dentry = dentry,
> > > + cargs.delegated_inode = &delegated_inode;
> > > + cargs.mode = mode,
> > > + cargs.excl = true,
> > > + error = vfs_create(&cargs);
> > > if (!error)
> > > security_path_post_mknod(idmap, dentry);
> > > break;
> > > - }
> > > case S_IFCHR: case S_IFBLK:
> > > error = vfs_mknod(idmap, path.dentry->d_inode,
> > > dentry, mode, new_decode_dev(dev));
> > > @@ -4406,6 +4409,11 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
> > > }
> > > out2:
> > > end_creating_path(&path, dentry);
> > > + if (is_delegated(&delegated_inode)) {
> > > + error = break_deleg_wait(&delegated_inode);
> > > + if (!error)
> > > + goto retry;
> > > + }
> > > if (retry_estale(error, lookup_flags)) {
> > > lookup_flags |= LOOKUP_REVAL;
> > > goto retry;
> > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > index b61873767b37591aecadd147623d7dfc866bef82..cfcb20a7c4ce4b6dcec98b3eccbdb5ec8bab6fa9 100644
> > > --- a/include/linux/fs.h
> > > +++ b/include/linux/fs.h
> > > @@ -2116,12 +2116,12 @@ struct createdata {
> > > struct mnt_idmap *idmap; // idmap of the mount the inode was found from
> > > struct inode *dir; // inode of parent directory
> > > struct dentry *dentry; // dentry of the child file
> > > + struct delegated_inode *delegated_inode; // returns parent inode, if delegated
> > > umode_t mode; // mode of the child file
> > > bool excl; // whether the file must not yet exist
> > > };
> > >
> > > int vfs_create(struct createdata *);
> > > -
> > > struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
> > > struct dentry *, umode_t, struct delegated_inode *);
> > > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
> > >
> > > --
> > > 2.51.1
> > >
> > >
>
> --
> Jeff Layton <jlayton@kernel.org>
© 2016 - 2026 Red Hat, Inc.