[PATCH v2 2/2] nfs: update inode ctime after removexattr operation

Jeff Layton posted 2 patches 1 week, 4 days ago
[PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Jeff Layton 1 week, 4 days ago
xfstest generic/728 fails with delegated timestamps. The client does a
removexattr and then a stat to test the ctime, which doesn't change. The
stat() doesn't trigger a GETATTR because of the delegated timestamps, so
it relies on the cached ctime, which is wrong.

The setxattr compound has a trailing GETATTR, which ensures that its
ctime gets updated. Follow the same strategy with removexattr.

Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
Reported-by: Olga Kornievskaia <aglo@umich.edu>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
 fs/nfs/nfs42xdr.c       | 10 ++++++++--
 include/linux/nfs_xdr.h |  3 +++
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
 static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
+	__u32 bitmask[NFS_BITMASK_SZ];
 	struct nfs42_removexattrargs args = {
 		.fh = NFS_FH(inode),
+		.bitmask = bitmask,
 		.xattr_name = name,
 	};
-	struct nfs42_removexattrres res;
+	struct nfs42_removexattrres res = {
+		.server = server,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
 		.rpc_argp = &args,
@@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
 	int ret;
 	unsigned long timestamp = jiffies;
 
+	res.fattr = nfs_alloc_fattr();
+	if (!res.fattr)
+		return -ENOMEM;
+
+	nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
+			 inode, NFS_INO_INVALID_CHANGE);
+
 	ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
 	    &res.seq_res, 1);
 	trace_nfs4_removexattr(inode, name, ret);
-	if (!ret)
+	if (!ret) {
 		nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+		ret = nfs_post_op_update_inode(inode, res.fattr);
+	}
 
+	kfree(res.fattr);
 	return ret;
 }
 
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -263,11 +263,13 @@
 #define NFS4_enc_removexattr_sz		(compound_encode_hdr_maxsz + \
 					 encode_sequence_maxsz + \
 					 encode_putfh_maxsz + \
-					 encode_removexattr_maxsz)
+					 encode_removexattr_maxsz + \
+					 encode_getattr_maxsz)
 #define NFS4_dec_removexattr_sz		(compound_decode_hdr_maxsz + \
 					 decode_sequence_maxsz + \
 					 decode_putfh_maxsz + \
-					 decode_removexattr_maxsz)
+					 decode_removexattr_maxsz + \
+					 decode_getattr_maxsz)
 
 /*
  * These values specify the maximum amount of data that is not
@@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_removexattr(xdr, args->xattr_name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
 		goto out;
 
 	status = decode_removexattr(xdr, &res->cinfo);
+	if (status)
+		goto out;
+	status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
 struct nfs42_removexattrargs {
 	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh			*fh;
+	const u32			*bitmask;
 	const char			*xattr_name;
 };
 
 struct nfs42_removexattrres {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs4_change_info		cinfo;
+	struct nfs_fattr		*fattr;
+	const struct nfs_server		*server;
 };
 
 #endif /* CONFIG_NFS_V4_2 */

-- 
2.53.0
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Olga Kornievskaia 1 week, 1 day ago
On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> xfstest generic/728 fails with delegated timestamps. The client does a
> removexattr and then a stat to test the ctime, which doesn't change. The
> stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> it relies on the cached ctime, which is wrong.
>
> The setxattr compound has a trailing GETATTR, which ensures that its
> ctime gets updated. Follow the same strategy with removexattr.

This approach relies on the fact that the server the serves delegated
attributes would update change_attr on operations which might now
necessarily happen (ie, linux server does not update change_attribute
on writes or clone). I propose an alternative fix for the failing
generic/728.

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7b3ca68fb4bb..ede1835a45b3 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
*inode, const char *name)
            &res.seq_res, 1);
        trace_nfs4_removexattr(inode, name, ret);
        if (!ret)
-               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+               if (nfs_have_delegated_attributes(inode)) {
+                       nfs_update_delegated_mtime(inode);
+                       spin_lock(&inode->i_lock);
+                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
+                       spin_unlock(&inode->i_lock);
+               } else
+                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);

        return ret;
 }

>
> Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> Reported-by: Olga Kornievskaia <aglo@umich.edu>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
>  fs/nfs/nfs42xdr.c       | 10 ++++++++--
>  include/linux/nfs_xdr.h |  3 +++
>  3 files changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
>  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
>  {
>         struct nfs_server *server = NFS_SERVER(inode);
> +       __u32 bitmask[NFS_BITMASK_SZ];
>         struct nfs42_removexattrargs args = {
>                 .fh = NFS_FH(inode),
> +               .bitmask = bitmask,
>                 .xattr_name = name,
>         };
> -       struct nfs42_removexattrres res;
> +       struct nfs42_removexattrres res = {
> +               .server = server,
> +       };
>         struct rpc_message msg = {
>                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
>                 .rpc_argp = &args,
> @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
>         int ret;
>         unsigned long timestamp = jiffies;
>
> +       res.fattr = nfs_alloc_fattr();
> +       if (!res.fattr)
> +               return -ENOMEM;
> +
> +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> +                        inode, NFS_INO_INVALID_CHANGE);
> +
>         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
>             &res.seq_res, 1);
>         trace_nfs4_removexattr(inode, name, ret);
> -       if (!ret)
> +       if (!ret) {
>                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> +               ret = nfs_post_op_update_inode(inode, res.fattr);
> +       }
>
> +       kfree(res.fattr);
>         return ret;
>  }
>
> diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> --- a/fs/nfs/nfs42xdr.c
> +++ b/fs/nfs/nfs42xdr.c
> @@ -263,11 +263,13 @@
>  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
>                                          encode_sequence_maxsz + \
>                                          encode_putfh_maxsz + \
> -                                        encode_removexattr_maxsz)
> +                                        encode_removexattr_maxsz + \
> +                                        encode_getattr_maxsz)
>  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
>                                          decode_sequence_maxsz + \
>                                          decode_putfh_maxsz + \
> -                                        decode_removexattr_maxsz)
> +                                        decode_removexattr_maxsz + \
> +                                        decode_getattr_maxsz)
>
>  /*
>   * These values specify the maximum amount of data that is not
> @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
>         encode_sequence(xdr, &args->seq_args, &hdr);
>         encode_putfh(xdr, args->fh, &hdr);
>         encode_removexattr(xdr, args->xattr_name, &hdr);
> +       encode_getfattr(xdr, args->bitmask, &hdr);
>         encode_nops(&hdr);
>  }
>
> @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
>                 goto out;
>
>         status = decode_removexattr(xdr, &res->cinfo);
> +       if (status)
> +               goto out;
> +       status = decode_getfattr(xdr, res->fattr, res->server);
>  out:
>         return status;
>  }
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
>  struct nfs42_removexattrargs {
>         struct nfs4_sequence_args       seq_args;
>         struct nfs_fh                   *fh;
> +       const u32                       *bitmask;
>         const char                      *xattr_name;
>  };
>
>  struct nfs42_removexattrres {
>         struct nfs4_sequence_res        seq_res;
>         struct nfs4_change_info         cinfo;
> +       struct nfs_fattr                *fattr;
> +       const struct nfs_server         *server;
>  };
>
>  #endif /* CONFIG_NFS_V4_2 */
>
> --
> 2.53.0
>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Jeff Layton 1 week, 1 day ago
On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > xfstest generic/728 fails with delegated timestamps. The client does a
> > removexattr and then a stat to test the ctime, which doesn't change. The
> > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > it relies on the cached ctime, which is wrong.
> > 
> > The setxattr compound has a trailing GETATTR, which ensures that its
> > ctime gets updated. Follow the same strategy with removexattr.
> 
> This approach relies on the fact that the server the serves delegated
> attributes would update change_attr on operations which might now
> necessarily happen (ie, linux server does not update change_attribute
> on writes or clone). I propose an alternative fix for the failing
> generic/728.
> 
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index 7b3ca68fb4bb..ede1835a45b3 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> *inode, const char *name)
>             &res.seq_res, 1);
>         trace_nfs4_removexattr(inode, name, ret);
>         if (!ret)
> -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> +               if (nfs_have_delegated_attributes(inode)) {
> +                       nfs_update_delegated_mtime(inode);
> +                       spin_lock(&inode->i_lock);
> +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> +                       spin_unlock(&inode->i_lock);
> +               } else
> +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> 
>         return ret;
>  }
> 

What's the advantage of doing it this way?

You just sent a REMOVEXATTR operation to the server that will change
the mtime there. The server has the most up-to-date version of the
mtime and ctime at that point.

It's certainly possible that the REMOVEXATTR is the only change that
occurred. With what I'm proposing, we don't even need to do a SETATTR
at all if nothing else changed. With your version, you would.

> > 
> > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> >  include/linux/nfs_xdr.h |  3 +++
> >  3 files changed, 27 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > --- a/fs/nfs/nfs42proc.c
> > +++ b/fs/nfs/nfs42proc.c
> > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> >  {
> >         struct nfs_server *server = NFS_SERVER(inode);
> > +       __u32 bitmask[NFS_BITMASK_SZ];
> >         struct nfs42_removexattrargs args = {
> >                 .fh = NFS_FH(inode),
> > +               .bitmask = bitmask,
> >                 .xattr_name = name,
> >         };
> > -       struct nfs42_removexattrres res;
> > +       struct nfs42_removexattrres res = {
> > +               .server = server,
> > +       };
> >         struct rpc_message msg = {
> >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> >                 .rpc_argp = &args,
> > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> >         int ret;
> >         unsigned long timestamp = jiffies;
> > 
> > +       res.fattr = nfs_alloc_fattr();
> > +       if (!res.fattr)
> > +               return -ENOMEM;
> > +
> > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > +                        inode, NFS_INO_INVALID_CHANGE);
> > +
> >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> >             &res.seq_res, 1);
> >         trace_nfs4_removexattr(inode, name, ret);
> > -       if (!ret)
> > +       if (!ret) {
> >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > +       }
> > 
> > +       kfree(res.fattr);
> >         return ret;
> >  }
> > 
> > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > --- a/fs/nfs/nfs42xdr.c
> > +++ b/fs/nfs/nfs42xdr.c
> > @@ -263,11 +263,13 @@
> >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> >                                          encode_sequence_maxsz + \
> >                                          encode_putfh_maxsz + \
> > -                                        encode_removexattr_maxsz)
> > +                                        encode_removexattr_maxsz + \
> > +                                        encode_getattr_maxsz)
> >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> >                                          decode_sequence_maxsz + \
> >                                          decode_putfh_maxsz + \
> > -                                        decode_removexattr_maxsz)
> > +                                        decode_removexattr_maxsz + \
> > +                                        decode_getattr_maxsz)
> > 
> >  /*
> >   * These values specify the maximum amount of data that is not
> > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> >         encode_sequence(xdr, &args->seq_args, &hdr);
> >         encode_putfh(xdr, args->fh, &hdr);
> >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > +       encode_getfattr(xdr, args->bitmask, &hdr);
> >         encode_nops(&hdr);
> >  }
> > 
> > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> >                 goto out;
> > 
> >         status = decode_removexattr(xdr, &res->cinfo);
> > +       if (status)
> > +               goto out;
> > +       status = decode_getfattr(xdr, res->fattr, res->server);
> >  out:
> >         return status;
> >  }
> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > --- a/include/linux/nfs_xdr.h
> > +++ b/include/linux/nfs_xdr.h
> > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> >  struct nfs42_removexattrargs {
> >         struct nfs4_sequence_args       seq_args;
> >         struct nfs_fh                   *fh;
> > +       const u32                       *bitmask;
> >         const char                      *xattr_name;
> >  };
> > 
> >  struct nfs42_removexattrres {
> >         struct nfs4_sequence_res        seq_res;
> >         struct nfs4_change_info         cinfo;
> > +       struct nfs_fattr                *fattr;
> > +       const struct nfs_server         *server;
> >  };
> > 
> >  #endif /* CONFIG_NFS_V4_2 */
> > 
> > --
> > 2.53.0
> > 

-- 
Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Olga Kornievskaia 1 week, 1 day ago
On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > >
> > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > it relies on the cached ctime, which is wrong.
> > >
> > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > ctime gets updated. Follow the same strategy with removexattr.
> >
> > This approach relies on the fact that the server the serves delegated
> > attributes would update change_attr on operations which might now
> > necessarily happen (ie, linux server does not update change_attribute
> > on writes or clone). I propose an alternative fix for the failing
> > generic/728.
> >
> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > index 7b3ca68fb4bb..ede1835a45b3 100644
> > --- a/fs/nfs/nfs42proc.c
> > +++ b/fs/nfs/nfs42proc.c
> > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > *inode, const char *name)
> >             &res.seq_res, 1);
> >         trace_nfs4_removexattr(inode, name, ret);
> >         if (!ret)
> > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > +               if (nfs_have_delegated_attributes(inode)) {
> > +                       nfs_update_delegated_mtime(inode);
> > +                       spin_lock(&inode->i_lock);
> > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > +                       spin_unlock(&inode->i_lock);
> > +               } else
> > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> >
> >         return ret;
> >  }
> >
>
> What's the advantage of doing it this way?
>
> You just sent a REMOVEXATTR operation to the server that will change
> the mtime there. The server has the most up-to-date version of the
> mtime and ctime at that point.

In presence of delegated attributes, Is the server required to update
its mtime/ctime on an operation? As I mentioned, the linux server does
not update its ctime/mtime for WRITE, CLONE, COPY. Is possible that
some implementations might be different and also do not update the
ctime/mtime on REMOVEXATTR? Therefore I was suggesting that the patch
relies on the fact that it would receive an updated value. Of course
perhaps all implementations are done the same as the linux server and
my point is moot. I didn't see anything in the spec that clarifies
what the server supposed to do (and client rely on).

> It's certainly possible that the REMOVEXATTR is the only change that
> occurred. With what I'm proposing, we don't even need to do a SETATTR
> at all if nothing else changed. With your version, you would.
>
> > >
> > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > >  include/linux/nfs_xdr.h |  3 +++
> > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > --- a/fs/nfs/nfs42proc.c
> > > +++ b/fs/nfs/nfs42proc.c
> > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > >  {
> > >         struct nfs_server *server = NFS_SERVER(inode);
> > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > >         struct nfs42_removexattrargs args = {
> > >                 .fh = NFS_FH(inode),
> > > +               .bitmask = bitmask,
> > >                 .xattr_name = name,
> > >         };
> > > -       struct nfs42_removexattrres res;
> > > +       struct nfs42_removexattrres res = {
> > > +               .server = server,
> > > +       };
> > >         struct rpc_message msg = {
> > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > >                 .rpc_argp = &args,
> > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > >         int ret;
> > >         unsigned long timestamp = jiffies;
> > >
> > > +       res.fattr = nfs_alloc_fattr();
> > > +       if (!res.fattr)
> > > +               return -ENOMEM;
> > > +
> > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > +
> > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > >             &res.seq_res, 1);
> > >         trace_nfs4_removexattr(inode, name, ret);
> > > -       if (!ret)
> > > +       if (!ret) {
> > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > +       }
> > >
> > > +       kfree(res.fattr);
> > >         return ret;
> > >  }
> > >
> > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > --- a/fs/nfs/nfs42xdr.c
> > > +++ b/fs/nfs/nfs42xdr.c
> > > @@ -263,11 +263,13 @@
> > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > >                                          encode_sequence_maxsz + \
> > >                                          encode_putfh_maxsz + \
> > > -                                        encode_removexattr_maxsz)
> > > +                                        encode_removexattr_maxsz + \
> > > +                                        encode_getattr_maxsz)
> > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > >                                          decode_sequence_maxsz + \
> > >                                          decode_putfh_maxsz + \
> > > -                                        decode_removexattr_maxsz)
> > > +                                        decode_removexattr_maxsz + \
> > > +                                        decode_getattr_maxsz)
> > >
> > >  /*
> > >   * These values specify the maximum amount of data that is not
> > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > >         encode_putfh(xdr, args->fh, &hdr);
> > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > >         encode_nops(&hdr);
> > >  }
> > >
> > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > >                 goto out;
> > >
> > >         status = decode_removexattr(xdr, &res->cinfo);
> > > +       if (status)
> > > +               goto out;
> > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > >  out:
> > >         return status;
> > >  }
> > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > --- a/include/linux/nfs_xdr.h
> > > +++ b/include/linux/nfs_xdr.h
> > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > >  struct nfs42_removexattrargs {
> > >         struct nfs4_sequence_args       seq_args;
> > >         struct nfs_fh                   *fh;
> > > +       const u32                       *bitmask;
> > >         const char                      *xattr_name;
> > >  };
> > >
> > >  struct nfs42_removexattrres {
> > >         struct nfs4_sequence_res        seq_res;
> > >         struct nfs4_change_info         cinfo;
> > > +       struct nfs_fattr                *fattr;
> > > +       const struct nfs_server         *server;
> > >  };
> > >
> > >  #endif /* CONFIG_NFS_V4_2 */
> > >
> > > --
> > > 2.53.0
> > >
>
> --
> Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Jeff Layton 1 week, 1 day ago
On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > 
> > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > it relies on the cached ctime, which is wrong.
> > > > 
> > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > ctime gets updated. Follow the same strategy with removexattr.
> > > 
> > > This approach relies on the fact that the server the serves delegated
> > > attributes would update change_attr on operations which might now
> > > necessarily happen (ie, linux server does not update change_attribute
> > > on writes or clone). I propose an alternative fix for the failing
> > > generic/728.
> > > 
> > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > --- a/fs/nfs/nfs42proc.c
> > > +++ b/fs/nfs/nfs42proc.c
> > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > *inode, const char *name)
> > >             &res.seq_res, 1);
> > >         trace_nfs4_removexattr(inode, name, ret);
> > >         if (!ret)
> > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > +               if (nfs_have_delegated_attributes(inode)) {
> > > +                       nfs_update_delegated_mtime(inode);
> > > +                       spin_lock(&inode->i_lock);
> > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > +                       spin_unlock(&inode->i_lock);
> > > +               } else
> > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > 
> > >         return ret;
> > >  }
> > > 
> > 
> > What's the advantage of doing it this way?
> > 
> > You just sent a REMOVEXATTR operation to the server that will change
> > the mtime there. The server has the most up-to-date version of the
> > mtime and ctime at that point.
> 
> In presence of delegated attributes, Is the server required to update
> its mtime/ctime on an operation? As I mentioned, the linux server does
> not update its ctime/mtime for WRITE, CLONE, COPY.
> 
> Is possible that
> some implementations might be different and also do not update the
> ctime/mtime on REMOVEXATTR?
> 
> Therefore I was suggesting that the patch
> relies on the fact that it would receive an updated value. Of course
> perhaps all implementations are done the same as the linux server and
> my point is moot. I didn't see anything in the spec that clarifies
> what the server supposed to do (and client rely on).
> 

(cc'ing Tom)

That is a very good point.

My interpretation was that delegated timestamps generally covered
writes, but SETATTR style operations that do anything beyond only
changing the mtime can't be cached.

We probably need some delstid spec clarification: for what operations
is the server required to disable timestamp updates when a write
delegation is outstanding?

In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
but not SETATTR/SETXATTR/REMOVEXATTR.

How does the Hammerspace anvil behave? Does it disable c/mtime updates
for writes when there is an outstanding timestamp delegation like we're
doing in nfsd? If so, does it do the same for
SETATTR/SETXATTR/REMOVEXATTR operations as well?



> > It's certainly possible that the REMOVEXATTR is the only change that
> > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > at all if nothing else changed. With your version, you would.
> > 
> > > > 
> > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > ---
> > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > >  include/linux/nfs_xdr.h |  3 +++
> > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > --- a/fs/nfs/nfs42proc.c
> > > > +++ b/fs/nfs/nfs42proc.c
> > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > >  {
> > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > >         struct nfs42_removexattrargs args = {
> > > >                 .fh = NFS_FH(inode),
> > > > +               .bitmask = bitmask,
> > > >                 .xattr_name = name,
> > > >         };
> > > > -       struct nfs42_removexattrres res;
> > > > +       struct nfs42_removexattrres res = {
> > > > +               .server = server,
> > > > +       };
> > > >         struct rpc_message msg = {
> > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > >                 .rpc_argp = &args,
> > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > >         int ret;
> > > >         unsigned long timestamp = jiffies;
> > > > 
> > > > +       res.fattr = nfs_alloc_fattr();
> > > > +       if (!res.fattr)
> > > > +               return -ENOMEM;
> > > > +
> > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > +
> > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > >             &res.seq_res, 1);
> > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > -       if (!ret)
> > > > +       if (!ret) {
> > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > +       }
> > > > 
> > > > +       kfree(res.fattr);
> > > >         return ret;
> > > >  }
> > > > 
> > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > --- a/fs/nfs/nfs42xdr.c
> > > > +++ b/fs/nfs/nfs42xdr.c
> > > > @@ -263,11 +263,13 @@
> > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > >                                          encode_sequence_maxsz + \
> > > >                                          encode_putfh_maxsz + \
> > > > -                                        encode_removexattr_maxsz)
> > > > +                                        encode_removexattr_maxsz + \
> > > > +                                        encode_getattr_maxsz)
> > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > >                                          decode_sequence_maxsz + \
> > > >                                          decode_putfh_maxsz + \
> > > > -                                        decode_removexattr_maxsz)
> > > > +                                        decode_removexattr_maxsz + \
> > > > +                                        decode_getattr_maxsz)
> > > > 
> > > >  /*
> > > >   * These values specify the maximum amount of data that is not
> > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > >         encode_putfh(xdr, args->fh, &hdr);
> > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > >         encode_nops(&hdr);
> > > >  }
> > > > 
> > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > >                 goto out;
> > > > 
> > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > +       if (status)
> > > > +               goto out;
> > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > >  out:
> > > >         return status;
> > > >  }
> > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > --- a/include/linux/nfs_xdr.h
> > > > +++ b/include/linux/nfs_xdr.h
> > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > >  struct nfs42_removexattrargs {
> > > >         struct nfs4_sequence_args       seq_args;
> > > >         struct nfs_fh                   *fh;
> > > > +       const u32                       *bitmask;
> > > >         const char                      *xattr_name;
> > > >  };
> > > > 
> > > >  struct nfs42_removexattrres {
> > > >         struct nfs4_sequence_res        seq_res;
> > > >         struct nfs4_change_info         cinfo;
> > > > +       struct nfs_fattr                *fattr;
> > > > +       const struct nfs_server         *server;
> > > >  };
> > > > 
> > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > 
> > > > --
> > > > 2.53.0
> > > > 
> > 
> > --
> > Jeff Layton <jlayton@kernel.org>

-- 
Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Thomas Haynes 1 week, 1 day ago
On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > 
> > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > 
> > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > it relies on the cached ctime, which is wrong.
> > > > > 
> > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > 
> > > > This approach relies on the fact that the server the serves delegated
> > > > attributes would update change_attr on operations which might now
> > > > necessarily happen (ie, linux server does not update change_attribute
> > > > on writes or clone). I propose an alternative fix for the failing
> > > > generic/728.
> > > > 
> > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > --- a/fs/nfs/nfs42proc.c
> > > > +++ b/fs/nfs/nfs42proc.c
> > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > *inode, const char *name)
> > > >             &res.seq_res, 1);
> > > >         trace_nfs4_removexattr(inode, name, ret);
> > > >         if (!ret)
> > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > +                       nfs_update_delegated_mtime(inode);
> > > > +                       spin_lock(&inode->i_lock);
> > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > +                       spin_unlock(&inode->i_lock);
> > > > +               } else
> > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > 
> > > >         return ret;
> > > >  }
> > > > 
> > > 
> > > What's the advantage of doing it this way?
> > > 
> > > You just sent a REMOVEXATTR operation to the server that will change
> > > the mtime there. The server has the most up-to-date version of the
> > > mtime and ctime at that point.
> > 
> > In presence of delegated attributes, Is the server required to update
> > its mtime/ctime on an operation? As I mentioned, the linux server does
> > not update its ctime/mtime for WRITE, CLONE, COPY.
> > 
> > Is possible that
> > some implementations might be different and also do not update the
> > ctime/mtime on REMOVEXATTR?
> > 
> > Therefore I was suggesting that the patch
> > relies on the fact that it would receive an updated value. Of course
> > perhaps all implementations are done the same as the linux server and
> > my point is moot. I didn't see anything in the spec that clarifies
> > what the server supposed to do (and client rely on).
> > 
> 
> (cc'ing Tom)
> 
> That is a very good point.
> 
> My interpretation was that delegated timestamps generally covered
> writes, but SETATTR style operations that do anything beyond only
> changing the mtime can't be cached.
> 
> We probably need some delstid spec clarification: for what operations
> is the server required to disable timestamp updates when a write
> delegation is outstanding?
> 
> In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> but not SETATTR/SETXATTR/REMOVEXATTR.
> 
> How does the Hammerspace anvil behave? Does it disable c/mtime updates
> for writes when there is an outstanding timestamp delegation like we're
> doing in nfsd? If so, does it do the same for
> SETATTR/SETXATTR/REMOVEXATTR operations as well?

Jeff,

I think the right way to look at this is closer to how size is
handled under delegation in RFC8881, rather than as a per-op rule.

In our implementation, because we are acting as an MDS and data I/O
goes to DSes, we already treat size as effectively delegated when
a write layout is outstanding. The MDS does not maintain authoritative
size locally in that case. We may refresh size/timestamps internally
(e.g., on GETATTR by querying DSes), but we don’t treat that as
overriding the delegated authority.

For timestamps, our behavior is effectively the same model. When
the client holds the relevant delegation, the server does not
consider itself authoritative for ctime/mtime. If current values
are needed, we can obtain them from the client (e.g., via CB_GETATTR),
and the client must present the delegation stateid to demonstrate
that authority. So the authority follows the delegation, not the
specific operation.

That said, I don’t think we’ve fully resolved the semantics for all
metadata-style ops either. WRITE and SETATTR are clear in our model,
but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
been relying on assumptions rather than a fully consistent rule.
I.e., CLONE and COPY we just pass through to the DS and we don't
implement SETXATTR/REMOVEXATTR.

So the spec question, as I see it, is not whether REMOVEXATTR (or
any particular op) should update ctime/mtime, but whether delegated
timestamps are meant to follow the same attribute-authority model
as delegated size in RFC8881. If so, then we expect that the server
should query the client via CB_GETATTR to return updated ctime/mtime
after such operations while the delegation is outstanding.

Thanks,
Tom


> 
> 
> 
> > > It's certainly possible that the REMOVEXATTR is the only change that
> > > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > > at all if nothing else changed. With your version, you would.
> > > 
> > > > > 
> > > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > ---
> > > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > > >  include/linux/nfs_xdr.h |  3 +++
> > > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > > --- a/fs/nfs/nfs42proc.c
> > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > >  {
> > > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > > >         struct nfs42_removexattrargs args = {
> > > > >                 .fh = NFS_FH(inode),
> > > > > +               .bitmask = bitmask,
> > > > >                 .xattr_name = name,
> > > > >         };
> > > > > -       struct nfs42_removexattrres res;
> > > > > +       struct nfs42_removexattrres res = {
> > > > > +               .server = server,
> > > > > +       };
> > > > >         struct rpc_message msg = {
> > > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > > >                 .rpc_argp = &args,
> > > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > >         int ret;
> > > > >         unsigned long timestamp = jiffies;
> > > > > 
> > > > > +       res.fattr = nfs_alloc_fattr();
> > > > > +       if (!res.fattr)
> > > > > +               return -ENOMEM;
> > > > > +
> > > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > > +
> > > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > > >             &res.seq_res, 1);
> > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > -       if (!ret)
> > > > > +       if (!ret) {
> > > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > > +       }
> > > > > 
> > > > > +       kfree(res.fattr);
> > > > >         return ret;
> > > > >  }
> > > > > 
> > > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > > --- a/fs/nfs/nfs42xdr.c
> > > > > +++ b/fs/nfs/nfs42xdr.c
> > > > > @@ -263,11 +263,13 @@
> > > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > > >                                          encode_sequence_maxsz + \
> > > > >                                          encode_putfh_maxsz + \
> > > > > -                                        encode_removexattr_maxsz)
> > > > > +                                        encode_removexattr_maxsz + \
> > > > > +                                        encode_getattr_maxsz)
> > > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > > >                                          decode_sequence_maxsz + \
> > > > >                                          decode_putfh_maxsz + \
> > > > > -                                        decode_removexattr_maxsz)
> > > > > +                                        decode_removexattr_maxsz + \
> > > > > +                                        decode_getattr_maxsz)
> > > > > 
> > > > >  /*
> > > > >   * These values specify the maximum amount of data that is not
> > > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > > >         encode_putfh(xdr, args->fh, &hdr);
> > > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > > >         encode_nops(&hdr);
> > > > >  }
> > > > > 
> > > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > > >                 goto out;
> > > > > 
> > > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > > +       if (status)
> > > > > +               goto out;
> > > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > > >  out:
> > > > >         return status;
> > > > >  }
> > > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > > --- a/include/linux/nfs_xdr.h
> > > > > +++ b/include/linux/nfs_xdr.h
> > > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > > >  struct nfs42_removexattrargs {
> > > > >         struct nfs4_sequence_args       seq_args;
> > > > >         struct nfs_fh                   *fh;
> > > > > +       const u32                       *bitmask;
> > > > >         const char                      *xattr_name;
> > > > >  };
> > > > > 
> > > > >  struct nfs42_removexattrres {
> > > > >         struct nfs4_sequence_res        seq_res;
> > > > >         struct nfs4_change_info         cinfo;
> > > > > +       struct nfs_fattr                *fattr;
> > > > > +       const struct nfs_server         *server;
> > > > >  };
> > > > > 
> > > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > > 
> > > > > --
> > > > > 2.53.0
> > > > > 
> > > 
> > > --
> > > Jeff Layton <jlayton@kernel.org>
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Jeff Layton 5 days, 1 hour ago
On Fri, 2026-03-27 at 11:22 -0700, Thomas Haynes wrote:
> On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > 
> > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > 
> > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > it relies on the cached ctime, which is wrong.
> > > > > > 
> > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > 
> > > > > This approach relies on the fact that the server the serves delegated
> > > > > attributes would update change_attr on operations which might now
> > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > generic/728.
> > > > > 
> > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > --- a/fs/nfs/nfs42proc.c
> > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > *inode, const char *name)
> > > > >             &res.seq_res, 1);
> > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > >         if (!ret)
> > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > +                       spin_lock(&inode->i_lock);
> > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > +                       spin_unlock(&inode->i_lock);
> > > > > +               } else
> > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > 
> > > > >         return ret;
> > > > >  }
> > > > > 
> > > > 
> > > > What's the advantage of doing it this way?
> > > > 
> > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > the mtime there. The server has the most up-to-date version of the
> > > > mtime and ctime at that point.
> > > 
> > > In presence of delegated attributes, Is the server required to update
> > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > 
> > > Is possible that
> > > some implementations might be different and also do not update the
> > > ctime/mtime on REMOVEXATTR?
> > > 
> > > Therefore I was suggesting that the patch
> > > relies on the fact that it would receive an updated value. Of course
> > > perhaps all implementations are done the same as the linux server and
> > > my point is moot. I didn't see anything in the spec that clarifies
> > > what the server supposed to do (and client rely on).
> > > 
> > 
> > (cc'ing Tom)
> > 
> > That is a very good point.
> > 
> > My interpretation was that delegated timestamps generally covered
> > writes, but SETATTR style operations that do anything beyond only
> > changing the mtime can't be cached.
> > 
> > We probably need some delstid spec clarification: for what operations
> > is the server required to disable timestamp updates when a write
> > delegation is outstanding?
> > 
> > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > but not SETATTR/SETXATTR/REMOVEXATTR.
> > 
> > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > for writes when there is an outstanding timestamp delegation like we're
> > doing in nfsd? If so, does it do the same for
> > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> 
> Jeff,
> 
> I think the right way to look at this is closer to how size is
> handled under delegation in RFC8881, rather than as a per-op rule.
> 
> In our implementation, because we are acting as an MDS and data I/O
> goes to DSes, we already treat size as effectively delegated when
> a write layout is outstanding. The MDS does not maintain authoritative
> size locally in that case. We may refresh size/timestamps internally
> (e.g., on GETATTR by querying DSes), but we don’t treat that as
> overriding the delegated authority.
> 
> For timestamps, our behavior is effectively the same model. When
> the client holds the relevant delegation, the server does not
> consider itself authoritative for ctime/mtime. If current values
> are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> and the client must present the delegation stateid to demonstrate
> that authority. So the authority follows the delegation, not the
> specific operation.
> 
> That said, I don’t think we’ve fully resolved the semantics for all
> metadata-style ops either. WRITE and SETATTR are clear in our model,
> but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> been relying on assumptions rather than a fully consistent rule.
> I.e., CLONE and COPY we just pass through to the DS and we don't
> implement SETXATTR/REMOVEXATTR.
> 
> So the spec question, as I see it, is not whether REMOVEXATTR (or
> any particular op) should update ctime/mtime, but whether delegated
> timestamps are meant to follow the same attribute-authority model
> as delegated size in RFC8881. If so, then we expect that the server
> should query the client via CB_GETATTR to return updated ctime/mtime
> after such operations while the delegation is outstanding.
> 

The dilemma we have is: because we _do_ allow local processes to stat()
files that have an outstanding write delegation, we can never allow the
ctime in particular to roll backward (modulo clock jumps).

If we're dealing with changes that have been cached in the client and
are being lazily flushed out, then we can't update the timestamp when
that operation occurs. The time of the RPC to flush the changes will
almost certainly be later than the cached timestamps on the client that
will eventually be set, so when the client comes back we'd end up
violating the rollback rule.

Our only option is to freeze timestamp updates on anything that might
represent such an operation. So far, we only do that on WRITE and COPY
operations -- in general, operations that require an open file, since
FMODE_NOCMTIME is attached to the file.

Some SETATTRs that only update the mtime and atime can be cached on the
client by virtue of the fact that it's authoritative for timestamps.
There are some exceptions though:

- atime-only updates can't be cached since the ctime won't change with
a timestamp update if the mtime didn't change

- if you set the mtime to a time that is later than the time you got
the delegation from the server, but earlier than the current time, you
can't cache that. The ctime would be later than the mtime in that case,
and we don't have a mechanism to handle that in a delegated timestamp
SETATTR.

I don't see how you could reasonably buffer a SETXATTR or REMOVEXATTR
operation to be sent later. These need to be done synchronously since
they could always fail for some reason and we don't have a mechanism at
the syscall layer to handle a deferred error. They also only update the
ctime and not the mtime, and we have no mechanism to do that with
delegated timestamps.

Based on that, I think the client and server both need to ignore the
timestamp delegation on a SETXATTR or REMOVEXATTR. The server should
update the ctime and the client needs to send a trailing GETATTR on the
REMOVEXATTR compound in order to get it and the change attr.

Exactly what this patch does, fwiw...
-- 
Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Thomas Haynes 4 days, 18 hours ago
On Tue, Mar 31, 2026 at 07:42:56AM -0800, Jeff Layton wrote:
> On Fri, 2026-03-27 at 11:22 -0700, Thomas Haynes wrote:
> > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > 
> > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > 
> > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > 
> > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > 
> > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > attributes would update change_attr on operations which might now
> > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > generic/728.
> > > > > > 
> > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > *inode, const char *name)
> > > > > >             &res.seq_res, 1);
> > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > >         if (!ret)
> > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > +               } else
> > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > 
> > > > > >         return ret;
> > > > > >  }
> > > > > > 
> > > > > 
> > > > > What's the advantage of doing it this way?
> > > > > 
> > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > the mtime there. The server has the most up-to-date version of the
> > > > > mtime and ctime at that point.
> > > > 
> > > > In presence of delegated attributes, Is the server required to update
> > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > 
> > > > Is possible that
> > > > some implementations might be different and also do not update the
> > > > ctime/mtime on REMOVEXATTR?
> > > > 
> > > > Therefore I was suggesting that the patch
> > > > relies on the fact that it would receive an updated value. Of course
> > > > perhaps all implementations are done the same as the linux server and
> > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > what the server supposed to do (and client rely on).
> > > > 
> > > 
> > > (cc'ing Tom)
> > > 
> > > That is a very good point.
> > > 
> > > My interpretation was that delegated timestamps generally covered
> > > writes, but SETATTR style operations that do anything beyond only
> > > changing the mtime can't be cached.
> > > 
> > > We probably need some delstid spec clarification: for what operations
> > > is the server required to disable timestamp updates when a write
> > > delegation is outstanding?
> > > 
> > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > 
> > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > for writes when there is an outstanding timestamp delegation like we're
> > > doing in nfsd? If so, does it do the same for
> > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > 
> > Jeff,
> > 
> > I think the right way to look at this is closer to how size is
> > handled under delegation in RFC8881, rather than as a per-op rule.
> > 
> > In our implementation, because we are acting as an MDS and data I/O
> > goes to DSes, we already treat size as effectively delegated when
> > a write layout is outstanding. The MDS does not maintain authoritative
> > size locally in that case. We may refresh size/timestamps internally
> > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > overriding the delegated authority.
> > 
> > For timestamps, our behavior is effectively the same model. When
> > the client holds the relevant delegation, the server does not
> > consider itself authoritative for ctime/mtime. If current values
> > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > and the client must present the delegation stateid to demonstrate
> > that authority. So the authority follows the delegation, not the
> > specific operation.
> > 
> > That said, I don’t think we’ve fully resolved the semantics for all
> > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > been relying on assumptions rather than a fully consistent rule.
> > I.e., CLONE and COPY we just pass through to the DS and we don't
> > implement SETXATTR/REMOVEXATTR.
> > 
> > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > any particular op) should update ctime/mtime, but whether delegated
> > timestamps are meant to follow the same attribute-authority model
> > as delegated size in RFC8881. If so, then we expect that the server
> > should query the client via CB_GETATTR to return updated ctime/mtime
> > after such operations while the delegation is outstanding.
> > 
> 
> The dilemma we have is: because we _do_ allow local processes to stat()
> files that have an outstanding write delegation, we can never allow the
> ctime in particular to roll backward (modulo clock jumps).

I agree we do not want visible ctime rollback, but I do not see how
that can be guaranteed from delegated timestamps alone when the
authoritative timestamp may be generated on a different node with
a different clock and the object may change during the CB_GETATTR
window. That seems to require either monotonic clamping of exposed
ctime, or treating change_attr rather than ctime as the real
serialization signal.

> 
> If we're dealing with changes that have been cached in the client and
> are being lazily flushed out, then we can't update the timestamp when
> that operation occurs. The time of the RPC to flush the changes will
> almost certainly be later than the cached timestamps on the client that
> will eventually be set, so when the client comes back we'd end up
> violating the rollback rule.
> 
> Our only option is to freeze timestamp updates on anything that might
> represent such an operation. So far, we only do that on WRITE and COPY
> operations -- in general, operations that require an open file, since
> FMODE_NOCMTIME is attached to the file.
> 
> Some SETATTRs that only update the mtime and atime can be cached on the
> client by virtue of the fact that it's authoritative for timestamps.
> There are some exceptions though:
> 
> - atime-only updates can't be cached since the ctime won't change with
> a timestamp update if the mtime didn't change
> 
> - if you set the mtime to a time that is later than the time you got
> the delegation from the server, but earlier than the current time, you
> can't cache that. The ctime would be later than the mtime in that case,
> and we don't have a mechanism to handle that in a delegated timestamp
> SETATTR.
> 
> I don't see how you could reasonably buffer a SETXATTR or REMOVEXATTR
> operation to be sent later. These need to be done synchronously since
> they could always fail for some reason and we don't have a mechanism at
> the syscall layer to handle a deferred error. They also only update the
> ctime and not the mtime, and we have no mechanism to do that with
> delegated timestamps.
> 
> Based on that, I think the client and server both need to ignore the
> timestamp delegation on a SETXATTR or REMOVEXATTR. The server should
> update the ctime and the client needs to send a trailing GETATTR on the
> REMOVEXATTR compound in order to get it and the change attr.

One concern I have with a per-op formulation is extensibility. If
delegated timestamp behavior is defined by enumerating specific
operations, then every new operation added to the protocol creates
a fresh ambiguity until the spec is updated again. It seems better
to define the behavior in terms of operation properties - e.g., whether
the operation is synchronously visible, can be deferred/cached at
the client, and whether it affects only ctime versus mtime/atime -
so future operations can be classified without reopening the base
rule.

I.e., I can't tell if you want me to update the spec with
guidance per-op or you are just documenting what you did.

> 
> Exactly what this patch does, fwiw...
> -- 
> Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Jeff Layton 4 days, 15 hours ago
On Tue, 2026-03-31 at 11:47 -0700, Thomas Haynes wrote:
> On Tue, Mar 31, 2026 at 07:42:56AM -0800, Jeff Layton wrote:
> > On Fri, 2026-03-27 at 11:22 -0700, Thomas Haynes wrote:
> > > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > 
> > > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > 
> > > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > > 
> > > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > > 
> > > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > > attributes would update change_attr on operations which might now
> > > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > > generic/728.
> > > > > > > 
> > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > > *inode, const char *name)
> > > > > > >             &res.seq_res, 1);
> > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > >         if (!ret)
> > > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > > +               } else
> > > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > 
> > > > > > >         return ret;
> > > > > > >  }
> > > > > > > 
> > > > > > 
> > > > > > What's the advantage of doing it this way?
> > > > > > 
> > > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > > the mtime there. The server has the most up-to-date version of the
> > > > > > mtime and ctime at that point.
> > > > > 
> > > > > In presence of delegated attributes, Is the server required to update
> > > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > > 
> > > > > Is possible that
> > > > > some implementations might be different and also do not update the
> > > > > ctime/mtime on REMOVEXATTR?
> > > > > 
> > > > > Therefore I was suggesting that the patch
> > > > > relies on the fact that it would receive an updated value. Of course
> > > > > perhaps all implementations are done the same as the linux server and
> > > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > > what the server supposed to do (and client rely on).
> > > > > 
> > > > 
> > > > (cc'ing Tom)
> > > > 
> > > > That is a very good point.
> > > > 
> > > > My interpretation was that delegated timestamps generally covered
> > > > writes, but SETATTR style operations that do anything beyond only
> > > > changing the mtime can't be cached.
> > > > 
> > > > We probably need some delstid spec clarification: for what operations
> > > > is the server required to disable timestamp updates when a write
> > > > delegation is outstanding?
> > > > 
> > > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > > 
> > > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > > for writes when there is an outstanding timestamp delegation like we're
> > > > doing in nfsd? If so, does it do the same for
> > > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > > 
> > > Jeff,
> > > 
> > > I think the right way to look at this is closer to how size is
> > > handled under delegation in RFC8881, rather than as a per-op rule.
> > > 
> > > In our implementation, because we are acting as an MDS and data I/O
> > > goes to DSes, we already treat size as effectively delegated when
> > > a write layout is outstanding. The MDS does not maintain authoritative
> > > size locally in that case. We may refresh size/timestamps internally
> > > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > > overriding the delegated authority.
> > > 
> > > For timestamps, our behavior is effectively the same model. When
> > > the client holds the relevant delegation, the server does not
> > > consider itself authoritative for ctime/mtime. If current values
> > > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > > and the client must present the delegation stateid to demonstrate
> > > that authority. So the authority follows the delegation, not the
> > > specific operation.
> > > 
> > > That said, I don’t think we’ve fully resolved the semantics for all
> > > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > > been relying on assumptions rather than a fully consistent rule.
> > > I.e., CLONE and COPY we just pass through to the DS and we don't
> > > implement SETXATTR/REMOVEXATTR.
> > > 
> > > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > > any particular op) should update ctime/mtime, but whether delegated
> > > timestamps are meant to follow the same attribute-authority model
> > > as delegated size in RFC8881. If so, then we expect that the server
> > > should query the client via CB_GETATTR to return updated ctime/mtime
> > > after such operations while the delegation is outstanding.
> > > 
> > 
> > The dilemma we have is: because we _do_ allow local processes to stat()
> > files that have an outstanding write delegation, we can never allow the
> > ctime in particular to roll backward (modulo clock jumps).
> 
> I agree we do not want visible ctime rollback, but I do not see how
> that can be guaranteed from delegated timestamps alone when the
> authoritative timestamp may be generated on a different node with
> a different clock and the object may change during the CB_GETATTR
> window. That seems to require either monotonic clamping of exposed
> ctime, or treating change_attr rather than ctime as the real
> serialization signal.
> 
> > 
> > If we're dealing with changes that have been cached in the client and
> > are being lazily flushed out, then we can't update the timestamp when
> > that operation occurs. The time of the RPC to flush the changes will
> > almost certainly be later than the cached timestamps on the client that
> > will eventually be set, so when the client comes back we'd end up
> > violating the rollback rule.
> > 
> > Our only option is to freeze timestamp updates on anything that might
> > represent such an operation. So far, we only do that on WRITE and COPY
> > operations -- in general, operations that require an open file, since
> > FMODE_NOCMTIME is attached to the file.
> > 
> > Some SETATTRs that only update the mtime and atime can be cached on the
> > client by virtue of the fact that it's authoritative for timestamps.
> > There are some exceptions though:
> > 
> > - atime-only updates can't be cached since the ctime won't change with
> > a timestamp update if the mtime didn't change
> > 
> > - if you set the mtime to a time that is later than the time you got
> > the delegation from the server, but earlier than the current time, you
> > can't cache that. The ctime would be later than the mtime in that case,
> > and we don't have a mechanism to handle that in a delegated timestamp
> > SETATTR.
> > 
> > I don't see how you could reasonably buffer a SETXATTR or REMOVEXATTR
> > operation to be sent later. These need to be done synchronously since
> > they could always fail for some reason and we don't have a mechanism at
> > the syscall layer to handle a deferred error. They also only update the
> > ctime and not the mtime, and we have no mechanism to do that with
> > delegated timestamps.
> > 
> > Based on that, I think the client and server both need to ignore the
> > timestamp delegation on a SETXATTR or REMOVEXATTR. The server should
> > update the ctime and the client needs to send a trailing GETATTR on the
> > REMOVEXATTR compound in order to get it and the change attr.
> 
> One concern I have with a per-op formulation is extensibility. If
> delegated timestamp behavior is defined by enumerating specific
> operations, then every new operation added to the protocol creates
> a fresh ambiguity until the spec is updated again. It seems better
> to define the behavior in terms of operation properties - e.g., whether
> the operation is synchronously visible, can be deferred/cached at
> the client, and whether it affects only ctime versus mtime/atime -
> so future operations can be classified without reopening the base
> rule.
> 
> I.e., I can't tell if you want me to update the spec with
> guidance per-op or you are just documenting what you did.
> 
> 

I think we probably need some guidance in the spec, and I think that
guidance comes down to: operations that don't have a way to report a
delayed error condition can't be buffered on the client and must
continue to be done synchronously even if a delegation is held.

By way of example: if I do a write() on the client I can buffer that
because userland can eventually do an fsync() to see if it succeeded.
This is not true for syscalls like setxattr() or removexattr(), or most
syscalls that result in a SETATTR operation (chmod(), chown(), etc).

They must be done synchronously because:

1/ there's no way to update only the ctime in a delegated timestamp
update

...and...

2/ these syscalls can fail, so we can't return from them until we know
the outcome.

How best to phrase this guidance, I'm not sure...
-- 
Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Thomas Haynes 4 days, 15 hours ago
On Tue, Mar 31, 2026 at 05:19:33PM -0800, Jeff Layton wrote:
> On Tue, 2026-03-31 at 11:47 -0700, Thomas Haynes wrote:
> > On Tue, Mar 31, 2026 at 07:42:56AM -0800, Jeff Layton wrote:
> > > On Fri, 2026-03-27 at 11:22 -0700, Thomas Haynes wrote:
> > > > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > 
> > > > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > > 
> > > > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > > > 
> > > > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > > > 
> > > > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > > > attributes would update change_attr on operations which might now
> > > > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > > > generic/728.
> > > > > > > > 
> > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > > > *inode, const char *name)
> > > > > > > >             &res.seq_res, 1);
> > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > >         if (!ret)
> > > > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > > > +               } else
> > > > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > 
> > > > > > > >         return ret;
> > > > > > > >  }
> > > > > > > > 
> > > > > > > 
> > > > > > > What's the advantage of doing it this way?
> > > > > > > 
> > > > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > > > the mtime there. The server has the most up-to-date version of the
> > > > > > > mtime and ctime at that point.
> > > > > > 
> > > > > > In presence of delegated attributes, Is the server required to update
> > > > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > > > 
> > > > > > Is possible that
> > > > > > some implementations might be different and also do not update the
> > > > > > ctime/mtime on REMOVEXATTR?
> > > > > > 
> > > > > > Therefore I was suggesting that the patch
> > > > > > relies on the fact that it would receive an updated value. Of course
> > > > > > perhaps all implementations are done the same as the linux server and
> > > > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > > > what the server supposed to do (and client rely on).
> > > > > > 
> > > > > 
> > > > > (cc'ing Tom)
> > > > > 
> > > > > That is a very good point.
> > > > > 
> > > > > My interpretation was that delegated timestamps generally covered
> > > > > writes, but SETATTR style operations that do anything beyond only
> > > > > changing the mtime can't be cached.
> > > > > 
> > > > > We probably need some delstid spec clarification: for what operations
> > > > > is the server required to disable timestamp updates when a write
> > > > > delegation is outstanding?
> > > > > 
> > > > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > > > 
> > > > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > > > for writes when there is an outstanding timestamp delegation like we're
> > > > > doing in nfsd? If so, does it do the same for
> > > > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > > > 
> > > > Jeff,
> > > > 
> > > > I think the right way to look at this is closer to how size is
> > > > handled under delegation in RFC8881, rather than as a per-op rule.
> > > > 
> > > > In our implementation, because we are acting as an MDS and data I/O
> > > > goes to DSes, we already treat size as effectively delegated when
> > > > a write layout is outstanding. The MDS does not maintain authoritative
> > > > size locally in that case. We may refresh size/timestamps internally
> > > > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > > > overriding the delegated authority.
> > > > 
> > > > For timestamps, our behavior is effectively the same model. When
> > > > the client holds the relevant delegation, the server does not
> > > > consider itself authoritative for ctime/mtime. If current values
> > > > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > > > and the client must present the delegation stateid to demonstrate
> > > > that authority. So the authority follows the delegation, not the
> > > > specific operation.
> > > > 
> > > > That said, I don’t think we’ve fully resolved the semantics for all
> > > > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > > > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > > > been relying on assumptions rather than a fully consistent rule.
> > > > I.e., CLONE and COPY we just pass through to the DS and we don't
> > > > implement SETXATTR/REMOVEXATTR.
> > > > 
> > > > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > > > any particular op) should update ctime/mtime, but whether delegated
> > > > timestamps are meant to follow the same attribute-authority model
> > > > as delegated size in RFC8881. If so, then we expect that the server
> > > > should query the client via CB_GETATTR to return updated ctime/mtime
> > > > after such operations while the delegation is outstanding.
> > > > 
> > > 
> > > The dilemma we have is: because we _do_ allow local processes to stat()
> > > files that have an outstanding write delegation, we can never allow the
> > > ctime in particular to roll backward (modulo clock jumps).
> > 
> > I agree we do not want visible ctime rollback, but I do not see how
> > that can be guaranteed from delegated timestamps alone when the
> > authoritative timestamp may be generated on a different node with
> > a different clock and the object may change during the CB_GETATTR
> > window. That seems to require either monotonic clamping of exposed
> > ctime, or treating change_attr rather than ctime as the real
> > serialization signal.
> > 
> > > 
> > > If we're dealing with changes that have been cached in the client and
> > > are being lazily flushed out, then we can't update the timestamp when
> > > that operation occurs. The time of the RPC to flush the changes will
> > > almost certainly be later than the cached timestamps on the client that
> > > will eventually be set, so when the client comes back we'd end up
> > > violating the rollback rule.
> > > 
> > > Our only option is to freeze timestamp updates on anything that might
> > > represent such an operation. So far, we only do that on WRITE and COPY
> > > operations -- in general, operations that require an open file, since
> > > FMODE_NOCMTIME is attached to the file.
> > > 
> > > Some SETATTRs that only update the mtime and atime can be cached on the
> > > client by virtue of the fact that it's authoritative for timestamps.
> > > There are some exceptions though:
> > > 
> > > - atime-only updates can't be cached since the ctime won't change with
> > > a timestamp update if the mtime didn't change
> > > 
> > > - if you set the mtime to a time that is later than the time you got
> > > the delegation from the server, but earlier than the current time, you
> > > can't cache that. The ctime would be later than the mtime in that case,
> > > and we don't have a mechanism to handle that in a delegated timestamp
> > > SETATTR.
> > > 
> > > I don't see how you could reasonably buffer a SETXATTR or REMOVEXATTR
> > > operation to be sent later. These need to be done synchronously since
> > > they could always fail for some reason and we don't have a mechanism at
> > > the syscall layer to handle a deferred error. They also only update the
> > > ctime and not the mtime, and we have no mechanism to do that with
> > > delegated timestamps.
> > > 
> > > Based on that, I think the client and server both need to ignore the
> > > timestamp delegation on a SETXATTR or REMOVEXATTR. The server should
> > > update the ctime and the client needs to send a trailing GETATTR on the
> > > REMOVEXATTR compound in order to get it and the change attr.
> > 
> > One concern I have with a per-op formulation is extensibility. If
> > delegated timestamp behavior is defined by enumerating specific
> > operations, then every new operation added to the protocol creates
> > a fresh ambiguity until the spec is updated again. It seems better
> > to define the behavior in terms of operation properties - e.g., whether
> > the operation is synchronously visible, can be deferred/cached at
> > the client, and whether it affects only ctime versus mtime/atime -
> > so future operations can be classified without reopening the base
> > rule.
> > 
> > I.e., I can't tell if you want me to update the spec with
> > guidance per-op or you are just documenting what you did.
> > 
> > 
> 
> I think we probably need some guidance in the spec, and I think that
> guidance comes down to: operations that don't have a way to report a
> delayed error condition can't be buffered on the client and must
> continue to be done synchronously even if a delegation is held.
> 
> By way of example: if I do a write() on the client I can buffer that
> because userland can eventually do an fsync() to see if it succeeded.
> This is not true for syscalls like setxattr() or removexattr(), or most
> syscalls that result in a SETATTR operation (chmod(), chown(), etc).
> 
> They must be done synchronously because:
> 
> 1/ there's no way to update only the ctime in a delegated timestamp
> update
> 
> ...and...
> 
> 2/ these syscalls can fail, so we can't return from them until we know
> the outcome.
> 
> How best to phrase this guidance, I'm not sure...
> -- 
> Jeff Layton <jlayton@kernel.org>

One concern I have with the "ops that can report delayed error"
formulation is that, in practice, any NFSv4 operation may encounter
a temporary condition and return NFS4ERR_DELAY at execution time.
Even a simple operation like GETFH may need to allocate reply
resources and fail transiently. As such, the ability to convey delay
is not a distinguishing property of a particular operation.

I think the relevant distinction is instead whether the client may
complete the syscall locally and defer authoritative execution
and/or result until a later point without violating its semantics.
Some operations fit that model, but others do not.

Even WRITE is not inherently in the "may be deferred" category;
that depends on the data path. For example, in a pNFS configuration
where WRITE is routed synchronously to an MDS, completion semantics
differ from a buffered writeback model. This suggests that the
classification should not be tied to specific opcodes, but to the
completion semantics of the operation in the context in which it
is executed.

Given that, it may be more useful for the spec to describe delegated
timestamp behavior in terms of these semantics:

   o Operations for which the client may lawfully acknowledge
   completion before authoritative server execution has completed
   may use delegated timestamp authority, subject to the constraints
   of that deferred-completion model.

   o Operations that require authoritative execution status before
   completion of the user-visible request must be executed synchronously
   and must not rely on deferred timestamp resolution.

This avoids per-op enumeration and allows future operations to be
classified based on their semantics rather than requiring explicit
specification updates.
---
Tom Haynes <loghyr@gmail.com>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Jeff Layton 4 days, 14 hours ago
On Tue, 2026-03-31 at 15:16 -0700, Thomas Haynes wrote:
> On Tue, Mar 31, 2026 at 05:19:33PM -0800, Jeff Layton wrote:
> > On Tue, 2026-03-31 at 11:47 -0700, Thomas Haynes wrote:
> > > On Tue, Mar 31, 2026 at 07:42:56AM -0800, Jeff Layton wrote:
> > > > On Fri, 2026-03-27 at 11:22 -0700, Thomas Haynes wrote:
> > > > > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > > > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > 
> > > > > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > > > 
> > > > > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > > > > 
> > > > > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > > > > 
> > > > > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > > > > attributes would update change_attr on operations which might now
> > > > > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > > > > generic/728.
> > > > > > > > > 
> > > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > > > > *inode, const char *name)
> > > > > > > > >             &res.seq_res, 1);
> > > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > > >         if (!ret)
> > > > > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > > > > +               } else
> > > > > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > > 
> > > > > > > > >         return ret;
> > > > > > > > >  }
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > What's the advantage of doing it this way?
> > > > > > > > 
> > > > > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > > > > the mtime there. The server has the most up-to-date version of the
> > > > > > > > mtime and ctime at that point.
> > > > > > > 
> > > > > > > In presence of delegated attributes, Is the server required to update
> > > > > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > > > > 
> > > > > > > Is possible that
> > > > > > > some implementations might be different and also do not update the
> > > > > > > ctime/mtime on REMOVEXATTR?
> > > > > > > 
> > > > > > > Therefore I was suggesting that the patch
> > > > > > > relies on the fact that it would receive an updated value. Of course
> > > > > > > perhaps all implementations are done the same as the linux server and
> > > > > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > > > > what the server supposed to do (and client rely on).
> > > > > > > 
> > > > > > 
> > > > > > (cc'ing Tom)
> > > > > > 
> > > > > > That is a very good point.
> > > > > > 
> > > > > > My interpretation was that delegated timestamps generally covered
> > > > > > writes, but SETATTR style operations that do anything beyond only
> > > > > > changing the mtime can't be cached.
> > > > > > 
> > > > > > We probably need some delstid spec clarification: for what operations
> > > > > > is the server required to disable timestamp updates when a write
> > > > > > delegation is outstanding?
> > > > > > 
> > > > > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > > > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > > > > 
> > > > > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > > > > for writes when there is an outstanding timestamp delegation like we're
> > > > > > doing in nfsd? If so, does it do the same for
> > > > > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > > > > 
> > > > > Jeff,
> > > > > 
> > > > > I think the right way to look at this is closer to how size is
> > > > > handled under delegation in RFC8881, rather than as a per-op rule.
> > > > > 
> > > > > In our implementation, because we are acting as an MDS and data I/O
> > > > > goes to DSes, we already treat size as effectively delegated when
> > > > > a write layout is outstanding. The MDS does not maintain authoritative
> > > > > size locally in that case. We may refresh size/timestamps internally
> > > > > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > > > > overriding the delegated authority.
> > > > > 
> > > > > For timestamps, our behavior is effectively the same model. When
> > > > > the client holds the relevant delegation, the server does not
> > > > > consider itself authoritative for ctime/mtime. If current values
> > > > > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > > > > and the client must present the delegation stateid to demonstrate
> > > > > that authority. So the authority follows the delegation, not the
> > > > > specific operation.
> > > > > 
> > > > > That said, I don’t think we’ve fully resolved the semantics for all
> > > > > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > > > > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > > > > been relying on assumptions rather than a fully consistent rule.
> > > > > I.e., CLONE and COPY we just pass through to the DS and we don't
> > > > > implement SETXATTR/REMOVEXATTR.
> > > > > 
> > > > > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > > > > any particular op) should update ctime/mtime, but whether delegated
> > > > > timestamps are meant to follow the same attribute-authority model
> > > > > as delegated size in RFC8881. If so, then we expect that the server
> > > > > should query the client via CB_GETATTR to return updated ctime/mtime
> > > > > after such operations while the delegation is outstanding.
> > > > > 
> > > > 
> > > > The dilemma we have is: because we _do_ allow local processes to stat()
> > > > files that have an outstanding write delegation, we can never allow the
> > > > ctime in particular to roll backward (modulo clock jumps).
> > > 
> > > I agree we do not want visible ctime rollback, but I do not see how
> > > that can be guaranteed from delegated timestamps alone when the
> > > authoritative timestamp may be generated on a different node with
> > > a different clock and the object may change during the CB_GETATTR
> > > window. That seems to require either monotonic clamping of exposed
> > > ctime, or treating change_attr rather than ctime as the real
> > > serialization signal.
> > > 

We basically disable c/mtime updates on the delegation when it's handed
out and record the current time (A). We then clamp the delegated
timestamp update to between A and "now" when the SETATTR is done.
That's enough to ensure that nothing rolls backward but we can still
respect the client's update assuming the clocks are close enough.

> > > > 
> > > > If we're dealing with changes that have been cached in the client and
> > > > are being lazily flushed out, then we can't update the timestamp when
> > > > that operation occurs. The time of the RPC to flush the changes will
> > > > almost certainly be later than the cached timestamps on the client that
> > > > will eventually be set, so when the client comes back we'd end up
> > > > violating the rollback rule.
> > > > 
> > > > Our only option is to freeze timestamp updates on anything that might
> > > > represent such an operation. So far, we only do that on WRITE and COPY
> > > > operations -- in general, operations that require an open file, since
> > > > FMODE_NOCMTIME is attached to the file.
> > > > 
> > > > Some SETATTRs that only update the mtime and atime can be cached on the
> > > > client by virtue of the fact that it's authoritative for timestamps.
> > > > There are some exceptions though:
> > > > 
> > > > - atime-only updates can't be cached since the ctime won't change with
> > > > a timestamp update if the mtime didn't change
> > > > 
> > > > - if you set the mtime to a time that is later than the time you got
> > > > the delegation from the server, but earlier than the current time, you
> > > > can't cache that. The ctime would be later than the mtime in that case,
> > > > and we don't have a mechanism to handle that in a delegated timestamp
> > > > SETATTR.
> > > > 
> > > > I don't see how you could reasonably buffer a SETXATTR or REMOVEXATTR
> > > > operation to be sent later. These need to be done synchronously since
> > > > they could always fail for some reason and we don't have a mechanism at
> > > > the syscall layer to handle a deferred error. They also only update the
> > > > ctime and not the mtime, and we have no mechanism to do that with
> > > > delegated timestamps.
> > > > 
> > > > Based on that, I think the client and server both need to ignore the
> > > > timestamp delegation on a SETXATTR or REMOVEXATTR. The server should
> > > > update the ctime and the client needs to send a trailing GETATTR on the
> > > > REMOVEXATTR compound in order to get it and the change attr.
> > > 
> > > One concern I have with a per-op formulation is extensibility. If
> > > delegated timestamp behavior is defined by enumerating specific
> > > operations, then every new operation added to the protocol creates
> > > a fresh ambiguity until the spec is updated again. It seems better
> > > to define the behavior in terms of operation properties - e.g., whether
> > > the operation is synchronously visible, can be deferred/cached at
> > > the client, and whether it affects only ctime versus mtime/atime -
> > > so future operations can be classified without reopening the base
> > > rule.
> > > 
> > > I.e., I can't tell if you want me to update the spec with
> > > guidance per-op or you are just documenting what you did.
> > > 
> > > 
> > 
> > I think we probably need some guidance in the spec, and I think that
> > guidance comes down to: operations that don't have a way to report a
> > delayed error condition can't be buffered on the client and must
> > continue to be done synchronously even if a delegation is held.
> > 
> > By way of example: if I do a write() on the client I can buffer that
> > because userland can eventually do an fsync() to see if it succeeded.
> > This is not true for syscalls like setxattr() or removexattr(), or most
> > syscalls that result in a SETATTR operation (chmod(), chown(), etc).
> > 
> > They must be done synchronously because:
> > 
> > 1/ there's no way to update only the ctime in a delegated timestamp
> > update
> > 
> > ...and...
> > 
> > 2/ these syscalls can fail, so we can't return from them until we know
> > the outcome.
> > 
> > How best to phrase this guidance, I'm not sure...
> > -- 
> > Jeff Layton <jlayton@kernel.org>
> 
> One concern I have with the "ops that can report delayed error"
> formulation is that, in practice, any NFSv4 operation may encounter
> a temporary condition and return NFS4ERR_DELAY at execution time.
> Even a simple operation like GETFH may need to allocate reply
> resources and fail transiently. As such, the ability to convey delay
> is not a distinguishing property of a particular operation.
> 
> I think the relevant distinction is instead whether the client may
> complete the syscall locally and defer authoritative execution
> and/or result until a later point without violating its semantics.
> Some operations fit that model, but others do not.
> 
> Even WRITE is not inherently in the "may be deferred" category;
> that depends on the data path. For example, in a pNFS configuration
> where WRITE is routed synchronously to an MDS, completion semantics
> differ from a buffered writeback model. This suggests that the
> classification should not be tied to specific opcodes, but to the
> completion semantics of the operation in the context in which it
> is executed.
> 

Yeah, I phrased that poorly. I didn't mean something that would return
NFS4ERR_DELAY, but rather operations like unstable WRITEs and
asynchronous COPY operations (and anything else that falls into that
model) that can fail after the server has returned from the initial
operation.

> Given that, it may be more useful for the spec to describe delegated
> timestamp behavior in terms of these semantics:
> 
>    o Operations for which the client may lawfully acknowledge
>    completion before authoritative server execution has completed
>    may use delegated timestamp authority, subject to the constraints
>    of that deferred-completion model.
> 
>    o Operations that require authoritative execution status before
>    completion of the user-visible request must be executed synchronously
>    and must not rely on deferred timestamp resolution.
> 
> This avoids per-op enumeration and allows future operations to be
> classified based on their semantics rather than requiring explicit
> specification updates.

Ack. Sounds good to me.
-- 
Jeff Layton <jlayton@kernel.org>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Olga Kornievskaia 1 week, 1 day ago
On Fri, Mar 27, 2026 at 2:22 PM Thomas Haynes <loghyr@gmail.com> wrote:
>
> On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > >
> > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > >
> > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > it relies on the cached ctime, which is wrong.
> > > > > >
> > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > >
> > > > > This approach relies on the fact that the server the serves delegated
> > > > > attributes would update change_attr on operations which might now
> > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > generic/728.
> > > > >
> > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > --- a/fs/nfs/nfs42proc.c
> > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > *inode, const char *name)
> > > > >             &res.seq_res, 1);
> > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > >         if (!ret)
> > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > +                       spin_lock(&inode->i_lock);
> > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > +                       spin_unlock(&inode->i_lock);
> > > > > +               } else
> > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > >
> > > > >         return ret;
> > > > >  }
> > > > >
> > > >
> > > > What's the advantage of doing it this way?
> > > >
> > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > the mtime there. The server has the most up-to-date version of the
> > > > mtime and ctime at that point.
> > >
> > > In presence of delegated attributes, Is the server required to update
> > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > >
> > > Is possible that
> > > some implementations might be different and also do not update the
> > > ctime/mtime on REMOVEXATTR?
> > >
> > > Therefore I was suggesting that the patch
> > > relies on the fact that it would receive an updated value. Of course
> > > perhaps all implementations are done the same as the linux server and
> > > my point is moot. I didn't see anything in the spec that clarifies
> > > what the server supposed to do (and client rely on).
> > >
> >
> > (cc'ing Tom)
> >
> > That is a very good point.
> >
> > My interpretation was that delegated timestamps generally covered
> > writes, but SETATTR style operations that do anything beyond only
> > changing the mtime can't be cached.
> >
> > We probably need some delstid spec clarification: for what operations
> > is the server required to disable timestamp updates when a write
> > delegation is outstanding?
> >
> > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > but not SETATTR/SETXATTR/REMOVEXATTR.
> >
> > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > for writes when there is an outstanding timestamp delegation like we're
> > doing in nfsd? If so, does it do the same for
> > SETATTR/SETXATTR/REMOVEXATTR operations as well?
>
> Jeff,
>
> I think the right way to look at this is closer to how size is
> handled under delegation in RFC8881, rather than as a per-op rule.
>
> In our implementation, because we are acting as an MDS and data I/O
> goes to DSes, we already treat size as effectively delegated when
> a write layout is outstanding. The MDS does not maintain authoritative
> size locally in that case. We may refresh size/timestamps internally
> (e.g., on GETATTR by querying DSes), but we don’t treat that as
> overriding the delegated authority.
>
> For timestamps, our behavior is effectively the same model. When
> the client holds the relevant delegation, the server does not
> consider itself authoritative for ctime/mtime. If current values
> are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> and the client must present the delegation stateid to demonstrate
> that authority. So the authority follows the delegation, not the
> specific operation.

What happens when the client holding the attribute delegation (it's
the authority) is doing the query? Is it the server's responsibility
to query the client before replying. Example, client sends a CLONE
operation which has a GETATTR attached. (1) is the server supposed to
issue a CB_GETATTR before replying to the compound? (2) is the client
not supposed to send any GETATTRs while holding an attribute
delegation? CLONE is a modifying operation but client hasn't done any
actual modifications to the opened file so a CB_GETATTR would return
that file hasn't been modified. Is the server then not going to
express that the file has indeed been modified when replying to
GETATTR?

> That said, I don’t think we’ve fully resolved the semantics for all
> metadata-style ops either. WRITE and SETATTR are clear in our model,
> but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> been relying on assumptions rather than a fully consistent rule.
> I.e., CLONE and COPY we just pass through to the DS and we don't
> implement SETXATTR/REMOVEXATTR.
>
> So the spec question, as I see it, is not whether REMOVEXATTR (or
> any particular op) should update ctime/mtime, but whether delegated
> timestamps are meant to follow the same attribute-authority model
> as delegated size in RFC8881. If so, then we expect that the server
> should query the client via CB_GETATTR to return updated ctime/mtime
> after such operations while the delegation is outstanding.
>
> Thanks,
> Tom
>
>
> >
> >
> >
> > > > It's certainly possible that the REMOVEXATTR is the only change that
> > > > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > > > at all if nothing else changed. With your version, you would.
> > > >
> > > > > >
> > > > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > ---
> > > > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > > > >  include/linux/nfs_xdr.h |  3 +++
> > > > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > > >
> > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > >  {
> > > > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > > > >         struct nfs42_removexattrargs args = {
> > > > > >                 .fh = NFS_FH(inode),
> > > > > > +               .bitmask = bitmask,
> > > > > >                 .xattr_name = name,
> > > > > >         };
> > > > > > -       struct nfs42_removexattrres res;
> > > > > > +       struct nfs42_removexattrres res = {
> > > > > > +               .server = server,
> > > > > > +       };
> > > > > >         struct rpc_message msg = {
> > > > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > > > >                 .rpc_argp = &args,
> > > > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > >         int ret;
> > > > > >         unsigned long timestamp = jiffies;
> > > > > >
> > > > > > +       res.fattr = nfs_alloc_fattr();
> > > > > > +       if (!res.fattr)
> > > > > > +               return -ENOMEM;
> > > > > > +
> > > > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > > > +
> > > > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > > > >             &res.seq_res, 1);
> > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > -       if (!ret)
> > > > > > +       if (!ret) {
> > > > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > > > +       }
> > > > > >
> > > > > > +       kfree(res.fattr);
> > > > > >         return ret;
> > > > > >  }
> > > > > >
> > > > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > > > --- a/fs/nfs/nfs42xdr.c
> > > > > > +++ b/fs/nfs/nfs42xdr.c
> > > > > > @@ -263,11 +263,13 @@
> > > > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > > > >                                          encode_sequence_maxsz + \
> > > > > >                                          encode_putfh_maxsz + \
> > > > > > -                                        encode_removexattr_maxsz)
> > > > > > +                                        encode_removexattr_maxsz + \
> > > > > > +                                        encode_getattr_maxsz)
> > > > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > > > >                                          decode_sequence_maxsz + \
> > > > > >                                          decode_putfh_maxsz + \
> > > > > > -                                        decode_removexattr_maxsz)
> > > > > > +                                        decode_removexattr_maxsz + \
> > > > > > +                                        decode_getattr_maxsz)
> > > > > >
> > > > > >  /*
> > > > > >   * These values specify the maximum amount of data that is not
> > > > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > > > >         encode_putfh(xdr, args->fh, &hdr);
> > > > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > > > >         encode_nops(&hdr);
> > > > > >  }
> > > > > >
> > > > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > > > >                 goto out;
> > > > > >
> > > > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > > > +       if (status)
> > > > > > +               goto out;
> > > > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > > > >  out:
> > > > > >         return status;
> > > > > >  }
> > > > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > > > --- a/include/linux/nfs_xdr.h
> > > > > > +++ b/include/linux/nfs_xdr.h
> > > > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > > > >  struct nfs42_removexattrargs {
> > > > > >         struct nfs4_sequence_args       seq_args;
> > > > > >         struct nfs_fh                   *fh;
> > > > > > +       const u32                       *bitmask;
> > > > > >         const char                      *xattr_name;
> > > > > >  };
> > > > > >
> > > > > >  struct nfs42_removexattrres {
> > > > > >         struct nfs4_sequence_res        seq_res;
> > > > > >         struct nfs4_change_info         cinfo;
> > > > > > +       struct nfs_fattr                *fattr;
> > > > > > +       const struct nfs_server         *server;
> > > > > >  };
> > > > > >
> > > > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > > >
> > > > > > --
> > > > > > 2.53.0
> > > > > >
> > > >
> > > > --
> > > > Jeff Layton <jlayton@kernel.org>
> >
> > --
> > Jeff Layton <jlayton@kernel.org>
>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Thomas Haynes 1 week, 1 day ago
On Fri, Mar 27, 2026 at 03:36:12PM -0800, Olga Kornievskaia wrote:
> On Fri, Mar 27, 2026 at 2:22 PM Thomas Haynes <loghyr@gmail.com> wrote:
> >
> > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > >
> > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > >
> > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > >
> > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > >
> > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > attributes would update change_attr on operations which might now
> > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > generic/728.
> > > > > >
> > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > *inode, const char *name)
> > > > > >             &res.seq_res, 1);
> > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > >         if (!ret)
> > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > +               } else
> > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > >
> > > > > >         return ret;
> > > > > >  }
> > > > > >
> > > > >
> > > > > What's the advantage of doing it this way?
> > > > >
> > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > the mtime there. The server has the most up-to-date version of the
> > > > > mtime and ctime at that point.
> > > >
> > > > In presence of delegated attributes, Is the server required to update
> > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > >
> > > > Is possible that
> > > > some implementations might be different and also do not update the
> > > > ctime/mtime on REMOVEXATTR?
> > > >
> > > > Therefore I was suggesting that the patch
> > > > relies on the fact that it would receive an updated value. Of course
> > > > perhaps all implementations are done the same as the linux server and
> > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > what the server supposed to do (and client rely on).
> > > >
> > >
> > > (cc'ing Tom)
> > >
> > > That is a very good point.
> > >
> > > My interpretation was that delegated timestamps generally covered
> > > writes, but SETATTR style operations that do anything beyond only
> > > changing the mtime can't be cached.
> > >
> > > We probably need some delstid spec clarification: for what operations
> > > is the server required to disable timestamp updates when a write
> > > delegation is outstanding?
> > >
> > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > >
> > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > for writes when there is an outstanding timestamp delegation like we're
> > > doing in nfsd? If so, does it do the same for
> > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> >
> > Jeff,
> >
> > I think the right way to look at this is closer to how size is
> > handled under delegation in RFC8881, rather than as a per-op rule.
> >
> > In our implementation, because we are acting as an MDS and data I/O
> > goes to DSes, we already treat size as effectively delegated when
> > a write layout is outstanding. The MDS does not maintain authoritative
> > size locally in that case. We may refresh size/timestamps internally
> > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > overriding the delegated authority.
> >
> > For timestamps, our behavior is effectively the same model. When
> > the client holds the relevant delegation, the server does not
> > consider itself authoritative for ctime/mtime. If current values
> > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > and the client must present the delegation stateid to demonstrate
> > that authority. So the authority follows the delegation, not the
> > specific operation.
> 
> What happens when the client holding the attribute delegation (it's
> the authority) is doing the query? Is it the server's responsibility
> to query the client before replying.

The Hammerspace server on a getattr checks to see if there is
a delegated stateid (and whether it is the client making the request
or not). If it is and the GETATTR asks for FATTR4_SIZE, FATTR4_TIME_MODIFY,
or FATTR4_TIME_METADATA, then it will send a CB_GETATTR before
responding to the client.

While it does not do so, if the client sent in a SETATTR in the
same compound we could short circuit that. Think a sort of WCC.

> Example, client sends a CLONE
> operation which has a GETATTR attached. (1) is the server supposed to
> issue a CB_GETATTR before replying to the compound? (2) is the client
> not supposed to send any GETATTRs while holding an attribute
> delegation? CLONE is a modifying operation but client hasn't done any
> actual modifications to the opened file so a CB_GETATTR would return
> that file hasn't been modified. Is the server then not going to
> express that the file has indeed been modified when replying to
> GETATTR?

The server could argue that the client wants to know what it thinks.
But that isn't the argeement. The server has to query those values
before sending them on.

> 
> > That said, I don’t think we’ve fully resolved the semantics for all
> > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > been relying on assumptions rather than a fully consistent rule.
> > I.e., CLONE and COPY we just pass through to the DS and we don't
> > implement SETXATTR/REMOVEXATTR.
> >
> > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > any particular op) should update ctime/mtime, but whether delegated
> > timestamps are meant to follow the same attribute-authority model
> > as delegated size in RFC8881. If so, then we expect that the server
> > should query the client via CB_GETATTR to return updated ctime/mtime
> > after such operations while the delegation is outstanding.
> >
> > Thanks,
> > Tom
> >
> >
> > >
> > >
> > >
> > > > > It's certainly possible that the REMOVEXATTR is the only change that
> > > > > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > > > > at all if nothing else changed. With your version, you would.
> > > > >
> > > > > > >
> > > > > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > > ---
> > > > > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > > > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > > > > >  include/linux/nfs_xdr.h |  3 +++
> > > > > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > > > >
> > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > > > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > >  {
> > > > > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > > > > >         struct nfs42_removexattrargs args = {
> > > > > > >                 .fh = NFS_FH(inode),
> > > > > > > +               .bitmask = bitmask,
> > > > > > >                 .xattr_name = name,
> > > > > > >         };
> > > > > > > -       struct nfs42_removexattrres res;
> > > > > > > +       struct nfs42_removexattrres res = {
> > > > > > > +               .server = server,
> > > > > > > +       };
> > > > > > >         struct rpc_message msg = {
> > > > > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > > > > >                 .rpc_argp = &args,
> > > > > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > >         int ret;
> > > > > > >         unsigned long timestamp = jiffies;
> > > > > > >
> > > > > > > +       res.fattr = nfs_alloc_fattr();
> > > > > > > +       if (!res.fattr)
> > > > > > > +               return -ENOMEM;
> > > > > > > +
> > > > > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > > > > +
> > > > > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > > > > >             &res.seq_res, 1);
> > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > -       if (!ret)
> > > > > > > +       if (!ret) {
> > > > > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > > > > +       }
> > > > > > >
> > > > > > > +       kfree(res.fattr);
> > > > > > >         return ret;
> > > > > > >  }
> > > > > > >
> > > > > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > > > > --- a/fs/nfs/nfs42xdr.c
> > > > > > > +++ b/fs/nfs/nfs42xdr.c
> > > > > > > @@ -263,11 +263,13 @@
> > > > > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > > > > >                                          encode_sequence_maxsz + \
> > > > > > >                                          encode_putfh_maxsz + \
> > > > > > > -                                        encode_removexattr_maxsz)
> > > > > > > +                                        encode_removexattr_maxsz + \
> > > > > > > +                                        encode_getattr_maxsz)
> > > > > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > > > > >                                          decode_sequence_maxsz + \
> > > > > > >                                          decode_putfh_maxsz + \
> > > > > > > -                                        decode_removexattr_maxsz)
> > > > > > > +                                        decode_removexattr_maxsz + \
> > > > > > > +                                        decode_getattr_maxsz)
> > > > > > >
> > > > > > >  /*
> > > > > > >   * These values specify the maximum amount of data that is not
> > > > > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > > > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > > > > >         encode_putfh(xdr, args->fh, &hdr);
> > > > > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > > > > >         encode_nops(&hdr);
> > > > > > >  }
> > > > > > >
> > > > > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > > > > >                 goto out;
> > > > > > >
> > > > > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > > > > +       if (status)
> > > > > > > +               goto out;
> > > > > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > > > > >  out:
> > > > > > >         return status;
> > > > > > >  }
> > > > > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > > > > --- a/include/linux/nfs_xdr.h
> > > > > > > +++ b/include/linux/nfs_xdr.h
> > > > > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > > > > >  struct nfs42_removexattrargs {
> > > > > > >         struct nfs4_sequence_args       seq_args;
> > > > > > >         struct nfs_fh                   *fh;
> > > > > > > +       const u32                       *bitmask;
> > > > > > >         const char                      *xattr_name;
> > > > > > >  };
> > > > > > >
> > > > > > >  struct nfs42_removexattrres {
> > > > > > >         struct nfs4_sequence_res        seq_res;
> > > > > > >         struct nfs4_change_info         cinfo;
> > > > > > > +       struct nfs_fattr                *fattr;
> > > > > > > +       const struct nfs_server         *server;
> > > > > > >  };
> > > > > > >
> > > > > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > > > >
> > > > > > > --
> > > > > > > 2.53.0
> > > > > > >
> > > > >
> > > > > --
> > > > > Jeff Layton <jlayton@kernel.org>
> > >
> > > --
> > > Jeff Layton <jlayton@kernel.org>
> >
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Olga Kornievskaia 4 days, 19 hours ago
On Fri, Mar 27, 2026 at 4:02 PM Thomas Haynes <loghyr@gmail.com> wrote:
>
> On Fri, Mar 27, 2026 at 03:36:12PM -0800, Olga Kornievskaia wrote:
> > On Fri, Mar 27, 2026 at 2:22 PM Thomas Haynes <loghyr@gmail.com> wrote:
> > >
> > > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > >
> > > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > >
> > > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > >
> > > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > >
> > > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > > attributes would update change_attr on operations which might now
> > > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > > generic/728.
> > > > > > >
> > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > > *inode, const char *name)
> > > > > > >             &res.seq_res, 1);
> > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > >         if (!ret)
> > > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > > +               } else
> > > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > >
> > > > > > >         return ret;
> > > > > > >  }
> > > > > > >
> > > > > >
> > > > > > What's the advantage of doing it this way?
> > > > > >
> > > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > > the mtime there. The server has the most up-to-date version of the
> > > > > > mtime and ctime at that point.
> > > > >
> > > > > In presence of delegated attributes, Is the server required to update
> > > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > >
> > > > > Is possible that
> > > > > some implementations might be different and also do not update the
> > > > > ctime/mtime on REMOVEXATTR?
> > > > >
> > > > > Therefore I was suggesting that the patch
> > > > > relies on the fact that it would receive an updated value. Of course
> > > > > perhaps all implementations are done the same as the linux server and
> > > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > > what the server supposed to do (and client rely on).
> > > > >
> > > >
> > > > (cc'ing Tom)
> > > >
> > > > That is a very good point.
> > > >
> > > > My interpretation was that delegated timestamps generally covered
> > > > writes, but SETATTR style operations that do anything beyond only
> > > > changing the mtime can't be cached.
> > > >
> > > > We probably need some delstid spec clarification: for what operations
> > > > is the server required to disable timestamp updates when a write
> > > > delegation is outstanding?
> > > >
> > > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > >
> > > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > > for writes when there is an outstanding timestamp delegation like we're
> > > > doing in nfsd? If so, does it do the same for
> > > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > >
> > > Jeff,
> > >
> > > I think the right way to look at this is closer to how size is
> > > handled under delegation in RFC8881, rather than as a per-op rule.
> > >
> > > In our implementation, because we are acting as an MDS and data I/O
> > > goes to DSes, we already treat size as effectively delegated when
> > > a write layout is outstanding. The MDS does not maintain authoritative
> > > size locally in that case. We may refresh size/timestamps internally
> > > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > > overriding the delegated authority.
> > >
> > > For timestamps, our behavior is effectively the same model. When
> > > the client holds the relevant delegation, the server does not
> > > consider itself authoritative for ctime/mtime. If current values
> > > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > > and the client must present the delegation stateid to demonstrate
> > > that authority. So the authority follows the delegation, not the
> > > specific operation.
> >
> > What happens when the client holding the attribute delegation (it's
> > the authority) is doing the query? Is it the server's responsibility
> > to query the client before replying.
>
> The Hammerspace server on a getattr checks to see if there is
> a delegated stateid (and whether it is the client making the request
> or not). If it is and the GETATTR asks for FATTR4_SIZE, FATTR4_TIME_MODIFY,
> or FATTR4_TIME_METADATA, then it will send a CB_GETATTR before
> responding to the client.

So... while I might not be running the latest Hammerspace bits and
something has changed but I do not see Hammerspace server sending a
CB_GETATTR when the "client is making a getattr request with a
delegated stateid". Example is the CLONE operation with an accompanied
GETATTR. The server in this case updates the change attribute and
mtime and returns different from what the client had previously in the
OPEN back to the client (this is what the test expect but I think
that's contradictory to what is said above). (To contrast nfsd server
does not update the change attribute or mtime and returns the same
value and thus making xfstest fail). So if the spec mandates that
before answering the GETATTR a CB_GETATTR needs to be sent then
neither of the server (Hammerspace nor linux) do that. But then again
I'm not sure the client is not at fault for sending a GETATTR in the
CLONE compound in the first place. For instance client does not have a
GETATTR in the COPY compound (and then fails to pass xfstest that
checks for modifies ctime/mtime). But the solution isn't clear to is
it like Jeff's REMOVEXATTR approach to add the GETATTR to the COPY
compound but that then should trigger a CB_GETATTR back to the client.
Again probably none of the servers do that...

Another point I would like to raise is: doesn't it seem
counter-intuitive to be in a situation where we have a
delegation-holding client sending a GETATTR and then a server needing
to do a CB_GETTATTR to the said client to fulfill the request.
Delegations were supposed to reduce traffic and now we are introducing
additional traffic instead. I guess I'm arguing that the client is
broken to ever query for change_attr, mtime if it's holding the
delegation. Yet the linux client (I could be wrong here) is written
such that change_attr or mtime has to always come from the server. Say
the application did a write() followed by a stat(). Client can't
satisfy stat() without reaching out to the server. Yet the server is
not the authority and before it can generate the reply for
change_attr, mtime, it needs to callback to the client (CB_GETATTR).
It seems it would have been better to never get a delegated attribute
delegation in the first place?

> While it does not do so, if the client sent in a SETATTR in the
> same compound we could short circuit that. Think a sort of WCC.
>
> > Example, client sends a CLONE
> > operation which has a GETATTR attached. (1) is the server supposed to
> > issue a CB_GETATTR before replying to the compound? (2) is the client
> > not supposed to send any GETATTRs while holding an attribute
> > delegation? CLONE is a modifying operation but client hasn't done any
> > actual modifications to the opened file so a CB_GETATTR would return
> > that file hasn't been modified. Is the server then not going to
> > express that the file has indeed been modified when replying to
> > GETATTR?
>
> The server could argue that the client wants to know what it thinks.
> But that isn't the argeement. The server has to query those values
> before sending them on.
>
> >
> > > That said, I don’t think we’ve fully resolved the semantics for all
> > > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > > been relying on assumptions rather than a fully consistent rule.
> > > I.e., CLONE and COPY we just pass through to the DS and we don't
> > > implement SETXATTR/REMOVEXATTR.
> > >
> > > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > > any particular op) should update ctime/mtime, but whether delegated
> > > timestamps are meant to follow the same attribute-authority model
> > > as delegated size in RFC8881. If so, then we expect that the server
> > > should query the client via CB_GETATTR to return updated ctime/mtime
> > > after such operations while the delegation is outstanding.
> > >
> > > Thanks,
> > > Tom
> > >
> > >
> > > >
> > > >
> > > >
> > > > > > It's certainly possible that the REMOVEXATTR is the only change that
> > > > > > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > > > > > at all if nothing else changed. With your version, you would.
> > > > > >
> > > > > > > >
> > > > > > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > > > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > > > ---
> > > > > > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > > > > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > > > > > >  include/linux/nfs_xdr.h |  3 +++
> > > > > > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > > > > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > >  {
> > > > > > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > > > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > > > > > >         struct nfs42_removexattrargs args = {
> > > > > > > >                 .fh = NFS_FH(inode),
> > > > > > > > +               .bitmask = bitmask,
> > > > > > > >                 .xattr_name = name,
> > > > > > > >         };
> > > > > > > > -       struct nfs42_removexattrres res;
> > > > > > > > +       struct nfs42_removexattrres res = {
> > > > > > > > +               .server = server,
> > > > > > > > +       };
> > > > > > > >         struct rpc_message msg = {
> > > > > > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > > > > > >                 .rpc_argp = &args,
> > > > > > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > >         int ret;
> > > > > > > >         unsigned long timestamp = jiffies;
> > > > > > > >
> > > > > > > > +       res.fattr = nfs_alloc_fattr();
> > > > > > > > +       if (!res.fattr)
> > > > > > > > +               return -ENOMEM;
> > > > > > > > +
> > > > > > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > > > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > > > > > +
> > > > > > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > > > > > >             &res.seq_res, 1);
> > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > > -       if (!ret)
> > > > > > > > +       if (!ret) {
> > > > > > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > > > > > +       }
> > > > > > > >
> > > > > > > > +       kfree(res.fattr);
> > > > > > > >         return ret;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > > > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > > > > > --- a/fs/nfs/nfs42xdr.c
> > > > > > > > +++ b/fs/nfs/nfs42xdr.c
> > > > > > > > @@ -263,11 +263,13 @@
> > > > > > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > > > > > >                                          encode_sequence_maxsz + \
> > > > > > > >                                          encode_putfh_maxsz + \
> > > > > > > > -                                        encode_removexattr_maxsz)
> > > > > > > > +                                        encode_removexattr_maxsz + \
> > > > > > > > +                                        encode_getattr_maxsz)
> > > > > > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > > > > > >                                          decode_sequence_maxsz + \
> > > > > > > >                                          decode_putfh_maxsz + \
> > > > > > > > -                                        decode_removexattr_maxsz)
> > > > > > > > +                                        decode_removexattr_maxsz + \
> > > > > > > > +                                        decode_getattr_maxsz)
> > > > > > > >
> > > > > > > >  /*
> > > > > > > >   * These values specify the maximum amount of data that is not
> > > > > > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > > > > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > > > > > >         encode_putfh(xdr, args->fh, &hdr);
> > > > > > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > > > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > > > > > >         encode_nops(&hdr);
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > > > > > >                 goto out;
> > > > > > > >
> > > > > > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > > > > > +       if (status)
> > > > > > > > +               goto out;
> > > > > > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > > > > > >  out:
> > > > > > > >         return status;
> > > > > > > >  }
> > > > > > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > > > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > > > > > --- a/include/linux/nfs_xdr.h
> > > > > > > > +++ b/include/linux/nfs_xdr.h
> > > > > > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > > > > > >  struct nfs42_removexattrargs {
> > > > > > > >         struct nfs4_sequence_args       seq_args;
> > > > > > > >         struct nfs_fh                   *fh;
> > > > > > > > +       const u32                       *bitmask;
> > > > > > > >         const char                      *xattr_name;
> > > > > > > >  };
> > > > > > > >
> > > > > > > >  struct nfs42_removexattrres {
> > > > > > > >         struct nfs4_sequence_res        seq_res;
> > > > > > > >         struct nfs4_change_info         cinfo;
> > > > > > > > +       struct nfs_fattr                *fattr;
> > > > > > > > +       const struct nfs_server         *server;
> > > > > > > >  };
> > > > > > > >
> > > > > > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > > > > >
> > > > > > > > --
> > > > > > > > 2.53.0
> > > > > > > >
> > > > > >
> > > > > > --
> > > > > > Jeff Layton <jlayton@kernel.org>
> > > >
> > > > --
> > > > Jeff Layton <jlayton@kernel.org>
> > >
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Rick Macklem 4 days, 14 hours ago
On Tue, Mar 31, 2026 at 11:18 AM Olga Kornievskaia <aglo@umich.edu> wrote:
>
> On Fri, Mar 27, 2026 at 4:02 PM Thomas Haynes <loghyr@gmail.com> wrote:
> >
> > On Fri, Mar 27, 2026 at 03:36:12PM -0800, Olga Kornievskaia wrote:
> > > On Fri, Mar 27, 2026 at 2:22 PM Thomas Haynes <loghyr@gmail.com> wrote:
> > > >
> > > > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > >
> > > > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > >
> > > > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > > >
> > > > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > > >
> > > > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > > > attributes would update change_attr on operations which might now
> > > > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > > > generic/728.
> > > > > > > >
> > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > > > *inode, const char *name)
> > > > > > > >             &res.seq_res, 1);
> > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > >         if (!ret)
> > > > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > > > +               } else
> > > > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > >
> > > > > > > >         return ret;
> > > > > > > >  }
> > > > > > > >
> > > > > > >
> > > > > > > What's the advantage of doing it this way?
> > > > > > >
> > > > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > > > the mtime there. The server has the most up-to-date version of the
> > > > > > > mtime and ctime at that point.
> > > > > >
> > > > > > In presence of delegated attributes, Is the server required to update
> > > > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > > >
> > > > > > Is possible that
> > > > > > some implementations might be different and also do not update the
> > > > > > ctime/mtime on REMOVEXATTR?
> > > > > >
> > > > > > Therefore I was suggesting that the patch
> > > > > > relies on the fact that it would receive an updated value. Of course
> > > > > > perhaps all implementations are done the same as the linux server and
> > > > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > > > what the server supposed to do (and client rely on).
> > > > > >
> > > > >
> > > > > (cc'ing Tom)
> > > > >
> > > > > That is a very good point.
> > > > >
> > > > > My interpretation was that delegated timestamps generally covered
> > > > > writes, but SETATTR style operations that do anything beyond only
> > > > > changing the mtime can't be cached.
> > > > >
> > > > > We probably need some delstid spec clarification: for what operations
> > > > > is the server required to disable timestamp updates when a write
> > > > > delegation is outstanding?
> > > > >
> > > > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > > >
> > > > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > > > for writes when there is an outstanding timestamp delegation like we're
> > > > > doing in nfsd? If so, does it do the same for
> > > > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > > >
> > > > Jeff,
> > > >
> > > > I think the right way to look at this is closer to how size is
> > > > handled under delegation in RFC8881, rather than as a per-op rule.
> > > >
> > > > In our implementation, because we are acting as an MDS and data I/O
> > > > goes to DSes, we already treat size as effectively delegated when
> > > > a write layout is outstanding. The MDS does not maintain authoritative
> > > > size locally in that case. We may refresh size/timestamps internally
> > > > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > > > overriding the delegated authority.
> > > >
> > > > For timestamps, our behavior is effectively the same model. When
> > > > the client holds the relevant delegation, the server does not
> > > > consider itself authoritative for ctime/mtime. If current values
> > > > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > > > and the client must present the delegation stateid to demonstrate
> > > > that authority. So the authority follows the delegation, not the
> > > > specific operation.
> > >
> > > What happens when the client holding the attribute delegation (it's
> > > the authority) is doing the query? Is it the server's responsibility
> > > to query the client before replying.
> >
> > The Hammerspace server on a getattr checks to see if there is
> > a delegated stateid (and whether it is the client making the request
> > or not). If it is and the GETATTR asks for FATTR4_SIZE, FATTR4_TIME_MODIFY,
> > or FATTR4_TIME_METADATA, then it will send a CB_GETATTR before
> > responding to the client.
I thought the requirement to do a CB_GETATTR was if the GETATTR is
from a different client than the one holding the delegation, at least for
NFSv4.1/4.2? (Basically, if a 4.1/4.2 client holds a write delegation, it
is expected to "fake" the attributes and also reply with those faked attributes
to the server in a CB_GETATTR reply, so that other clients see the "faked"
attributes. Only after a DELEGRETURN does the server have the definitive
values for these attributes. At least that's the way I understood it?)

(Admittedly, for NFSv4.0, knowing of the client doing the GETATTR was
 the same one as the client holding the delegation wasn't often the case,
 but I basically ignore NFSv4.0 now. Imho NFSv4.0 delegations were
 so messy they weren't worth implementing.)

rick
>
> So... while I might not be running the latest Hammerspace bits and
> something has changed but I do not see Hammerspace server sending a
> CB_GETATTR when the "client is making a getattr request with a
> delegated stateid". Example is the CLONE operation with an accompanied
> GETATTR. The server in this case updates the change attribute and
> mtime and returns different from what the client had previously in the
> OPEN back to the client (this is what the test expect but I think
> that's contradictory to what is said above). (To contrast nfsd server
> does not update the change attribute or mtime and returns the same
> value and thus making xfstest fail). So if the spec mandates that
> before answering the GETATTR a CB_GETATTR needs to be sent then
> neither of the server (Hammerspace nor linux) do that. But then again
> I'm not sure the client is not at fault for sending a GETATTR in the
> CLONE compound in the first place. For instance client does not have a
> GETATTR in the COPY compound (and then fails to pass xfstest that
> checks for modifies ctime/mtime). But the solution isn't clear to is
> it like Jeff's REMOVEXATTR approach to add the GETATTR to the COPY
> compound but that then should trigger a CB_GETATTR back to the client.
> Again probably none of the servers do that...
>
> Another point I would like to raise is: doesn't it seem
> counter-intuitive to be in a situation where we have a
> delegation-holding client sending a GETATTR and then a server needing
> to do a CB_GETTATTR to the said client to fulfill the request.
> Delegations were supposed to reduce traffic and now we are introducing
> additional traffic instead. I guess I'm arguing that the client is
> broken to ever query for change_attr, mtime if it's holding the
> delegation. Yet the linux client (I could be wrong here) is written
> such that change_attr or mtime has to always come from the server. Say
> the application did a write() followed by a stat(). Client can't
> satisfy stat() without reaching out to the server. Yet the server is
> not the authority and before it can generate the reply for
> change_attr, mtime, it needs to callback to the client (CB_GETATTR).
> It seems it would have been better to never get a delegated attribute
> delegation in the first place?
>
> > While it does not do so, if the client sent in a SETATTR in the
> > same compound we could short circuit that. Think a sort of WCC.
> >
> > > Example, client sends a CLONE
> > > operation which has a GETATTR attached. (1) is the server supposed to
> > > issue a CB_GETATTR before replying to the compound? (2) is the client
> > > not supposed to send any GETATTRs while holding an attribute
> > > delegation? CLONE is a modifying operation but client hasn't done any
> > > actual modifications to the opened file so a CB_GETATTR would return
> > > that file hasn't been modified. Is the server then not going to
> > > express that the file has indeed been modified when replying to
> > > GETATTR?
> >
> > The server could argue that the client wants to know what it thinks.
> > But that isn't the argeement. The server has to query those values
> > before sending them on.
> >
> > >
> > > > That said, I don’t think we’ve fully resolved the semantics for all
> > > > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > > > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > > > been relying on assumptions rather than a fully consistent rule.
> > > > I.e., CLONE and COPY we just pass through to the DS and we don't
> > > > implement SETXATTR/REMOVEXATTR.
> > > >
> > > > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > > > any particular op) should update ctime/mtime, but whether delegated
> > > > timestamps are meant to follow the same attribute-authority model
> > > > as delegated size in RFC8881. If so, then we expect that the server
> > > > should query the client via CB_GETATTR to return updated ctime/mtime
> > > > after such operations while the delegation is outstanding.
> > > >
> > > > Thanks,
> > > > Tom
> > > >
> > > >
> > > > >
> > > > >
> > > > >
> > > > > > > It's certainly possible that the REMOVEXATTR is the only change that
> > > > > > > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > > > > > > at all if nothing else changed. With your version, you would.
> > > > > > >
> > > > > > > > >
> > > > > > > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > > > > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > > > > ---
> > > > > > > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > > > > > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > > > > > > >  include/linux/nfs_xdr.h |  3 +++
> > > > > > > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > > > > > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > > >  {
> > > > > > > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > > > > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > > > > > > >         struct nfs42_removexattrargs args = {
> > > > > > > > >                 .fh = NFS_FH(inode),
> > > > > > > > > +               .bitmask = bitmask,
> > > > > > > > >                 .xattr_name = name,
> > > > > > > > >         };
> > > > > > > > > -       struct nfs42_removexattrres res;
> > > > > > > > > +       struct nfs42_removexattrres res = {
> > > > > > > > > +               .server = server,
> > > > > > > > > +       };
> > > > > > > > >         struct rpc_message msg = {
> > > > > > > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > > > > > > >                 .rpc_argp = &args,
> > > > > > > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > > >         int ret;
> > > > > > > > >         unsigned long timestamp = jiffies;
> > > > > > > > >
> > > > > > > > > +       res.fattr = nfs_alloc_fattr();
> > > > > > > > > +       if (!res.fattr)
> > > > > > > > > +               return -ENOMEM;
> > > > > > > > > +
> > > > > > > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > > > > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > > > > > > +
> > > > > > > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > > > > > > >             &res.seq_res, 1);
> > > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > > > -       if (!ret)
> > > > > > > > > +       if (!ret) {
> > > > > > > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > > > > > > +       }
> > > > > > > > >
> > > > > > > > > +       kfree(res.fattr);
> > > > > > > > >         return ret;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > > > > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > > > > > > --- a/fs/nfs/nfs42xdr.c
> > > > > > > > > +++ b/fs/nfs/nfs42xdr.c
> > > > > > > > > @@ -263,11 +263,13 @@
> > > > > > > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > > > > > > >                                          encode_sequence_maxsz + \
> > > > > > > > >                                          encode_putfh_maxsz + \
> > > > > > > > > -                                        encode_removexattr_maxsz)
> > > > > > > > > +                                        encode_removexattr_maxsz + \
> > > > > > > > > +                                        encode_getattr_maxsz)
> > > > > > > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > > > > > > >                                          decode_sequence_maxsz + \
> > > > > > > > >                                          decode_putfh_maxsz + \
> > > > > > > > > -                                        decode_removexattr_maxsz)
> > > > > > > > > +                                        decode_removexattr_maxsz + \
> > > > > > > > > +                                        decode_getattr_maxsz)
> > > > > > > > >
> > > > > > > > >  /*
> > > > > > > > >   * These values specify the maximum amount of data that is not
> > > > > > > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > > > > > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > > > > > > >         encode_putfh(xdr, args->fh, &hdr);
> > > > > > > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > > > > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > > > > > > >         encode_nops(&hdr);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > > > > > > >                 goto out;
> > > > > > > > >
> > > > > > > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > > > > > > +       if (status)
> > > > > > > > > +               goto out;
> > > > > > > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > > > > > > >  out:
> > > > > > > > >         return status;
> > > > > > > > >  }
> > > > > > > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > > > > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > > > > > > --- a/include/linux/nfs_xdr.h
> > > > > > > > > +++ b/include/linux/nfs_xdr.h
> > > > > > > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > > > > > > >  struct nfs42_removexattrargs {
> > > > > > > > >         struct nfs4_sequence_args       seq_args;
> > > > > > > > >         struct nfs_fh                   *fh;
> > > > > > > > > +       const u32                       *bitmask;
> > > > > > > > >         const char                      *xattr_name;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > >  struct nfs42_removexattrres {
> > > > > > > > >         struct nfs4_sequence_res        seq_res;
> > > > > > > > >         struct nfs4_change_info         cinfo;
> > > > > > > > > +       struct nfs_fattr                *fattr;
> > > > > > > > > +       const struct nfs_server         *server;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > 2.53.0
> > > > > > > > >
> > > > > > >
> > > > > > > --
> > > > > > > Jeff Layton <jlayton@kernel.org>
> > > > >
> > > > > --
> > > > > Jeff Layton <jlayton@kernel.org>
> > > >
>
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Thomas Haynes 4 days, 18 hours ago
On Tue, Mar 31, 2026 at 02:17:54PM -0800, Olga Kornievskaia wrote:
> On Fri, Mar 27, 2026 at 4:02 PM Thomas Haynes <loghyr@gmail.com> wrote:
> >
> > On Fri, Mar 27, 2026 at 03:36:12PM -0800, Olga Kornievskaia wrote:
> > > On Fri, Mar 27, 2026 at 2:22 PM Thomas Haynes <loghyr@gmail.com> wrote:
> > > >
> > > > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > >
> > > > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > >
> > > > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > > >
> > > > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > > >
> > > > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > > > attributes would update change_attr on operations which might now
> > > > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > > > generic/728.
> > > > > > > >
> > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > > > *inode, const char *name)
> > > > > > > >             &res.seq_res, 1);
> > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > >         if (!ret)
> > > > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > > > +               } else
> > > > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > >
> > > > > > > >         return ret;
> > > > > > > >  }
> > > > > > > >
> > > > > > >
> > > > > > > What's the advantage of doing it this way?
> > > > > > >
> > > > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > > > the mtime there. The server has the most up-to-date version of the
> > > > > > > mtime and ctime at that point.
> > > > > >
> > > > > > In presence of delegated attributes, Is the server required to update
> > > > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > > >
> > > > > > Is possible that
> > > > > > some implementations might be different and also do not update the
> > > > > > ctime/mtime on REMOVEXATTR?
> > > > > >
> > > > > > Therefore I was suggesting that the patch
> > > > > > relies on the fact that it would receive an updated value. Of course
> > > > > > perhaps all implementations are done the same as the linux server and
> > > > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > > > what the server supposed to do (and client rely on).
> > > > > >
> > > > >
> > > > > (cc'ing Tom)
> > > > >
> > > > > That is a very good point.
> > > > >
> > > > > My interpretation was that delegated timestamps generally covered
> > > > > writes, but SETATTR style operations that do anything beyond only
> > > > > changing the mtime can't be cached.
> > > > >
> > > > > We probably need some delstid spec clarification: for what operations
> > > > > is the server required to disable timestamp updates when a write
> > > > > delegation is outstanding?
> > > > >
> > > > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > > >
> > > > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > > > for writes when there is an outstanding timestamp delegation like we're
> > > > > doing in nfsd? If so, does it do the same for
> > > > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > > >
> > > > Jeff,
> > > >
> > > > I think the right way to look at this is closer to how size is
> > > > handled under delegation in RFC8881, rather than as a per-op rule.
> > > >
> > > > In our implementation, because we are acting as an MDS and data I/O
> > > > goes to DSes, we already treat size as effectively delegated when
> > > > a write layout is outstanding. The MDS does not maintain authoritative
> > > > size locally in that case. We may refresh size/timestamps internally
> > > > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > > > overriding the delegated authority.
> > > >
> > > > For timestamps, our behavior is effectively the same model. When
> > > > the client holds the relevant delegation, the server does not
> > > > consider itself authoritative for ctime/mtime. If current values
> > > > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > > > and the client must present the delegation stateid to demonstrate
> > > > that authority. So the authority follows the delegation, not the
> > > > specific operation.
> > >
> > > What happens when the client holding the attribute delegation (it's
> > > the authority) is doing the query? Is it the server's responsibility
> > > to query the client before replying.
> >
> > The Hammerspace server on a getattr checks to see if there is
> > a delegated stateid (and whether it is the client making the request
> > or not). If it is and the GETATTR asks for FATTR4_SIZE, FATTR4_TIME_MODIFY,
> > or FATTR4_TIME_METADATA, then it will send a CB_GETATTR before
> > responding to the client.
> 
> So... while I might not be running the latest Hammerspace bits and
> something has changed but I do not see Hammerspace server sending a
> CB_GETATTR when the "client is making a getattr request with a
> delegated stateid".

Or the process is broken on the server side.


> Example is the CLONE operation with an accompanied
> GETATTR. The server in this case updates the change attribute and
> mtime and returns different from what the client had previously in the
> OPEN back to the client (this is what the test expect but I think
> that's contradictory to what is said above). (To contrast nfsd server
> does not update the change attribute or mtime and returns the same
> value and thus making xfstest fail). So if the spec mandates that
> before answering the GETATTR a CB_GETATTR needs to be sent then
> neither of the server (Hammerspace nor linux) do that. But then again
> I'm not sure the client is not at fault for sending a GETATTR in the
> CLONE compound in the first place. For instance client does not have a
> GETATTR in the COPY compound (and then fails to pass xfstest that
> checks for modifies ctime/mtime). But the solution isn't clear to is
> it like Jeff's REMOVEXATTR approach to add the GETATTR to the COPY
> compound but that then should trigger a CB_GETATTR back to the client.
> Again probably none of the servers do that...
> 
> Another point I would like to raise is: doesn't it seem
> counter-intuitive to be in a situation where we have a
> delegation-holding client sending a GETATTR and then a server needing
> to do a CB_GETTATTR to the said client to fulfill the request.
> Delegations were supposed to reduce traffic and now we are introducing
> additional traffic instead. I guess I'm arguing that the client is
> broken to ever query for change_attr, mtime if it's holding the
> delegation. Yet the linux client (I could be wrong here) is written
> such that change_attr or mtime has to always come from the server. Say
> the application did a write() followed by a stat(). Client can't
> satisfy stat() without reaching out to the server. Yet the server is
> not the authority and before it can generate the reply for
> change_attr, mtime, it needs to callback to the client (CB_GETATTR).
> It seems it would have been better to never get a delegated attribute
> delegation in the first place?

Or the client needs to change its behaviour in this scenario? Why ask
as question to a source which is not the authority?

Or, I agree with your analysis...

> 
> > While it does not do so, if the client sent in a SETATTR in the
> > same compound we could short circuit that. Think a sort of WCC.
> >
> > > Example, client sends a CLONE
> > > operation which has a GETATTR attached. (1) is the server supposed to
> > > issue a CB_GETATTR before replying to the compound? (2) is the client
> > > not supposed to send any GETATTRs while holding an attribute
> > > delegation? CLONE is a modifying operation but client hasn't done any
> > > actual modifications to the opened file so a CB_GETATTR would return
> > > that file hasn't been modified. Is the server then not going to
> > > express that the file has indeed been modified when replying to
> > > GETATTR?
> >
> > The server could argue that the client wants to know what it thinks.
> > But that isn't the argeement. The server has to query those values
> > before sending them on.
> >
> > >
> > > > That said, I don’t think we’ve fully resolved the semantics for all
> > > > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > > > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > > > been relying on assumptions rather than a fully consistent rule.
> > > > I.e., CLONE and COPY we just pass through to the DS and we don't
> > > > implement SETXATTR/REMOVEXATTR.
> > > >
> > > > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > > > any particular op) should update ctime/mtime, but whether delegated
> > > > timestamps are meant to follow the same attribute-authority model
> > > > as delegated size in RFC8881. If so, then we expect that the server
> > > > should query the client via CB_GETATTR to return updated ctime/mtime
> > > > after such operations while the delegation is outstanding.
> > > >
> > > > Thanks,
> > > > Tom
> > > >
> > > >
> > > > >
> > > > >
> > > > >
> > > > > > > It's certainly possible that the REMOVEXATTR is the only change that
> > > > > > > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > > > > > > at all if nothing else changed. With your version, you would.
> > > > > > >
> > > > > > > > >
> > > > > > > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > > > > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > > > > ---
> > > > > > > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > > > > > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > > > > > > >  include/linux/nfs_xdr.h |  3 +++
> > > > > > > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > > > > > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > > >  {
> > > > > > > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > > > > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > > > > > > >         struct nfs42_removexattrargs args = {
> > > > > > > > >                 .fh = NFS_FH(inode),
> > > > > > > > > +               .bitmask = bitmask,
> > > > > > > > >                 .xattr_name = name,
> > > > > > > > >         };
> > > > > > > > > -       struct nfs42_removexattrres res;
> > > > > > > > > +       struct nfs42_removexattrres res = {
> > > > > > > > > +               .server = server,
> > > > > > > > > +       };
> > > > > > > > >         struct rpc_message msg = {
> > > > > > > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > > > > > > >                 .rpc_argp = &args,
> > > > > > > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > > >         int ret;
> > > > > > > > >         unsigned long timestamp = jiffies;
> > > > > > > > >
> > > > > > > > > +       res.fattr = nfs_alloc_fattr();
> > > > > > > > > +       if (!res.fattr)
> > > > > > > > > +               return -ENOMEM;
> > > > > > > > > +
> > > > > > > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > > > > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > > > > > > +
> > > > > > > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > > > > > > >             &res.seq_res, 1);
> > > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > > > -       if (!ret)
> > > > > > > > > +       if (!ret) {
> > > > > > > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > > > > > > +       }
> > > > > > > > >
> > > > > > > > > +       kfree(res.fattr);
> > > > > > > > >         return ret;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > > > > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > > > > > > --- a/fs/nfs/nfs42xdr.c
> > > > > > > > > +++ b/fs/nfs/nfs42xdr.c
> > > > > > > > > @@ -263,11 +263,13 @@
> > > > > > > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > > > > > > >                                          encode_sequence_maxsz + \
> > > > > > > > >                                          encode_putfh_maxsz + \
> > > > > > > > > -                                        encode_removexattr_maxsz)
> > > > > > > > > +                                        encode_removexattr_maxsz + \
> > > > > > > > > +                                        encode_getattr_maxsz)
> > > > > > > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > > > > > > >                                          decode_sequence_maxsz + \
> > > > > > > > >                                          decode_putfh_maxsz + \
> > > > > > > > > -                                        decode_removexattr_maxsz)
> > > > > > > > > +                                        decode_removexattr_maxsz + \
> > > > > > > > > +                                        decode_getattr_maxsz)
> > > > > > > > >
> > > > > > > > >  /*
> > > > > > > > >   * These values specify the maximum amount of data that is not
> > > > > > > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > > > > > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > > > > > > >         encode_putfh(xdr, args->fh, &hdr);
> > > > > > > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > > > > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > > > > > > >         encode_nops(&hdr);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > > > > > > >                 goto out;
> > > > > > > > >
> > > > > > > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > > > > > > +       if (status)
> > > > > > > > > +               goto out;
> > > > > > > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > > > > > > >  out:
> > > > > > > > >         return status;
> > > > > > > > >  }
> > > > > > > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > > > > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > > > > > > --- a/include/linux/nfs_xdr.h
> > > > > > > > > +++ b/include/linux/nfs_xdr.h
> > > > > > > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > > > > > > >  struct nfs42_removexattrargs {
> > > > > > > > >         struct nfs4_sequence_args       seq_args;
> > > > > > > > >         struct nfs_fh                   *fh;
> > > > > > > > > +       const u32                       *bitmask;
> > > > > > > > >         const char                      *xattr_name;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > >  struct nfs42_removexattrres {
> > > > > > > > >         struct nfs4_sequence_res        seq_res;
> > > > > > > > >         struct nfs4_change_info         cinfo;
> > > > > > > > > +       struct nfs_fattr                *fattr;
> > > > > > > > > +       const struct nfs_server         *server;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > 2.53.0
> > > > > > > > >
> > > > > > >
> > > > > > > --
> > > > > > > Jeff Layton <jlayton@kernel.org>
> > > > >
> > > > > --
> > > > > Jeff Layton <jlayton@kernel.org>
> > > >
Re: [PATCH v2 2/2] nfs: update inode ctime after removexattr operation
Posted by Olga Kornievskaia 4 days, 16 hours ago
On Tue, Mar 31, 2026 at 2:51 PM Thomas Haynes <loghyr@gmail.com> wrote:
>
> On Tue, Mar 31, 2026 at 02:17:54PM -0800, Olga Kornievskaia wrote:
> > On Fri, Mar 27, 2026 at 4:02 PM Thomas Haynes <loghyr@gmail.com> wrote:
> > >
> > > On Fri, Mar 27, 2026 at 03:36:12PM -0800, Olga Kornievskaia wrote:
> > > > On Fri, Mar 27, 2026 at 2:22 PM Thomas Haynes <loghyr@gmail.com> wrote:
> > > > >
> > > > > On Fri, Mar 27, 2026 at 12:59:54PM -0800, Jeff Layton wrote:
> > > > > > On Fri, 2026-03-27 at 12:20 -0400, Olga Kornievskaia wrote:
> > > > > > > On Fri, Mar 27, 2026 at 11:50 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > >
> > > > > > > > On Fri, 2026-03-27 at 11:11 -0400, Olga Kornievskaia wrote:
> > > > > > > > > On Tue, Mar 24, 2026 at 1:32 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > > >
> > > > > > > > > > xfstest generic/728 fails with delegated timestamps. The client does a
> > > > > > > > > > removexattr and then a stat to test the ctime, which doesn't change. The
> > > > > > > > > > stat() doesn't trigger a GETATTR because of the delegated timestamps, so
> > > > > > > > > > it relies on the cached ctime, which is wrong.
> > > > > > > > > >
> > > > > > > > > > The setxattr compound has a trailing GETATTR, which ensures that its
> > > > > > > > > > ctime gets updated. Follow the same strategy with removexattr.
> > > > > > > > >
> > > > > > > > > This approach relies on the fact that the server the serves delegated
> > > > > > > > > attributes would update change_attr on operations which might now
> > > > > > > > > necessarily happen (ie, linux server does not update change_attribute
> > > > > > > > > on writes or clone). I propose an alternative fix for the failing
> > > > > > > > > generic/728.
> > > > > > > > >
> > > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > > index 7b3ca68fb4bb..ede1835a45b3 100644
> > > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > > @@ -1389,7 +1389,13 @@ static int _nfs42_proc_removexattr(struct inode
> > > > > > > > > *inode, const char *name)
> > > > > > > > >             &res.seq_res, 1);
> > > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > > >         if (!ret)
> > > > > > > > > -               nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > > +               if (nfs_have_delegated_attributes(inode)) {
> > > > > > > > > +                       nfs_update_delegated_mtime(inode);
> > > > > > > > > +                       spin_lock(&inode->i_lock);
> > > > > > > > > +                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
> > > > > > > > > +                       spin_unlock(&inode->i_lock);
> > > > > > > > > +               } else
> > > > > > > > > +                       nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > >
> > > > > > > > >         return ret;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > >
> > > > > > > > What's the advantage of doing it this way?
> > > > > > > >
> > > > > > > > You just sent a REMOVEXATTR operation to the server that will change
> > > > > > > > the mtime there. The server has the most up-to-date version of the
> > > > > > > > mtime and ctime at that point.
> > > > > > >
> > > > > > > In presence of delegated attributes, Is the server required to update
> > > > > > > its mtime/ctime on an operation? As I mentioned, the linux server does
> > > > > > > not update its ctime/mtime for WRITE, CLONE, COPY.
> > > > > > >
> > > > > > > Is possible that
> > > > > > > some implementations might be different and also do not update the
> > > > > > > ctime/mtime on REMOVEXATTR?
> > > > > > >
> > > > > > > Therefore I was suggesting that the patch
> > > > > > > relies on the fact that it would receive an updated value. Of course
> > > > > > > perhaps all implementations are done the same as the linux server and
> > > > > > > my point is moot. I didn't see anything in the spec that clarifies
> > > > > > > what the server supposed to do (and client rely on).
> > > > > > >
> > > > > >
> > > > > > (cc'ing Tom)
> > > > > >
> > > > > > That is a very good point.
> > > > > >
> > > > > > My interpretation was that delegated timestamps generally covered
> > > > > > writes, but SETATTR style operations that do anything beyond only
> > > > > > changing the mtime can't be cached.
> > > > > >
> > > > > > We probably need some delstid spec clarification: for what operations
> > > > > > is the server required to disable timestamp updates when a write
> > > > > > delegation is outstanding?
> > > > > >
> > > > > > In the case of nfsd, we disable timestamp updates for WRITE/COPY/CLONE
> > > > > > but not SETATTR/SETXATTR/REMOVEXATTR.
> > > > > >
> > > > > > How does the Hammerspace anvil behave? Does it disable c/mtime updates
> > > > > > for writes when there is an outstanding timestamp delegation like we're
> > > > > > doing in nfsd? If so, does it do the same for
> > > > > > SETATTR/SETXATTR/REMOVEXATTR operations as well?
> > > > >
> > > > > Jeff,
> > > > >
> > > > > I think the right way to look at this is closer to how size is
> > > > > handled under delegation in RFC8881, rather than as a per-op rule.
> > > > >
> > > > > In our implementation, because we are acting as an MDS and data I/O
> > > > > goes to DSes, we already treat size as effectively delegated when
> > > > > a write layout is outstanding. The MDS does not maintain authoritative
> > > > > size locally in that case. We may refresh size/timestamps internally
> > > > > (e.g., on GETATTR by querying DSes), but we don’t treat that as
> > > > > overriding the delegated authority.
> > > > >
> > > > > For timestamps, our behavior is effectively the same model. When
> > > > > the client holds the relevant delegation, the server does not
> > > > > consider itself authoritative for ctime/mtime. If current values
> > > > > are needed, we can obtain them from the client (e.g., via CB_GETATTR),
> > > > > and the client must present the delegation stateid to demonstrate
> > > > > that authority. So the authority follows the delegation, not the
> > > > > specific operation.
> > > >
> > > > What happens when the client holding the attribute delegation (it's
> > > > the authority) is doing the query? Is it the server's responsibility
> > > > to query the client before replying.
> > >
> > > The Hammerspace server on a getattr checks to see if there is
> > > a delegated stateid (and whether it is the client making the request
> > > or not). If it is and the GETATTR asks for FATTR4_SIZE, FATTR4_TIME_MODIFY,
> > > or FATTR4_TIME_METADATA, then it will send a CB_GETATTR before
> > > responding to the client.
> >
> > So... while I might not be running the latest Hammerspace bits and
> > something has changed but I do not see Hammerspace server sending a
> > CB_GETATTR when the "client is making a getattr request with a
> > delegated stateid".
>
> Or the process is broken on the server side.
>
>
> > Example is the CLONE operation with an accompanied
> > GETATTR. The server in this case updates the change attribute and
> > mtime and returns different from what the client had previously in the
> > OPEN back to the client (this is what the test expect but I think
> > that's contradictory to what is said above). (To contrast nfsd server
> > does not update the change attribute or mtime and returns the same
> > value and thus making xfstest fail). So if the spec mandates that
> > before answering the GETATTR a CB_GETATTR needs to be sent then
> > neither of the server (Hammerspace nor linux) do that. But then again
> > I'm not sure the client is not at fault for sending a GETATTR in the
> > CLONE compound in the first place. For instance client does not have a
> > GETATTR in the COPY compound (and then fails to pass xfstest that
> > checks for modifies ctime/mtime). But the solution isn't clear to is
> > it like Jeff's REMOVEXATTR approach to add the GETATTR to the COPY
> > compound but that then should trigger a CB_GETATTR back to the client.
> > Again probably none of the servers do that...
> >
> > Another point I would like to raise is: doesn't it seem
> > counter-intuitive to be in a situation where we have a
> > delegation-holding client sending a GETATTR and then a server needing
> > to do a CB_GETTATTR to the said client to fulfill the request.
> > Delegations were supposed to reduce traffic and now we are introducing
> > additional traffic instead. I guess I'm arguing that the client is
> > broken to ever query for change_attr, mtime if it's holding the
> > delegation. Yet the linux client (I could be wrong here) is written
> > such that change_attr or mtime has to always come from the server. Say
> > the application did a write() followed by a stat(). Client can't
> > satisfy stat() without reaching out to the server. Yet the server is
> > not the authority and before it can generate the reply for
> > change_attr, mtime, it needs to callback to the client (CB_GETATTR).
> > It seems it would have been better to never get a delegated attribute
> > delegation in the first place?
>
> Or the client needs to change its behaviour in this scenario? Why ask
> as question to a source which is not the authority?

well i think it's because it's not able to generate the values needed
(ie the server is the only one that can generate the change_attr value
and mtime is also supposed to come from the server clock too)?

> Or, I agree with your analysis...
>
> >
> > > While it does not do so, if the client sent in a SETATTR in the
> > > same compound we could short circuit that. Think a sort of WCC.
> > >
> > > > Example, client sends a CLONE
> > > > operation which has a GETATTR attached. (1) is the server supposed to
> > > > issue a CB_GETATTR before replying to the compound? (2) is the client
> > > > not supposed to send any GETATTRs while holding an attribute
> > > > delegation? CLONE is a modifying operation but client hasn't done any
> > > > actual modifications to the opened file so a CB_GETATTR would return
> > > > that file hasn't been modified. Is the server then not going to
> > > > express that the file has indeed been modified when replying to
> > > > GETATTR?
> > >
> > > The server could argue that the client wants to know what it thinks.
> > > But that isn't the argeement. The server has to query those values
> > > before sending them on.
> > >
> > > >
> > > > > That said, I don’t think we’ve fully resolved the semantics for all
> > > > > metadata-style ops either. WRITE and SETATTR are clear in our model,
> > > > > but for things like CLONE/COPY/SETXATTR/REMOVEXATTR, we’ve likely
> > > > > been relying on assumptions rather than a fully consistent rule.
> > > > > I.e., CLONE and COPY we just pass through to the DS and we don't
> > > > > implement SETXATTR/REMOVEXATTR.
> > > > >
> > > > > So the spec question, as I see it, is not whether REMOVEXATTR (or
> > > > > any particular op) should update ctime/mtime, but whether delegated
> > > > > timestamps are meant to follow the same attribute-authority model
> > > > > as delegated size in RFC8881. If so, then we expect that the server
> > > > > should query the client via CB_GETATTR to return updated ctime/mtime
> > > > > after such operations while the delegation is outstanding.
> > > > >
> > > > > Thanks,
> > > > > Tom
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > > > It's certainly possible that the REMOVEXATTR is the only change that
> > > > > > > > occurred. With what I'm proposing, we don't even need to do a SETATTR
> > > > > > > > at all if nothing else changed. With your version, you would.
> > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Fixes: 3e1f02123fba ("NFSv4.2: add client side XDR handling for extended attributes")
> > > > > > > > > > Reported-by: Olga Kornievskaia <aglo@umich.edu>
> > > > > > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > > > > > ---
> > > > > > > > > >  fs/nfs/nfs42proc.c      | 18 ++++++++++++++++--
> > > > > > > > > >  fs/nfs/nfs42xdr.c       | 10 ++++++++--
> > > > > > > > > >  include/linux/nfs_xdr.h |  3 +++
> > > > > > > > > >  3 files changed, 27 insertions(+), 4 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > > > > > > > > > index 7b3ca68fb4bb3bee293f8621e5ed5fa596f5da3f..7e5c1172fc11c9d5a55b3621977ac83bb98f7c20 100644
> > > > > > > > > > --- a/fs/nfs/nfs42proc.c
> > > > > > > > > > +++ b/fs/nfs/nfs42proc.c
> > > > > > > > > > @@ -1372,11 +1372,15 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
> > > > > > > > > >  static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > > > >  {
> > > > > > > > > >         struct nfs_server *server = NFS_SERVER(inode);
> > > > > > > > > > +       __u32 bitmask[NFS_BITMASK_SZ];
> > > > > > > > > >         struct nfs42_removexattrargs args = {
> > > > > > > > > >                 .fh = NFS_FH(inode),
> > > > > > > > > > +               .bitmask = bitmask,
> > > > > > > > > >                 .xattr_name = name,
> > > > > > > > > >         };
> > > > > > > > > > -       struct nfs42_removexattrres res;
> > > > > > > > > > +       struct nfs42_removexattrres res = {
> > > > > > > > > > +               .server = server,
> > > > > > > > > > +       };
> > > > > > > > > >         struct rpc_message msg = {
> > > > > > > > > >                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR],
> > > > > > > > > >                 .rpc_argp = &args,
> > > > > > > > > > @@ -1385,12 +1389,22 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
> > > > > > > > > >         int ret;
> > > > > > > > > >         unsigned long timestamp = jiffies;
> > > > > > > > > >
> > > > > > > > > > +       res.fattr = nfs_alloc_fattr();
> > > > > > > > > > +       if (!res.fattr)
> > > > > > > > > > +               return -ENOMEM;
> > > > > > > > > > +
> > > > > > > > > > +       nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
> > > > > > > > > > +                        inode, NFS_INO_INVALID_CHANGE);
> > > > > > > > > > +
> > > > > > > > > >         ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
> > > > > > > > > >             &res.seq_res, 1);
> > > > > > > > > >         trace_nfs4_removexattr(inode, name, ret);
> > > > > > > > > > -       if (!ret)
> > > > > > > > > > +       if (!ret) {
> > > > > > > > > >                 nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
> > > > > > > > > > +               ret = nfs_post_op_update_inode(inode, res.fattr);
> > > > > > > > > > +       }
> > > > > > > > > >
> > > > > > > > > > +       kfree(res.fattr);
> > > > > > > > > >         return ret;
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> > > > > > > > > > index 5c7452ce6e8ac94bd24bc3a33d4479d458a29907..ec105c62f721cfe01bfc60f5981396958084d627 100644
> > > > > > > > > > --- a/fs/nfs/nfs42xdr.c
> > > > > > > > > > +++ b/fs/nfs/nfs42xdr.c
> > > > > > > > > > @@ -263,11 +263,13 @@
> > > > > > > > > >  #define NFS4_enc_removexattr_sz                (compound_encode_hdr_maxsz + \
> > > > > > > > > >                                          encode_sequence_maxsz + \
> > > > > > > > > >                                          encode_putfh_maxsz + \
> > > > > > > > > > -                                        encode_removexattr_maxsz)
> > > > > > > > > > +                                        encode_removexattr_maxsz + \
> > > > > > > > > > +                                        encode_getattr_maxsz)
> > > > > > > > > >  #define NFS4_dec_removexattr_sz                (compound_decode_hdr_maxsz + \
> > > > > > > > > >                                          decode_sequence_maxsz + \
> > > > > > > > > >                                          decode_putfh_maxsz + \
> > > > > > > > > > -                                        decode_removexattr_maxsz)
> > > > > > > > > > +                                        decode_removexattr_maxsz + \
> > > > > > > > > > +                                        decode_getattr_maxsz)
> > > > > > > > > >
> > > > > > > > > >  /*
> > > > > > > > > >   * These values specify the maximum amount of data that is not
> > > > > > > > > > @@ -869,6 +871,7 @@ static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
> > > > > > > > > >         encode_sequence(xdr, &args->seq_args, &hdr);
> > > > > > > > > >         encode_putfh(xdr, args->fh, &hdr);
> > > > > > > > > >         encode_removexattr(xdr, args->xattr_name, &hdr);
> > > > > > > > > > +       encode_getfattr(xdr, args->bitmask, &hdr);
> > > > > > > > > >         encode_nops(&hdr);
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > @@ -1818,6 +1821,9 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
> > > > > > > > > >                 goto out;
> > > > > > > > > >
> > > > > > > > > >         status = decode_removexattr(xdr, &res->cinfo);
> > > > > > > > > > +       if (status)
> > > > > > > > > > +               goto out;
> > > > > > > > > > +       status = decode_getfattr(xdr, res->fattr, res->server);
> > > > > > > > > >  out:
> > > > > > > > > >         return status;
> > > > > > > > > >  }
> > > > > > > > > > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > > > > > > > > > index ff1f12aa73d27b6fd874baf7019dd03195fc36e5..fcbd21b5685f46136a210c8e11c20a54d6ed9dad 100644
> > > > > > > > > > --- a/include/linux/nfs_xdr.h
> > > > > > > > > > +++ b/include/linux/nfs_xdr.h
> > > > > > > > > > @@ -1611,12 +1611,15 @@ struct nfs42_listxattrsres {
> > > > > > > > > >  struct nfs42_removexattrargs {
> > > > > > > > > >         struct nfs4_sequence_args       seq_args;
> > > > > > > > > >         struct nfs_fh                   *fh;
> > > > > > > > > > +       const u32                       *bitmask;
> > > > > > > > > >         const char                      *xattr_name;
> > > > > > > > > >  };
> > > > > > > > > >
> > > > > > > > > >  struct nfs42_removexattrres {
> > > > > > > > > >         struct nfs4_sequence_res        seq_res;
> > > > > > > > > >         struct nfs4_change_info         cinfo;
> > > > > > > > > > +       struct nfs_fattr                *fattr;
> > > > > > > > > > +       const struct nfs_server         *server;
> > > > > > > > > >  };
> > > > > > > > > >
> > > > > > > > > >  #endif /* CONFIG_NFS_V4_2 */
> > > > > > > > > >
> > > > > > > > > > --
> > > > > > > > > > 2.53.0
> > > > > > > > > >
> > > > > > > >
> > > > > > > > --
> > > > > > > > Jeff Layton <jlayton@kernel.org>
> > > > > >
> > > > > > --
> > > > > > Jeff Layton <jlayton@kernel.org>
> > > > >