[PATCH] fuse: check attributes staleness on fuse_iget()

Zhang Tianci posted 1 patch 1 week, 1 day ago
There is a newer version of this series
fs/fuse/dir.c     | 11 +++++++----
fs/fuse/fuse_i.h  | 11 ++++++++++-
fs/fuse/inode.c   | 14 +++++++++++---
fs/fuse/readdir.c | 15 +++++++++------
4 files changed, 37 insertions(+), 14 deletions(-)
[PATCH] fuse: check attributes staleness on fuse_iget()
Posted by Zhang Tianci 1 week, 1 day ago
Function fuse_direntplus_link() might call fuse_iget() to initialize a new
fuse_inode and change its attributes. If fi->attr_version is always
initialized with 0, even if the attributes returned by the FUSE_READDIR
request is staled, as the new fi->attr_version is 0, fuse_change_attributes
will still set the staled attributes to inode. This wrong behaviour may
cause file size inconsistency even when there is no changes from
server-side.

To reproduce the issue, consider the following 2 programs (A and B) are
running concurrently,

        A                                               B
----------------------------------      --------------------------------
{ /fusemnt/dir/f is a file path in a fuse mount, the size of f is 0. }

readdir(/fusemnt/dir) start
//Daemon set size 0 to f direntry
                                        fallocate(f, 1024)
                                        stat(f) // B see size 1024
                                        echo 2 > /proc/sys/vm/drop_caches
readdir(/fusemnt/dir) reply to kernel
Kernel set 0 to the I_NEW inode

                                        stat(f) // B see size 0

In the above case, only program B is modifying the file size, however, B
observes file size changing between the 2 'readonly' stat() calls. To fix
this issue, we should make sure readdirplus still follows the rule of
attr_version staleness checking even if the fi->attr_version is lost due to
inode eviction.

To identify this situation, the new fc->evict_ctr is used to record whether
the eviction of inodes occurs during the readdirplus request processing.
If it does, the result of readdirplus may be inaccurate; otherwise, the
result of readdirplus can be trusted. Although this may still lead to
incorrect invalidation, considering the relatively low frequency of
evict occurrences, it should be acceptable.

Link: https://lore.kernel.org/lkml/20230711043405.66256-2-zhangjiachen.jaycee@bytedance.com/

Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
---
 fs/fuse/dir.c     | 11 +++++++----
 fs/fuse/fuse_i.h  | 11 ++++++++++-
 fs/fuse/inode.c   | 14 +++++++++++---
 fs/fuse/readdir.c | 15 +++++++++------
 4 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 54104dd48af7c..7d0a0fab69207 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -366,7 +366,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
 	struct fuse_mount *fm = get_fuse_mount_super(sb);
 	FUSE_ARGS(args);
 	struct fuse_forget_link *forget;
-	u64 attr_version;
+	u64 attr_version, evict_ctr;
 	int err;
 
 	*inode = NULL;
@@ -381,6 +381,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
 		goto out;
 
 	attr_version = fuse_get_attr_version(fm->fc);
+	evict_ctr = fuse_get_evict_ctr(fm->fc);
 
 	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
 	err = fuse_simple_request(fm, &args);
@@ -398,7 +399,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
 
 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
 			   &outarg->attr, ATTR_TIMEOUT(outarg),
-			   attr_version);
+			   attr_version, evict_ctr);
 	err = -ENOMEM;
 	if (!*inode) {
 		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
@@ -691,7 +692,8 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
 	ff->nodeid = outentry.nodeid;
 	ff->open_flags = outopenp->open_flags;
 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
-			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
+			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0,
+			  fuse_get_evict_ctr(fm->fc));
 	if (!inode) {
 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
 		fuse_sync_release(NULL, ff, flags);
@@ -822,7 +824,8 @@ static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
 		goto out_put_forget_req;
 
 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
+			  &outarg.attr, ATTR_TIMEOUT(&outarg), 0,
+			  fuse_get_evict_ctr(fm->fc));
 	if (!inode) {
 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
 		return -ENOMEM;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e6cc3d552b138..f9ff0d0029aba 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -884,6 +884,9 @@ struct fuse_conn {
 	/** Version counter for attribute changes */
 	atomic64_t attr_version;
 
+	/** Version counter for evict inode */
+	atomic64_t evict_ctr;
+
 	/** Called on final put */
 	void (*release)(struct fuse_conn *);
 
@@ -978,6 +981,11 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
 	return atomic64_read(&fc->attr_version);
 }
 
+static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc)
+{
+	return atomic64_read(&fc->evict_ctr);
+}
+
 static inline bool fuse_stale_inode(const struct inode *inode, int generation,
 				    struct fuse_attr *attr)
 {
@@ -1037,7 +1045,8 @@ extern const struct dentry_operations fuse_root_dentry_operations;
  */
 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 			int generation, struct fuse_attr *attr,
-			u64 attr_valid, u64 attr_version);
+			u64 attr_valid, u64 attr_version,
+			u64 evict_ctr);
 
 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
 		     struct fuse_entry_out *outarg, struct inode **inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd3321e29a3e5..872c61dd56618 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -173,6 +173,7 @@ static void fuse_evict_inode(struct inode *inode)
 			fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
 			fi->submount_lookup = NULL;
 		}
+		atomic64_inc(&fc->evict_ctr);
 	}
 	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
 		WARN_ON(fi->iocachectr != 0);
@@ -426,7 +427,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
 
 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 			int generation, struct fuse_attr *attr,
-			u64 attr_valid, u64 attr_version)
+			u64 attr_valid, u64 attr_version,
+			u64 evict_ctr)
 {
 	struct inode *inode;
 	struct fuse_inode *fi;
@@ -488,6 +490,10 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 	spin_unlock(&fi->lock);
 done:
 	fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
+	spin_lock(&fi->lock);
+	if (evict_ctr < fuse_get_evict_ctr(fc))
+		fuse_invalidate_attr(inode);
+	spin_unlock(&fi->lock);
 
 	return inode;
 }
@@ -940,6 +946,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 	fc->initialized = 0;
 	fc->connected = 1;
 	atomic64_set(&fc->attr_version, 1);
+	atomic64_set(&fc->evict_ctr, 1);
 	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
 	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
 	fc->user_ns = get_user_ns(user_ns);
@@ -1001,7 +1008,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 	attr.mode = mode;
 	attr.ino = FUSE_ROOT_ID;
 	attr.nlink = 1;
-	return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
+	return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
 }
 
 struct fuse_inode_handle {
@@ -1610,7 +1617,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
 		return -ENOMEM;
 
 	fuse_fill_attr_from_inode(&root_attr, parent_fi);
-	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
+	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
+			 fuse_get_evict_ctr(fm->fc));
 	/*
 	 * This inode is just a duplicate, so it is not looked up and
 	 * its nlookup should not be incremented.  fuse_iget() does
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 0377b6dc24c80..ceb5aefd6012f 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -149,7 +149,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
 
 static int fuse_direntplus_link(struct file *file,
 				struct fuse_direntplus *direntplus,
-				u64 attr_version)
+				u64 attr_version, u64 evict_ctr)
 {
 	struct fuse_entry_out *o = &direntplus->entry_out;
 	struct fuse_dirent *dirent = &direntplus->dirent;
@@ -233,7 +233,7 @@ static int fuse_direntplus_link(struct file *file,
 	} else {
 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
 				  &o->attr, ATTR_TIMEOUT(o),
-				  attr_version);
+				  attr_version, evict_ctr);
 		if (!inode)
 			inode = ERR_PTR(-ENOMEM);
 
@@ -284,7 +284,8 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
 }
 
 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
-			     struct dir_context *ctx, u64 attr_version)
+			     struct dir_context *ctx, u64 attr_version,
+			     u64 evict_ctr)
 {
 	struct fuse_direntplus *direntplus;
 	struct fuse_dirent *dirent;
@@ -319,7 +320,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
 		buf += reclen;
 		nbytes -= reclen;
 
-		ret = fuse_direntplus_link(file, direntplus, attr_version);
+		ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
 		if (ret)
 			fuse_force_forget(file, direntplus->entry_out.nodeid);
 	}
@@ -337,7 +338,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 	struct fuse_io_args ia = {};
 	struct fuse_args_pages *ap = &ia.ap;
 	struct fuse_page_desc desc = { .length = PAGE_SIZE };
-	u64 attr_version = 0;
+	u64 attr_version = 0, evict_ctr = 0;
 	bool locked;
 
 	page = alloc_page(GFP_KERNEL);
@@ -351,6 +352,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 	ap->descs = &desc;
 	if (plus) {
 		attr_version = fuse_get_attr_version(fm->fc);
+		evict_ctr = fuse_get_evict_ctr(fm->fc);
 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
 				    FUSE_READDIRPLUS);
 	} else {
@@ -368,7 +370,8 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
 				fuse_readdir_cache_end(file, ctx->pos);
 		} else if (plus) {
 			res = parse_dirplusfile(page_address(page), res,
-						file, ctx, attr_version);
+						file, ctx, attr_version,
+						evict_ctr);
 		} else {
 			res = parse_dirfile(page_address(page), res, file,
 					    ctx);
-- 
2.46.0.rc2
Re: [PATCH] fuse: check attributes staleness on fuse_iget()
Posted by kernel test robot 6 days, 1 hour ago
Hi Zhang,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.12-rc7]
[also build test WARNING on linus/master]
[cannot apply to mszeredi-fuse/for-next next-20241115]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhang-Tianci/fuse-check-attributes-staleness-on-fuse_iget/20241114-151838
base:   v6.12-rc7
patch link:    https://lore.kernel.org/r/20241114070905.48901-1-zhangtianci.1997%40bytedance.com
patch subject: [PATCH] fuse: check attributes staleness on fuse_iget()
config: x86_64-kexec (https://download.01.org/0day-ci/archive/20241117/202411170856.SfrPdEy1-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241117/202411170856.SfrPdEy1-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411170856.SfrPdEy1-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from fs/fuse/inode.c:9:
   In file included from fs/fuse/fuse_i.h:22:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     504 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     505 |                            item];
         |                            ~~~~
   include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     511 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     512 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   include/linux/vmstat.h:524:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     524 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     525 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
>> fs/fuse/inode.c:456:7: warning: variable 'fi' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
     456 |                 if (!fi->submount_lookup) {
         |                     ^~~~~~~~~~~~~~~~~~~~
   fs/fuse/inode.c:493:13: note: uninitialized use occurs here
     493 |         spin_lock(&fi->lock);
         |                    ^~
   fs/fuse/inode.c:456:3: note: remove the 'if' if its condition is always true
     456 |                 if (!fi->submount_lookup) {
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~
   fs/fuse/inode.c:434:23: note: initialize the variable 'fi' to silence this warning
     434 |         struct fuse_inode *fi;
         |                              ^
         |                               = NULL
   5 warnings generated.


vim +456 fs/fuse/inode.c

d8a5ba45457e4a Miklos Szeredi    2005-09-09  427  
b48badf013018e Miklos Szeredi    2008-04-30  428  struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
1fb69e7817296d Miklos Szeredi    2007-10-18  429  			int generation, struct fuse_attr *attr,
243f2d46af8e04 Zhang Tianci      2024-11-14  430  			u64 attr_valid, u64 attr_version,
243f2d46af8e04 Zhang Tianci      2024-11-14  431  			u64 evict_ctr)
d8a5ba45457e4a Miklos Szeredi    2005-09-09  432  {
d8a5ba45457e4a Miklos Szeredi    2005-09-09  433  	struct inode *inode;
9e6268db496a25 Miklos Szeredi    2005-09-09  434  	struct fuse_inode *fi;
d8a5ba45457e4a Miklos Szeredi    2005-09-09  435  	struct fuse_conn *fc = get_fuse_conn_super(sb);
d8a5ba45457e4a Miklos Szeredi    2005-09-09  436  
bf109c64040f5b Max Reitz         2020-04-21  437  	/*
bf109c64040f5b Max Reitz         2020-04-21  438  	 * Auto mount points get their node id from the submount root, which is
bf109c64040f5b Max Reitz         2020-04-21  439  	 * not a unique identifier within this filesystem.
bf109c64040f5b Max Reitz         2020-04-21  440  	 *
bf109c64040f5b Max Reitz         2020-04-21  441  	 * To avoid conflicts, do not place submount points into the inode hash
bf109c64040f5b Max Reitz         2020-04-21  442  	 * table.
bf109c64040f5b Max Reitz         2020-04-21  443  	 */
bf109c64040f5b Max Reitz         2020-04-21  444  	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
bf109c64040f5b Max Reitz         2020-04-21  445  	    S_ISDIR(attr->mode)) {
c4d361f66ac91d Krister Johansen  2023-11-03  446  		struct fuse_inode *fi;
c4d361f66ac91d Krister Johansen  2023-11-03  447  
bf109c64040f5b Max Reitz         2020-04-21  448  		inode = new_inode(sb);
bf109c64040f5b Max Reitz         2020-04-21  449  		if (!inode)
bf109c64040f5b Max Reitz         2020-04-21  450  			return NULL;
bf109c64040f5b Max Reitz         2020-04-21  451  
facd61053cff10 Christian Brauner 2023-01-20  452  		fuse_init_inode(inode, attr, fc);
c4d361f66ac91d Krister Johansen  2023-11-03  453  		fi = get_fuse_inode(inode);
c4d361f66ac91d Krister Johansen  2023-11-03  454  		fi->nodeid = nodeid;
c4d361f66ac91d Krister Johansen  2023-11-03  455  		fi->submount_lookup = fuse_alloc_submount_lookup();
c4d361f66ac91d Krister Johansen  2023-11-03 @456  		if (!fi->submount_lookup) {
c4d361f66ac91d Krister Johansen  2023-11-03  457  			iput(inode);
c4d361f66ac91d Krister Johansen  2023-11-03  458  			return NULL;
c4d361f66ac91d Krister Johansen  2023-11-03  459  		}
c4d361f66ac91d Krister Johansen  2023-11-03  460  		/* Sets nlookup = 1 on fi->submount_lookup->nlookup */
c4d361f66ac91d Krister Johansen  2023-11-03  461  		fuse_init_submount_lookup(fi->submount_lookup, nodeid);
bf109c64040f5b Max Reitz         2020-04-21  462  		inode->i_flags |= S_AUTOMOUNT;
bf109c64040f5b Max Reitz         2020-04-21  463  		goto done;
bf109c64040f5b Max Reitz         2020-04-21  464  	}
bf109c64040f5b Max Reitz         2020-04-21  465  
d8a5ba45457e4a Miklos Szeredi    2005-09-09  466  retry:
d8a5ba45457e4a Miklos Szeredi    2005-09-09  467  	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
d8a5ba45457e4a Miklos Szeredi    2005-09-09  468  	if (!inode)
d8a5ba45457e4a Miklos Szeredi    2005-09-09  469  		return NULL;
d8a5ba45457e4a Miklos Szeredi    2005-09-09  470  
d8a5ba45457e4a Miklos Szeredi    2005-09-09  471  	if ((inode->i_state & I_NEW)) {
b0aa7606521790 Maxim Patlasov    2013-12-26  472  		inode->i_flags |= S_NOATIME;
d31433c8b06d44 Maxim Patlasov    2014-04-28  473  		if (!fc->writeback_cache || !S_ISREG(attr->mode))
b0aa7606521790 Maxim Patlasov    2013-12-26  474  			inode->i_flags |= S_NOCMTIME;
d8a5ba45457e4a Miklos Szeredi    2005-09-09  475  		inode->i_generation = generation;
facd61053cff10 Christian Brauner 2023-01-20  476  		fuse_init_inode(inode, attr, fc);
d8a5ba45457e4a Miklos Szeredi    2005-09-09  477  		unlock_new_inode(inode);
15db16837a35d8 Amir Goldstein    2021-06-21  478  	} else if (fuse_stale_inode(inode, generation, attr)) {
15db16837a35d8 Amir Goldstein    2021-06-21  479  		/* nodeid was reused, any I/O on the old inode should fail */
5d069dbe8aaf2a Miklos Szeredi    2020-12-10  480  		fuse_make_bad(inode);
b1fe686a765e6c Miklos Szeredi    2024-02-28  481  		if (inode != d_inode(sb->s_root)) {
b1fe686a765e6c Miklos Szeredi    2024-02-28  482  			remove_inode_hash(inode);
d8a5ba45457e4a Miklos Szeredi    2005-09-09  483  			iput(inode);
d8a5ba45457e4a Miklos Szeredi    2005-09-09  484  			goto retry;
d8a5ba45457e4a Miklos Szeredi    2005-09-09  485  		}
b1fe686a765e6c Miklos Szeredi    2024-02-28  486  	}
9e6268db496a25 Miklos Szeredi    2005-09-09  487  	fi = get_fuse_inode(inode);
c9d8f5f0692d59 Kirill Tkhai      2018-11-09  488  	spin_lock(&fi->lock);
9e6268db496a25 Miklos Szeredi    2005-09-09  489  	fi->nlookup++;
c9d8f5f0692d59 Kirill Tkhai      2018-11-09  490  	spin_unlock(&fi->lock);
c4d361f66ac91d Krister Johansen  2023-11-03  491  done:
972f4c46d0a1bb Miklos Szeredi    2023-08-10  492  	fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
243f2d46af8e04 Zhang Tianci      2024-11-14  493  	spin_lock(&fi->lock);
243f2d46af8e04 Zhang Tianci      2024-11-14  494  	if (evict_ctr < fuse_get_evict_ctr(fc))
243f2d46af8e04 Zhang Tianci      2024-11-14  495  		fuse_invalidate_attr(inode);
243f2d46af8e04 Zhang Tianci      2024-11-14  496  	spin_unlock(&fi->lock);
1fb69e7817296d Miklos Szeredi    2007-10-18  497  
d8a5ba45457e4a Miklos Szeredi    2005-09-09  498  	return inode;
d8a5ba45457e4a Miklos Szeredi    2005-09-09  499  }
d8a5ba45457e4a Miklos Szeredi    2005-09-09  500  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] fuse: check attributes staleness on fuse_iget()
Posted by Miklos Szeredi 1 week ago
On Thu, 14 Nov 2024 at 08:12, Zhang Tianci
<zhangtianci.1997@bytedance.com> wrote:
>
> Function fuse_direntplus_link() might call fuse_iget() to initialize a new
> fuse_inode and change its attributes. If fi->attr_version is always
> initialized with 0, even if the attributes returned by the FUSE_READDIR
> request is staled, as the new fi->attr_version is 0, fuse_change_attributes
> will still set the staled attributes to inode. This wrong behaviour may
> cause file size inconsistency even when there is no changes from
> server-side.

Thanks for working on this.

I have some comments, see below.

> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -173,6 +173,7 @@ static void fuse_evict_inode(struct inode *inode)
>                         fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
>                         fi->submount_lookup = NULL;
>                 }
> +               atomic64_inc(&fc->evict_ctr);

I think this should only be done if (inode->i_nlink > 0), because if
the file/directory was removed, then the race between another lookup
cannot happen.

> @@ -426,7 +427,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
>
>  struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
>                         int generation, struct fuse_attr *attr,
> -                       u64 attr_valid, u64 attr_version)
> +                       u64 attr_valid, u64 attr_version,
> +                       u64 evict_ctr)
>  {
>         struct inode *inode;
>         struct fuse_inode *fi;
> @@ -488,6 +490,10 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
>         spin_unlock(&fi->lock);
>  done:
>         fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
> +       spin_lock(&fi->lock);
> +       if (evict_ctr < fuse_get_evict_ctr(fc))
> +               fuse_invalidate_attr(inode);

Similarly, this should not be done on creation (attr_version == 0),
since in case of a brand new inode the previous inode state cannot
have any effect on it.

The other case that may be worth taking special care of is when the
inode is already in the cache.  This happens for example on lookup of
hard links.  This is the (fi->attr_version > 0) case where we can rely
on the validity of attr_version.  I.e. the attr_version comparison in
fuse_change_attributes() will make sure the attributes are only
updated if necessary, no need to check evict_ctr.

So the only case that needs evict_ctr verification is (attr_version !=
0 && fi->attr_version == 0).

One other thing: I don't like the fact that the invalid mask is first
cleared in fuse_change_attributes_common(), then reset in
fuse_invalidate_attr().  It would be cleaner to not clear the mask in
the first place.

Attaching an untested incremental patch.  Can you please review and test?

Thanks,
Miklos
Re: [External] Re: [PATCH] fuse: check attributes staleness on fuse_iget()
Posted by Zhang Tianci 4 days, 15 hours ago
Hi Miklos,

Thanks for your incremental patch, I test it and find one little problem:

> @@ -207,7 +214,8 @@ static ino_t fuse_squash_ino(u64 ino64)
>
>  void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
>                                    struct fuse_statx *sx,
> -                                  u64 attr_valid, u32 cache_mask)
> +                                  u64 attr_valid, u32 cache_mask,
> +                                  u64 evict_ctr)
>  {
>         struct fuse_conn *fc = get_fuse_conn(inode);
>         struct fuse_inode *fi = get_fuse_inode(inode);
> @@ -216,8 +224,20 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
>
>         fi->attr_version = atomic64_inc_return(&fc->attr_version);

Here we initialize fi->attr_version.

>         fi->i_time = attr_valid;
> -       /* Clear basic stats from invalid mask */
> -       set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
> +
> +       /*
> +        * Clear basic stats from invalid mask.
> +        *
> +        * Don't do this if this is coming from a fuse_iget() call and there
> +        * might have been a racing evict which would've invalidated the result
> +        * if the attr_version would've been preserved.
> +        *
> +        * !evict_ctr -> this is create
> +        * fi->attr_version != 0 -> this is not a new inode
> +        * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request
> +        */
> +       if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
> +               set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);

This check should be moved to before the initialization of fi->attr_version.

>
>         inode->i_ino     = fuse_squash_ino(attr->ino);
>         inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);

And I will send out the v2 patch later.

Thanks,
Tianci
Re: [External] Re: [PATCH] fuse: check attributes staleness on fuse_iget()
Posted by Miklos Szeredi 4 days, 14 hours ago
On Mon, 18 Nov 2024 at 11:15, Zhang Tianci
<zhangtianci.1997@bytedance.com> wrote:

> > +       if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
> > +               set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
>
> This check should be moved to before the initialization of fi->attr_version.

Well spotted.

> >         inode->i_ino     = fuse_squash_ino(attr->ino);
> >         inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
>
> And I will send out the v2 patch later.

Thanks, applied and pushed.

Miklos