fs/fuse/dir.c | 11 +++++++---- fs/fuse/fuse_i.h | 11 ++++++++++- fs/fuse/inode.c | 14 +++++++++++--- fs/fuse/readdir.c | 15 +++++++++------ 4 files changed, 37 insertions(+), 14 deletions(-)
Function fuse_direntplus_link() might call fuse_iget() to initialize a new
fuse_inode and change its attributes. If fi->attr_version is always
initialized with 0, even if the attributes returned by the FUSE_READDIR
request is staled, as the new fi->attr_version is 0, fuse_change_attributes
will still set the staled attributes to inode. This wrong behaviour may
cause file size inconsistency even when there is no changes from
server-side.
To reproduce the issue, consider the following 2 programs (A and B) are
running concurrently,
A B
---------------------------------- --------------------------------
{ /fusemnt/dir/f is a file path in a fuse mount, the size of f is 0. }
readdir(/fusemnt/dir) start
//Daemon set size 0 to f direntry
fallocate(f, 1024)
stat(f) // B see size 1024
echo 2 > /proc/sys/vm/drop_caches
readdir(/fusemnt/dir) reply to kernel
Kernel set 0 to the I_NEW inode
stat(f) // B see size 0
In the above case, only program B is modifying the file size, however, B
observes file size changing between the 2 'readonly' stat() calls. To fix
this issue, we should make sure readdirplus still follows the rule of
attr_version staleness checking even if the fi->attr_version is lost due to
inode eviction.
To identify this situation, the new fc->evict_ctr is used to record whether
the eviction of inodes occurs during the readdirplus request processing.
If it does, the result of readdirplus may be inaccurate; otherwise, the
result of readdirplus can be trusted. Although this may still lead to
incorrect invalidation, considering the relatively low frequency of
evict occurrences, it should be acceptable.
Link: https://lore.kernel.org/lkml/20230711043405.66256-2-zhangjiachen.jaycee@bytedance.com/
Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
---
fs/fuse/dir.c | 11 +++++++----
fs/fuse/fuse_i.h | 11 ++++++++++-
fs/fuse/inode.c | 14 +++++++++++---
fs/fuse/readdir.c | 15 +++++++++------
4 files changed, 37 insertions(+), 14 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 54104dd48af7c..7d0a0fab69207 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -366,7 +366,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
- u64 attr_version;
+ u64 attr_version, evict_ctr;
int err;
*inode = NULL;
@@ -381,6 +381,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
goto out;
attr_version = fuse_get_attr_version(fm->fc);
+ evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
err = fuse_simple_request(fm, &args);
@@ -398,7 +399,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
&outarg->attr, ATTR_TIMEOUT(outarg),
- attr_version);
+ attr_version, evict_ctr);
err = -ENOMEM;
if (!*inode) {
fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
@@ -691,7 +692,8 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
ff->nodeid = outentry.nodeid;
ff->open_flags = outopenp->open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
- &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
+ &outentry.attr, ATTR_TIMEOUT(&outentry), 0,
+ fuse_get_evict_ctr(fm->fc));
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(NULL, ff, flags);
@@ -822,7 +824,8 @@ static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
goto out_put_forget_req;
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
+ &outarg.attr, ATTR_TIMEOUT(&outarg), 0,
+ fuse_get_evict_ctr(fm->fc));
if (!inode) {
fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
return -ENOMEM;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e6cc3d552b138..f9ff0d0029aba 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -884,6 +884,9 @@ struct fuse_conn {
/** Version counter for attribute changes */
atomic64_t attr_version;
+ /** Version counter for evict inode */
+ atomic64_t evict_ctr;
+
/** Called on final put */
void (*release)(struct fuse_conn *);
@@ -978,6 +981,11 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
return atomic64_read(&fc->attr_version);
}
+static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc)
+{
+ return atomic64_read(&fc->evict_ctr);
+}
+
static inline bool fuse_stale_inode(const struct inode *inode, int generation,
struct fuse_attr *attr)
{
@@ -1037,7 +1045,8 @@ extern const struct dentry_operations fuse_root_dentry_operations;
*/
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
- u64 attr_valid, u64 attr_version);
+ u64 attr_valid, u64 attr_version,
+ u64 evict_ctr);
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd3321e29a3e5..872c61dd56618 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -173,6 +173,7 @@ static void fuse_evict_inode(struct inode *inode)
fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
fi->submount_lookup = NULL;
}
+ atomic64_inc(&fc->evict_ctr);
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(fi->iocachectr != 0);
@@ -426,7 +427,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
- u64 attr_valid, u64 attr_version)
+ u64 attr_valid, u64 attr_version,
+ u64 evict_ctr)
{
struct inode *inode;
struct fuse_inode *fi;
@@ -488,6 +490,10 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
spin_unlock(&fi->lock);
done:
fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
+ spin_lock(&fi->lock);
+ if (evict_ctr < fuse_get_evict_ctr(fc))
+ fuse_invalidate_attr(inode);
+ spin_unlock(&fi->lock);
return inode;
}
@@ -940,6 +946,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
fc->initialized = 0;
fc->connected = 1;
atomic64_set(&fc->attr_version, 1);
+ atomic64_set(&fc->evict_ctr, 1);
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
@@ -1001,7 +1008,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
attr.mode = mode;
attr.ino = FUSE_ROOT_ID;
attr.nlink = 1;
- return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
+ return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}
struct fuse_inode_handle {
@@ -1610,7 +1617,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
return -ENOMEM;
fuse_fill_attr_from_inode(&root_attr, parent_fi);
- root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
+ root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
+ fuse_get_evict_ctr(fm->fc));
/*
* This inode is just a duplicate, so it is not looked up and
* its nlookup should not be incremented. fuse_iget() does
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 0377b6dc24c80..ceb5aefd6012f 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -149,7 +149,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
- u64 attr_version)
+ u64 attr_version, u64 evict_ctr)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
@@ -233,7 +233,7 @@ static int fuse_direntplus_link(struct file *file,
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, ATTR_TIMEOUT(o),
- attr_version);
+ attr_version, evict_ctr);
if (!inode)
inode = ERR_PTR(-ENOMEM);
@@ -284,7 +284,8 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
- struct dir_context *ctx, u64 attr_version)
+ struct dir_context *ctx, u64 attr_version,
+ u64 evict_ctr)
{
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
@@ -319,7 +320,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
buf += reclen;
nbytes -= reclen;
- ret = fuse_direntplus_link(file, direntplus, attr_version);
+ ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
@@ -337,7 +338,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
struct fuse_page_desc desc = { .length = PAGE_SIZE };
- u64 attr_version = 0;
+ u64 attr_version = 0, evict_ctr = 0;
bool locked;
page = alloc_page(GFP_KERNEL);
@@ -351,6 +352,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fm->fc);
+ evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
@@ -368,7 +370,8 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
res = parse_dirplusfile(page_address(page), res,
- file, ctx, attr_version);
+ file, ctx, attr_version,
+ evict_ctr);
} else {
res = parse_dirfile(page_address(page), res, file,
ctx);
--
2.46.0.rc2
Hi Zhang, kernel test robot noticed the following build warnings: [auto build test WARNING on v6.12-rc7] [also build test WARNING on linus/master] [cannot apply to mszeredi-fuse/for-next next-20241115] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Zhang-Tianci/fuse-check-attributes-staleness-on-fuse_iget/20241114-151838 base: v6.12-rc7 patch link: https://lore.kernel.org/r/20241114070905.48901-1-zhangtianci.1997%40bytedance.com patch subject: [PATCH] fuse: check attributes staleness on fuse_iget() config: x86_64-kexec (https://download.01.org/0day-ci/archive/20241117/202411170856.SfrPdEy1-lkp@intel.com/config) compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241117/202411170856.SfrPdEy1-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202411170856.SfrPdEy1-lkp@intel.com/ All warnings (new ones prefixed by >>): In file included from fs/fuse/inode.c:9: In file included from fs/fuse/fuse_i.h:22: In file included from include/linux/mm.h:2213: include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 504 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 505 | item]; | ~~~~ include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 511 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 512 | NR_VM_NUMA_EVENT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~~ include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion] 518 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" | ~~~~~~~~~~~ ^ ~~~ include/linux/vmstat.h:524:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 524 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 525 | NR_VM_NUMA_EVENT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~~ >> fs/fuse/inode.c:456:7: warning: variable 'fi' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 456 | if (!fi->submount_lookup) { | ^~~~~~~~~~~~~~~~~~~~ fs/fuse/inode.c:493:13: note: uninitialized use occurs here 493 | spin_lock(&fi->lock); | ^~ fs/fuse/inode.c:456:3: note: remove the 'if' if its condition is always true 456 | if (!fi->submount_lookup) { | ^~~~~~~~~~~~~~~~~~~~~~~~~ fs/fuse/inode.c:434:23: note: initialize the variable 'fi' to silence this warning 434 | struct fuse_inode *fi; | ^ | = NULL 5 warnings generated. vim +456 fs/fuse/inode.c d8a5ba45457e4a Miklos Szeredi 2005-09-09 427 b48badf013018e Miklos Szeredi 2008-04-30 428 struct inode *fuse_iget(struct super_block *sb, u64 nodeid, 1fb69e7817296d Miklos Szeredi 2007-10-18 429 int generation, struct fuse_attr *attr, 243f2d46af8e04 Zhang Tianci 2024-11-14 430 u64 attr_valid, u64 attr_version, 243f2d46af8e04 Zhang Tianci 2024-11-14 431 u64 evict_ctr) d8a5ba45457e4a Miklos Szeredi 2005-09-09 432 { d8a5ba45457e4a Miklos Szeredi 2005-09-09 433 struct inode *inode; 9e6268db496a25 Miklos Szeredi 2005-09-09 434 struct fuse_inode *fi; d8a5ba45457e4a Miklos Szeredi 2005-09-09 435 struct fuse_conn *fc = get_fuse_conn_super(sb); d8a5ba45457e4a Miklos Szeredi 2005-09-09 436 bf109c64040f5b Max Reitz 2020-04-21 437 /* bf109c64040f5b Max Reitz 2020-04-21 438 * Auto mount points get their node id from the submount root, which is bf109c64040f5b Max Reitz 2020-04-21 439 * not a unique identifier within this filesystem. bf109c64040f5b Max Reitz 2020-04-21 440 * bf109c64040f5b Max Reitz 2020-04-21 441 * To avoid conflicts, do not place submount points into the inode hash bf109c64040f5b Max Reitz 2020-04-21 442 * table. bf109c64040f5b Max Reitz 2020-04-21 443 */ bf109c64040f5b Max Reitz 2020-04-21 444 if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && bf109c64040f5b Max Reitz 2020-04-21 445 S_ISDIR(attr->mode)) { c4d361f66ac91d Krister Johansen 2023-11-03 446 struct fuse_inode *fi; c4d361f66ac91d Krister Johansen 2023-11-03 447 bf109c64040f5b Max Reitz 2020-04-21 448 inode = new_inode(sb); bf109c64040f5b Max Reitz 2020-04-21 449 if (!inode) bf109c64040f5b Max Reitz 2020-04-21 450 return NULL; bf109c64040f5b Max Reitz 2020-04-21 451 facd61053cff10 Christian Brauner 2023-01-20 452 fuse_init_inode(inode, attr, fc); c4d361f66ac91d Krister Johansen 2023-11-03 453 fi = get_fuse_inode(inode); c4d361f66ac91d Krister Johansen 2023-11-03 454 fi->nodeid = nodeid; c4d361f66ac91d Krister Johansen 2023-11-03 455 fi->submount_lookup = fuse_alloc_submount_lookup(); c4d361f66ac91d Krister Johansen 2023-11-03 @456 if (!fi->submount_lookup) { c4d361f66ac91d Krister Johansen 2023-11-03 457 iput(inode); c4d361f66ac91d Krister Johansen 2023-11-03 458 return NULL; c4d361f66ac91d Krister Johansen 2023-11-03 459 } c4d361f66ac91d Krister Johansen 2023-11-03 460 /* Sets nlookup = 1 on fi->submount_lookup->nlookup */ c4d361f66ac91d Krister Johansen 2023-11-03 461 fuse_init_submount_lookup(fi->submount_lookup, nodeid); bf109c64040f5b Max Reitz 2020-04-21 462 inode->i_flags |= S_AUTOMOUNT; bf109c64040f5b Max Reitz 2020-04-21 463 goto done; bf109c64040f5b Max Reitz 2020-04-21 464 } bf109c64040f5b Max Reitz 2020-04-21 465 d8a5ba45457e4a Miklos Szeredi 2005-09-09 466 retry: d8a5ba45457e4a Miklos Szeredi 2005-09-09 467 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); d8a5ba45457e4a Miklos Szeredi 2005-09-09 468 if (!inode) d8a5ba45457e4a Miklos Szeredi 2005-09-09 469 return NULL; d8a5ba45457e4a Miklos Szeredi 2005-09-09 470 d8a5ba45457e4a Miklos Szeredi 2005-09-09 471 if ((inode->i_state & I_NEW)) { b0aa7606521790 Maxim Patlasov 2013-12-26 472 inode->i_flags |= S_NOATIME; d31433c8b06d44 Maxim Patlasov 2014-04-28 473 if (!fc->writeback_cache || !S_ISREG(attr->mode)) b0aa7606521790 Maxim Patlasov 2013-12-26 474 inode->i_flags |= S_NOCMTIME; d8a5ba45457e4a Miklos Szeredi 2005-09-09 475 inode->i_generation = generation; facd61053cff10 Christian Brauner 2023-01-20 476 fuse_init_inode(inode, attr, fc); d8a5ba45457e4a Miklos Szeredi 2005-09-09 477 unlock_new_inode(inode); 15db16837a35d8 Amir Goldstein 2021-06-21 478 } else if (fuse_stale_inode(inode, generation, attr)) { 15db16837a35d8 Amir Goldstein 2021-06-21 479 /* nodeid was reused, any I/O on the old inode should fail */ 5d069dbe8aaf2a Miklos Szeredi 2020-12-10 480 fuse_make_bad(inode); b1fe686a765e6c Miklos Szeredi 2024-02-28 481 if (inode != d_inode(sb->s_root)) { b1fe686a765e6c Miklos Szeredi 2024-02-28 482 remove_inode_hash(inode); d8a5ba45457e4a Miklos Szeredi 2005-09-09 483 iput(inode); d8a5ba45457e4a Miklos Szeredi 2005-09-09 484 goto retry; d8a5ba45457e4a Miklos Szeredi 2005-09-09 485 } b1fe686a765e6c Miklos Szeredi 2024-02-28 486 } 9e6268db496a25 Miklos Szeredi 2005-09-09 487 fi = get_fuse_inode(inode); c9d8f5f0692d59 Kirill Tkhai 2018-11-09 488 spin_lock(&fi->lock); 9e6268db496a25 Miklos Szeredi 2005-09-09 489 fi->nlookup++; c9d8f5f0692d59 Kirill Tkhai 2018-11-09 490 spin_unlock(&fi->lock); c4d361f66ac91d Krister Johansen 2023-11-03 491 done: 972f4c46d0a1bb Miklos Szeredi 2023-08-10 492 fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); 243f2d46af8e04 Zhang Tianci 2024-11-14 493 spin_lock(&fi->lock); 243f2d46af8e04 Zhang Tianci 2024-11-14 494 if (evict_ctr < fuse_get_evict_ctr(fc)) 243f2d46af8e04 Zhang Tianci 2024-11-14 495 fuse_invalidate_attr(inode); 243f2d46af8e04 Zhang Tianci 2024-11-14 496 spin_unlock(&fi->lock); 1fb69e7817296d Miklos Szeredi 2007-10-18 497 d8a5ba45457e4a Miklos Szeredi 2005-09-09 498 return inode; d8a5ba45457e4a Miklos Szeredi 2005-09-09 499 } d8a5ba45457e4a Miklos Szeredi 2005-09-09 500 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Thu, 14 Nov 2024 at 08:12, Zhang Tianci <zhangtianci.1997@bytedance.com> wrote: > > Function fuse_direntplus_link() might call fuse_iget() to initialize a new > fuse_inode and change its attributes. If fi->attr_version is always > initialized with 0, even if the attributes returned by the FUSE_READDIR > request is staled, as the new fi->attr_version is 0, fuse_change_attributes > will still set the staled attributes to inode. This wrong behaviour may > cause file size inconsistency even when there is no changes from > server-side. Thanks for working on this. I have some comments, see below. > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -173,6 +173,7 @@ static void fuse_evict_inode(struct inode *inode) > fuse_cleanup_submount_lookup(fc, fi->submount_lookup); > fi->submount_lookup = NULL; > } > + atomic64_inc(&fc->evict_ctr); I think this should only be done if (inode->i_nlink > 0), because if the file/directory was removed, then the race between another lookup cannot happen. > @@ -426,7 +427,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp) > > struct inode *fuse_iget(struct super_block *sb, u64 nodeid, > int generation, struct fuse_attr *attr, > - u64 attr_valid, u64 attr_version) > + u64 attr_valid, u64 attr_version, > + u64 evict_ctr) > { > struct inode *inode; > struct fuse_inode *fi; > @@ -488,6 +490,10 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, > spin_unlock(&fi->lock); > done: > fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); > + spin_lock(&fi->lock); > + if (evict_ctr < fuse_get_evict_ctr(fc)) > + fuse_invalidate_attr(inode); Similarly, this should not be done on creation (attr_version == 0), since in case of a brand new inode the previous inode state cannot have any effect on it. The other case that may be worth taking special care of is when the inode is already in the cache. This happens for example on lookup of hard links. This is the (fi->attr_version > 0) case where we can rely on the validity of attr_version. I.e. the attr_version comparison in fuse_change_attributes() will make sure the attributes are only updated if necessary, no need to check evict_ctr. So the only case that needs evict_ctr verification is (attr_version != 0 && fi->attr_version == 0). One other thing: I don't like the fact that the invalid mask is first cleared in fuse_change_attributes_common(), then reset in fuse_invalidate_attr(). It would be cleaner to not clear the mask in the first place. Attaching an untested incremental patch. Can you please review and test? Thanks, Miklos
Hi Miklos, Thanks for your incremental patch, I test it and find one little problem: > @@ -207,7 +214,8 @@ static ino_t fuse_squash_ino(u64 ino64) > > void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, > struct fuse_statx *sx, > - u64 attr_valid, u32 cache_mask) > + u64 attr_valid, u32 cache_mask, > + u64 evict_ctr) > { > struct fuse_conn *fc = get_fuse_conn(inode); > struct fuse_inode *fi = get_fuse_inode(inode); > @@ -216,8 +224,20 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, > > fi->attr_version = atomic64_inc_return(&fc->attr_version); Here we initialize fi->attr_version. > fi->i_time = attr_valid; > - /* Clear basic stats from invalid mask */ > - set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); > + > + /* > + * Clear basic stats from invalid mask. > + * > + * Don't do this if this is coming from a fuse_iget() call and there > + * might have been a racing evict which would've invalidated the result > + * if the attr_version would've been preserved. > + * > + * !evict_ctr -> this is create > + * fi->attr_version != 0 -> this is not a new inode > + * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request > + */ > + if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) > + set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); This check should be moved to before the initialization of fi->attr_version. > > inode->i_ino = fuse_squash_ino(attr->ino); > inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); And I will send out the v2 patch later. Thanks, Tianci
On Mon, 18 Nov 2024 at 11:15, Zhang Tianci <zhangtianci.1997@bytedance.com> wrote: > > + if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) > > + set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); > > This check should be moved to before the initialization of fi->attr_version. Well spotted. > > inode->i_ino = fuse_squash_ino(attr->ino); > > inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); > > And I will send out the v2 patch later. Thanks, applied and pushed. Miklos
© 2016 - 2024 Red Hat, Inc.