[PATCH 04/14] kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation

Christian Brauner posted 14 patches 1 month ago
[PATCH 04/14] kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation
Posted by Christian Brauner 1 month ago
Adapt kernfs to use the rhashtable-based xattr path and switch from an
embedded struct to pointer-based lazy allocation.

Change kernfs_iattrs.xattrs from embedded 'struct simple_xattrs' to a
pointer 'struct simple_xattrs *', initialized to NULL (zeroed by
kmem_cache_zalloc). Since kernfs_iattrs is already lazily allocated
itself, this adds a second level of lazy allocation specifically for
the xattr store.

The xattr store is allocated on first setxattr. Read paths
check for NULL and return -ENODATA or empty list.

Replaced xattr entries are freed via simple_xattr_free_rcu() to allow
concurrent RCU readers to finish.

The cleanup paths in kernfs_free_rcu() and __kernfs_new_node() error
handling conditionally free the xattr store only when allocated.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/kernfs/dir.c             | 15 +++++++++++----
 fs/kernfs/inode.c           | 34 +++++++++++++++++++++++++---------
 fs/kernfs/kernfs-internal.h |  2 +-
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 29baeeb97871..e5735c45fb99 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -547,10 +547,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu)
 	/* If the whole node goes away, then name can't be used outside */
 	kfree_const(rcu_access_pointer(kn->name));
 
-	if (kn->iattr) {
-		simple_xattrs_free(&kn->iattr->xattrs, NULL);
+	if (kn->iattr)
 		kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
-	}
 
 	kmem_cache_free(kernfs_node_cache, kn);
 }
@@ -584,6 +582,12 @@ void kernfs_put(struct kernfs_node *kn)
 	if (kernfs_type(kn) == KERNFS_LINK)
 		kernfs_put(kn->symlink.target_kn);
 
+	if (kn->iattr && kn->iattr->xattrs) {
+		simple_xattrs_free(kn->iattr->xattrs, NULL);
+		kfree(kn->iattr->xattrs);
+		kn->iattr->xattrs = NULL;
+	}
+
 	spin_lock(&root->kernfs_idr_lock);
 	idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
 	spin_unlock(&root->kernfs_idr_lock);
@@ -682,7 +686,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 
  err_out4:
 	if (kn->iattr) {
-		simple_xattrs_free(&kn->iattr->xattrs, NULL);
+		if (kn->iattr->xattrs) {
+			simple_xattrs_free(kn->iattr->xattrs, NULL);
+			kfree(kn->iattr->xattrs);
+		}
 		kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
 	}
  err_out3:
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index a36aaee98dce..dfc3315b5afc 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -45,7 +45,6 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc)
 	ret->ia_mtime = ret->ia_atime;
 	ret->ia_ctime = ret->ia_atime;
 
-	simple_xattrs_init(&ret->xattrs);
 	atomic_set(&ret->nr_user_xattrs, 0);
 	atomic_set(&ret->user_xattr_size, 0);
 
@@ -146,7 +145,8 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
 	if (!attrs)
 		return -ENOMEM;
 
-	return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
+	return simple_xattr_list(d_inode(dentry), READ_ONCE(attrs->xattrs),
+				 buf, size);
 }
 
 static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
@@ -298,27 +298,38 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
 		     void *value, size_t size)
 {
 	struct kernfs_iattrs *attrs = kernfs_iattrs_noalloc(kn);
+	struct simple_xattrs *xattrs;
+
 	if (!attrs)
 		return -ENODATA;
 
-	return simple_xattr_get(&attrs->xattrs, name, value, size);
+	xattrs = READ_ONCE(attrs->xattrs);
+	if (!xattrs)
+		return -ENODATA;
+
+	return simple_xattr_get(xattrs, name, value, size);
 }
 
 int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
 		     const void *value, size_t size, int flags)
 {
 	struct simple_xattr *old_xattr;
+	struct simple_xattrs *xattrs;
 	struct kernfs_iattrs *attrs;
 
 	attrs = kernfs_iattrs(kn);
 	if (!attrs)
 		return -ENOMEM;
 
-	old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
+	xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
+	if (IS_ERR_OR_NULL(xattrs))
+		return PTR_ERR(xattrs);
+
+	old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
 	if (IS_ERR(old_xattr))
 		return PTR_ERR(old_xattr);
 
-	simple_xattr_free(old_xattr);
+	simple_xattr_free_rcu(old_xattr);
 	return 0;
 }
 
@@ -376,7 +387,7 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
 
 	ret = 0;
 	size = old_xattr->size;
-	simple_xattr_free(old_xattr);
+	simple_xattr_free_rcu(old_xattr);
 dec_size_out:
 	atomic_sub(size, sz);
 dec_count_out:
@@ -403,7 +414,7 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
 
 	atomic_sub(old_xattr->size, sz);
 	atomic_dec(nr);
-	simple_xattr_free(old_xattr);
+	simple_xattr_free_rcu(old_xattr);
 	return 0;
 }
 
@@ -415,6 +426,7 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
 {
 	const char *full_name = xattr_full_name(handler, suffix);
 	struct kernfs_node *kn = inode->i_private;
+	struct simple_xattrs *xattrs;
 	struct kernfs_iattrs *attrs;
 
 	if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR))
@@ -424,11 +436,15 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
 	if (!attrs)
 		return -ENOMEM;
 
+	xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
+	if (IS_ERR_OR_NULL(xattrs))
+		return PTR_ERR(xattrs);
+
 	if (value)
-		return kernfs_vfs_user_xattr_add(kn, full_name, &attrs->xattrs,
+		return kernfs_vfs_user_xattr_add(kn, full_name, xattrs,
 						 value, size, flags);
 	else
-		return kernfs_vfs_user_xattr_rm(kn, full_name, &attrs->xattrs,
+		return kernfs_vfs_user_xattr_rm(kn, full_name, xattrs,
 						value, size, flags);
 
 }
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 6061b6f70d2a..1324ed8c0661 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -26,7 +26,7 @@ struct kernfs_iattrs {
 	struct timespec64	ia_mtime;
 	struct timespec64	ia_ctime;
 
-	struct simple_xattrs	xattrs;
+	struct simple_xattrs	*xattrs;
 	atomic_t		nr_user_xattrs;
 	atomic_t		user_xattr_size;
 };

-- 
2.47.3
Re: [PATCH 04/14] kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation
Posted by Jan Kara 2 weeks, 5 days ago
On Mon 16-02-26 14:32:00, Christian Brauner wrote:
> Adapt kernfs to use the rhashtable-based xattr path and switch from an
> embedded struct to pointer-based lazy allocation.
> 
> Change kernfs_iattrs.xattrs from embedded 'struct simple_xattrs' to a
> pointer 'struct simple_xattrs *', initialized to NULL (zeroed by
> kmem_cache_zalloc). Since kernfs_iattrs is already lazily allocated
> itself, this adds a second level of lazy allocation specifically for
> the xattr store.
> 
> The xattr store is allocated on first setxattr. Read paths
> check for NULL and return -ENODATA or empty list.
> 
> Replaced xattr entries are freed via simple_xattr_free_rcu() to allow
> concurrent RCU readers to finish.
> 
> The cleanup paths in kernfs_free_rcu() and __kernfs_new_node() error
> handling conditionally free the xattr store only when allocated.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>

...

> @@ -584,6 +582,12 @@ void kernfs_put(struct kernfs_node *kn)
>  	if (kernfs_type(kn) == KERNFS_LINK)
>  		kernfs_put(kn->symlink.target_kn);
>  
> +	if (kn->iattr && kn->iattr->xattrs) {
> +		simple_xattrs_free(kn->iattr->xattrs, NULL);
> +		kfree(kn->iattr->xattrs);
> +		kn->iattr->xattrs = NULL;
> +	}
> +
>  	spin_lock(&root->kernfs_idr_lock);
>  	idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
>  	spin_unlock(&root->kernfs_idr_lock);

This is a slight change in the lifetime rules because previously kernfs
xattrs could be safely accessed only under RCU but after this change you
have to hold inode reference *and* RCU to safely access them. I don't think
anybody would be accessing xattrs without holding inode reference so this
should be safe but it would be good to mention this in the changelog.
Otherwise feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> @@ -682,7 +686,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
>  
>   err_out4:
>  	if (kn->iattr) {
> -		simple_xattrs_free(&kn->iattr->xattrs, NULL);
> +		if (kn->iattr->xattrs) {
> +			simple_xattrs_free(kn->iattr->xattrs, NULL);
> +			kfree(kn->iattr->xattrs);
> +		}
>  		kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
>  	}
>   err_out3:
> diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
> index a36aaee98dce..dfc3315b5afc 100644
> --- a/fs/kernfs/inode.c
> +++ b/fs/kernfs/inode.c
> @@ -45,7 +45,6 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc)
>  	ret->ia_mtime = ret->ia_atime;
>  	ret->ia_ctime = ret->ia_atime;
>  
> -	simple_xattrs_init(&ret->xattrs);
>  	atomic_set(&ret->nr_user_xattrs, 0);
>  	atomic_set(&ret->user_xattr_size, 0);
>  
> @@ -146,7 +145,8 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
>  	if (!attrs)
>  		return -ENOMEM;
>  
> -	return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
> +	return simple_xattr_list(d_inode(dentry), READ_ONCE(attrs->xattrs),
> +				 buf, size);
>  }
>  
>  static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
> @@ -298,27 +298,38 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
>  		     void *value, size_t size)
>  {
>  	struct kernfs_iattrs *attrs = kernfs_iattrs_noalloc(kn);
> +	struct simple_xattrs *xattrs;
> +
>  	if (!attrs)
>  		return -ENODATA;
>  
> -	return simple_xattr_get(&attrs->xattrs, name, value, size);
> +	xattrs = READ_ONCE(attrs->xattrs);
> +	if (!xattrs)
> +		return -ENODATA;
> +
> +	return simple_xattr_get(xattrs, name, value, size);
>  }
>  
>  int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
>  		     const void *value, size_t size, int flags)
>  {
>  	struct simple_xattr *old_xattr;
> +	struct simple_xattrs *xattrs;
>  	struct kernfs_iattrs *attrs;
>  
>  	attrs = kernfs_iattrs(kn);
>  	if (!attrs)
>  		return -ENOMEM;
>  
> -	old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
> +	xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
> +	if (IS_ERR_OR_NULL(xattrs))
> +		return PTR_ERR(xattrs);
> +
> +	old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
>  	if (IS_ERR(old_xattr))
>  		return PTR_ERR(old_xattr);
>  
> -	simple_xattr_free(old_xattr);
> +	simple_xattr_free_rcu(old_xattr);
>  	return 0;
>  }
>  
> @@ -376,7 +387,7 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
>  
>  	ret = 0;
>  	size = old_xattr->size;
> -	simple_xattr_free(old_xattr);
> +	simple_xattr_free_rcu(old_xattr);
>  dec_size_out:
>  	atomic_sub(size, sz);
>  dec_count_out:
> @@ -403,7 +414,7 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
>  
>  	atomic_sub(old_xattr->size, sz);
>  	atomic_dec(nr);
> -	simple_xattr_free(old_xattr);
> +	simple_xattr_free_rcu(old_xattr);
>  	return 0;
>  }
>  
> @@ -415,6 +426,7 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
>  {
>  	const char *full_name = xattr_full_name(handler, suffix);
>  	struct kernfs_node *kn = inode->i_private;
> +	struct simple_xattrs *xattrs;
>  	struct kernfs_iattrs *attrs;
>  
>  	if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR))
> @@ -424,11 +436,15 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
>  	if (!attrs)
>  		return -ENOMEM;
>  
> +	xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
> +	if (IS_ERR_OR_NULL(xattrs))
> +		return PTR_ERR(xattrs);
> +
>  	if (value)
> -		return kernfs_vfs_user_xattr_add(kn, full_name, &attrs->xattrs,
> +		return kernfs_vfs_user_xattr_add(kn, full_name, xattrs,
>  						 value, size, flags);
>  	else
> -		return kernfs_vfs_user_xattr_rm(kn, full_name, &attrs->xattrs,
> +		return kernfs_vfs_user_xattr_rm(kn, full_name, xattrs,
>  						value, size, flags);
>  
>  }
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 6061b6f70d2a..1324ed8c0661 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -26,7 +26,7 @@ struct kernfs_iattrs {
>  	struct timespec64	ia_mtime;
>  	struct timespec64	ia_ctime;
>  
> -	struct simple_xattrs	xattrs;
> +	struct simple_xattrs	*xattrs;
>  	atomic_t		nr_user_xattrs;
>  	atomic_t		user_xattr_size;
>  };
> 
> -- 
> 2.47.3
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR
Re: [PATCH 04/14] kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation
Posted by Christian Brauner 2 weeks, 2 days ago
On Fri, Feb 27, 2026 at 04:00:37PM +0100, Jan Kara wrote:
> On Mon 16-02-26 14:32:00, Christian Brauner wrote:
> > Adapt kernfs to use the rhashtable-based xattr path and switch from an
> > embedded struct to pointer-based lazy allocation.
> > 
> > Change kernfs_iattrs.xattrs from embedded 'struct simple_xattrs' to a
> > pointer 'struct simple_xattrs *', initialized to NULL (zeroed by
> > kmem_cache_zalloc). Since kernfs_iattrs is already lazily allocated
> > itself, this adds a second level of lazy allocation specifically for
> > the xattr store.
> > 
> > The xattr store is allocated on first setxattr. Read paths
> > check for NULL and return -ENODATA or empty list.
> > 
> > Replaced xattr entries are freed via simple_xattr_free_rcu() to allow
> > concurrent RCU readers to finish.
> > 
> > The cleanup paths in kernfs_free_rcu() and __kernfs_new_node() error
> > handling conditionally free the xattr store only when allocated.
> > 
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> 
> ...
> 
> > @@ -584,6 +582,12 @@ void kernfs_put(struct kernfs_node *kn)
> >  	if (kernfs_type(kn) == KERNFS_LINK)
> >  		kernfs_put(kn->symlink.target_kn);
> >  
> > +	if (kn->iattr && kn->iattr->xattrs) {
> > +		simple_xattrs_free(kn->iattr->xattrs, NULL);
> > +		kfree(kn->iattr->xattrs);
> > +		kn->iattr->xattrs = NULL;
> > +	}
> > +
> >  	spin_lock(&root->kernfs_idr_lock);
> >  	idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
> >  	spin_unlock(&root->kernfs_idr_lock);
> 
> This is a slight change in the lifetime rules because previously kernfs
> xattrs could be safely accessed only under RCU but after this change you
> have to hold inode reference *and* RCU to safely access them. I don't think
> anybody would be accessing xattrs without holding inode reference so this
> should be safe but it would be good to mention this in the changelog.

Done.