[PATCH 03/14] shmem: adapt to rhashtable-based simple_xattrs with lazy allocation

Christian Brauner posted 14 patches 1 month ago
[PATCH 03/14] shmem: adapt to rhashtable-based simple_xattrs with lazy allocation
Posted by Christian Brauner 1 month ago
Adapt tmpfs/shmem to use the rhashtable-based xattr path and switch
from an embedded struct to pointer-based lazy allocation.

Change shmem_inode_info.xattrs from embedded 'struct simple_xattrs' to
a pointer 'struct simple_xattrs *', initialized to NULL. This avoids
the rhashtable overhead for every tmpfs inode, which helps when a lot of
inodes exist.

The xattr store is allocated on first use:

- shmem_initxattrs(): Allocates via simple_xattrs_alloc() when
  security modules set initial xattrs during inode creation.

- shmem_xattr_handler_set(): Allocates on first setxattr, with a
  short-circuit for removal when no xattrs are stored yet.

All read paths (shmem_xattr_handler_get(), shmem_listxattr()) handle a
NULL xattrs pointer and return -ENODATA or 0, respectively.

Replaced or removed xattr entries are freed via simple_xattr_free_rcu()
to allow concurrent RCU readers to finish.

shmem_evict_inode() conditionally frees the xattr store only when
allocated.

Also change simple_xattr_add() from void to int to propagate
rhashtable insertion failures. shmem_initxattrs() is the only caller.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xattr.c               | 26 +++++++++++++-------------
 include/linux/shmem_fs.h |  2 +-
 include/linux/xattr.h    |  4 ++--
 mm/shmem.c               | 44 +++++++++++++++++++++++++++++++-------------
 4 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/fs/xattr.c b/fs/xattr.c
index 1d98ea459b7b..eb45ae0fd17f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1677,19 +1677,19 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
  * of matching xattrs is wanted. Should only be called during inode
  * initialization when a few distinct initial xattrs are supposed to be set.
  */
-void simple_xattr_add(struct simple_xattrs *xattrs,
-		      struct simple_xattr *new_xattr)
-{
-	if (xattrs->use_rhashtable) {
-		WARN_ON(rhashtable_insert_fast(&xattrs->ht,
-					       &new_xattr->hash_node,
-					       simple_xattr_params));
-	} else {
-		write_lock(&xattrs->lock);
-		rb_add(&new_xattr->rb_node, &xattrs->rb_root,
-		       rbtree_simple_xattr_less);
-		write_unlock(&xattrs->lock);
-	}
+int simple_xattr_add(struct simple_xattrs *xattrs,
+		     struct simple_xattr *new_xattr)
+{
+	if (xattrs->use_rhashtable)
+		return rhashtable_insert_fast(&xattrs->ht,
+					      &new_xattr->hash_node,
+					      simple_xattr_params);
+
+	write_lock(&xattrs->lock);
+	rb_add(&new_xattr->rb_node, &xattrs->rb_root,
+	       rbtree_simple_xattr_less);
+	write_unlock(&xattrs->lock);
+	return 0;
 }
 
 /**
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e2069b3179c4..53d325409a8b 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,7 +48,7 @@ struct shmem_inode_info {
 	};
 	struct timespec64	i_crtime;	/* file creation time */
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
-	struct simple_xattrs	xattrs;		/* list of xattrs */
+	struct simple_xattrs	*xattrs;	/* list of xattrs */
 	pgoff_t			fallocend;	/* highest fallocate endindex */
 	unsigned int		fsflags;	/* for FS_IOC_[SG]ETFLAGS */
 	atomic_t		stop_eviction;	/* hold when working on inode */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index ee4fd40717a0..3063ecf0004d 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -142,8 +142,8 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 				      size_t size, int flags);
 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 			  char *buffer, size_t size);
-void simple_xattr_add(struct simple_xattrs *xattrs,
-		      struct simple_xattr *new_xattr);
+int simple_xattr_add(struct simple_xattrs *xattrs,
+		     struct simple_xattr *new_xattr);
 int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
 
 DEFINE_CLASS(simple_xattr,
diff --git a/mm/shmem.c b/mm/shmem.c
index fc8020ce2e9f..8761c9b4f1c5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1426,7 +1426,10 @@ static void shmem_evict_inode(struct inode *inode)
 		}
 	}
 
-	simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+	if (info->xattrs) {
+		simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+		kfree(info->xattrs);
+	}
 	shmem_free_inode(inode->i_sb, freed);
 	WARN_ON(inode->i_blocks);
 	clear_inode(inode);
@@ -3118,7 +3121,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 		shmem_set_inode_flags(inode, info->fsflags, NULL);
 	INIT_LIST_HEAD(&info->shrinklist);
 	INIT_LIST_HEAD(&info->swaplist);
-	simple_xattrs_init(&info->xattrs);
 	cache_no_acl(inode);
 	if (sbinfo->noswap)
 		mapping_set_unevictable(inode->i_mapping);
@@ -4270,10 +4272,13 @@ static int shmem_initxattrs(struct inode *inode,
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	const struct xattr *xattr;
-	struct simple_xattr *new_xattr;
 	size_t ispace = 0;
 	size_t len;
 
+	CLASS(simple_xattrs, xattrs)();
+	if (IS_ERR(xattrs))
+		return PTR_ERR(xattrs);
+
 	if (sbinfo->max_inodes) {
 		for (xattr = xattr_array; xattr->name != NULL; xattr++) {
 			ispace += simple_xattr_space(xattr->name,
@@ -4292,24 +4297,24 @@ static int shmem_initxattrs(struct inode *inode,
 	}
 
 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
-		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
+		CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
 		if (IS_ERR(new_xattr))
 			break;
 
 		len = strlen(xattr->name) + 1;
 		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
 					  GFP_KERNEL_ACCOUNT);
-		if (!new_xattr->name) {
-			kvfree(new_xattr);
+		if (!new_xattr->name)
 			break;
-		}
 
 		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
 		       XATTR_SECURITY_PREFIX_LEN);
 		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
 		       xattr->name, len);
 
-		simple_xattr_add(&info->xattrs, new_xattr);
+		if (simple_xattr_add(xattrs, new_xattr))
+			break;
+		retain_and_null_ptr(new_xattr);
 	}
 
 	if (xattr->name != NULL) {
@@ -4318,10 +4323,10 @@ static int shmem_initxattrs(struct inode *inode,
 			sbinfo->free_ispace += ispace;
 			raw_spin_unlock(&sbinfo->stat_lock);
 		}
-		simple_xattrs_free(&info->xattrs, NULL);
 		return -ENOMEM;
 	}
 
+	smp_store_release(&info->xattrs, no_free_ptr(xattrs));
 	return 0;
 }
 
@@ -4330,9 +4335,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
 				   const char *name, void *buffer, size_t size)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct simple_xattrs *xattrs;
+
+	xattrs = READ_ONCE(info->xattrs);
+	if (!xattrs)
+		return -ENODATA;
 
 	name = xattr_full_name(handler, name);
-	return simple_xattr_get(&info->xattrs, name, buffer, size);
+	return simple_xattr_get(xattrs, name, buffer, size);
 }
 
 static int shmem_xattr_handler_set(const struct xattr_handler *handler,
@@ -4343,10 +4353,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct simple_xattrs *xattrs;
 	struct simple_xattr *old_xattr;
 	size_t ispace = 0;
 
 	name = xattr_full_name(handler, name);
+
+	xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags);
+	if (IS_ERR_OR_NULL(xattrs))
+		return PTR_ERR(xattrs);
+
 	if (value && sbinfo->max_inodes) {
 		ispace = simple_xattr_space(name, size);
 		raw_spin_lock(&sbinfo->stat_lock);
@@ -4359,13 +4375,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
 			return -ENOSPC;
 	}
 
-	old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+	old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
 	if (!IS_ERR(old_xattr)) {
 		ispace = 0;
 		if (old_xattr && sbinfo->max_inodes)
 			ispace = simple_xattr_space(old_xattr->name,
 						    old_xattr->size);
-		simple_xattr_free(old_xattr);
+		simple_xattr_free_rcu(old_xattr);
 		old_xattr = NULL;
 		inode_set_ctime_current(inode);
 		inode_inc_iversion(inode);
@@ -4406,7 +4422,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = {
 static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
 	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
-	return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
+
+	return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs),
+				 buffer, size);
 }
 #endif /* CONFIG_TMPFS_XATTR */
 

-- 
2.47.3
Re: [PATCH 03/14] shmem: adapt to rhashtable-based simple_xattrs with lazy allocation
Posted by Jan Kara 2 weeks, 5 days ago
On Mon 16-02-26 14:31:59, Christian Brauner wrote:
> Adapt tmpfs/shmem to use the rhashtable-based xattr path and switch
> from an embedded struct to pointer-based lazy allocation.
> 
> Change shmem_inode_info.xattrs from embedded 'struct simple_xattrs' to
> a pointer 'struct simple_xattrs *', initialized to NULL. This avoids
> the rhashtable overhead for every tmpfs inode, which helps when a lot of
> inodes exist.
> 
> The xattr store is allocated on first use:
> 
> - shmem_initxattrs(): Allocates via simple_xattrs_alloc() when
>   security modules set initial xattrs during inode creation.
> 
> - shmem_xattr_handler_set(): Allocates on first setxattr, with a
>   short-circuit for removal when no xattrs are stored yet.
> 
> All read paths (shmem_xattr_handler_get, shmem_listxattr) check for
> NULL xattrs pointer and return -ENODATA or 0 respectively.
> 
> Replaced xattr entries are freed via simple_xattr_free_rcu() to allow
> concurrent RCU readers to finish.
> 
> shmem_evict_inode() conditionally frees the xattr store only when
> allocated.
> 
> Also change simple_xattr_add() from void to int to propagate
> rhashtable insertion failures. shmem_initxattrs() is the only caller.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/xattr.c               | 26 +++++++++++++-------------
>  include/linux/shmem_fs.h |  2 +-
>  include/linux/xattr.h    |  4 ++--
>  mm/shmem.c               | 44 +++++++++++++++++++++++++++++++-------------
>  4 files changed, 47 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 1d98ea459b7b..eb45ae0fd17f 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -1677,19 +1677,19 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
>   * of matching xattrs is wanted. Should only be called during inode
>   * initialization when a few distinct initial xattrs are supposed to be set.
>   */
> -void simple_xattr_add(struct simple_xattrs *xattrs,
> -		      struct simple_xattr *new_xattr)
> -{
> -	if (xattrs->use_rhashtable) {
> -		WARN_ON(rhashtable_insert_fast(&xattrs->ht,
> -					       &new_xattr->hash_node,
> -					       simple_xattr_params));
> -	} else {
> -		write_lock(&xattrs->lock);
> -		rb_add(&new_xattr->rb_node, &xattrs->rb_root,
> -		       rbtree_simple_xattr_less);
> -		write_unlock(&xattrs->lock);
> -	}
> +int simple_xattr_add(struct simple_xattrs *xattrs,
> +		     struct simple_xattr *new_xattr)
> +{
> +	if (xattrs->use_rhashtable)
> +		return rhashtable_insert_fast(&xattrs->ht,
> +					      &new_xattr->hash_node,
> +					      simple_xattr_params);
> +
> +	write_lock(&xattrs->lock);
> +	rb_add(&new_xattr->rb_node, &xattrs->rb_root,
> +	       rbtree_simple_xattr_less);
> +	write_unlock(&xattrs->lock);
> +	return 0;
>  }
>  
>  /**
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index e2069b3179c4..53d325409a8b 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -48,7 +48,7 @@ struct shmem_inode_info {
>  	};
>  	struct timespec64	i_crtime;	/* file creation time */
>  	struct shared_policy	policy;		/* NUMA memory alloc policy */
> -	struct simple_xattrs	xattrs;		/* list of xattrs */
> +	struct simple_xattrs	*xattrs;	/* list of xattrs */
>  	pgoff_t			fallocend;	/* highest fallocate endindex */
>  	unsigned int		fsflags;	/* for FS_IOC_[SG]ETFLAGS */
>  	atomic_t		stop_eviction;	/* hold when working on inode */
> diff --git a/include/linux/xattr.h b/include/linux/xattr.h
> index ee4fd40717a0..3063ecf0004d 100644
> --- a/include/linux/xattr.h
> +++ b/include/linux/xattr.h
> @@ -142,8 +142,8 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
>  				      size_t size, int flags);
>  ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
>  			  char *buffer, size_t size);
> -void simple_xattr_add(struct simple_xattrs *xattrs,
> -		      struct simple_xattr *new_xattr);
> +int simple_xattr_add(struct simple_xattrs *xattrs,
> +		     struct simple_xattr *new_xattr);
>  int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
>  
>  DEFINE_CLASS(simple_xattr,
> diff --git a/mm/shmem.c b/mm/shmem.c
> index fc8020ce2e9f..8761c9b4f1c5 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1426,7 +1426,10 @@ static void shmem_evict_inode(struct inode *inode)
>  		}
>  	}
>  
> -	simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
> +	if (info->xattrs) {
> +		simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL);
> +		kfree(info->xattrs);
> +	}
>  	shmem_free_inode(inode->i_sb, freed);
>  	WARN_ON(inode->i_blocks);
>  	clear_inode(inode);
> @@ -3118,7 +3121,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
>  		shmem_set_inode_flags(inode, info->fsflags, NULL);
>  	INIT_LIST_HEAD(&info->shrinklist);
>  	INIT_LIST_HEAD(&info->swaplist);
> -	simple_xattrs_init(&info->xattrs);
>  	cache_no_acl(inode);
>  	if (sbinfo->noswap)
>  		mapping_set_unevictable(inode->i_mapping);
> @@ -4270,10 +4272,13 @@ static int shmem_initxattrs(struct inode *inode,
>  	struct shmem_inode_info *info = SHMEM_I(inode);
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
>  	const struct xattr *xattr;
> -	struct simple_xattr *new_xattr;
>  	size_t ispace = 0;
>  	size_t len;
>  
> +	CLASS(simple_xattrs, xattrs)();
> +	if (IS_ERR(xattrs))
> +		return PTR_ERR(xattrs);
> +
>  	if (sbinfo->max_inodes) {
>  		for (xattr = xattr_array; xattr->name != NULL; xattr++) {
>  			ispace += simple_xattr_space(xattr->name,
> @@ -4292,24 +4297,24 @@ static int shmem_initxattrs(struct inode *inode,
>  	}
>  
>  	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
> -		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
> +		CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
>  		if (IS_ERR(new_xattr))
>  			break;
>  
>  		len = strlen(xattr->name) + 1;
>  		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
>  					  GFP_KERNEL_ACCOUNT);
> -		if (!new_xattr->name) {
> -			kvfree(new_xattr);
> +		if (!new_xattr->name)
>  			break;
> -		}
>  
>  		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
>  		       XATTR_SECURITY_PREFIX_LEN);
>  		memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
>  		       xattr->name, len);
>  
> -		simple_xattr_add(&info->xattrs, new_xattr);
> +		if (simple_xattr_add(xattrs, new_xattr))
> +			break;
> +		retain_and_null_ptr(new_xattr);
>  	}
>  
>  	if (xattr->name != NULL) {
> @@ -4318,10 +4323,10 @@ static int shmem_initxattrs(struct inode *inode,
>  			sbinfo->free_ispace += ispace;
>  			raw_spin_unlock(&sbinfo->stat_lock);
>  		}
> -		simple_xattrs_free(&info->xattrs, NULL);
>  		return -ENOMEM;
>  	}
>  
> +	smp_store_release(&info->xattrs, no_free_ptr(xattrs));
>  	return 0;
>  }
>  
> @@ -4330,9 +4335,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
>  				   const char *name, void *buffer, size_t size)
>  {
>  	struct shmem_inode_info *info = SHMEM_I(inode);
> +	struct simple_xattrs *xattrs;
> +
> +	xattrs = READ_ONCE(info->xattrs);
> +	if (!xattrs)
> +		return -ENODATA;
>  
>  	name = xattr_full_name(handler, name);
> -	return simple_xattr_get(&info->xattrs, name, buffer, size);
> +	return simple_xattr_get(xattrs, name, buffer, size);
>  }
>  
>  static int shmem_xattr_handler_set(const struct xattr_handler *handler,
> @@ -4343,10 +4353,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
>  {
>  	struct shmem_inode_info *info = SHMEM_I(inode);
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
> +	struct simple_xattrs *xattrs;
>  	struct simple_xattr *old_xattr;
>  	size_t ispace = 0;
>  
>  	name = xattr_full_name(handler, name);
> +
> +	xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags);
> +	if (IS_ERR_OR_NULL(xattrs))
> +		return PTR_ERR(xattrs);
> +
>  	if (value && sbinfo->max_inodes) {
>  		ispace = simple_xattr_space(name, size);
>  		raw_spin_lock(&sbinfo->stat_lock);
> @@ -4359,13 +4375,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
>  			return -ENOSPC;
>  	}
>  
> -	old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
> +	old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
>  	if (!IS_ERR(old_xattr)) {
>  		ispace = 0;
>  		if (old_xattr && sbinfo->max_inodes)
>  			ispace = simple_xattr_space(old_xattr->name,
>  						    old_xattr->size);
> -		simple_xattr_free(old_xattr);
> +		simple_xattr_free_rcu(old_xattr);
>  		old_xattr = NULL;
>  		inode_set_ctime_current(inode);
>  		inode_inc_iversion(inode);
> @@ -4406,7 +4422,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = {
>  static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
>  {
>  	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
> -	return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
> +
> +	return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs),
> +				 buffer, size);
>  }
>  #endif /* CONFIG_TMPFS_XATTR */
>  
> 
> -- 
> 2.47.3
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR