[PATCH 06/14] xattr: remove rbtree-based simple_xattr infrastructure

Christian Brauner posted 14 patches 1 month ago
[PATCH 06/14] xattr: remove rbtree-based simple_xattr infrastructure
Posted by Christian Brauner 1 month ago
Now that all consumers (shmem, kernfs, pidfs) have been converted to
use the rhashtable-based simple_xattrs with pointer-based lazy
allocation, remove the legacy rbtree code path. The rhashtable
implementation provides O(1) average-case lookup with RCU-based lockless
reads, replacing the O(log n) rbtree with reader-writer spinlock
contention.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xattr.c            | 387 +++++++++++++-------------------------------------
 include/linux/xattr.h |  12 +-
 2 files changed, 103 insertions(+), 296 deletions(-)

diff --git a/fs/xattr.c b/fs/xattr.c
index eb45ae0fd17f..64803097e1dc 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1200,20 +1200,18 @@ void simple_xattr_free(struct simple_xattr *xattr)
 
 static void simple_xattr_rcu_free(struct rcu_head *head)
 {
-	struct simple_xattr *xattr;
+	struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
 
-	xattr = container_of(head, struct simple_xattr, rcu);
 	simple_xattr_free(xattr);
 }
 
 /**
- * simple_xattr_free_rcu - free an xattr object after an RCU grace period
+ * simple_xattr_free_rcu - free an xattr object with RCU delay
  * @xattr: the xattr object
  *
- * Schedule RCU-deferred freeing of an xattr entry. This is used by
- * rhashtable-based callers of simple_xattr_set() that replace or remove
- * an existing entry while concurrent RCU readers may still be accessing
- * it.
+ * Free the xattr object after an RCU grace period. This must be used when
+ * the xattr was removed from a data structure that concurrent RCU readers
+ * may still be traversing. Can handle @xattr being NULL.
  */
 void simple_xattr_free_rcu(struct simple_xattr *xattr)
 {
@@ -1254,43 +1252,6 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
 	return new_xattr;
 }
 
-/**
- * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
- * @key: xattr name
- * @node: current node
- *
- * Compare the xattr name with the xattr name attached to @node in the rbtree.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @node matches @key.
- */
-static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
-{
-	const char *xattr_name = key;
-	const struct simple_xattr *xattr;
-
-	xattr = rb_entry(node, struct simple_xattr, rb_node);
-	return strcmp(xattr->name, xattr_name);
-}
-
-/**
- * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @new_node matches the xattr attached to @node.
- */
-static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
-					const struct rb_node *node)
-{
-	struct simple_xattr *xattr;
-	xattr = rb_entry(new_node, struct simple_xattr, rb_node);
-	return rbtree_simple_xattr_cmp(xattr->name, node);
-}
-
 static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
 {
 	const char *name = data;
@@ -1336,41 +1297,19 @@ static const struct rhashtable_params simple_xattr_params = {
 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
 		     void *buffer, size_t size)
 {
-	struct simple_xattr *xattr = NULL;
+	struct simple_xattr *xattr;
 	int ret = -ENODATA;
 
-	if (xattrs->use_rhashtable) {
-		guard(rcu)();
-		xattr = rhashtable_lookup(&xattrs->ht, name,
-					  simple_xattr_params);
-		if (xattr) {
-			ret = xattr->size;
-			if (buffer) {
-				if (size < xattr->size)
-					ret = -ERANGE;
-				else
-					memcpy(buffer, xattr->value,
-					       xattr->size);
-			}
-		}
-	} else {
-		struct rb_node *rbp;
-
-		read_lock(&xattrs->lock);
-		rbp = rb_find(name, &xattrs->rb_root,
-			      rbtree_simple_xattr_cmp);
-		if (rbp) {
-			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-			ret = xattr->size;
-			if (buffer) {
-				if (size < xattr->size)
-					ret = -ERANGE;
-				else
-					memcpy(buffer, xattr->value,
-					       xattr->size);
-			}
+	guard(rcu)();
+	xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
+	if (xattr) {
+		ret = xattr->size;
+		if (buffer) {
+			if (size < xattr->size)
+				ret = -ERANGE;
+			else
+				memcpy(buffer, xattr->value, xattr->size);
 		}
-		read_unlock(&xattrs->lock);
 	}
 	return ret;
 }
@@ -1398,6 +1337,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
  * XATTR_REPLACE we fail as mentioned above.
  *
+ * Note: Callers must externally serialize writes. All current callers hold
+ * the inode lock for write operations. The lookup->replace/remove sequence
+ * is not atomic with respect to the rhashtable's per-bucket locking, but
+ * is safe because writes are serialized by the caller.
+ *
  * Return: On success, the removed or replaced xattr is returned, to be freed
  * by the caller; or NULL if none. On failure a negative error code is returned.
  */
@@ -1406,7 +1350,7 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 				      size_t size, int flags)
 {
 	struct simple_xattr *old_xattr = NULL;
-	int err = 0;
+	int err;
 
 	CLASS(simple_xattr, new_xattr)(value, size);
 	if (IS_ERR(new_xattr))
@@ -1418,119 +1362,52 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 			return ERR_PTR(-ENOMEM);
 	}
 
-	if (xattrs->use_rhashtable) {
-		/*
-		 * Lookup is safe without RCU here since writes are
-		 * serialized by the caller.
-		 */
-		old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
-						   simple_xattr_params);
-
-		if (old_xattr) {
-			/* Fail if XATTR_CREATE is requested and the xattr exists. */
-			if (flags & XATTR_CREATE)
-				return ERR_PTR(-EEXIST);
-
-			if (new_xattr) {
-				err = rhashtable_replace_fast(&xattrs->ht,
-							     &old_xattr->hash_node,
-							     &new_xattr->hash_node,
-							     simple_xattr_params);
-				if (err)
-					return ERR_PTR(err);
-			} else {
-				err = rhashtable_remove_fast(&xattrs->ht,
-							    &old_xattr->hash_node,
-							    simple_xattr_params);
-				if (err)
-					return ERR_PTR(err);
-			}
-		} else {
-			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
-			if (flags & XATTR_REPLACE)
-				return ERR_PTR(-ENODATA);
-
-			/*
-			 * If XATTR_CREATE or no flags are specified together
-			 * with a new value simply insert it.
-			 */
-			if (new_xattr) {
-				err = rhashtable_insert_fast(&xattrs->ht,
-							    &new_xattr->hash_node,
-							    simple_xattr_params);
-				if (err)
-					return ERR_PTR(err);
-			}
-
-			/*
-			 * If XATTR_CREATE or no flags are specified and
-			 * neither an old or new xattr exist then we don't
-			 * need to do anything.
-			 */
-		}
-	} else {
-		struct rb_node *parent = NULL, **rbp;
-		int ret;
-
-		write_lock(&xattrs->lock);
-		rbp = &xattrs->rb_root.rb_node;
-		while (*rbp) {
-			parent = *rbp;
-			ret = rbtree_simple_xattr_cmp(name, *rbp);
-			if (ret < 0)
-				rbp = &(*rbp)->rb_left;
-			else if (ret > 0)
-				rbp = &(*rbp)->rb_right;
-			else
-				old_xattr = rb_entry(*rbp, struct simple_xattr,
-						     rb_node);
-			if (old_xattr)
-				break;
-		}
+	/* Lookup is safe without RCU here since writes are serialized. */
+	old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
+					   simple_xattr_params);
 
-		if (old_xattr) {
-			/* Fail if XATTR_CREATE is requested and the xattr exists. */
-			if (flags & XATTR_CREATE) {
-				err = -EEXIST;
-				goto out_unlock;
-			}
+	if (old_xattr) {
+		/* Fail if XATTR_CREATE is requested and the xattr exists. */
+		if (flags & XATTR_CREATE)
+			return ERR_PTR(-EEXIST);
 
-			if (new_xattr)
-				rb_replace_node(&old_xattr->rb_node,
-						&new_xattr->rb_node,
-						&xattrs->rb_root);
-			else
-				rb_erase(&old_xattr->rb_node,
-					 &xattrs->rb_root);
+		if (new_xattr) {
+			err = rhashtable_replace_fast(&xattrs->ht,
+						      &old_xattr->hash_node,
+						      &new_xattr->hash_node,
+						      simple_xattr_params);
+			if (err)
+				return ERR_PTR(err);
 		} else {
-			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
-			if (flags & XATTR_REPLACE) {
-				err = -ENODATA;
-				goto out_unlock;
-			}
-
-			/*
-			 * If XATTR_CREATE or no flags are specified together
-			 * with a new value simply insert it.
-			 */
-			if (new_xattr) {
-				rb_link_node(&new_xattr->rb_node, parent, rbp);
-				rb_insert_color(&new_xattr->rb_node,
-						&xattrs->rb_root);
-			}
+			err = rhashtable_remove_fast(&xattrs->ht,
+						     &old_xattr->hash_node,
+						     simple_xattr_params);
+			if (err)
+				return ERR_PTR(err);
+		}
+	} else {
+		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
+		if (flags & XATTR_REPLACE)
+			return ERR_PTR(-ENODATA);
 
-			/*
-			 * If XATTR_CREATE or no flags are specified and
-			 * neither an old or new xattr exist then we don't
-			 * need to do anything.
-			 */
+		/*
+		 * If XATTR_CREATE or no flags are specified together with a
+		 * new value simply insert it.
+		 */
+		if (new_xattr) {
+			err = rhashtable_insert_fast(&xattrs->ht,
+						     &new_xattr->hash_node,
+						     simple_xattr_params);
+			if (err)
+				return ERR_PTR(err);
 		}
 
-out_unlock:
-		write_unlock(&xattrs->lock);
-		if (err)
-			return ERR_PTR(err);
+		/*
+		 * If XATTR_CREATE or no flags are specified and neither an
+		 * old or new xattr exist then we don't need to do anything.
+		 */
 	}
+
 	retain_and_null_ptr(new_xattr);
 	return old_xattr;
 }
@@ -1572,6 +1449,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 			  char *buffer, size_t size)
 {
 	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+	struct rhashtable_iter iter;
 	struct simple_xattr *xattr;
 	ssize_t remaining_size = size;
 	int err = 0;
@@ -1595,77 +1473,34 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 	if (!xattrs)
 		return size - remaining_size;
 
-	if (xattrs->use_rhashtable) {
-		struct rhashtable_iter iter;
-
-		rhashtable_walk_enter(&xattrs->ht, &iter);
-		rhashtable_walk_start(&iter);
-
-		while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
-			if (IS_ERR(xattr)) {
-				if (PTR_ERR(xattr) == -EAGAIN)
-					continue;
-				err = PTR_ERR(xattr);
-				break;
-			}
-
-			/* skip "trusted." attributes for unprivileged callers */
-			if (!trusted && xattr_is_trusted(xattr->name))
-				continue;
+	rhashtable_walk_enter(&xattrs->ht, &iter);
+	rhashtable_walk_start(&iter);
 
-			/* skip MAC labels; these are provided by LSM above */
-			if (xattr_is_maclabel(xattr->name))
+	while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
+		if (IS_ERR(xattr)) {
+			if (PTR_ERR(xattr) == -EAGAIN)
 				continue;
-
-			err = xattr_list_one(&buffer, &remaining_size,
-					     xattr->name);
-			if (err)
-				break;
+			err = PTR_ERR(xattr);
+			break;
 		}
 
-		rhashtable_walk_stop(&iter);
-		rhashtable_walk_exit(&iter);
-	} else {
-		struct rb_node *rbp;
-
-		read_lock(&xattrs->lock);
-		for (rbp = rb_first(&xattrs->rb_root); rbp;
-		     rbp = rb_next(rbp)) {
-			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-
-			/* skip "trusted." attributes for unprivileged callers */
-			if (!trusted && xattr_is_trusted(xattr->name))
-				continue;
+		/* skip "trusted." attributes for unprivileged callers */
+		if (!trusted && xattr_is_trusted(xattr->name))
+			continue;
 
-			/* skip MAC labels; these are provided by LSM above */
-			if (xattr_is_maclabel(xattr->name))
-				continue;
+		/* skip MAC labels; these are provided by LSM above */
+		if (xattr_is_maclabel(xattr->name))
+			continue;
 
-			err = xattr_list_one(&buffer, &remaining_size,
-					     xattr->name);
-			if (err)
-				break;
-		}
-		read_unlock(&xattrs->lock);
+		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
+		if (err)
+			break;
 	}
 
-	return err ? err : size - remaining_size;
-}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
 
-/**
- * rbtree_simple_xattr_less - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- * Note that this function technically tolerates duplicate entries.
- *
- * Return: True if insertion point in the rbtree is found.
- */
-static bool rbtree_simple_xattr_less(struct rb_node *new_node,
-				     const struct rb_node *node)
-{
-	return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
+	return err ? err : size - remaining_size;
 }
 
 /**
@@ -1676,33 +1511,29 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
  * Add an xattr object to @xattrs. This assumes no replacement or removal
  * of matching xattrs is wanted. Should only be called during inode
  * initialization when a few distinct initial xattrs are supposed to be set.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
  */
 int simple_xattr_add(struct simple_xattrs *xattrs,
 		     struct simple_xattr *new_xattr)
 {
-	if (xattrs->use_rhashtable)
-		return rhashtable_insert_fast(&xattrs->ht,
-					      &new_xattr->hash_node,
-					      simple_xattr_params);
-
-	write_lock(&xattrs->lock);
-	rb_add(&new_xattr->rb_node, &xattrs->rb_root,
-	       rbtree_simple_xattr_less);
-	write_unlock(&xattrs->lock);
-	return 0;
+	return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
+				      simple_xattr_params);
 }
 
 /**
  * simple_xattrs_init - initialize new xattr header
  * @xattrs: header to initialize
  *
- * Initialize relevant fields of a an xattr header.
+ * Initialize the rhashtable used to store xattr objects.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
  */
-void simple_xattrs_init(struct simple_xattrs *xattrs)
+int simple_xattrs_init(struct simple_xattrs *xattrs)
 {
-	xattrs->use_rhashtable = false;
-	xattrs->rb_root = RB_ROOT;
-	rwlock_init(&xattrs->lock);
+	return rhashtable_init(&xattrs->ht, &simple_xattr_params);
 }
 
 /**
@@ -1710,7 +1541,8 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
  *
  * Dynamically allocate a simple_xattrs header and initialize the
  * underlying rhashtable. This is intended for consumers that want
- * rhashtable-based xattr storage.
+ * to lazily allocate xattr storage only when the first xattr is set,
+ * avoiding the per-inode rhashtable overhead when no xattrs are used.
  *
  * Return: On success a new simple_xattrs is returned. On failure an
  * ERR_PTR is returned.
@@ -1718,14 +1550,15 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
 struct simple_xattrs *simple_xattrs_alloc(void)
 {
 	struct simple_xattrs *xattrs __free(kfree) = NULL;
+	int ret;
 
 	xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
 	if (!xattrs)
 		return ERR_PTR(-ENOMEM);
 
-	xattrs->use_rhashtable = true;
-	if (rhashtable_init(&xattrs->ht, &simple_xattr_params))
-		return ERR_PTR(-ENOMEM);
+	ret = simple_xattrs_init(xattrs);
+	if (ret)
+		return ERR_PTR(ret);
 
 	return no_free_ptr(xattrs);
 }
@@ -1784,28 +1617,10 @@ static void simple_xattr_ht_free(void *ptr, void *arg)
  */
 void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
 {
+	might_sleep();
+
 	if (freed_space)
 		*freed_space = 0;
-
-	if (xattrs->use_rhashtable) {
-		rhashtable_free_and_destroy(&xattrs->ht,
-					    simple_xattr_ht_free, freed_space);
-	} else {
-		struct rb_node *rbp;
-
-		rbp = rb_first(&xattrs->rb_root);
-		while (rbp) {
-			struct simple_xattr *xattr;
-			struct rb_node *rbp_next;
-
-			rbp_next = rb_next(rbp);
-			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-			rb_erase(&xattr->rb_node, &xattrs->rb_root);
-			if (freed_space)
-				*freed_space += simple_xattr_space(xattr->name,
-								   xattr->size);
-			simple_xattr_free(xattr);
-			rbp = rbp_next;
-		}
-	}
+	rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
+				    freed_space);
 }
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 3063ecf0004d..f60357d9f938 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -107,18 +107,10 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
 }
 
 struct simple_xattrs {
-	bool use_rhashtable;
-	union {
-		struct {
-			struct rb_root rb_root;
-			rwlock_t lock;
-		};
-		struct rhashtable ht;
-	};
+	struct rhashtable ht;
 };
 
 struct simple_xattr {
-	struct rb_node rb_node;
 	struct rhash_head hash_node;
 	struct rcu_head rcu;
 	char *name;
@@ -126,7 +118,7 @@ struct simple_xattr {
 	char value[];
 };
 
-void simple_xattrs_init(struct simple_xattrs *xattrs);
+int simple_xattrs_init(struct simple_xattrs *xattrs);
 struct simple_xattrs *simple_xattrs_alloc(void);
 struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
 					       const void *value, int flags);

-- 
2.47.3
Re: [PATCH 06/14] xattr: remove rbtree-based simple_xattr infrastructure
Posted by Jan Kara 2 weeks, 5 days ago
On Mon 16-02-26 14:32:02, Christian Brauner wrote:
> Now that all consumers (shmem, kernfs, pidfs) have been converted to
> use the rhashtable-based simple_xattrs with pointer-based lazy
> allocation, remove the legacy rbtree code path. The rhashtable
> implementation provides O(1) average-case lookup with RCU-based lockless
> reads, replacing the O(log n) rbtree with reader-writer spinlock
> contention.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/xattr.c            | 387 +++++++++++++-------------------------------------
>  include/linux/xattr.h |  12 +-
>  2 files changed, 103 insertions(+), 296 deletions(-)
> 
> diff --git a/fs/xattr.c b/fs/xattr.c
> index eb45ae0fd17f..64803097e1dc 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -1200,20 +1200,18 @@ void simple_xattr_free(struct simple_xattr *xattr)
>  
>  static void simple_xattr_rcu_free(struct rcu_head *head)
>  {
> -	struct simple_xattr *xattr;
> +	struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
>  
> -	xattr = container_of(head, struct simple_xattr, rcu);
>  	simple_xattr_free(xattr);
>  }
>  
>  /**
> - * simple_xattr_free_rcu - free an xattr object after an RCU grace period
> + * simple_xattr_free_rcu - free an xattr object with RCU delay
>   * @xattr: the xattr object
>   *
> - * Schedule RCU-deferred freeing of an xattr entry. This is used by
> - * rhashtable-based callers of simple_xattr_set() that replace or remove
> - * an existing entry while concurrent RCU readers may still be accessing
> - * it.
> + * Free the xattr object after an RCU grace period. This must be used when
> + * the xattr was removed from a data structure that concurrent RCU readers
> + * may still be traversing. Can handle @xattr being NULL.
>   */
>  void simple_xattr_free_rcu(struct simple_xattr *xattr)
>  {
> @@ -1254,43 +1252,6 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
>  	return new_xattr;
>  }
>  
> -/**
> - * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
> - * @key: xattr name
> - * @node: current node
> - *
> - * Compare the xattr name with the xattr name attached to @node in the rbtree.
> - *
> - * Return: Negative value if continuing left, positive if continuing right, 0
> - * if the xattr attached to @node matches @key.
> - */
> -static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
> -{
> -	const char *xattr_name = key;
> -	const struct simple_xattr *xattr;
> -
> -	xattr = rb_entry(node, struct simple_xattr, rb_node);
> -	return strcmp(xattr->name, xattr_name);
> -}
> -
> -/**
> - * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
> - * @new_node: new node
> - * @node: current node
> - *
> - * Compare the xattr attached to @new_node with the xattr attached to @node.
> - *
> - * Return: Negative value if continuing left, positive if continuing right, 0
> - * if the xattr attached to @new_node matches the xattr attached to @node.
> - */
> -static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
> -					const struct rb_node *node)
> -{
> -	struct simple_xattr *xattr;
> -	xattr = rb_entry(new_node, struct simple_xattr, rb_node);
> -	return rbtree_simple_xattr_cmp(xattr->name, node);
> -}
> -
>  static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
>  {
>  	const char *name = data;
> @@ -1336,41 +1297,19 @@ static const struct rhashtable_params simple_xattr_params = {
>  int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
>  		     void *buffer, size_t size)
>  {
> -	struct simple_xattr *xattr = NULL;
> +	struct simple_xattr *xattr;
>  	int ret = -ENODATA;
>  
> -	if (xattrs->use_rhashtable) {
> -		guard(rcu)();
> -		xattr = rhashtable_lookup(&xattrs->ht, name,
> -					  simple_xattr_params);
> -		if (xattr) {
> -			ret = xattr->size;
> -			if (buffer) {
> -				if (size < xattr->size)
> -					ret = -ERANGE;
> -				else
> -					memcpy(buffer, xattr->value,
> -					       xattr->size);
> -			}
> -		}
> -	} else {
> -		struct rb_node *rbp;
> -
> -		read_lock(&xattrs->lock);
> -		rbp = rb_find(name, &xattrs->rb_root,
> -			      rbtree_simple_xattr_cmp);
> -		if (rbp) {
> -			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
> -			ret = xattr->size;
> -			if (buffer) {
> -				if (size < xattr->size)
> -					ret = -ERANGE;
> -				else
> -					memcpy(buffer, xattr->value,
> -					       xattr->size);
> -			}
> +	guard(rcu)();
> +	xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
> +	if (xattr) {
> +		ret = xattr->size;
> +		if (buffer) {
> +			if (size < xattr->size)
> +				ret = -ERANGE;
> +			else
> +				memcpy(buffer, xattr->value, xattr->size);
>  		}
> -		read_unlock(&xattrs->lock);
>  	}
>  	return ret;
>  }
> @@ -1398,6 +1337,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
>   * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
>   * XATTR_REPLACE we fail as mentioned above.
>   *
> + * Note: Callers must externally serialize writes. All current callers hold
> + * the inode lock for write operations. The lookup->replace/remove sequence
> + * is not atomic with respect to the rhashtable's per-bucket locking, but
> + * is safe because writes are serialized by the caller.
> + *
>   * Return: On success, the removed or replaced xattr is returned, to be freed
>   * by the caller; or NULL if none. On failure a negative error code is returned.
>   */
> @@ -1406,7 +1350,7 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
>  				      size_t size, int flags)
>  {
>  	struct simple_xattr *old_xattr = NULL;
> -	int err = 0;
> +	int err;
>  
>  	CLASS(simple_xattr, new_xattr)(value, size);
>  	if (IS_ERR(new_xattr))
> @@ -1418,119 +1362,52 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
>  			return ERR_PTR(-ENOMEM);
>  	}
>  
> -	if (xattrs->use_rhashtable) {
> -		/*
> -		 * Lookup is safe without RCU here since writes are
> -		 * serialized by the caller.
> -		 */
> -		old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
> -						   simple_xattr_params);
> -
> -		if (old_xattr) {
> -			/* Fail if XATTR_CREATE is requested and the xattr exists. */
> -			if (flags & XATTR_CREATE)
> -				return ERR_PTR(-EEXIST);
> -
> -			if (new_xattr) {
> -				err = rhashtable_replace_fast(&xattrs->ht,
> -							     &old_xattr->hash_node,
> -							     &new_xattr->hash_node,
> -							     simple_xattr_params);
> -				if (err)
> -					return ERR_PTR(err);
> -			} else {
> -				err = rhashtable_remove_fast(&xattrs->ht,
> -							    &old_xattr->hash_node,
> -							    simple_xattr_params);
> -				if (err)
> -					return ERR_PTR(err);
> -			}
> -		} else {
> -			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
> -			if (flags & XATTR_REPLACE)
> -				return ERR_PTR(-ENODATA);
> -
> -			/*
> -			 * If XATTR_CREATE or no flags are specified together
> -			 * with a new value simply insert it.
> -			 */
> -			if (new_xattr) {
> -				err = rhashtable_insert_fast(&xattrs->ht,
> -							    &new_xattr->hash_node,
> -							    simple_xattr_params);
> -				if (err)
> -					return ERR_PTR(err);
> -			}
> -
> -			/*
> -			 * If XATTR_CREATE or no flags are specified and
> -			 * neither an old or new xattr exist then we don't
> -			 * need to do anything.
> -			 */
> -		}
> -	} else {
> -		struct rb_node *parent = NULL, **rbp;
> -		int ret;
> -
> -		write_lock(&xattrs->lock);
> -		rbp = &xattrs->rb_root.rb_node;
> -		while (*rbp) {
> -			parent = *rbp;
> -			ret = rbtree_simple_xattr_cmp(name, *rbp);
> -			if (ret < 0)
> -				rbp = &(*rbp)->rb_left;
> -			else if (ret > 0)
> -				rbp = &(*rbp)->rb_right;
> -			else
> -				old_xattr = rb_entry(*rbp, struct simple_xattr,
> -						     rb_node);
> -			if (old_xattr)
> -				break;
> -		}
> +	/* Lookup is safe without RCU here since writes are serialized. */
> +	old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
> +					   simple_xattr_params);
>  
> -		if (old_xattr) {
> -			/* Fail if XATTR_CREATE is requested and the xattr exists. */
> -			if (flags & XATTR_CREATE) {
> -				err = -EEXIST;
> -				goto out_unlock;
> -			}
> +	if (old_xattr) {
> +		/* Fail if XATTR_CREATE is requested and the xattr exists. */
> +		if (flags & XATTR_CREATE)
> +			return ERR_PTR(-EEXIST);
>  
> -			if (new_xattr)
> -				rb_replace_node(&old_xattr->rb_node,
> -						&new_xattr->rb_node,
> -						&xattrs->rb_root);
> -			else
> -				rb_erase(&old_xattr->rb_node,
> -					 &xattrs->rb_root);
> +		if (new_xattr) {
> +			err = rhashtable_replace_fast(&xattrs->ht,
> +						      &old_xattr->hash_node,
> +						      &new_xattr->hash_node,
> +						      simple_xattr_params);
> +			if (err)
> +				return ERR_PTR(err);
>  		} else {
> -			/* Fail if XATTR_REPLACE is requested but no xattr is found. */
> -			if (flags & XATTR_REPLACE) {
> -				err = -ENODATA;
> -				goto out_unlock;
> -			}
> -
> -			/*
> -			 * If XATTR_CREATE or no flags are specified together
> -			 * with a new value simply insert it.
> -			 */
> -			if (new_xattr) {
> -				rb_link_node(&new_xattr->rb_node, parent, rbp);
> -				rb_insert_color(&new_xattr->rb_node,
> -						&xattrs->rb_root);
> -			}
> +			err = rhashtable_remove_fast(&xattrs->ht,
> +						     &old_xattr->hash_node,
> +						     simple_xattr_params);
> +			if (err)
> +				return ERR_PTR(err);
> +		}
> +	} else {
> +		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
> +		if (flags & XATTR_REPLACE)
> +			return ERR_PTR(-ENODATA);
>  
> -			/*
> -			 * If XATTR_CREATE or no flags are specified and
> -			 * neither an old or new xattr exist then we don't
> -			 * need to do anything.
> -			 */
> +		/*
> +		 * If XATTR_CREATE or no flags are specified together with a
> +		 * new value simply insert it.
> +		 */
> +		if (new_xattr) {
> +			err = rhashtable_insert_fast(&xattrs->ht,
> +						     &new_xattr->hash_node,
> +						     simple_xattr_params);
> +			if (err)
> +				return ERR_PTR(err);
>  		}
>  
> -out_unlock:
> -		write_unlock(&xattrs->lock);
> -		if (err)
> -			return ERR_PTR(err);
> +		/*
> +		 * If XATTR_CREATE or no flags are specified and neither an
> +		 * old or new xattr exist then we don't need to do anything.
> +		 */
>  	}
> +
>  	retain_and_null_ptr(new_xattr);
>  	return old_xattr;
>  }
> @@ -1572,6 +1449,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
>  			  char *buffer, size_t size)
>  {
>  	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
> +	struct rhashtable_iter iter;
>  	struct simple_xattr *xattr;
>  	ssize_t remaining_size = size;
>  	int err = 0;
> @@ -1595,77 +1473,34 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
>  	if (!xattrs)
>  		return size - remaining_size;
>  
> -	if (xattrs->use_rhashtable) {
> -		struct rhashtable_iter iter;
> -
> -		rhashtable_walk_enter(&xattrs->ht, &iter);
> -		rhashtable_walk_start(&iter);
> -
> -		while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
> -			if (IS_ERR(xattr)) {
> -				if (PTR_ERR(xattr) == -EAGAIN)
> -					continue;
> -				err = PTR_ERR(xattr);
> -				break;
> -			}
> -
> -			/* skip "trusted." attributes for unprivileged callers */
> -			if (!trusted && xattr_is_trusted(xattr->name))
> -				continue;
> +	rhashtable_walk_enter(&xattrs->ht, &iter);
> +	rhashtable_walk_start(&iter);
>  
> -			/* skip MAC labels; these are provided by LSM above */
> -			if (xattr_is_maclabel(xattr->name))
> +	while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
> +		if (IS_ERR(xattr)) {
> +			if (PTR_ERR(xattr) == -EAGAIN)
>  				continue;
> -
> -			err = xattr_list_one(&buffer, &remaining_size,
> -					     xattr->name);
> -			if (err)
> -				break;
> +			err = PTR_ERR(xattr);
> +			break;
>  		}
>  
> -		rhashtable_walk_stop(&iter);
> -		rhashtable_walk_exit(&iter);
> -	} else {
> -		struct rb_node *rbp;
> -
> -		read_lock(&xattrs->lock);
> -		for (rbp = rb_first(&xattrs->rb_root); rbp;
> -		     rbp = rb_next(rbp)) {
> -			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
> -
> -			/* skip "trusted." attributes for unprivileged callers */
> -			if (!trusted && xattr_is_trusted(xattr->name))
> -				continue;
> +		/* skip "trusted." attributes for unprivileged callers */
> +		if (!trusted && xattr_is_trusted(xattr->name))
> +			continue;
>  
> -			/* skip MAC labels; these are provided by LSM above */
> -			if (xattr_is_maclabel(xattr->name))
> -				continue;
> +		/* skip MAC labels; these are provided by LSM above */
> +		if (xattr_is_maclabel(xattr->name))
> +			continue;
>  
> -			err = xattr_list_one(&buffer, &remaining_size,
> -					     xattr->name);
> -			if (err)
> -				break;
> -		}
> -		read_unlock(&xattrs->lock);
> +		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
> +		if (err)
> +			break;
>  	}
>  
> -	return err ? err : size - remaining_size;
> -}
> +	rhashtable_walk_stop(&iter);
> +	rhashtable_walk_exit(&iter);
>  
> -/**
> - * rbtree_simple_xattr_less - compare two xattr rbtree nodes
> - * @new_node: new node
> - * @node: current node
> - *
> - * Compare the xattr attached to @new_node with the xattr attached to @node.
> - * Note that this function technically tolerates duplicate entries.
> - *
> - * Return: True if insertion point in the rbtree is found.
> - */
> -static bool rbtree_simple_xattr_less(struct rb_node *new_node,
> -				     const struct rb_node *node)
> -{
> -	return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
> +	return err ? err : size - remaining_size;
>  }
>  
>  /**
> @@ -1676,33 +1511,29 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
>   * Add an xattr object to @xattrs. This assumes no replacement or removal
>   * of matching xattrs is wanted. Should only be called during inode
>   * initialization when a few distinct initial xattrs are supposed to be set.
> + *
> + * Return: On success zero is returned. On failure a negative error code is
> + * returned.
>   */
>  int simple_xattr_add(struct simple_xattrs *xattrs,
>  		     struct simple_xattr *new_xattr)
>  {
> -	if (xattrs->use_rhashtable)
> -		return rhashtable_insert_fast(&xattrs->ht,
> -					      &new_xattr->hash_node,
> -					      simple_xattr_params);
> -
> -	write_lock(&xattrs->lock);
> -	rb_add(&new_xattr->rb_node, &xattrs->rb_root,
> -	       rbtree_simple_xattr_less);
> -	write_unlock(&xattrs->lock);
> -	return 0;
> +	return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
> +				      simple_xattr_params);
>  }
>  
>  /**
>   * simple_xattrs_init - initialize new xattr header
>   * @xattrs: header to initialize
>   *
> - * Initialize relevant fields of a an xattr header.
> + * Initialize the rhashtable used to store xattr objects.
> + *
> + * Return: On success zero is returned. On failure a negative error code is
> + * returned.
>   */
> -void simple_xattrs_init(struct simple_xattrs *xattrs)
> +int simple_xattrs_init(struct simple_xattrs *xattrs)
>  {
> -	xattrs->use_rhashtable = false;
> -	xattrs->rb_root = RB_ROOT;
> -	rwlock_init(&xattrs->lock);
> +	return rhashtable_init(&xattrs->ht, &simple_xattr_params);
>  }
>  
>  /**
> @@ -1710,7 +1541,8 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
>   *
>   * Dynamically allocate a simple_xattrs header and initialize the
>   * underlying rhashtable. This is intended for consumers that want
> - * rhashtable-based xattr storage.
> + * to lazily allocate xattr storage only when the first xattr is set,
> + * avoiding the per-inode rhashtable overhead when no xattrs are used.
>   *
>   * Return: On success a new simple_xattrs is returned. On failure an
>   * ERR_PTR is returned.
> @@ -1718,14 +1550,15 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
>  struct simple_xattrs *simple_xattrs_alloc(void)
>  {
>  	struct simple_xattrs *xattrs __free(kfree) = NULL;
> +	int ret;
>  
>  	xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
>  	if (!xattrs)
>  		return ERR_PTR(-ENOMEM);
>  
> -	xattrs->use_rhashtable = true;
> -	if (rhashtable_init(&xattrs->ht, &simple_xattr_params))
> -		return ERR_PTR(-ENOMEM);
> +	ret = simple_xattrs_init(xattrs);
> +	if (ret)
> +		return ERR_PTR(ret);
>  
>  	return no_free_ptr(xattrs);
>  }
> @@ -1784,28 +1617,10 @@ static void simple_xattr_ht_free(void *ptr, void *arg)
>   */
>  void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
>  {
> +	might_sleep();
> +
>  	if (freed_space)
>  		*freed_space = 0;
> -
> -	if (xattrs->use_rhashtable) {
> -		rhashtable_free_and_destroy(&xattrs->ht,
> -					    simple_xattr_ht_free, freed_space);
> -	} else {
> -		struct rb_node *rbp;
> -
> -		rbp = rb_first(&xattrs->rb_root);
> -		while (rbp) {
> -			struct simple_xattr *xattr;
> -			struct rb_node *rbp_next;
> -
> -			rbp_next = rb_next(rbp);
> -			xattr = rb_entry(rbp, struct simple_xattr, rb_node);
> -			rb_erase(&xattr->rb_node, &xattrs->rb_root);
> -			if (freed_space)
> -				*freed_space += simple_xattr_space(xattr->name,
> -								   xattr->size);
> -			simple_xattr_free(xattr);
> -			rbp = rbp_next;
> -		}
> -	}
> +	rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
> +				    freed_space);
>  }
> diff --git a/include/linux/xattr.h b/include/linux/xattr.h
> index 3063ecf0004d..f60357d9f938 100644
> --- a/include/linux/xattr.h
> +++ b/include/linux/xattr.h
> @@ -107,18 +107,10 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
>  }
>  
>  struct simple_xattrs {
> -	bool use_rhashtable;
> -	union {
> -		struct {
> -			struct rb_root rb_root;
> -			rwlock_t lock;
> -		};
> -		struct rhashtable ht;
> -	};
> +	struct rhashtable ht;
>  };
>  
>  struct simple_xattr {
> -	struct rb_node rb_node;
>  	struct rhash_head hash_node;
>  	struct rcu_head rcu;
>  	char *name;
> @@ -126,7 +118,7 @@ struct simple_xattr {
>  	char value[];
>  };
>  
> -void simple_xattrs_init(struct simple_xattrs *xattrs);
> +int simple_xattrs_init(struct simple_xattrs *xattrs);
>  struct simple_xattrs *simple_xattrs_alloc(void);
>  struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
>  					       const void *value, int flags);
> 
> -- 
> 2.47.3
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR