fs/xattr.c | 109 ++++++++++++++++++++++++++++++++---------- include/linux/xattr.h | 13 +++-- 2 files changed, 92 insertions(+), 30 deletions(-)
The patch was announced here:
https://lore.kernel.org/all/62188f37-f816-08e9-cdd5-8df23131746d@openvz.org/
"5) simple_xattrs: replace list to rb-tree
This significantly reduces the search time for existing entries."
It was compiled but was not tested yet.
---
Currently simple_xattr uses a list to store existing entries.
If the list grows, the presence check may be slow and potentially
lead to problems. Red-black tree should work more efficiently
in this situation.
This patch replaces list to rb_tree and switches simple_xattr_* calls
to its using.
Signed-off-by: Vasily Averin <vvs@openvz.org>
---
fs/xattr.c | 109 ++++++++++++++++++++++++++++++++----------
include/linux/xattr.h | 13 +++--
2 files changed, 92 insertions(+), 30 deletions(-)
diff --git a/fs/xattr.c b/fs/xattr.c
index 6401703707f2..672f2214fcfd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1021,6 +1021,60 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
return new_xattr;
}
+static struct simple_xattr *simple_xattr_rb_search(struct rb_root *root,
+ const char* name)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new)
+ {
+ struct simple_xattr *xattr;
+ int result;
+
+ xattr = container_of(*new, struct simple_xattr, node);
+ result = strcmp(xattr->name, name);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ return xattr;
+ }
+ return NULL;
+}
+
+static bool simple_xattr_rb_insert(struct rb_root *root,
+ struct simple_xattr *new_xattr)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct simple_xattr *xattr;
+ int result;
+
+ xattr = container_of(*new, struct simple_xattr, node);
+ result = strcmp(xattr->name, new_xattr->name);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ return false;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&new_xattr->node, parent, new);
+ rb_insert_color(&new_xattr->node, root);
+
+ return true;
+}
+
/*
* xattr GET operation for in-memory/pseudo filesystems
*/
@@ -1031,10 +1085,8 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
int ret = -ENODATA;
spin_lock(&xattrs->lock);
- list_for_each_entry(xattr, &xattrs->head, list) {
- if (strcmp(name, xattr->name))
- continue;
-
+ xattr = simple_xattr_rb_search(&xattrs->rb_root, name);
+ if (xattr) {
ret = xattr->size;
if (buffer) {
if (size < xattr->size)
@@ -1042,7 +1094,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
else
memcpy(buffer, xattr->value, xattr->size);
}
- break;
}
spin_unlock(&xattrs->lock);
return ret;
@@ -1067,7 +1118,7 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
const void *value, size_t size, int flags,
ssize_t *removed_size)
{
- struct simple_xattr *xattr;
+ struct simple_xattr *xattr = NULL;
struct simple_xattr *new_xattr = NULL;
int err = 0;
@@ -1088,31 +1139,36 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
}
spin_lock(&xattrs->lock);
- list_for_each_entry(xattr, &xattrs->head, list) {
- if (!strcmp(name, xattr->name)) {
- if (flags & XATTR_CREATE) {
- xattr = new_xattr;
- err = -EEXIST;
- } else if (new_xattr) {
- list_replace(&xattr->list, &new_xattr->list);
+ if ((flags & XATTR_CREATE) && new_xattr) {
+ /* create new */
+ if (!simple_xattr_rb_insert(&xattrs->rb_root, new_xattr)) {
+ /* already exist */
+ xattr = new_xattr;
+ err = -EEXIST;
+ }
+ } else {
+ /* replace or remove */
+ xattr = simple_xattr_rb_search(&xattrs->rb_root, name);
+ if (xattr) {
+ /* found */
+ if (!new_xattr) {
+ /* remove existing */
+ rb_erase(&xattr->node, &xattrs->rb_root);
if (removed_size)
*removed_size = xattr->size;
} else {
- list_del(&xattr->list);
+ /* replace existing */
+ rb_replace_node(&xattr->node, &new_xattr->node,
+ &xattrs->rb_root);
if (removed_size)
*removed_size = xattr->size;
}
- goto out;
+ } else if (new_xattr) {
+ /* not found, incorrect replace */
+ xattr = new_xattr;
+ err = -ENODATA;
}
}
- if (flags & XATTR_REPLACE) {
- xattr = new_xattr;
- err = -ENODATA;
- } else {
- list_add(&new_xattr->list, &xattrs->head);
- xattr = NULL;
- }
-out:
spin_unlock(&xattrs->lock);
if (xattr) {
kfree(xattr->name);
@@ -1149,6 +1205,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
{
bool trusted = capable(CAP_SYS_ADMIN);
struct simple_xattr *xattr;
+ struct rb_node *node;
ssize_t remaining_size = size;
int err = 0;
@@ -1170,7 +1227,9 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
#endif
spin_lock(&xattrs->lock);
- list_for_each_entry(xattr, &xattrs->head, list) {
+ for (node = rb_first(&xattrs->rb_root); node; node = rb_next(node)) {
+ xattr = container_of(node, struct simple_xattr, node);
+
/* skip "trusted." attributes for unprivileged callers */
if (!trusted && xattr_is_trusted(xattr->name))
continue;
@@ -1191,6 +1250,6 @@ void simple_xattr_list_add(struct simple_xattrs *xattrs,
struct simple_xattr *new_xattr)
{
spin_lock(&xattrs->lock);
- list_add(&new_xattr->list, &xattrs->head);
+ simple_xattr_rb_insert(&xattrs->rb_root, new_xattr);
spin_unlock(&xattrs->lock);
}
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 979a9d3e5bfb..bbe81cfb7a4d 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -80,12 +80,12 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
}
struct simple_xattrs {
- struct list_head head;
+ struct rb_root rb_root;
spinlock_t lock;
};
struct simple_xattr {
- struct list_head list;
+ struct rb_node node;
char *name;
size_t size;
char value[];
@@ -96,7 +96,7 @@ struct simple_xattr {
*/
static inline void simple_xattrs_init(struct simple_xattrs *xattrs)
{
- INIT_LIST_HEAD(&xattrs->head);
+ xattrs->rb_root = RB_ROOT;
spin_lock_init(&xattrs->lock);
}
@@ -105,9 +105,12 @@ static inline void simple_xattrs_init(struct simple_xattrs *xattrs)
*/
static inline void simple_xattrs_free(struct simple_xattrs *xattrs)
{
- struct simple_xattr *xattr, *node;
+ struct simple_xattr *xattr;
+ struct rb_node *node;
- list_for_each_entry_safe(xattr, node, &xattrs->head, list) {
+ while ((node = rb_first(&xattrs->rb_root))) {
+ rb_erase(node, &xattrs->rb_root);
+ xattr = container_of(node, struct simple_xattr, node);
kfree(xattr->name);
kvfree(xattr);
}
--
2.34.1
On Thu, Aug 18, 2022 at 12:12:30PM +0300, Vasily Averin wrote:
> The patch was announced here:
> https://lore.kernel.org/all/62188f37-f816-08e9-cdd5-8df23131746d@openvz.org/
> "5) simple_xattrs: replace list to rb-tree
> This significantly reduces the search time for existing entries."
>
> It was compiled but was not tested yet.
> ---
> Currently simple_xattr uses a list to store existing entries.
> If the list grows, the presence check may be slow and potentially
> lead to problems. Red-black tree should work more efficiently
> in this situation.
>
> This patch replaces list to rb_tree and switches simple_xattr_* calls
> to its using.
>
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
I think the background for the performance issues in the commit message
would be helpful and I have a few comments. Also, trying to test whether the
lockups are gone due to the rbtree switch would be +1.
This will likely conflict with some acl/xattr changes I have lined up so
if we decide to proceed I wouldn't mind dealing with this series if
there are no objections.
> fs/xattr.c | 109 ++++++++++++++++++++++++++++++++----------
> include/linux/xattr.h | 13 +++--
> 2 files changed, 92 insertions(+), 30 deletions(-)
>
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 6401703707f2..672f2214fcfd 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -1021,6 +1021,60 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
> return new_xattr;
> }
>
> +static struct simple_xattr *simple_xattr_rb_search(struct rb_root *root,
> + const char* name)
> +{
> + struct rb_node **new = &(root->rb_node), *parent = NULL;
I'd suggest to not name this "new" but rather just "cur" or "node".
> +
> + /* Figure out where to put new node */
> + while (*new)
> + {
nit: that "{" should be on the same line as the while
> + struct simple_xattr *xattr;
> + int result;
> +
> + xattr = container_of(*new, struct simple_xattr, node);
> + result = strcmp(xattr->name, name);
> +
> + parent = *new;
That variable and assignment seems unnecessary?
> + if (result < 0)
> + new = &((*new)->rb_left);
> + else if (result > 0)
> + new = &((*new)->rb_right);
> + else
> + return xattr;
> + }
> + return NULL;
> +}
> +
> +static bool simple_xattr_rb_insert(struct rb_root *root,
> + struct simple_xattr *new_xattr)
> +{
> + struct rb_node **new = &(root->rb_node), *parent = NULL;
> +
> + /* Figure out where to put new node */
> + while (*new) {
> + struct simple_xattr *xattr;
> + int result;
> +
> + xattr = container_of(*new, struct simple_xattr, node);
> + result = strcmp(xattr->name, new_xattr->name);
> +
> + parent = *new;
> + if (result < 0)
> + new = &((*new)->rb_left);
> + else if (result > 0)
> + new = &((*new)->rb_right);
> + else
> + return false;
> + }
> +
> + /* Add new node and rebalance tree. */
> + rb_link_node(&new_xattr->node, parent, new);
> + rb_insert_color(&new_xattr->node, root);
> +
> + return true;
> +}
> +
> /*
> * xattr GET operation for in-memory/pseudo filesystems
> */
> @@ -1031,10 +1085,8 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
> int ret = -ENODATA;
>
> spin_lock(&xattrs->lock);
> - list_for_each_entry(xattr, &xattrs->head, list) {
> - if (strcmp(name, xattr->name))
> - continue;
> -
> + xattr = simple_xattr_rb_search(&xattrs->rb_root, name);
> + if (xattr) {
> ret = xattr->size;
> if (buffer) {
> if (size < xattr->size)
> @@ -1042,7 +1094,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
> else
> memcpy(buffer, xattr->value, xattr->size);
> }
> - break;
> }
> spin_unlock(&xattrs->lock);
> return ret;
> @@ -1067,7 +1118,7 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
> const void *value, size_t size, int flags,
> ssize_t *removed_size)
> {
> - struct simple_xattr *xattr;
> + struct simple_xattr *xattr = NULL;
> struct simple_xattr *new_xattr = NULL;
> int err = 0;
>
> @@ -1088,31 +1139,36 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
> }
>
> spin_lock(&xattrs->lock);
> - list_for_each_entry(xattr, &xattrs->head, list) {
> - if (!strcmp(name, xattr->name)) {
> - if (flags & XATTR_CREATE) {
> - xattr = new_xattr;
> - err = -EEXIST;
> - } else if (new_xattr) {
> - list_replace(&xattr->list, &new_xattr->list);
> + if ((flags & XATTR_CREATE) && new_xattr) {
> + /* create new */
> + if (!simple_xattr_rb_insert(&xattrs->rb_root, new_xattr)) {
> + /* already exist */
> + xattr = new_xattr;
> + err = -EEXIST;
> + }
> + } else {
> + /* replace or remove */
> + xattr = simple_xattr_rb_search(&xattrs->rb_root, name);
> + if (xattr) {
> + /* found */
> + if (!new_xattr) {
> + /* remove existing */
> + rb_erase(&xattr->node, &xattrs->rb_root);
> if (removed_size)
> *removed_size = xattr->size;
> } else {
> - list_del(&xattr->list);
> + /* replace existing */
> + rb_replace_node(&xattr->node, &new_xattr->node,
> + &xattrs->rb_root);
> if (removed_size)
> *removed_size = xattr->size;
> }
> - goto out;
> + } else if (new_xattr) {
> + /* not found, incorrect replace */
> + xattr = new_xattr;
> + err = -ENODATA;
> }
> }
> - if (flags & XATTR_REPLACE) {
I think keeping this rather close to the original code might be nicer.
I find the code more difficult to follow afterwards. So how about
(COMPLETELY UNTESTED) sm like:
@@ -1077,30 +1139,40 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
}
spin_lock(&xattrs->lock);
- list_for_each_entry(xattr, &xattrs->head, list) {
- if (!strcmp(name, xattr->name)) {
- if (flags & XATTR_CREATE) {
- xattr = new_xattr;
- err = -EEXIST;
- } else if (new_xattr) {
- list_replace(&xattr->list, &new_xattr->list);
- if (removed_size)
- *removed_size = xattr->size;
- } else {
- list_del(&xattr->list);
- if (removed_size)
- *removed_size = xattr->size;
- }
- goto out;
+ /* Find any matching xattr by name. */
+ xattr = simple_xattr_rb_search(&xattrs->rb_root, name);
+ if (xattr) {
+ if (flags & XATTR_CREATE) {
+ /* Creating request but the xattr already existed. */
+ xattr = new_xattr;
+ err = -EEXIST;
+ } else if (new_xattr) {
+ /* Replace the existing xattr. */
+ rb_replace_node(&xattr->node, &new_xattr->node,
+ &xattrs->rb_root);
+ if (removed_size)
+ *removed_size = xattr->size;
+ } else {
+ /* No new xattr specified so wipe the existing xattr. */
+ rb_erase(&xattr->node, &xattrs->rb_root);
+ if (removed_size)
+ *removed_size = xattr->size;
}
+ goto out;
}
+
if (flags & XATTR_REPLACE) {
+ /* There's no matching xattr so fail on replace. */
xattr = new_xattr;
err = -ENODATA;
} else {
- list_add(&new_xattr->list, &xattrs->head);
- xattr = NULL;
+ /*
+ * We're holding the lock and verified that there's no
+ * pre-existing xattr so this should always succeed.
+ */
+ WARN_ON(!simple_xattr_rb_insert(&xattrs->rb_root, new_xattr))
}
+
out:
spin_unlock(&xattrs->lock);
if (xattr) {
> - xattr = new_xattr;
> - err = -ENODATA;
> - } else {
> - list_add(&new_xattr->list, &xattrs->head);
> - xattr = NULL;
> - }
> -out:
> spin_unlock(&xattrs->lock);
> if (xattr) {
> kfree(xattr->name);
> @@ -1149,6 +1205,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
> {
> bool trusted = capable(CAP_SYS_ADMIN);
> struct simple_xattr *xattr;
> + struct rb_node *node;
> ssize_t remaining_size = size;
> int err = 0;
>
> @@ -1170,7 +1227,9 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
> #endif
>
> spin_lock(&xattrs->lock);
> - list_for_each_entry(xattr, &xattrs->head, list) {
> + for (node = rb_first(&xattrs->rb_root); node; node = rb_next(node)) {
> + xattr = container_of(node, struct simple_xattr, node);
> +
> /* skip "trusted." attributes for unprivileged callers */
> if (!trusted && xattr_is_trusted(xattr->name))
> continue;
> @@ -1191,6 +1250,6 @@ void simple_xattr_list_add(struct simple_xattrs *xattrs,
> struct simple_xattr *new_xattr)
> {
> spin_lock(&xattrs->lock);
> - list_add(&new_xattr->list, &xattrs->head);
> + simple_xattr_rb_insert(&xattrs->rb_root, new_xattr);
> spin_unlock(&xattrs->lock);
> }
> diff --git a/include/linux/xattr.h b/include/linux/xattr.h
> index 979a9d3e5bfb..bbe81cfb7a4d 100644
> --- a/include/linux/xattr.h
> +++ b/include/linux/xattr.h
> @@ -80,12 +80,12 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
> }
>
> struct simple_xattrs {
> - struct list_head head;
> + struct rb_root rb_root;
> spinlock_t lock;
> };
>
> struct simple_xattr {
> - struct list_head list;
> + struct rb_node node;
> char *name;
> size_t size;
> char value[];
> @@ -96,7 +96,7 @@ struct simple_xattr {
> */
> static inline void simple_xattrs_init(struct simple_xattrs *xattrs)
> {
> - INIT_LIST_HEAD(&xattrs->head);
> + xattrs->rb_root = RB_ROOT;
> spin_lock_init(&xattrs->lock);
> }
>
> @@ -105,9 +105,12 @@ static inline void simple_xattrs_init(struct simple_xattrs *xattrs)
> */
> static inline void simple_xattrs_free(struct simple_xattrs *xattrs)
> {
> - struct simple_xattr *xattr, *node;
> + struct simple_xattr *xattr;
> + struct rb_node *node;
>
> - list_for_each_entry_safe(xattr, node, &xattrs->head, list) {
> + while ((node = rb_first(&xattrs->rb_root))) {
> + rb_erase(node, &xattrs->rb_root);
> + xattr = container_of(node, struct simple_xattr, node);
> kfree(xattr->name);
> kvfree(xattr);
> }
> --
> 2.34.1
>
>
On 8/18/22 16:19, Christian Brauner wrote:
> On Thu, Aug 18, 2022 at 12:12:30PM +0300, Vasily Averin wrote:
>> The patch was announced here:
>> https://lore.kernel.org/all/62188f37-f816-08e9-cdd5-8df23131746d@openvz.org/
>> "5) simple_xattrs: replace list to rb-tree
>> This significantly reduces the search time for existing entries."
>>
>> It was compiled but was not tested yet.
>> ---
>> Currently simple_xattr uses a list to store existing entries.
>> If the list grows, the presence check may be slow and potentially
>> lead to problems. Red-black tree should work more efficiently
>> in this situation.
>>
>> This patch replaces list to rb_tree and switches simple_xattr_* calls
>> to its using.
>>
>> Signed-off-by: Vasily Averin <vvs@openvz.org>
>> ---
>
> I think the background for the performance issues in the commit message
> would be helpful and I have a few comments. Also, trying to test whether the
> lockups are gone due to the rbtree switch would be +1.
>
> This will likely conflict with some acl/xattr changes I have lined up so
> if we decide to proceed I wouldn't mind dealing with this series if
> there are no objections.
I would be very grateful if you pick up this issue.
Unfortunately I do not have enough time to process it properly.
I'm agree with all your remarks, however I would like to comment following one.
> I think keeping this rather close to the original code might be nicer.
> I find the code more difficult to follow afterwards. So how about
> (COMPLETELY UNTESTED) sm like:
I had this idea too, however it have one disadvantage in rb-tree scenario:
in the most typical case, when adding a new entry, we run through the tree twice:
first in simple_xattr_rb_search() and then in simple_xattr_rb_insert().
In my patch version we run through the rb-tree once only.
However now I think we can save closest neighbour on "search" stage,
and use it on "insert" stage. This should be safe because both functions
are called under the same spinlock.
> @@ -1077,30 +1139,40 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
> }
>
> spin_lock(&xattrs->lock);
> - list_for_each_entry(xattr, &xattrs->head, list) {
> - if (!strcmp(name, xattr->name)) {
> - if (flags & XATTR_CREATE) {
> - xattr = new_xattr;
> - err = -EEXIST;
> - } else if (new_xattr) {
> - list_replace(&xattr->list, &new_xattr->list);
> - if (removed_size)
> - *removed_size = xattr->size;
> - } else {
> - list_del(&xattr->list);
> - if (removed_size)
> - *removed_size = xattr->size;
> - }
> - goto out;
> + /* Find any matching xattr by name. */
> + xattr = simple_xattr_rb_search(&xattrs->rb_root, name);
> + if (xattr) {
> + if (flags & XATTR_CREATE) {
> + /* Creating request but the xattr already existed. */
> + xattr = new_xattr;
> + err = -EEXIST;
> + } else if (new_xattr) {
> + /* Replace the existing xattr. */
> + rb_replace_node(&xattr->node, &new_xattr->node,
> + &xattrs->rb_root);
> + if (removed_size)
> + *removed_size = xattr->size;
> + } else {
> + /* No new xattr specified so wipe the existing xattr. */
> + rb_erase(&xattr->node, &xattrs->rb_root);
> + if (removed_size)
> + *removed_size = xattr->size;
> }
> + goto out;
> }
> +
> if (flags & XATTR_REPLACE) {
> + /* There's no matching xattr so fail on replace. */
> xattr = new_xattr;
> err = -ENODATA;
> } else {
> - list_add(&new_xattr->list, &xattrs->head);
> - xattr = NULL;
> + /*
> + * We're holding the lock and verified that there's no
> + * pre-existing xattr so this should always succeed.
> + */
> + WARN_ON(!simple_xattr_rb_insert(&xattrs->rb_root, new_xattr))
> }
> +
> out:
> spin_unlock(&xattrs->lock);
> if (xattr) {
>
>
>> - xattr = new_xattr;
>> - err = -ENODATA;
>> - } else {
>> - list_add(&new_xattr->list, &xattrs->head);
>> - xattr = NULL;
>> - }
>> -out:
>> spin_unlock(&xattrs->lock);
>> if (xattr) {
>> kfree(xattr->name);
© 2016 - 2026 Red Hat, Inc.