[PATCH v5 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()

André Almeida posted 9 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH v5 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by André Almeida 1 month, 2 weeks ago
To add overlayfs support for casefolded layers, create a new function,
ovl_casefold(), to enable case-insensitive strncmp() comparisons.

ovl_casefold() allocates a new buffer and stores the casefolded version
of the string in it. If the allocation or the casefold operation fails,
fall back to using the original string.

The case-insensitive name is then used in the rb-tree search/insertion
operation. If the name is found in the rb-tree, the name can be
discarded and the buffer is freed. If the name isn't found, it's then
stored at struct ovl_cache_entry to be used later.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
Changes from v4:
 - Move the consumer/free buffer logic out to the caller
 - s/aux/c_name

Changes from v3:
 - Improve commit message text
 - s/OVL_NAME_LEN/NAME_MAX
 - drop #ifdef in favor of if(IS_ENABLED)
 - use new helper sb_encoding
 - merged patch "Store casefold name..." and "Create ovl_casefold()..."
 - Guard all the casefolding inside of IS_ENABLED(UNICODE)

Changes from v2:
- Refactor the patch to do a single kmalloc() per rb_tree operation
- Instead of casefolding the cache entry name everytime per strncmp(),
  casefold it once and reuse it for every strncmp().
---
 fs/overlayfs/readdir.c | 115 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 97 insertions(+), 18 deletions(-)

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index b65cdfce31ce27172d28d879559f1008b9c87320..803ac6a7516d0156ae7793ee1ff884dbbf2e20b0 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -27,6 +27,8 @@ struct ovl_cache_entry {
 	bool is_upper;
 	bool is_whiteout;
 	bool check_xwhiteout;
+	const char *cf_name;
+	int cf_len;
 	char name[];
 };
 
@@ -45,6 +47,7 @@ struct ovl_readdir_data {
 	struct list_head *list;
 	struct list_head middle;
 	struct ovl_cache_entry *first_maybe_whiteout;
+	struct unicode_map *map;
 	int count;
 	int err;
 	bool is_upper;
@@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
 	return rb_entry(n, struct ovl_cache_entry, node);
 }
 
+/* Casefold @str into a new buffer at *dst; return its length, or 0 to use @str */
+static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
+{
+	const struct qstr qstr = { .name = str, .len = len };
+	int cf_len;
+
+	if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
+		return 0;
+
+	*dst = kmalloc(NAME_MAX, GFP_KERNEL);
+	if (!*dst)	/* test the buffer; dst itself was already dereferenced above */
+		return 0;
+
+	cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
+	if (cf_len > 0)
+		return cf_len;
+
+	kfree(*dst);	/* casefold failed: caller falls back to the original name */
+	return 0;
+}
+
 static bool ovl_cache_entry_find_link(const char *name, int len,
 				      struct rb_node ***link,
 				      struct rb_node **parent)
@@ -79,7 +103,7 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
 
 		*parent = *newp;
 		tmp = ovl_cache_entry_from_node(*newp);
-		cmp = strncmp(name, tmp->name, len);
+		cmp = strncmp(name, tmp->cf_name, tmp->cf_len);
 		if (cmp > 0)
 			newp = &tmp->node.rb_right;
 		else if (cmp < 0 || len < tmp->len)
@@ -101,7 +125,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
 	while (node) {
 		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
 
-		cmp = strncmp(name, p->name, len);
+		cmp = strncmp(name, p->cf_name, p->cf_len);
 		if (cmp > 0)
 			node = p->node.rb_right;
 		else if (cmp < 0 || len < p->len)
@@ -145,13 +169,16 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
 
 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 						   const char *name, int len,
+						   const char *cf_name, int cf_len,
 						   u64 ino, unsigned int d_type)
 {
 	struct ovl_cache_entry *p;
 
 	p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL);
-	if (!p)
+	if (!p) {
+		kfree(cf_name);
 		return NULL;
+	}
 
 	memcpy(p->name, name, len);
 	p->name[len] = '\0';
@@ -167,6 +194,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 	/* Defer check for overlay.whiteout to ovl_iterate() */
 	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
 
+	if (cf_name && cf_name != name) {
+		p->cf_name = cf_name;
+		p->cf_len = cf_len;
+	} else {
+		p->cf_name = p->name;
+		p->cf_len = len;
+	}
+
 	if (d_type == DT_CHR) {
 		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
 		rdd->first_maybe_whiteout = p;
@@ -174,48 +209,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 	return p;
 }
 
-static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
-				  const char *name, int len, u64 ino,
+/* Return 0 for found, 1 for added, <0 for error */
+static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+				  const char *name, int len,
+				  const char *cf_name, int cf_len,
+				  u64 ino,
 				  unsigned int d_type)
 {
 	struct rb_node **newp = &rdd->root->rb_node;
 	struct rb_node *parent = NULL;
 	struct ovl_cache_entry *p;
 
-	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
-		return true;
+	if (ovl_cache_entry_find_link(cf_name, cf_len, &newp, &parent))
+		return 0;
 
-	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
+	p = ovl_cache_entry_new(rdd, name, len, cf_name, cf_len, ino, d_type);
 	if (p == NULL) {
 		rdd->err = -ENOMEM;
-		return false;
+		return -ENOMEM;
 	}
 
 	list_add_tail(&p->l_node, rdd->list);
 	rb_link_node(&p->node, parent, newp);
 	rb_insert_color(&p->node, rdd->root);
 
-	return true;
+	return 1;
 }
 
-static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
+/* Return 0 for found, 1 for added, <0 for error */
+static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
 			   const char *name, int namelen,
+			   const char *cf_name, int cf_len,
 			   loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct ovl_cache_entry *p;
 
-	p = ovl_cache_entry_find(rdd->root, name, namelen);
+	p = ovl_cache_entry_find(rdd->root, cf_name, cf_len);
 	if (p) {
 		list_move_tail(&p->l_node, &rdd->middle);
+		return 0;
 	} else {
-		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+		p = ovl_cache_entry_new(rdd, name, namelen, cf_name, cf_len,
+					ino, d_type);
 		if (p == NULL)
 			rdd->err = -ENOMEM;
 		else
 			list_add_tail(&p->l_node, &rdd->middle);
 	}
 
-	return rdd->err == 0;
+	return rdd->err ?: 1;
 }
 
 void ovl_cache_free(struct list_head *list)
@@ -223,8 +265,11 @@ void ovl_cache_free(struct list_head *list)
 	struct ovl_cache_entry *p;
 	struct ovl_cache_entry *n;
 
-	list_for_each_entry_safe(p, n, list, l_node)
+	list_for_each_entry_safe(p, n, list, l_node) {
+		if (p->cf_name != p->name)
+			kfree(p->cf_name);
 		kfree(p);
+	}
 
 	INIT_LIST_HEAD(list);
 }
@@ -260,12 +305,38 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
 {
 	struct ovl_readdir_data *rdd =
 		container_of(ctx, struct ovl_readdir_data, ctx);
+	struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
+	char *cf_name = NULL;
+	int c_len = 0;
+	int ret;
+
+	const char *c_name = NULL;
+
+	if (ofs->casefold)
+		c_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
+
+	if (c_len <= 0) {
+		c_name = name;
+		c_len = namelen;
+	} else {
+		c_name = cf_name;
+	}
 
 	rdd->count++;
 	if (!rdd->is_lowest)
-		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
+		ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
 	else
-		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
+		ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
+
+	/*
+	 * If ret == 1, that means that c_name is being used as part of struct
+	 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
+	 * c_name was found in the rb-tree so we can free it here.
+	 */
+	if (ret != 1 && c_name != name)
+		kfree(c_name);
+
+	return ret >= 0;
 }
 
 static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
@@ -357,12 +428,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
 		.list = list,
 		.root = root,
 		.is_lowest = false,
+		.map = NULL,
 	};
 	int idx, next;
 	const struct ovl_layer *layer;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 
 	for (idx = 0; idx != -1; idx = next) {
 		next = ovl_path_next(idx, dentry, &realpath, &layer);
+
+		if (ofs->casefold)
+			rdd.map = sb_encoding(realpath.dentry->d_sb);
+
 		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
 		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
 					ovl_dentry_has_xwhiteouts(dentry);
@@ -555,7 +632,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
 		container_of(ctx, struct ovl_readdir_data, ctx);
 
 	rdd->count++;
-	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+	p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
 	if (p == NULL) {
 		rdd->err = -ENOMEM;
 		return false;
@@ -1023,6 +1100,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
 
 del_entry:
 		list_del(&p->l_node);
+		if (p->cf_name != p->name)
+			kfree(p->cf_name);
 		kfree(p);
 	}
 

-- 
2.50.1

Re: [PATCH v5 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 2 weeks ago
On Thu, Aug 14, 2025 at 7:22 PM André Almeida <andrealmeid@igalia.com> wrote:
>
> To add overlayfs support casefold layers, create a new function
> ovl_casefold(), to be able to do case-insensitive strncmp().
>
> ovl_casefold() allocates a new buffer and stores the casefolded version
> of the string on it. If the allocation or the casefold operation fails,
> fallback to use the original string.
>
> The case-insensitive name is then used in the rb-tree search/insertion
> operation. If the name is found in the rb-tree, the name can be
> discarded and the buffer is freed. If the name isn't found, it's then
> stored at struct ovl_cache_entry to be used later.
>
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
> Changes from v4:
>  - Move the consumer/free buffer logic out to the caller
>  - s/aux/c_name
>
> Changes from v3:
>  - Improve commit message text
>  - s/OVL_NAME_LEN/NAME_MAX
>  - drop #ifdef in favor of if(IS_ENABLED)
>  - use new helper sb_encoding
>  - merged patch "Store casefold name..." and "Create ovl_casefold()..."
>  - Guard all the casefolding inside of IS_ENABLED(UNICODE)
>
> Changes from v2:
> - Refactor the patch to do a single kmalloc() per rb_tree operation
> - Instead of casefolding the cache entry name everytime per strncmp(),
>   casefold it once and reuse it for every strncmp().
> ---
>  fs/overlayfs/readdir.c | 115 +++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 97 insertions(+), 18 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index b65cdfce31ce27172d28d879559f1008b9c87320..803ac6a7516d0156ae7793ee1ff884dbbf2e20b0 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>         bool is_upper;
>         bool is_whiteout;
>         bool check_xwhiteout;
> +       const char *cf_name;
> +       int cf_len;

We should also change these member names to c_name
Because they are the "compare/canonicalized" name, which
may or may not be casefolded.

>         char name[];
>  };
>
> @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>         struct list_head *list;
>         struct list_head middle;
>         struct ovl_cache_entry *first_maybe_whiteout;
> +       struct unicode_map *map;
>         int count;
>         int err;
>         bool is_upper;
> @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>         return rb_entry(n, struct ovl_cache_entry, node);
>  }
>
> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> +{
> +       const struct qstr qstr = { .name = str, .len = len };
> +       int cf_len;
> +
> +       if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> +               return 0;
> +
> +       *dst = kmalloc(NAME_MAX, GFP_KERNEL);
> +
> +       if (dst) {
> +               cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> +
> +               if (cf_len > 0)
> +                       return cf_len;
> +       }
> +
> +       kfree(*dst);
> +       return 0;
> +}
> +
>  static bool ovl_cache_entry_find_link(const char *name, int len,
>                                       struct rb_node ***link,
>                                       struct rb_node **parent)
> @@ -79,7 +103,7 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>
>                 *parent = *newp;
>                 tmp = ovl_cache_entry_from_node(*newp);
> -               cmp = strncmp(name, tmp->name, len);
> +               cmp = strncmp(name, tmp->cf_name, tmp->cf_len);
>                 if (cmp > 0)
>                         newp = &tmp->node.rb_right;
>                 else if (cmp < 0 || len < tmp->len)

This looks like a bug - should be len < tmp->c_len

> @@ -101,7 +125,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
>         while (node) {
>                 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
>
> -               cmp = strncmp(name, p->name, len);
> +               cmp = strncmp(name, p->cf_name, p->cf_len);
>                 if (cmp > 0)
>                         node = p->node.rb_right;
>                 else if (cmp < 0 || len < p->len)

Same here.

But it's not the only bug, because this patch regresses 3 fstests without
enabling any casefolding:

overlay/038 12s ...  [14:16:39] [14:16:50]- output mismatch (see
/results/overlay/results-large/overlay/038.out.bad)
    --- tests/overlay/038.out 2025-05-25 08:52:54.000000000 +0000
    +++ /results/overlay/results-large/overlay/038.out.bad 2025-08-17
14:16:50.549367654 +0000
    @@ -1,2 +1,3 @@
     QA output created by 038
    +Merged dir: Invalid d_ino reported for ..
     Silence is golden

overlay/041 11s ...  [14:16:54] [14:17:05]- output mismatch (see
/results/overlay/results-large/overlay/041.out.bad)
    --- tests/overlay/041.out 2025-05-25 08:52:54.000000000 +0000
    +++ /results/overlay/results-large/overlay/041.out.bad 2025-08-17
14:17:05.275206922 +0000
    @@ -1,2 +1,3 @@
     QA output created by 041
    +Merged dir: Invalid d_ino reported for ..
     Silence is golden

overlay/077 19s ...  [14:17:08][  107.348626] WARNING: CPU: 3 PID:
5414 at fs/overlayfs/readdir.c:677 ovl_dir_read_impure+0x178/0x1c0
[  107.354647] ---[ end trace 0000000000000000 ]---
[  107.399525] WARNING: CPU: 2 PID: 5415 at fs/overlayfs/readdir.c:677
ovl_dir_read_impure+0x178/0x1c0
[  107.406826] ---[ end trace 0000000000000000 ]---
_check_dmesg: something found in dmesg (see
/results/overlay/results-large/overlay/077.dmesg)
 [14:17:28]- output mismatch (see
/results/overlay/results-large/overlay/077.out.bad)
    --- tests/overlay/077.out 2025-05-25 08:52:54.000000000 +0000
    +++ /results/overlay/results-large/overlay/077.out.bad 2025-08-17
14:17:28.762250671 +0000
    @@ -1,2 +1,6 @@
     QA output created by 077
    +getdents: Input/output error
    +Missing created file in impure upper dir (see
/results/overlay/results-large/overlay/077.full for details)
    +getdents: Input/output error
    +Found unlinked file in impure upper dir (see
/results/overlay/results-large/overlay/077.full for details)
     Silence is golden

Thanks,
Amir.
Re: [PATCH v5 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by André Almeida 1 month, 1 week ago
Em 17/08/2025 11:33, Amir Goldstein escreveu:
> On Thu, Aug 14, 2025 at 7:22 PM André Almeida <andrealmeid@igalia.com> wrote:
>>
>> To add overlayfs support casefold layers, create a new function
>> ovl_casefold(), to be able to do case-insensitive strncmp().
>>
>> ovl_casefold() allocates a new buffer and stores the casefolded version
>> of the string on it. If the allocation or the casefold operation fails,
>> fallback to use the original string.
>>
>> The case-insensitive name is then used in the rb-tree search/insertion
>> operation. If the name is found in the rb-tree, the name can be
>> discarded and the buffer is freed. If the name isn't found, it's then
>> stored at struct ovl_cache_entry to be used later.
>>
>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>> ---
>> Changes from v4:
>>   - Move the consumer/free buffer logic out to the caller
>>   - s/aux/c_name
>>
>> Changes from v3:
>>   - Improve commit message text
>>   - s/OVL_NAME_LEN/NAME_MAX
>>   - drop #ifdef in favor of if(IS_ENABLED)
>>   - use new helper sb_encoding
>>   - merged patch "Store casefold name..." and "Create ovl_casefold()..."
>>   - Guard all the casefolding inside of IS_ENABLED(UNICODE)
>>
>> Changes from v2:
>> - Refactor the patch to do a single kmalloc() per rb_tree operation
>> - Instead of casefolding the cache entry name everytime per strncmp(),
>>    casefold it once and reuse it for every strncmp().
>> ---
>>   fs/overlayfs/readdir.c | 115 +++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 97 insertions(+), 18 deletions(-)
>>
>> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
>> index b65cdfce31ce27172d28d879559f1008b9c87320..803ac6a7516d0156ae7793ee1ff884dbbf2e20b0 100644
>> --- a/fs/overlayfs/readdir.c
>> +++ b/fs/overlayfs/readdir.c
>> @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>>          bool is_upper;
>>          bool is_whiteout;
>>          bool check_xwhiteout;
>> +       const char *cf_name;
>> +       int cf_len;
> 
> We should also change these member names to c_name
> Because they are the "compare/canonicalized" name, which
> may or may not be casefolded.
> 
>>          char name[];
>>   };
>>
>> @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>>          struct list_head *list;
>>          struct list_head middle;
>>          struct ovl_cache_entry *first_maybe_whiteout;
>> +       struct unicode_map *map;
>>          int count;
>>          int err;
>>          bool is_upper;
>> @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>>          return rb_entry(n, struct ovl_cache_entry, node);
>>   }
>>
>> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
>> +{
>> +       const struct qstr qstr = { .name = str, .len = len };
>> +       int cf_len;
>> +
>> +       if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
>> +               return 0;
>> +
>> +       *dst = kmalloc(NAME_MAX, GFP_KERNEL);
>> +
>> +       if (dst) {
>> +               cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
>> +
>> +               if (cf_len > 0)
>> +                       return cf_len;
>> +       }
>> +
>> +       kfree(*dst);
>> +       return 0;
>> +}
>> +
>>   static bool ovl_cache_entry_find_link(const char *name, int len,
>>                                        struct rb_node ***link,
>>                                        struct rb_node **parent)
>> @@ -79,7 +103,7 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>>
>>                  *parent = *newp;
>>                  tmp = ovl_cache_entry_from_node(*newp);
>> -               cmp = strncmp(name, tmp->name, len);
>> +               cmp = strncmp(name, tmp->cf_name, tmp->cf_len);
>>                  if (cmp > 0)
>>                          newp = &tmp->node.rb_right;
>>                  else if (cmp < 0 || len < tmp->len)
> 
> This looks like a bug - should be len < tmp->c_len
> 
>> @@ -101,7 +125,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
>>          while (node) {
>>                  struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
>>
>> -               cmp = strncmp(name, p->name, len);
>> +               cmp = strncmp(name, p->cf_name, p->cf_len);
>>                  if (cmp > 0)
>>                          node = p->node.rb_right;
>>                  else if (cmp < 0 || len < p->len)
> 
> Same here.
> 
> But it's not the only bug, because this patch regresses 3 fstests without
> enabling any casefolding:
> 

That was due to the following change:

-               cmp = strncmp(name, p->name, len);
+               cmp = strncmp(name, p->cf_name, p->cf_len);

Keeping len (instead of p->cf_len) as the third argument fixed it. I 
will send a v6 with that and the other changes.

> overlay/038 12s ...  [14:16:39] [14:16:50]- output mismatch (see
> /results/overlay/results-large/overlay/038.out.bad)
>      --- tests/overlay/038.out 2025-05-25 08:52:54.000000000 +0000
>      +++ /results/overlay/results-large/overlay/038.out.bad 2025-08-17
> 14:16:50.549367654 +0000
>      @@ -1,2 +1,3 @@
>       QA output created by 038
>      +Merged dir: Invalid d_ino reported for ..
>       Silence is golden
> 
> overlay/041 11s ...  [14:16:54] [14:17:05]- output mismatch (see
> /results/overlay/results-large/overlay/041.out.bad)
>      --- tests/overlay/041.out 2025-05-25 08:52:54.000000000 +0000
>      +++ /results/overlay/results-large/overlay/041.out.bad 2025-08-17
> 14:17:05.275206922 +0000
>      @@ -1,2 +1,3 @@
>       QA output created by 041
>      +Merged dir: Invalid d_ino reported for ..
>       Silence is golden
> 
> overlay/077 19s ...  [14:17:08][  107.348626] WARNING: CPU: 3 PID:
> 5414 at fs/overlayfs/readdir.c:677 ovl_dir_read_impure+0x178/0x1c0
> [  107.354647] ---[ end trace 0000000000000000 ]---
> [  107.399525] WARNING: CPU: 2 PID: 5415 at fs/overlayfs/readdir.c:677
> ovl_dir_read_impure+0x178/0x1c0
> [  107.406826] ---[ end trace 0000000000000000 ]---
> _check_dmesg: something found in dmesg (see
> /results/overlay/results-large/overlay/077.dmesg)
>   [14:17:28]- output mismatch (see
> /results/overlay/results-large/overlay/077.out.bad)
>      --- tests/overlay/077.out 2025-05-25 08:52:54.000000000 +0000
>      +++ /results/overlay/results-large/overlay/077.out.bad 2025-08-17
> 14:17:28.762250671 +0000
>      @@ -1,2 +1,6 @@
>       QA output created by 077
>      +getdents: Input/output error
>      +Missing created file in impure upper dir (see
> /results/overlay/results-large/overlay/077.full for details)
>      +getdents: Input/output error
>      +Found unlinked file in impure upper dir (see
> /results/overlay/results-large/overlay/077.full for details)
>       Silence is golden
> 
> Thanks,
> Amir.

Re: [PATCH v5 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 2 weeks ago
On Thu, Aug 14, 2025 at 7:22 PM André Almeida <andrealmeid@igalia.com> wrote:
>
> To add overlayfs support casefold layers, create a new function
> ovl_casefold(), to be able to do case-insensitive strncmp().
>
> ovl_casefold() allocates a new buffer and stores the casefolded version
> of the string on it. If the allocation or the casefold operation fails,
> fallback to use the original string.
>
> The case-insensitive name is then used in the rb-tree search/insertion
> operation. If the name is found in the rb-tree, the name can be
> discarded and the buffer is freed. If the name isn't found, it's then
> stored at struct ovl_cache_entry to be used later.
>
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
> Changes from v4:
>  - Move the consumer/free buffer logic out to the caller
>  - s/aux/c_name
>
> Changes from v3:
>  - Improve commit message text
>  - s/OVL_NAME_LEN/NAME_MAX
>  - drop #ifdef in favor of if(IS_ENABLED)
>  - use new helper sb_encoding
>  - merged patch "Store casefold name..." and "Create ovl_casefold()..."
>  - Guard all the casefolding inside of IS_ENABLED(UNICODE)
>
> Changes from v2:
> - Refactor the patch to do a single kmalloc() per rb_tree operation
> - Instead of casefolding the cache entry name everytime per strncmp(),
>   casefold it once and reuse it for every strncmp().
> ---
>  fs/overlayfs/readdir.c | 115 +++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 97 insertions(+), 18 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index b65cdfce31ce27172d28d879559f1008b9c87320..803ac6a7516d0156ae7793ee1ff884dbbf2e20b0 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>         bool is_upper;
>         bool is_whiteout;
>         bool check_xwhiteout;
> +       const char *cf_name;
> +       int cf_len;
>         char name[];
>  };
>
> @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>         struct list_head *list;
>         struct list_head middle;
>         struct ovl_cache_entry *first_maybe_whiteout;
> +       struct unicode_map *map;
>         int count;
>         int err;
>         bool is_upper;
> @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>         return rb_entry(n, struct ovl_cache_entry, node);
>  }
>
> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> +{
> +       const struct qstr qstr = { .name = str, .len = len };
> +       int cf_len;
> +
> +       if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> +               return 0;
> +
> +       *dst = kmalloc(NAME_MAX, GFP_KERNEL);
> +
> +       if (dst) {
> +               cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> +
> +               if (cf_len > 0)
> +                       return cf_len;
> +       }
> +
> +       kfree(*dst);
> +       return 0;
> +}
> +
>  static bool ovl_cache_entry_find_link(const char *name, int len,
>                                       struct rb_node ***link,
>                                       struct rb_node **parent)
> @@ -79,7 +103,7 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>
>                 *parent = *newp;
>                 tmp = ovl_cache_entry_from_node(*newp);
> -               cmp = strncmp(name, tmp->name, len);
> +               cmp = strncmp(name, tmp->cf_name, tmp->cf_len);
>                 if (cmp > 0)
>                         newp = &tmp->node.rb_right;
>                 else if (cmp < 0 || len < tmp->len)
> @@ -101,7 +125,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
>         while (node) {
>                 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
>
> -               cmp = strncmp(name, p->name, len);
> +               cmp = strncmp(name, p->cf_name, p->cf_len);
>                 if (cmp > 0)
>                         node = p->node.rb_right;
>                 else if (cmp < 0 || len < p->len)
> @@ -145,13 +169,16 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
>
>  static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>                                                    const char *name, int len,
> +                                                  const char *cf_name, int cf_len,
>                                                    u64 ino, unsigned int d_type)
>  {
>         struct ovl_cache_entry *p;
>
>         p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL);
> -       if (!p)
> +       if (!p) {
> +               kfree(cf_name);
>                 return NULL;
> +       }
>
>         memcpy(p->name, name, len);
>         p->name[len] = '\0';
> @@ -167,6 +194,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>         /* Defer check for overlay.whiteout to ovl_iterate() */
>         p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
>
> +       if (cf_name && cf_name != name) {
> +               p->cf_name = cf_name;
> +               p->cf_len = cf_len;
> +       } else {
> +               p->cf_name = p->name;
> +               p->cf_len = len;
> +       }
> +
>         if (d_type == DT_CHR) {
>                 p->next_maybe_whiteout = rdd->first_maybe_whiteout;
>                 rdd->first_maybe_whiteout = p;
> @@ -174,48 +209,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>         return p;
>  }
>
> -static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> -                                 const char *name, int len, u64 ino,
> +/* Return 0 for found, 1 for added, <0 for error */
> +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> +                                 const char *name, int len,
> +                                 const char *cf_name, int cf_len,
> +                                 u64 ino,
>                                   unsigned int d_type)
>  {
>         struct rb_node **newp = &rdd->root->rb_node;
>         struct rb_node *parent = NULL;
>         struct ovl_cache_entry *p;
>
> -       if (ovl_cache_entry_find_link(name, len, &newp, &parent))
> -               return true;
> +       if (ovl_cache_entry_find_link(cf_name, cf_len, &newp, &parent))
> +               return 0;
>
> -       p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
> +       p = ovl_cache_entry_new(rdd, name, len, cf_name, cf_len, ino, d_type);
>         if (p == NULL) {
>                 rdd->err = -ENOMEM;
> -               return false;
> +               return -ENOMEM;
>         }
>
>         list_add_tail(&p->l_node, rdd->list);
>         rb_link_node(&p->node, parent, newp);
>         rb_insert_color(&p->node, rdd->root);
>
> -       return true;
> +       return 1;
>  }
>
> -static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
> +/* Return 0 for found, 1 for added, <0 for error */
> +static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
>                            const char *name, int namelen,
> +                          const char *cf_name, int cf_len,
>                            loff_t offset, u64 ino, unsigned int d_type)
>  {
>         struct ovl_cache_entry *p;
>
> -       p = ovl_cache_entry_find(rdd->root, name, namelen);
> +       p = ovl_cache_entry_find(rdd->root, cf_name, cf_len);
>         if (p) {
>                 list_move_tail(&p->l_node, &rdd->middle);
> +               return 0;
>         } else {
> -               p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> +               p = ovl_cache_entry_new(rdd, name, namelen, cf_name, cf_len,
> +                                       ino, d_type);
>                 if (p == NULL)
>                         rdd->err = -ENOMEM;
>                 else
>                         list_add_tail(&p->l_node, &rdd->middle);
>         }
>
> -       return rdd->err == 0;
> +       return rdd->err ?: 1;
>  }
>
>  void ovl_cache_free(struct list_head *list)
> @@ -223,8 +265,11 @@ void ovl_cache_free(struct list_head *list)
>         struct ovl_cache_entry *p;
>         struct ovl_cache_entry *n;
>
> -       list_for_each_entry_safe(p, n, list, l_node)
> +       list_for_each_entry_safe(p, n, list, l_node) {
> +               if (p->cf_name != p->name)
> +                       kfree(p->cf_name);
>                 kfree(p);
> +       }
>
>         INIT_LIST_HEAD(list);
>  }
> @@ -260,12 +305,38 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
>  {
>         struct ovl_readdir_data *rdd =
>                 container_of(ctx, struct ovl_readdir_data, ctx);
> +       struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
> +       char *cf_name = NULL;
> +       int c_len = 0;
> +       int ret;
> +
> +       const char *c_name = NULL;
> +

Another nit:
Pls move up next to cf_name = NULL line in your branch

No need to repost.

Thanks,
Amir.
Re: [PATCH v5 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by André Almeida 1 month, 2 weeks ago
On 8/15/25 13:16, Amir Goldstein wrote:
> On Thu, Aug 14, 2025 at 7:22 PM André Almeida <andrealmeid@igalia.com> wrote:
>> To add overlayfs support casefold layers, create a new function
>> ovl_casefold(), to be able to do case-insensitive strncmp().
>>
>> ovl_casefold() allocates a new buffer and stores the casefolded version
>> of the string in it. If the allocation or the casefold operation fails,
>> fall back to using the original string.
>>
>> The case-insensitive name is then used in the rb-tree search/insertion
>> operation. If the name is found in the rb-tree, the name can be
>> discarded and the buffer is freed. If the name isn't found, it's then
>> stored at struct ovl_cache_entry to be used later.
>>
>> Signed-off-by: André Almeida <andrealmeid@igalia.com>

[...]

>> @@ -260,12 +305,38 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
>>   {
>>          struct ovl_readdir_data *rdd =
>>                  container_of(ctx, struct ovl_readdir_data, ctx);
>> +       struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
>> +       char *cf_name = NULL;
>> +       int c_len = 0;
>> +       int ret;
>> +
>> +       const char *c_name = NULL;
>> +
> Another nit:
> Pls move up next to cf_name = NULL line in your branch
>
> No need to repost.

Done. Also joined int ret and int c_len into the same declaration.

>
> Thanks,
> Amir.
Re: [PATCH v5 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 2 weeks ago
On Thu, Aug 14, 2025 at 7:22 PM André Almeida <andrealmeid@igalia.com> wrote:
>
> To add overlayfs support for casefold layers, create a new function,
> ovl_casefold(), to be able to do case-insensitive strncmp().
>
> ovl_casefold() allocates a new buffer and stores the casefolded version
> of the string in it. If the allocation or the casefold operation fails,
> fall back to using the original string.
>
> The case-insensitive name is then used in the rb-tree search/insertion
> operation. If the name is found in the rb-tree, the name can be
> discarded and the buffer is freed. If the name isn't found, it's then
> stored at struct ovl_cache_entry to be used later.
>
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
> Changes from v4:
>  - Move the consumer/free buffer logic out to the caller
>  - s/aux/c_name
>
> Changes from v3:
>  - Improve commit message text
>  - s/OVL_NAME_LEN/NAME_MAX
>  - drop #ifdef in favor of if(IS_ENABLED)
>  - use new helper sb_encoding
>  - merged patch "Store casefold name..." and "Create ovl_casefold()..."
>  - Guard all the casefolding inside of IS_ENABLED(UNICODE)
>
> Changes from v2:
> - Refactor the patch to do a single kmalloc() per rb_tree operation
> - Instead of casefolding the cache entry name every time strncmp() is called,
>   casefold it once and reuse it for every strncmp().
> ---
>  fs/overlayfs/readdir.c | 115 +++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 97 insertions(+), 18 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index b65cdfce31ce27172d28d879559f1008b9c87320..803ac6a7516d0156ae7793ee1ff884dbbf2e20b0 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>         bool is_upper;
>         bool is_whiteout;
>         bool check_xwhiteout;
> +       const char *cf_name;
> +       int cf_len;
>         char name[];
>  };
>
> @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>         struct list_head *list;
>         struct list_head middle;
>         struct ovl_cache_entry *first_maybe_whiteout;
> +       struct unicode_map *map;
>         int count;
>         int err;
>         bool is_upper;
> @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>         return rb_entry(n, struct ovl_cache_entry, node);
>  }
>
> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> +{
> +       const struct qstr qstr = { .name = str, .len = len };
> +       int cf_len;
> +
> +       if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> +               return 0;
> +
> +       *dst = kmalloc(NAME_MAX, GFP_KERNEL);
> +
> +       if (dst) {
> +               cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> +
> +               if (cf_len > 0)
> +                       return cf_len;
> +       }
> +
> +       kfree(*dst);
> +       return 0;
> +}
> +
>  static bool ovl_cache_entry_find_link(const char *name, int len,
>                                       struct rb_node ***link,
>                                       struct rb_node **parent)
> @@ -79,7 +103,7 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>
>                 *parent = *newp;
>                 tmp = ovl_cache_entry_from_node(*newp);
> -               cmp = strncmp(name, tmp->name, len);
> +               cmp = strncmp(name, tmp->cf_name, tmp->cf_len);
>                 if (cmp > 0)
>                         newp = &tmp->node.rb_right;
>                 else if (cmp < 0 || len < tmp->len)
> @@ -101,7 +125,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
>         while (node) {
>                 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
>
> -               cmp = strncmp(name, p->name, len);
> +               cmp = strncmp(name, p->cf_name, p->cf_len);
>                 if (cmp > 0)
>                         node = p->node.rb_right;
>                 else if (cmp < 0 || len < p->len)
> @@ -145,13 +169,16 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
>
>  static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>                                                    const char *name, int len,
> +                                                  const char *cf_name, int cf_len,
>                                                    u64 ino, unsigned int d_type)
>  {
>         struct ovl_cache_entry *p;
>
>         p = kmalloc(struct_size(p, name, len + 1), GFP_KERNEL);
> -       if (!p)
> +       if (!p) {
> +               kfree(cf_name);

Not needed.
Caller will get -ENOMEM and will free c_name

No need to repost just for this I can fix on commit.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Thanks,
Amir.