[PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()

André Almeida posted 9 patches 1 month, 1 week ago
[PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by André Almeida 1 month, 1 week ago
To add overlayfs support casefold layers, create a new function
ovl_casefold(), to be able to do case-insensitive strncmp().

ovl_casefold() allocates a new buffer and stores the casefolded version
of the string on it. If the allocation or the casefold operation fails,
fallback to use the original string.

The case-insentive name is then used in the rb-tree search/insertion
operation. If the name is found in the rb-tree, the name can be
discarded and the buffer is freed. If the name isn't found, it's then
stored at struct ovl_cache_entry to be used later.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
Changes from v6:
 - Last version was using `strncmp(... tmp->len)` which was causing
   regressions. It should be `strncmp(... len)`.
 - Rename cf_len to c_len
 - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
 - Remove needless kfree(cf_name)
---
 fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 94 insertions(+), 19 deletions(-)

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -27,6 +27,8 @@ struct ovl_cache_entry {
 	bool is_upper;
 	bool is_whiteout;
 	bool check_xwhiteout;
+	const char *c_name;
+	int c_len;
 	char name[];
 };
 
@@ -45,6 +47,7 @@ struct ovl_readdir_data {
 	struct list_head *list;
 	struct list_head middle;
 	struct ovl_cache_entry *first_maybe_whiteout;
+	struct unicode_map *map;
 	int count;
 	int err;
 	bool is_upper;
@@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
 	return rb_entry(n, struct ovl_cache_entry, node);
 }
 
+static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
+{
+	const struct qstr qstr = { .name = str, .len = len };
+	int cf_len;
+
+	if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
+		return 0;
+
+	*dst = kmalloc(NAME_MAX, GFP_KERNEL);
+
+	if (dst) {
+		cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
+
+		if (cf_len > 0)
+			return cf_len;
+	}
+
+	kfree(*dst);
+	return 0;
+}
+
 static bool ovl_cache_entry_find_link(const char *name, int len,
 				      struct rb_node ***link,
 				      struct rb_node **parent)
@@ -79,10 +103,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
 
 		*parent = *newp;
 		tmp = ovl_cache_entry_from_node(*newp);
-		cmp = strncmp(name, tmp->name, len);
+		cmp = strncmp(name, tmp->c_name, len);
 		if (cmp > 0)
 			newp = &tmp->node.rb_right;
-		else if (cmp < 0 || len < tmp->len)
+		else if (cmp < 0 || len < tmp->c_len)
 			newp = &tmp->node.rb_left;
 		else
 			found = true;
@@ -101,10 +125,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
 	while (node) {
 		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
 
-		cmp = strncmp(name, p->name, len);
+		cmp = strncmp(name, p->c_name, len);
 		if (cmp > 0)
 			node = p->node.rb_right;
-		else if (cmp < 0 || len < p->len)
+		else if (cmp < 0 || len < p->c_len)
 			node = p->node.rb_left;
 		else
 			return p;
@@ -145,6 +169,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
 
 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 						   const char *name, int len,
+						   const char *c_name, int c_len,
 						   u64 ino, unsigned int d_type)
 {
 	struct ovl_cache_entry *p;
@@ -167,6 +192,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 	/* Defer check for overlay.whiteout to ovl_iterate() */
 	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
 
+	if (c_name && c_name != name) {
+		p->c_name = c_name;
+		p->c_len = c_len;
+	} else {
+		p->c_name = p->name;
+		p->c_len = len;
+	}
+
 	if (d_type == DT_CHR) {
 		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
 		rdd->first_maybe_whiteout = p;
@@ -174,48 +207,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 	return p;
 }
 
-static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
-				  const char *name, int len, u64 ino,
+/* Return 0 for found, 1 for added, <0 for error */
+static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+				  const char *name, int len,
+				  const char *c_name, int c_len,
+				  u64 ino,
 				  unsigned int d_type)
 {
 	struct rb_node **newp = &rdd->root->rb_node;
 	struct rb_node *parent = NULL;
 	struct ovl_cache_entry *p;
 
-	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
-		return true;
+	if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
+		return 0;
 
-	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
+	p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
 	if (p == NULL) {
 		rdd->err = -ENOMEM;
-		return false;
+		return -ENOMEM;
 	}
 
 	list_add_tail(&p->l_node, rdd->list);
 	rb_link_node(&p->node, parent, newp);
 	rb_insert_color(&p->node, rdd->root);
 
-	return true;
+	return 1;
 }
 
-static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
+/* Return 0 for found, 1 for added, <0 for error */
+static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
 			   const char *name, int namelen,
+			   const char *c_name, int c_len,
 			   loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct ovl_cache_entry *p;
 
-	p = ovl_cache_entry_find(rdd->root, name, namelen);
+	p = ovl_cache_entry_find(rdd->root, c_name, c_len);
 	if (p) {
 		list_move_tail(&p->l_node, &rdd->middle);
+		return 0;
 	} else {
-		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+		p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
+					ino, d_type);
 		if (p == NULL)
 			rdd->err = -ENOMEM;
 		else
 			list_add_tail(&p->l_node, &rdd->middle);
 	}
 
-	return rdd->err == 0;
+	return rdd->err ?: 1;
 }
 
 void ovl_cache_free(struct list_head *list)
@@ -223,8 +263,11 @@ void ovl_cache_free(struct list_head *list)
 	struct ovl_cache_entry *p;
 	struct ovl_cache_entry *n;
 
-	list_for_each_entry_safe(p, n, list, l_node)
+	list_for_each_entry_safe(p, n, list, l_node) {
+		if (p->c_name != p->name)
+			kfree(p->c_name);
 		kfree(p);
+	}
 
 	INIT_LIST_HEAD(list);
 }
@@ -260,12 +303,36 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
 {
 	struct ovl_readdir_data *rdd =
 		container_of(ctx, struct ovl_readdir_data, ctx);
+	struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
+	const char *c_name = NULL;
+	char *cf_name = NULL;
+	int c_len = 0, ret;
+
+	if (ofs->casefold)
+		c_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
+
+	if (c_len <= 0) {
+		c_name = name;
+		c_len = namelen;
+	} else {
+		c_name = cf_name;
+	}
 
 	rdd->count++;
 	if (!rdd->is_lowest)
-		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
+		ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
 	else
-		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
+		ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
+
+	/*
+	 * If ret == 1, that means that c_name is being used as part of struct
+	 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
+	 * c_name was found in the rb-tree so we can free it here.
+	 */
+	if (ret != 1 && c_name != name)
+		kfree(c_name);
+
+	return ret >= 0;
 }
 
 static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
@@ -357,12 +424,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
 		.list = list,
 		.root = root,
 		.is_lowest = false,
+		.map = NULL,
 	};
 	int idx, next;
 	const struct ovl_layer *layer;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 
 	for (idx = 0; idx != -1; idx = next) {
 		next = ovl_path_next(idx, dentry, &realpath, &layer);
+
+		if (ofs->casefold)
+			rdd.map = sb_encoding(realpath.dentry->d_sb);
+
 		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
 		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
 					ovl_dentry_has_xwhiteouts(dentry);
@@ -555,7 +628,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
 		container_of(ctx, struct ovl_readdir_data, ctx);
 
 	rdd->count++;
-	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+	p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
 	if (p == NULL) {
 		rdd->err = -ENOMEM;
 		return false;
@@ -1023,6 +1096,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
 
 del_entry:
 		list_del(&p->l_node);
+		if (p->c_name != p->name)
+			kfree(p->c_name);
 		kfree(p);
 	}
 

-- 
2.50.1

Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Gabriel Krisman Bertazi 1 month, 1 week ago
André Almeida <andrealmeid@igalia.com> writes:

> To add overlayfs support casefold layers, create a new function
> ovl_casefold(), to be able to do case-insensitive strncmp().
>
> ovl_casefold() allocates a new buffer and stores the casefolded version
> of the string on it. If the allocation or the casefold operation fails,
> fallback to use the original string.
>
> The case-insentive name is then used in the rb-tree search/insertion
> operation. If the name is found in the rb-tree, the name can be
> discarded and the buffer is freed. If the name isn't found, it's then
> stored at struct ovl_cache_entry to be used later.
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
> Changes from v6:
>  - Last version was using `strncmp(... tmp->len)` which was causing
>    regressions. It should be `strncmp(... len)`.
>  - Rename cf_len to c_len
>  - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
>  - Remove needless kfree(cf_name)
> ---
>  fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 94 insertions(+), 19 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>  	bool is_upper;
>  	bool is_whiteout;
>  	bool check_xwhiteout;
> +	const char *c_name;
> +	int c_len;
>  	char name[];
>  };
>  
> @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>  	struct list_head *list;
>  	struct list_head middle;
>  	struct ovl_cache_entry *first_maybe_whiteout;
> +	struct unicode_map *map;
>  	int count;
>  	int err;
>  	bool is_upper;
> @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>  	return rb_entry(n, struct ovl_cache_entry, node);
>  }
>  
> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> +{
> +	const struct qstr qstr = { .name = str, .len = len };
> +	int cf_len;
> +
> +	if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> +		return 0;
> +
> +	*dst = kmalloc(NAME_MAX, GFP_KERNEL);
> +
> +	if (dst) {
> +		cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> +
> +		if (cf_len > 0)
> +			return cf_len;
> +	}
> +
> +	kfree(*dst);
> +	return 0;
> +}

Hi,

I should just note this does not differentiates allocation errors from
casefolding errors (invalid encoding).  It might be just a theoretical
error because GFP_KERNEL shouldn't fail (wink, wink) and the rest of the
operation is likely to fail too, but if you have an allocation failure, you
can end up with an inconsistent cache, because a file is added under the
!casefolded name and a later successful lookup will look for the
casefolded version.

> +
>  static bool ovl_cache_entry_find_link(const char *name, int len,
>  				      struct rb_node ***link,
>  				      struct rb_node **parent)
> @@ -79,10 +103,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>  
>  		*parent = *newp;
>  		tmp = ovl_cache_entry_from_node(*newp);
> -		cmp = strncmp(name, tmp->name, len);
> +		cmp = strncmp(name, tmp->c_name, len);
>  		if (cmp > 0)
>  			newp = &tmp->node.rb_right;
> -		else if (cmp < 0 || len < tmp->len)
> +		else if (cmp < 0 || len < tmp->c_len)
>  			newp = &tmp->node.rb_left;
>  		else
>  			found = true;
> @@ -101,10 +125,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
>  	while (node) {
>  		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
>  
> -		cmp = strncmp(name, p->name, len);
> +		cmp = strncmp(name, p->c_name, len);
>  		if (cmp > 0)
>  			node = p->node.rb_right;
> -		else if (cmp < 0 || len < p->len)
> +		else if (cmp < 0 || len < p->c_len)
>  			node = p->node.rb_left;
>  		else
>  			return p;
> @@ -145,6 +169,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
>  
>  static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>  						   const char *name, int len,
> +						   const char *c_name, int c_len,
>  						   u64 ino, unsigned int d_type)
>  {
>  	struct ovl_cache_entry *p;
> @@ -167,6 +192,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>  	/* Defer check for overlay.whiteout to ovl_iterate() */
>  	p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
>  
> +	if (c_name && c_name != name) {
> +		p->c_name = c_name;
> +		p->c_len = c_len;
> +	} else {
> +		p->c_name = p->name;
> +		p->c_len = len;
> +	}
> +
>  	if (d_type == DT_CHR) {
>  		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
>  		rdd->first_maybe_whiteout = p;
> @@ -174,48 +207,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>  	return p;
>  }
>  
> -static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> -				  const char *name, int len, u64 ino,
> +/* Return 0 for found, 1 for added, <0 for error */
> +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> +				  const char *name, int len,
> +				  const char *c_name, int c_len,
> +				  u64 ino,
>  				  unsigned int d_type)
>  {
>  	struct rb_node **newp = &rdd->root->rb_node;
>  	struct rb_node *parent = NULL;
>  	struct ovl_cache_entry *p;
>  
> -	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
> -		return true;
> +	if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
> +		return 0;
>  
> -	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
> +	p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
>  	if (p == NULL) {
>  		rdd->err = -ENOMEM;
> -		return false;
> +		return -ENOMEM;
>  	}
>  
>  	list_add_tail(&p->l_node, rdd->list);
>  	rb_link_node(&p->node, parent, newp);
>  	rb_insert_color(&p->node, rdd->root);
>  
> -	return true;
> +	return 1;
>  }
>  
> -static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
> +/* Return 0 for found, 1 for added, <0 for error */
> +static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
>  			   const char *name, int namelen,
> +			   const char *c_name, int c_len,
>  			   loff_t offset, u64 ino, unsigned int d_type)
>  {
>  	struct ovl_cache_entry *p;
>  
> -	p = ovl_cache_entry_find(rdd->root, name, namelen);
> +	p = ovl_cache_entry_find(rdd->root, c_name, c_len);
>  	if (p) {
>  		list_move_tail(&p->l_node, &rdd->middle);
> +		return 0;
>  	} else {
> -		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> +		p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
> +					ino, d_type);
>  		if (p == NULL)
>  			rdd->err = -ENOMEM;
>  		else
>  			list_add_tail(&p->l_node, &rdd->middle);
>  	}
>  
> -	return rdd->err == 0;
> +	return rdd->err ?: 1;
>  }
>  
>  void ovl_cache_free(struct list_head *list)
> @@ -223,8 +263,11 @@ void ovl_cache_free(struct list_head *list)
>  	struct ovl_cache_entry *p;
>  	struct ovl_cache_entry *n;
>  
> -	list_for_each_entry_safe(p, n, list, l_node)
> +	list_for_each_entry_safe(p, n, list, l_node) {
> +		if (p->c_name != p->name)
> +			kfree(p->c_name);
>  		kfree(p);
> +	}
>  
>  	INIT_LIST_HEAD(list);
>  }
> @@ -260,12 +303,36 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
>  {
>  	struct ovl_readdir_data *rdd =
>  		container_of(ctx, struct ovl_readdir_data, ctx);
> +	struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
> +	const char *c_name = NULL;
> +	char *cf_name = NULL;
> +	int c_len = 0, ret;
> +
> +	if (ofs->casefold)
> +		c_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
> +
> +	if (c_len <= 0) {
> +		c_name = name;
> +		c_len = namelen;
> +	} else {
> +		c_name = cf_name;
> +	}
>  
>  	rdd->count++;
>  	if (!rdd->is_lowest)
> -		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
> +		ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
>  	else
> -		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
> +		ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
> +
> +	/*
> +	 * If ret == 1, that means that c_name is being used as part of struct
> +	 * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
> +	 * c_name was found in the rb-tree so we can free it here.
> +	 */
> +	if (ret != 1 && c_name != name)
> +		kfree(c_name);
> +

The semantics of this being conditionally freed is a bit annoying, as
it is already replicated in 3 places. I suppose a helper would come in
hand.

In this specific case, it could just be:

if (ret != 1)
        kfree(cf_name);


> +	return ret >= 0;
>  }
>  
>  static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
> @@ -357,12 +424,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
>  		.list = list,
>  		.root = root,
>  		.is_lowest = false,
> +		.map = NULL,
>  	};
>  	int idx, next;
>  	const struct ovl_layer *layer;
> +	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
>  
>  	for (idx = 0; idx != -1; idx = next) {
>  		next = ovl_path_next(idx, dentry, &realpath, &layer);
> +
> +		if (ofs->casefold)
> +			rdd.map = sb_encoding(realpath.dentry->d_sb);
> +
>  		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
>  		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
>  					ovl_dentry_has_xwhiteouts(dentry);
> @@ -555,7 +628,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
>  		container_of(ctx, struct ovl_readdir_data, ctx);
>  
>  	rdd->count++;
> -	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> +	p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
>  	if (p == NULL) {
>  		rdd->err = -ENOMEM;
>  		return false;
> @@ -1023,6 +1096,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
>  
>  del_entry:
>  		list_del(&p->l_node);
> +		if (p->c_name != p->name)
> +			kfree(p->c_name);
>  		kfree(p);
>  	}

-- 
Gabriel Krisman Bertazi
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 1 week ago
On Mon, Aug 25, 2025 at 1:09 PM Gabriel Krisman Bertazi
<gabriel@krisman.be> wrote:
>
> André Almeida <andrealmeid@igalia.com> writes:
>
> > To add overlayfs support casefold layers, create a new function
> > ovl_casefold(), to be able to do case-insensitive strncmp().
> >
> > ovl_casefold() allocates a new buffer and stores the casefolded version
> > of the string on it. If the allocation or the casefold operation fails,
> > fallback to use the original string.
> >
> > The case-insentive name is then used in the rb-tree search/insertion
> > operation. If the name is found in the rb-tree, the name can be
> > discarded and the buffer is freed. If the name isn't found, it's then
> > stored at struct ovl_cache_entry to be used later.
> >
> > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> > Signed-off-by: André Almeida <andrealmeid@igalia.com>
> > ---
> > Changes from v6:
> >  - Last version was using `strncmp(... tmp->len)` which was causing
> >    regressions. It should be `strncmp(... len)`.
> >  - Rename cf_len to c_len
> >  - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
> >  - Remove needless kfree(cf_name)
> > ---
> >  fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 94 insertions(+), 19 deletions(-)
> >
> > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> > index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
> > --- a/fs/overlayfs/readdir.c
> > +++ b/fs/overlayfs/readdir.c
> > @@ -27,6 +27,8 @@ struct ovl_cache_entry {
> >       bool is_upper;
> >       bool is_whiteout;
> >       bool check_xwhiteout;
> > +     const char *c_name;
> > +     int c_len;
> >       char name[];
> >  };
> >
> > @@ -45,6 +47,7 @@ struct ovl_readdir_data {
> >       struct list_head *list;
> >       struct list_head middle;
> >       struct ovl_cache_entry *first_maybe_whiteout;
> > +     struct unicode_map *map;
> >       int count;
> >       int err;
> >       bool is_upper;
> > @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
> >       return rb_entry(n, struct ovl_cache_entry, node);
> >  }
> >
> > +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> > +{
> > +     const struct qstr qstr = { .name = str, .len = len };
> > +     int cf_len;
> > +
> > +     if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> > +             return 0;
> > +
> > +     *dst = kmalloc(NAME_MAX, GFP_KERNEL);
> > +
> > +     if (dst) {
> > +             cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> > +
> > +             if (cf_len > 0)
> > +                     return cf_len;
> > +     }
> > +
> > +     kfree(*dst);
> > +     return 0;
> > +}
>
> Hi,
>
> I should just note this does not differentiates allocation errors from
> casefolding errors (invalid encoding).  It might be just a theoretical
> error because GFP_KERNEL shouldn't fail (wink, wink) and the rest of the
> operation is likely to fail too, but if you have an allocation failure, you
> can end up with an inconsistent cache, because a file is added under the
> !casefolded name and a later successful lookup will look for the
> casefolded version.

Good point.
I will fix this in my tree.

>
> > +
> >  static bool ovl_cache_entry_find_link(const char *name, int len,
> >                                     struct rb_node ***link,
> >                                     struct rb_node **parent)
> > @@ -79,10 +103,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
> >
> >               *parent = *newp;
> >               tmp = ovl_cache_entry_from_node(*newp);
> > -             cmp = strncmp(name, tmp->name, len);
> > +             cmp = strncmp(name, tmp->c_name, len);
> >               if (cmp > 0)
> >                       newp = &tmp->node.rb_right;
> > -             else if (cmp < 0 || len < tmp->len)
> > +             else if (cmp < 0 || len < tmp->c_len)
> >                       newp = &tmp->node.rb_left;
> >               else
> >                       found = true;
> > @@ -101,10 +125,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
> >       while (node) {
> >               struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
> >
> > -             cmp = strncmp(name, p->name, len);
> > +             cmp = strncmp(name, p->c_name, len);
> >               if (cmp > 0)
> >                       node = p->node.rb_right;
> > -             else if (cmp < 0 || len < p->len)
> > +             else if (cmp < 0 || len < p->c_len)
> >                       node = p->node.rb_left;
> >               else
> >                       return p;
> > @@ -145,6 +169,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
> >
> >  static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
> >                                                  const char *name, int len,
> > +                                                const char *c_name, int c_len,
> >                                                  u64 ino, unsigned int d_type)
> >  {
> >       struct ovl_cache_entry *p;
> > @@ -167,6 +192,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
> >       /* Defer check for overlay.whiteout to ovl_iterate() */
> >       p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
> >
> > +     if (c_name && c_name != name) {
> > +             p->c_name = c_name;
> > +             p->c_len = c_len;
> > +     } else {
> > +             p->c_name = p->name;
> > +             p->c_len = len;
> > +     }
> > +
> >       if (d_type == DT_CHR) {
> >               p->next_maybe_whiteout = rdd->first_maybe_whiteout;
> >               rdd->first_maybe_whiteout = p;
> > @@ -174,48 +207,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
> >       return p;
> >  }
> >
> > -static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> > -                               const char *name, int len, u64 ino,
> > +/* Return 0 for found, 1 for added, <0 for error */
> > +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> > +                               const char *name, int len,
> > +                               const char *c_name, int c_len,
> > +                               u64 ino,
> >                                 unsigned int d_type)
> >  {
> >       struct rb_node **newp = &rdd->root->rb_node;
> >       struct rb_node *parent = NULL;
> >       struct ovl_cache_entry *p;
> >
> > -     if (ovl_cache_entry_find_link(name, len, &newp, &parent))
> > -             return true;
> > +     if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
> > +             return 0;
> >
> > -     p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
> > +     p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
> >       if (p == NULL) {
> >               rdd->err = -ENOMEM;
> > -             return false;
> > +             return -ENOMEM;
> >       }
> >
> >       list_add_tail(&p->l_node, rdd->list);
> >       rb_link_node(&p->node, parent, newp);
> >       rb_insert_color(&p->node, rdd->root);
> >
> > -     return true;
> > +     return 1;
> >  }
> >
> > -static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
> > +/* Return 0 for found, 1 for added, <0 for error */
> > +static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
> >                          const char *name, int namelen,
> > +                        const char *c_name, int c_len,
> >                          loff_t offset, u64 ino, unsigned int d_type)
> >  {
> >       struct ovl_cache_entry *p;
> >
> > -     p = ovl_cache_entry_find(rdd->root, name, namelen);
> > +     p = ovl_cache_entry_find(rdd->root, c_name, c_len);
> >       if (p) {
> >               list_move_tail(&p->l_node, &rdd->middle);
> > +             return 0;
> >       } else {
> > -             p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> > +             p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
> > +                                     ino, d_type);
> >               if (p == NULL)
> >                       rdd->err = -ENOMEM;
> >               else
> >                       list_add_tail(&p->l_node, &rdd->middle);
> >       }
> >
> > -     return rdd->err == 0;
> > +     return rdd->err ?: 1;
> >  }
> >
> >  void ovl_cache_free(struct list_head *list)
> > @@ -223,8 +263,11 @@ void ovl_cache_free(struct list_head *list)
> >       struct ovl_cache_entry *p;
> >       struct ovl_cache_entry *n;
> >
> > -     list_for_each_entry_safe(p, n, list, l_node)
> > +     list_for_each_entry_safe(p, n, list, l_node) {
> > +             if (p->c_name != p->name)
> > +                     kfree(p->c_name);
> >               kfree(p);
> > +     }
> >
> >       INIT_LIST_HEAD(list);
> >  }
> > @@ -260,12 +303,36 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
> >  {
> >       struct ovl_readdir_data *rdd =
> >               container_of(ctx, struct ovl_readdir_data, ctx);
> > +     struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
> > +     const char *c_name = NULL;
> > +     char *cf_name = NULL;
> > +     int c_len = 0, ret;
> > +
> > +     if (ofs->casefold)
> > +             c_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
> > +
> > +     if (c_len <= 0) {
> > +             c_name = name;
> > +             c_len = namelen;
> > +     } else {
> > +             c_name = cf_name;
> > +     }
> >
> >       rdd->count++;
> >       if (!rdd->is_lowest)
> > -             return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
> > +             ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
> >       else
> > -             return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
> > +             ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
> > +
> > +     /*
> > +      * If ret == 1, that means that c_name is being used as part of struct
> > +      * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
> > +      * c_name was found in the rb-tree so we can free it here.
> > +      */
> > +     if (ret != 1 && c_name != name)
> > +             kfree(c_name);
> > +
>
> The semantics of this being conditionally freed is a bit annoying, as
> it is already replicated in 3 places. I suppose a helper would come in
> hand.

Yeh.

I have already used ovl_cache_entry_free() in my tree.

Thanks,
Amir.

>
> In this specific case, it could just be:
>
> if (ret != 1)
>         kfree(cf_name);
>
>
> > +     return ret >= 0;
> >  }
> >
> >  static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
> > @@ -357,12 +424,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
> >               .list = list,
> >               .root = root,
> >               .is_lowest = false,
> > +             .map = NULL,
> >       };
> >       int idx, next;
> >       const struct ovl_layer *layer;
> > +     struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> >
> >       for (idx = 0; idx != -1; idx = next) {
> >               next = ovl_path_next(idx, dentry, &realpath, &layer);
> > +
> > +             if (ofs->casefold)
> > +                     rdd.map = sb_encoding(realpath.dentry->d_sb);
> > +
> >               rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
> >               rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
> >                                       ovl_dentry_has_xwhiteouts(dentry);
> > @@ -555,7 +628,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
> >               container_of(ctx, struct ovl_readdir_data, ctx);
> >
> >       rdd->count++;
> > -     p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> > +     p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
> >       if (p == NULL) {
> >               rdd->err = -ENOMEM;
> >               return false;
> > @@ -1023,6 +1096,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
> >
> >  del_entry:
> >               list_del(&p->l_node);
> > +             if (p->c_name != p->name)
> > +                     kfree(p->c_name);
> >               kfree(p);
> >       }
>
> --
> Gabriel Krisman Bertazi
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 1 week ago
On Mon, Aug 25, 2025 at 5:27 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Mon, Aug 25, 2025 at 1:09 PM Gabriel Krisman Bertazi
> <gabriel@krisman.be> wrote:
> >
> > André Almeida <andrealmeid@igalia.com> writes:
> >
> > > To add overlayfs support casefold layers, create a new function
> > > ovl_casefold(), to be able to do case-insensitive strncmp().
> > >
> > > ovl_casefold() allocates a new buffer and stores the casefolded version
> > > of the string on it. If the allocation or the casefold operation fails,
> > > fallback to use the original string.
> > >
> > > The case-insentive name is then used in the rb-tree search/insertion
> > > operation. If the name is found in the rb-tree, the name can be
> > > discarded and the buffer is freed. If the name isn't found, it's then
> > > stored at struct ovl_cache_entry to be used later.
> > >
> > > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> > > Signed-off-by: André Almeida <andrealmeid@igalia.com>
> > > ---
> > > Changes from v6:
> > >  - Last version was using `strncmp(... tmp->len)` which was causing
> > >    regressions. It should be `strncmp(... len)`.
> > >  - Rename cf_len to c_len
> > >  - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
> > >  - Remove needless kfree(cf_name)
> > > ---
> > >  fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
> > >  1 file changed, 94 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> > > index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
> > > --- a/fs/overlayfs/readdir.c
> > > +++ b/fs/overlayfs/readdir.c
> > > @@ -27,6 +27,8 @@ struct ovl_cache_entry {
> > >       bool is_upper;
> > >       bool is_whiteout;
> > >       bool check_xwhiteout;
> > > +     const char *c_name;
> > > +     int c_len;
> > >       char name[];
> > >  };
> > >
> > > @@ -45,6 +47,7 @@ struct ovl_readdir_data {
> > >       struct list_head *list;
> > >       struct list_head middle;
> > >       struct ovl_cache_entry *first_maybe_whiteout;
> > > +     struct unicode_map *map;
> > >       int count;
> > >       int err;
> > >       bool is_upper;
> > > @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
> > >       return rb_entry(n, struct ovl_cache_entry, node);
> > >  }
> > >
> > > +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> > > +{
> > > +     const struct qstr qstr = { .name = str, .len = len };
> > > +     int cf_len;
> > > +
> > > +     if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> > > +             return 0;
> > > +
> > > +     *dst = kmalloc(NAME_MAX, GFP_KERNEL);
> > > +
> > > +     if (dst) {
> > > +             cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> > > +
> > > +             if (cf_len > 0)
> > > +                     return cf_len;
> > > +     }
> > > +
> > > +     kfree(*dst);
> > > +     return 0;
> > > +}
> >
> > Hi,
> >
> > I should just note this does not differentiates allocation errors from
> > casefolding errors (invalid encoding).  It might be just a theoretical
> > error because GFP_KERNEL shouldn't fail (wink, wink) and the rest of the
> > operation is likely to fail too, but if you have an allocation failure, you
> > can end up with an inconsistent cache, because a file is added under the
> > !casefolded name and a later successful lookup will look for the
> > casefolded version.
>
> Good point.
> I will fix this in my tree.

wait why should we not fail to fill the cache for both allocation
and encoding errors?

Thanks,
Amir.

>
> >
> > > +
> > >  static bool ovl_cache_entry_find_link(const char *name, int len,
> > >                                     struct rb_node ***link,
> > >                                     struct rb_node **parent)
> > > @@ -79,10 +103,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
> > >
> > >               *parent = *newp;
> > >               tmp = ovl_cache_entry_from_node(*newp);
> > > -             cmp = strncmp(name, tmp->name, len);
> > > +             cmp = strncmp(name, tmp->c_name, len);
> > >               if (cmp > 0)
> > >                       newp = &tmp->node.rb_right;
> > > -             else if (cmp < 0 || len < tmp->len)
> > > +             else if (cmp < 0 || len < tmp->c_len)
> > >                       newp = &tmp->node.rb_left;
> > >               else
> > >                       found = true;
> > > @@ -101,10 +125,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
> > >       while (node) {
> > >               struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
> > >
> > > -             cmp = strncmp(name, p->name, len);
> > > +             cmp = strncmp(name, p->c_name, len);
> > >               if (cmp > 0)
> > >                       node = p->node.rb_right;
> > > -             else if (cmp < 0 || len < p->len)
> > > +             else if (cmp < 0 || len < p->c_len)
> > >                       node = p->node.rb_left;
> > >               else
> > >                       return p;
> > > @@ -145,6 +169,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
> > >
> > >  static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
> > >                                                  const char *name, int len,
> > > +                                                const char *c_name, int c_len,
> > >                                                  u64 ino, unsigned int d_type)
> > >  {
> > >       struct ovl_cache_entry *p;
> > > @@ -167,6 +192,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
> > >       /* Defer check for overlay.whiteout to ovl_iterate() */
> > >       p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
> > >
> > > +     if (c_name && c_name != name) {
> > > +             p->c_name = c_name;
> > > +             p->c_len = c_len;
> > > +     } else {
> > > +             p->c_name = p->name;
> > > +             p->c_len = len;
> > > +     }
> > > +
> > >       if (d_type == DT_CHR) {
> > >               p->next_maybe_whiteout = rdd->first_maybe_whiteout;
> > >               rdd->first_maybe_whiteout = p;
> > > @@ -174,48 +207,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
> > >       return p;
> > >  }
> > >
> > > -static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> > > -                               const char *name, int len, u64 ino,
> > > +/* Return 0 for found, 1 for added, <0 for error */
> > > +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> > > +                               const char *name, int len,
> > > +                               const char *c_name, int c_len,
> > > +                               u64 ino,
> > >                                 unsigned int d_type)
> > >  {
> > >       struct rb_node **newp = &rdd->root->rb_node;
> > >       struct rb_node *parent = NULL;
> > >       struct ovl_cache_entry *p;
> > >
> > > -     if (ovl_cache_entry_find_link(name, len, &newp, &parent))
> > > -             return true;
> > > +     if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
> > > +             return 0;
> > >
> > > -     p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
> > > +     p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
> > >       if (p == NULL) {
> > >               rdd->err = -ENOMEM;
> > > -             return false;
> > > +             return -ENOMEM;
> > >       }
> > >
> > >       list_add_tail(&p->l_node, rdd->list);
> > >       rb_link_node(&p->node, parent, newp);
> > >       rb_insert_color(&p->node, rdd->root);
> > >
> > > -     return true;
> > > +     return 1;
> > >  }
> > >
> > > -static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
> > > +/* Return 0 for found, 1 for added, <0 for error */
> > > +static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
> > >                          const char *name, int namelen,
> > > +                        const char *c_name, int c_len,
> > >                          loff_t offset, u64 ino, unsigned int d_type)
> > >  {
> > >       struct ovl_cache_entry *p;
> > >
> > > -     p = ovl_cache_entry_find(rdd->root, name, namelen);
> > > +     p = ovl_cache_entry_find(rdd->root, c_name, c_len);
> > >       if (p) {
> > >               list_move_tail(&p->l_node, &rdd->middle);
> > > +             return 0;
> > >       } else {
> > > -             p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> > > +             p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
> > > +                                     ino, d_type);
> > >               if (p == NULL)
> > >                       rdd->err = -ENOMEM;
> > >               else
> > >                       list_add_tail(&p->l_node, &rdd->middle);
> > >       }
> > >
> > > -     return rdd->err == 0;
> > > +     return rdd->err ?: 1;
> > >  }
> > >
> > >  void ovl_cache_free(struct list_head *list)
> > > @@ -223,8 +263,11 @@ void ovl_cache_free(struct list_head *list)
> > >       struct ovl_cache_entry *p;
> > >       struct ovl_cache_entry *n;
> > >
> > > -     list_for_each_entry_safe(p, n, list, l_node)
> > > +     list_for_each_entry_safe(p, n, list, l_node) {
> > > +             if (p->c_name != p->name)
> > > +                     kfree(p->c_name);
> > >               kfree(p);
> > > +     }
> > >
> > >       INIT_LIST_HEAD(list);
> > >  }
> > > @@ -260,12 +303,36 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
> > >  {
> > >       struct ovl_readdir_data *rdd =
> > >               container_of(ctx, struct ovl_readdir_data, ctx);
> > > +     struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
> > > +     const char *c_name = NULL;
> > > +     char *cf_name = NULL;
> > > +     int c_len = 0, ret;
> > > +
> > > +     if (ofs->casefold)
> > > +             c_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
> > > +
> > > +     if (c_len <= 0) {
> > > +             c_name = name;
> > > +             c_len = namelen;
> > > +     } else {
> > > +             c_name = cf_name;
> > > +     }
> > >
> > >       rdd->count++;
> > >       if (!rdd->is_lowest)
> > > -             return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
> > > +             ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
> > >       else
> > > -             return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
> > > +             ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
> > > +
> > > +     /*
> > > +      * If ret == 1, that means that c_name is being used as part of struct
> > > +      * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
> > > +      * c_name was found in the rb-tree so we can free it here.
> > > +      */
> > > +     if (ret != 1 && c_name != name)
> > > +             kfree(c_name);
> > > +
> >
> > The semantics of this being conditionally freed is a bit annoying, as
> > it is already replicated in 3 places. I suppose a helper would come in
> > hand.
>
> Yeh.
>
> I have already used ovl_cache_entry_free() in my tree.
>
> Thanks,
> Amir.
>
> >
> > In this specific case, it could just be:
> >
> > if (ret != 1)
> >         kfree(cf_name);
> >
> >
> > > +     return ret >= 0;
> > >  }
> > >
> > >  static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
> > > @@ -357,12 +424,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
> > >               .list = list,
> > >               .root = root,
> > >               .is_lowest = false,
> > > +             .map = NULL,
> > >       };
> > >       int idx, next;
> > >       const struct ovl_layer *layer;
> > > +     struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> > >
> > >       for (idx = 0; idx != -1; idx = next) {
> > >               next = ovl_path_next(idx, dentry, &realpath, &layer);
> > > +
> > > +             if (ofs->casefold)
> > > +                     rdd.map = sb_encoding(realpath.dentry->d_sb);
> > > +
> > >               rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
> > >               rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
> > >                                       ovl_dentry_has_xwhiteouts(dentry);
> > > @@ -555,7 +628,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
> > >               container_of(ctx, struct ovl_readdir_data, ctx);
> > >
> > >       rdd->count++;
> > > -     p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> > > +     p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
> > >       if (p == NULL) {
> > >               rdd->err = -ENOMEM;
> > >               return false;
> > > @@ -1023,6 +1096,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
> > >
> > >  del_entry:
> > >               list_del(&p->l_node);
> > > +             if (p->c_name != p->name)
> > > +                     kfree(p->c_name);
> > >               kfree(p);
> > >       }
> >
> > --
> > Gabriel Krisman Bertazi
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Gabriel Krisman Bertazi 1 month, 1 week ago
Amir Goldstein <amir73il@gmail.com> writes:

> On Mon, Aug 25, 2025 at 5:27 PM Amir Goldstein <amir73il@gmail.com> wrote:
>>
>> On Mon, Aug 25, 2025 at 1:09 PM Gabriel Krisman Bertazi
>> <gabriel@krisman.be> wrote:
>> >
>> > André Almeida <andrealmeid@igalia.com> writes:
>> >
>> > > To add overlayfs support casefold layers, create a new function
>> > > ovl_casefold(), to be able to do case-insensitive strncmp().
>> > >
>> > > ovl_casefold() allocates a new buffer and stores the casefolded version
>> > > of the string on it. If the allocation or the casefold operation fails,
>> > > fallback to use the original string.
>> > >
>> > > The case-insentive name is then used in the rb-tree search/insertion
>> > > operation. If the name is found in the rb-tree, the name can be
>> > > discarded and the buffer is freed. If the name isn't found, it's then
>> > > stored at struct ovl_cache_entry to be used later.
>> > >
>> > > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
>> > > Signed-off-by: André Almeida <andrealmeid@igalia.com>
>> > > ---
>> > > Changes from v6:
>> > >  - Last version was using `strncmp(... tmp->len)` which was causing
>> > >    regressions. It should be `strncmp(... len)`.
>> > >  - Rename cf_len to c_len
>> > >  - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
>> > >  - Remove needless kfree(cf_name)
>> > > ---
>> > >  fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
>> > >  1 file changed, 94 insertions(+), 19 deletions(-)
>> > >
>> > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
>> > > index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
>> > > --- a/fs/overlayfs/readdir.c
>> > > +++ b/fs/overlayfs/readdir.c
>> > > @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>> > >       bool is_upper;
>> > >       bool is_whiteout;
>> > >       bool check_xwhiteout;
>> > > +     const char *c_name;
>> > > +     int c_len;
>> > >       char name[];
>> > >  };
>> > >
>> > > @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>> > >       struct list_head *list;
>> > >       struct list_head middle;
>> > >       struct ovl_cache_entry *first_maybe_whiteout;
>> > > +     struct unicode_map *map;
>> > >       int count;
>> > >       int err;
>> > >       bool is_upper;
>> > > @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>> > >       return rb_entry(n, struct ovl_cache_entry, node);
>> > >  }
>> > >
>> > > +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
>> > > +{
>> > > +     const struct qstr qstr = { .name = str, .len = len };
>> > > +     int cf_len;
>> > > +
>> > > +     if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
>> > > +             return 0;
>> > > +
>> > > +     *dst = kmalloc(NAME_MAX, GFP_KERNEL);
>> > > +
>> > > +     if (dst) {
>> > > +             cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
>> > > +
>> > > +             if (cf_len > 0)
>> > > +                     return cf_len;
>> > > +     }
>> > > +
>> > > +     kfree(*dst);
>> > > +     return 0;
>> > > +}
>> >
>> > Hi,
>> >
>> > I should just note this does not differentiates allocation errors from
>> > casefolding errors (invalid encoding).  It might be just a theoretical
>> > error because GFP_KERNEL shouldn't fail (wink, wink) and the rest of the
>> > operation is likely to fail too, but if you have an allocation failure, you
>> > can end up with an inconsistent cache, because a file is added under the
>> > !casefolded name and a later successful lookup will look for the
>> > casefolded version.
>>
>> Good point.
>> I will fix this in my tree.
>
> wait why should we not fail to fill the cache for both allocation
> and encoding errors?
>

We shouldn't fail the cache for encoding errors, just for allocation errors.

Perhaps I am misreading the code, so please correct me if I'm wrong.  if
ovl_casefold fails, the non-casefolded name is used in the cache.  That
makes sense if the reason utf8_casefold failed is because the string
cannot be casefolded (i.e. an invalid utf-8 string). For those strings,
everything is fine.  But on an allocation failure, the string might have
a real casefolded version.  If we fallback to the original string as the
key, a cache lookup won't find the entry, since we compare with memcmp.

>
>>
>> >
>> > > +
>> > >  static bool ovl_cache_entry_find_link(const char *name, int len,
>> > >                                     struct rb_node ***link,
>> > >                                     struct rb_node **parent)
>> > > @@ -79,10 +103,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>> > >
>> > >               *parent = *newp;
>> > >               tmp = ovl_cache_entry_from_node(*newp);
>> > > -             cmp = strncmp(name, tmp->name, len);
>> > > +             cmp = strncmp(name, tmp->c_name, len);
>> > >               if (cmp > 0)
>> > >                       newp = &tmp->node.rb_right;
>> > > -             else if (cmp < 0 || len < tmp->len)
>> > > +             else if (cmp < 0 || len < tmp->c_len)
>> > >                       newp = &tmp->node.rb_left;
>> > >               else
>> > >                       found = true;
>> > > @@ -101,10 +125,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
>> > >       while (node) {
>> > >               struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
>> > >
>> > > -             cmp = strncmp(name, p->name, len);
>> > > +             cmp = strncmp(name, p->c_name, len);
>> > >               if (cmp > 0)
>> > >                       node = p->node.rb_right;
>> > > -             else if (cmp < 0 || len < p->len)
>> > > +             else if (cmp < 0 || len < p->c_len)
>> > >                       node = p->node.rb_left;
>> > >               else
>> > >                       return p;
>> > > @@ -145,6 +169,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
>> > >
>> > >  static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>> > >                                                  const char *name, int len,
>> > > +                                                const char *c_name, int c_len,
>> > >                                                  u64 ino, unsigned int d_type)
>> > >  {
>> > >       struct ovl_cache_entry *p;
>> > > @@ -167,6 +192,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>> > >       /* Defer check for overlay.whiteout to ovl_iterate() */
>> > >       p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
>> > >
>> > > +     if (c_name && c_name != name) {
>> > > +             p->c_name = c_name;
>> > > +             p->c_len = c_len;
>> > > +     } else {
>> > > +             p->c_name = p->name;
>> > > +             p->c_len = len;
>> > > +     }
>> > > +
>> > >       if (d_type == DT_CHR) {
>> > >               p->next_maybe_whiteout = rdd->first_maybe_whiteout;
>> > >               rdd->first_maybe_whiteout = p;
>> > > @@ -174,48 +207,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>> > >       return p;
>> > >  }
>> > >
>> > > -static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
>> > > -                               const char *name, int len, u64 ino,
>> > > +/* Return 0 for found, 1 for added, <0 for error */
>> > > +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
>> > > +                               const char *name, int len,
>> > > +                               const char *c_name, int c_len,
>> > > +                               u64 ino,
>> > >                                 unsigned int d_type)
>> > >  {
>> > >       struct rb_node **newp = &rdd->root->rb_node;
>> > >       struct rb_node *parent = NULL;
>> > >       struct ovl_cache_entry *p;
>> > >
>> > > -     if (ovl_cache_entry_find_link(name, len, &newp, &parent))
>> > > -             return true;
>> > > +     if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
>> > > +             return 0;
>> > >
>> > > -     p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
>> > > +     p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
>> > >       if (p == NULL) {
>> > >               rdd->err = -ENOMEM;
>> > > -             return false;
>> > > +             return -ENOMEM;
>> > >       }
>> > >
>> > >       list_add_tail(&p->l_node, rdd->list);
>> > >       rb_link_node(&p->node, parent, newp);
>> > >       rb_insert_color(&p->node, rdd->root);
>> > >
>> > > -     return true;
>> > > +     return 1;
>> > >  }
>> > >
>> > > -static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
>> > > +/* Return 0 for found, 1 for added, <0 for error */
>> > > +static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
>> > >                          const char *name, int namelen,
>> > > +                        const char *c_name, int c_len,
>> > >                          loff_t offset, u64 ino, unsigned int d_type)
>> > >  {
>> > >       struct ovl_cache_entry *p;
>> > >
>> > > -     p = ovl_cache_entry_find(rdd->root, name, namelen);
>> > > +     p = ovl_cache_entry_find(rdd->root, c_name, c_len);
>> > >       if (p) {
>> > >               list_move_tail(&p->l_node, &rdd->middle);
>> > > +             return 0;
>> > >       } else {
>> > > -             p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
>> > > +             p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
>> > > +                                     ino, d_type);
>> > >               if (p == NULL)
>> > >                       rdd->err = -ENOMEM;
>> > >               else
>> > >                       list_add_tail(&p->l_node, &rdd->middle);
>> > >       }
>> > >
>> > > -     return rdd->err == 0;
>> > > +     return rdd->err ?: 1;
>> > >  }
>> > >
>> > >  void ovl_cache_free(struct list_head *list)
>> > > @@ -223,8 +263,11 @@ void ovl_cache_free(struct list_head *list)
>> > >       struct ovl_cache_entry *p;
>> > >       struct ovl_cache_entry *n;
>> > >
>> > > -     list_for_each_entry_safe(p, n, list, l_node)
>> > > +     list_for_each_entry_safe(p, n, list, l_node) {
>> > > +             if (p->c_name != p->name)
>> > > +                     kfree(p->c_name);
>> > >               kfree(p);
>> > > +     }
>> > >
>> > >       INIT_LIST_HEAD(list);
>> > >  }
>> > > @@ -260,12 +303,36 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
>> > >  {
>> > >       struct ovl_readdir_data *rdd =
>> > >               container_of(ctx, struct ovl_readdir_data, ctx);
>> > > +     struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
>> > > +     const char *c_name = NULL;
>> > > +     char *cf_name = NULL;
>> > > +     int c_len = 0, ret;
>> > > +
>> > > +     if (ofs->casefold)
>> > > +             c_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
>> > > +
>> > > +     if (c_len <= 0) {
>> > > +             c_name = name;
>> > > +             c_len = namelen;
>> > > +     } else {
>> > > +             c_name = cf_name;
>> > > +     }
>> > >
>> > >       rdd->count++;
>> > >       if (!rdd->is_lowest)
>> > > -             return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
>> > > +             ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
>> > >       else
>> > > -             return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
>> > > +             ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
>> > > +
>> > > +     /*
>> > > +      * If ret == 1, that means that c_name is being used as part of struct
>> > > +      * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
>> > > +      * c_name was found in the rb-tree so we can free it here.
>> > > +      */
>> > > +     if (ret != 1 && c_name != name)
>> > > +             kfree(c_name);
>> > > +
>> >
>> > The semantics of this being conditionally freed is a bit annoying, as
>> > it is already replicated in 3 places. I suppose a helper would come in
>> > hand.
>>
>> Yeh.
>>
>> I have already used ovl_cache_entry_free() in my tree.
>>
>> Thanks,
>> Amir.
>>
>> >
>> > In this specific case, it could just be:
>> >
>> > if (ret != 1)
>> >         kfree(cf_name);
>> >
>> >
>> > > +     return ret >= 0;
>> > >  }
>> > >
>> > >  static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
>> > > @@ -357,12 +424,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
>> > >               .list = list,
>> > >               .root = root,
>> > >               .is_lowest = false,
>> > > +             .map = NULL,
>> > >       };
>> > >       int idx, next;
>> > >       const struct ovl_layer *layer;
>> > > +     struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
>> > >
>> > >       for (idx = 0; idx != -1; idx = next) {
>> > >               next = ovl_path_next(idx, dentry, &realpath, &layer);
>> > > +
>> > > +             if (ofs->casefold)
>> > > +                     rdd.map = sb_encoding(realpath.dentry->d_sb);
>> > > +
>> > >               rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
>> > >               rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
>> > >                                       ovl_dentry_has_xwhiteouts(dentry);
>> > > @@ -555,7 +628,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
>> > >               container_of(ctx, struct ovl_readdir_data, ctx);
>> > >
>> > >       rdd->count++;
>> > > -     p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
>> > > +     p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
>> > >       if (p == NULL) {
>> > >               rdd->err = -ENOMEM;
>> > >               return false;
>> > > @@ -1023,6 +1096,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
>> > >
>> > >  del_entry:
>> > >               list_del(&p->l_node);
>> > > +             if (p->c_name != p->name)
>> > > +                     kfree(p->c_name);
>> > >               kfree(p);
>> > >       }
>> >
>> > --
>> > Gabriel Krisman Bertazi

-- 
Gabriel Krisman Bertazi
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Gabriel Krisman Bertazi 1 month, 1 week ago
Gabriel Krisman Bertazi <gabriel@krisman.be> writes:

> Amir Goldstein <amir73il@gmail.com> writes:
>
>> On Mon, Aug 25, 2025 at 5:27 PM Amir Goldstein <amir73il@gmail.com> wrote:
>>>
>>> On Mon, Aug 25, 2025 at 1:09 PM Gabriel Krisman Bertazi
>>> <gabriel@krisman.be> wrote:
>>> >
>>> > André Almeida <andrealmeid@igalia.com> writes:
>>> >
>>> > > To add overlayfs support casefold layers, create a new function
>>> > > ovl_casefold(), to be able to do case-insensitive strncmp().
>>> > >
>>> > > ovl_casefold() allocates a new buffer and stores the casefolded version
>>> > > of the string on it. If the allocation or the casefold operation fails,
>>> > > fallback to use the original string.
>>> > >
>>> > > The case-insentive name is then used in the rb-tree search/insertion
>>> > > operation. If the name is found in the rb-tree, the name can be
>>> > > discarded and the buffer is freed. If the name isn't found, it's then
>>> > > stored at struct ovl_cache_entry to be used later.
>>> > >
>>> > > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
>>> > > Signed-off-by: André Almeida <andrealmeid@igalia.com>
>>> > > ---
>>> > > Changes from v6:
>>> > >  - Last version was using `strncmp(... tmp->len)` which was causing
>>> > >    regressions. It should be `strncmp(... len)`.
>>> > >  - Rename cf_len to c_len
>>> > >  - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
>>> > >  - Remove needless kfree(cf_name)
>>> > > ---
>>> > >  fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
>>> > >  1 file changed, 94 insertions(+), 19 deletions(-)
>>> > >
>>> > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
>>> > > index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
>>> > > --- a/fs/overlayfs/readdir.c
>>> > > +++ b/fs/overlayfs/readdir.c
>>> > > @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>>> > >       bool is_upper;
>>> > >       bool is_whiteout;
>>> > >       bool check_xwhiteout;
>>> > > +     const char *c_name;
>>> > > +     int c_len;
>>> > >       char name[];
>>> > >  };
>>> > >
>>> > > @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>>> > >       struct list_head *list;
>>> > >       struct list_head middle;
>>> > >       struct ovl_cache_entry *first_maybe_whiteout;
>>> > > +     struct unicode_map *map;
>>> > >       int count;
>>> > >       int err;
>>> > >       bool is_upper;
>>> > > @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>>> > >       return rb_entry(n, struct ovl_cache_entry, node);
>>> > >  }
>>> > >
>>> > > +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
>>> > > +{
>>> > > +     const struct qstr qstr = { .name = str, .len = len };
>>> > > +     int cf_len;
>>> > > +
>>> > > +     if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
>>> > > +             return 0;
>>> > > +
>>> > > +     *dst = kmalloc(NAME_MAX, GFP_KERNEL);
>>> > > +
>>> > > +     if (dst) {
>>> > > +             cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
>>> > > +
>>> > > +             if (cf_len > 0)
>>> > > +                     return cf_len;
>>> > > +     }
>>> > > +
>>> > > +     kfree(*dst);
>>> > > +     return 0;
>>> > > +}
>>> >
>>> > Hi,
>>> >
>>> > I should just note this does not differentiates allocation errors from
>>> > casefolding errors (invalid encoding).  It might be just a theoretical
>>> > error because GFP_KERNEL shouldn't fail (wink, wink) and the rest of the
>>> > operation is likely to fail too, but if you have an allocation failure, you
>>> > can end up with an inconsistent cache, because a file is added under the
>>> > !casefolded name and a later successful lookup will look for the
>>> > casefolded version.
>>>
>>> Good point.
>>> I will fix this in my tree.
>>
>> wait why should we not fail to fill the cache for both allocation
>> and encoding errors?
>>
>
> We shouldn't fail the cache for encoding errors, just for allocation errors.
>
> Perhaps I am misreading the code, so please correct me if I'm wrong.  if
> ovl_casefold fails, the non-casefolded name is used in the cache.  That
> makes sense if the reason utf8_casefold failed is because the string
> cannot be casefolded (i.e. an invalid utf-8 string). For those strings,
> everything is fine.  But on an allocation failure, the string might have
> a real casefolded version.  If we fallback to the original string as the
> key, a cache lookup won't find the entry, since we compare with memcmp.

I was thinking again about this and I suspect I misunderstood your
question.  let me try to answer it again:

Ext4, f2fs and tmpfs all allow invalid utf8-encoded strings in a
casefolded directory when running on non-strict-mode.  They are treated
as non-encoded byte-sequences, as if they were seen on a case-Sensitive
directory.  They can't collide with other filenames because they
basically "fold" to themselves.

Now I suspect there is another problem with this series: I don't see how
it implements the semantics of strict mode.  What happens if upper and
lower are in strict mode (which is valid, same encoding_flags) but there
is an invalid name in the lower?  overlayfs should reject the dentry,
because any attempt to create it to the upper will fail.

André, did you consider this scenario?  You can test by creating a file
with an invalid-encoded name in a casefolded directory of a
non-strict-mode filesystem and then flip the strict-mode flag in the
superblock.  I can give it a try tomorrow too.

Thanks,

-- 
Gabriel Krisman Bertazi
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 1 week ago
On Tue, Aug 26, 2025 at 3:34 AM Gabriel Krisman Bertazi <krisman@suse.de> wrote:
>
> Gabriel Krisman Bertazi <gabriel@krisman.be> writes:
>
> > Amir Goldstein <amir73il@gmail.com> writes:
> >
> >> On Mon, Aug 25, 2025 at 5:27 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >>>
> >>> On Mon, Aug 25, 2025 at 1:09 PM Gabriel Krisman Bertazi
> >>> <gabriel@krisman.be> wrote:
> >>> >
> >>> > André Almeida <andrealmeid@igalia.com> writes:
> >>> >
> >>> > > To add overlayfs support casefold layers, create a new function
> >>> > > ovl_casefold(), to be able to do case-insensitive strncmp().
> >>> > >
> >>> > > ovl_casefold() allocates a new buffer and stores the casefolded version
> >>> > > of the string on it. If the allocation or the casefold operation fails,
> >>> > > fallback to use the original string.
> >>> > >
> >>> > > The case-insentive name is then used in the rb-tree search/insertion
> >>> > > operation. If the name is found in the rb-tree, the name can be
> >>> > > discarded and the buffer is freed. If the name isn't found, it's then
> >>> > > stored at struct ovl_cache_entry to be used later.
> >>> > >
> >>> > > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> >>> > > Signed-off-by: André Almeida <andrealmeid@igalia.com>
> >>> > > ---
> >>> > > Changes from v6:
> >>> > >  - Last version was using `strncmp(... tmp->len)` which was causing
> >>> > >    regressions. It should be `strncmp(... len)`.
> >>> > >  - Rename cf_len to c_len
> >>> > >  - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
> >>> > >  - Remove needless kfree(cf_name)
> >>> > > ---
> >>> > >  fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
> >>> > >  1 file changed, 94 insertions(+), 19 deletions(-)
> >>> > >
> >>> > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> >>> > > index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
> >>> > > --- a/fs/overlayfs/readdir.c
> >>> > > +++ b/fs/overlayfs/readdir.c
> >>> > > @@ -27,6 +27,8 @@ struct ovl_cache_entry {
> >>> > >       bool is_upper;
> >>> > >       bool is_whiteout;
> >>> > >       bool check_xwhiteout;
> >>> > > +     const char *c_name;
> >>> > > +     int c_len;
> >>> > >       char name[];
> >>> > >  };
> >>> > >
> >>> > > @@ -45,6 +47,7 @@ struct ovl_readdir_data {
> >>> > >       struct list_head *list;
> >>> > >       struct list_head middle;
> >>> > >       struct ovl_cache_entry *first_maybe_whiteout;
> >>> > > +     struct unicode_map *map;
> >>> > >       int count;
> >>> > >       int err;
> >>> > >       bool is_upper;
> >>> > > @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
> >>> > >       return rb_entry(n, struct ovl_cache_entry, node);
> >>> > >  }
> >>> > >
> >>> > > +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> >>> > > +{
> >>> > > +     const struct qstr qstr = { .name = str, .len = len };
> >>> > > +     int cf_len;
> >>> > > +
> >>> > > +     if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> >>> > > +             return 0;
> >>> > > +
> >>> > > +     *dst = kmalloc(NAME_MAX, GFP_KERNEL);
> >>> > > +
> >>> > > +     if (dst) {

Andre,

Just noticed this is a bug, should have been if (*dst), but anyway following
Gabriel's comments I have made this change in my tree (pending more
strict related changes):

static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
                        char **dst)
{
        const struct qstr qstr = { .name = str, .len = len };
        char *cf_name;
        int cf_len;

        if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len))
                return 0;

        cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
        if (!cf_name) {
                rdd->err = -ENOMEM;
                return -ENOMEM;
        }

        cf_len = utf8_casefold(rdd->map, &qstr, *dst, NAME_MAX);
        if (cf_len > 0)
                *dst = cf_name;
        else
                kfree(cf_name);

        return cf_len;
}

> >>> > > +             cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> >>> > > +
> >>> > > +             if (cf_len > 0)
> >>> > > +                     return cf_len;
> >>> > > +     }
> >>> > > +
> >>> > > +     kfree(*dst);
> >>> > > +     return 0;
> >>> > > +}
> >>> >
> >>> > Hi,
> >>> >
> >>> > I should just note this does not differentiates allocation errors from
> >>> > casefolding errors (invalid encoding).  It might be just a theoretical
> >>> > error because GFP_KERNEL shouldn't fail (wink, wink) and the rest of the
> >>> > operation is likely to fail too, but if you have an allocation failure, you
> >>> > can end up with an inconsistent cache, because a file is added under the
> >>> > !casefolded name and a later successful lookup will look for the
> >>> > casefolded version.
> >>>
> >>> Good point.
> >>> I will fix this in my tree.
> >>
> >> wait why should we not fail to fill the cache for both allocation
> >> and encoding errors?
> >>
> >
> > We shouldn't fail the cache for encoding errors, just for allocation errors.
> >
> > Perhaps I am misreading the code, so please correct me if I'm wrong.  if
> > ovl_casefold fails, the non-casefolded name is used in the cache.  That
> > makes sense if the reason utf8_casefold failed is because the string
> > cannot be casefolded (i.e. an invalid utf-8 string). For those strings,
> > everything is fine.  But on an allocation failure, the string might have
> > a real casefolded version.  If we fallback to the original string as the
> > key, a cache lookup won't find the entry, since we compare with memcmp.

Just to make it clear in case the name "cache lookup" confuses anyone
on this thread - we are talking about ovl readdir cache, not about the vfs
lookup cache, the the purpose of ovl readdir cache is twofold:
1. plain in-memory readdir cache
2. (more important to this discussion) implementation of "merged dir" content

So I agree with you that with non-strict mode, invalid encoded names
should be added to readdir cache as is and not in the case of allocation
failure.

>
> I was thinking again about this and I suspect I misunderstood your
> question.  let me try to answer it again:
>
> Ext4, f2fs and tmpfs all allow invalid utf8-encoded strings in a
> casefolded directory when running on non-strict-mode.  They are treated
> as non-encoded byte-sequences, as if they were seen on a case-Sensitive
> directory.  They can't collide with other filenames because they
> basically "fold" to themselves.
>
> Now I suspect there is another problem with this series: I don't see how
> it implements the semantics of strict mode.  What happens if upper and
> lower are in strict mode (which is valid, same encoding_flags) but there
> is an invalid name in the lower?  overlayfs should reject the dentry,
> because any attempt to create it to the upper will fail.

Ok, so IIUC, one issue is that return value from ovl_casefold() should be
conditional to the sb encoding_flags, which was inherited from the layers.

Again, *IF* I understand correctly, then strict mode ext4 will not allow
creating an invalid-encoded name, but will strict mode ext4 allow
it as a valid lookup result?

>
> André, did you consider this scenario?

In general, as I have told Andre from v1, please stick to the most common
configs that people actually need.

We do NOT need to support every possible combination of layers configurations.

This is why we went with supporting all-or-nothing configs for casefolder dirs.
Because it is simpler for overlayfs semantics and good enough for what
users need.

So my question is to you both: do users actually use strict mode for
wine and such?
Because if they don't I would rather support the default mode only
(enforced on mount)
and add support for strict mode later per actual users demand.

> You can test by creating a file
> with an invalid-encoded name in a casefolded directory of a
> non-strict-mode filesystem and then flip the strict-mode flag in the
> superblock.  I can give it a try tomorrow too.

Can the sb flags be flipped in runtime? while mounted?
I suppose you are talking about an offline change that requires
re-mount of overlayfs and re-validate the same encoding flags on all layers?

Andre,

Please also add these and other casefold functional tests to fstest to
validate correctness of the merge dir implementation with different
casefold variants in different layers.

Thanks,
Amir.
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by André Almeida 1 month, 1 week ago
Em 26/08/2025 04:19, Amir Goldstein escreveu:
> 
> Andre,
> 
> Just noticed this is a bug, should have been if (*dst), but anyway following
> Gabriel's comments I have made this change in my tree (pending more
> strict related changes):
> 
> static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
>                          char **dst)
> {
>          const struct qstr qstr = { .name = str, .len = len };
>          char *cf_name;
>          int cf_len;
> 
>          if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len))
>                  return 0;
> 
>          cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
>          if (!cf_name) {
>                  rdd->err = -ENOMEM;
>                  return -ENOMEM;
>          }
> 
>          cf_len = utf8_casefold(rdd->map, &qstr, *dst, NAME_MAX);

The third argument here should be cf_name, not *dst anymore.

>          if (cf_len > 0)
>                  *dst = cf_name;
>          else
>                  kfree(cf_name);
> 
>          return cf_len;
> }
> 

> Thanks,
> Amir.
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month ago
On Wed, Aug 27, 2025 at 10:45 PM André Almeida <andrealmeid@igalia.com> wrote:
>
> Em 26/08/2025 04:19, Amir Goldstein escreveu:
> >
> > Andre,
> >
> > Just noticed this is a bug, should have been if (*dst), but anyway following
> > Gabriel's comments I have made this change in my tree (pending more
> > strict related changes):
> >
> > static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
> >                          char **dst)
> > {
> >          const struct qstr qstr = { .name = str, .len = len };
> >          char *cf_name;
> >          int cf_len;
> >
> >          if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len))
> >                  return 0;
> >
> >          cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
> >          if (!cf_name) {
> >                  rdd->err = -ENOMEM;
> >                  return -ENOMEM;
> >          }
> >
> >          cf_len = utf8_casefold(rdd->map, &qstr, *dst, NAME_MAX);
>
> The third argument here should be cf_name, not *dst anymore.

oops. fixed in my tree.

Thanks,
Amir.
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by André Almeida 1 month, 1 week ago
Em 26/08/2025 04:19, Amir Goldstein escreveu:
> On Tue, Aug 26, 2025 at 3:34 AM Gabriel Krisman Bertazi <krisman@suse.de> wrote:
>>
>> Gabriel Krisman Bertazi <gabriel@krisman.be> writes:
>>
>>> Amir Goldstein <amir73il@gmail.com> writes:
>>>
>>>> On Mon, Aug 25, 2025 at 5:27 PM Amir Goldstein <amir73il@gmail.com> wrote:
>>>>>
>>>>> On Mon, Aug 25, 2025 at 1:09 PM Gabriel Krisman Bertazi
>>>>> <gabriel@krisman.be> wrote:
>>>>>>
>>>>>> André Almeida <andrealmeid@igalia.com> writes:
>>>>>>
>>>>>>> To add overlayfs support casefold layers, create a new function
>>>>>>> ovl_casefold(), to be able to do case-insensitive strncmp().
>>>>>>>
>>>>>>> ovl_casefold() allocates a new buffer and stores the casefolded version
>>>>>>> of the string on it. If the allocation or the casefold operation fails,
>>>>>>> fallback to use the original string.
>>>>>>>
>>>>>>> The case-insentive name is then used in the rb-tree search/insertion
>>>>>>> operation. If the name is found in the rb-tree, the name can be
>>>>>>> discarded and the buffer is freed. If the name isn't found, it's then
>>>>>>> stored at struct ovl_cache_entry to be used later.
>>>>>>>
>>>>>>> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
>>>>>>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>>>>>>> ---
>>>>>>> Changes from v6:
>>>>>>>   - Last version was using `strncmp(... tmp->len)` which was causing
>>>>>>>     regressions. It should be `strncmp(... len)`.
>>>>>>>   - Rename cf_len to c_len
>>>>>>>   - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
>>>>>>>   - Remove needless kfree(cf_name)
>>>>>>> ---
>>>>>>>   fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
>>>>>>>   1 file changed, 94 insertions(+), 19 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
>>>>>>> index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
>>>>>>> --- a/fs/overlayfs/readdir.c
>>>>>>> +++ b/fs/overlayfs/readdir.c
>>>>>>> @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>>>>>>>        bool is_upper;
>>>>>>>        bool is_whiteout;
>>>>>>>        bool check_xwhiteout;
>>>>>>> +     const char *c_name;
>>>>>>> +     int c_len;
>>>>>>>        char name[];
>>>>>>>   };
>>>>>>>
>>>>>>> @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>>>>>>>        struct list_head *list;
>>>>>>>        struct list_head middle;
>>>>>>>        struct ovl_cache_entry *first_maybe_whiteout;
>>>>>>> +     struct unicode_map *map;
>>>>>>>        int count;
>>>>>>>        int err;
>>>>>>>        bool is_upper;
>>>>>>> @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>>>>>>>        return rb_entry(n, struct ovl_cache_entry, node);
>>>>>>>   }
>>>>>>>
>>>>>>> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
>>>>>>> +{
>>>>>>> +     const struct qstr qstr = { .name = str, .len = len };
>>>>>>> +     int cf_len;
>>>>>>> +
>>>>>>> +     if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
>>>>>>> +             return 0;
>>>>>>> +
>>>>>>> +     *dst = kmalloc(NAME_MAX, GFP_KERNEL);
>>>>>>> +
>>>>>>> +     if (dst) {
> 
> Andre,
> 
> Just noticed this is a bug, should have been if (*dst), but anyway following
> Gabriel's comments I have made this change in my tree (pending more
> strict related changes):
> 
> static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
>                          char **dst)
> {
>          const struct qstr qstr = { .name = str, .len = len };
>          char *cf_name;
>          int cf_len;
> 
>          if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map || is_dot_dotdot(str, len))
>                  return 0;
> 
>          cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
>          if (!cf_name) {
>                  rdd->err = -ENOMEM;
>                  return -ENOMEM;
>          }
> 
>          cf_len = utf8_casefold(rdd->map, &qstr, *dst, NAME_MAX);
>          if (cf_len > 0)
>                  *dst = cf_name;
>          else
>                  kfree(cf_name);
> 
>          return cf_len;
> }

Right, that makes sense to me. I was unsure what to do regarding 
allocation fails, but this seems the right direction. Thanks!

> 
>>>>>>> +             cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
>>>>>>> +
>>>>>>> +             if (cf_len > 0)
>>>>>>> +                     return cf_len;
>>>>>>> +     }
>>>>>>> +
>>>>>>> +     kfree(*dst);
>>>>>>> +     return 0;
>>>>>>> +}
>>>>>>
>>>>>> Hi,
>>>>>>
>>>>>> I should just note this does not differentiates allocation errors from
>>>>>> casefolding errors (invalid encoding).  It might be just a theoretical
>>>>>> error because GFP_KERNEL shouldn't fail (wink, wink) and the rest of the
>>>>>> operation is likely to fail too, but if you have an allocation failure, you
>>>>>> can end up with an inconsistent cache, because a file is added under the
>>>>>> !casefolded name and a later successful lookup will look for the
>>>>>> casefolded version.
>>>>>
>>>>> Good point.
>>>>> I will fix this in my tree.
>>>>
>>>> wait why should we not fail to fill the cache for both allocation
>>>> and encoding errors?
>>>>
>>>
>>> We shouldn't fail the cache for encoding errors, just for allocation errors.
>>>
>>> Perhaps I am misreading the code, so please correct me if I'm wrong.  if
>>> ovl_casefold fails, the non-casefolded name is used in the cache.  That
>>> makes sense if the reason utf8_casefold failed is because the string
>>> cannot be casefolded (i.e. an invalid utf-8 string). For those strings,
>>> everything is fine.  But on an allocation failure, the string might have
>>> a real casefolded version.  If we fallback to the original string as the
>>> key, a cache lookup won't find the entry, since we compare with memcmp.
> 
> Just to make it clear in case the name "cache lookup" confuses anyone
> on this thread - we are talking about ovl readdir cache, not about the vfs
> lookup cache, the the purpose of ovl readdir cache is twofold:
> 1. plain in-memory readdir cache
> 2. (more important to this discussion) implementation of "merged dir" content
> 
> So I agree with you that with non-strict mode, invalid encoded names
> should be added to readdir cache as is and not in the case of allocation
> failure.
> 
>>
>> I was thinking again about this and I suspect I misunderstood your
>> question.  let me try to answer it again:
>>
>> Ext4, f2fs and tmpfs all allow invalid utf8-encoded strings in a
>> casefolded directory when running on non-strict-mode.  They are treated
>> as non-encoded byte-sequences, as if they were seen on a case-Sensitive
>> directory.  They can't collide with other filenames because they
>> basically "fold" to themselves.
>>
>> Now I suspect there is another problem with this series: I don't see how
>> it implements the semantics of strict mode.  What happens if upper and
>> lower are in strict mode (which is valid, same encoding_flags) but there
>> is an invalid name in the lower?  overlayfs should reject the dentry,
>> because any attempt to create it to the upper will fail.
> 
> Ok, so IIUC, one issue is that return value from ovl_casefold() should be
> conditional to the sb encoding_flags, which was inherited from the layers.
> 
> Again, *IF* I understand correctly, then strict mode ext4 will not allow
> creating an invalid-encoded name, but will strict mode ext4 allow
> it as a valid lookup result?
> 
>>
>> André, did you consider this scenario?
> 
> In general, as I have told Andre from v1, please stick to the most common
> configs that people actually need.
> 
> We do NOT need to support every possible combination of layers configurations.
> 
> This is why we went with supporting all-or-nothing configs for casefolder dirs.
> Because it is simpler for overlayfs semantics and good enough for what
> users need.
> 
> So my question is to you both: do users actually use strict mode for
> wine and such?
> Because if they don't I would rather support the default mode only
> (enforced on mount)
> and add support for strict mode later per actual users demand.
> 

I agree with Gabriel, no need to add this for Wine. We can refuse to 
mount to make things easier.

>> You can test by creating a file
>> with an invalid-encoded name in a casefolded directory of a
>> non-strict-mode filesystem and then flip the strict-mode flag in the
>> superblock.  I can give it a try tomorrow too.
> 
> Can the sb flags be flipped in runtime? while mounted?
> I suppose you are talking about an offline change that requires
> re-mount of overlayfs and re-validate the same encoding flags on all layers?
> 
> Andre,
> 
> Please also add these and other casefold functional tests to fstest to
> validate correctness of the merge dir implementation with different
> casefold variants in different layers.
> 

Ok, I will add a test case to stress mounting layers with different 
encoding versions, flags and etc.

Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Gabriel Krisman Bertazi 1 month, 1 week ago
Amir Goldstein <amir73il@gmail.com> writes:

> On Tue, Aug 26, 2025 at 3:34 AM Gabriel Krisman Bertazi <krisman@suse.de> wrote:
>
>>
>> I was thinking again about this and I suspect I misunderstood your
>> question.  let me try to answer it again:
>>
>> Ext4, f2fs and tmpfs all allow invalid utf8-encoded strings in a
>> casefolded directory when running on non-strict-mode.  They are treated
>> as non-encoded byte-sequences, as if they were seen on a case-Sensitive
>> directory.  They can't collide with other filenames because they
>> basically "fold" to themselves.
>>
>> Now I suspect there is another problem with this series: I don't see how
>> it implements the semantics of strict mode.  What happens if upper and
>> lower are in strict mode (which is valid, same encoding_flags) but there
>> is an invalid name in the lower?  overlayfs should reject the dentry,
>> because any attempt to create it to the upper will fail.
>
> Ok, so IIUC, one issue is that return value from ovl_casefold() should be
> conditional to the sb encoding_flags, which was inherited from the
> layers.

yes, unless you reject mounting strict_mode filesystems, which the best
course of action, in my opinion.

>
> Again, *IF* I understand correctly, then strict mode ext4 will not allow
> creating an invalid-encoded name, but will strict mode ext4 allow
> it as a valid lookup result?

strict mode ext4 will not allow creating an invalid-encoded name. And
even lookups will fail.  Because the kernel can't casefold it, it will
assume the dirent is broken and ignore it during lookup.

(I just noticed the dirent is ignored and the error is not propagated in
ext4_match.  That needs improvement.).

>>
>> André, did you consider this scenario?
>
> In general, as I have told Andre from v1, please stick to the most common
> configs that people actually need.
>
> We do NOT need to support every possible combination of layers configurations.
>
> This is why we went with supporting all-or-nothing configs for casefolder dirs.
> Because it is simpler for overlayfs semantics and good enough for what
> users need.
>
> So my question is to you both: do users actually use strict mode for
> wine and such?
> Because if they don't I would rather support the default mode only
> (enforced on mount)
> and add support for strict mode later per actual users demand.

I doubt we care.  strict mode is a restricted version of casefolding
support with minor advantages.  Basically, with it, you can trust that
if you update the unicode version, there won't be any behavior change in
casefolding due to newly assigned code-points.  For Wine, that is
irrelevant.

You can very well reject strict mode and be done with it.

>
>> You can test by creating a file
>> with an invalid-encoded name in a casefolded directory of a
>> non-strict-mode filesystem and then flip the strict-mode flag in the
>> superblock.  I can give it a try tomorrow too.
>
> Can the sb flags be flipped in runtime? while mounted?
> I suppose you are talking about an offline change that requires
> re-mount of overlayfs and re-validate the same encoding flags on all layers?

No, it is set at mkfs-time.  The scenario I'm describing is a
filesystem corruption, where a filename has invalid characters but the
disk is in strict mode.  What I proposed is a way to test this by
crafting an image.

-- 
Gabriel Krisman Bertazi
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by André Almeida 1 month, 1 week ago

Em 26/08/2025 12:02, Gabriel Krisman Bertazi escreveu:
> Amir Goldstein <amir73il@gmail.com> writes:
> 
>> On Tue, Aug 26, 2025 at 3:34 AM Gabriel Krisman Bertazi <krisman@suse.de> wrote:
>>
>>>
>>> I was thinking again about this and I suspect I misunderstood your
>>> question.  let me try to answer it again:
>>>
>>> Ext4, f2fs and tmpfs all allow invalid utf8-encoded strings in a
>>> casefolded directory when running on non-strict-mode.  They are treated
>>> as non-encoded byte-sequences, as if they were seen on a case-Sensitive
>>> directory.  They can't collide with other filenames because they
>>> basically "fold" to themselves.
>>>
>>> Now I suspect there is another problem with this series: I don't see how
>>> it implements the semantics of strict mode.  What happens if upper and
>>> lower are in strict mode (which is valid, same encoding_flags) but there
>>> is an invalid name in the lower?  overlayfs should reject the dentry,
>>> because any attempt to create it to the upper will fail.
>>
>> Ok, so IIUC, one issue is that return value from ovl_casefold() should be
>> conditional to the sb encoding_flags, which was inherited from the
>> layers.
> 
> yes, unless you reject mounting strict_mode filesystems, which the best
> course of action, in my opinion.
> 
>>
>> Again, *IF* I understand correctly, then strict mode ext4 will not allow
>> creating an invalid-encoded name, but will strict mode ext4 allow
>> it as a valid lookup result?
> 
> strict mode ext4 will not allow creating an invalid-encoded name. And
> even lookups will fail.  Because the kernel can't casefold it, it will
> assume the dirent is broken and ignore it during lookup.
> 
> (I just noticed the dirent is ignored and the error is not propagated in
> ext4_match.  That needs improvement.).
> 
>>>
>>> André, did you consider this scenario?
>>
>> In general, as I have told Andre from v1, please stick to the most common
>> configs that people actually need.
>>
>> We do NOT need to support every possible combination of layers configurations.
>>
>> This is why we went with supporting all-or-nothing configs for casefolder dirs.
>> Because it is simpler for overlayfs semantics and good enough for what
>> users need.
>>
>> So my question is to you both: do users actually use strict mode for
>> wine and such?
>> Because if they don't I would rather support the default mode only
>> (enforced on mount)
>> and add support for strict mode later per actual users demand.
> 
> I doubt we care.  strict mode is a restricted version of casefolding
> support with minor advantages.  Basically, with it, you can trust that
> if you update the unicode version, there won't be any behavior change in
> casefolding due to newly assigned code-points.  For Wine, that is
> irrelevant.
> 
> You can very well reject strict mode and be done with it.
> 

Amir,

I think this can be done at ovl_get_layers(), something like:

if (sb_has_strict_encoding(sb)) {
	pr_err("strict encoding not supported\n");
	return -EINVAL;
}

Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 1 week ago
On Tue, Aug 26, 2025 at 9:58 PM André Almeida <andrealmeid@igalia.com> wrote:
>
>
>
> Em 26/08/2025 12:02, Gabriel Krisman Bertazi escreveu:
> > Amir Goldstein <amir73il@gmail.com> writes:
> >
> >> On Tue, Aug 26, 2025 at 3:34 AM Gabriel Krisman Bertazi <krisman@suse.de> wrote:
> >>
> >>>
> >>> I was thinking again about this and I suspect I misunderstood your
> >>> question.  let me try to answer it again:
> >>>
> >>> Ext4, f2fs and tmpfs all allow invalid utf8-encoded strings in a
> >>> casefolded directory when running on non-strict-mode.  They are treated
> >>> as non-encoded byte-sequences, as if they were seen on a case-Sensitive
> >>> directory.  They can't collide with other filenames because they
> >>> basically "fold" to themselves.
> >>>
> >>> Now I suspect there is another problem with this series: I don't see how
> >>> it implements the semantics of strict mode.  What happens if upper and
> >>> lower are in strict mode (which is valid, same encoding_flags) but there
> >>> is an invalid name in the lower?  overlayfs should reject the dentry,
> >>> because any attempt to create it to the upper will fail.
> >>
> >> Ok, so IIUC, one issue is that return value from ovl_casefold() should be
> >> conditional to the sb encoding_flags, which was inherited from the
> >> layers.
> >
> > yes, unless you reject mounting strict_mode filesystems, which the best
> > course of action, in my opinion.
> >
> >>
> >> Again, *IF* I understand correctly, then strict mode ext4 will not allow
> >> creating an invalid-encoded name, but will strict mode ext4 allow
> >> it as a valid lookup result?
> >
> > strict mode ext4 will not allow creating an invalid-encoded name. And
> > even lookups will fail.  Because the kernel can't casefold it, it will
> > assume the dirent is broken and ignore it during lookup.
> >
> > (I just noticed the dirent is ignored and the error is not propagated in
> > ext4_match.  That needs improvement.).
> >
> >>>
> >>> André, did you consider this scenario?
> >>
> >> In general, as I have told Andre from v1, please stick to the most common
> >> configs that people actually need.
> >>
> >> We do NOT need to support every possible combination of layers configurations.
> >>
> >> This is why we went with supporting all-or-nothing configs for casefolder dirs.
> >> Because it is simpler for overlayfs semantics and good enough for what
> >> users need.
> >>
> >> So my question is to you both: do users actually use strict mode for
> >> wine and such?
> >> Because if they don't I would rather support the default mode only
> >> (enforced on mount)
> >> and add support for strict mode later per actual users demand.
> >
> > I doubt we care.  strict mode is a restricted version of casefolding
> > support with minor advantages.  Basically, with it, you can trust that
> > if you update the unicode version, there won't be any behavior change in
> > casefolding due to newly assigned code-points.  For Wine, that is
> > irrelevant.
> >
> > You can very well reject strict mode and be done with it.
> >
>
> Amir,
>
> I think this can be done at ovl_get_layers(), something like:
>
> if (sb_has_strict_encoding(sb)) {
>         pr_err("strict encoding not supported\n");
>         return -EINVAL;
> }
>

Yap, I've put it into ovl_set_encoding() to warn more accurately
on upper fs:

/*
 * Set the ovl sb encoding as the same one used by the first layer
 */
static int ovl_set_encoding(struct super_block *sb, struct super_block *fs_sb)
{
        if (!sb_has_encoding(fs_sb))
                return 0;

#if IS_ENABLED(CONFIG_UNICODE)
        if (sb_has_strict_encoding(fs_sb)) {
                pr_err("strict encoding not supported\n");
                return -EINVAL;
        }

        sb->s_encoding = fs_sb->s_encoding;
        sb->s_encoding_flags = fs_sb->s_encoding_flags;
#endif
        return 0;
}

Thanks,
Amir.
Re: [PATCH v6 4/9] ovl: Create ovl_casefold() to support casefolded strncmp()
Posted by Amir Goldstein 1 month, 1 week ago
On Fri, Aug 22, 2025 at 4:17 PM André Almeida <andrealmeid@igalia.com> wrote:
>
> To add overlayfs support casefold layers, create a new function
> ovl_casefold(), to be able to do case-insensitive strncmp().
>
> ovl_casefold() allocates a new buffer and stores the casefolded version
> of the string on it. If the allocation or the casefold operation fails,
> fallback to use the original string.
>
> The case-insentive name is then used in the rb-tree search/insertion
> operation. If the name is found in the rb-tree, the name can be
> discarded and the buffer is freed. If the name isn't found, it's then
> stored at struct ovl_cache_entry to be used later.
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
> Changes from v6:
>  - Last version was using `strncmp(... tmp->len)` which was causing
>    regressions. It should be `strncmp(... len)`.
>  - Rename cf_len to c_len
>  - Use c_len for tree operation: (cmp < 0 || len < tmp->c_len)
>  - Remove needless kfree(cf_name)
> ---
>  fs/overlayfs/readdir.c | 113 ++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 94 insertions(+), 19 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index b65cdfce31ce27172d28d879559f1008b9c87320..dfc661b7bc3f87efbf14991e97cee169400d823b 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -27,6 +27,8 @@ struct ovl_cache_entry {
>         bool is_upper;
>         bool is_whiteout;
>         bool check_xwhiteout;
> +       const char *c_name;
> +       int c_len;
>         char name[];
>  };
>
> @@ -45,6 +47,7 @@ struct ovl_readdir_data {
>         struct list_head *list;
>         struct list_head middle;
>         struct ovl_cache_entry *first_maybe_whiteout;
> +       struct unicode_map *map;
>         int count;
>         int err;
>         bool is_upper;
> @@ -66,6 +69,27 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>         return rb_entry(n, struct ovl_cache_entry, node);
>  }
>
> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> +{
> +       const struct qstr qstr = { .name = str, .len = len };
> +       int cf_len;
> +
> +       if (!IS_ENABLED(CONFIG_UNICODE) || !map || is_dot_dotdot(str, len))
> +               return 0;
> +
> +       *dst = kmalloc(NAME_MAX, GFP_KERNEL);
> +
> +       if (dst) {
> +               cf_len = utf8_casefold(map, &qstr, *dst, NAME_MAX);
> +
> +               if (cf_len > 0)
> +                       return cf_len;
> +       }
> +
> +       kfree(*dst);
> +       return 0;
> +}
> +
>  static bool ovl_cache_entry_find_link(const char *name, int len,
>                                       struct rb_node ***link,
>                                       struct rb_node **parent)
> @@ -79,10 +103,10 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>
>                 *parent = *newp;
>                 tmp = ovl_cache_entry_from_node(*newp);
> -               cmp = strncmp(name, tmp->name, len);
> +               cmp = strncmp(name, tmp->c_name, len);
>                 if (cmp > 0)
>                         newp = &tmp->node.rb_right;
> -               else if (cmp < 0 || len < tmp->len)
> +               else if (cmp < 0 || len < tmp->c_len)
>                         newp = &tmp->node.rb_left;
>                 else
>                         found = true;
> @@ -101,10 +125,10 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
>         while (node) {
>                 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
>
> -               cmp = strncmp(name, p->name, len);
> +               cmp = strncmp(name, p->c_name, len);
>                 if (cmp > 0)
>                         node = p->node.rb_right;
> -               else if (cmp < 0 || len < p->len)
> +               else if (cmp < 0 || len < p->c_len)
>                         node = p->node.rb_left;
>                 else
>                         return p;
> @@ -145,6 +169,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
>
>  static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>                                                    const char *name, int len,
> +                                                  const char *c_name, int c_len,
>                                                    u64 ino, unsigned int d_type)
>  {
>         struct ovl_cache_entry *p;
> @@ -167,6 +192,14 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>         /* Defer check for overlay.whiteout to ovl_iterate() */
>         p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
>
> +       if (c_name && c_name != name) {
> +               p->c_name = c_name;
> +               p->c_len = c_len;
> +       } else {
> +               p->c_name = p->name;
> +               p->c_len = len;
> +       }
> +
>         if (d_type == DT_CHR) {
>                 p->next_maybe_whiteout = rdd->first_maybe_whiteout;
>                 rdd->first_maybe_whiteout = p;
> @@ -174,48 +207,55 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
>         return p;
>  }
>
> -static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> -                                 const char *name, int len, u64 ino,
> +/* Return 0 for found, 1 for added, <0 for error */
> +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
> +                                 const char *name, int len,
> +                                 const char *c_name, int c_len,
> +                                 u64 ino,
>                                   unsigned int d_type)
>  {
>         struct rb_node **newp = &rdd->root->rb_node;
>         struct rb_node *parent = NULL;
>         struct ovl_cache_entry *p;
>
> -       if (ovl_cache_entry_find_link(name, len, &newp, &parent))
> -               return true;
> +       if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
> +               return 0;
>
> -       p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
> +       p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
>         if (p == NULL) {
>                 rdd->err = -ENOMEM;
> -               return false;
> +               return -ENOMEM;
>         }
>
>         list_add_tail(&p->l_node, rdd->list);
>         rb_link_node(&p->node, parent, newp);
>         rb_insert_color(&p->node, rdd->root);
>
> -       return true;
> +       return 1;
>  }
>
> -static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
> +/* Return 0 for found, 1 for added, <0 for error */
> +static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
>                            const char *name, int namelen,
> +                          const char *c_name, int c_len,
>                            loff_t offset, u64 ino, unsigned int d_type)
>  {
>         struct ovl_cache_entry *p;
>
> -       p = ovl_cache_entry_find(rdd->root, name, namelen);
> +       p = ovl_cache_entry_find(rdd->root, c_name, c_len);
>         if (p) {
>                 list_move_tail(&p->l_node, &rdd->middle);
> +               return 0;
>         } else {
> -               p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> +               p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
> +                                       ino, d_type);
>                 if (p == NULL)
>                         rdd->err = -ENOMEM;
>                 else
>                         list_add_tail(&p->l_node, &rdd->middle);
>         }
>
> -       return rdd->err == 0;
> +       return rdd->err ?: 1;
>  }
>
>  void ovl_cache_free(struct list_head *list)
> @@ -223,8 +263,11 @@ void ovl_cache_free(struct list_head *list)
>         struct ovl_cache_entry *p;
>         struct ovl_cache_entry *n;
>
> -       list_for_each_entry_safe(p, n, list, l_node)
> +       list_for_each_entry_safe(p, n, list, l_node) {
> +               if (p->c_name != p->name)
> +                       kfree(p->c_name);
>                 kfree(p);
> +       }
>
>         INIT_LIST_HEAD(list);
>  }
> @@ -260,12 +303,36 @@ static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
>  {
>         struct ovl_readdir_data *rdd =
>                 container_of(ctx, struct ovl_readdir_data, ctx);
> +       struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
> +       const char *c_name = NULL;
> +       char *cf_name = NULL;
> +       int c_len = 0, ret;
> +
> +       if (ofs->casefold)
> +               c_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
> +
> +       if (c_len <= 0) {
> +               c_name = name;
> +               c_len = namelen;
> +       } else {
> +               c_name = cf_name;
> +       }
>
>         rdd->count++;
>         if (!rdd->is_lowest)
> -               return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
> +               ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
>         else
> -               return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
> +               ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
> +
> +       /*
> +        * If ret == 1, that means that c_name is being used as part of struct
> +        * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
> +        * c_name was found in the rb-tree so we can free it here.
> +        */
> +       if (ret != 1 && c_name != name)
> +               kfree(c_name);
> +
> +       return ret >= 0;
>  }
>
>  static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
> @@ -357,12 +424,18 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
>                 .list = list,
>                 .root = root,
>                 .is_lowest = false,
> +               .map = NULL,
>         };
>         int idx, next;
>         const struct ovl_layer *layer;
> +       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
>
>         for (idx = 0; idx != -1; idx = next) {
>                 next = ovl_path_next(idx, dentry, &realpath, &layer);
> +
> +               if (ofs->casefold)
> +                       rdd.map = sb_encoding(realpath.dentry->d_sb);
> +
>                 rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
>                 rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
>                                         ovl_dentry_has_xwhiteouts(dentry);
> @@ -555,7 +628,7 @@ static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
>                 container_of(ctx, struct ovl_readdir_data, ctx);
>
>         rdd->count++;
> -       p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
> +       p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
>         if (p == NULL) {
>                 rdd->err = -ENOMEM;
>                 return false;
> @@ -1023,6 +1096,8 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
>
>  del_entry:
>                 list_del(&p->l_node);
> +               if (p->c_name != p->name)
> +                       kfree(p->c_name);
>                 kfree(p);

OK I thought this was contained in ovl_cache_free().
If we need to repeat this check, we need a helper
ovl_cache_entry_free() to use instead of kfree(p)
everywhere even in ovl_dir_read_impure() when it won't
actually be needed.

I can make this change on commit no need to repost.

Thanks,
Amir.