[PATCH v2 2/2] lib/string.c: Add wcslen()

Nathan Chancellor posted 2 patches 8 months, 3 weeks ago
There is a newer version of this series
[PATCH v2 2/2] lib/string.c: Add wcslen()
Posted by Nathan Chancellor 8 months, 3 weeks ago
A recent optimization change in LLVM [1] aims to transform certain loop
idioms into calls to strlen() or wcslen(). This change transforms the
first while loop in UniStrcat() into a call to wcslen(), breaking the
build when UniStrcat() gets inlined into alloc_path_with_tree_prefix():

  ld.lld: error: undefined symbol: wcslen
  >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
  >>>               vmlinux.o:(alloc_path_with_tree_prefix)
  >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
  >>>               vmlinux.o:(alloc_path_with_tree_prefix)

The kernel does not build with '-ffreestanding' (which would avoid this
transformation) because it does want libcall optimizations in general
and turning on '-ffreestanding' disables the majority of them. While
'-fno-builtin-wcslen' would be more targeted at the problem, it does not
work with LTO.

Add a basic wcslen() to avoid this linkage failure. While no
architecture or FORTIFY_SOURCE overrides this, add it to string.c
instead of string_helpers.c so that it is built with '-ffreestanding',
otherwise the compiler might transform it into a call to itself.

Cc: stable@vger.kernel.org
Link: https://github.com/llvm/llvm-project/commit/9694844d7e36fd5e01011ab56b64f27b867aa72d [1]
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
---
 include/linux/string.h |  2 ++
 lib/string.c           | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/linux/string.h b/include/linux/string.h
index 0403a4ca4c11..4a48f8eac301 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -7,6 +7,7 @@
 #include <linux/cleanup.h>	/* for DEFINE_FREE() */
 #include <linux/compiler.h>	/* for inline */
 #include <linux/types.h>	/* for size_t */
+#include <linux/nls_types.h>	/* for wchar_t */
 #include <linux/stddef.h>	/* for NULL */
 #include <linux/err.h>		/* for ERR_PTR() */
 #include <linux/errno.h>	/* for E2BIG */
@@ -203,6 +204,7 @@ extern __kernel_size_t strlen(const char *);
 #ifndef __HAVE_ARCH_STRNLEN
 extern __kernel_size_t strnlen(const char *,__kernel_size_t);
 #endif
+extern __kernel_size_t wcslen(const wchar_t *s);
 #ifndef __HAVE_ARCH_STRPBRK
 extern char * strpbrk(const char *,const char *);
 #endif
diff --git a/lib/string.c b/lib/string.c
index eb4486ed40d2..2c6f8c8f4159 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -21,6 +21,7 @@
 #include <linux/errno.h>
 #include <linux/limits.h>
 #include <linux/linkage.h>
+#include <linux/nls_types.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
@@ -429,6 +430,16 @@ size_t strnlen(const char *s, size_t count)
 EXPORT_SYMBOL(strnlen);
 #endif
 
+size_t wcslen(const wchar_t *s)
+{
+	const wchar_t *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		/* nothing */;
+	return sc - s;
+}
+EXPORT_SYMBOL(wcslen);
+
 #ifndef __HAVE_ARCH_STRSPN
 /**
  * strspn - Calculate the length of the initial substring of @s which only contain letters in @accept

-- 
2.49.0
Re: [PATCH v2 2/2] lib/string.c: Add wcslen()
Posted by Andy Shevchenko 8 months, 3 weeks ago
On Wed, Mar 26, 2025 at 7:19 PM Nathan Chancellor <nathan@kernel.org> wrote:
>
> A recent optimization change in LLVM [1] aims to transform certain loop
> idioms into calls to strlen() or wcslen(). This change transforms the
> first while loop in UniStrcat() into a call to wcslen(), breaking the
> build when UniStrcat() gets inlined into alloc_path_with_tree_prefix():
>
>   ld.lld: error: undefined symbol: wcslen
>   >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
>   >>>               vmlinux.o:(alloc_path_with_tree_prefix)
>   >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
>   >>>               vmlinux.o:(alloc_path_with_tree_prefix)
>
> The kernel does not build with '-ffreestanding' (which would avoid this
> transformation) because it does want libcall optimizations in general
> and turning on '-ffreestanding' disables the majority of them. While
> '-fno-builtin-wcslen' would be more targeted at the problem, it does not
> work with LTO.
>
> Add a basic wcslen() to avoid this linkage failure. While no
> architecture or FORTIFY_SOURCE overrides this, add it to string.c
> instead of string_helpers.c so that it is built with '-ffreestanding',
> otherwise the compiler might transform it into a call to itself.

...

> --- a/include/linux/string.h
> +++ b/include/linux/string.h
> @@ -7,6 +7,7 @@
>  #include <linux/cleanup.h>     /* for DEFINE_FREE() */
>  #include <linux/compiler.h>    /* for inline */
>  #include <linux/types.h>       /* for size_t */

> +#include <linux/nls_types.h>   /* for wchar_t */

I know it's not ordered, but can we at least not make it worse, i.e.
squeeze this to be after the compiler.h? Or even somewhere after below
the err*.h? Whatever gives a better (sparsely) ordered overall
result...

>  #include <linux/stddef.h>      /* for NULL */
>  #include <linux/err.h>         /* for ERR_PTR() */

...

>  #ifndef __HAVE_ARCH_STRNLEN
>  extern __kernel_size_t strnlen(const char *,__kernel_size_t);
>  #endif
> +extern __kernel_size_t wcslen(const wchar_t *s);

I'm wondering why we still continue putting this 'extern' keyword.
Yes, I see that the rest is like this, but for new code do we really
need it?

>  #ifndef __HAVE_ARCH_STRPBRK
>  extern char * strpbrk(const char *,const char *);
>  #endif


-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH v2 2/2] lib/string.c: Add wcslen()
Posted by Nathan Chancellor 8 months, 3 weeks ago
On Wed, Mar 26, 2025 at 08:39:37PM +0200, Andy Shevchenko wrote:
> On Wed, Mar 26, 2025 at 7:19 PM Nathan Chancellor <nathan@kernel.org> wrote:
> >  #ifndef __HAVE_ARCH_STRNLEN
> >  extern __kernel_size_t strnlen(const char *,__kernel_size_t);
> >  #endif
> > +extern __kernel_size_t wcslen(const wchar_t *s);
> 
> I'm wondering why we still continue putting this 'extern' keyword.
> Yes, I see that the rest is like this, but for new code do we really
> need it?

Yeah, I just did it to keep it consistent with what is around it but
there should be no reason that it cannot be removed. I am happy to do
that in v3 if desired.

Cheers,
Nathan
Re: [PATCH v2 2/2] lib/string.c: Add wcslen()
Posted by Andy Shevchenko 8 months, 3 weeks ago
On Wed, Mar 26, 2025 at 8:39 PM Andy Shevchenko
<andy.shevchenko@gmail.com> wrote:
> On Wed, Mar 26, 2025 at 7:19 PM Nathan Chancellor <nathan@kernel.org> wrote:

...

> > --- a/include/linux/string.h
> > +++ b/include/linux/string.h
> > @@ -7,6 +7,7 @@
> >  #include <linux/cleanup.h>     /* for DEFINE_FREE() */
> >  #include <linux/compiler.h>    /* for inline */
> >  #include <linux/types.h>       /* for size_t */
>
> > +#include <linux/nls_types.h>   /* for wchar_t */
>
> I know it's not ordered, but can we at least not make it worse, i.e.
> squeeze this to be after the compiler.h? Or even somewhere after below
> the err*.h? Whatever gives a better (sparsely) ordered overall
> result...

I just checked, and the only unordered piece is those two: types +
stddef right now, and if you move nls_types.h after errno.h it will
keep the status quo.

> >  #include <linux/stddef.h>      /* for NULL */
> >  #include <linux/err.h>         /* for ERR_PTR() */

-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH v2 2/2] lib/string.c: Add wcslen()
Posted by Nathan Chancellor 8 months, 3 weeks ago
On Wed, Mar 26, 2025 at 08:41:44PM +0200, Andy Shevchenko wrote:
> On Wed, Mar 26, 2025 at 8:39 PM Andy Shevchenko
> <andy.shevchenko@gmail.com> wrote:
> > On Wed, Mar 26, 2025 at 7:19 PM Nathan Chancellor <nathan@kernel.org> wrote:
> 
> ...
> 
> > > --- a/include/linux/string.h
> > > +++ b/include/linux/string.h
> > > @@ -7,6 +7,7 @@
> > >  #include <linux/cleanup.h>     /* for DEFINE_FREE() */
> > >  #include <linux/compiler.h>    /* for inline */
> > >  #include <linux/types.h>       /* for size_t */
> >
> > > +#include <linux/nls_types.h>   /* for wchar_t */
> >
> > I know it's not ordered, but can we at least not make it worse, i.e.
> > squeeze this to be after the compiler.h? Or even somewhere after below
> > the err*.h? Whatever gives a better (sparsely) ordered overall
> > result...
> 
> I just checked, and the only unordered piece is those two: types +
> stddef right now, and if you move nls_types.h after errno.h it will
> keep the status quo.
> 
> > >  #include <linux/stddef.h>      /* for NULL */
> > >  #include <linux/err.h>         /* for ERR_PTR() */

Yeah, I had noticed there was no alphabetical consistency, so I decided
to use this place to keep the types together but I have no strong
opinion. I can send v3 tomorrow unless Kees is happy with this version
and is okay with just applying this diff on top.

Cheers,
Nathan

diff --git a/include/linux/string.h b/include/linux/string.h
index 4a48f8eac301..750715768a62 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -7,10 +7,10 @@
 #include <linux/cleanup.h>	/* for DEFINE_FREE() */
 #include <linux/compiler.h>	/* for inline */
 #include <linux/types.h>	/* for size_t */
-#include <linux/nls_types.h>	/* for wchar_t */
 #include <linux/stddef.h>	/* for NULL */
 #include <linux/err.h>		/* for ERR_PTR() */
 #include <linux/errno.h>	/* for E2BIG */
+#include <linux/nls_types.h>	/* for wchar_t */
 #include <linux/overflow.h>	/* for check_mul_overflow() */
 #include <linux/stdarg.h>
 #include <uapi/linux/string.h>