[PATCH v2 03/18] lib/crc32: expose whether the lib is really optimized at runtime

Eric Biggers posted 18 patches 1 month ago
There is a newer version of this series
[PATCH v2 03/18] lib/crc32: expose whether the lib is really optimized at runtime
Posted by Eric Biggers 1 month ago
From: Eric Biggers <ebiggers@google.com>

Make the CRC32 library export some flags that indicate which CRC32
functions are actually executing optimized code at runtime.  Set these
correctly from the architectures that implement the CRC32 functions.

This will be used to determine whether the crc32[c]-$arch shash
algorithms should be registered in the crypto API.  btrfs could also
start using these flags instead of the hack that it currently uses where
it parses the crypto_shash_driver_name.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/arm64/lib/crc32-glue.c  | 15 +++++++++++++++
 arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++
 include/linux/crc32.h        | 15 +++++++++++++++
 lib/crc32.c                  |  5 +++++
 4 files changed, 50 insertions(+)

diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c
index d7f6e1cbf0d2..16f2b7c04294 100644
--- a/arch/arm64/lib/crc32-glue.c
+++ b/arch/arm64/lib/crc32-glue.c
@@ -83,7 +83,22 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
 
 	return crc32_be_arm64(crc, p, len);
 }
 EXPORT_SYMBOL(crc32_be_arch);
 
+static int __init crc32_arm64_init(void)
+{
+	if (cpus_have_cap(ARM64_HAS_CRC32))
+		crc32_optimizations = CRC32_LE_OPTIMIZATION |
+				      CRC32_BE_OPTIMIZATION |
+				      CRC32C_OPTIMIZATION;
+	return 0;
+}
+arch_initcall(crc32_arm64_init);
+
+static void __exit crc32_arm64_exit(void)
+{
+}
+module_exit(crc32_arm64_exit);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c
index a3ff7db2a1ce..a61c13d89364 100644
--- a/arch/riscv/lib/crc32-riscv.c
+++ b/arch/riscv/lib/crc32-riscv.c
@@ -295,7 +295,22 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
 legacy:
 	return crc32_be_base(crc, p, len);
 }
 EXPORT_SYMBOL(crc32_be_arch);
 
+static int __init crc32_riscv_init(void)
+{
+	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+		crc32_optimizations = CRC32_LE_OPTIMIZATION |
+				      CRC32_BE_OPTIMIZATION |
+				      CRC32C_OPTIMIZATION;
+	return 0;
+}
+arch_initcall(crc32_riscv_init);
+
+static void __exit crc32_riscv_exit(void)
+{
+}
+module_exit(crc32_riscv_exit);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 58c632533b08..bf26d454b60d 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
 	if (IS_ENABLED(CONFIG_CRC32_ARCH))
 		return crc32c_le_arch(crc, p, len);
 	return crc32c_le_base(crc, p, len);
 }
 
+/*
+ * crc32_optimizations contains flags that indicate which CRC32 library
+ * functions are using architecture-specific optimizations.  Unlike
+ * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32
+ * variants and also whether any needed CPU features are available at runtime.
+ */
+#define CRC32_LE_OPTIMIZATION	BIT(0) /* crc32_le() is optimized */
+#define CRC32_BE_OPTIMIZATION	BIT(1) /* crc32_be() is optimized */
+#define CRC32C_OPTIMIZATION	BIT(2) /* __crc32c_le() is optimized */
+#if IS_ENABLED(CONFIG_CRC32_ARCH)
+extern u32 crc32_optimizations;
+#else
+#define crc32_optimizations 0
+#endif
+
 /**
  * crc32_le_combine - Combine two crc32 check values into one. For two
  * 		      sequences of bytes, seq1 and seq2 with lengths len1
  * 		      and len2, crc32_le() check values were calculated
  * 		      for each, crc1 and crc2.
diff --git a/lib/crc32.c b/lib/crc32.c
index 47151624332e..194de73f30f8 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -336,5 +336,10 @@ u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
 {
 	return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
 }
 #endif
 EXPORT_SYMBOL(crc32_be_base);
+
+#if IS_ENABLED(CONFIG_CRC32_ARCH)
+u32 crc32_optimizations;
+EXPORT_SYMBOL(crc32_optimizations);
+#endif
-- 
2.47.0
Re: [PATCH v2 03/18] lib/crc32: expose whether the lib is really optimized at runtime
Posted by Ard Biesheuvel 1 month ago
On Fri, 25 Oct 2024 at 21:15, Eric Biggers <ebiggers@kernel.org> wrote:
>
> From: Eric Biggers <ebiggers@google.com>
>
> Make the CRC32 library export some flags that indicate which CRC32
> functions are actually executing optimized code at runtime.  Set these
> correctly from the architectures that implement the CRC32 functions.
>
> This will be used to determine whether the crc32[c]-$arch shash
> algorithms should be registered in the crypto API.  btrfs could also
> start using these flags instead of the hack that it currently uses where
> it parses the crypto_shash_driver_name.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  arch/arm64/lib/crc32-glue.c  | 15 +++++++++++++++
>  arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++
>  include/linux/crc32.h        | 15 +++++++++++++++
>  lib/crc32.c                  |  5 +++++
>  4 files changed, 50 insertions(+)
>
...
> diff --git a/include/linux/crc32.h b/include/linux/crc32.h
> index 58c632533b08..bf26d454b60d 100644
> --- a/include/linux/crc32.h
> +++ b/include/linux/crc32.h
> @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
>         if (IS_ENABLED(CONFIG_CRC32_ARCH))
>                 return crc32c_le_arch(crc, p, len);
>         return crc32c_le_base(crc, p, len);
>  }
>
> +/*
> + * crc32_optimizations contains flags that indicate which CRC32 library
> + * functions are using architecture-specific optimizations.  Unlike
> + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32
> + * variants and also whether any needed CPU features are available at runtime.
> + */
> +#define CRC32_LE_OPTIMIZATION  BIT(0) /* crc32_le() is optimized */
> +#define CRC32_BE_OPTIMIZATION  BIT(1) /* crc32_be() is optimized */
> +#define CRC32C_OPTIMIZATION    BIT(2) /* __crc32c_le() is optimized */
> +#if IS_ENABLED(CONFIG_CRC32_ARCH)
> +extern u32 crc32_optimizations;
> +#else
> +#define crc32_optimizations 0
> +#endif
> +

Wouldn't it be cleaner to add a new library function for this, instead
of using a global variable?

>  /**
>   * crc32_le_combine - Combine two crc32 check values into one. For two
>   *                   sequences of bytes, seq1 and seq2 with lengths len1
>   *                   and len2, crc32_le() check values were calculated
>   *                   for each, crc1 and crc2.
> diff --git a/lib/crc32.c b/lib/crc32.c
> index 47151624332e..194de73f30f8 100644
> --- a/lib/crc32.c
> +++ b/lib/crc32.c
> @@ -336,5 +336,10 @@ u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
>  {
>         return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
>  }
>  #endif
>  EXPORT_SYMBOL(crc32_be_base);
> +
> +#if IS_ENABLED(CONFIG_CRC32_ARCH)
> +u32 crc32_optimizations;
> +EXPORT_SYMBOL(crc32_optimizations);
> +#endif
> --
> 2.47.0
>
>
Re: [PATCH v2 03/18] lib/crc32: expose whether the lib is really optimized at runtime
Posted by Eric Biggers 1 month ago
On Fri, Oct 25, 2024 at 10:32:14PM +0200, Ard Biesheuvel wrote:
> On Fri, 25 Oct 2024 at 21:15, Eric Biggers <ebiggers@kernel.org> wrote:
> >
> > From: Eric Biggers <ebiggers@google.com>
> >
> > Make the CRC32 library export some flags that indicate which CRC32
> > functions are actually executing optimized code at runtime.  Set these
> > correctly from the architectures that implement the CRC32 functions.
> >
> > This will be used to determine whether the crc32[c]-$arch shash
> > algorithms should be registered in the crypto API.  btrfs could also
> > start using these flags instead of the hack that it currently uses where
> > it parses the crypto_shash_driver_name.
> >
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > ---
> >  arch/arm64/lib/crc32-glue.c  | 15 +++++++++++++++
> >  arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++
> >  include/linux/crc32.h        | 15 +++++++++++++++
> >  lib/crc32.c                  |  5 +++++
> >  4 files changed, 50 insertions(+)
> >
> ...
> > diff --git a/include/linux/crc32.h b/include/linux/crc32.h
> > index 58c632533b08..bf26d454b60d 100644
> > --- a/include/linux/crc32.h
> > +++ b/include/linux/crc32.h
> > @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
> >         if (IS_ENABLED(CONFIG_CRC32_ARCH))
> >                 return crc32c_le_arch(crc, p, len);
> >         return crc32c_le_base(crc, p, len);
> >  }
> >
> > +/*
> > + * crc32_optimizations contains flags that indicate which CRC32 library
> > + * functions are using architecture-specific optimizations.  Unlike
> > + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32
> > + * variants and also whether any needed CPU features are available at runtime.
> > + */
> > +#define CRC32_LE_OPTIMIZATION  BIT(0) /* crc32_le() is optimized */
> > +#define CRC32_BE_OPTIMIZATION  BIT(1) /* crc32_be() is optimized */
> > +#define CRC32C_OPTIMIZATION    BIT(2) /* __crc32c_le() is optimized */
> > +#if IS_ENABLED(CONFIG_CRC32_ARCH)
> > +extern u32 crc32_optimizations;
> > +#else
> > +#define crc32_optimizations 0
> > +#endif
> > +
> 
> Wouldn't it be cleaner to add a new library function for this, instead
> of using a global variable?

The architecture crc32 modules need to be able to write to this.  There could be
a setter function and a getter function, but just using a variable is simpler.

- Eric
Re: [PATCH v2 03/18] lib/crc32: expose whether the lib is really optimized at runtime
Posted by Ard Biesheuvel 1 month ago
On Fri, 25 Oct 2024 at 23:32, Eric Biggers <ebiggers@kernel.org> wrote:
>
> On Fri, Oct 25, 2024 at 10:32:14PM +0200, Ard Biesheuvel wrote:
> > On Fri, 25 Oct 2024 at 21:15, Eric Biggers <ebiggers@kernel.org> wrote:
> > >
> > > From: Eric Biggers <ebiggers@google.com>
> > >
> > > Make the CRC32 library export some flags that indicate which CRC32
> > > functions are actually executing optimized code at runtime.  Set these
> > > correctly from the architectures that implement the CRC32 functions.
> > >
> > > This will be used to determine whether the crc32[c]-$arch shash
> > > algorithms should be registered in the crypto API.  btrfs could also
> > > start using these flags instead of the hack that it currently uses where
> > > it parses the crypto_shash_driver_name.
> > >
> > > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > > ---
> > >  arch/arm64/lib/crc32-glue.c  | 15 +++++++++++++++
> > >  arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++
> > >  include/linux/crc32.h        | 15 +++++++++++++++
> > >  lib/crc32.c                  |  5 +++++
> > >  4 files changed, 50 insertions(+)
> > >
> > ...
> > > diff --git a/include/linux/crc32.h b/include/linux/crc32.h
> > > index 58c632533b08..bf26d454b60d 100644
> > > --- a/include/linux/crc32.h
> > > +++ b/include/linux/crc32.h
> > > @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
> > >         if (IS_ENABLED(CONFIG_CRC32_ARCH))
> > >                 return crc32c_le_arch(crc, p, len);
> > >         return crc32c_le_base(crc, p, len);
> > >  }
> > >
> > > +/*
> > > + * crc32_optimizations contains flags that indicate which CRC32 library
> > > + * functions are using architecture-specific optimizations.  Unlike
> > > + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32
> > > + * variants and also whether any needed CPU features are available at runtime.
> > > + */
> > > +#define CRC32_LE_OPTIMIZATION  BIT(0) /* crc32_le() is optimized */
> > > +#define CRC32_BE_OPTIMIZATION  BIT(1) /* crc32_be() is optimized */
> > > +#define CRC32C_OPTIMIZATION    BIT(2) /* __crc32c_le() is optimized */
> > > +#if IS_ENABLED(CONFIG_CRC32_ARCH)
> > > +extern u32 crc32_optimizations;
> > > +#else
> > > +#define crc32_optimizations 0
> > > +#endif
> > > +
> >
> > Wouldn't it be cleaner to add a new library function for this, instead
> > of using a global variable?
>
> The architecture crc32 modules need to be able to write to this.  There could be
> a setter function and a getter function, but just using a variable is simpler.
>

If we just add 'u32 crc32_optimizations()', there is no need for those
modules to have init/exit hooks, the only thing they need to export is
this routine.

Or perhaps it could even be a static inline, with the right plumbing
of header files. At least on arm64,

static inline u32 crc32_optimizations() {
  if (!alternative_have_const_cap_likely(ARM64_HAS_CRC32))
    return 0;
  return CRC32_LE_OPTIMIZATION | CRC32_BE_OPTIMIZATION | CRC32C_OPTIMIZATION;
}

should be all we need.
Re: [PATCH v2 03/18] lib/crc32: expose whether the lib is really optimized at runtime
Posted by Eric Biggers 1 month ago
On Fri, Oct 25, 2024 at 11:37:45PM +0200, Ard Biesheuvel wrote:
> On Fri, 25 Oct 2024 at 23:32, Eric Biggers <ebiggers@kernel.org> wrote:
> >
> > On Fri, Oct 25, 2024 at 10:32:14PM +0200, Ard Biesheuvel wrote:
> > > On Fri, 25 Oct 2024 at 21:15, Eric Biggers <ebiggers@kernel.org> wrote:
> > > >
> > > > From: Eric Biggers <ebiggers@google.com>
> > > >
> > > > Make the CRC32 library export some flags that indicate which CRC32
> > > > functions are actually executing optimized code at runtime.  Set these
> > > > correctly from the architectures that implement the CRC32 functions.
> > > >
> > > > This will be used to determine whether the crc32[c]-$arch shash
> > > > algorithms should be registered in the crypto API.  btrfs could also
> > > > start using these flags instead of the hack that it currently uses where
> > > > it parses the crypto_shash_driver_name.
> > > >
> > > > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > > > ---
> > > >  arch/arm64/lib/crc32-glue.c  | 15 +++++++++++++++
> > > >  arch/riscv/lib/crc32-riscv.c | 15 +++++++++++++++
> > > >  include/linux/crc32.h        | 15 +++++++++++++++
> > > >  lib/crc32.c                  |  5 +++++
> > > >  4 files changed, 50 insertions(+)
> > > >
> > > ...
> > > > diff --git a/include/linux/crc32.h b/include/linux/crc32.h
> > > > index 58c632533b08..bf26d454b60d 100644
> > > > --- a/include/linux/crc32.h
> > > > +++ b/include/linux/crc32.h
> > > > @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
> > > >         if (IS_ENABLED(CONFIG_CRC32_ARCH))
> > > >                 return crc32c_le_arch(crc, p, len);
> > > >         return crc32c_le_base(crc, p, len);
> > > >  }
> > > >
> > > > +/*
> > > > + * crc32_optimizations contains flags that indicate which CRC32 library
> > > > + * functions are using architecture-specific optimizations.  Unlike
> > > > + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32
> > > > + * variants and also whether any needed CPU features are available at runtime.
> > > > + */
> > > > +#define CRC32_LE_OPTIMIZATION  BIT(0) /* crc32_le() is optimized */
> > > > +#define CRC32_BE_OPTIMIZATION  BIT(1) /* crc32_be() is optimized */
> > > > +#define CRC32C_OPTIMIZATION    BIT(2) /* __crc32c_le() is optimized */
> > > > +#if IS_ENABLED(CONFIG_CRC32_ARCH)
> > > > +extern u32 crc32_optimizations;
> > > > +#else
> > > > +#define crc32_optimizations 0
> > > > +#endif
> > > > +
> > >
> > > Wouldn't it be cleaner to add a new library function for this, instead
> > > of using a global variable?
> >
> > The architecture crc32 modules need to be able to write to this.  There could be
> > a setter function and a getter function, but just using a variable is simpler.
> >
> 
> If we just add 'u32 crc32_optimizations()', there is no need for those
> modules to have init/exit hooks, the only thing they need to export is
> this routine.
> 
> Or perhaps it could even be a static inline, with the right plumbing
> of header files. At least on arm64,
> 
> static inline u32 crc32_optimizations() {
>   if (!alternative_have_const_cap_likely(ARM64_HAS_CRC32))
>     return 0;
>   return CRC32_LE_OPTIMIZATION | CRC32_BE_OPTIMIZATION | CRC32C_OPTIMIZATION;
> }
> 
> should be all we need.

In 7 of the 9 affected arches, I already have a module_init function that checks
the CPU features in order to set up static keys.  (arm64 and riscv already used
alternatives patching, so I kept that.)  It's slightly convenient to set these
flags at the same time, but yes the above solution would work too.

- Eric