From: Ard Biesheuvel <ardb@kernel.org>
Before modifying the prototypes of kernel_neon_begin() and
kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers
allocated on the stack, move arm64 to the new 'ksimd' scoped guard API,
which encapsulates the calls to those functions.
For symmetry, do the same for 32-bit ARM too.
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
lib/crc/arm/crc-t10dif.h | 16 +++++-----------
lib/crc/arm/crc32.h | 11 ++++-------
lib/crc/arm64/crc-t10dif.h | 16 +++++-----------
lib/crc/arm64/crc32.h | 16 ++++++----------
4 files changed, 20 insertions(+), 39 deletions(-)
diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
index 63441de5e3f1..aaeeab0defb5 100644
--- a/lib/crc/arm/crc-t10dif.h
+++ b/lib/crc/arm/crc-t10dif.h
@@ -5,7 +5,6 @@
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -20,21 +19,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
- if (static_branch_likely(&have_pmull)) {
- if (likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
+ if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
+ scoped_ksimd()
+ return crc_t10dif_pmull64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
static_branch_likely(&have_neon) &&
likely(may_use_simd())) {
u8 buf[16] __aligned(16);
- kernel_neon_begin();
- crc_t10dif_pmull8(crc, data, length, buf);
- kernel_neon_end();
+ scoped_ksimd()
+ crc_t10dif_pmull8(crc, data, length, buf);
return crc_t10dif_generic(0, buf, sizeof(buf));
}
diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h
index 7b76f52f6907..f33de6b22cd4 100644
--- a/lib/crc/arm/crc32.h
+++ b/lib/crc/arm/crc32.h
@@ -8,7 +8,6 @@
#include <linux/cpufeature.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
@@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32_pmull_le(p, n, crc);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_pmull_le(p, n, crc);
p += n;
len -= n;
}
@@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32c_pmull_le(p, n, crc);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32c_pmull_le(p, n, crc);
p += n;
len -= n;
}
diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h
index f88db2971805..0de03ab1aeab 100644
--- a/lib/crc/arm64/crc-t10dif.h
+++ b/lib/crc/arm64/crc-t10dif.h
@@ -7,7 +7,6 @@
#include <linux/cpufeature.h>
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
@@ -22,21 +21,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
- if (static_branch_likely(&have_pmull)) {
- if (likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull_p64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
+ if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
+ scoped_ksimd()
+ return crc_t10dif_pmull_p64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
static_branch_likely(&have_asimd) &&
likely(may_use_simd())) {
u8 buf[16];
- kernel_neon_begin();
- crc_t10dif_pmull_p8(crc, data, length, buf);
- kernel_neon_end();
+ scoped_ksimd()
+ crc_t10dif_pmull_p8(crc, data, length, buf);
return crc_t10dif_generic(0, buf, sizeof(buf));
}
diff --git a/lib/crc/arm64/crc32.h b/lib/crc/arm64/crc32.h
index 31e649cd40a2..1939a5dee477 100644
--- a/lib/crc/arm64/crc32.h
+++ b/lib/crc/arm64/crc32.h
@@ -2,7 +2,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <asm/neon.h>
#include <asm/simd.h>
// The minimum input length to consider the 4-way interleaved code path
@@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32_le_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_le_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
@@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32c_le_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32c_le_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
@@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32_be_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_be_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
--
2.51.1.930.gacf6e81ea2-goog
On Fri, 31 Oct 2025 11:39:07 +0100
Ard Biesheuvel <ardb+git@google.com> wrote:
> From: Ard Biesheuvel <ardb@kernel.org>
>
> Before modifying the prototypes of kernel_neon_begin() and
> kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers
> allocated on the stack, move arm64 to the new 'ksimd' scoped guard API,
> which encapsulates the calls to those functions.
>
> For symmetry, do the same for 32-bit ARM too.
>
> Reviewed-by: Eric Biggers <ebiggers@kernel.org>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> lib/crc/arm/crc-t10dif.h | 16 +++++-----------
> lib/crc/arm/crc32.h | 11 ++++-------
> lib/crc/arm64/crc-t10dif.h | 16 +++++-----------
> lib/crc/arm64/crc32.h | 16 ++++++----------
> 4 files changed, 20 insertions(+), 39 deletions(-)
>
> diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
> index 63441de5e3f1..aaeeab0defb5 100644
> --- a/lib/crc/arm/crc-t10dif.h
> +++ b/lib/crc/arm/crc-t10dif.h
> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> @@ -20,21 +19,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
> static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
> {
> if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
> - if (static_branch_likely(&have_pmull)) {
> - if (likely(may_use_simd())) {
> - kernel_neon_begin();
> - crc = crc_t10dif_pmull64(crc, data, length);
> - kernel_neon_end();
> - return crc;
> - }
> + if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
> + scoped_ksimd()
> + return crc_t10dif_pmull64(crc, data, length);
> } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> static_branch_likely(&have_neon) &&
> likely(may_use_simd())) {
I briefly thought this was a functional change but it's not because
of may_use_simd() being something that isn't going to change between
the two evaluations.
Would it hurt at all to pull that up to be
if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
if (static_branch_likely(&have_pmull)) {
scoped_ksimd()
return crc_t10dif_pmull64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
static_branch_likely(&have_neon)) {
...
?
On Fri, 31 Oct 2025 at 14:49, Jonathan Cameron
<jonathan.cameron@huawei.com> wrote:
>
> On Fri, 31 Oct 2025 11:39:07 +0100
> Ard Biesheuvel <ardb+git@google.com> wrote:
>
> > From: Ard Biesheuvel <ardb@kernel.org>
> >
> > Before modifying the prototypes of kernel_neon_begin() and
> > kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers
> > allocated on the stack, move arm64 to the new 'ksimd' scoped guard API,
> > which encapsulates the calls to those functions.
> >
> > For symmetry, do the same for 32-bit ARM too.
> >
> > Reviewed-by: Eric Biggers <ebiggers@kernel.org>
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> > lib/crc/arm/crc-t10dif.h | 16 +++++-----------
> > lib/crc/arm/crc32.h | 11 ++++-------
> > lib/crc/arm64/crc-t10dif.h | 16 +++++-----------
> > lib/crc/arm64/crc32.h | 16 ++++++----------
> > 4 files changed, 20 insertions(+), 39 deletions(-)
> >
> > diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
> > index 63441de5e3f1..aaeeab0defb5 100644
> > --- a/lib/crc/arm/crc-t10dif.h
> > +++ b/lib/crc/arm/crc-t10dif.h
>
> > static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> > @@ -20,21 +19,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
> > static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
> > {
> > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
> > - if (static_branch_likely(&have_pmull)) {
> > - if (likely(may_use_simd())) {
> > - kernel_neon_begin();
> > - crc = crc_t10dif_pmull64(crc, data, length);
> > - kernel_neon_end();
> > - return crc;
> > - }
> > + if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
> > + scoped_ksimd()
> > + return crc_t10dif_pmull64(crc, data, length);
>
> > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > static_branch_likely(&have_neon) &&
> > likely(may_use_simd())) {
>
> I briefly thought this was a functional change but it's not because
> of may_use_simd() being something that isn't going to change between
> the two evaluations.
>
> Would it hurt at all to pull that up to be
> if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
> if (static_branch_likely(&have_pmull)) {
> scoped_ksimd()
> return crc_t10dif_pmull64(crc, data, length);
>
> } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> static_branch_likely(&have_neon)) {
> ...
>
> ?
>
Yeah that would be a reasonable cleanup, I guess.
On Fri, 31 Oct 2025 at 14:52, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Fri, 31 Oct 2025 at 14:49, Jonathan Cameron
> <jonathan.cameron@huawei.com> wrote:
> >
> > On Fri, 31 Oct 2025 11:39:07 +0100
> > Ard Biesheuvel <ardb+git@google.com> wrote:
> >
> > > From: Ard Biesheuvel <ardb@kernel.org>
> > >
> > > Before modifying the prototypes of kernel_neon_begin() and
> > > kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers
> > > allocated on the stack, move arm64 to the new 'ksimd' scoped guard API,
> > > which encapsulates the calls to those functions.
> > >
> > > For symmetry, do the same for 32-bit ARM too.
> > >
> > > Reviewed-by: Eric Biggers <ebiggers@kernel.org>
> > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > ---
> > > lib/crc/arm/crc-t10dif.h | 16 +++++-----------
> > > lib/crc/arm/crc32.h | 11 ++++-------
> > > lib/crc/arm64/crc-t10dif.h | 16 +++++-----------
> > > lib/crc/arm64/crc32.h | 16 ++++++----------
> > > 4 files changed, 20 insertions(+), 39 deletions(-)
> > >
> > > diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
> > > index 63441de5e3f1..aaeeab0defb5 100644
> > > --- a/lib/crc/arm/crc-t10dif.h
> > > +++ b/lib/crc/arm/crc-t10dif.h
> >
> > > static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> > > @@ -20,21 +19,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
> > > static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
> > > {
> > > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
> > > - if (static_branch_likely(&have_pmull)) {
> > > - if (likely(may_use_simd())) {
> > > - kernel_neon_begin();
> > > - crc = crc_t10dif_pmull64(crc, data, length);
> > > - kernel_neon_end();
> > > - return crc;
> > > - }
> > > + if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
> > > + scoped_ksimd()
> > > + return crc_t10dif_pmull64(crc, data, length);
> >
> > > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > > static_branch_likely(&have_neon) &&
> > > likely(may_use_simd())) {
> >
> > I briefly thought this was a functional change but it's not because
> > of may_use_simd() being something that isn't going to change between
> > the two evaluations.
> >
> > Would it hurt at all to pull that up to be
> > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
> > if (static_branch_likely(&have_pmull)) {
> > scoped_ksimd()
> > return crc_t10dif_pmull64(crc, data, length);
> >
> > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > static_branch_likely(&have_neon)) {
> > ...
> >
> > ?
> >
>
> Yeah that would be a reasonable cleanup, I guess.
Actually, looking more closely, that would result in may_use_simd()
being evaluated even when the static keys are set to false, given that
the compiler is unlikely to be able to figure out by itself that
may_use_simd() has no side effects.
On Fri, 31 Oct 2025 15:05:19 +0100
Ard Biesheuvel <ardb@kernel.org> wrote:
> On Fri, 31 Oct 2025 at 14:52, Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > On Fri, 31 Oct 2025 at 14:49, Jonathan Cameron
> > <jonathan.cameron@huawei.com> wrote:
> > >
> > > On Fri, 31 Oct 2025 11:39:07 +0100
> > > Ard Biesheuvel <ardb+git@google.com> wrote:
> > >
> > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > >
> > > > Before modifying the prototypes of kernel_neon_begin() and
> > > > kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers
> > > > allocated on the stack, move arm64 to the new 'ksimd' scoped guard API,
> > > > which encapsulates the calls to those functions.
> > > >
> > > > For symmetry, do the same for 32-bit ARM too.
> > > >
> > > > Reviewed-by: Eric Biggers <ebiggers@kernel.org>
> > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > > ---
> > > > lib/crc/arm/crc-t10dif.h | 16 +++++-----------
> > > > lib/crc/arm/crc32.h | 11 ++++-------
> > > > lib/crc/arm64/crc-t10dif.h | 16 +++++-----------
> > > > lib/crc/arm64/crc32.h | 16 ++++++----------
> > > > 4 files changed, 20 insertions(+), 39 deletions(-)
> > > >
> > > > diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
> > > > index 63441de5e3f1..aaeeab0defb5 100644
> > > > --- a/lib/crc/arm/crc-t10dif.h
> > > > +++ b/lib/crc/arm/crc-t10dif.h
> > >
> > > > static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> > > > @@ -20,21 +19,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
> > > > static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
> > > > {
> > > > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
> > > > - if (static_branch_likely(&have_pmull)) {
> > > > - if (likely(may_use_simd())) {
> > > > - kernel_neon_begin();
> > > > - crc = crc_t10dif_pmull64(crc, data, length);
> > > > - kernel_neon_end();
> > > > - return crc;
> > > > - }
> > > > + if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
> > > > + scoped_ksimd()
> > > > + return crc_t10dif_pmull64(crc, data, length);
> > >
> > > > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > > > static_branch_likely(&have_neon) &&
> > > > likely(may_use_simd())) {
> > >
> > > I briefly thought this was a functional change but it's not because
> > > of may_use_simd() being something that isn't going to change between
> > > the two evaluations.
> > >
> > > Would it hurt at all to pull that up to be
> > > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
> > > if (static_branch_likely(&have_pmull)) {
> > > scoped_ksimd()
> > > return crc_t10dif_pmull64(crc, data, length);
> > >
> > > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > > static_branch_likely(&have_neon)) {
> > > ...
> > >
> > > ?
> > >
> >
> > Yeah that would be a reasonable cleanup, I guess.
>
> Actually, looking more closely, that would result in may_use_simd()
> being evaluated even when the static keys are set to false, given that
> the compiler is unlikely to be able to figure out by itself that
> may_use_simd() has no side effects.
Yeah. That was why it was a question :)
Given everything is marked as likely I wasn't sure if we cared about when
the static keys aren't set.
Jonathan
>
On Mon, 3 Nov 2025 at 12:28, Jonathan Cameron
<jonathan.cameron@huawei.com> wrote:
>
> On Fri, 31 Oct 2025 15:05:19 +0100
> Ard Biesheuvel <ardb@kernel.org> wrote:
>
> > On Fri, 31 Oct 2025 at 14:52, Ard Biesheuvel <ardb@kernel.org> wrote:
> > >
> > > On Fri, 31 Oct 2025 at 14:49, Jonathan Cameron
> > > <jonathan.cameron@huawei.com> wrote:
> > > >
> > > > On Fri, 31 Oct 2025 11:39:07 +0100
> > > > Ard Biesheuvel <ardb+git@google.com> wrote:
> > > >
> > > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > > >
> > > > > Before modifying the prototypes of kernel_neon_begin() and
> > > > > kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers
> > > > > allocated on the stack, move arm64 to the new 'ksimd' scoped guard API,
> > > > > which encapsulates the calls to those functions.
> > > > >
> > > > > For symmetry, do the same for 32-bit ARM too.
> > > > >
> > > > > Reviewed-by: Eric Biggers <ebiggers@kernel.org>
> > > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > > > ---
> > > > > lib/crc/arm/crc-t10dif.h | 16 +++++-----------
> > > > > lib/crc/arm/crc32.h | 11 ++++-------
> > > > > lib/crc/arm64/crc-t10dif.h | 16 +++++-----------
> > > > > lib/crc/arm64/crc32.h | 16 ++++++----------
> > > > > 4 files changed, 20 insertions(+), 39 deletions(-)
> > > > >
> > > > > diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
> > > > > index 63441de5e3f1..aaeeab0defb5 100644
> > > > > --- a/lib/crc/arm/crc-t10dif.h
> > > > > +++ b/lib/crc/arm/crc-t10dif.h
> > > >
> > > > > static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> > > > > @@ -20,21 +19,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
> > > > > static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
> > > > > {
> > > > > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
> > > > > - if (static_branch_likely(&have_pmull)) {
> > > > > - if (likely(may_use_simd())) {
> > > > > - kernel_neon_begin();
> > > > > - crc = crc_t10dif_pmull64(crc, data, length);
> > > > > - kernel_neon_end();
> > > > > - return crc;
> > > > > - }
> > > > > + if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
> > > > > + scoped_ksimd()
> > > > > + return crc_t10dif_pmull64(crc, data, length);
> > > >
> > > > > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > > > > static_branch_likely(&have_neon) &&
> > > > > likely(may_use_simd())) {
> > > >
> > > > I briefly thought this was a functional change but it's not because
> > > > of may_use_simd() being something that isn't going to change between
> > > > the two evaluations.
> > > >
> > > > Would it hurt at all to pull that up to be
> > > > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
> > > > if (static_branch_likely(&have_pmull)) {
> > > > scoped_ksimd()
> > > > return crc_t10dif_pmull64(crc, data, length);
> > > >
> > > > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > > > static_branch_likely(&have_neon)) {
> > > > ...
> > > >
> > > > ?
> > > >
> > >
> > > Yeah that would be a reasonable cleanup, I guess.
> >
> > Actually, looking more closely, that would result in may_use_simd()
> > being evaluated even when the static keys are set to false, given that
> > the compiler is unlikely to be able to figure out by itself that
> > may_use_simd() has no side effects.
> Yeah. That was why it was a question :)
> Given everything is marked as likely I wasn't sure if we cared about when
> the static keys aren't set.
>
Yeah, it is rather doubtful that those annotations (or the use of
static keys, for that matter) make a meaningful difference here.
On Tue, Nov 04, 2025 at 04:32:28PM +0100, Ard Biesheuvel wrote:
> On Mon, 3 Nov 2025 at 12:28, Jonathan Cameron
> <jonathan.cameron@huawei.com> wrote:
> >
> > On Fri, 31 Oct 2025 15:05:19 +0100
> > Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > > On Fri, 31 Oct 2025 at 14:52, Ard Biesheuvel <ardb@kernel.org> wrote:
> > > >
> > > > On Fri, 31 Oct 2025 at 14:49, Jonathan Cameron
> > > > <jonathan.cameron@huawei.com> wrote:
> > > > >
> > > > > On Fri, 31 Oct 2025 11:39:07 +0100
> > > > > Ard Biesheuvel <ardb+git@google.com> wrote:
> > > > >
> > > > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > > > >
> > > > > > Before modifying the prototypes of kernel_neon_begin() and
> > > > > > kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers
> > > > > > allocated on the stack, move arm64 to the new 'ksimd' scoped guard API,
> > > > > > which encapsulates the calls to those functions.
> > > > > >
> > > > > > For symmetry, do the same for 32-bit ARM too.
> > > > > >
> > > > > > Reviewed-by: Eric Biggers <ebiggers@kernel.org>
> > > > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > > > > ---
> > > > > > lib/crc/arm/crc-t10dif.h | 16 +++++-----------
> > > > > > lib/crc/arm/crc32.h | 11 ++++-------
> > > > > > lib/crc/arm64/crc-t10dif.h | 16 +++++-----------
> > > > > > lib/crc/arm64/crc32.h | 16 ++++++----------
> > > > > > 4 files changed, 20 insertions(+), 39 deletions(-)
> > > > > >
> > > > > > diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
> > > > > > index 63441de5e3f1..aaeeab0defb5 100644
> > > > > > --- a/lib/crc/arm/crc-t10dif.h
> > > > > > +++ b/lib/crc/arm/crc-t10dif.h
> > > > >
> > > > > > static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> > > > > > @@ -20,21 +19,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
> > > > > > static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
> > > > > > {
> > > > > > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
> > > > > > - if (static_branch_likely(&have_pmull)) {
> > > > > > - if (likely(may_use_simd())) {
> > > > > > - kernel_neon_begin();
> > > > > > - crc = crc_t10dif_pmull64(crc, data, length);
> > > > > > - kernel_neon_end();
> > > > > > - return crc;
> > > > > > - }
> > > > > > + if (static_branch_likely(&have_pmull) && likely(may_use_simd())) {
> > > > > > + scoped_ksimd()
> > > > > > + return crc_t10dif_pmull64(crc, data, length);
> > > > >
> > > > > > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > > > > > static_branch_likely(&have_neon) &&
> > > > > > likely(may_use_simd())) {
> > > > >
> > > > > I briefly thought this was a functional change but it's not because
> > > > > of may_use_simd() being something that isn't going to change between
> > > > > the two evaluations.
> > > > >
> > > > > Would it hurt at all to pull that up to be
> > > > > if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
> > > > > if (static_branch_likely(&have_pmull)) {
> > > > > scoped_ksimd()
> > > > > return crc_t10dif_pmull64(crc, data, length);
> > > > >
> > > > > } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
> > > > > static_branch_likely(&have_neon)) {
> > > > > ...
> > > > >
> > > > > ?
> > > > >
> > > >
> > > > Yeah that would be a reasonable cleanup, I guess.
> > >
> > > Actually, looking more closely, that would result in may_use_simd()
> > > being evaluated even when the static keys are set to false, given that
> > > the compiler is unlikely to be able to figure out by itself that
> > > may_use_simd() has no side effects.
> > Yeah. That was why it was a question :)
> > Given everything is marked as likely I wasn't sure if we cared about when
> > the static keys aren't set.
> >
>
> Yeah, it is rather doubtful that those annotations (or the use of
> static keys, for that matter) make a meaningful difference here.
Well, this change makes crc_t10dif_update() not usable during early boot
on arm64, as it will start hitting the
WARN_ON(!system_capabilities_finalized()) in may_use_simd().
I doubt there are any current callers where that matters, but I've been
trying to avoid this sort of unnecessary pitfall in the CRC functions.
Checking the static key first eliminates this pitfall and is also more
efficient on CPUs that don't support the relevant CPU feature.
- Eric
© 2016 - 2025 Red Hat, Inc.