Since clang does not support "#pragma GCC", the instruction sets are
always disabled when building with clang. In this change, we
1. wrap "#pragma GCC" inside "#ifndef __clang__",
2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
around a gcc bug,
3. and annotate each function with `__attribute__((target(*)))` which
is recognized by both gcc and clang.
Signed-off-by: Shu-Chun Weng <scw@google.com>
---
configure | 16 ++++++++++++++--
util/bufferiszero.c | 33 +++++++++++++++++++++++----------
2 files changed, 37 insertions(+), 12 deletions(-)
diff --git a/configure b/configure
index 4bd80ed507..d9ce3aa5db 100755
--- a/configure
+++ b/configure
@@ -5808,10 +5808,16 @@ fi
if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
cat > $TMPC << EOF
+#include <cpuid.h>
+#ifndef __clang__
#pragma GCC push_options
#pragma GCC target("avx2")
-#include <cpuid.h>
+#endif
#include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx2")))
static int bar(void *a) {
__m256i x = *(__m256i *)a;
return _mm256_testz_si256(x, x);
@@ -5835,10 +5841,16 @@ fi
if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then
cat > $TMPC << EOF
+#include <cpuid.h>
+#ifndef __clang__
#pragma GCC push_options
#pragma GCC target("avx512f")
-#include <cpuid.h>
+#endif
#include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx512f")))
static int bar(void *a) {
__m512i x = *(__m512i *)a;
return _mm512_test_epi64_mask(x, x);
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 695bb4ce28..ca836b6e8c 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -64,17 +64,18 @@ buffer_zero_int(const void *buf, size_t len)
}
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
-/* Do not use push_options pragmas unnecessarily, because clang
- * does not support them.
- */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#ifndef __clang__
#pragma GCC push_options
#pragma GCC target("sse2")
#endif
#include <emmintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
/* Note that each of these vectorized functions require len >= 64. */
+__attribute__((target("sse2")))
static bool
buffer_zero_sse2(const void *buf, size_t len)
{
@@ -104,19 +105,22 @@ buffer_zero_sse2(const void *buf, size_t len)
return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
}
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#pragma GCC pop_options
-#endif
#ifdef CONFIG_AVX2_OPT
/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
* the includes have to be within the corresponding push_options region, and
* therefore the regions themselves have to be ordered with increasing ISA.
*/
+#ifndef __clang__
#pragma GCC push_options
#pragma GCC target("sse4")
+#endif
#include <smmintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("sse4")))
static bool
buffer_zero_sse4(const void *buf, size_t len)
{
@@ -145,11 +149,16 @@ buffer_zero_sse4(const void *buf, size_t len)
return _mm_testz_si128(t, t);
}
-#pragma GCC pop_options
+#ifndef __clang__
#pragma GCC push_options
#pragma GCC target("avx2")
+#endif
#include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx2")))
static bool
buffer_zero_avx2(const void *buf, size_t len)
{
@@ -176,14 +185,19 @@ buffer_zero_avx2(const void *buf, size_t len)
return _mm256_testz_si256(t, t);
}
-#pragma GCC pop_options
#endif /* CONFIG_AVX2_OPT */
#ifdef CONFIG_AVX512F_OPT
+#ifndef __clang__
#pragma GCC push_options
#pragma GCC target("avx512f")
+#endif
#include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx512f")))
static bool
buffer_zero_avx512(const void *buf, size_t len)
{
@@ -210,7 +224,6 @@ buffer_zero_avx512(const void *buf, size_t len)
return !_mm512_test_epi64_mask(t, t);
}
-#pragma GCC pop_options
#endif
--
2.28.0.rc0.105.gf9edc3c819-goog
On 23/07/2020 02.27, Shu-Chun Weng wrote:
> Since clang does not support "#pragma GCC", the instruction sets are
> always disabled. In this change, we
>
> 1. wrap "#pragma GCC" inside "#ifndef __clang__",
> 2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
> around gcc bug,
> 3. and annotate each function with `__attribute__((target(*)))` which
> is recognized by both gcc and clang.
>
> Signed-off-by: Shu-Chun Weng <scw@google.com>
> ---
> configure | 16 ++++++++++++++--
> util/bufferiszero.c | 33 +++++++++++++++++++++++----------
> 2 files changed, 37 insertions(+), 12 deletions(-)
>
> diff --git a/configure b/configure
> index 4bd80ed507..d9ce3aa5db 100755
> --- a/configure
> +++ b/configure
> @@ -5808,10 +5808,16 @@ fi
>
> if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
> cat > $TMPC << EOF
> +#include <cpuid.h>
> +#ifndef __clang__
> #pragma GCC push_options
> #pragma GCC target("avx2")
> -#include <cpuid.h>
> +#endif
> #include <immintrin.h>
> +#ifndef __clang__
> +#pragma GCC pop_options
> +#endif
> +__attribute__((target("avx2")))
> static int bar(void *a) {
> __m256i x = *(__m256i *)a;
> return _mm256_testz_si256(x, x);
I wonder whether it would make more sense to pass "-mavx2" to the
compile_object call afterwards and simply remove the #pragmas here?
Did you try that already?
Thomas
Do we have the flexibility to do that for util/bufferiszero.c as well?
Otherwise, we are using different mechanisms to detect (compile test.c with
-mavx2) and actually use (GCC pragma & __attribute__((target(*)))) the
feature in production.
Shu-Chun
On Wed, Jul 22, 2020 at 9:55 PM Thomas Huth <thuth@redhat.com> wrote:
> On 23/07/2020 02.27, Shu-Chun Weng wrote:
> > Since clang does not support "#pragma GCC", the instruction sets are
> > always disabled. In this change, we
> >
> > 1. wrap "#pragma GCC" inside "#ifndef __clang__",
> > 2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
> > around gcc bug,
> > 3. and annotate each function with `__attribute__((target(*)))` which
> > is recognized by both gcc and clang.
> >
> > Signed-off-by: Shu-Chun Weng <scw@google.com>
> > ---
> > configure | 16 ++++++++++++++--
> > util/bufferiszero.c | 33 +++++++++++++++++++++++----------
> > 2 files changed, 37 insertions(+), 12 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 4bd80ed507..d9ce3aa5db 100755
> > --- a/configure
> > +++ b/configure
> > @@ -5808,10 +5808,16 @@ fi
> >
> > if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
> > cat > $TMPC << EOF
> > +#include <cpuid.h>
> > +#ifndef __clang__
> > #pragma GCC push_options
> > #pragma GCC target("avx2")
> > -#include <cpuid.h>
> > +#endif
> > #include <immintrin.h>
> > +#ifndef __clang__
> > +#pragma GCC pop_options
> > +#endif
> > +__attribute__((target("avx2")))
> > static int bar(void *a) {
> > __m256i x = *(__m256i *)a;
> > return _mm256_testz_si256(x, x);
>
> I wonder whether it would make more sense to pass "-mavx2" to the
> compile_object call afterwards and simply remove the #pragmas here?
> Did you try that already?
>
> Thomas
>
>
On 23/07/2020 08.04, Shu-Chun Weng wrote:
> Do we have the flexibility to do that for util/bufferiszero.c as well?
> Otherwise, we are using different mechanisms to detect (compile test.c
> with -mavx2) and actually use (GCC pragma & __attribute__((target(*)))))
> the feature in production.
That's true ... so it's likely better to keep the pragmas in the
configure script, indeed!
Thanks,
Thomas
> Shu-Chun
>
> On Wed, Jul 22, 2020 at 9:55 PM Thomas Huth <thuth@redhat.com
> <mailto:thuth@redhat.com>> wrote:
>
> On 23/07/2020 02.27, Shu-Chun Weng wrote:
> > Since clang does not support "#pragma GCC", the instruction sets are
> > always disabled. In this change, we
> >
> > 1. wrap "#pragma GCC" inside "#ifndef __clang__",
> > 2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
> > around gcc bug,
> > 3. and annotate each function with `__attribute__((target(*)))` which
> > is recognized by both gcc and clang.
> >
> > Signed-off-by: Shu-Chun Weng <scw@google.com <mailto:scw@google.com>>
> > ---
> > configure | 16 ++++++++++++++--
> > util/bufferiszero.c | 33 +++++++++++++++++++++++----------
> > 2 files changed, 37 insertions(+), 12 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 4bd80ed507..d9ce3aa5db 100755
> > --- a/configure
> > +++ b/configure
> > @@ -5808,10 +5808,16 @@ fi
> >
> > if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
> > cat > $TMPC << EOF
> > +#include <cpuid.h>
> > +#ifndef __clang__
> > #pragma GCC push_options
> > #pragma GCC target("avx2")
> > -#include <cpuid.h>
> > +#endif
> > #include <immintrin.h>
> > +#ifndef __clang__
> > +#pragma GCC pop_options
> > +#endif
> > +__attribute__((target("avx2")))
> > static int bar(void *a) {
> > __m256i x = *(__m256i *)a;
> > return _mm256_testz_si256(x, x);
>
> I wonder whether it would make more sense to pass "-mavx2" to the
> compile_object call afterwards and simply remove the #pragmas here?
> Did you try that already?
>
> Thomas
>
© 2016 - 2026 Red Hat, Inc.