[PATCH 03/17] x86: Replace open-coded parity calculation with parity8()

Kuan-Wei Chiu posted 17 patches 9 months, 3 weeks ago
[PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by Kuan-Wei Chiu 9 months, 3 weeks ago
Refactor parity calculations to use the standard parity8() helper. This
change eliminates redundant implementations and improves code
efficiency.

Co-developed-by: Yu-Chun Lin <eleanor15x@gmail.com>
Signed-off-by: Yu-Chun Lin <eleanor15x@gmail.com>
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
 arch/x86/kernel/bootflag.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 3fed7ae58b60..314ff0e84900 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -8,6 +8,7 @@
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/acpi.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 
 #include <linux/mc146818rtc.h>
@@ -20,26 +21,13 @@
 
 int sbf_port __initdata = -1;	/* set via acpi_boot_init() */
 
-static int __init parity(u8 v)
-{
-	int x = 0;
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		x ^= (v & 1);
-		v >>= 1;
-	}
-
-	return x;
-}
-
 static void __init sbf_write(u8 v)
 {
 	unsigned long flags;
 
 	if (sbf_port != -1) {
 		v &= ~SBF_PARITY;
-		if (!parity(v))
+		if (!parity8(v))
 			v |= SBF_PARITY;
 
 		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
@@ -70,7 +58,7 @@ static int __init sbf_value_valid(u8 v)
 {
 	if (v & SBF_RESERVED)		/* Reserved bits */
 		return 0;
-	if (!parity(v))
+	if (!parity8(v))
 		return 0;
 
 	return 1;
-- 
2.34.1
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by Uros Bizjak 9 months, 3 weeks ago

On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> Refactor parity calculations to use the standard parity8() helper. This
> change eliminates redundant implementations and improves code
> efficiency.

The patch improves parity assembly code in bootflag.o from:

   58:	89 de                	mov    %ebx,%esi
   5a:	b9 08 00 00 00       	mov    $0x8,%ecx
   5f:	31 d2                	xor    %edx,%edx
   61:	89 f0                	mov    %esi,%eax
   63:	89 d7                	mov    %edx,%edi
   65:	40 d0 ee             	shr    %sil
   68:	83 e0 01             	and    $0x1,%eax
   6b:	31 c2                	xor    %eax,%edx
   6d:	83 e9 01             	sub    $0x1,%ecx
   70:	75 ef                	jne    61 <sbf_init+0x51>
   72:	39 c7                	cmp    %eax,%edi
   74:	74 7f                	je     f5 <sbf_init+0xe5>
   76:

to:

   54:	89 d8                	mov    %ebx,%eax
   56:	ba 96 69 00 00       	mov    $0x6996,%edx
   5b:	c0 e8 04             	shr    $0x4,%al
   5e:	31 d8                	xor    %ebx,%eax
   60:	83 e0 0f             	and    $0xf,%eax
   63:	0f a3 c2             	bt     %eax,%edx
   66:	73 64                	jae    cc <sbf_init+0xbc>
   68:

which is faster and smaller (-10 bytes) code.

Reviewed-by: Uros Bizjak <ubizjak@gmail.com>

Thanks,
Uros.

> 
> Co-developed-by: Yu-Chun Lin <eleanor15x@gmail.com>
> Signed-off-by: Yu-Chun Lin <eleanor15x@gmail.com>
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
>   arch/x86/kernel/bootflag.c | 18 +++---------------
>   1 file changed, 3 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
> index 3fed7ae58b60..314ff0e84900 100644
> --- a/arch/x86/kernel/bootflag.c
> +++ b/arch/x86/kernel/bootflag.c
> @@ -8,6 +8,7 @@
>   #include <linux/string.h>
>   #include <linux/spinlock.h>
>   #include <linux/acpi.h>
> +#include <linux/bitops.h>
>   #include <asm/io.h>
>   
>   #include <linux/mc146818rtc.h>
> @@ -20,26 +21,13 @@
>   
>   int sbf_port __initdata = -1;	/* set via acpi_boot_init() */
>   
> -static int __init parity(u8 v)
> -{
> -	int x = 0;
> -	int i;
> -
> -	for (i = 0; i < 8; i++) {
> -		x ^= (v & 1);
> -		v >>= 1;
> -	}
> -
> -	return x;
> -}
> -
>   static void __init sbf_write(u8 v)
>   {
>   	unsigned long flags;
>   
>   	if (sbf_port != -1) {
>   		v &= ~SBF_PARITY;
> -		if (!parity(v))
> +		if (!parity8(v))
>   			v |= SBF_PARITY;
>   
>   		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
> @@ -70,7 +58,7 @@ static int __init sbf_value_valid(u8 v)
>   {
>   	if (v & SBF_RESERVED)		/* Reserved bits */
>   		return 0;
> -	if (!parity(v))
> +	if (!parity8(v))
>   		return 0;
>   
>   	return 1;
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by H. Peter Anvin 9 months, 3 weeks ago
On 2/24/25 07:24, Uros Bizjak wrote:
> 
> 
> On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>> Refactor parity calculations to use the standard parity8() helper. This
>> change eliminates redundant implementations and improves code
>> efficiency.
> 
> The patch improves parity assembly code in bootflag.o from:
> 
>    58:    89 de                    mov    %ebx,%esi
>    5a:    b9 08 00 00 00           mov    $0x8,%ecx
>    5f:    31 d2                    xor    %edx,%edx
>    61:    89 f0                    mov    %esi,%eax
>    63:    89 d7                    mov    %edx,%edi
>    65:    40 d0 ee                 shr    %sil
>    68:    83 e0 01                 and    $0x1,%eax
>    6b:    31 c2                    xor    %eax,%edx
>    6d:    83 e9 01                 sub    $0x1,%ecx
>    70:    75 ef                    jne    61 <sbf_init+0x51>
>    72:    39 c7                    cmp    %eax,%edi
>    74:    74 7f                    je     f5 <sbf_init+0xe5>
>    76:
> 
> to:
> 
>    54:    89 d8                    mov    %ebx,%eax
>    56:    ba 96 69 00 00           mov    $0x6996,%edx
>    5b:    c0 e8 04                 shr    $0x4,%al
>    5e:    31 d8                    xor    %ebx,%eax
>    60:    83 e0 0f                 and    $0xf,%eax
>    63:    0f a3 c2                 bt     %eax,%edx
>    66:    73 64                    jae    cc <sbf_init+0xbc>
>    68:
> 
> which is faster and smaller (-10 bytes) code.
> 

Of course, on x86, parity8() and parity16() can be implemented very simply:

(Also, the parity functions really ought to return bool, and be flagged 
__attribute_const__.)

static inline __attribute_const__ bool _arch_parity8(u8 val)
{
	bool parity;
	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
	return parity;
}

static inline __attribute_const__ bool _arch_parity16(u16 val)
{
	bool parity;
	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
	return parity;
}

In the generic algorithm, you probably should implement parity16() in 
terms of parity8(), parity32() in terms of parity16() and so on:

static inline __attribute_const__ bool parity16(u16 val)
{
#ifdef ARCH_HAS_PARITY16
	if (!__builtin_const_p(val))
		return _arch_parity16(val);
#endif
	return parity8(val ^ (val >> 8));
}

This picks up the architectural versions when available.

Furthermore, if a popcnt instruction is known to exist, then the parity 
is simply popcnt(x) & 1.

	-hpa

Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by David Laight 9 months, 3 weeks ago
On Mon, 24 Feb 2025 13:55:28 -0800
"H. Peter Anvin" <hpa@zytor.com> wrote:

> On 2/24/25 07:24, Uros Bizjak wrote:
> > 
> > 
> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:  
> >> Refactor parity calculations to use the standard parity8() helper. This
> >> change eliminates redundant implementations and improves code
> >> efficiency.  
...
> Of course, on x86, parity8() and parity16() can be implemented very simply:
> 
> (Also, the parity functions really ought to return bool, and be flagged 
> __attribute_const__.)
> 
> static inline __attribute_const__ bool _arch_parity8(u8 val)
> {
> 	bool parity;
> 	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
> 	return parity;
> }
> 
> static inline __attribute_const__ bool _arch_parity16(u16 val)
> {
> 	bool parity;
> 	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
> 	return parity;
> }

The same (with fixes) can be done for parity64() on 32bit.

> 
> In the generic algorithm, you probably should implement parity16() in 
> terms of parity8(), parity32() in terms of parity16() and so on:
> 
> static inline __attribute_const__ bool parity16(u16 val)
> {
> #ifdef ARCH_HAS_PARITY16
> 	if (!__builtin_const_p(val))
> 		return _arch_parity16(val);
> #endif
> 	return parity8(val ^ (val >> 8));
> }
> 
> This picks up the architectural versions when available.

Not the best way to do that.
Make the name in the #ifdef the same as the function and define
a default one if the architecture doesn't define one.
So:

static inline parity16(u16 val)
{
	return __builtin_const_p(val) ? _parity_const(val) : _parity16(val);
}

#ifndef _parity16
static inline _parity16(u15 val)
{
	return _parity8(val ^ (val >> 8));
}
#endif

You only need one _parity_const().

> 
> Furthermore, if a popcnt instruction is known to exist, then the parity 
> is simply popcnt(x) & 1.

Beware that some popcnt instructions are slow.

	David

> 
> 	-hpa
> 
>
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by H. Peter Anvin 9 months, 3 weeks ago
On February 25, 2025 2:46:23 PM PST, David Laight <david.laight.linux@gmail.com> wrote:
>On Mon, 24 Feb 2025 13:55:28 -0800
>"H. Peter Anvin" <hpa@zytor.com> wrote:
>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> > 
>> > 
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:  
>> >> Refactor parity calculations to use the standard parity8() helper. This
>> >> change eliminates redundant implementations and improves code
>> >> efficiency.  
>...
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>> 
>> (Also, the parity functions really ought to return bool, and be flagged 
>> __attribute_const__.)
>> 
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>> 	bool parity;
>> 	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
>> 	return parity;
>> }
>> 
>> static inline __attribute_const__ bool _arch_parity16(u16 val)
>> {
>> 	bool parity;
>> 	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
>> 	return parity;
>> }
>
>The same (with fixes) can be done for parity64() on 32bit.
>
>> 
>> In the generic algorithm, you probably should implement parity16() in 
>> terms of parity8(), parity32() in terms of parity16() and so on:
>> 
>> static inline __attribute_const__ bool parity16(u16 val)
>> {
>> #ifdef ARCH_HAS_PARITY16
>> 	if (!__builtin_const_p(val))
>> 		return _arch_parity16(val);
>> #endif
>> 	return parity8(val ^ (val >> 8));
>> }
>> 
>> This picks up the architectural versions when available.
>
>Not the best way to do that.
>Make the name in the #ifdef the same as the function and define
>a default one if the architecture doesn't define one.
>So:
>
>static inline parity16(u16 val)
>{
>	return __builtin_const_p(val) ? _parity_const(val) : _parity16(val);
>}
>
>#ifndef _parity16
>static inline _parity16(u15 val)
>{
>	return _parity8(val ^ (val >> 8));
>}
>#endif
>
>You only need one _parity_const().
>
>> 
>> Furthermore, if a popcnt instruction is known to exist, then the parity 
>> is simply popcnt(x) & 1.
>
>Beware that some popcnt instructions are slow.
>
>	David
>
>> 
>> 	-hpa
>> 
>> 
>

Seems more verbose than just #ifdef _arch_parity8 et al since the const and generic code cases are the same (which they aren't always.)

But that part is a good idea, especially since on at least *some* architectures like x86 doing: 

#define _arch_parity8(x) __builtin_parity(x)

... etc is entirely reasonable and lets gcc use an already available parity flag should one be available.

The inline wrapper, of course, takes care of the type mangling.
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by Yury Norov 9 months, 3 weeks ago
On Mon, Feb 24, 2025 at 01:55:28PM -0800, H. Peter Anvin wrote:
> On 2/24/25 07:24, Uros Bizjak wrote:
> > 
> > 
> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> > > Refactor parity calculations to use the standard parity8() helper. This
> > > change eliminates redundant implementations and improves code
> > > efficiency.
> > 
> > The patch improves parity assembly code in bootflag.o from:
> > 
> >    58:    89 de                    mov    %ebx,%esi
> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
> >    5f:    31 d2                    xor    %edx,%edx
> >    61:    89 f0                    mov    %esi,%eax
> >    63:    89 d7                    mov    %edx,%edi
> >    65:    40 d0 ee                 shr    %sil
> >    68:    83 e0 01                 and    $0x1,%eax
> >    6b:    31 c2                    xor    %eax,%edx
> >    6d:    83 e9 01                 sub    $0x1,%ecx
> >    70:    75 ef                    jne    61 <sbf_init+0x51>
> >    72:    39 c7                    cmp    %eax,%edi
> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
> >    76:
> > 
> > to:
> > 
> >    54:    89 d8                    mov    %ebx,%eax
> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
> >    5b:    c0 e8 04                 shr    $0x4,%al
> >    5e:    31 d8                    xor    %ebx,%eax
> >    60:    83 e0 0f                 and    $0xf,%eax
> >    63:    0f a3 c2                 bt     %eax,%edx
> >    66:    73 64                    jae    cc <sbf_init+0xbc>
> >    68:
> > 
> > which is faster and smaller (-10 bytes) code.
> > 
> 
> Of course, on x86, parity8() and parity16() can be implemented very simply:
> 
> (Also, the parity functions really ought to return bool, and be flagged
> __attribute_const__.)

There was a discussion regarding return type when parity8() was added.
The integer type was taken over bool with a sort of consideration that
bool should be returned as an answer to some question, like parity_odd().

To me it's not a big deal. We can switch to boolean and describe in
comment what the 'true' means for the parity() function.
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by H. Peter Anvin 9 months, 3 weeks ago
On February 24, 2025 2:17:29 PM PST, Yury Norov <yury.norov@gmail.com> wrote:
>On Mon, Feb 24, 2025 at 01:55:28PM -0800, H. Peter Anvin wrote:
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> > 
>> > 
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>> > > Refactor parity calculations to use the standard parity8() helper. This
>> > > change eliminates redundant implementations and improves code
>> > > efficiency.
>> > 
>> > The patch improves parity assembly code in bootflag.o from:
>> > 
>> >    58:    89 de                    mov    %ebx,%esi
>> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
>> >    5f:    31 d2                    xor    %edx,%edx
>> >    61:    89 f0                    mov    %esi,%eax
>> >    63:    89 d7                    mov    %edx,%edi
>> >    65:    40 d0 ee                 shr    %sil
>> >    68:    83 e0 01                 and    $0x1,%eax
>> >    6b:    31 c2                    xor    %eax,%edx
>> >    6d:    83 e9 01                 sub    $0x1,%ecx
>> >    70:    75 ef                    jne    61 <sbf_init+0x51>
>> >    72:    39 c7                    cmp    %eax,%edi
>> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
>> >    76:
>> > 
>> > to:
>> > 
>> >    54:    89 d8                    mov    %ebx,%eax
>> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
>> >    5b:    c0 e8 04                 shr    $0x4,%al
>> >    5e:    31 d8                    xor    %ebx,%eax
>> >    60:    83 e0 0f                 and    $0xf,%eax
>> >    63:    0f a3 c2                 bt     %eax,%edx
>> >    66:    73 64                    jae    cc <sbf_init+0xbc>
>> >    68:
>> > 
>> > which is faster and smaller (-10 bytes) code.
>> > 
>> 
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>> 
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>
>There was a discussion regarding return type when parity8() was added.
>The integer type was taken over bool with a sort of consideration that
>bool should be returned as an answer to some question, like parity_odd().
>
>To me it's not a big deal. We can switch to boolean and describe in
>comment what the 'true' means for the parity() function.

Bool is really the single-bit type, and gives the compiler more information. You could argue that the function really should be called parity_odd*() in general, but that's kind of excessive IMO.
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by Yury Norov 9 months, 3 weeks ago
On Mon, Feb 24, 2025 at 02:21:13PM -0800, H. Peter Anvin wrote:
> On February 24, 2025 2:17:29 PM PST, Yury Norov <yury.norov@gmail.com> wrote:
> >On Mon, Feb 24, 2025 at 01:55:28PM -0800, H. Peter Anvin wrote:
> >> On 2/24/25 07:24, Uros Bizjak wrote:
> >> > 
> >> > 
> >> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> >> > > Refactor parity calculations to use the standard parity8() helper. This
> >> > > change eliminates redundant implementations and improves code
> >> > > efficiency.
> >> > 
> >> > The patch improves parity assembly code in bootflag.o from:
> >> > 
> >> >    58:    89 de                    mov    %ebx,%esi
> >> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
> >> >    5f:    31 d2                    xor    %edx,%edx
> >> >    61:    89 f0                    mov    %esi,%eax
> >> >    63:    89 d7                    mov    %edx,%edi
> >> >    65:    40 d0 ee                 shr    %sil
> >> >    68:    83 e0 01                 and    $0x1,%eax
> >> >    6b:    31 c2                    xor    %eax,%edx
> >> >    6d:    83 e9 01                 sub    $0x1,%ecx
> >> >    70:    75 ef                    jne    61 <sbf_init+0x51>
> >> >    72:    39 c7                    cmp    %eax,%edi
> >> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
> >> >    76:
> >> > 
> >> > to:
> >> > 
> >> >    54:    89 d8                    mov    %ebx,%eax
> >> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
> >> >    5b:    c0 e8 04                 shr    $0x4,%al
> >> >    5e:    31 d8                    xor    %ebx,%eax
> >> >    60:    83 e0 0f                 and    $0xf,%eax
> >> >    63:    0f a3 c2                 bt     %eax,%edx
> >> >    66:    73 64                    jae    cc <sbf_init+0xbc>
> >> >    68:
> >> > 
> >> > which is faster and smaller (-10 bytes) code.
> >> > 
> >> 
> >> Of course, on x86, parity8() and parity16() can be implemented very simply:
> >> 
> >> (Also, the parity functions really ought to return bool, and be flagged
> >> __attribute_const__.)
> >
> >There was a discussion regarding return type when parity8() was added.
> >The integer type was taken over bool with a sort of consideration that
> >bool should be returned as an answer to some question, like parity_odd().
> >
> >To me it's not a big deal. We can switch to boolean and describe in
> >comment what the 'true' means for the parity() function.
> 
> Bool is really the single-bit type, and gives the compiler more information. You could argue that the function really should be called parity_odd*() in general, but that's kind of excessive IMO.

Yes, I could, but I will not. :) I also feel like bool looks more
natural here.
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by Uros Bizjak 9 months, 3 weeks ago
On Mon, Feb 24, 2025 at 10:56 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> On 2/24/25 07:24, Uros Bizjak wrote:
> >
> >
> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> >> Refactor parity calculations to use the standard parity8() helper. This
> >> change eliminates redundant implementations and improves code
> >> efficiency.
> >
> > The patch improves parity assembly code in bootflag.o from:
> >
> >    58:    89 de                    mov    %ebx,%esi
> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
> >    5f:    31 d2                    xor    %edx,%edx
> >    61:    89 f0                    mov    %esi,%eax
> >    63:    89 d7                    mov    %edx,%edi
> >    65:    40 d0 ee                 shr    %sil
> >    68:    83 e0 01                 and    $0x1,%eax
> >    6b:    31 c2                    xor    %eax,%edx
> >    6d:    83 e9 01                 sub    $0x1,%ecx
> >    70:    75 ef                    jne    61 <sbf_init+0x51>
> >    72:    39 c7                    cmp    %eax,%edi
> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
> >    76:
> >
> > to:
> >
> >    54:    89 d8                    mov    %ebx,%eax
> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
> >    5b:    c0 e8 04                 shr    $0x4,%al
> >    5e:    31 d8                    xor    %ebx,%eax
> >    60:    83 e0 0f                 and    $0xf,%eax
> >    63:    0f a3 c2                 bt     %eax,%edx
> >    66:    73 64                    jae    cc <sbf_init+0xbc>
> >    68:
> >
> > which is faster and smaller (-10 bytes) code.
> >
>
> Of course, on x86, parity8() and parity16() can be implemented very simply:
>
> (Also, the parity functions really ought to return bool, and be flagged
> __attribute_const__.)
>
> static inline __attribute_const__ bool _arch_parity8(u8 val)
> {
>         bool parity;
>         asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));

asm("test %0,%0" : "=@ccnp" (parity) : "q" (val));

because we are interested only in flags.

Uros.
Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by H. Peter Anvin 9 months, 3 weeks ago
On 2/24/25 14:08, Uros Bizjak wrote:
> On Mon, Feb 24, 2025 at 10:56 PM H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>>>
>>>
>>> On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>>>> Refactor parity calculations to use the standard parity8() helper. This
>>>> change eliminates redundant implementations and improves code
>>>> efficiency.
>>>
>>> The patch improves parity assembly code in bootflag.o from:
>>>
>>>     58:    89 de                    mov    %ebx,%esi
>>>     5a:    b9 08 00 00 00           mov    $0x8,%ecx
>>>     5f:    31 d2                    xor    %edx,%edx
>>>     61:    89 f0                    mov    %esi,%eax
>>>     63:    89 d7                    mov    %edx,%edi
>>>     65:    40 d0 ee                 shr    %sil
>>>     68:    83 e0 01                 and    $0x1,%eax
>>>     6b:    31 c2                    xor    %eax,%edx
>>>     6d:    83 e9 01                 sub    $0x1,%ecx
>>>     70:    75 ef                    jne    61 <sbf_init+0x51>
>>>     72:    39 c7                    cmp    %eax,%edi
>>>     74:    74 7f                    je     f5 <sbf_init+0xe5>
>>>     76:
>>>
>>> to:
>>>
>>>     54:    89 d8                    mov    %ebx,%eax
>>>     56:    ba 96 69 00 00           mov    $0x6996,%edx
>>>     5b:    c0 e8 04                 shr    $0x4,%al
>>>     5e:    31 d8                    xor    %ebx,%eax
>>>     60:    83 e0 0f                 and    $0xf,%eax
>>>     63:    0f a3 c2                 bt     %eax,%edx
>>>     66:    73 64                    jae    cc <sbf_init+0xbc>
>>>     68:
>>>
>>> which is faster and smaller (-10 bytes) code.
>>>
>>
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>>
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>>
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>>          bool parity;
>>          asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
> 
> asm("test %0,%0" : "=@ccnp" (parity) : "q" (val));
> 
> because we are interested only in flags.
> 

Also, needs to be %1,%1 (my mistake, thought flags outputs didn't count.)

Finally, this is kind of an obvious improvement:

  static void __init sbf_write(u8 v)
  {
         unsigned long flags;

         if (sbf_port != -1) {
-               v &= ~SBF_PARITY;
                 if (!parity(v))
-                       v |= SBF_PARITY;
+                       v ^= SBF_PARITY;

	-hpa

Re: [PATCH 03/17] x86: Replace open-coded parity calculation with parity8()
Posted by H. Peter Anvin 9 months, 3 weeks ago
On February 24, 2025 2:08:05 PM PST, Uros Bizjak <ubizjak@gmail.com> wrote:
>On Mon, Feb 24, 2025 at 10:56 PM H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> >
>> >
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>> >> Refactor parity calculations to use the standard parity8() helper. This
>> >> change eliminates redundant implementations and improves code
>> >> efficiency.
>> >
>> > The patch improves parity assembly code in bootflag.o from:
>> >
>> >    58:    89 de                    mov    %ebx,%esi
>> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
>> >    5f:    31 d2                    xor    %edx,%edx
>> >    61:    89 f0                    mov    %esi,%eax
>> >    63:    89 d7                    mov    %edx,%edi
>> >    65:    40 d0 ee                 shr    %sil
>> >    68:    83 e0 01                 and    $0x1,%eax
>> >    6b:    31 c2                    xor    %eax,%edx
>> >    6d:    83 e9 01                 sub    $0x1,%ecx
>> >    70:    75 ef                    jne    61 <sbf_init+0x51>
>> >    72:    39 c7                    cmp    %eax,%edi
>> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
>> >    76:
>> >
>> > to:
>> >
>> >    54:    89 d8                    mov    %ebx,%eax
>> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
>> >    5b:    c0 e8 04                 shr    $0x4,%al
>> >    5e:    31 d8                    xor    %ebx,%eax
>> >    60:    83 e0 0f                 and    $0xf,%eax
>> >    63:    0f a3 c2                 bt     %eax,%edx
>> >    66:    73 64                    jae    cc <sbf_init+0xbc>
>> >    68:
>> >
>> > which is faster and smaller (-10 bytes) code.
>> >
>>
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>>
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>>
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>>         bool parity;
>>         asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
>
>asm("test %0,%0" : "=@ccnp" (parity) : "q" (val));
>
>because we are interested only in flags.
>
>Uros.
>

Same thing, really, but yes, using test is cleaner.