Improve __acpi_acquire_global_lock by using a temporary variable.
This enables the compiler to perform if-conversion and improves the
generated code from:
...
72a: d1 ea shr %edx
72c: 83 e1 fc and $0xfffffffc,%ecx
72f: 83 e2 01 and $0x1,%edx
732: 09 ca or %ecx,%edx
734: 83 c2 02 add $0x2,%edx
737: f0 0f b1 17 lock cmpxchg %edx,(%rdi)
73b: 75 e9 jne 726 <__acpi_acquire_global_lock+0x6>
73d: 83 e2 03 and $0x3,%edx
740: 31 c0 xor %eax,%eax
742: 83 fa 03 cmp $0x3,%edx
745: 0f 95 c0 setne %al
748: f7 d8 neg %eax
to:
...
72a: d1 e9 shr %ecx
72c: 83 e2 fc and $0xfffffffc,%edx
72f: 83 e1 01 and $0x1,%ecx
732: 09 ca or %ecx,%edx
734: 83 c2 02 add $0x2,%edx
737: f0 0f b1 17 lock cmpxchg %edx,(%rdi)
73b: 75 e9 jne 726 <__acpi_acquire_global_lock+0x6>
73d: 8d 41 ff lea -0x1(%rcx),%eax
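For reference, the acquire algorithm with the temporary variable can be
sketched as a stand-alone user-space program (using C11 atomics in place of
try_cmpxchg; the bit meanings come from the ACPI Global Lock layout,
bit 0 = Pending, bit 1 = Owned). This is only an illustration, not the
kernel code itself:

/*
 * Stand-alone sketch of the acquire logic above (illustration only,
 * not the kernel code).  FACS Global Lock bits: bit 0 = Pending,
 * bit 1 = Owned.
 */
#include <stdatomic.h>
#include <stdio.h>

static int acquire_global_lock(atomic_uint *lock)
{
	unsigned int old, new, val;

	old = atomic_load_explicit(lock, memory_order_relaxed);
	do {
		val = (old >> 1) & 0x1;		/* was the lock already owned? */
		new = (old & ~0x3) + 2 + val;	/* set Owned; set Pending if it was owned */
	} while (!atomic_compare_exchange_weak(lock, &old, new));

	/* val feeds the return directly, so the compiler can if-convert it */
	return val ? 0 : -1;			/* -1: acquired, 0: pending */
}

int main(void)
{
	atomic_uint lock = 0;

	printf("%d\n", acquire_global_lock(&lock));	/* -1: acquired */
	printf("%d\n", acquire_global_lock(&lock));	/*  0: pending  */
	return 0;
}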
BTW: the compiler could generate:
lea 0x2(%rcx,%rdx,1),%edx
instead of:
or %ecx,%edx
add $0x2,%edx
but an unwanted conversion from add to or when bits are known to be zero
prevents this improvement. This is GCC PR108477.
No functional change intended.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
v2: Expand return statement.
---
arch/x86/kernel/acpi/boot.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1c38174b5f01..a08a4a7a03f8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1853,13 +1853,18 @@ early_param("acpi_sci", setup_acpi_sci);
 
 int __acpi_acquire_global_lock(unsigned int *lock)
 {
-	unsigned int old, new;
+	unsigned int old, new, val;
 
 	old = READ_ONCE(*lock);
 	do {
-		new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
+		val = (old >> 1) & 0x1;
+		new = (old & ~0x3) + 2 + val;
 	} while (!try_cmpxchg(lock, &old, new));
-	return ((new & 0x3) < 3) ? -1 : 0;
+
+	if (val)
+		return 0;
+
+	return -1;
 }
 
 int __acpi_release_global_lock(unsigned int *lock)
--
2.39.2
On Mon, Mar 20, 2023 at 10:20 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> [quoted patch trimmed]

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

or please let me know if you want me to pick this up (in which case it
will require an ACK from one of the x86 maintainers).
On 3/22/23 11:24, Rafael J. Wysocki wrote:
> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> or please let me know if you want me to pick this up (in which case it
> will require an ACK from one of the x86 maintainers).

I'll pull it into x86/acpi.  I'm kinda shocked the compiler is so
clueless, but this makes the C code more readable anyway.  Win/win, I guess.
On Wed, Mar 22, 2023 at 7:34 PM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 3/22/23 11:24, Rafael J. Wysocki wrote:
> > Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > or please let me know if you want me to pick this up (in which case it
> > will require an ACK from one of the x86 maintainers).
>
> I'll pull it into x86/acpi.  I'm kinda shocked the compiler is so
> clueless, but this makes the C code more readable anyway.  Win/win, I guess.

Please note that the return from __acpi_{acquire,release}_global_lock
is actually used as a bool:

acenv.h:

int __acpi_acquire_global_lock(unsigned int *lock);
int __acpi_release_global_lock(unsigned int *lock);

#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \
	((Acq) = __acpi_acquire_global_lock(&facs->global_lock))

#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \
	((Acq) = __acpi_release_global_lock(&facs->global_lock))

evglock.c:

acpi_status acpi_ev_acquire_global_lock(u16 timeout)
{
	...
	u8 acquired = FALSE;
	...
	ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired);
	if (acquired)
	...
}

acpi_status acpi_ev_release_global_lock(void)
{
	u8 pending = FALSE;
	...
	ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending);
	if (pending)
	...
}

These functions are also defined for ia64, so I didn't want to change
the return value. But ia64 is going to be retired, and this opens the
optimization opportunity.

Uros.
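As a purely hypothetical follow-up (not part of this patch, and assuming the
ia64 implementation is gone so only the x86 helper remains), the acquire
helper could then return bool directly; a sketch of what that might look
like, reusing the kernel helpers already used in the patch above:

/* Hypothetical bool-returning variant -- sketch only, not applied. */
bool __acpi_acquire_global_lock(unsigned int *lock)
{
	unsigned int old, new, val;

	old = READ_ONCE(*lock);
	do {
		val = (old >> 1) & 0x1;
		new = (old & ~0x3) + 2 + val;
	} while (!try_cmpxchg(lock, &old, new));

	/* true when the lock was free and is now owned by this caller */
	return !val;
}

Callers through ACPI_ACQUIRE_GLOBAL_LOCK() would see the same truth value
as today (-1 vs. 0 both test as true/false), so the macro itself would not
need to change.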