When AP firstly wakes up, MpFuncs.nasm contains below logic to assign
an unique ApIndex to each AP according to who comes first:
---ASM---
TestLock:
xchg [edi], eax
cmp eax, NotVacantFlag
jz TestLock
mov ecx, esi
add ecx, ApIndexLocation
inc dword [ecx]
mov ebx, [ecx]
Releaselock:
mov eax, VacantFlag
xchg [edi], eax
---ASM END---
"lock inc" cannot be used to increase ApIndex because not only the
global ApIndex should be increased, but also the result should be
stored to a local general purpose register EBX.
This patch learns from the NASM implementation of
InternalSyncIncrement() to use "XADD" instruction which can increase
the global ApIndex and store the original ApIndex to EBX in one
instruction.
With this patch, OVMF when running in a 255 threads QEMU spends about
one second to wakeup all APs. Original implementation needs more than
10 seconds.
Signed-off-by: Ray Ni <ray.ni@intel.com>
Cc: Eric Dong <eric.dong@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Rahul Kumar <rahul1.kumar@intel.com>
---
.../Library/MpInitLib/Ia32/MpFuncs.nasm | 20 ++++++-------------
UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++++-----------
2 files changed, 12 insertions(+), 26 deletions(-)
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
index 7e81d24aa6..2eaddc93bc 100644
--- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;
-; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
@@ -125,19 +125,11 @@ SkipEnableExecuteDisable:
add edi, LockLocation
mov eax, NotVacantFlag
-TestLock:
- xchg [edi], eax
- cmp eax, NotVacantFlag
- jz TestLock
-
- mov ecx, esi
- add ecx, ApIndexLocation
- inc dword [ecx]
- mov ebx, [ecx]
-
-Releaselock:
- mov eax, VacantFlag
- xchg [edi], eax
+ mov edi, esi
+ add edi, ApIndexLocation
+ mov ebx, 1
+ lock xadd dword [edi], ebx ; EBX = ApIndex++
+ inc ebx ; EBX is CpuNumber
mov edi, esi
add edi, StackSizeLocation
diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
index aecfd07bc0..5b588f2dcb 100644
--- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;
-; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
@@ -161,18 +161,12 @@ LongModeStart:
add edi, LockLocation
mov rax, NotVacantFlag
-TestLock:
- xchg qword [edi], rax
- cmp rax, NotVacantFlag
- jz TestLock
-
- lea ecx, [esi + ApIndexLocation]
- inc dword [ecx]
- mov ebx, [ecx]
+ mov edi, esi
+ add edi, ApIndexLocation
+ mov ebx, 1
+ lock xadd dword [edi], ebx ; EBX = ApIndex++
+ inc ebx ; EBX is CpuNumber
-Releaselock:
- mov rax, VacantFlag
- xchg qword [edi], rax
; program stack
mov edi, esi
add edi, StackSizeLocation
--
2.27.0.windows.1
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#71517): https://edk2.groups.io/g/devel/message/71517
Mute This Topic: https://groups.io/mt/80504936/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
Reviewed-by: Eric Dong <eric.dong@intel.com>
-----Original Message-----
From: Ni, Ray <ray.ni@intel.com>
Sent: Tuesday, February 9, 2021 10:17 PM
To: devel@edk2.groups.io
Cc: Dong, Eric <eric.dong@intel.com>; Laszlo Ersek <lersek@redhat.com>; Kumar, Rahul1 <rahul1.kumar@intel.com>
Subject: [PATCH v3 1/4] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
When AP firstly wakes up, MpFuncs.nasm contains below logic to assign an unique ApIndex to each AP according to who comes first:
---ASM---
TestLock:
xchg [edi], eax
cmp eax, NotVacantFlag
jz TestLock
mov ecx, esi
add ecx, ApIndexLocation
inc dword [ecx]
mov ebx, [ecx]
Releaselock:
mov eax, VacantFlag
xchg [edi], eax
---ASM END---
"lock inc" cannot be used to increase ApIndex because not only the global ApIndex should be increased, but also the result should be stored to a local general purpose register EBX.
This patch learns from the NASM implementation of
InternalSyncIncrement() to use "XADD" instruction which can increase the global ApIndex and store the original ApIndex to EBX in one instruction.
With this patch, OVMF when running in a 255 threads QEMU spends about one second to wakeup all APs. Original implementation needs more than
10 seconds.
Signed-off-by: Ray Ni <ray.ni@intel.com>
Cc: Eric Dong <eric.dong@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Rahul Kumar <rahul1.kumar@intel.com>
---
.../Library/MpInitLib/Ia32/MpFuncs.nasm | 20 ++++++-------------
UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++++-----------
2 files changed, 12 insertions(+), 26 deletions(-)
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
index 7e81d24aa6..2eaddc93bc 100644
--- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;-; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> ; SPDX-License-Identifier: BSD-2-Clause-Patent ; ; Module Name:@@ -125,19 +125,11 @@ SkipEnableExecuteDisable:
add edi, LockLocation mov eax, NotVacantFlag -TestLock:- xchg [edi], eax- cmp eax, NotVacantFlag- jz TestLock-- mov ecx, esi- add ecx, ApIndexLocation- inc dword [ecx]- mov ebx, [ecx]--Releaselock:- mov eax, VacantFlag- xchg [edi], eax+ mov edi, esi+ add edi, ApIndexLocation+ mov ebx, 1+ lock xadd dword [edi], ebx ; EBX = ApIndex+++ inc ebx ; EBX is CpuNumber mov edi, esi add edi, StackSizeLocationdiff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
index aecfd07bc0..5b588f2dcb 100644
--- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;-; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> ; SPDX-License-Identifier: BSD-2-Clause-Patent ; ; Module Name:@@ -161,18 +161,12 @@ LongModeStart:
add edi, LockLocation mov rax, NotVacantFlag -TestLock:- xchg qword [edi], rax- cmp rax, NotVacantFlag- jz TestLock-- lea ecx, [esi + ApIndexLocation]- inc dword [ecx]- mov ebx, [ecx]+ mov edi, esi+ add edi, ApIndexLocation+ mov ebx, 1+ lock xadd dword [edi], ebx ; EBX = ApIndex+++ inc ebx ; EBX is CpuNumber -Releaselock:- mov rax, VacantFlag- xchg qword [edi], rax ; program stack mov edi, esi add edi, StackSizeLocation--
2.27.0.windows.1
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#71926): https://edk2.groups.io/g/devel/message/71926
Mute This Topic: https://groups.io/mt/80504936/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
Reviewed-by: Michael D Kinney <michael.d.kinney@intel.com> > -----Original Message----- > From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Ni, Ray > Sent: Tuesday, February 9, 2021 6:17 AM > To: devel@edk2.groups.io > Cc: Dong, Eric <eric.dong@intel.com>; Laszlo Ersek <lersek@redhat.com>; Kumar, Rahul1 <rahul1.kumar@intel.com> > Subject: [edk2-devel] [PATCH v3 1/4] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release > > When AP firstly wakes up, MpFuncs.nasm contains below logic to assign > an unique ApIndex to each AP according to who comes first: > ---ASM--- > TestLock: > xchg [edi], eax > cmp eax, NotVacantFlag > jz TestLock > > mov ecx, esi > add ecx, ApIndexLocation > inc dword [ecx] > mov ebx, [ecx] > > Releaselock: > mov eax, VacantFlag > xchg [edi], eax > ---ASM END--- > > "lock inc" cannot be used to increase ApIndex because not only the > global ApIndex should be increased, but also the result should be > stored to a local general purpose register EBX. > > This patch learns from the NASM implementation of > InternalSyncIncrement() to use "XADD" instruction which can increase > the global ApIndex and store the original ApIndex to EBX in one > instruction. > > With this patch, OVMF when running in a 255 threads QEMU spends about > one second to wakeup all APs. Original implementation needs more than > 10 seconds. > > Signed-off-by: Ray Ni <ray.ni@intel.com> > Cc: Eric Dong <eric.dong@intel.com> > Cc: Laszlo Ersek <lersek@redhat.com> > Cc: Rahul Kumar <rahul1.kumar@intel.com> > --- > .../Library/MpInitLib/Ia32/MpFuncs.nasm | 20 ++++++------------- > UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++++----------- > 2 files changed, 12 insertions(+), 26 deletions(-) > > diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm > index 7e81d24aa6..2eaddc93bc 100644 > --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm > +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm > @@ -1,5 +1,5 @@ > ;------------------------------------------------------------------------------ ; > > -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR> > > +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> > > ; SPDX-License-Identifier: BSD-2-Clause-Patent > > ; > > ; Module Name: > > @@ -125,19 +125,11 @@ SkipEnableExecuteDisable: > add edi, LockLocation > > mov eax, NotVacantFlag > > > > -TestLock: > > - xchg [edi], eax > > - cmp eax, NotVacantFlag > > - jz TestLock > > - > > - mov ecx, esi > > - add ecx, ApIndexLocation > > - inc dword [ecx] > > - mov ebx, [ecx] > > - > > -Releaselock: > > - mov eax, VacantFlag > > - xchg [edi], eax > > + mov edi, esi > > + add edi, ApIndexLocation > > + mov ebx, 1 > > + lock xadd dword [edi], ebx ; EBX = ApIndex++ > > + inc ebx ; EBX is CpuNumber > > > > mov edi, esi > > add edi, StackSizeLocation > > diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm > index aecfd07bc0..5b588f2dcb 100644 > --- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm > +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm > @@ -1,5 +1,5 @@ > ;------------------------------------------------------------------------------ ; > > -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR> > > +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> > > ; SPDX-License-Identifier: BSD-2-Clause-Patent > > ; > > ; Module Name: > > @@ -161,18 +161,12 @@ LongModeStart: > add edi, LockLocation > > mov rax, NotVacantFlag > > > > -TestLock: > > - xchg qword [edi], rax > > - cmp rax, NotVacantFlag > > - jz TestLock > > - > > - lea ecx, [esi + ApIndexLocation] > > - inc dword [ecx] > > - mov ebx, [ecx] > > + mov edi, esi > > + add edi, ApIndexLocation > > + mov ebx, 1 > > + lock xadd dword [edi], ebx ; EBX = ApIndex++ > > + inc ebx ; EBX is CpuNumber > > > > -Releaselock: > > - mov rax, VacantFlag > > - xchg qword [edi], rax > > ; program stack > > mov edi, esi > > add edi, StackSizeLocation > > -- > 2.27.0.windows.1 > > > > -=-=-=-=-=-= > Groups.io Links: You receive all messages sent to this group. > View/Reply Online (#71517): https://edk2.groups.io/g/devel/message/71517 > Mute This Topic: https://groups.io/mt/80504936/1643496 > Group Owner: devel+owner@edk2.groups.io > Unsubscribe: https://edk2.groups.io/g/devel/unsub [michael.d.kinney@intel.com] > -=-=-=-=-=-= > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#72109): https://edk2.groups.io/g/devel/message/72109 Mute This Topic: https://groups.io/mt/80504936/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Laszlo, Do you think that Mike's R-b to the first patch can be an ack to the V3 patch set? Can you please review and provide comments? Thanks, Ray > -----Original Message----- > From: Kinney, Michael D <michael.d.kinney@intel.com> > Sent: Wednesday, February 24, 2021 2:12 AM > To: devel@edk2.groups.io; Ni, Ray <ray.ni@intel.com>; Kinney, Michael D > <michael.d.kinney@intel.com> > Cc: Dong, Eric <eric.dong@intel.com>; Laszlo Ersek <lersek@redhat.com>; > Kumar, Rahul1 <rahul1.kumar@intel.com> > Subject: RE: [edk2-devel] [PATCH v3 1/4] UefiCpuPkg/MpInitLib: Use XADD > to avoid lock acquire/release > > Reviewed-by: Michael D Kinney <michael.d.kinney@intel.com> > > > -----Original Message----- > > From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Ni, > Ray > > Sent: Tuesday, February 9, 2021 6:17 AM > > To: devel@edk2.groups.io > > Cc: Dong, Eric <eric.dong@intel.com>; Laszlo Ersek <lersek@redhat.com>; > Kumar, Rahul1 <rahul1.kumar@intel.com> > > Subject: [edk2-devel] [PATCH v3 1/4] UefiCpuPkg/MpInitLib: Use XADD to > avoid lock acquire/release > > > > When AP firstly wakes up, MpFuncs.nasm contains below logic to assign > > an unique ApIndex to each AP according to who comes first: > > ---ASM--- > > TestLock: > > xchg [edi], eax > > cmp eax, NotVacantFlag > > jz TestLock > > > > mov ecx, esi > > add ecx, ApIndexLocation > > inc dword [ecx] > > mov ebx, [ecx] > > > > Releaselock: > > mov eax, VacantFlag > > xchg [edi], eax > > ---ASM END--- > > > > "lock inc" cannot be used to increase ApIndex because not only the > > global ApIndex should be increased, but also the result should be > > stored to a local general purpose register EBX. > > > > This patch learns from the NASM implementation of > > InternalSyncIncrement() to use "XADD" instruction which can increase > > the global ApIndex and store the original ApIndex to EBX in one > > instruction. > > > > With this patch, OVMF when running in a 255 threads QEMU spends about > > one second to wakeup all APs. Original implementation needs more than > > 10 seconds. > > > > Signed-off-by: Ray Ni <ray.ni@intel.com> > > Cc: Eric Dong <eric.dong@intel.com> > > Cc: Laszlo Ersek <lersek@redhat.com> > > Cc: Rahul Kumar <rahul1.kumar@intel.com> > > --- > > .../Library/MpInitLib/Ia32/MpFuncs.nasm | 20 ++++++------------- > > UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++++----------- > > 2 files changed, 12 insertions(+), 26 deletions(-) > > > > diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm > b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm > > index 7e81d24aa6..2eaddc93bc 100644 > > --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm > > +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm > > @@ -1,5 +1,5 @@ > > ;------------------------------------------------------------------------------ ; > > > > -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR> > > > > +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> > > > > ; SPDX-License-Identifier: BSD-2-Clause-Patent > > > > ; > > > > ; Module Name: > > > > @@ -125,19 +125,11 @@ SkipEnableExecuteDisable: > > add edi, LockLocation > > > > mov eax, NotVacantFlag > > > > > > > > -TestLock: > > > > - xchg [edi], eax > > > > - cmp eax, NotVacantFlag > > > > - jz TestLock > > > > - > > > > - mov ecx, esi > > > > - add ecx, ApIndexLocation > > > > - inc dword [ecx] > > > > - mov ebx, [ecx] > > > > - > > > > -Releaselock: > > > > - mov eax, VacantFlag > > > > - xchg [edi], eax > > > > + mov edi, esi > > > > + add edi, ApIndexLocation > > > > + mov ebx, 1 > > > > + lock xadd dword [edi], ebx ; EBX = ApIndex++ > > > > + inc ebx ; EBX is CpuNumber > > > > > > > > mov edi, esi > > > > add edi, StackSizeLocation > > > > diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm > b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm > > index aecfd07bc0..5b588f2dcb 100644 > > --- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm > > +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm > > @@ -1,5 +1,5 @@ > > ;------------------------------------------------------------------------------ ; > > > > -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR> > > > > +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> > > > > ; SPDX-License-Identifier: BSD-2-Clause-Patent > > > > ; > > > > ; Module Name: > > > > @@ -161,18 +161,12 @@ LongModeStart: > > add edi, LockLocation > > > > mov rax, NotVacantFlag > > > > > > > > -TestLock: > > > > - xchg qword [edi], rax > > > > - cmp rax, NotVacantFlag > > > > - jz TestLock > > > > - > > > > - lea ecx, [esi + ApIndexLocation] > > > > - inc dword [ecx] > > > > - mov ebx, [ecx] > > > > + mov edi, esi > > > > + add edi, ApIndexLocation > > > > + mov ebx, 1 > > > > + lock xadd dword [edi], ebx ; EBX = ApIndex++ > > > > + inc ebx ; EBX is CpuNumber > > > > > > > > -Releaselock: > > > > - mov rax, VacantFlag > > > > - xchg qword [edi], rax > > > > ; program stack > > > > mov edi, esi > > > > add edi, StackSizeLocation > > > > -- > > 2.27.0.windows.1 > > > > > > > > -=-=-=-=-=-= > > Groups.io Links: You receive all messages sent to this group. > > View/Reply Online (#71517): > https://edk2.groups.io/g/devel/message/71517 > > Mute This Topic: https://groups.io/mt/80504936/1643496 > > Group Owner: devel+owner@edk2.groups.io > > Unsubscribe: https://edk2.groups.io/g/devel/unsub > [michael.d.kinney@intel.com] > > -=-=-=-=-=-= > > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#72179): https://edk2.groups.io/g/devel/message/72179 Mute This Topic: https://groups.io/mt/80504936/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 02/25/21 05:04, Ni, Ray wrote: > Laszlo, > Do you think that Mike's R-b to the first patch can be an ack to the V3 patch set? No, I don't think so. If an R-b is given in response to a specific patch (not the cover letter), and the reviewer doesn't explicitly say "series Reviewed-by" or "for the entire series:", then the R-b applies only to the specific patch. Now, a different question is whether you want or need Mike's R-b for *all* of the patches. That's up to you and Mike to decide. > Can you please review and provide comments? Yes, I'll comment soon. Thanks Laszlo >> -----Original Message----- >> From: Kinney, Michael D <michael.d.kinney@intel.com> >> Sent: Wednesday, February 24, 2021 2:12 AM >> To: devel@edk2.groups.io; Ni, Ray <ray.ni@intel.com>; Kinney, Michael D >> <michael.d.kinney@intel.com> >> Cc: Dong, Eric <eric.dong@intel.com>; Laszlo Ersek <lersek@redhat.com>; >> Kumar, Rahul1 <rahul1.kumar@intel.com> >> Subject: RE: [edk2-devel] [PATCH v3 1/4] UefiCpuPkg/MpInitLib: Use XADD >> to avoid lock acquire/release >> >> Reviewed-by: Michael D Kinney <michael.d.kinney@intel.com> >> >>> -----Original Message----- >>> From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Ni, >> Ray >>> Sent: Tuesday, February 9, 2021 6:17 AM >>> To: devel@edk2.groups.io >>> Cc: Dong, Eric <eric.dong@intel.com>; Laszlo Ersek <lersek@redhat.com>; >> Kumar, Rahul1 <rahul1.kumar@intel.com> >>> Subject: [edk2-devel] [PATCH v3 1/4] UefiCpuPkg/MpInitLib: Use XADD to >> avoid lock acquire/release >>> >>> When AP firstly wakes up, MpFuncs.nasm contains below logic to assign >>> an unique ApIndex to each AP according to who comes first: >>> ---ASM--- >>> TestLock: >>> xchg [edi], eax >>> cmp eax, NotVacantFlag >>> jz TestLock >>> >>> mov ecx, esi >>> add ecx, ApIndexLocation >>> inc dword [ecx] >>> mov ebx, [ecx] >>> >>> Releaselock: >>> mov eax, VacantFlag >>> xchg [edi], eax >>> ---ASM END--- >>> >>> "lock inc" cannot be used to increase ApIndex because not only the >>> global ApIndex should be increased, but also the result should be >>> stored to a local general purpose register EBX. >>> >>> This patch learns from the NASM implementation of >>> InternalSyncIncrement() to use "XADD" instruction which can increase >>> the global ApIndex and store the original ApIndex to EBX in one >>> instruction. >>> >>> With this patch, OVMF when running in a 255 threads QEMU spends about >>> one second to wakeup all APs. Original implementation needs more than >>> 10 seconds. >>> >>> Signed-off-by: Ray Ni <ray.ni@intel.com> >>> Cc: Eric Dong <eric.dong@intel.com> >>> Cc: Laszlo Ersek <lersek@redhat.com> >>> Cc: Rahul Kumar <rahul1.kumar@intel.com> >>> --- >>> .../Library/MpInitLib/Ia32/MpFuncs.nasm | 20 ++++++------------- >>> UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++++----------- >>> 2 files changed, 12 insertions(+), 26 deletions(-) >>> >>> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm >> b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm >>> index 7e81d24aa6..2eaddc93bc 100644 >>> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm >>> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm >>> @@ -1,5 +1,5 @@ >>> ;------------------------------------------------------------------------------ ; >>> >>> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR> >>> >>> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> >>> >>> ; SPDX-License-Identifier: BSD-2-Clause-Patent >>> >>> ; >>> >>> ; Module Name: >>> >>> @@ -125,19 +125,11 @@ SkipEnableExecuteDisable: >>> add edi, LockLocation >>> >>> mov eax, NotVacantFlag >>> >>> >>> >>> -TestLock: >>> >>> - xchg [edi], eax >>> >>> - cmp eax, NotVacantFlag >>> >>> - jz TestLock >>> >>> - >>> >>> - mov ecx, esi >>> >>> - add ecx, ApIndexLocation >>> >>> - inc dword [ecx] >>> >>> - mov ebx, [ecx] >>> >>> - >>> >>> -Releaselock: >>> >>> - mov eax, VacantFlag >>> >>> - xchg [edi], eax >>> >>> + mov edi, esi >>> >>> + add edi, ApIndexLocation >>> >>> + mov ebx, 1 >>> >>> + lock xadd dword [edi], ebx ; EBX = ApIndex++ >>> >>> + inc ebx ; EBX is CpuNumber >>> >>> >>> >>> mov edi, esi >>> >>> add edi, StackSizeLocation >>> >>> diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm >> b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm >>> index aecfd07bc0..5b588f2dcb 100644 >>> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm >>> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm >>> @@ -1,5 +1,5 @@ >>> ;------------------------------------------------------------------------------ ; >>> >>> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR> >>> >>> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR> >>> >>> ; SPDX-License-Identifier: BSD-2-Clause-Patent >>> >>> ; >>> >>> ; Module Name: >>> >>> @@ -161,18 +161,12 @@ LongModeStart: >>> add edi, LockLocation >>> >>> mov rax, NotVacantFlag >>> >>> >>> >>> -TestLock: >>> >>> - xchg qword [edi], rax >>> >>> - cmp rax, NotVacantFlag >>> >>> - jz TestLock >>> >>> - >>> >>> - lea ecx, [esi + ApIndexLocation] >>> >>> - inc dword [ecx] >>> >>> - mov ebx, [ecx] >>> >>> + mov edi, esi >>> >>> + add edi, ApIndexLocation >>> >>> + mov ebx, 1 >>> >>> + lock xadd dword [edi], ebx ; EBX = ApIndex++ >>> >>> + inc ebx ; EBX is CpuNumber >>> >>> >>> >>> -Releaselock: >>> >>> - mov rax, VacantFlag >>> >>> - xchg qword [edi], rax >>> >>> ; program stack >>> >>> mov edi, esi >>> >>> add edi, StackSizeLocation >>> >>> -- >>> 2.27.0.windows.1 >>> >>> >>> >>> -=-=-=-=-=-= >>> Groups.io Links: You receive all messages sent to this group. >>> View/Reply Online (#71517): >> https://edk2.groups.io/g/devel/message/71517 >>> Mute This Topic: https://groups.io/mt/80504936/1643496 >>> Group Owner: devel+owner@edk2.groups.io >>> Unsubscribe: https://edk2.groups.io/g/devel/unsub >> [michael.d.kinney@intel.com] >>> -=-=-=-=-=-= >>> > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#72207): https://edk2.groups.io/g/devel/message/72207 Mute This Topic: https://groups.io/mt/80504936/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
© 2016 - 2026 Red Hat, Inc.