Add CpuEject(), which handles the CPU ejection, and provides a holding
area for said CPUs. It is called via SmmCpuFeaturesRendezvousExit(),
at the tail end of the SMI handling.
Also UnplugCpus() now stashes APIC IDs of CPUs which need to be
ejected in CPU_HOT_EJECT_DATA.ApicIdMap. These are used by CpuEject()
to identify such CPUs.
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Ard Biesheuvel <ard.biesheuvel@arm.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Aaron Young <aaron.young@oracle.com>
Ref: https://bugzilla.tianocore.org/show_bug.cgi?id=3132
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
OvmfPkg/CpuHotplugSmm/CpuHotplug.c | 109 +++++++++++++++++++++++++++++++++++--
1 file changed, 105 insertions(+), 4 deletions(-)
diff --git a/OvmfPkg/CpuHotplugSmm/CpuHotplug.c b/OvmfPkg/CpuHotplugSmm/CpuHotplug.c
index 70d69f6ed65b..526f51faf070 100644
--- a/OvmfPkg/CpuHotplugSmm/CpuHotplug.c
+++ b/OvmfPkg/CpuHotplugSmm/CpuHotplug.c
@@ -14,6 +14,7 @@
#include <Library/MmServicesTableLib.h> // gMmst
#include <Library/PcdLib.h> // PcdGetBool()
#include <Library/SafeIntLib.h> // SafeUintnSub()
+#include <Library/CpuHotEjectData.h> // CPU_HOT_EJECT_DATA
#include <Protocol/MmCpuIo.h> // EFI_MM_CPU_IO_PROTOCOL
#include <Protocol/SmmCpuService.h> // EFI_SMM_CPU_SERVICE_PROTOCOL
#include <Uefi/UefiBaseType.h> // EFI_STATUS
@@ -32,11 +33,12 @@ STATIC EFI_MM_CPU_IO_PROTOCOL *mMmCpuIo;
//
STATIC EFI_SMM_CPU_SERVICE_PROTOCOL *mMmCpuService;
//
-// This structure is a communication side-channel between the
+// These structures serve as communication side-channels between the
// EFI_SMM_CPU_SERVICE_PROTOCOL consumer (i.e., this driver) and provider
// (i.e., PiSmmCpuDxeSmm).
//
STATIC CPU_HOT_PLUG_DATA *mCpuHotPlugData;
+STATIC CPU_HOT_EJECT_DATA *mCpuHotEjectData;
//
// SMRAM arrays for fetching the APIC IDs of processors with pending events (of
// known event types), for the time of just one MMI.
@@ -188,11 +190,53 @@ RevokeNewSlot:
}
/**
+ CPU Hot-eject handler, called from SmmCpuFeaturesRendezvousExit(),
+ on each CPU at exit from SMM.
+
+ If the executing CPU is not being ejected, there is nothing to be done.
+ If the executing CPU is being ejected, wait in a CpuDeadLoop()
+ until ejected.
+
+ @param[in] ProcessorNum Index of executing CPU.
+
+**/
+VOID
+EFIAPI
+CpuEject (
+ IN UINTN ProcessorNum
+ )
+{
+ //
+ // APIC ID is UINT32, but mCpuHotEjectData->ApicIdMap[] is UINT64
+ // so use UINT64 throughout.
+ //
+ UINT64 ApicId;
+
+ ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum];
+ if (ApicId == CPU_EJECT_INVALID) {
+ return;
+ }
+
+ //
+ // CPU(s) being unplugged get here from SmmCpuFeaturesRendezvousExit()
+ // after having been cleared to exit the SMI by the monarch and thus have
+ // no SMM processing remaining.
+ //
+ // Given that we cannot allow them to escape to the guest, we pen them
+ // here until the SMM monarch tells the HW to unplug them.
+ //
+ CpuDeadLoop ();
+}
+
+/**
Process to be hot-unplugged CPUs, per QemuCpuhpCollectApicIds().
For each such CPU, report the CPU to PiSmmCpuDxeSmm via
- EFI_SMM_CPU_SERVICE_PROTOCOL. If the to be hot-unplugged CPU is
- unknown, skip it silently.
+ EFI_SMM_CPU_SERVICE_PROTOCOL and stash the APIC ID for later ejection.
+ If the to be hot-unplugged CPU is unknown, skip it silently.
+
+ Additionally, if we do stash any APIC IDs, also install a CPU eject handler
+ which would handle the ejection.
@param[in] ToUnplugApicIds The APIC IDs of the CPUs that are about to be
hot-unplugged.
@@ -216,9 +260,11 @@ UnplugCpus (
{
EFI_STATUS Status;
UINT32 ToUnplugIdx;
+ UINT32 EjectCount;
UINTN ProcessorNum;
ToUnplugIdx = 0;
+ EjectCount = 0;
while (ToUnplugIdx < ToUnplugCount) {
APIC_ID RemoveApicId;
@@ -255,13 +301,41 @@ UnplugCpus (
DEBUG ((DEBUG_ERROR, "%a: RemoveProcessor(" FMT_APIC_ID "): %r\n",
__FUNCTION__, RemoveApicId, Status));
goto Fatal;
+ } else {
+ //
+ // Stash the APIC IDs so we can do the actual ejection later.
+ //
+ if (mCpuHotEjectData->ApicIdMap[ProcessorNum] != CPU_EJECT_INVALID) {
+ //
+ // Since ProcessorNum and APIC-ID map 1-1, a valid
+ // mCpuHotEjectData->ApicIdMap[ProcessorNum] means something
+ // is horribly wrong.
+ //
+ DEBUG ((DEBUG_ERROR, "%a: ProcessorNum %u maps to %llx, cannot "
+ "map to " FMT_APIC_ID "\n", __FUNCTION__, ProcessorNum,
+ mCpuHotEjectData->ApicIdMap[ProcessorNum], RemoveApicId));
+
+ Status = EFI_INVALID_PARAMETER;
+ goto Fatal;
+ }
+
+ mCpuHotEjectData->ApicIdMap[ProcessorNum] = (UINT64)RemoveApicId;
+ EjectCount++;
}
ToUnplugIdx++;
}
+ if (EjectCount != 0) {
+ //
+ // We have processors to be ejected; install the handler.
+ //
+ mCpuHotEjectData->Handler = CpuEject;
+ }
+
//
- // We've removed this set of APIC IDs from SMM data structures.
+ // We've removed this set of APIC IDs from SMM data structures and
+ // have installed an ejection handler if needed.
//
return EFI_SUCCESS;
@@ -458,7 +532,13 @@ CpuHotplugEntry (
// Our DEPEX on EFI_SMM_CPU_SERVICE_PROTOCOL guarantees that PiSmmCpuDxeSmm
// has pointed PcdCpuHotPlugDataAddress to CPU_HOT_PLUG_DATA in SMRAM.
//
+ // Additionally, CPU Hot-unplug is available only if CPU Hotplug is, so
+ // the same DEPEX also guarantees that PcdCpuHotEjectDataAddress points
+ // to CPU_HOT_EJECT_DATA in SMRAM.
+ //
mCpuHotPlugData = (VOID *)(UINTN)PcdGet64 (PcdCpuHotPlugDataAddress);
+ mCpuHotEjectData = (VOID *)(UINTN)PcdGet64 (PcdCpuHotEjectDataAddress);
+
if (mCpuHotPlugData == NULL) {
Status = EFI_NOT_FOUND;
DEBUG ((DEBUG_ERROR, "%a: CPU_HOT_PLUG_DATA: %r\n", __FUNCTION__, Status));
@@ -470,6 +550,9 @@ CpuHotplugEntry (
if (mCpuHotPlugData->ArrayLength == 1) {
return EFI_UNSUPPORTED;
}
+ ASSERT (mCpuHotEjectData &&
+ (mCpuHotPlugData->ArrayLength == mCpuHotEjectData->ArrayLength));
+
//
// Allocate the data structures that depend on the possible CPU count.
//
@@ -552,6 +635,24 @@ CpuHotplugEntry (
//
SmbaseInstallFirstSmiHandler ();
+ if (mCpuHotEjectData) {
+ UINT32 Idx;
+ //
+ // For CPU ejection we need to map ProcessorNum -> APIC_ID. By the time
+ // we need the mapping, however, the Processor's APIC ID has already been
+ // removed from SMM data structures. So we will maintain a local map
+ // in mCpuHotEjectData->ApicIdMap.
+ //
+ for (Idx = 0; Idx < mCpuHotEjectData->ArrayLength; Idx++) {
+ mCpuHotEjectData->ApicIdMap[Idx] = CPU_EJECT_INVALID;
+ }
+
+ //
+ // Wait to init the handler until an ejection is warranted
+ //
+ mCpuHotEjectData->Handler = NULL;
+ }
+
return EFI_SUCCESS;
ReleasePostSmmPen:
--
2.9.3
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#70873): https://edk2.groups.io/g/devel/message/70873
Mute This Topic: https://groups.io/mt/80199926/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
apologies, I've got more comments here: On 01/29/21 01:59, Ankur Arora wrote: > /** > + CPU Hot-eject handler, called from SmmCpuFeaturesRendezvousExit(), > + on each CPU at exit from SMM. > + > + If, the executing CPU is not being ejected, nothing to be done. > + If, the executing CPU is being ejected, wait in a CpuDeadLoop() > + until ejected. > + > + @param[in] ProcessorNum Index of executing CPU. > + > +**/ > +VOID > +EFIAPI > +CpuEject ( > + IN UINTN ProcessorNum > + ) > +{ > + // > + // APIC ID is UINT32, but mCpuHotEjectData->ApicIdMap[] is UINT64 > + // so use UINT64 throughout. > + // > + UINT64 ApicId; > + > + ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; > + if (ApicId == CPU_EJECT_INVALID) { > + return; > + } > + > + // > + // CPU(s) being unplugged get here from SmmCpuFeaturesSmiRendezvousExit() > + // after having been cleared to exit the SMI by the monarch and thus have > + // no SMM processing remaining. > + // > + // Given that we cannot allow them to escape to the guest, we pen them > + // here until the SMM monarch tells the HW to unplug them. > + // > + CpuDeadLoop (); > +} (15) There is no such function as SmmCpuFeaturesSmiRendezvousExit() -- it's SmmCpuFeaturesRendezvousExit(). (16) This function uses a data structure for communication between BSP and APs -- mCpuHotEjectData->ApicIdMap is modified in UnplugCpus() on the BSP, and checked above by the APs (too). What guarantees the visibility of mCpuHotEjectData->ApicIdMap? I think we might want to use InterlockedCompareExchange64() in both EjectCpu() and UnplugCpus() (and make "ApicIdMap" volatile, in addition). InterlockedCompareExchange64() can be used just for comparison as well, by passing ExchangeValue=CompareValue. (17) I think a similar observation applies to the "Handler" field too, as APs call it, while the BSP keeps flipping it between NULL and a real function address. 
We might have to turn that field into an EFI_PHYSICAL_ADDRESS (just a fancy name for UINT64), and use InterlockedCompareExchange64() again. Thanks Laszlo -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71029): https://edk2.groups.io/g/devel/message/71029 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 2021-02-01 11:08 a.m., Laszlo Ersek wrote: > apologies, I've got more comments here: > > On 01/29/21 01:59, Ankur Arora wrote: > >> /** >> + CPU Hot-eject handler, called from SmmCpuFeaturesRendezvousExit(), >> + on each CPU at exit from SMM. >> + >> + If, the executing CPU is not being ejected, nothing to be done. >> + If, the executing CPU is being ejected, wait in a CpuDeadLoop() >> + until ejected. >> + >> + @param[in] ProcessorNum Index of executing CPU. >> + >> +**/ >> +VOID >> +EFIAPI >> +CpuEject ( >> + IN UINTN ProcessorNum >> + ) >> +{ >> + // >> + // APIC ID is UINT32, but mCpuHotEjectData->ApicIdMap[] is UINT64 >> + // so use UINT64 throughout. >> + // >> + UINT64 ApicId; >> + >> + ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; >> + if (ApicId == CPU_EJECT_INVALID) { >> + return; >> + } >> + >> + // >> + // CPU(s) being unplugged get here from SmmCpuFeaturesSmiRendezvousExit() >> + // after having been cleared to exit the SMI by the monarch and thus have >> + // no SMM processing remaining. >> + // >> + // Given that we cannot allow them to escape to the guest, we pen them >> + // here until the SMM monarch tells the HW to unplug them. >> + // >> + CpuDeadLoop (); >> +} > > (15) There is no such function as SmmCpuFeaturesSmiRendezvousExit() -- > it's SmmCpuFeaturesRendezvousExit(). > > (16) This function uses a data structure for communication between BSP > and APs -- mCpuHotEjectData->ApicIdMap is modified in UnplugCpus() on > the BSP, and checked above by the APs (too). > > What guarantees the visibility of mCpuHotEjectData->ApicIdMap? I was banking on SmiRendezvous() explicitly signalling that all processing on the BSP was done before any AP will look at mCpuHotEjectData in SmmCpuFeaturesRendezvousExit(). 
1716 // 1717 // Wait for BSP's signal to exit SMI 1718 // 1719 while (*mSmmMpSyncData->AllCpusInSync) { 1720 CpuPause (); 1721 } 1722 } 1723 1724 Exit: 1725 SmmCpuFeaturesRendezvousExit (CpuIndex); > > I think we might want to use InterlockedCompareExchange64() in both > EjectCpu() and UnplugCpus() (and make "ApicIdMap" volatile, in > addition). InterlockedCompareExchange64() can be used just for > comparison as well, by passing ExchangeValue=CompareValue. Speaking specifically about the ApicIdMap, I'm not sure I fully agree (assuming my comment just above is correct.) The only AP (reader) ApicIdMap deref is here: CpuEject(): 218 ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; For the to-be-ejected-AP, this value can only move from valid-APIC-ID (=> wait in CpuDeadLoop()) -> CPU_EJECT_INVALID. Given that, by the time the worker does the write on line 254, this AP is guaranteed to be dead already, I don't think there's any scenario where the to-be-ejected-AP can see anything other than a valid-APIC-ID. 241 QemuCpuhpWriteCpuSelector (mMmCpuIo, (APIC_ID) RemoveApicId); 242 QemuCpuhpWriteCpuStatus (mMmCpuIo, QEMU_CPUHP_STAT_EJECTED); 243 244 // 245 // Compiler barrier to ensure the next store isn't reordered 246 // 247 MemoryFence (); 248 249 // 250 // Clear the eject status for CpuIndex to ensure that an invalid 251 // SMI later does not end up trying to eject it or a newly 252 // hotplugged CpuIndex does not go into the dead loop. 253 // 254 mCpuHotEjectData->ApicIdMap[CpuIndex] = CPU_EJECT_INVALID; For APs that are not being ejected, they will always see CPU_EJECT_INVALID since the writer never changes that. The one scenario in which bad things could happen is if entries in the ApicIdMap are unaligned (or if the compiler or cpu-arch tears aligned writes). > > (17) I think a similar observation applies to the "Handler" field too, > as APs call it, while the BSP keeps flipping it between NULL and a real > function address. 
We might have to turn that field into an From a real function address, to NULL is the problem part right? (Same argument as above for the transition in UnplugCpus() from NULL -> function-address.) > EFI_PHYSICAL_ADDRESS (just a fancy name for UINT64), and use > InterlockedCompareExchange64() again. AFAICS, these are the problematic derefs: SmmCpuFeaturesRendezvousExit(): 450 if (mCpuHotEjectData == NULL || 451 mCpuHotEjectData->Handler == NULL) { 452 return; and problematic assignments: 266 // 267 // We are done until the next hot-unplug; clear the handler. 268 // 269 mCpuHotEjectData->Handler = NULL; 270 return; 271 } Here as well, I've been banking on aligned writes such that the APs would only see the before or after value not an intermediate value. Thanks Ankur > > Thanks > Laszlo > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71031): https://edk2.groups.io/g/devel/message/71031 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 02/01/21 21:12, Ankur Arora wrote: > On 2021-02-01 11:08 a.m., Laszlo Ersek wrote: >> apologies, I've got more comments here: >> >> On 01/29/21 01:59, Ankur Arora wrote: >> >>> /** >>> + CPU Hot-eject handler, called from SmmCpuFeaturesRendezvousExit(), >>> + on each CPU at exit from SMM. >>> + >>> + If, the executing CPU is not being ejected, nothing to be done. >>> + If, the executing CPU is being ejected, wait in a CpuDeadLoop() >>> + until ejected. >>> + >>> + @param[in] ProcessorNum Index of executing CPU. >>> + >>> +**/ >>> +VOID >>> +EFIAPI >>> +CpuEject ( >>> + IN UINTN ProcessorNum >>> + ) >>> +{ >>> + // >>> + // APIC ID is UINT32, but mCpuHotEjectData->ApicIdMap[] is UINT64 >>> + // so use UINT64 throughout. >>> + // >>> + UINT64 ApicId; >>> + >>> + ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; >>> + if (ApicId == CPU_EJECT_INVALID) { >>> + return; >>> + } >>> + >>> + // >>> + // CPU(s) being unplugged get here from >>> SmmCpuFeaturesSmiRendezvousExit() >>> + // after having been cleared to exit the SMI by the monarch and >>> thus have >>> + // no SMM processing remaining. >>> + // >>> + // Given that we cannot allow them to escape to the guest, we pen >>> them >>> + // here until the SMM monarch tells the HW to unplug them. >>> + // >>> + CpuDeadLoop (); >>> +} >> >> (15) There is no such function as SmmCpuFeaturesSmiRendezvousExit() -- >> it's SmmCpuFeaturesRendezvousExit(). >> >> (16) This function uses a data structure for communication between BSP >> and APs -- mCpuHotEjectData->ApicIdMap is modified in UnplugCpus() on >> the BSP, and checked above by the APs (too). >> >> What guarantees the visibility of mCpuHotEjectData->ApicIdMap? > > I was banking on SmiRendezvous() explicitly signalling that all > processing on the BSP was done before any AP will look at > mCpuHotEjectData in SmmCpuFeaturesRendezvousExit(). 
> > 1716 // > 1717 // Wait for BSP's signal to exit SMI > 1718 // > 1719 while (*mSmmMpSyncData->AllCpusInSync) { > 1720 CpuPause (); > 1721 } > 1722 } > 1723 > 1724 Exit: > 1725 SmmCpuFeaturesRendezvousExit (CpuIndex); Right; it's a general pattern in edk2: volatile UINT8 (aka BOOLEAN) objects are considered atomic. (See SMM_DISPATCHER_MP_SYNC_DATA.AllCpusInSync -- it's a pointer to a volatile BOOLEAN.) But our UINT64 values are neither volatile nor UINT8, and I got suddenly doubtful about "AllCpusInSync" working as a multiprocessor barrier. (I could be unjustifiedly worried, as a bunch of other fields in SMM_DISPATCHER_MP_SYNC_DATA are volatile, wider than UINT8, and *not* accessed with InterlockedCompareExchageXx().) > >> >> I think we might want to use InterlockedCompareExchange64() in both >> EjectCpu() and UnplugCpus() (and make "ApicIdMap" volatile, in >> addition). InterlockedCompareExchange64() can be used just for >> comparison as well, by passing ExchangeValue=CompareValue. > > > Speaking specifically about the ApicIdMap, I'm not sure I fully > agree (assuming my comment just above is correct.) > > > The only AP (reader) ApicIdMap deref is here: > > CpuEject(): > 218 ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; > > For the to-be-ejected-AP, this value can only move from > valid-APIC-ID (=> wait in CpuDeadLoop()) -> CPU_EJECT_INVALID. > > Given that, by the time the worker does the write on line 254, this > AP is guaranteed to be dead already, I don't think there's any > scenario where the to-be-ejected-AP can see anything other than > a valid-APIC-ID. The scenario I had in mind was different: what guarantees that the effect of 375 mCpuHotEjectData->ApicIdMap[ProcessorNum] = (UINT64)RemoveApicId; which is performed by the BSP in UnplugCpus(), is visible by the AP on line 218 (see your quote above)? What if the AP gets to line 218 before the BSP's write on line 375 *propagates* sufficiently? 
There's no question that the BSP writes before the AP reads, but I'm uncertain if that suffices for the *effect* of the write to be visible to the AP. My concern is not whether the AP sees a partial vs. a settled update; my concern is if the AP could see an entirely *stale* value. The consequence of that problem would be that an AP that the BSP were about to eject would return from CpuEject() to SmmCpuFeaturesRendezvousExit() to SmiRendezvous(). ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In combination with the sync-up point that you quoted. This seems to match existing practice in PiSmmCpuDxeSmm -- there are no concurrent accesses, so atomicity is not a concern, and serializing the instruction streams coarsely, with the sync-up, in combination with volatile accesses, should presumably guarantee visibility (on x86 anyway). Thanks Laszlo > > 241 QemuCpuhpWriteCpuSelector (mMmCpuIo, (APIC_ID) RemoveApicId); > 242 QemuCpuhpWriteCpuStatus (mMmCpuIo, QEMU_CPUHP_STAT_EJECTED); > 243 > 244 // > 245 // Compiler barrier to ensure the next store isn't reordered > 246 // > 247 MemoryFence (); > 248 > 249 // > 250 // Clear the eject status for CpuIndex to ensure that an > invalid > 251 // SMI later does not end up trying to eject it or a newly > 252 // hotplugged CpuIndex does not go into the dead loop. > 253 // > 254 mCpuHotEjectData->ApicIdMap[CpuIndex] = CPU_EJECT_INVALID; > For APs that are not being ejected, they will always see > CPU_EJECT_INVALID > since the writer never changes that. > > The one scenario in which bad things could happen is if entries in the > ApicIdMap are unaligned (or if the compiler or cpu-arch tears aligned > writes). > >> >> (17) I think a similar observation applies to the "Handler" field too, >> as APs call it, while the BSP keeps flipping it between NULL and a real >> function address. 
We might have to turn that field into an > From a real function address, to NULL is the problem part right? > > (Same argument as above for the transition in UnplugCpus() from > NULL -> function-address.) > > >> EFI_PHYSICAL_ADDRESS (just a fancy name for UINT64), and use >> InterlockedCompareExchange64() again. > > AFAICS, these are the problematic derefs: > > SmmCpuFeaturesRendezvousExit(): > > 450 if (mCpuHotEjectData == NULL || > 451 mCpuHotEjectData->Handler == NULL) { > 452 return; > > and problematic assignments: > > 266 // > 267 // We are done until the next hot-unplug; clear the handler. > 268 // > 269 mCpuHotEjectData->Handler = NULL; > 270 return; > 271 } > > Here as well, I've been banking on aligned writes such that the APs would > only see the before or after value not an intermediate value. > > Thanks > Ankur > >> >> Thanks >> Laszlo >> > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71066): https://edk2.groups.io/g/devel/message/71066 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 2021-02-02 6:00 a.m., Laszlo Ersek wrote: > On 02/01/21 21:12, Ankur Arora wrote: >> On 2021-02-01 11:08 a.m., Laszlo Ersek wrote: >>> apologies, I've got more comments here: >>> >>> On 01/29/21 01:59, Ankur Arora wrote: >>> >>>> /** >>>> + CPU Hot-eject handler, called from SmmCpuFeaturesRendezvousExit(), >>>> + on each CPU at exit from SMM. >>>> + >>>> + If, the executing CPU is not being ejected, nothing to be done. >>>> + If, the executing CPU is being ejected, wait in a CpuDeadLoop() >>>> + until ejected. >>>> + >>>> + @param[in] ProcessorNum Index of executing CPU. >>>> + >>>> +**/ >>>> +VOID >>>> +EFIAPI >>>> +CpuEject ( >>>> + IN UINTN ProcessorNum >>>> + ) >>>> +{ >>>> + // >>>> + // APIC ID is UINT32, but mCpuHotEjectData->ApicIdMap[] is UINT64 >>>> + // so use UINT64 throughout. >>>> + // >>>> + UINT64 ApicId; >>>> + >>>> + ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; >>>> + if (ApicId == CPU_EJECT_INVALID) { >>>> + return; >>>> + } >>>> + >>>> + // >>>> + // CPU(s) being unplugged get here from >>>> SmmCpuFeaturesSmiRendezvousExit() >>>> + // after having been cleared to exit the SMI by the monarch and >>>> thus have >>>> + // no SMM processing remaining. >>>> + // >>>> + // Given that we cannot allow them to escape to the guest, we pen >>>> them >>>> + // here until the SMM monarch tells the HW to unplug them. >>>> + // >>>> + CpuDeadLoop (); >>>> +} >>> >>> (15) There is no such function as SmmCpuFeaturesSmiRendezvousExit() -- >>> it's SmmCpuFeaturesRendezvousExit(). >>> >>> (16) This function uses a data structure for communication between BSP >>> and APs -- mCpuHotEjectData->ApicIdMap is modified in UnplugCpus() on >>> the BSP, and checked above by the APs (too). >>> >>> What guarantees the visibility of mCpuHotEjectData->ApicIdMap? >> >> I was banking on SmiRendezvous() explicitly signalling that all >> processing on the BSP was done before any AP will look at >> mCpuHotEjectData in SmmCpuFeaturesRendezvousExit(). 
>> >> 1716 // >> 1717 // Wait for BSP's signal to exit SMI >> 1718 // >> 1719 while (*mSmmMpSyncData->AllCpusInSync) { >> 1720 CpuPause (); >> 1721 } >> 1722 } >> 1723 >> 1724 Exit: >> 1725 SmmCpuFeaturesRendezvousExit (CpuIndex); > > Right; it's a general pattern in edk2: volatile UINT8 (aka BOOLEAN) > objects are considered atomic. (See > SMM_DISPATCHER_MP_SYNC_DATA.AllCpusInSync -- it's a pointer to a > volatile BOOLEAN.) > > But our UINT64 values are neither volatile nor UINT8, and I got suddenly > doubtful about "AllCpusInSync" working as a multiprocessor barrier. > > (I could be unjustifiedly worried, as a bunch of other fields in > SMM_DISPATCHER_MP_SYNC_DATA are volatile, wider than UINT8, and *not* > accessed with InterlockedCompareExchageXx().) Thanks for pointing me to this code. There's a curious comment in about making this structure uncache-able in the declaration here (though I couldn't figure out how that is done): 418 typedef struct { 419 // 420 // Pointer to an array. The array should be located immediately after this structure 421 // so that UC cache-ability can be set together. 422 // 423 SMM_CPU_DATA_BLOCK *CpuData; 424 volatile UINT32 *Counter; 425 volatile UINT32 BspIndex; 426 volatile BOOLEAN *InsideSmm; 427 volatile BOOLEAN *AllCpusInSync; 428 volatile SMM_CPU_SYNC_MODE EffectiveSyncMode; 429 volatile BOOLEAN SwitchBsp; 430 volatile BOOLEAN *CandidateBsp; 431 EFI_AP_PROCEDURE StartupProcedure; 432 VOID *StartupProcArgs; 433 } SMM_DISPATCHER_MP_SYNC_DATA; Also, is there an expectation that these fields (at least some of them) switch over when a new leader is chosen? Otherwise I'm not sure why for instance, AllCpusInSync would be a pointer. > > >> >>> >>> I think we might want to use InterlockedCompareExchange64() in both >>> EjectCpu() and UnplugCpus() (and make "ApicIdMap" volatile, in >>> addition). InterlockedCompareExchange64() can be used just for >>> comparison as well, by passing ExchangeValue=CompareValue. 
>> >> >> Speaking specifically about the ApicIdMap, I'm not sure I fully >> agree (assuming my comment just above is correct.) >> >> >> The only AP (reader) ApicIdMap deref is here: >> >> CpuEject(): >> 218 ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; >> >> For the to-be-ejected-AP, this value can only move from >> valid-APIC-ID (=> wait in CpuDeadLoop()) -> CPU_EJECT_INVALID. >> >> Given that, by the time the worker does the write on line 254, this >> AP is guaranteed to be dead already, I don't think there's any >> scenario where the to-be-ejected-AP can see anything other than >> a valid-APIC-ID. > > The scenario I had in mind was different: what guarantees that the > effect of > > 375 mCpuHotEjectData->ApicIdMap[ProcessorNum] = (UINT64)RemoveApicId; > > which is performed by the BSP in UnplugCpus(), is visible by the AP on > line 218 (see your quote above)? > > What if the AP gets to line 218 before the BSP's write on line 375 > *propagates* sufficiently? I understand. That does make sense. And, as you said elsewhere, a real memory fence would come in useful here. We could use AsmCpuid() as a poor man's mfence, but that seems overkill given that x86 at least guarantees store-order. Ankur > > There's no question that the BSP writes before the AP reads, but I'm > uncertain if that suffices for the *effect* of the write to be visible > to the AP. My concern is not whether the AP sees a partial vs. a settled > update; my concern is if the AP could see an entirely *stale* value. > > The consequence of that problem would be that an AP that the BSP were > about to eject would return from CpuEject() to > SmmCpuFeaturesRendezvousExit() to SmiRendezvous(). > > ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the > array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In > combination with the sync-up point that you quoted. 
This seems to match > existing practice in PiSmmCpuDxeSmm -- there are no concurrent accesses, > so atomicity is not a concern, and serializing the instruction streams > coarsely, with the sync-up, in combination with volatile accesses, > should presumably guarantee visibility (on x86 anyway). > > Thanks > Laszlo > > >> >> 241 QemuCpuhpWriteCpuSelector (mMmCpuIo, (APIC_ID) RemoveApicId); >> 242 QemuCpuhpWriteCpuStatus (mMmCpuIo, QEMU_CPUHP_STAT_EJECTED); >> 243 >> 244 // >> 245 // Compiler barrier to ensure the next store isn't reordered >> 246 // >> 247 MemoryFence (); >> 248 >> 249 // >> 250 // Clear the eject status for CpuIndex to ensure that an >> invalid >> 251 // SMI later does not end up trying to eject it or a newly >> 252 // hotplugged CpuIndex does not go into the dead loop. >> 253 // >> 254 mCpuHotEjectData->ApicIdMap[CpuIndex] = CPU_EJECT_INVALID; >> For APs that are not being ejected, they will always see >> CPU_EJECT_INVALID >> since the writer never changes that. >> >> The one scenario in which bad things could happen is if entries in the >> ApicIdMap are unaligned (or if the compiler or cpu-arch tears aligned >> writes). >> >>> >>> (17) I think a similar observation applies to the "Handler" field too, >>> as APs call it, while the BSP keeps flipping it between NULL and a real >>> function address. We might have to turn that field into an >> From a real function address, to NULL is the problem part right? >> >> (Same argument as above for the transition in UnplugCpus() from >> NULL -> function-address.) >> >> >>> EFI_PHYSICAL_ADDRESS (just a fancy name for UINT64), and use >>> InterlockedCompareExchange64() again. >> >> AFAICS, these are the problematic derefs: >> >> SmmCpuFeaturesRendezvousExit(): >> >> 450 if (mCpuHotEjectData == NULL || >> 451 mCpuHotEjectData->Handler == NULL) { >> 452 return; >> >> and problematic assignments: >> >> 266 // >> 267 // We are done until the next hot-unplug; clear the handler. 
>> 268 // >> 269 mCpuHotEjectData->Handler = NULL; >> 270 return; >> 271 } >> >> Here as well, I've been banking on aligned writes such that the APs would >> only see the before or after value not an intermediate value. >> >> Thanks >> Ankur >> >>> >>> Thanks >>> Laszlo >>> >> > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71094): https://edk2.groups.io/g/devel/message/71094 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 02/03/21 07:13, Ankur Arora wrote: > On 2021-02-02 6:00 a.m., Laszlo Ersek wrote: >> On 02/01/21 21:12, Ankur Arora wrote: >>> On 2021-02-01 11:08 a.m., Laszlo Ersek wrote: >>>> (16) This function uses a data structure for communication between BSP >>>> and APs -- mCpuHotEjectData->ApicIdMap is modified in UnplugCpus() on >>>> the BSP, and checked above by the APs (too). >>>> >>>> What guarantees the visibility of mCpuHotEjectData->ApicIdMap? >>> >>> I was banking on SmiRendezvous() explicitly signalling that all >>> processing on the BSP was done before any AP will look at >>> mCpuHotEjectData in SmmCpuFeaturesRendezvousExit(). >>> >>> 1716 // >>> 1717 // Wait for BSP's signal to exit SMI >>> 1718 // >>> 1719 while (*mSmmMpSyncData->AllCpusInSync) { >>> 1720 CpuPause (); >>> 1721 } >>> 1722 } >>> 1723 >>> 1724 Exit: >>> 1725 SmmCpuFeaturesRendezvousExit (CpuIndex); >> >> Right; it's a general pattern in edk2: volatile UINT8 (aka BOOLEAN) >> objects are considered atomic. (See >> SMM_DISPATCHER_MP_SYNC_DATA.AllCpusInSync -- it's a pointer to a >> volatile BOOLEAN.) >> >> But our UINT64 values are neither volatile nor UINT8, and I got suddenly >> doubtful about "AllCpusInSync" working as a multiprocessor barrier. >> >> (I could be unjustifiedly worried, as a bunch of other fields in >> SMM_DISPATCHER_MP_SYNC_DATA are volatile, wider than UINT8, and *not* >> accessed with InterlockedCompareExchageXx().) > > Thanks for pointing me to this code. There's a curious comment in > about making this structure uncache-able in the declaration here > (though I couldn't figure out how that is done): > > 418 typedef struct { > 419 // > 420 // Pointer to an array. The array should be located immediately > after this structure > 421 // so that UC cache-ability can be set together. > 422 // This is probably through SMRR manipulation. The "UefiCpuPkg/Library/SmmCpuFeaturesLib" instance contains SMRR support. 
The "OvmfPkg/Library/SmmCpuFeaturesLib" instance contains no SMRR support. (Just search both source files for "SMRR".) > 423 SMM_CPU_DATA_BLOCK *CpuData; > 424 volatile UINT32 *Counter; > 425 volatile UINT32 BspIndex; > 426 volatile BOOLEAN *InsideSmm; > 427 volatile BOOLEAN *AllCpusInSync; > 428 volatile SMM_CPU_SYNC_MODE EffectiveSyncMode; > 429 volatile BOOLEAN SwitchBsp; > 430 volatile BOOLEAN *CandidateBsp; > 431 EFI_AP_PROCEDURE StartupProcedure; > 432 VOID *StartupProcArgs; > 433 } SMM_DISPATCHER_MP_SYNC_DATA; > > Also, is there an expectation that these fields (at least some of > them) switch over when a new leader is chosen? Yes, see for example the "Elect BSP" section in SmiRendezvous(). > Otherwise I'm not sure why for instance, AllCpusInSync would be > a pointer. TBH I can't explain that; I'm not too familiar with those parts... >>> CpuEject(): >>> 218 ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; >>> >>> For the to-be-ejected-AP, this value can only move from >>> valid-APIC-ID (=> wait in CpuDeadLoop()) -> CPU_EJECT_INVALID. >>> >>> Given that, by the time the worker does the write on line 254, this >>> AP is guaranteed to be dead already, I don't think there's any >>> scenario where the to-be-ejected-AP can see anything other than >>> a valid-APIC-ID. >> >> The scenario I had in mind was different: what guarantees that the >> effect of >> >> 375 mCpuHotEjectData->ApicIdMap[ProcessorNum] = >> (UINT64)RemoveApicId; >> >> which is performed by the BSP in UnplugCpus(), is visible by the AP on >> line 218 (see your quote above)? >> >> What if the AP gets to line 218 before the BSP's write on line 375 >> *propagates* sufficiently? > > I understand. That does make sense. And, as you said elsewhere, a real > memory fence would come in useful here. > > We could use AsmCpuid() as a poor man's mfence, but that seems overkill > given that x86 at least guarantees store-order. Right -- I don't recall any examples of AsmCpuid() being used like that in edk2. 
Thanks! Laszlo -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71115): https://edk2.groups.io/g/devel/message/71115 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 2021-02-03 12:55 p.m., Laszlo Ersek wrote: > On 02/03/21 07:13, Ankur Arora wrote: >> On 2021-02-02 6:00 a.m., Laszlo Ersek wrote: >>> On 02/01/21 21:12, Ankur Arora wrote: >>>> On 2021-02-01 11:08 a.m., Laszlo Ersek wrote: > >>>>> (16) This function uses a data structure for communication between BSP >>>>> and APs -- mCpuHotEjectData->ApicIdMap is modified in UnplugCpus() on >>>>> the BSP, and checked above by the APs (too). >>>>> >>>>> What guarantees the visibility of mCpuHotEjectData->ApicIdMap? >>>> >>>> I was banking on SmiRendezvous() explicitly signalling that all >>>> processing on the BSP was done before any AP will look at >>>> mCpuHotEjectData in SmmCpuFeaturesRendezvousExit(). >>>> >>>> 1716 // >>>> 1717 // Wait for BSP's signal to exit SMI >>>> 1718 // >>>> 1719 while (*mSmmMpSyncData->AllCpusInSync) { >>>> 1720 CpuPause (); >>>> 1721 } >>>> 1722 } >>>> 1723 >>>> 1724 Exit: >>>> 1725 SmmCpuFeaturesRendezvousExit (CpuIndex); >>> >>> Right; it's a general pattern in edk2: volatile UINT8 (aka BOOLEAN) >>> objects are considered atomic. (See >>> SMM_DISPATCHER_MP_SYNC_DATA.AllCpusInSync -- it's a pointer to a >>> volatile BOOLEAN.) >>> >>> But our UINT64 values are neither volatile nor UINT8, and I got suddenly >>> doubtful about "AllCpusInSync" working as a multiprocessor barrier. >>> >>> (I could be unjustifiedly worried, as a bunch of other fields in >>> SMM_DISPATCHER_MP_SYNC_DATA are volatile, wider than UINT8, and *not* >>> accessed with InterlockedCompareExchageXx().) >> >> Thanks for pointing me to this code. There's a curious comment in >> about making this structure uncache-able in the declaration here >> (though I couldn't figure out how that is done): >> >> 418 typedef struct { >> 419 // >> 420 // Pointer to an array. The array should be located immediately >> after this structure >> 421 // so that UC cache-ability can be set together. >> 422 // > > This is probably through SMRR manipulation. 
> > The "UefiCpuPkg/Library/SmmCpuFeaturesLib" instance contains SMRR support. > > The "OvmfPkg/Library/SmmCpuFeaturesLib" instance contains no SMRR > support. (Just search both source files for "SMRR".) > Oh, now I see what SMRR does. Thanks that helps make sense of what's going on here. Ankur > >> 423 SMM_CPU_DATA_BLOCK *CpuData; >> 424 volatile UINT32 *Counter; >> 425 volatile UINT32 BspIndex; >> 426 volatile BOOLEAN *InsideSmm; >> 427 volatile BOOLEAN *AllCpusInSync; >> 428 volatile SMM_CPU_SYNC_MODE EffectiveSyncMode; >> 429 volatile BOOLEAN SwitchBsp; >> 430 volatile BOOLEAN *CandidateBsp; >> 431 EFI_AP_PROCEDURE StartupProcedure; >> 432 VOID *StartupProcArgs; >> 433 } SMM_DISPATCHER_MP_SYNC_DATA; >> >> Also, is there an expectation that these fields (at least some of >> them) switch over when a new leader is chosen? > > Yes, see for example the "Elect BSP" section in SmiRendezvous(). > > >> Otherwise I'm not sure why for instance, AllCpusInSync would be >> a pointer. > > TBH I can't explain that; I'm not too familiar with those parts... > > >>>> CpuEject(): >>>> 218 ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; >>>> >>>> For the to-be-ejected-AP, this value can only move from >>>> valid-APIC-ID (=> wait in CpuDeadLoop()) -> CPU_EJECT_INVALID. >>>> >>>> Given that, by the time the worker does the write on line 254, this >>>> AP is guaranteed to be dead already, I don't think there's any >>>> scenario where the to-be-ejected-AP can see anything other than >>>> a valid-APIC-ID. >>> >>> The scenario I had in mind was different: what guarantees that the >>> effect of >>> >>> 375 mCpuHotEjectData->ApicIdMap[ProcessorNum] = >>> (UINT64)RemoveApicId; >>> >>> which is performed by the BSP in UnplugCpus(), is visible by the AP on >>> line 218 (see your quote above)? >>> >>> What if the AP gets to line 218 before the BSP's write on line 375 >>> *propagates* sufficiently? >> >> I understand. That does make sense. 
And, as you said elsewhere, a real >> memory fence would come in useful here. >> >> We could use AsmCpuid() as a poor man's mfence, but that seems overkill >> given that x86 at least guarantees store-order. > > Right -- I don't recall any examples of AsmCpuid() being used like that > in edk2. > > Thanks! > Laszlo > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71128): https://edk2.groups.io/g/devel/message/71128 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 02/02/21 15:00, Laszlo Ersek wrote: > ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the > array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In > combination with the sync-up point that you quoted. This seems to match > existing practice in PiSmmCpuDxeSmm -- there are no concurrent accesses, > so atomicity is not a concern, and serializing the instruction streams > coarsely, with the sync-up, in combination with volatile accesses, > should presumably guarantee visibility (on x86 anyway). To summarize, this is what I would ask for: - make CPU_HOT_EJECT_DATA volatile - make (*CPU_HOT_EJECT_DATA.ApicIdMap) volatile - after storing something to CPU_HOT_EJECT_DATA or CPU_HOT_EJECT_DATA.ApicIdMap on the BSP, execute a MemoryFence() - before fetching something from CPU_HOT_EJECT_DATA or CPU_HOT_EJECT_DATA.ApicIdMap on an AP, execute a MemoryFence() Except: MemoryFence() isn't a *memory fence* in fact. See "MdePkg/Library/BaseLib/X64/GccInline.c". It's just a compiler barrier, which may not add anything beyond what we'd already have from "volatile". Case in point: PiSmmCpuDxeSmm performs heavy multi-processing, but does not contain a single invocation of MemoryFence(). It uses volatile objects, and a handful of InterlockedCompareExchangeXx() calls, for implementing semaphores. (NB: there is no 8-bit variant of InterlockedCompareExchange(), as "volatile UINT8" is considered atomic in itself, and a suitable basis for a semaphore too.) And given the synchronization from those semaphores, PiSmmCpuDxeSmm trusts that updates to the *other* volatile objects are both atomic and visible. I'm pretty sure this only works because x86 is in-order. There are instruction stream barriers in place, and compiler barriers too, but no actual memory barriers. Now the question is whether we have managed to *sufficiently* imitate these patterns from PiSmmCpuDxeSmm, in this patch set. 
Making stuff volatile, and relying on the existent sync-up point, might suffice. Thanks Laszlo -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71067): https://edk2.groups.io/g/devel/message/71067 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 2021-02-02 6:15 a.m., Laszlo Ersek wrote: > On 02/02/21 15:00, Laszlo Ersek wrote: > >> ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the >> array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In >> combination with the sync-up point that you quoted. This seems to match >> existing practice in PiSmmCpuDxeSmm -- there are no concurrent accesses, >> so atomicity is not a concern, and serializing the instruction streams >> coarsely, with the sync-up, in combination with volatile accesses, >> should presumably guarantee visibility (on x86 anyway). > > To summarize, this is what I would ask for: > > - make CPU_HOT_EJECT_DATA volatile > > - make (*CPU_HOT_EJECT_DATA.ApicIdMap) volatile > > - after storing something to CPU_HOT_EJECT_DATA or > CPU_HOT_EJECT_DATA.ApicIdMap on the BSP, execute a MemoryFence() > > - before fetching something from CPU_HOT_EJECT_DATA or > CPU_HOT_EJECT_DATA.ApicIdMap on an AP, execute a MemoryFence() > > > Except: MemoryFence() isn't a *memory fence* in fact. > > See "MdePkg/Library/BaseLib/X64/GccInline.c". > > It's just a compiler barrier, which may not add anything beyond what > we'd already have from "volatile". > > Case in point: PiSmmCpuDxeSmm performs heavy multi-processing, but does > not contain a single invocation of MemoryFence(). It uses volatile > objects, and a handful of InterlockedCompareExchangeXx() calls, for > implementing semaphores. (NB: there is no 8-bit variant of > InterlockedCompareExchange(), as "volatile UINT8" is considered atomic > in itself, and a suitable basis for a sempahore too.) And given the > synchronization from those semaphores, PiSmmCpuDpxeSmm trusts that > updates to the *other* volatile objects are both atomic and visible. > > I'm pretty sure this only works because x86 is in-order. There are > instruction stream barriers in place, and compiler barriers too, but no > actual memory barriers. 
Right and just to separate them explicitly, there are two problems: - compiler: where the compiler caches stuff in or looks at stale memory locations. Now, AFAICS, this should not happen because the ApicIdMap would never change once set so the compiler should reasonably be able to cache the address of ApicIdMap and dereference it (thus obviating the need for volatile.) The compiler could, however, cache any assignments to ApicIdMap[Idx] (especially given LTO) and we could use a MemoryFence() (as the compiler barrier that it is) to force the store. - CPU pipeline: as you said, right now we basically depend on x86 store order semantics (and the CpuPause() loop around AllCpusInSync, kinda provides a barrier.) So the BSP writes in this order: ApicIdMap[Idx]=x; ... ->AllCpusInSync = true And whenever the AP sees ->AllCpusInSync == True, it has to now see ApicIdMap[Idx] == x. Maybe the thing to do is to detail this barrier in a commit note/comment? And add the MemoryFence() but not the volatile. Thanks Ankur > > Now the question is whether we have managed to *sufficiently* imitate > these patterns from PiSmmCpuDxeSmm, in this patch set. > > Making stuff volatile, and relying on the existent sync-up point, might > suffice. > > Thanks > Laszlo > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71095): https://edk2.groups.io/g/devel/message/71095 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 02/03/21 07:45, Ankur Arora wrote: > On 2021-02-02 6:15 a.m., Laszlo Ersek wrote: >> On 02/02/21 15:00, Laszlo Ersek wrote: >> >>> ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the >>> array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In >>> combination with the sync-up point that you quoted. This seems to match >>> existing practice in PiSmmCpuDxeSmm -- there are no concurrent accesses, >>> so atomicity is not a concern, and serializing the instruction streams >>> coarsely, with the sync-up, in combination with volatile accesses, >>> should presumably guarantee visibility (on x86 anyway). >> >> To summarize, this is what I would ask for: >> >> - make CPU_HOT_EJECT_DATA volatile >> >> - make (*CPU_HOT_EJECT_DATA.ApicIdMap) volatile >> >> - after storing something to CPU_HOT_EJECT_DATA or >> CPU_HOT_EJECT_DATA.ApicIdMap on the BSP, execute a MemoryFence() >> >> - before fetching something from CPU_HOT_EJECT_DATA or >> CPU_HOT_EJECT_DATA.ApicIdMap on an AP, execute a MemoryFence() >> >> >> Except: MemoryFence() isn't a *memory fence* in fact. >> >> See "MdePkg/Library/BaseLib/X64/GccInline.c". >> >> It's just a compiler barrier, which may not add anything beyond what >> we'd already have from "volatile". >> >> Case in point: PiSmmCpuDxeSmm performs heavy multi-processing, but does >> not contain a single invocation of MemoryFence(). It uses volatile >> objects, and a handful of InterlockedCompareExchangeXx() calls, for >> implementing semaphores. (NB: there is no 8-bit variant of >> InterlockedCompareExchange(), as "volatile UINT8" is considered atomic >> in itself, and a suitable basis for a sempahore too.) And given the >> synchronization from those semaphores, PiSmmCpuDpxeSmm trusts that >> updates to the *other* volatile objects are both atomic and visible. >> >> I'm pretty sure this only works because x86 is in-order. 
There are >> instruction stream barriers in place, and compiler barriers too, but no >> actual memory barriers. > > Right and just to separate them explicitly, there are two problems: > > - compiler: where the compiler caches stuff in or looks at stale memory > locations. Now, AFAICS, this should not happen because the ApicIdMap would > never change once set so the compiler should reasonably be able to cache > the address of ApicIdMap and dereference it (thus obviating the need for > volatile.) (CPU_HOT_EJECT_DATA.Handler does change though.) > The compiler could, however, cache any assignments to ApicIdMap[Idx] > (especially given LTO) and we could use a MemoryFence() (as the compiler > barrier that it is) to force the store. > > - CPU pipeline: as you said, right now we basically depend on x86 store > order semantics (and the CpuPause() loop around AllCpusInSync, kinda > provides > a barrier.) > > So the BSP writes in this order: > ApicIdMap[Idx]=x; ... ->AllCpusInSync = true > > And whenever the AP sees ->AllCpusInSync == True, it has to now see > ApicIdMap[Idx] == x. Yes. > > Maybe the thing to do is to detail this barrier in a commit note/comment? That would be nice. > And add the MemoryFence() but not the volatile. Yes, that should work. Thanks, Laszlo -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71116): https://edk2.groups.io/g/devel/message/71116 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 2021-02-03 12:58 p.m., Laszlo Ersek wrote: > On 02/03/21 07:45, Ankur Arora wrote: >> On 2021-02-02 6:15 a.m., Laszlo Ersek wrote: >>> On 02/02/21 15:00, Laszlo Ersek wrote: >>> >>>> ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the >>>> array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In >>>> combination with the sync-up point that you quoted. This seems to match >>>> existing practice in PiSmmCpuDxeSmm -- there are no concurrent accesses, >>>> so atomicity is not a concern, and serializing the instruction streams >>>> coarsely, with the sync-up, in combination with volatile accesses, >>>> should presumably guarantee visibility (on x86 anyway). >>> >>> To summarize, this is what I would ask for: >>> >>> - make CPU_HOT_EJECT_DATA volatile >>> >>> - make (*CPU_HOT_EJECT_DATA.ApicIdMap) volatile >>> >>> - after storing something to CPU_HOT_EJECT_DATA or >>> CPU_HOT_EJECT_DATA.ApicIdMap on the BSP, execute a MemoryFence() >>> >>> - before fetching something from CPU_HOT_EJECT_DATA or >>> CPU_HOT_EJECT_DATA.ApicIdMap on an AP, execute a MemoryFence() >>> >>> >>> Except: MemoryFence() isn't a *memory fence* in fact. >>> >>> See "MdePkg/Library/BaseLib/X64/GccInline.c". >>> >>> It's just a compiler barrier, which may not add anything beyond what >>> we'd already have from "volatile". >>> >>> Case in point: PiSmmCpuDxeSmm performs heavy multi-processing, but does >>> not contain a single invocation of MemoryFence(). It uses volatile >>> objects, and a handful of InterlockedCompareExchangeXx() calls, for >>> implementing semaphores. (NB: there is no 8-bit variant of >>> InterlockedCompareExchange(), as "volatile UINT8" is considered atomic >>> in itself, and a suitable basis for a sempahore too.) And given the >>> synchronization from those semaphores, PiSmmCpuDpxeSmm trusts that >>> updates to the *other* volatile objects are both atomic and visible. >>> >>> I'm pretty sure this only works because x86 is in-order. 
There are >>> instruction stream barriers in place, and compiler barriers too, but no >>> actual memory barriers. >> >> Right and just to separate them explicitly, there are two problems: >> >> - compiler: where the compiler caches stuff in or looks at stale memory >> locations. Now, AFAICS, this should not happen because the ApicIdMap would >> never change once set so the compiler should reasonably be able to cache >> the address of ApicIdMap and dereference it (thus obviating the need for >> volatile.) > > (CPU_HOT_EJECT_DATA.Handler does change though.) Yeah, I did kinda elide over that. Let me think this through in v7 and add more explicit comments and then we can see if it still looks fishy? Thanks Ankur > >> The compiler could, however, cache any assignments to ApicIdMap[Idx] >> (especially given LTO) and we could use a MemoryFence() (as the compiler >> barrier that it is) to force the store. >> >> - CPU pipeline: as you said, right now we basically depend on x86 store >> order semantics (and the CpuPause() loop around AllCpusInSync, kinda >> provides >> a barrier.) >> >> So the BSP writes in this order: >> ApicIdMap[Idx]=x; ... ->AllCpusInSync = true >> >> And whenever the AP sees ->AllCpusInSync == True, it has to now see >> ApicIdMap[Idx] == x. > > Yes. > >> >> Maybe the thing to do is to detail this barrier in a commit note/comment? > > That would be nice. > >> And add the MemoryFence() but not the volatile. > > Yes, that should work. > > Thanks, > Laszlo > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71127): https://edk2.groups.io/g/devel/message/71127 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Hi Ankur, I figure it's prudent for me to follow up here too: On 02/04/21 03:49, Ankur Arora wrote: > On 2021-02-03 12:58 p.m., Laszlo Ersek wrote: >> On 02/03/21 07:45, Ankur Arora wrote: >>> On 2021-02-02 6:15 a.m., Laszlo Ersek wrote: >>>> On 02/02/21 15:00, Laszlo Ersek wrote: >>>> >>>>> ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the >>>>> array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In >>>>> combination with the sync-up point that you quoted. This seems to >>>>> match >>>>> existing practice in PiSmmCpuDxeSmm -- there are no concurrent >>>>> accesses, >>>>> so atomicity is not a concern, and serializing the instruction streams >>>>> coarsely, with the sync-up, in combination with volatile accesses, >>>>> should presumably guarantee visibility (on x86 anyway). >>>> >>>> To summarize, this is what I would ask for: >>>> >>>> - make CPU_HOT_EJECT_DATA volatile >>>> >>>> - make (*CPU_HOT_EJECT_DATA.ApicIdMap) volatile >>>> >>>> - after storing something to CPU_HOT_EJECT_DATA or >>>> CPU_HOT_EJECT_DATA.ApicIdMap on the BSP, execute a MemoryFence() >>>> >>>> - before fetching something from CPU_HOT_EJECT_DATA or >>>> CPU_HOT_EJECT_DATA.ApicIdMap on an AP, execute a MemoryFence() >>>> >>>> >>>> Except: MemoryFence() isn't a *memory fence* in fact. >>>> >>>> See "MdePkg/Library/BaseLib/X64/GccInline.c". >>>> >>>> It's just a compiler barrier, which may not add anything beyond what >>>> we'd already have from "volatile". >>>> >>>> Case in point: PiSmmCpuDxeSmm performs heavy multi-processing, but does >>>> not contain a single invocation of MemoryFence(). It uses volatile >>>> objects, and a handful of InterlockedCompareExchangeXx() calls, for >>>> implementing semaphores. (NB: there is no 8-bit variant of >>>> InterlockedCompareExchange(), as "volatile UINT8" is considered atomic >>>> in itself, and a suitable basis for a sempahore too.) 
And given the >>>> synchronization from those semaphores, PiSmmCpuDpxeSmm trusts that >>>> updates to the *other* volatile objects are both atomic and visible. >>>> >>>> I'm pretty sure this only works because x86 is in-order. There are >>>> instruction stream barriers in place, and compiler barriers too, but no >>>> actual memory barriers. >>> >>> Right and just to separate them explicitly, there are two problems: >>> >>> - compiler: where the compiler caches stuff in or looks at stale >>> memory >>> locations. Now, AFAICS, this should not happen because the ApicIdMap >>> would >>> never change once set so the compiler should reasonably be able to cache >>> the address of ApicIdMap and dereference it (thus obviating the need for >>> volatile.) >> >> (CPU_HOT_EJECT_DATA.Handler does change though.) > > Yeah, I did kinda elide over that. Let me think this through in v7 > and add more explicit comments and then we can see if it still looks > fishy? > > Thanks > Ankur > >> >>> The compiler could, however, cache any assignments to ApicIdMap[Idx] >>> (especially given LTO) and we could use a MemoryFence() (as the compiler >>> barrier that it is) to force the store. >>> >>> - CPU pipeline: as you said, right now we basically depend on x86 >>> store >>> order semantics (and the CpuPause() loop around AllCpusInSync, kinda >>> provides >>> a barrier.) >>> >>> So the BSP writes in this order: >>> ApicIdMap[Idx]=x; ... ->AllCpusInSync = true >>> >>> And whenever the AP sees ->AllCpusInSync == True, it has to now see >>> ApicIdMap[Idx] == x. >> >> Yes. >> >>> >>> Maybe the thing to do is to detail this barrier in a commit >>> note/comment? >> >> That would be nice. >> >>> And add the MemoryFence() but not the volatile. >> >> Yes, that should work. Please *do* add the volatile, and also the MemoryFence(). When built with Visual Studio, MemoryFence() does nothing at all (at least when LTO is in effect -- which it almost always is). 
So we should have the volatile for making things work, and MemoryFence() as a conceptual reminder, so we know where to fix up things, when (if!) we come around fixing this mess with MemoryFence(). Reference: https://edk2.groups.io/g/rfc/message/500 https://edk2.groups.io/g/rfc/message/501 https://edk2.groups.io/g/rfc/message/502 https://edk2.groups.io/g/rfc/message/503 Thanks! Laszlo -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71367): https://edk2.groups.io/g/devel/message/71367 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 2021-02-05 8:06 a.m., Laszlo Ersek wrote: > Hi Ankur, > > I figure it's prudent for me to follow up here too: > > On 02/04/21 03:49, Ankur Arora wrote: >> On 2021-02-03 12:58 p.m., Laszlo Ersek wrote: >>> On 02/03/21 07:45, Ankur Arora wrote: >>>> On 2021-02-02 6:15 a.m., Laszlo Ersek wrote: >>>>> On 02/02/21 15:00, Laszlo Ersek wrote: >>>>> >>>>>> ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the >>>>>> array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In >>>>>> combination with the sync-up point that you quoted. This seems to >>>>>> match >>>>>> existing practice in PiSmmCpuDxeSmm -- there are no concurrent >>>>>> accesses, >>>>>> so atomicity is not a concern, and serializing the instruction streams >>>>>> coarsely, with the sync-up, in combination with volatile accesses, >>>>>> should presumably guarantee visibility (on x86 anyway). >>>>> >>>>> To summarize, this is what I would ask for: >>>>> >>>>> - make CPU_HOT_EJECT_DATA volatile >>>>> >>>>> - make (*CPU_HOT_EJECT_DATA.ApicIdMap) volatile >>>>> >>>>> - after storing something to CPU_HOT_EJECT_DATA or >>>>> CPU_HOT_EJECT_DATA.ApicIdMap on the BSP, execute a MemoryFence() >>>>> >>>>> - before fetching something from CPU_HOT_EJECT_DATA or >>>>> CPU_HOT_EJECT_DATA.ApicIdMap on an AP, execute a MemoryFence() >>>>> >>>>> >>>>> Except: MemoryFence() isn't a *memory fence* in fact. >>>>> >>>>> See "MdePkg/Library/BaseLib/X64/GccInline.c". >>>>> >>>>> It's just a compiler barrier, which may not add anything beyond what >>>>> we'd already have from "volatile". >>>>> >>>>> Case in point: PiSmmCpuDxeSmm performs heavy multi-processing, but does >>>>> not contain a single invocation of MemoryFence(). It uses volatile >>>>> objects, and a handful of InterlockedCompareExchangeXx() calls, for >>>>> implementing semaphores. 
(NB: there is no 8-bit variant of >>>>> InterlockedCompareExchange(), as "volatile UINT8" is considered atomic >>>>> in itself, and a suitable basis for a sempahore too.) And given the >>>>> synchronization from those semaphores, PiSmmCpuDpxeSmm trusts that >>>>> updates to the *other* volatile objects are both atomic and visible. >>>>> >>>>> I'm pretty sure this only works because x86 is in-order. There are >>>>> instruction stream barriers in place, and compiler barriers too, but no >>>>> actual memory barriers. >>>> >>>> Right and just to separate them explicitly, there are two problems: >>>> >>>> - compiler: where the compiler caches stuff in or looks at stale >>>> memory >>>> locations. Now, AFAICS, this should not happen because the ApicIdMap >>>> would >>>> never change once set so the compiler should reasonably be able to cache >>>> the address of ApicIdMap and dereference it (thus obviating the need for >>>> volatile.) >>> >>> (CPU_HOT_EJECT_DATA.Handler does change though.) >> >> Yeah, I did kinda elide over that. Let me think this through in v7 >> and add more explicit comments and then we can see if it still looks >> fishy? >> >> Thanks >> Ankur >> >>> >>>> The compiler could, however, cache any assignments to ApicIdMap[Idx] >>>> (especially given LTO) and we could use a MemoryFence() (as the compiler >>>> barrier that it is) to force the store. >>>> >>>> - CPU pipeline: as you said, right now we basically depend on x86 >>>> store >>>> order semantics (and the CpuPause() loop around AllCpusInSync, kinda >>>> provides >>>> a barrier.) >>>> >>>> So the BSP writes in this order: >>>> ApicIdMap[Idx]=x; ... ->AllCpusInSync = true >>>> >>>> And whenever the AP sees ->AllCpusInSync == True, it has to now see >>>> ApicIdMap[Idx] == x. >>> >>> Yes. >>> >>>> >>>> Maybe the thing to do is to detail this barrier in a commit >>>> note/comment? >>> >>> That would be nice. >>> >>>> And add the MemoryFence() but not the volatile. >>> >>> Yes, that should work. 
> > Please *do* add the volatile, and also the MemoryFence(). When built > with Visual Studio, MemoryFence() does nothing at all (at least when LTO > is in effect -- which it almost always is). So we should have the > volatile for making things work, and MemoryFence() as a conceptual > reminder, so we know where to fix up things, when (if!) we come around > fixing this mess with MemoryFence(). Reference: > > https://edk2.groups.io/g/rfc/message/500 > https://edk2.groups.io/g/rfc/message/501 > https://edk2.groups.io/g/rfc/message/502 > https://edk2.groups.io/g/rfc/message/503 Did see it on the thread. Yeah agreed, Visual Studio does necessitate volatile here. Will add. Thanks Ankur > > Thanks! > Laszlo > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71427): https://edk2.groups.io/g/devel/message/71427 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 02/04/21 03:49, Ankur Arora wrote: > On 2021-02-03 12:58 p.m., Laszlo Ersek wrote: >> On 02/03/21 07:45, Ankur Arora wrote: >>> On 2021-02-02 6:15 a.m., Laszlo Ersek wrote: >>>> On 02/02/21 15:00, Laszlo Ersek wrote: >>>> >>>>> ... I guess that volatile-qualifying both CPU_HOT_EJECT_DATA, and the >>>>> array pointed-to by CPU_HOT_EJECT_DATA.ApicIdMap, should suffice. In >>>>> combination with the sync-up point that you quoted. This seems to >>>>> match >>>>> existing practice in PiSmmCpuDxeSmm -- there are no concurrent >>>>> accesses, >>>>> so atomicity is not a concern, and serializing the instruction streams >>>>> coarsely, with the sync-up, in combination with volatile accesses, >>>>> should presumably guarantee visibility (on x86 anyway). >>>> >>>> To summarize, this is what I would ask for: >>>> >>>> - make CPU_HOT_EJECT_DATA volatile >>>> >>>> - make (*CPU_HOT_EJECT_DATA.ApicIdMap) volatile >>>> >>>> - after storing something to CPU_HOT_EJECT_DATA or >>>> CPU_HOT_EJECT_DATA.ApicIdMap on the BSP, execute a MemoryFence() >>>> >>>> - before fetching something from CPU_HOT_EJECT_DATA or >>>> CPU_HOT_EJECT_DATA.ApicIdMap on an AP, execute a MemoryFence() >>>> >>>> >>>> Except: MemoryFence() isn't a *memory fence* in fact. >>>> >>>> See "MdePkg/Library/BaseLib/X64/GccInline.c". >>>> >>>> It's just a compiler barrier, which may not add anything beyond what >>>> we'd already have from "volatile". >>>> >>>> Case in point: PiSmmCpuDxeSmm performs heavy multi-processing, but does >>>> not contain a single invocation of MemoryFence(). It uses volatile >>>> objects, and a handful of InterlockedCompareExchangeXx() calls, for >>>> implementing semaphores. (NB: there is no 8-bit variant of >>>> InterlockedCompareExchange(), as "volatile UINT8" is considered atomic >>>> in itself, and a suitable basis for a sempahore too.) 
And given the >>>> synchronization from those semaphores, PiSmmCpuDpxeSmm trusts that >>>> updates to the *other* volatile objects are both atomic and visible. >>>> >>>> I'm pretty sure this only works because x86 is in-order. There are >>>> instruction stream barriers in place, and compiler barriers too, but no >>>> actual memory barriers. >>> >>> Right and just to separate them explicitly, there are two problems: >>> >>> - compiler: where the compiler caches stuff in or looks at stale >>> memory >>> locations. Now, AFAICS, this should not happen because the ApicIdMap >>> would >>> never change once set so the compiler should reasonably be able to cache >>> the address of ApicIdMap and dereference it (thus obviating the need for >>> volatile.) >> >> (CPU_HOT_EJECT_DATA.Handler does change though.) > > Yeah, I did kinda elide over that. Let me think this through in v7 > and add more explicit comments and then we can see if it still looks > fishy? OK. (Clearly, I don't want to block progress on this with concerns that are purely theoretical.) Thanks, Laszlo > Thanks > Ankur > >> >>> The compiler could, however, cache any assignments to ApicIdMap[Idx] >>> (especially given LTO) and we could use a MemoryFence() (as the compiler >>> barrier that it is) to force the store. >>> >>> - CPU pipeline: as you said, right now we basically depend on x86 >>> store >>> order semantics (and the CpuPause() loop around AllCpusInSync, kinda >>> provides >>> a barrier.) >>> >>> So the BSP writes in this order: >>> ApicIdMap[Idx]=x; ... ->AllCpusInSync = true >>> >>> And whenever the AP sees ->AllCpusInSync == True, it has to now see >>> ApicIdMap[Idx] == x. >> >> Yes. >> >>> >>> Maybe the thing to do is to detail this barrier in a commit >>> note/comment? >> >> That would be nice. >> >>> And add the MemoryFence() but not the volatile. >> >> Yes, that should work. >> >> Thanks, >> Laszlo >> > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. 
View/Reply Online (#71231): https://edk2.groups.io/g/devel/message/71231 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On 01/29/21 01:59, Ankur Arora wrote: > Add CpuEject(), which handles the CPU ejection, and provides a holding > area for said CPUs. It is called via SmmCpuFeaturesRendezvousExit(), > at the tail end of the SMI handling. (1) The functions introduced thus far by this patch series are all named "Verb + Object", which is great; so please call this function EjectCpu() as well, rather than CpuEject(). Modify all three of: subject line, commit message, patch body; please. > > Also UnplugCpus() now stashes APIC IDs of CPUs which need to be > ejected in CPU_HOT_EJECT_DATA.ApicIdMap. These are used by CpuEject() > to identify such CPUs. > > Cc: Laszlo Ersek <lersek@redhat.com> > Cc: Jordan Justen <jordan.l.justen@intel.com> > Cc: Ard Biesheuvel <ard.biesheuvel@arm.com> > Cc: Igor Mammedov <imammedo@redhat.com> > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> > Cc: Aaron Young <aaron.young@oracle.com> > Ref: https://bugzilla.tianocore.org/show_bug.cgi?id=3132 > Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com> > --- > OvmfPkg/CpuHotplugSmm/CpuHotplug.c | 109 +++++++++++++++++++++++++++++++++++-- > 1 file changed, 105 insertions(+), 4 deletions(-) > > diff --git a/OvmfPkg/CpuHotplugSmm/CpuHotplug.c b/OvmfPkg/CpuHotplugSmm/CpuHotplug.c > index 70d69f6ed65b..526f51faf070 100644 > --- a/OvmfPkg/CpuHotplugSmm/CpuHotplug.c > +++ b/OvmfPkg/CpuHotplugSmm/CpuHotplug.c > @@ -14,6 +14,7 @@ > #include <Library/MmServicesTableLib.h> // gMmst > #include <Library/PcdLib.h> // PcdGetBool() > #include <Library/SafeIntLib.h> // SafeUintnSub() > +#include <Library/CpuHotEjectData.h> // CPU_HOT_EJECT_DATA > #include <Protocol/MmCpuIo.h> // EFI_MM_CPU_IO_PROTOCOL > #include <Protocol/SmmCpuService.h> // EFI_SMM_CPU_SERVICE_PROTOCOL > #include <Uefi/UefiBaseType.h> // EFI_STATUS (2) This will change due to the movement of the header file, but: please keep the #include directive list alphabetically sorted. 
> @@ -32,11 +33,12 @@ STATIC EFI_MM_CPU_IO_PROTOCOL *mMmCpuIo; > // > STATIC EFI_SMM_CPU_SERVICE_PROTOCOL *mMmCpuService; > // > -// This structure is a communication side-channel between the > +// These structures serve as communication side-channels between the > // EFI_SMM_CPU_SERVICE_PROTOCOL consumer (i.e., this driver) and provider > // (i.e., PiSmmCpuDxeSmm). > // > STATIC CPU_HOT_PLUG_DATA *mCpuHotPlugData; > +STATIC CPU_HOT_EJECT_DATA *mCpuHotEjectData; > // > // SMRAM arrays for fetching the APIC IDs of processors with pending events (of > // known event types), for the time of just one MMI. > @@ -188,11 +190,53 @@ RevokeNewSlot: > } > > /** > + CPU Hot-eject handler, called from SmmCpuFeaturesRendezvousExit(), > + on each CPU at exit from SMM. > + > + If, the executing CPU is not being ejected, nothing to be done. > + If, the executing CPU is being ejected, wait in a CpuDeadLoop() > + until ejected. > + > + @param[in] ProcessorNum Index of executing CPU. > + > +**/ > +VOID > +EFIAPI > +CpuEject ( > + IN UINTN ProcessorNum > + ) > +{ > + // > + // APIC ID is UINT32, but mCpuHotEjectData->ApicIdMap[] is UINT64 > + // so use UINT64 throughout. > + // > + UINT64 ApicId; > + > + ApicId = mCpuHotEjectData->ApicIdMap[ProcessorNum]; > + if (ApicId == CPU_EJECT_INVALID) { > + return; > + } > + > + // > + // CPU(s) being unplugged get here from SmmCpuFeaturesSmiRendezvousExit() > + // after having been cleared to exit the SMI by the monarch and thus have > + // no SMM processing remaining. > + // > + // Given that we cannot allow them to escape to the guest, we pen them > + // here until the SMM monarch tells the HW to unplug them. > + // > + CpuDeadLoop (); > +} (3a) We can make this less resource-hungry, by replacing CpuDeadLoop() with: for (;;) { DisableInterrupts (); CpuSleep (); } This basically translates to a { CLI; HLT; } loop. 
(Both functions come from BaseLib, which CpuHotplugSmm already consumes, thus there is no need to modify #include's or [LibraryClasses].) (3b) Please refresh the CpuDeadLoop() reference in the function's leading comment as well. > + > +/** > Process to be hot-unplugged CPUs, per QemuCpuhpCollectApicIds(). > > For each such CPU, report the CPU to PiSmmCpuDxeSmm via > - EFI_SMM_CPU_SERVICE_PROTOCOL. If the to be hot-unplugged CPU is > - unknown, skip it silently. > + EFI_SMM_CPU_SERVICE_PROTOCOL and stash the APIC ID for later ejection. > + If the to be hot-unplugged CPU is unknown, skip it silently. > + > + Additonally, if we do stash any APIC IDs, also install a CPU eject handler > + which would handle the ejection. > > @param[in] ToUnplugApicIds The APIC IDs of the CPUs that are about to be > hot-unplugged. > @@ -216,9 +260,11 @@ UnplugCpus ( > { > EFI_STATUS Status; > UINT32 ToUnplugIdx; > + UINT32 EjectCount; > UINTN ProcessorNum; > > ToUnplugIdx = 0; > + EjectCount = 0; > while (ToUnplugIdx < ToUnplugCount) { > APIC_ID RemoveApicId; > > @@ -255,13 +301,41 @@ UnplugCpus ( > DEBUG ((DEBUG_ERROR, "%a: RemoveProcessor(" FMT_APIC_ID "): %r\n", > __FUNCTION__, RemoveApicId, Status)); > goto Fatal; > + } else { (Under patch v6 4/9, I request that the "goto" be replaced with a "return" -- my point (4) below applies regardless:) (4) Please don't add an "else" branch, if the first branch of the "if" ends with a jump statement. Because, in that case, the code that follows the "if" statement is not reachable after the first branch anyway. So please just unnest the next part: > + // > + // Stash the APIC IDs so we can do the actual ejection later. > + // > + if (mCpuHotEjectData->ApicIdMap[ProcessorNum] != CPU_EJECT_INVALID) { > + // > + // Since ProcessorNum and APIC-ID map 1-1, so a valid > + // mCpuHotEjectData->ApicIdMap[ProcessorNum] means something > + // is horribly wrong. 
> + // (5) To be honest, I would replace this with: // // - mCpuHotEjectData->ApicIdMap[ProcessorNum] is initialized to // CPU_EJECT_INVALID when mCpuHotEjectData->ApicIdMap is allocated, // // - mCpuHotEjectData->ApicIdMap[ProcessorNum] is restored to // CPU_EJECT_INVALID when the subject processor is ejected, // // - mMmCpuService->RemoveProcessor(ProcessorNum) invalidates // mCpuHotPlugData->ApicId[ProcessorNum], so a given ProcessorNum can // never match more than one APIC ID in a single invocation of // UnplugCpus(). // > + DEBUG ((DEBUG_ERROR, "%a: ProcessorNum %u maps to %llx, cannot " > + "map to " FMT_APIC_ID "\n", __FUNCTION__, ProcessorNum, > + mCpuHotEjectData->ApicIdMap[ProcessorNum], RemoveApicId)); (6a) The indentation of the 2nd and 3rd lines is incorrect. (6b) For logging UINTN values (i.e., ProcessorNum) portably between IA32 and X64, %u is not correct. Instead: - cast the UINTN value to UINT64 explicitly, - use the %Lu or %Lx format specifier. (6c) There is no "%llx" format string in edk2's PrintLib (no "ll" length modifier, to be more precise). UINT64 values need to be printed with "%lu" or "%lx", or -- identically -- with "%Lu" or "%Lx". I prefer the latter, because standard C does not define the "L" size modifier for integers, and that makes it clear that we're using an edk2-specific feature. The "l" (ell) length modifier could be misunderstood as "long" (which is something we don't use in edk2). (6d) FMT_APIC_ID is defined as "0x%08x"; to remain consistent with that, I would print the ApicIdMap element not just with "%Lx", but with "0x%016Lx". > + > + Status = EFI_INVALID_PARAMETER; > + goto Fatal; (7a) Please just "return EFI_ALREADY_STARTED". (7b) Please also modify the leading comment on the function -- the new return value EFI_ALREADY_STARTED should be documented. 
I suggest: @retval EFI_ALREADY_STARTED For the ProcessorNumber that EFI_SMM_CPU_SERVICE_PROTOCOL had assigned to one of the APIC IDs in ToUnplugApicIds, mCpuHotEjectData->ApicIdMap already has an APIC ID stashed. (This should never happen.) > + } > + > + mCpuHotEjectData->ApicIdMap[ProcessorNum] = (UINT64)RemoveApicId; > + EjectCount++; > } > > ToUnplugIdx++; > } > > + if (EjectCount != 0) { > + // > + // We have processors to be ejected; install the handler. > + // > + mCpuHotEjectData->Handler = CpuEject; > + } > + (8) I suggest removing the "EjectCount" local variable, and setting the "Handler" member where you currently increment "EjectCount". > // > - // We've removed this set of APIC IDs from SMM data structures. > + // We've removed this set of APIC IDs from SMM data structures and > + // have installed an ejection handler if needed. > // > return EFI_SUCCESS; > > @@ -458,7 +532,13 @@ CpuHotplugEntry ( > // Our DEPEX on EFI_SMM_CPU_SERVICE_PROTOCOL guarantees that PiSmmCpuDxeSmm > // has pointed PcdCpuHotPlugDataAddress to CPU_HOT_PLUG_DATA in SMRAM. > // > + // Additionally, CPU Hot-unplug is available only if CPU Hotplug is, so > + // the same DEPEX also guarantees that PcdCpuHotEjectDataAddress points > + // to CPU_HOT_EJECT_DATA in SMRAM. > + // (9) I don't see the relevance of "hot-unplug depends on hot-plug" here. I recommend the following comment instead: // // Our DEPEX on EFI_SMM_CPU_SERVICE_PROTOCOL guarantees that PiSmmCpuDxeSmm // has pointed: // - PcdCpuHotPlugDataAddress to CPU_HOT_PLUG_DATA in SMRAM, // - PcdCpuHotEjectDataAddress to CPU_HOT_EJECT_DATA in SMRAM, if the // possible CPU count is greater than 1. 
// > mCpuHotPlugData = (VOID *)(UINTN)PcdGet64 (PcdCpuHotPlugDataAddress); > + mCpuHotEjectData = (VOID *)(UINTN)PcdGet64 (PcdCpuHotEjectDataAddress); > + > if (mCpuHotPlugData == NULL) { > Status = EFI_NOT_FOUND; > DEBUG ((DEBUG_ERROR, "%a: CPU_HOT_PLUG_DATA: %r\n", __FUNCTION__, Status)); > @@ -470,6 +550,9 @@ CpuHotplugEntry ( > if (mCpuHotPlugData->ArrayLength == 1) { > return EFI_UNSUPPORTED; > } > + ASSERT (mCpuHotEjectData && > + (mCpuHotPlugData->ArrayLength == mCpuHotEjectData->ArrayLength)); > + > // > // Allocate the data structures that depend on the possible CPU count. > // (10) To remain consistent with the check performed on "mCpuHotPlugData", please do: if (mCpuHotEjectData == NULL) { Status = EFI_NOT_FOUND; } else if (mCpuHotPlugData->ArrayLength != mCpuHotEjectData->ArrayLength) { Status = EFI_INVALID_PARAMETER; } else { Status = EFI_SUCCESS; } if (EFI_ERROR (Status)) { DEBUG ((DEBUG_ERROR, "%a: CPU_HOT_EJECT_DATA: %r\n", __FUNCTION__, Status)); goto Fatal; } ( As a digression, I'll make some comments on the ASSERT() too: - Given ASSERT ((C1) && (C2)), it is best to express the same as ASSERT (C1); ASSERT (C2); -- the effect is the same, but the error messages have finer granularity. - Checking a pointer against NULL must be explicit at all times, in edk2. IOW, ASSERT (mCpuHotEjectData) should be spelled ASSERT (mCpuHotEjectData != NULL). ) > @@ -552,6 +635,24 @@ CpuHotplugEntry ( > // > SmbaseInstallFirstSmiHandler (); > > + if (mCpuHotEjectData) { (11) This condition is guaranteed to evaluate to TRUE; see the ASSERT() above. Anyway, ignore this... > + UINT32 Idx; (12) Incorrect indentation, but ignore this too... > + // > + // For CPU ejection we need to map ProcessorNum -> APIC_ID. By the time > + // we need the mapping, however, the Processor's APIC ID has already been > + // removed from SMM data structures. So we will maintain a local map > + // in mCpuHotEjectData->ApicIdMap. 
> + // > + for (Idx = 0; Idx < mCpuHotEjectData->ArrayLength; Idx++) { > + mCpuHotEjectData->ApicIdMap[Idx] = CPU_EJECT_INVALID; > + } (13) ... because this init loop should be moved to patch #6 (subject "OvmfPkg/SmmCpuFeaturesLib: init CPU ejection state"), as I mentioned there... > + > + // > + // Wait to init the handler until an ejection is warranted > + // > + mCpuHotEjectData->Handler = NULL; (14) ... and because this nulling is performed by patch #6 already (subject "OvmfPkg/SmmCpuFeaturesLib: init CPU ejection state"). Therefore, this whole conditional block should be removed please. Thanks! Laszlo > + } > + > return EFI_SUCCESS; > > ReleasePostSmmPen: > -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#71023): https://edk2.groups.io/g/devel/message/71023 Mute This Topic: https://groups.io/mt/80199926/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
© 2016 - 2024 Red Hat, Inc.