OvmfPkg/IoMmuDxe/IoMmuBuffer.c | 100 ++++++++++++-------- OvmfPkg/IoMmuDxe/IoMmuDxe.inf | 1 + 2 files changed, 60 insertions(+), 41 deletions(-)
Instead of relying on raising the TPL to protect the critical sections
that manipulate the global bitmask that keeps track of bounce buffer
allocations, use compare-and-exchange to manage the global variable, and
tweak the logic to line up with that.
Given that IoMmuDxe implements a singleton protocol that is shared
between multiple drivers, and considering the elaborate and confusing
requirements in the UEFP spec regarding TPL levels at which protocol
methods may be invoked, not relying on TPL levels at all is a more
robust approach in this case.
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Jiewen Yao <jiewen.yao@intel.com>
Cc: Michael Brown <mcb30@ipxe.org>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2211060
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
OvmfPkg/IoMmuDxe/IoMmuBuffer.c | 100 ++++++++++++--------
OvmfPkg/IoMmuDxe/IoMmuDxe.inf | 1 +
2 files changed, 60 insertions(+), 41 deletions(-)
diff --git a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
index 103003cae376b93f..2764c35044ac23a7 100644
--- a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
+++ b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
@@ -12,6 +12,7 @@
#include <Library/MemEncryptSevLib.h>
#include <Library/MemEncryptTdxLib.h>
#include <Library/PcdLib.h>
+#include <Library/SynchronizationLib.h>
#include <Library/UefiBootServicesTableLib.h>
#include "IoMmuInternal.h"
@@ -268,16 +269,17 @@ InternalAllocateBuffer (
IN EFI_ALLOCATE_TYPE Type,
IN EFI_MEMORY_TYPE MemoryType,
IN UINTN Pages,
- IN OUT UINT32 *ReservedMemBitmap,
+ OUT UINT32 *ReservedMemBit,
IN OUT EFI_PHYSICAL_ADDRESS *PhysicalAddress
)
{
UINT32 MemBitmap;
+ UINT32 ReservedMemBitmap;
UINT8 Index;
IOMMU_RESERVED_MEM_RANGE *MemRange;
UINTN PagesOfLastMemRange;
- *ReservedMemBitmap = 0;
+ *ReservedMemBit = 0;
if (Pages == 0) {
ASSERT (FALSE);
@@ -309,23 +311,31 @@ InternalAllocateBuffer (
MemRange = &mReservedMemRanges[Index];
- if ((mReservedMemBitmap & MemRange->BitmapMask) == MemRange->BitmapMask) {
- // The reserved memory is exausted. Turn to legacy allocate.
- goto LegacyAllocateBuffer;
- }
+ do {
+ ReservedMemBitmap = mReservedMemBitmap;
- MemBitmap = (mReservedMemBitmap & MemRange->BitmapMask) >> MemRange->Shift;
+ if ((ReservedMemBitmap & MemRange->BitmapMask) == MemRange->BitmapMask) {
+ // The reserved memory is exhausted. Turn to legacy allocate.
+ goto LegacyAllocateBuffer;
+ }
+
+ MemBitmap = (ReservedMemBitmap & MemRange->BitmapMask) >> MemRange->Shift;
- for (Index = 0; Index < MemRange->Slots; Index++) {
- if ((MemBitmap & (UINT8)(1<<Index)) == 0) {
- break;
+ for (Index = 0; Index < MemRange->Slots; Index++) {
+ if ((MemBitmap & (UINT8)(1<<Index)) == 0) {
+ break;
+ }
}
- }
- ASSERT (Index != MemRange->Slots);
+ ASSERT (Index != MemRange->Slots);
- *PhysicalAddress = MemRange->StartAddressOfMemRange + Index * SIZE_OF_MEM_RANGE (MemRange) + MemRange->HeaderSize;
- *ReservedMemBitmap = (UINT32)(1 << (Index + MemRange->Shift));
+ *PhysicalAddress = MemRange->StartAddressOfMemRange + Index * SIZE_OF_MEM_RANGE (MemRange) + MemRange->HeaderSize;
+ *ReservedMemBit = (UINT32)(1 << (Index + MemRange->Shift));
+ } while (ReservedMemBitmap != InterlockedCompareExchange32 (
+ &mReservedMemBitmap,
+ ReservedMemBitmap,
+ ReservedMemBitmap | *ReservedMemBit
+ ));
DEBUG ((
DEBUG_VERBOSE,
@@ -334,16 +344,16 @@ InternalAllocateBuffer (
MemRange->DataSize,
*PhysicalAddress,
Pages,
- *ReservedMemBitmap,
- mReservedMemBitmap,
- mReservedMemBitmap | *ReservedMemBitmap
+ *ReservedMemBit,
+ ReservedMemBitmap,
+ ReservedMemBitmap | *ReservedMemBit
));
return EFI_SUCCESS;
LegacyAllocateBuffer:
- *ReservedMemBitmap = 0;
+ *ReservedMemBit = 0;
return gBS->AllocatePages (Type, MemoryType, Pages, PhysicalAddress);
}
@@ -366,27 +376,41 @@ IoMmuAllocateBounceBuffer (
)
{
EFI_STATUS Status;
- UINT32 ReservedMemBitmap;
- EFI_TPL OldTpl;
-
- OldTpl = gBS->RaiseTPL (TPL_NOTIFY);
- ReservedMemBitmap = 0;
- Status = InternalAllocateBuffer (
- Type,
- MemoryType,
- MapInfo->NumberOfPages,
- &ReservedMemBitmap,
- &MapInfo->PlainTextAddress
- );
- MapInfo->ReservedMemBitmap = ReservedMemBitmap;
- mReservedMemBitmap |= ReservedMemBitmap;
- gBS->RestoreTPL (OldTpl);
+ Status = InternalAllocateBuffer (
+ Type,
+ MemoryType,
+ MapInfo->NumberOfPages,
+ &MapInfo->ReservedMemBitmap,
+ &MapInfo->PlainTextAddress
+ );
ASSERT (Status == EFI_SUCCESS);
return Status;
}
+/**
+ * Clear a bit in the reserved memory bitmap in a thread safe manner
+ *
+ * @param ReservedMemBit The bit to clear
+ */
+STATIC
+VOID
+ClearReservedMemBit (
+ IN UINT32 ReservedMemBit
+ )
+{
+ UINT32 ReservedMemBitmap;
+
+ do {
+ ReservedMemBitmap = mReservedMemBitmap;
+ } while (ReservedMemBitmap != InterlockedCompareExchange32 (
+ &mReservedMemBitmap,
+ ReservedMemBitmap,
+ ReservedMemBitmap & ~ReservedMemBit
+ ));
+}
+
/**
* Free the bounce buffer allocated in IoMmuAllocateBounceBuffer.
*
@@ -398,8 +422,6 @@ IoMmuFreeBounceBuffer (
IN OUT MAP_INFO *MapInfo
)
{
- EFI_TPL OldTpl;
-
if (MapInfo->ReservedMemBitmap == 0) {
gBS->FreePages (MapInfo->PlainTextAddress, MapInfo->NumberOfPages);
} else {
@@ -412,11 +434,9 @@ IoMmuFreeBounceBuffer (
mReservedMemBitmap,
mReservedMemBitmap & ((UINT32)(~MapInfo->ReservedMemBitmap))
));
- OldTpl = gBS->RaiseTPL (TPL_NOTIFY);
MapInfo->PlainTextAddress = 0;
- mReservedMemBitmap &= (UINT32)(~MapInfo->ReservedMemBitmap);
+ ClearReservedMemBit (MapInfo->ReservedMemBitmap);
MapInfo->ReservedMemBitmap = 0;
- gBS->RestoreTPL (OldTpl);
}
return EFI_SUCCESS;
@@ -452,8 +472,6 @@ IoMmuAllocateCommonBuffer (
);
ASSERT (Status == EFI_SUCCESS);
- mReservedMemBitmap |= *ReservedMemBitmap;
-
if (*ReservedMemBitmap != 0) {
*PhysicalAddress -= SIZE_4KB;
}
@@ -494,7 +512,7 @@ IoMmuFreeCommonBuffer (
mReservedMemBitmap & ((UINT32)(~CommonBufferHeader->ReservedMemBitmap))
));
- mReservedMemBitmap &= (UINT32)(~CommonBufferHeader->ReservedMemBitmap);
+ ClearReservedMemBit (CommonBufferHeader->ReservedMemBitmap);
return EFI_SUCCESS;
LegacyFreeCommonBuffer:
diff --git a/OvmfPkg/IoMmuDxe/IoMmuDxe.inf b/OvmfPkg/IoMmuDxe/IoMmuDxe.inf
index 17fca52856925da0..d08f7e59e2b665f4 100644
--- a/OvmfPkg/IoMmuDxe/IoMmuDxe.inf
+++ b/OvmfPkg/IoMmuDxe/IoMmuDxe.inf
@@ -35,6 +35,7 @@ [LibraryClasses]
MemEncryptSevLib
MemEncryptTdxLib
MemoryAllocationLib
+ SynchronizationLib
UefiBootServicesTableLib
UefiDriverEntryPoint
--
2.39.2
-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107100): https://edk2.groups.io/g/devel/message/107100
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-
On Thu, Jul 20, 2023 at 03:45:57PM +0200, Ard Biesheuvel wrote: > Instead of relying on raising the TPL to protect the critical sections > that manipulate the global bitmask that keeps track of bounce buffer > allocations, use compare-and-exchange to manage the global variable, and > tweak the logic to line up with that. > > Given that IoMmuDxe implements a singleton protocol that is shared > between multiple drivers, and considering the elaborate and confusing > requirements in the UEFP spec regarding TPL levels at which protocol > methods may be invoked, not relying on TPL levels at all is a more > robust approach in this case. > > Cc: Gerd Hoffmann <kraxel@redhat.com> > Cc: Jiewen Yao <jiewen.yao@intel.com> > Cc: Michael Brown <mcb30@ipxe.org> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2211060 > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Looks good to me. What is the status? Merged? Or waiting for testing still? If so I can create a test build with the patch and ask our QE department to check it. take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107946): https://edk2.groups.io/g/devel/message/107946 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote: > > On Thu, Jul 20, 2023 at 03:45:57PM +0200, Ard Biesheuvel wrote: > > Instead of relying on raising the TPL to protect the critical sections > > that manipulate the global bitmask that keeps track of bounce buffer > > allocations, use compare-and-exchange to manage the global variable, and > > tweak the logic to line up with that. > > > > Given that IoMmuDxe implements a singleton protocol that is shared > > between multiple drivers, and considering the elaborate and confusing > > requirements in the UEFP spec regarding TPL levels at which protocol > > methods may be invoked, not relying on TPL levels at all is a more > > robust approach in this case. > > > > Cc: Gerd Hoffmann <kraxel@redhat.com> > > Cc: Jiewen Yao <jiewen.yao@intel.com> > > Cc: Michael Brown <mcb30@ipxe.org> > > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2211060 > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org> > > Looks good to me. > > What is the status? Merged? Or waiting for testing still? If so I can > create a test build with the patch and ask our QE department to check > it. > Still waiting for testing, so yes, please test. -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107948): https://edk2.groups.io/g/devel/message/107948 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote: > On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > Looks good to me. > > > > What is the status? Merged? Or waiting for testing still? If so I can > > create a test build with the patch and ask our QE department to check > > it. > > > > Still waiting for testing, so yes, please test. Hmm, QE reports back it slows down the boot alot. No boot hangs yet with 12 test runs so far, which isn't that much for a reproduce rate below 20% ... https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28 So I guess we go with the TPL version for the coming stable tag and leave any improvements for later ... take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107976): https://edk2.groups.io/g/devel/message/107976 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote: > > On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote: > > On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > > > Looks good to me. > > > > > > What is the status? Merged? Or waiting for testing still? If so I can > > > create a test build with the patch and ask our QE department to check > > > it. > > > > > > > Still waiting for testing, so yes, please test. > > Hmm, QE reports back it slows down the boot alot. No boot hangs yet > with 12 test runs so far, which isn't that much for a reproduce rate > below 20% ... > > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28 > > So I guess we go with the TPL version for the coming stable tag and > leave any improvements for later ... > Yeah, this was not going to make the stable tag in any case. The boot speed regression seems odd, though - this is effectively UP code so there shouldn't be any contention, the only thing this patch does is ensure that the critical section is restarted if it was interrupted Are we comparing apples with apples here? -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107980): https://edk2.groups.io/g/devel/message/107980 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Wed, Aug 23, 2023 at 4:12 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote: > > > On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > > > > > Looks good to me. > > > > > > > > What is the status? Merged? Or waiting for testing still? If so I can > > > > create a test build with the patch and ask our QE department to check > > > > it. > > > > > > > > > > Still waiting for testing, so yes, please test. > > > > Hmm, QE reports back it slows down the boot alot. No boot hangs yet > > with 12 test runs so far, which isn't that much for a reproduce rate > > below 20% ... > > > > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28 > > > > So I guess we go with the TPL version for the coming stable tag and > > leave any improvements for later ... > > > > Yeah, this was not going to make the stable tag in any case. > > The boot speed regression seems odd, though - this is effectively UP > code so there shouldn't be any contention, the only thing this patch > does is ensure that the critical section is restarted if it was > interrupted FWIW: Given completely correct logic, straightforward logic, a lock cmpxchg is much slower(3-4x) than an a non-lock cmpxchg, which itself is around 2x as slow as a regular relaxed load + store. See https://gist.github.com/heatd/49c9be23ccb1f4ad8dfeac231da2647a for a nice fun test benchmark. HOWEVER, given this is likely in IO paths, I would *really* not expect this to make a difference, right? Even virtio drivers will themselves trap with a VMEXIT whenever you touch a hardware register... Gerd, could you folks get a perf kvm (perf-kvm(1)) recording out of that OVMF build? Assuming you can get that thing to work, that is, personally it mysteriously stopped working 6 years ago for me :) -- Pedro -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107988): https://edk2.groups.io/g/devel/message/107988 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Wed, Aug 23, 2023 at 07:10:52PM +0100, Pedro Falcato wrote: > On Wed, Aug 23, 2023 at 4:12 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > > > Hmm, QE reports back it slows down the boot alot. No boot hangs yet > > > with 12 test runs so far, which isn't that much for a reproduce rate > > > below 20% ... > > > > > > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28 > > > > > > So I guess we go with the TPL version for the coming stable tag and > > > leave any improvements for later ... > > > > Yeah, this was not going to make the stable tag in any case. > > > > The boot speed regression seems odd, though - this is effectively UP > > code so there shouldn't be any contention, the only thing this patch > > does is ensure that the critical section is restarted if it was > > interrupted QE reported back boot times without this patch are ~20-30 seconds, with this patch it can be more than 3 minutes. > FWIW: Given completely correct logic, straightforward logic, a lock > cmpxchg is much slower(3-4x) than an a non-lock cmpxchg, which itself > is around 2x as slow as a regular relaxed load + store. > See https://gist.github.com/heatd/49c9be23ccb1f4ad8dfeac231da2647a for > a nice fun test benchmark. Also note that IoMmuDxe is only used in case memory encryption is enabled (I/O uses unencrypted bounce buffers so the host can virtio emulation can read/write stuff there). Maybe that affects performance too. Cc'ing the AMD people for comments on that. > HOWEVER, given this is likely in IO paths, I would *really* not expect > this to make a difference, right? Even virtio drivers will themselves > trap with a VMEXIT whenever you touch a hardware register... It's only a single VMEXIT per I/O request (ring the doorbell after adding a request to the ring), but still, this is heavy enough that the cmpxchg difference should be in the noise. > Gerd, could you folks get a perf kvm (perf-kvm(1)) recording out of > that OVMF build? Assuming you can get that thing to work, that is, > personally it mysteriously stopped working 6 years ago for me :) I can try hack IoMmuDxe so it is used unconditionally and try reproduce locally (without sev-capable hardware), but most likely not this week. take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#108000): https://edk2.groups.io/g/devel/message/108000 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Thu, 24 Aug 2023 at 10:06, Gerd Hoffmann <kraxel@redhat.com> wrote: > > On Wed, Aug 23, 2023 at 07:10:52PM +0100, Pedro Falcato wrote: > > On Wed, Aug 23, 2023 at 4:12 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > > > > > Hmm, QE reports back it slows down the boot alot. No boot hangs yet > > > > with 12 test runs so far, which isn't that much for a reproduce rate > > > > below 20% ... > > > > > > > > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28 > > > > > > > > So I guess we go with the TPL version for the coming stable tag and > > > > leave any improvements for later ... > > > > > > Yeah, this was not going to make the stable tag in any case. > > > > > > The boot speed regression seems odd, though - this is effectively UP > > > code so there shouldn't be any contention, the only thing this patch > > > does is ensure that the critical section is restarted if it was > > > interrupted > > QE reported back boot times without this patch are ~20-30 seconds, > with this patch it can be more than 3 minutes. > > > FWIW: Given completely correct logic, straightforward logic, a lock > > cmpxchg is much slower(3-4x) than an a non-lock cmpxchg, which itself > > is around 2x as slow as a regular relaxed load + store. > > See https://gist.github.com/heatd/49c9be23ccb1f4ad8dfeac231da2647a for > > a nice fun test benchmark. > > Also note that IoMmuDxe is only used in case memory encryption is > enabled (I/O uses unencrypted bounce buffers so the host can virtio > emulation can read/write stuff there). Maybe that affects performance > too. > > Cc'ing the AMD people for comments on that. > > > HOWEVER, given this is likely in IO paths, I would *really* not expect > > this to make a difference, right? Even virtio drivers will themselves > > trap with a VMEXIT whenever you touch a hardware register... > > It's only a single VMEXIT per I/O request (ring the doorbell after > adding a request to the ring), but still, this is heavy enough that > the cmpxchg difference should be in the noise. > > > Gerd, could you folks get a perf kvm (perf-kvm(1)) recording out of > > that OVMF build? Assuming you can get that thing to work, that is, > > personally it mysteriously stopped working 6 years ago for me :) > > I can try hack IoMmuDxe so it is used unconditionally and try reproduce > locally (without sev-capable hardware), but most likely not this week. > I have tried the patch below, and I don't see any slowdowns with or without the patch, running both DEBUG and RELEASE builds under ordinary KVM/nested-virt. Note that the change in the first hunk will cause the ASSERT()s removed in the other hunks to trigger so the code is definitely being exercised. diff --git a/OvmfPkg/IoMmuDxe/IoMmuDxe.c b/OvmfPkg/IoMmuDxe/IoMmuDxe.c index aab6d8b90687..c1082b733d3a 100644 --- a/OvmfPkg/IoMmuDxe/IoMmuDxe.c +++ b/OvmfPkg/IoMmuDxe/IoMmuDxe.c @@ -25,7 +25,7 @@ IoMmuDxeEntryPoint ( // When SEV or TDX is enabled, install IoMmu protocol otherwise install the // placeholder protocol so that other dependent module can run. // - if (MemEncryptSevIsEnabled () || MemEncryptTdxIsEnabled ()) { + if (TRUE || MemEncryptSevIsEnabled () || MemEncryptTdxIsEnabled ()) { Status = InstallIoMmuProtocol (); } else { Handle = NULL; diff --git a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c index 2764c35044ac..27fe068362ff 100644 --- a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c +++ b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c @@ -184,7 +184,7 @@ IoMmuInitReservedSharedMem ( ); ASSERT (!EFI_ERROR (Status)); } else { - ASSERT (FALSE); + //ASSERT (FALSE); } } @@ -233,7 +233,7 @@ IoMmuReleaseReservedSharedMem ( ); ASSERT (!EFI_ERROR (Status)); } else { - ASSERT (FALSE); + //ASSERT (FALSE); } } } -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#108002): https://edk2.groups.io/g/devel/message/108002 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
Hi, > I have tried the patch below, and I don't see any slowdowns with or > without the patch, running both DEBUG and RELEASE builds under > ordinary KVM/nested-virt. Note that the change in the first hunk will > cause the ASSERT()s removed in the other hunks to trigger so the code > is definitely being exercised. QE team tested again with fresh builds, the slowdown isn't there any more. Not fully clear where it came from, but most likely not from this patch. Only 20 successful test runs so far, which a good start, but with a reproduce rate below 20% not that much. I'll post an update when it I get more results. take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#108054): https://edk2.groups.io/g/devel/message/108054 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote: > > Hi, > > > I have tried the patch below, and I don't see any slowdowns with or > > without the patch, running both DEBUG and RELEASE builds under > > ordinary KVM/nested-virt. Note that the change in the first hunk will > > cause the ASSERT()s removed in the other hunks to trigger so the code > > is definitely being exercised. > > QE team tested again with fresh builds, the slowdown isn't there any > more. Not fully clear where it came from, but most likely not from > this patch. > > Only 20 successful test runs so far, which a good start, but with > a reproduce rate below 20% not that much. I'll post an update > when it I get more results. > Excellent, thanks for the update. -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#108057): https://edk2.groups.io/g/devel/message/108057 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Mon, 28 Aug 2023 at 13:13, Ard Biesheuvel <ardb@kernel.org> wrote: > > On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > Hi, > > > > > I have tried the patch below, and I don't see any slowdowns with or > > > without the patch, running both DEBUG and RELEASE builds under > > > ordinary KVM/nested-virt. Note that the change in the first hunk will > > > cause the ASSERT()s removed in the other hunks to trigger so the code > > > is definitely being exercised. > > > > QE team tested again with fresh builds, the slowdown isn't there any > > more. Not fully clear where it came from, but most likely not from > > this patch. > > > > Only 20 successful test runs so far, which a good start, but with > > a reproduce rate below 20% not that much. I'll post an update > > when it I get more results. > > > > Excellent, thanks for the update. Given that the QE engineer appears to have lost interest, I intend to merge this patch as-is unless anyone feels this is premature. -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#108197): https://edk2.groups.io/g/devel/message/108197 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Thu, Aug 31, 2023 at 06:01:56PM +0200, Ard Biesheuvel wrote: > On Mon, 28 Aug 2023 at 13:13, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > > > Hi, > > > > > > > I have tried the patch below, and I don't see any slowdowns with or > > > > without the patch, running both DEBUG and RELEASE builds under > > > > ordinary KVM/nested-virt. Note that the change in the first hunk will > > > > cause the ASSERT()s removed in the other hunks to trigger so the code > > > > is definitely being exercised. > > > > > > QE team tested again with fresh builds, the slowdown isn't there any > > > more. Not fully clear where it came from, but most likely not from > > > this patch. > > > > > > Only 20 successful test runs so far, which a good start, but with > > > a reproduce rate below 20% not that much. I'll post an update > > > when it I get more results. > > > > > > > Excellent, thanks for the update. > > Given that the QE engineer appears to have lost interest, I intend to > merge this patch as-is unless anyone feels this is premature. I was offline a few days. QE actually reported back: 100 test runs without problems. Tested-by: Gerd Hoffmann <kraxel@redhat.com> Acked-by: Gerd Hoffmann <kraxel@redhat.com> take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#108256): https://edk2.groups.io/g/devel/message/108256 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Mon, 4 Sept 2023 at 13:45, Gerd Hoffmann <kraxel@redhat.com> wrote: > > On Thu, Aug 31, 2023 at 06:01:56PM +0200, Ard Biesheuvel wrote: > > On Mon, 28 Aug 2023 at 13:13, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > > > > > Hi, > > > > > > > > > I have tried the patch below, and I don't see any slowdowns with or > > > > > without the patch, running both DEBUG and RELEASE builds under > > > > > ordinary KVM/nested-virt. Note that the change in the first hunk will > > > > > cause the ASSERT()s removed in the other hunks to trigger so the code > > > > > is definitely being exercised. > > > > > > > > QE team tested again with fresh builds, the slowdown isn't there any > > > > more. Not fully clear where it came from, but most likely not from > > > > this patch. > > > > > > > > Only 20 successful test runs so far, which a good start, but with > > > > a reproduce rate below 20% not that much. I'll post an update > > > > when it I get more results. > > > > > > > > > > Excellent, thanks for the update. > > > > Given that the QE engineer appears to have lost interest, I intend to > > merge this patch as-is unless anyone feels this is premature. > > I was offline a few days. QE actually reported back: 100 test runs > without problems. > > Tested-by: Gerd Hoffmann <kraxel@redhat.com> > Acked-by: Gerd Hoffmann <kraxel@redhat.com> > Thanks for reporting back. This is merged now. -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#108257): https://edk2.groups.io/g/devel/message/108257 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Wed, Aug 23, 2023 at 05:11:49PM +0200, Ard Biesheuvel wrote: > On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote: > > > On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote: > > > > > > > > Looks good to me. > > > > > > > > What is the status? Merged? Or waiting for testing still? If so I can > > > > create a test build with the patch and ask our QE department to check > > > > it. > > > > > > > > > > Still waiting for testing, so yes, please test. > > > > Hmm, QE reports back it slows down the boot alot. No boot hangs yet > > with 12 test runs so far, which isn't that much for a reproduce rate > > below 20% ... > > > > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28 > > > > So I guess we go with the TPL version for the coming stable tag and > > leave any improvements for later ... > > > > Yeah, this was not going to make the stable tag in any case. > > The boot speed regression seems odd, though - this is effectively UP > code so there shouldn't be any contention, the only thing this patch > does is ensure that the critical section is restarted if it was > interrupted > > Are we comparing apples with apples here? Hoping for an answer tomorrow, asked QE folks the same thing in comment 30. The first test build (comment 11) essentially is stable-2023-05 with a52044a9e602 ("OvmfPkg/IoMmuDxe: add locking to IoMmuAllocateBounceBuffer") cherry-picked, the second test build (comment 27) is stable-2023-05 + a52044a9e602 + this patch. take care, Gerd -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107986): https://edk2.groups.io/g/devel/message/107986 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Thu, Jul 20, 2023 at 2:46 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > Instead of relying on raising the TPL to protect the critical sections > that manipulate the global bitmask that keeps track of bounce buffer > allocations, use compare-and-exchange to manage the global variable, and > tweak the logic to line up with that. > > Given that IoMmuDxe implements a singleton protocol that is shared > between multiple drivers, and considering the elaborate and confusing > requirements in the UEFP spec regarding TPL levels at which protocol > methods may be invoked, not relying on TPL levels at all is a more Really good change (I had thought of this when reading through the other IoMmuDxe thread), but I'm wondering if this warrants the addition of bit ops (AND, OR at least) to SynchronizationLib? Sidenote: the compiler can detect if atomic intrinsics use the "fetch" value in (fetch_or, or_fetch, etc) and thus use smaller, more efficient instruction sequences (https://godbolt.org/z/zWTTWacEd). if only we could use those... -- Pedro -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107134): https://edk2.groups.io/g/devel/message/107134 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Sat, 22 Jul 2023 at 00:55, Pedro Falcato <pedro.falcato@gmail.com> wrote: > > On Thu, Jul 20, 2023 at 2:46 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > Instead of relying on raising the TPL to protect the critical sections > > that manipulate the global bitmask that keeps track of bounce buffer > > allocations, use compare-and-exchange to manage the global variable, and > > tweak the logic to line up with that. > > > > Given that IoMmuDxe implements a singleton protocol that is shared > > between multiple drivers, and considering the elaborate and confusing > > requirements in the UEFP spec regarding TPL levels at which protocol > > methods may be invoked, not relying on TPL levels at all is a more > > Really good change (I had thought of this when reading through the > other IoMmuDxe thread), but I'm wondering if this warrants the > addition of bit ops (AND, OR at least) to SynchronizationLib? > Thanks. Atomic AND/OR would only help on the free path in this case. > Sidenote: the compiler can detect if atomic intrinsics use the "fetch" > value in (fetch_or, or_fetch, etc) and thus use smaller, more > efficient instruction sequences (https://godbolt.org/z/zWTTWacEd). if > only we could use those... > In principle, I'd agree but these are rarely used so I'm not sure it's worth the effort refactoring this. -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107171): https://edk2.groups.io/g/devel/message/107171 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
On Mon, Jul 24, 2023 at 3:21 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Sat, 22 Jul 2023 at 00:55, Pedro Falcato <pedro.falcato@gmail.com> wrote: > > > > On Thu, Jul 20, 2023 at 2:46 PM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > Instead of relying on raising the TPL to protect the critical sections > > > that manipulate the global bitmask that keeps track of bounce buffer > > > allocations, use compare-and-exchange to manage the global variable, and > > > tweak the logic to line up with that. > > > > > > Given that IoMmuDxe implements a singleton protocol that is shared > > > between multiple drivers, and considering the elaborate and confusing > > > requirements in the UEFP spec regarding TPL levels at which protocol > > > methods may be invoked, not relying on TPL levels at all is a more > > > > Really good change (I had thought of this when reading through the > > other IoMmuDxe thread), but I'm wondering if this warrants the > > addition of bit ops (AND, OR at least) to SynchronizationLib? > > > > Thanks. > > Atomic AND/OR would only help on the free path in this case. > > > Sidenote: the compiler can detect if atomic intrinsics use the "fetch" > > value in (fetch_or, or_fetch, etc) and thus use smaller, more > > efficient instruction sequences (https://godbolt.org/z/zWTTWacEd). if > > only we could use those... > > > > In principle, I'd agree but these are rarely used so I'm not sure it's > worth the effort refactoring this. Good point, let's skip any extensive refactoring for now. Acked-by: Pedro Falcato <pedro.falcato@gmail.com> -- Pedro -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#107184): https://edk2.groups.io/g/devel/message/107184 Mute This Topic: https://groups.io/mt/100256049/1787277 Group Owner: devel+owner@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org] -=-=-=-=-=-=-=-=-=-=-=-
© 2016 - 2024 Red Hat, Inc.