OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

[edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 9 months, 2 weeks ago

Instead of relying on raising the TPL to protect the critical sections
that manipulate the global bitmask that keeps track of bounce buffer
allocations, use compare-and-exchange to manage the global variable, and
tweak the logic to line up with that.

Given that IoMmuDxe implements a singleton protocol that is shared
between multiple drivers, and considering the elaborate and confusing
requirements in the UEFP spec regarding TPL levels at which protocol
methods may be invoked, not relying on TPL levels at all is a more
robust approach in this case.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Jiewen Yao <jiewen.yao@intel.com>
Cc: Michael Brown <mcb30@ipxe.org>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2211060
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 OvmfPkg/IoMmuDxe/IoMmuBuffer.c | 100 ++++++++++++--------
 OvmfPkg/IoMmuDxe/IoMmuDxe.inf  |   1 +
 2 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
index 103003cae376b93f..2764c35044ac23a7 100644
--- a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
+++ b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
@@ -12,6 +12,7 @@
 #include <Library/MemEncryptSevLib.h>
 #include <Library/MemEncryptTdxLib.h>
 #include <Library/PcdLib.h>
+#include <Library/SynchronizationLib.h>
 #include <Library/UefiBootServicesTableLib.h>
 #include "IoMmuInternal.h"
 
@@ -268,16 +269,17 @@ InternalAllocateBuffer (
   IN  EFI_ALLOCATE_TYPE        Type,
   IN  EFI_MEMORY_TYPE          MemoryType,
   IN  UINTN                    Pages,
-  IN OUT UINT32                *ReservedMemBitmap,
+  OUT UINT32                   *ReservedMemBit,
   IN OUT EFI_PHYSICAL_ADDRESS  *PhysicalAddress
   )
 {
   UINT32                    MemBitmap;
+  UINT32                    ReservedMemBitmap;
   UINT8                     Index;
   IOMMU_RESERVED_MEM_RANGE  *MemRange;
   UINTN                     PagesOfLastMemRange;
 
-  *ReservedMemBitmap = 0;
+  *ReservedMemBit = 0;
 
   if (Pages == 0) {
     ASSERT (FALSE);
@@ -309,23 +311,31 @@ InternalAllocateBuffer (
 
   MemRange = &mReservedMemRanges[Index];
 
-  if ((mReservedMemBitmap & MemRange->BitmapMask) == MemRange->BitmapMask) {
-    // The reserved memory is exausted. Turn to legacy allocate.
-    goto LegacyAllocateBuffer;
-  }
+  do {
+    ReservedMemBitmap = mReservedMemBitmap;
 
-  MemBitmap = (mReservedMemBitmap & MemRange->BitmapMask) >> MemRange->Shift;
+    if ((ReservedMemBitmap & MemRange->BitmapMask) == MemRange->BitmapMask) {
+      // The reserved memory is exhausted. Turn to legacy allocate.
+      goto LegacyAllocateBuffer;
+    }
+
+    MemBitmap = (ReservedMemBitmap & MemRange->BitmapMask) >> MemRange->Shift;
 
-  for (Index = 0; Index < MemRange->Slots; Index++) {
-    if ((MemBitmap & (UINT8)(1<<Index)) == 0) {
-      break;
+    for (Index = 0; Index < MemRange->Slots; Index++) {
+      if ((MemBitmap & (UINT8)(1<<Index)) == 0) {
+        break;
+      }
     }
-  }
 
-  ASSERT (Index != MemRange->Slots);
+    ASSERT (Index != MemRange->Slots);
 
-  *PhysicalAddress   = MemRange->StartAddressOfMemRange + Index * SIZE_OF_MEM_RANGE (MemRange) + MemRange->HeaderSize;
-  *ReservedMemBitmap = (UINT32)(1 << (Index + MemRange->Shift));
+    *PhysicalAddress = MemRange->StartAddressOfMemRange + Index * SIZE_OF_MEM_RANGE (MemRange) + MemRange->HeaderSize;
+    *ReservedMemBit  = (UINT32)(1 << (Index + MemRange->Shift));
+  } while (ReservedMemBitmap != InterlockedCompareExchange32 (
+                                  &mReservedMemBitmap,
+                                  ReservedMemBitmap,
+                                  ReservedMemBitmap | *ReservedMemBit
+                                  ));
 
   DEBUG ((
     DEBUG_VERBOSE,
@@ -334,16 +344,16 @@ InternalAllocateBuffer (
     MemRange->DataSize,
     *PhysicalAddress,
     Pages,
-    *ReservedMemBitmap,
-    mReservedMemBitmap,
-    mReservedMemBitmap | *ReservedMemBitmap
+    *ReservedMemBit,
+    ReservedMemBitmap,
+    ReservedMemBitmap | *ReservedMemBit
     ));
 
   return EFI_SUCCESS;
 
 LegacyAllocateBuffer:
 
-  *ReservedMemBitmap = 0;
+  *ReservedMemBit = 0;
   return gBS->AllocatePages (Type, MemoryType, Pages, PhysicalAddress);
 }
 
@@ -366,27 +376,41 @@ IoMmuAllocateBounceBuffer (
   )
 {
   EFI_STATUS  Status;
-  UINT32      ReservedMemBitmap;
-  EFI_TPL     OldTpl;
-
-  OldTpl            = gBS->RaiseTPL (TPL_NOTIFY);
-  ReservedMemBitmap = 0;
-  Status            = InternalAllocateBuffer (
-                        Type,
-                        MemoryType,
-                        MapInfo->NumberOfPages,
-                        &ReservedMemBitmap,
-                        &MapInfo->PlainTextAddress
-                        );
-  MapInfo->ReservedMemBitmap = ReservedMemBitmap;
-  mReservedMemBitmap        |= ReservedMemBitmap;
-  gBS->RestoreTPL (OldTpl);
 
+  Status = InternalAllocateBuffer (
+             Type,
+             MemoryType,
+             MapInfo->NumberOfPages,
+             &MapInfo->ReservedMemBitmap,
+             &MapInfo->PlainTextAddress
+             );
   ASSERT (Status == EFI_SUCCESS);
 
   return Status;
 }
 
+/**
+ * Clear a bit in the reserved memory bitmap in a thread safe manner
+ *
+ * @param ReservedMemBit  The bit to clear
+ */
+STATIC
+VOID
+ClearReservedMemBit (
+  IN  UINT32  ReservedMemBit
+  )
+{
+  UINT32  ReservedMemBitmap;
+
+  do {
+    ReservedMemBitmap = mReservedMemBitmap;
+  } while (ReservedMemBitmap != InterlockedCompareExchange32 (
+                                  &mReservedMemBitmap,
+                                  ReservedMemBitmap,
+                                  ReservedMemBitmap & ~ReservedMemBit
+                                  ));
+}
+
 /**
  * Free the bounce buffer allocated in IoMmuAllocateBounceBuffer.
  *
@@ -398,8 +422,6 @@ IoMmuFreeBounceBuffer (
   IN OUT     MAP_INFO  *MapInfo
   )
 {
-  EFI_TPL  OldTpl;
-
   if (MapInfo->ReservedMemBitmap == 0) {
     gBS->FreePages (MapInfo->PlainTextAddress, MapInfo->NumberOfPages);
   } else {
@@ -412,11 +434,9 @@ IoMmuFreeBounceBuffer (
       mReservedMemBitmap,
       mReservedMemBitmap & ((UINT32)(~MapInfo->ReservedMemBitmap))
       ));
-    OldTpl                     = gBS->RaiseTPL (TPL_NOTIFY);
     MapInfo->PlainTextAddress  = 0;
-    mReservedMemBitmap        &= (UINT32)(~MapInfo->ReservedMemBitmap);
+    ClearReservedMemBit (MapInfo->ReservedMemBitmap);
     MapInfo->ReservedMemBitmap = 0;
-    gBS->RestoreTPL (OldTpl);
   }
 
   return EFI_SUCCESS;
@@ -452,8 +472,6 @@ IoMmuAllocateCommonBuffer (
              );
   ASSERT (Status == EFI_SUCCESS);
 
-  mReservedMemBitmap |= *ReservedMemBitmap;
-
   if (*ReservedMemBitmap != 0) {
     *PhysicalAddress -= SIZE_4KB;
   }
@@ -494,7 +512,7 @@ IoMmuFreeCommonBuffer (
     mReservedMemBitmap & ((UINT32)(~CommonBufferHeader->ReservedMemBitmap))
     ));
 
-  mReservedMemBitmap &= (UINT32)(~CommonBufferHeader->ReservedMemBitmap);
+  ClearReservedMemBit (CommonBufferHeader->ReservedMemBitmap);
   return EFI_SUCCESS;
 
 LegacyFreeCommonBuffer:
diff --git a/OvmfPkg/IoMmuDxe/IoMmuDxe.inf b/OvmfPkg/IoMmuDxe/IoMmuDxe.inf
index 17fca52856925da0..d08f7e59e2b665f4 100644
--- a/OvmfPkg/IoMmuDxe/IoMmuDxe.inf
+++ b/OvmfPkg/IoMmuDxe/IoMmuDxe.inf
@@ -35,6 +35,7 @@ [LibraryClasses]
   MemEncryptSevLib
   MemEncryptTdxLib
   MemoryAllocationLib
+  SynchronizationLib
   UefiBootServicesTableLib
   UefiDriverEntryPoint
 
-- 
2.39.2



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107100): https://edk2.groups.io/g/devel/message/107100
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Gerd Hoffmann 8 months, 1 week ago

On Thu, Jul 20, 2023 at 03:45:57PM +0200, Ard Biesheuvel wrote:
> Instead of relying on raising the TPL to protect the critical sections
> that manipulate the global bitmask that keeps track of bounce buffer
> allocations, use compare-and-exchange to manage the global variable, and
> tweak the logic to line up with that.
> 
> Given that IoMmuDxe implements a singleton protocol that is shared
> between multiple drivers, and considering the elaborate and confusing
> requirements in the UEFP spec regarding TPL levels at which protocol
> methods may be invoked, not relying on TPL levels at all is a more
> robust approach in this case.
> 
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Jiewen Yao <jiewen.yao@intel.com>
> Cc: Michael Brown <mcb30@ipxe.org>
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2211060
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>

Looks good to me.

What is the status?  Merged?  Or waiting for testing still?  If so I can
create a test build with the patch and ask our QE department to check
it.

take care,
  Gerd



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107946): https://edk2.groups.io/g/devel/message/107946
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 8 months, 1 week ago

On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> On Thu, Jul 20, 2023 at 03:45:57PM +0200, Ard Biesheuvel wrote:
> > Instead of relying on raising the TPL to protect the critical sections
> > that manipulate the global bitmask that keeps track of bounce buffer
> > allocations, use compare-and-exchange to manage the global variable, and
> > tweak the logic to line up with that.
> >
> > Given that IoMmuDxe implements a singleton protocol that is shared
> > between multiple drivers, and considering the elaborate and confusing
> > requirements in the UEFP spec regarding TPL levels at which protocol
> > methods may be invoked, not relying on TPL levels at all is a more
> > robust approach in this case.
> >
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: Jiewen Yao <jiewen.yao@intel.com>
> > Cc: Michael Brown <mcb30@ipxe.org>
> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2211060
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
>
> Looks good to me.
>
> What is the status?  Merged?  Or waiting for testing still?  If so I can
> create a test build with the patch and ask our QE department to check
> it.
>

Still waiting for testing, so yes, please test.


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107948): https://edk2.groups.io/g/devel/message/107948
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Gerd Hoffmann 8 months, 1 week ago

On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote:
> On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote:
> >
> > Looks good to me.
> >
> > What is the status?  Merged?  Or waiting for testing still?  If so I can
> > create a test build with the patch and ask our QE department to check
> > it.
> >
> 
> Still waiting for testing, so yes, please test.

Hmm, QE reports back it slows down the boot alot.  No boot hangs yet
with 12 test runs so far, which isn't that much for a reproduce rate
below 20% ...

https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28

So I guess we go with the TPL version for the coming stable tag and
leave any improvements for later ...

take care,
  Gerd



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107976): https://edk2.groups.io/g/devel/message/107976
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 8 months, 1 week ago

On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote:
> > On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote:
> > >
> > > Looks good to me.
> > >
> > > What is the status?  Merged?  Or waiting for testing still?  If so I can
> > > create a test build with the patch and ask our QE department to check
> > > it.
> > >
> >
> > Still waiting for testing, so yes, please test.
>
> Hmm, QE reports back it slows down the boot alot.  No boot hangs yet
> with 12 test runs so far, which isn't that much for a reproduce rate
> below 20% ...
>
> https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28
>
> So I guess we go with the TPL version for the coming stable tag and
> leave any improvements for later ...
>

Yeah, this was not going to make the stable tag in any case.

The boot speed regression seems odd, though - this is effectively UP
code so there shouldn't be any contention, the only thing this patch
does is ensure that the critical section is restarted if it was
interrupted

Are we comparing apples with apples here?


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107980): https://edk2.groups.io/g/devel/message/107980
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Pedro Falcato 8 months, 1 week ago

On Wed, Aug 23, 2023 at 4:12 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote:
> > > On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote:
> > > >
> > > > Looks good to me.
> > > >
> > > > What is the status?  Merged?  Or waiting for testing still?  If so I can
> > > > create a test build with the patch and ask our QE department to check
> > > > it.
> > > >
> > >
> > > Still waiting for testing, so yes, please test.
> >
> > Hmm, QE reports back it slows down the boot alot.  No boot hangs yet
> > with 12 test runs so far, which isn't that much for a reproduce rate
> > below 20% ...
> >
> > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28
> >
> > So I guess we go with the TPL version for the coming stable tag and
> > leave any improvements for later ...
> >
>
> Yeah, this was not going to make the stable tag in any case.
>
> The boot speed regression seems odd, though - this is effectively UP
> code so there shouldn't be any contention, the only thing this patch
> does is ensure that the critical section is restarted if it was
> interrupted

FWIW: Given completely correct logic, straightforward logic, a lock
cmpxchg is much slower(3-4x) than an a non-lock cmpxchg, which itself
is around 2x as slow as a regular relaxed load + store.
See https://gist.github.com/heatd/49c9be23ccb1f4ad8dfeac231da2647a for
a nice fun test benchmark.

HOWEVER, given this is likely in IO paths, I would *really* not expect
this to make a difference, right? Even virtio drivers will themselves
trap with a VMEXIT whenever you touch a hardware register...

Gerd, could you folks get a perf kvm (perf-kvm(1)) recording out of
that OVMF build? Assuming you can get that thing to work, that is,
personally it mysteriously stopped working 6 years ago for me :)

-- 
Pedro

-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107988): https://edk2.groups.io/g/devel/message/107988
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Gerd Hoffmann 8 months, 1 week ago

On Wed, Aug 23, 2023 at 07:10:52PM +0100, Pedro Falcato wrote:
> On Wed, Aug 23, 2023 at 4:12 PM Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> > >
> > > Hmm, QE reports back it slows down the boot alot.  No boot hangs yet
> > > with 12 test runs so far, which isn't that much for a reproduce rate
> > > below 20% ...
> > >
> > > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28
> > >
> > > So I guess we go with the TPL version for the coming stable tag and
> > > leave any improvements for later ...
> >
> > Yeah, this was not going to make the stable tag in any case.
> >
> > The boot speed regression seems odd, though - this is effectively UP
> > code so there shouldn't be any contention, the only thing this patch
> > does is ensure that the critical section is restarted if it was
> > interrupted

QE reported back boot times without this patch are ~20-30 seconds,
with this patch it can be more than 3 minutes.

> FWIW: Given completely correct logic, straightforward logic, a lock
> cmpxchg is much slower(3-4x) than an a non-lock cmpxchg, which itself
> is around 2x as slow as a regular relaxed load + store.
> See https://gist.github.com/heatd/49c9be23ccb1f4ad8dfeac231da2647a for
> a nice fun test benchmark.

Also note that IoMmuDxe is only used in case memory encryption is
enabled (I/O uses unencrypted bounce buffers so the host can virtio
emulation can read/write stuff there).  Maybe that affects performance
too.

Cc'ing the AMD people for comments on that.

> HOWEVER, given this is likely in IO paths, I would *really* not expect
> this to make a difference, right? Even virtio drivers will themselves
> trap with a VMEXIT whenever you touch a hardware register...

It's only a single VMEXIT per I/O request (ring the doorbell after
adding a request to the ring), but still, this is heavy enough that
the cmpxchg difference should be in the noise.

> Gerd, could you folks get a perf kvm (perf-kvm(1)) recording out of
> that OVMF build? Assuming you can get that thing to work, that is,
> personally it mysteriously stopped working 6 years ago for me :)

I can try hack IoMmuDxe so it is used unconditionally and try reproduce
locally (without sev-capable hardware), but most likely not this week.

take care,
  Gerd



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#108000): https://edk2.groups.io/g/devel/message/108000
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 8 months, 1 week ago

On Thu, 24 Aug 2023 at 10:06, Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> On Wed, Aug 23, 2023 at 07:10:52PM +0100, Pedro Falcato wrote:
> > On Wed, Aug 23, 2023 at 4:12 PM Ard Biesheuvel <ardb@kernel.org> wrote:
> > >
> > > On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> > > >
> > > > Hmm, QE reports back it slows down the boot alot.  No boot hangs yet
> > > > with 12 test runs so far, which isn't that much for a reproduce rate
> > > > below 20% ...
> > > >
> > > > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28
> > > >
> > > > So I guess we go with the TPL version for the coming stable tag and
> > > > leave any improvements for later ...
> > >
> > > Yeah, this was not going to make the stable tag in any case.
> > >
> > > The boot speed regression seems odd, though - this is effectively UP
> > > code so there shouldn't be any contention, the only thing this patch
> > > does is ensure that the critical section is restarted if it was
> > > interrupted
>
> QE reported back boot times without this patch are ~20-30 seconds,
> with this patch it can be more than 3 minutes.
>
> > FWIW: Given completely correct logic, straightforward logic, a lock
> > cmpxchg is much slower(3-4x) than an a non-lock cmpxchg, which itself
> > is around 2x as slow as a regular relaxed load + store.
> > See https://gist.github.com/heatd/49c9be23ccb1f4ad8dfeac231da2647a for
> > a nice fun test benchmark.
>
> Also note that IoMmuDxe is only used in case memory encryption is
> enabled (I/O uses unencrypted bounce buffers so the host can virtio
> emulation can read/write stuff there).  Maybe that affects performance
> too.
>
> Cc'ing the AMD people for comments on that.
>
> > HOWEVER, given this is likely in IO paths, I would *really* not expect
> > this to make a difference, right? Even virtio drivers will themselves
> > trap with a VMEXIT whenever you touch a hardware register...
>
> It's only a single VMEXIT per I/O request (ring the doorbell after
> adding a request to the ring), but still, this is heavy enough that
> the cmpxchg difference should be in the noise.
>
> > Gerd, could you folks get a perf kvm (perf-kvm(1)) recording out of
> > that OVMF build? Assuming you can get that thing to work, that is,
> > personally it mysteriously stopped working 6 years ago for me :)
>
> I can try hack IoMmuDxe so it is used unconditionally and try reproduce
> locally (without sev-capable hardware), but most likely not this week.
>

I have tried the patch below, and I don't see any slowdowns with or
without the patch, running both DEBUG and RELEASE builds under
ordinary KVM/nested-virt. Note that the change in the first hunk will
cause the ASSERT()s removed in the other hunks to trigger so the code
is definitely being exercised.


diff --git a/OvmfPkg/IoMmuDxe/IoMmuDxe.c b/OvmfPkg/IoMmuDxe/IoMmuDxe.c
index aab6d8b90687..c1082b733d3a 100644
--- a/OvmfPkg/IoMmuDxe/IoMmuDxe.c
+++ b/OvmfPkg/IoMmuDxe/IoMmuDxe.c
@@ -25,7 +25,7 @@ IoMmuDxeEntryPoint (
   // When SEV or TDX is enabled, install IoMmu protocol otherwise install the
   // placeholder protocol so that other dependent module can run.
   //
-  if (MemEncryptSevIsEnabled () || MemEncryptTdxIsEnabled ()) {
+  if (TRUE || MemEncryptSevIsEnabled () || MemEncryptTdxIsEnabled ()) {
     Status = InstallIoMmuProtocol ();
   } else {
     Handle = NULL;
diff --git a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
index 2764c35044ac..27fe068362ff 100644
--- a/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
+++ b/OvmfPkg/IoMmuDxe/IoMmuBuffer.c
@@ -184,7 +184,7 @@ IoMmuInitReservedSharedMem (
                    );
         ASSERT (!EFI_ERROR (Status));
       } else {
-        ASSERT (FALSE);
+        //ASSERT (FALSE);
       }
     }

@@ -233,7 +233,7 @@ IoMmuReleaseReservedSharedMem (
                    );
         ASSERT (!EFI_ERROR (Status));
       } else {
-        ASSERT (FALSE);
+        //ASSERT (FALSE);
       }
     }
   }


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#108002): https://edk2.groups.io/g/devel/message/108002
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Gerd Hoffmann 8 months ago

  Hi,

> I have tried the patch below, and I don't see any slowdowns with or
> without the patch, running both DEBUG and RELEASE builds under
> ordinary KVM/nested-virt. Note that the change in the first hunk will
> cause the ASSERT()s removed in the other hunks to trigger so the code
> is definitely being exercised.

QE team tested again with fresh builds, the slowdown isn't there any
more.  Not fully clear where it came from, but most likely not from
this patch.

Only 20 successful test runs so far, which a good start, but with
a reproduce rate below 20% not that much.    I'll post an update
when it I get more results.

take care,
  Gerd



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#108054): https://edk2.groups.io/g/devel/message/108054
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 8 months ago

On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote:
>
>   Hi,
>
> > I have tried the patch below, and I don't see any slowdowns with or
> > without the patch, running both DEBUG and RELEASE builds under
> > ordinary KVM/nested-virt. Note that the change in the first hunk will
> > cause the ASSERT()s removed in the other hunks to trigger so the code
> > is definitely being exercised.
>
> QE team tested again with fresh builds, the slowdown isn't there any
> more.  Not fully clear where it came from, but most likely not from
> this patch.
>
> Only 20 successful test runs so far, which a good start, but with
> a reproduce rate below 20% not that much.    I'll post an update
> when it I get more results.
>

Excellent, thanks for the update.


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#108057): https://edk2.groups.io/g/devel/message/108057
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 8 months ago

On Mon, 28 Aug 2023 at 13:13, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote:
> >
> >   Hi,
> >
> > > I have tried the patch below, and I don't see any slowdowns with or
> > > without the patch, running both DEBUG and RELEASE builds under
> > > ordinary KVM/nested-virt. Note that the change in the first hunk will
> > > cause the ASSERT()s removed in the other hunks to trigger so the code
> > > is definitely being exercised.
> >
> > QE team tested again with fresh builds, the slowdown isn't there any
> > more.  Not fully clear where it came from, but most likely not from
> > this patch.
> >
> > Only 20 successful test runs so far, which a good start, but with
> > a reproduce rate below 20% not that much.    I'll post an update
> > when it I get more results.
> >
>
> Excellent, thanks for the update.

Given that the QE engineer appears to have lost interest, I intend to
merge this patch as-is unless anyone feels this is premature.


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#108197): https://edk2.groups.io/g/devel/message/108197
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Gerd Hoffmann 7 months, 4 weeks ago

On Thu, Aug 31, 2023 at 06:01:56PM +0200, Ard Biesheuvel wrote:
> On Mon, 28 Aug 2023 at 13:13, Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote:
> > >
> > >   Hi,
> > >
> > > > I have tried the patch below, and I don't see any slowdowns with or
> > > > without the patch, running both DEBUG and RELEASE builds under
> > > > ordinary KVM/nested-virt. Note that the change in the first hunk will
> > > > cause the ASSERT()s removed in the other hunks to trigger so the code
> > > > is definitely being exercised.
> > >
> > > QE team tested again with fresh builds, the slowdown isn't there any
> > > more.  Not fully clear where it came from, but most likely not from
> > > this patch.
> > >
> > > Only 20 successful test runs so far, which a good start, but with
> > > a reproduce rate below 20% not that much.    I'll post an update
> > > when it I get more results.
> > >
> >
> > Excellent, thanks for the update.
> 
> Given that the QE engineer appears to have lost interest, I intend to
> merge this patch as-is unless anyone feels this is premature.

I was offline a few days.  QE actually reported back: 100 test runs
without problems.

Tested-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

take care,
  Gerd



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#108256): https://edk2.groups.io/g/devel/message/108256
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 7 months, 4 weeks ago

On Mon, 4 Sept 2023 at 13:45, Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> On Thu, Aug 31, 2023 at 06:01:56PM +0200, Ard Biesheuvel wrote:
> > On Mon, 28 Aug 2023 at 13:13, Ard Biesheuvel <ardb@kernel.org> wrote:
> > >
> > > On Mon, 28 Aug 2023 at 11:16, Gerd Hoffmann <kraxel@redhat.com> wrote:
> > > >
> > > >   Hi,
> > > >
> > > > > I have tried the patch below, and I don't see any slowdowns with or
> > > > > without the patch, running both DEBUG and RELEASE builds under
> > > > > ordinary KVM/nested-virt. Note that the change in the first hunk will
> > > > > cause the ASSERT()s removed in the other hunks to trigger so the code
> > > > > is definitely being exercised.
> > > >
> > > > QE team tested again with fresh builds, the slowdown isn't there any
> > > > more.  Not fully clear where it came from, but most likely not from
> > > > this patch.
> > > >
> > > > Only 20 successful test runs so far, which a good start, but with
> > > > a reproduce rate below 20% not that much.    I'll post an update
> > > > when it I get more results.
> > > >
> > >
> > > Excellent, thanks for the update.
> >
> > Given that the QE engineer appears to have lost interest, I intend to
> > merge this patch as-is unless anyone feels this is premature.
>
> I was offline a few days.  QE actually reported back: 100 test runs
> without problems.
>
> Tested-by: Gerd Hoffmann <kraxel@redhat.com>
> Acked-by: Gerd Hoffmann <kraxel@redhat.com>
>

Thanks for reporting back. This is merged now.


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#108257): https://edk2.groups.io/g/devel/message/108257
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Gerd Hoffmann 8 months, 1 week ago

On Wed, Aug 23, 2023 at 05:11:49PM +0200, Ard Biesheuvel wrote:
> On Wed, 23 Aug 2023 at 13:08, Gerd Hoffmann <kraxel@redhat.com> wrote:
> >
> > On Tue, Aug 22, 2023 at 09:57:12AM +0200, Ard Biesheuvel wrote:
> > > On Tue, 22 Aug 2023 at 08:25, Gerd Hoffmann <kraxel@redhat.com> wrote:
> > > >
> > > > Looks good to me.
> > > >
> > > > What is the status?  Merged?  Or waiting for testing still?  If so I can
> > > > create a test build with the patch and ask our QE department to check
> > > > it.
> > > >
> > >
> > > Still waiting for testing, so yes, please test.
> >
> > Hmm, QE reports back it slows down the boot alot.  No boot hangs yet
> > with 12 test runs so far, which isn't that much for a reproduce rate
> > below 20% ...
> >
> > https://bugzilla.redhat.com//show_bug.cgi?id=2211060#c28
> >
> > So I guess we go with the TPL version for the coming stable tag and
> > leave any improvements for later ...
> >
> 
> Yeah, this was not going to make the stable tag in any case.
> 
> The boot speed regression seems odd, though - this is effectively UP
> code so there shouldn't be any contention, the only thing this patch
> does is ensure that the critical section is restarted if it was
> interrupted
> 
> Are we comparing apples with apples here?

Hoping for an answer tomorrow, asked QE folks the same thing in comment
30.  The first test build (comment 11) essentially is stable-2023-05 with
a52044a9e602 ("OvmfPkg/IoMmuDxe: add locking to
IoMmuAllocateBounceBuffer") cherry-picked, the second test build
(comment 27) is stable-2023-05 + a52044a9e602 + this patch.

take care,
  Gerd



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107986): https://edk2.groups.io/g/devel/message/107986
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Pedro Falcato 9 months, 1 week ago

On Thu, Jul 20, 2023 at 2:46 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> Instead of relying on raising the TPL to protect the critical sections
> that manipulate the global bitmask that keeps track of bounce buffer
> allocations, use compare-and-exchange to manage the global variable, and
> tweak the logic to line up with that.
>
> Given that IoMmuDxe implements a singleton protocol that is shared
> between multiple drivers, and considering the elaborate and confusing
> requirements in the UEFP spec regarding TPL levels at which protocol
> methods may be invoked, not relying on TPL levels at all is a more

Really good change (I had thought of this when reading through the
other IoMmuDxe thread), but I'm wondering if this warrants the
addition of bit ops (AND, OR at least) to SynchronizationLib?

Sidenote: the compiler can detect if atomic intrinsics use the "fetch"
value in (fetch_or, or_fetch, etc) and thus use smaller, more
efficient instruction sequences (https://godbolt.org/z/zWTTWacEd). if
only we could use those...

-- 
Pedro


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107134): https://edk2.groups.io/g/devel/message/107134
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Ard Biesheuvel 9 months, 1 week ago

On Sat, 22 Jul 2023 at 00:55, Pedro Falcato <pedro.falcato@gmail.com> wrote:
>
> On Thu, Jul 20, 2023 at 2:46 PM Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > Instead of relying on raising the TPL to protect the critical sections
> > that manipulate the global bitmask that keeps track of bounce buffer
> > allocations, use compare-and-exchange to manage the global variable, and
> > tweak the logic to line up with that.
> >
> > Given that IoMmuDxe implements a singleton protocol that is shared
> > between multiple drivers, and considering the elaborate and confusing
> > requirements in the UEFP spec regarding TPL levels at which protocol
> > methods may be invoked, not relying on TPL levels at all is a more
>
> Really good change (I had thought of this when reading through the
> other IoMmuDxe thread), but I'm wondering if this warrants the
> addition of bit ops (AND, OR at least) to SynchronizationLib?
>

Thanks.

Atomic AND/OR would only help on the free path in this case.

> Sidenote: the compiler can detect if atomic intrinsics use the "fetch"
> value in (fetch_or, or_fetch, etc) and thus use smaller, more
> efficient instruction sequences (https://godbolt.org/z/zWTTWacEd). if
> only we could use those...
>

In principle, I'd agree but these are rarely used so I'm not sure it's
worth the effort refactoring this.


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107171): https://edk2.groups.io/g/devel/message/107171
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [RFC/RFT PATCH] OvmfPkg/IoMmuDxe: don't rely on TPLs for re-entrancy

Posted by Pedro Falcato 9 months, 1 week ago

On Mon, Jul 24, 2023 at 3:21 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Sat, 22 Jul 2023 at 00:55, Pedro Falcato <pedro.falcato@gmail.com> wrote:
> >
> > On Thu, Jul 20, 2023 at 2:46 PM Ard Biesheuvel <ardb@kernel.org> wrote:
> > >
> > > Instead of relying on raising the TPL to protect the critical sections
> > > that manipulate the global bitmask that keeps track of bounce buffer
> > > allocations, use compare-and-exchange to manage the global variable, and
> > > tweak the logic to line up with that.
> > >
> > > Given that IoMmuDxe implements a singleton protocol that is shared
> > > between multiple drivers, and considering the elaborate and confusing
> > > requirements in the UEFP spec regarding TPL levels at which protocol
> > > methods may be invoked, not relying on TPL levels at all is a more
> >
> > Really good change (I had thought of this when reading through the
> > other IoMmuDxe thread), but I'm wondering if this warrants the
> > addition of bit ops (AND, OR at least) to SynchronizationLib?
> >
>
> Thanks.
>
> Atomic AND/OR would only help on the free path in this case.
>
> > Sidenote: the compiler can detect if atomic intrinsics use the "fetch"
> > value in (fetch_or, or_fetch, etc) and thus use smaller, more
> > efficient instruction sequences (https://godbolt.org/z/zWTTWacEd). if
> > only we could use those...
> >
>
> In principle, I'd agree but these are rarely used so I'm not sure it's
> worth the effort refactoring this.

Good point, let's skip any extensive refactoring for now.

Acked-by: Pedro Falcato <pedro.falcato@gmail.com>

-- 
Pedro


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#107184): https://edk2.groups.io/g/devel/message/107184
Mute This Topic: https://groups.io/mt/100256049/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-