[edk2-devel] [PATCH] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time

Dong, Eric posted 1 patch 4 years, 4 months ago
Failed in applying to current master (apply log)
There is a newer version of this series
UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c        | 67 ++++++++++++++++++--
UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h   | 15 +++++
UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf |  1 +
UefiCpuPkg/UefiCpuPkg.dec                    |  4 ++
UefiCpuPkg/UefiCpuPkg.uni                    |  1 +
5 files changed, 84 insertions(+), 4 deletions(-)
[edk2-devel] [PATCH] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time
Posted by Dong, Eric 4 years, 4 months ago
REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388

Token is new introduced by MM MP Protocol. Current logic allocate Token
every time when need to use it. The logic caused SMI latency raised to
very high. Update logic to allocate Token buffer at driver's entry point.
Later use the token from the allocated token buffer. Only when all the
buffer have been used, then need to allocate new buffer.

Signed-off-by: Eric Dong <eric.dong@intel.com>
Cc: Ray Ni <ray.ni@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
---
V3 changes:
  Introduce PCD to control the pre allocated toke buffer size.

v2 changes:
  Minor update based on comments.

 UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c        | 67 ++++++++++++++++++--
 UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h   | 15 +++++
 UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf |  1 +
 UefiCpuPkg/UefiCpuPkg.dec                    |  4 ++
 UefiCpuPkg/UefiCpuPkg.uni                    |  1 +
 5 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
index d8d2b6f444..33aad3f3e9 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
@@ -492,6 +492,24 @@ FreeTokens (
 {
   LIST_ENTRY            *Link;
   PROCEDURE_TOKEN       *ProcToken;
+  TOKEN_BUFFER          *TokenBuf;
+
+  //
+  // Not free the buffer, just clear the UsedTokenNum. In order to
+  // avoid triggering allocate action when we need to use the token,
+  // do not free the buffer.
+  //
+  gSmmCpuPrivate->UsedTokenNum = 0;
+
+  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
+  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
+    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
+
+    Link = RemoveEntryList (&TokenBuf->Link);
+
+    FreePool (TokenBuf->Buffer);
+    FreePool (TokenBuf);
+  }
 
   while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
     Link = GetFirstNode (&gSmmCpuPrivate->TokenList);
@@ -499,7 +517,6 @@ FreeTokens (
 
     RemoveEntryList (&ProcToken->Link);
 
-    FreePool ((VOID *)ProcToken->ProcedureToken);
     FreePool (ProcToken);
   }
 }
@@ -1115,13 +1132,37 @@ CreateToken (
   VOID
   )
 {
-  PROCEDURE_TOKEN    *ProcToken;
+  PROCEDURE_TOKEN     *ProcToken;
   SPIN_LOCK           *CpuToken;
   UINTN               SpinLockSize;
+  TOKEN_BUFFER        *TokenBuf;
+  UINT32              TokenCountPerChunk;
 
   SpinLockSize = GetSpinLockProperties ();
-  CpuToken = AllocatePool (SpinLockSize);
-  ASSERT (CpuToken != NULL);
+  TokenCountPerChunk = PcdGet32 (PcdTokenCountPerChunk);
+
+  if (gSmmCpuPrivate->UsedTokenNum == TokenCountPerChunk) {
+    DEBUG ((DEBUG_VERBOSE, "CpuSmm: No free token buffer, allocate new buffer!\n"));
+
+    //
+    // Record current token buffer for later free action usage.
+    // Current used token buffer not in this list.
+    //
+    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
+    ASSERT (TokenBuf != NULL);
+    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
+    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
+
+    InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link);
+
+    gSmmCpuPrivate->CurrentTokenBuf = AllocatePool (SpinLockSize * TokenCountPerChunk);
+    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
+    gSmmCpuPrivate->UsedTokenNum = 0;
+  }
+
+  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);
+  gSmmCpuPrivate->UsedTokenNum++;
+
   InitializeSpinLock (CpuToken);
   AcquireSpinLock (CpuToken);
 
@@ -1737,10 +1778,28 @@ InitializeDataForMmMp (
   VOID
   )
 {
+  UINTN              SpinLockSize;
+  UINT32             TokenCountPerChunk;
+
+  SpinLockSize = GetSpinLockProperties ();
+  TokenCountPerChunk = PcdGet32 (PcdTokenCountPerChunk);
+  ASSERT_EFI_ERROR (TokenCountPerChunk != 0);
+  if (TokenCountPerChunk == 0) {
+    DEBUG ((EFI_D_ERROR, "PcdTokenCountPerChunk should not be Zero!\n"));
+    CpuDeadLoop ();
+  }
+  DEBUG ((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x, PreAllocateTokenNum = 0x%x\n", SpinLockSize, TokenCountPerChunk));
+
+  gSmmCpuPrivate->CurrentTokenBuf = AllocatePool (SpinLockSize * TokenCountPerChunk);
+  ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
+
+  gSmmCpuPrivate->UsedTokenNum = 0;
+
   gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus);
   ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
 
   InitializeListHead (&gSmmCpuPrivate->TokenList);
+  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
 }
 
 /**
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
index 8c29f1a558..08ef8d2e15 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
@@ -217,6 +217,17 @@ typedef struct {
 
 #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE)
 
+#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
+
+typedef struct {
+  UINTN                   Signature;
+  LIST_ENTRY              Link;
+
+  UINT8                   *Buffer;
+} TOKEN_BUFFER;
+
+#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE)
+
 //
 // Private structure for the SMM CPU module that is stored in DXE Runtime memory
 // Contains the SMM Configuration Protocols that is produced.
@@ -243,6 +254,10 @@ typedef struct {
   PROCEDURE_WRAPPER               *ApWrapperFunc;
   LIST_ENTRY                      TokenList;
 
+  LIST_ENTRY                      OldTokenBufList;
+
+  UINT8                           *CurrentTokenBuf;
+  UINTN                           UsedTokenNum;     // Only record tokens used in CurrentTokenBuf.
 } SMM_CPU_PRIVATE_DATA;
 
 extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
index 851a8cb258..8b6c71b697 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
@@ -140,6 +140,7 @@
   gEfiMdeModulePkgTokenSpaceGuid.PcdNullPointerDetectionPropertyMask    ## CONSUMES
   gEfiMdeModulePkgTokenSpaceGuid.PcdHeapGuardPropertyMask               ## CONSUMES
   gEfiMdePkgTokenSpaceGuid.PcdControlFlowEnforcementPropertyMask        ## CONSUMES
+  gUefiCpuPkgTokenSpaceGuid.PcdTokenCountPerChunk                       ## CONSUMES
 
 [Depex]
   gEfiMpServiceProtocolGuid
diff --git a/UefiCpuPkg/UefiCpuPkg.dec b/UefiCpuPkg/UefiCpuPkg.dec
index 83acd33612..8ec2153459 100644
--- a/UefiCpuPkg/UefiCpuPkg.dec
+++ b/UefiCpuPkg/UefiCpuPkg.dec
@@ -147,6 +147,10 @@
   # @Prompt Specify size of good stack of exception which need switching stack.
   gUefiCpuPkgTokenSpaceGuid.PcdCpuKnownGoodStackSize|2048|UINT32|0x30002001
 
+  ## Size of pre allocated token count per chunk.
+  # @Prompt Specify the size of pre allocated token count per chunk.
+  gUefiCpuPkgTokenSpaceGuid.PcdTokenCountPerChunk|64|UINT32|0x30002002
+
 [PcdsFixedAtBuild, PcdsPatchableInModule]
   ## This value is the CPU Local APIC base address, which aligns the address on a 4-KByte boundary.
   # @Prompt Configure base address of CPU Local APIC
diff --git a/UefiCpuPkg/UefiCpuPkg.uni b/UefiCpuPkg/UefiCpuPkg.uni
index fbf7680726..3bb951cc72 100644
--- a/UefiCpuPkg/UefiCpuPkg.uni
+++ b/UefiCpuPkg/UefiCpuPkg.uni
@@ -252,3 +252,4 @@
                                                                                             "24000000  -  6th and 7th generation Intel Core processors and Intel Xeon W Processor Family(24MHz).<BR>\n"
                                                                                             "19200000  -  Intel Atom processors based on Goldmont Microarchitecture with CPUID signature 06_5CH(19.2MHz).<BR>\n"
 
+#string STR_gUefiCpuPkgTokenSpaceGuid_PcdTokenCountPerChunk_PROMPT  #language en-US "Specify the size of pre allocated token count per chunk.\n"
\ No newline at end of file
-- 
2.23.0.windows.1


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.

View/Reply Online (#51721): https://edk2.groups.io/g/devel/message/51721
Mute This Topic: https://groups.io/mt/66393846/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub  [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [PATCH] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time
Posted by Laszlo Ersek 4 years, 4 months ago
Hi Eric,

I have three comments:

On 12/04/19 09:05, Eric Dong wrote:
> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388
> 
> Token is new introduced by MM MP Protocol. Current logic allocate Token
> every time when need to use it. The logic caused SMI latency raised to
> very high. Update logic to allocate Token buffer at driver's entry point.
> Later use the token from the allocated token buffer. Only when all the
> buffer have been used, then need to allocate new buffer.
> 
> Signed-off-by: Eric Dong <eric.dong@intel.com>
> Cc: Ray Ni <ray.ni@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> ---
> V3 changes:
>   Introduce PCD to control the pre allocated toke buffer size.
> 
> v2 changes:
>   Minor update based on comments.
> 
>  UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c        | 67 ++++++++++++++++++--
>  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h   | 15 +++++
>  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf |  1 +
>  UefiCpuPkg/UefiCpuPkg.dec                    |  4 ++
>  UefiCpuPkg/UefiCpuPkg.uni                    |  1 +
>  5 files changed, 84 insertions(+), 4 deletions(-)
> 
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> index d8d2b6f444..33aad3f3e9 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> @@ -492,6 +492,24 @@ FreeTokens (
>  {
>    LIST_ENTRY            *Link;
>    PROCEDURE_TOKEN       *ProcToken;
> +  TOKEN_BUFFER          *TokenBuf;
> +
> +  //
> +  // Not free the buffer, just clear the UsedTokenNum. In order to
> +  // avoid triggering allocate action when we need to use the token,
> +  // do not free the buffer.
> +  //
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> +
> +  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
> +  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
> +    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
> +
> +    Link = RemoveEntryList (&TokenBuf->Link);
> +
> +    FreePool (TokenBuf->Buffer);
> +    FreePool (TokenBuf);
> +  }
>  
>    while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
>      Link = GetFirstNode (&gSmmCpuPrivate->TokenList);
> @@ -499,7 +517,6 @@ FreeTokens (
>  
>      RemoveEntryList (&ProcToken->Link);
>  
> -    FreePool ((VOID *)ProcToken->ProcedureToken);
>      FreePool (ProcToken);
>    }
>  }
> @@ -1115,13 +1132,37 @@ CreateToken (
>    VOID
>    )
>  {
> -  PROCEDURE_TOKEN    *ProcToken;
> +  PROCEDURE_TOKEN     *ProcToken;
>    SPIN_LOCK           *CpuToken;
>    UINTN               SpinLockSize;
> +  TOKEN_BUFFER        *TokenBuf;
> +  UINT32              TokenCountPerChunk;
>  
>    SpinLockSize = GetSpinLockProperties ();
> -  CpuToken = AllocatePool (SpinLockSize);
> -  ASSERT (CpuToken != NULL);
> +  TokenCountPerChunk = PcdGet32 (PcdTokenCountPerChunk);

(1) I understand that the DEC file introduces the PCD as
[PcdsFixedAtBuild] *only*. So, at this specific time, the code above is
safe.

But I'm worried that at a later time, we might relax that to a dynamic
PCD, and then the above code will behave really badly. Therefore, please
update the patch in one of two ways:

(1a) please use a static global variable in the entry point function to
cache the PCD, and then use that variable in place of the PCD everywhere,

(1b) alternatively, please update the INF file, and the PcdGet32() calls
too, to [FixedPcd] and FixedPcdGet32(). Then, if we happen to relax the
PCD declaration to dynamic, and a platform actually tries to use that,
they will get a build error. (And then they'll be forced to imlement
(1a) above.)

The idea is to make this as robust / safe as possible.

> +
> +  if (gSmmCpuPrivate->UsedTokenNum == TokenCountPerChunk) {
> +    DEBUG ((DEBUG_VERBOSE, "CpuSmm: No free token buffer, allocate new buffer!\n"));
> +
> +    //
> +    // Record current token buffer for later free action usage.
> +    // Current used token buffer not in this list.
> +    //
> +    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
> +    ASSERT (TokenBuf != NULL);
> +    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
> +    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
> +
> +    InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link);
> +
> +    gSmmCpuPrivate->CurrentTokenBuf = AllocatePool (SpinLockSize * TokenCountPerChunk);
> +    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +    gSmmCpuPrivate->UsedTokenNum = 0;
> +  }
> +
> +  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);
> +  gSmmCpuPrivate->UsedTokenNum++;
> +
>    InitializeSpinLock (CpuToken);
>    AcquireSpinLock (CpuToken);
>  
> @@ -1737,10 +1778,28 @@ InitializeDataForMmMp (
>    VOID
>    )
>  {
> +  UINTN              SpinLockSize;
> +  UINT32             TokenCountPerChunk;
> +
> +  SpinLockSize = GetSpinLockProperties ();
> +  TokenCountPerChunk = PcdGet32 (PcdTokenCountPerChunk);
> +  ASSERT_EFI_ERROR (TokenCountPerChunk != 0);
> +  if (TokenCountPerChunk == 0) {
> +    DEBUG ((EFI_D_ERROR, "PcdTokenCountPerChunk should not be Zero!\n"));

(2) This is a good check, but we should use DEBUG_ERROR, not EFI_D_ERROR.

> +    CpuDeadLoop ();
> +  }
> +  DEBUG ((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x, PreAllocateTokenNum = 0x%x\n", SpinLockSize, TokenCountPerChunk));
> +
> +  gSmmCpuPrivate->CurrentTokenBuf = AllocatePool (SpinLockSize * TokenCountPerChunk);
> +  ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> +
>    gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus);
>    ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
>  
>    InitializeListHead (&gSmmCpuPrivate->TokenList);
> +  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
>  }
>  
>  /**
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> index 8c29f1a558..08ef8d2e15 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> @@ -217,6 +217,17 @@ typedef struct {
>  
>  #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE)
>  
> +#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
> +
> +typedef struct {
> +  UINTN                   Signature;
> +  LIST_ENTRY              Link;
> +
> +  UINT8                   *Buffer;
> +} TOKEN_BUFFER;
> +
> +#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE)
> +
>  //
>  // Private structure for the SMM CPU module that is stored in DXE Runtime memory
>  // Contains the SMM Configuration Protocols that is produced.
> @@ -243,6 +254,10 @@ typedef struct {
>    PROCEDURE_WRAPPER               *ApWrapperFunc;
>    LIST_ENTRY                      TokenList;
>  
> +  LIST_ENTRY                      OldTokenBufList;
> +
> +  UINT8                           *CurrentTokenBuf;
> +  UINTN                           UsedTokenNum;     // Only record tokens used in CurrentTokenBuf.
>  } SMM_CPU_PRIVATE_DATA;
>  
>  extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
> index 851a8cb258..8b6c71b697 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
> @@ -140,6 +140,7 @@
>    gEfiMdeModulePkgTokenSpaceGuid.PcdNullPointerDetectionPropertyMask    ## CONSUMES
>    gEfiMdeModulePkgTokenSpaceGuid.PcdHeapGuardPropertyMask               ## CONSUMES
>    gEfiMdePkgTokenSpaceGuid.PcdControlFlowEnforcementPropertyMask        ## CONSUMES
> +  gUefiCpuPkgTokenSpaceGuid.PcdTokenCountPerChunk                       ## CONSUMES
>  
>  [Depex]
>    gEfiMpServiceProtocolGuid
> diff --git a/UefiCpuPkg/UefiCpuPkg.dec b/UefiCpuPkg/UefiCpuPkg.dec
> index 83acd33612..8ec2153459 100644
> --- a/UefiCpuPkg/UefiCpuPkg.dec
> +++ b/UefiCpuPkg/UefiCpuPkg.dec
> @@ -147,6 +147,10 @@
>    # @Prompt Specify size of good stack of exception which need switching stack.
>    gUefiCpuPkgTokenSpaceGuid.PcdCpuKnownGoodStackSize|2048|UINT32|0x30002001
>  
> +  ## Size of pre allocated token count per chunk.
> +  # @Prompt Specify the size of pre allocated token count per chunk.
> +  gUefiCpuPkgTokenSpaceGuid.PcdTokenCountPerChunk|64|UINT32|0x30002002
> +
>  [PcdsFixedAtBuild, PcdsPatchableInModule]
>    ## This value is the CPU Local APIC base address, which aligns the address on a 4-KByte boundary.
>    # @Prompt Configure base address of CPU Local APIC
> diff --git a/UefiCpuPkg/UefiCpuPkg.uni b/UefiCpuPkg/UefiCpuPkg.uni
> index fbf7680726..3bb951cc72 100644
> --- a/UefiCpuPkg/UefiCpuPkg.uni
> +++ b/UefiCpuPkg/UefiCpuPkg.uni
> @@ -252,3 +252,4 @@
>                                                                                              "24000000  -  6th and 7th generation Intel Core processors and Intel Xeon W Processor Family(24MHz).<BR>\n"
>                                                                                              "19200000  -  Intel Atom processors based on Goldmont Microarchitecture with CPUID signature 06_5CH(19.2MHz).<BR>\n"
>  
> +#string STR_gUefiCpuPkgTokenSpaceGuid_PcdTokenCountPerChunk_PROMPT  #language en-US "Specify the size of pre allocated token count per chunk.\n"
> \ No newline at end of file
> 

(3) I think this would be the first PCD in the UNI file that does not
have _HELP, only _PROMPT. Is that OK? It seems like an inconsistency.
But, I'll let Ray decide.

With (1) fixed (either (1a) or (1b)):

Acked-by: Laszlo Ersek <lersek@redhat.com>

Thanks
Laszlo


-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.

View/Reply Online (#51728): https://edk2.groups.io/g/devel/message/51728
Mute This Topic: https://groups.io/mt/66393846/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub  [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-

Re: [edk2-devel] [PATCH] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time
Posted by Dong, Eric 4 years, 4 months ago
Hi Laszlo,

> -----Original Message-----
> From: Laszlo Ersek [mailto:lersek@redhat.com]
> Sent: Wednesday, December 4, 2019 5:20 PM
> To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io
> Cc: Ni, Ray <ray.ni@intel.com>
> Subject: Re: [PATCH] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token
> every time
> 
> Hi Eric,
> 
> I have three comments:
> 
> On 12/04/19 09:05, Eric Dong wrote:
> > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388
> >
> > Token is new introduced by MM MP Protocol. Current logic allocate
> > Token every time when need to use it. The logic caused SMI latency
> > raised to very high. Update logic to allocate Token buffer at driver's entry
> point.
> > Later use the token from the allocated token buffer. Only when all the
> > buffer have been used, then need to allocate new buffer.
> >
> > Signed-off-by: Eric Dong <eric.dong@intel.com>
> > Cc: Ray Ni <ray.ni@intel.com>
> > Cc: Laszlo Ersek <lersek@redhat.com>
> > ---
> > V3 changes:
> >   Introduce PCD to control the pre allocated toke buffer size.
> >
> > v2 changes:
> >   Minor update based on comments.
> >
> >  UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c        | 67
> ++++++++++++++++++--
> >  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h   | 15 +++++
> >  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf |  1 +
> >  UefiCpuPkg/UefiCpuPkg.dec                    |  4 ++
> >  UefiCpuPkg/UefiCpuPkg.uni                    |  1 +
> >  5 files changed, 84 insertions(+), 4 deletions(-)
> >
> > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > index d8d2b6f444..33aad3f3e9 100644
> > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > @@ -492,6 +492,24 @@ FreeTokens (
> >  {
> >    LIST_ENTRY            *Link;
> >    PROCEDURE_TOKEN       *ProcToken;
> > +  TOKEN_BUFFER          *TokenBuf;
> > +
> > +  //
> > +  // Not free the buffer, just clear the UsedTokenNum. In order to
> > + // avoid triggering allocate action when we need to use the token,
> > + // do not free the buffer.
> > +  //
> > +  gSmmCpuPrivate->UsedTokenNum = 0;
> > +
> > +  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
> > +  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
> > +    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
> > +
> > +    Link = RemoveEntryList (&TokenBuf->Link);
> > +
> > +    FreePool (TokenBuf->Buffer);
> > +    FreePool (TokenBuf);
> > +  }
> >
> >    while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
> >      Link = GetFirstNode (&gSmmCpuPrivate->TokenList); @@ -499,7
> > +517,6 @@ FreeTokens (
> >
> >      RemoveEntryList (&ProcToken->Link);
> >
> > -    FreePool ((VOID *)ProcToken->ProcedureToken);
> >      FreePool (ProcToken);
> >    }
> >  }
> > @@ -1115,13 +1132,37 @@ CreateToken (
> >    VOID
> >    )
> >  {
> > -  PROCEDURE_TOKEN    *ProcToken;
> > +  PROCEDURE_TOKEN     *ProcToken;
> >    SPIN_LOCK           *CpuToken;
> >    UINTN               SpinLockSize;
> > +  TOKEN_BUFFER        *TokenBuf;
> > +  UINT32              TokenCountPerChunk;
> >
> >    SpinLockSize = GetSpinLockProperties ();
> > -  CpuToken = AllocatePool (SpinLockSize);
> > -  ASSERT (CpuToken != NULL);
> > +  TokenCountPerChunk = PcdGet32 (PcdTokenCountPerChunk);
> 
> (1) I understand that the DEC file introduces the PCD as [PcdsFixedAtBuild]
> *only*. So, at this specific time, the code above is safe.
> 
> But I'm worried that at a later time, we might relax that to a dynamic PCD,
> and then the above code will behave really badly. Therefore, please update
> the patch in one of two ways:
> 
> (1a) please use a static global variable in the entry point function to cache the
> PCD, and then use that variable in place of the PCD everywhere,
> 
> (1b) alternatively, please update the INF file, and the PcdGet32() calls too, to
> [FixedPcd] and FixedPcdGet32(). Then, if we happen to relax the PCD
> declaration to dynamic, and a platform actually tries to use that, they will get
> a build error. (And then they'll be forced to imlement
> (1a) above.)
> 
> The idea is to make this as robust / safe as possible.
[[Eric]] Good suggestion. I will use 1b as the final solution for it.
 
> 
> > +
> > +  if (gSmmCpuPrivate->UsedTokenNum == TokenCountPerChunk) {
> > +    DEBUG ((DEBUG_VERBOSE, "CpuSmm: No free token buffer, allocate
> > + new buffer!\n"));
> > +
> > +    //
> > +    // Record current token buffer for later free action usage.
> > +    // Current used token buffer not in this list.
> > +    //
> > +    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
> > +    ASSERT (TokenBuf != NULL);
> > +    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
> > +    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
> > +
> > +    InsertTailList (&gSmmCpuPrivate->OldTokenBufList,
> > + &TokenBuf->Link);
> > +
> > +    gSmmCpuPrivate->CurrentTokenBuf = AllocatePool (SpinLockSize *
> TokenCountPerChunk);
> > +    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> > +    gSmmCpuPrivate->UsedTokenNum = 0;  }
> > +
> > +  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf +
> > + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);
> > + gSmmCpuPrivate->UsedTokenNum++;
> > +
> >    InitializeSpinLock (CpuToken);
> >    AcquireSpinLock (CpuToken);
> >
> > @@ -1737,10 +1778,28 @@ InitializeDataForMmMp (
> >    VOID
> >    )
> >  {
> > +  UINTN              SpinLockSize;
> > +  UINT32             TokenCountPerChunk;
> > +
> > +  SpinLockSize = GetSpinLockProperties ();  TokenCountPerChunk =
> > + PcdGet32 (PcdTokenCountPerChunk);  ASSERT_EFI_ERROR
> > + (TokenCountPerChunk != 0);  if (TokenCountPerChunk == 0) {
> > +    DEBUG ((EFI_D_ERROR, "PcdTokenCountPerChunk should not be
> > + Zero!\n"));
> 
> (2) This is a good check, but we should use DEBUG_ERROR, not EFI_D_ERROR.
[[Eric]] good catch, it's copy paste issue :). I will update it when I check in the code.

> 
> > +    CpuDeadLoop ();
> > +  }
> > +  DEBUG ((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x,
> > + PreAllocateTokenNum = 0x%x\n", SpinLockSize, TokenCountPerChunk));
> > +
> > +  gSmmCpuPrivate->CurrentTokenBuf = AllocatePool (SpinLockSize *
> > + TokenCountPerChunk);  ASSERT (gSmmCpuPrivate->CurrentTokenBuf !=
> > + NULL);
> > +
> > +  gSmmCpuPrivate->UsedTokenNum = 0;
> > +
> >    gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof
> (PROCEDURE_WRAPPER) * gSmmCpuPrivate-
> >SmmCoreEntryContext.NumberOfCpus);
> >    ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
> >
> >    InitializeListHead (&gSmmCpuPrivate->TokenList);
> > +  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
> >  }
> >
> >  /**
> > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > index 8c29f1a558..08ef8d2e15 100644
> > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > @@ -217,6 +217,17 @@ typedef struct {
> >
> >  #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN,
> Link,
> > PROCEDURE_TOKEN_SIGNATURE)
> >
> > +#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
> > +
> > +typedef struct {
> > +  UINTN                   Signature;
> > +  LIST_ENTRY              Link;
> > +
> > +  UINT8                   *Buffer;
> > +} TOKEN_BUFFER;
> > +
> > +#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link,
> > +TOKEN_BUFFER_SIGNATURE)
> > +
> >  //
> >  // Private structure for the SMM CPU module that is stored in DXE
> > Runtime memory  // Contains the SMM Configuration Protocols that is
> produced.
> > @@ -243,6 +254,10 @@ typedef struct {
> >    PROCEDURE_WRAPPER               *ApWrapperFunc;
> >    LIST_ENTRY                      TokenList;
> >
> > +  LIST_ENTRY                      OldTokenBufList;
> > +
> > +  UINT8                           *CurrentTokenBuf;
> > +  UINTN                           UsedTokenNum;     // Only record tokens used in
> CurrentTokenBuf.
> >  } SMM_CPU_PRIVATE_DATA;
> >
> >  extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate; diff --git
> > a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
> > b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
> > index 851a8cb258..8b6c71b697 100644
> > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
> > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.inf
> > @@ -140,6 +140,7 @@
> >
> gEfiMdeModulePkgTokenSpaceGuid.PcdNullPointerDetectionPropertyMask
> ## CONSUMES
> >    gEfiMdeModulePkgTokenSpaceGuid.PcdHeapGuardPropertyMask
> ## CONSUMES
> >    gEfiMdePkgTokenSpaceGuid.PcdControlFlowEnforcementPropertyMask
> ## CONSUMES
> > +  gUefiCpuPkgTokenSpaceGuid.PcdTokenCountPerChunk                       ##
> CONSUMES
> >
> >  [Depex]
> >    gEfiMpServiceProtocolGuid
> > diff --git a/UefiCpuPkg/UefiCpuPkg.dec b/UefiCpuPkg/UefiCpuPkg.dec
> > index 83acd33612..8ec2153459 100644
> > --- a/UefiCpuPkg/UefiCpuPkg.dec
> > +++ b/UefiCpuPkg/UefiCpuPkg.dec
> > @@ -147,6 +147,10 @@
> >    # @Prompt Specify size of good stack of exception which need switching
> stack.
> >
> >
> gUefiCpuPkgTokenSpaceGuid.PcdCpuKnownGoodStackSize|2048|UINT32|0
> x30002
> > 001
> >
> > +  ## Size of pre allocated token count per chunk.
> > +  # @Prompt Specify the size of pre allocated token count per chunk.
> > +
> > +
> gUefiCpuPkgTokenSpaceGuid.PcdTokenCountPerChunk|64|UINT32|0x3000
> 2002
> > +
> >  [PcdsFixedAtBuild, PcdsPatchableInModule]
> >    ## This value is the CPU Local APIC base address, which aligns the address
> on a 4-KByte boundary.
> >    # @Prompt Configure base address of CPU Local APIC diff --git
> > a/UefiCpuPkg/UefiCpuPkg.uni b/UefiCpuPkg/UefiCpuPkg.uni index
> > fbf7680726..3bb951cc72 100644
> > --- a/UefiCpuPkg/UefiCpuPkg.uni
> > +++ b/UefiCpuPkg/UefiCpuPkg.uni
> > @@ -252,3 +252,4 @@
> >                                                                                              "24000000  -  6th and 7th
> generation Intel Core processors and Intel Xeon W Processor
> Family(24MHz).<BR>\n"
> >                                                                                              "19200000  -  Intel Atom
> processors based on Goldmont Microarchitecture with CPUID signature
> 06_5CH(19.2MHz).<BR>\n"
> >
> > +#string
> STR_gUefiCpuPkgTokenSpaceGuid_PcdTokenCountPerChunk_PROMPT
> #language en-US "Specify the size of pre allocated token count per chunk.\n"
> > \ No newline at end of file
> >
> 
> (3) I think this would be the first PCD in the UNI file that does not have _HELP,
> only _PROMPT. Is that OK? It seems like an inconsistency.
> But, I'll let Ray decide.
[[Eric]] I missed the _HELP, will add it when I check in the code.

Thanks,
Eric
> 
> With (1) fixed (either (1a) or (1b)):
> 
> Acked-by: Laszlo Ersek <lersek@redhat.com>
> 
> Thanks
> Laszlo

-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.

View/Reply Online (#51763): https://edk2.groups.io/g/devel/message/51763
Mute This Topic: https://groups.io/mt/66393846/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub  [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-