[edk2-devel] [PATCH] UefiCpuPkg/Feature: Support different thread count per core

Ni, Ray posted 1 patch 3 years, 4 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/edk2 tags/patchew/20201202115505.664-1-ray.ni@intel.com
UefiCpuPkg/Include/AcpiCpuData.h              |  16 ++-
.../CpuFeaturesInitialize.c                   | 113 ++++++++++--------
UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c             |  73 ++++++-----
3 files changed, 119 insertions(+), 83 deletions(-)
[edk2-devel] [PATCH] UefiCpuPkg/Feature: Support different thread count per core
Posted by Ni, Ray 3 years, 4 months ago
Today's code assumes every core contains the same number of threads.
It's not always TRUE for certain model.
Such assumption causes system hang when thread count per core
is different and there is core or package dependency between CPU
features (using CPU_FEATURE_CORE_BEFORE/AFTER,
CPU_FEATURE_PACKAGE_BEFORE/AFTER).

The change removes such assumption by calculating the actual thread
count per package and per core.

Signed-off-by: Ray Ni <ray.ni@intel.com>
Cc: Eric Dong <eric.dong@intel.com>
Cc: Yun Lou <yun.lou@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
---
 UefiCpuPkg/Include/AcpiCpuData.h              |  16 ++-
 .../CpuFeaturesInitialize.c                   | 113 ++++++++++--------
 UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c             |  73 ++++++-----
 3 files changed, 119 insertions(+), 83 deletions(-)

diff --git a/UefiCpuPkg/Include/AcpiCpuData.h b/UefiCpuPkg/Include/AcpiCpuData.h
index 77da5d4455..b5a69ad80c 100644
--- a/UefiCpuPkg/Include/AcpiCpuData.h
+++ b/UefiCpuPkg/Include/AcpiCpuData.h
@@ -1,7 +1,7 @@
 /** @file
 Definitions for CPU S3 data.
 
-Copyright (c) 2013 - 2018, Intel Corporation. All rights reserved.<BR>
+Copyright (c) 2013 - 2020, Intel Corporation. All rights reserved.<BR>
 SPDX-License-Identifier: BSD-2-Clause-Patent
 
 **/
@@ -60,14 +60,24 @@ typedef struct {
   UINT32                      MaxThreadCount;
   //
   // This field points to an array.
-  // This array saves valid core count (type UINT32) of each package.
+  // This array saves thread count (type UINT32) of each package.
   // The array has PackageCount elements.
   //
   // If the platform does not support MSR setting at S3 resume, and
   // therefore it doesn't need the dependency semaphores, it should set
   // this field to 0.
   //
-  EFI_PHYSICAL_ADDRESS        ValidCoreCountPerPackage;
+  EFI_PHYSICAL_ADDRESS        ThreadCountPerPackage;
+  //
+  // This field points to an array.
+  // This array saves thread count (type UINT8) of each core.
+  // The array has PackageCount * MaxCoreCount elements.
+  //
+  // If the platform does not support MSR setting at S3 resume, and
+  // therefore it doesn't need the dependency semaphores, it should set
+  // this field to 0.
+  //
+  EFI_PHYSICAL_ADDRESS        ThreadCountPerCore;
 } CPU_STATUS_INFORMATION;
 
 //
diff --git a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
index 5c673fa8cf..0cce909cc0 100644
--- a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
+++ b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
@@ -103,14 +103,13 @@ CpuInitDataInitialize (
   UINT32                               Package;
   UINT32                               Thread;
   EFI_CPU_PHYSICAL_LOCATION            *Location;
-  BOOLEAN                              *CoresVisited;
-  UINTN                                Index;
   UINT32                               PackageIndex;
   UINT32                               CoreIndex;
   UINT32                               First;
   ACPI_CPU_DATA                        *AcpiCpuData;
   CPU_STATUS_INFORMATION               *CpuStatus;
-  UINT32                               *ValidCoreCountPerPackage;
+  UINT32                               *ThreadCountPerPackage;
+  UINT8                                *ThreadCountPerCore;
   UINTN                                NumberOfCpus;
   UINTN                                NumberOfEnabledProcessors;
 
@@ -202,35 +201,32 @@ CpuInitDataInitialize (
   //
   // Collect valid core count in each package because not all cores are valid.
   //
-  ValidCoreCountPerPackage= AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
-  ASSERT (ValidCoreCountPerPackage != 0);
-  CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ValidCoreCountPerPackage;
-  CoresVisited = AllocatePool (sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
-  ASSERT (CoresVisited != NULL);
-
-  for (Index = 0; Index < CpuStatus->PackageCount; Index ++ ) {
-    ZeroMem (CoresVisited, sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
-    //
-    // Collect valid cores in Current package.
-    //
-    for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
-      Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
-      if (Location->Package == Index && !CoresVisited[Location->Core] ) {
-        //
-        // The ValidCores position for Location->Core is valid.
-        // The possible values in ValidCores[Index] are 0 or 1.
-        // FALSE means no valid threads in this Core.
-        // TRUE means have valid threads in this core, no matter the thead count is 1 or more.
-        //
-        CoresVisited[Location->Core] = TRUE;
-        ValidCoreCountPerPackage[Index]++;
-      }
-    }
+  ThreadCountPerPackage = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
+  ASSERT (ThreadCountPerPackage != NULL);
+  CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerPackage;
+
+  ThreadCountPerCore = AllocateZeroPool (sizeof (UINT8) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount);
+  ASSERT (ThreadCountPerCore != NULL);
+  CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerCore;
+
+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
+    ThreadCountPerPackage[Location->Package]++;
+    ThreadCountPerCore[Location->Package * CpuStatus->MaxCoreCount + Location->Core]++;
   }
-  FreePool (CoresVisited);
 
-  for (Index = 0; Index <= Package; Index++) {
-    DEBUG ((DEBUG_INFO, "Package: %d, Valid Core : %d\n", Index, ValidCoreCountPerPackage[Index]));
+  for (PackageIndex = 0; PackageIndex < CpuStatus->PackageCount; PackageIndex++) {
+    if (ThreadCountPerPackage[PackageIndex] != 0) {
+      DEBUG ((DEBUG_INFO, "P%02d: Thread Count = %d\n", PackageIndex, ThreadCountPerPackage[PackageIndex]));
+      for (CoreIndex = 0; CoreIndex < CpuStatus->MaxCoreCount; CoreIndex++) {
+        if (ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex] != 0) {
+          DEBUG ((
+            DEBUG_INFO, "  P%02d C%04d, Thread Count = %d\n", PackageIndex, CoreIndex, 
+            ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex]
+            ));
+        }
+      }
+    }
   }
 
   CpuFeaturesData->CpuFlags.CoreSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);
@@ -894,11 +890,11 @@ ProgramProcessorRegister (
   CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;
   volatile UINT32           *SemaphorePtr;
   UINT32                    FirstThread;
-  UINT32                    PackageThreadsCount;
   UINT32                    CurrentThread;
+  UINT32                    CurrentCore;
   UINTN                     ProcessorIndex;
-  UINTN                     ValidThreadCount;
-  UINT32                    *ValidCoreCountPerPackage;
+  UINT32                    *ThreadCountPerPackage;
+  UINT8                     *ThreadCountPerCore;
   EFI_STATUS                Status;
   UINT64                    CurrentValue;
 
@@ -1029,28 +1025,44 @@ ProgramProcessorRegister (
       switch (RegisterTableEntry->Value) {
       case CoreDepType:
         SemaphorePtr = CpuFlags->CoreSemaphoreCount;
+        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
+
+        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
         //
         // Get Offset info for the first thread in the core which current thread belongs to.
         //
-        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
+        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;
         CurrentThread = FirstThread + ApLocation->Thread;
+
         //
-        // First Notify all threads in current Core that this thread has ready.
+        // Different cores may have different valid threads in them. If driver maintail clearly
+        // thread index in different cores, the logic will be much complicated.
+        // Here driver just simply records the max thread number in all cores and use it as expect
+        // thread number for all cores.
+        // In below two steps logic, first current thread will Release semaphore for each thread
+        // in current core. Maybe some threads are not valid in this core, but driver don't
+        // care. Second, driver will let current thread wait semaphore for all valid threads in
+        // current core. Because only the valid threads will do release semaphore for this
+        // thread, driver here only need to wait the valid thread count.
+        //
+
+        //
+        // First Notify ALL THREADs in current Core that this thread is ready.
         //
         for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
-          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
         }
         //
-        // Second, check whether all valid threads in current core have ready.
+        // Second, check whether all VALID THREADs (not all threads) in current core are ready.
         //
-        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
           LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
         }
         break;
 
       case PackageDepType:
         SemaphorePtr = CpuFlags->PackageSemaphoreCount;
-        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
+        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
         //
         // Get Offset info for the first thread in the package which current thread belongs to.
         //
@@ -1058,18 +1070,13 @@ ProgramProcessorRegister (
         //
         // Get the possible threads count for current package.
         //
-        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
         CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
-        //
-        // Get the valid thread count for current package.
-        //
-        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
 
         //
-        // Different packages may have different valid cores in them. If driver maintail clearly
-        // cores number in different packages, the logic will be much complicated.
-        // Here driver just simply records the max core number in all packages and use it as expect
-        // core number for all packages.
+        // Different packages may have different valid threads in them. If driver maintail clearly
+        // thread index in different packages, the logic will be much complicated.
+        // Here driver just simply records the max thread number in all packages and use it as expect
+        // thread number for all packages.
         // In below two steps logic, first current thread will Release semaphore for each thread
         // in current package. Maybe some threads are not valid in this package, but driver don't
         // care. Second, driver will let current thread wait semaphore for all valid threads in
@@ -1078,15 +1085,15 @@ ProgramProcessorRegister (
         //
 
         //
-        // First Notify ALL THREADS in current package that this thread has ready.
+        // First Notify ALL THREADS in current package that this thread is ready.
         //
-        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
-          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
+        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
         }
         //
-        // Second, check whether VALID THREADS (not all threads) in current package have ready.
+        // Second, check whether VALID THREADS (not all threads) in current package are ready.
         //
-        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
           LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
         }
         break;
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
index 29e9ba92b4..9592430636 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
@@ -1,7 +1,7 @@
 /** @file
 Code for Processor S3 restoration
 
-Copyright (c) 2006 - 2019, Intel Corporation. All rights reserved.<BR>
+Copyright (c) 2006 - 2020, Intel Corporation. All rights reserved.<BR>
 SPDX-License-Identifier: BSD-2-Clause-Patent
 
 **/
@@ -235,11 +235,11 @@ ProgramProcessorRegister (
   CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;
   volatile UINT32           *SemaphorePtr;
   UINT32                    FirstThread;
-  UINT32                    PackageThreadsCount;
   UINT32                    CurrentThread;
+  UINT32                    CurrentCore;
   UINTN                     ProcessorIndex;
-  UINTN                     ValidThreadCount;
-  UINT32                    *ValidCoreCountPerPackage;
+  UINT32                    *ThreadCountPerPackage;
+  UINT8                     *ThreadCountPerCore;
   EFI_STATUS                Status;
   UINT64                    CurrentValue;
 
@@ -372,35 +372,52 @@ ProgramProcessorRegister (
       //
       ASSERT (
         (ApLocation != NULL) &&
-        (CpuStatus->ValidCoreCountPerPackage != 0) &&
+        (CpuStatus->ThreadCountPerPackage != 0) &&
+        (CpuStatus->ThreadCountPerCore != 0) &&
         (CpuFlags->CoreSemaphoreCount != NULL) &&
         (CpuFlags->PackageSemaphoreCount != NULL)
         );
       switch (RegisterTableEntry->Value) {
       case CoreDepType:
         SemaphorePtr = CpuFlags->CoreSemaphoreCount;
+        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
+
+        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
         //
         // Get Offset info for the first thread in the core which current thread belongs to.
         //
-        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
+        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;
         CurrentThread = FirstThread + ApLocation->Thread;
+
         //
-        // First Notify all threads in current Core that this thread has ready.
+        // Different cores may have different valid threads in them. If driver maintail clearly
+        // thread index in different cores, the logic will be much complicated.
+        // Here driver just simply records the max thread number in all cores and use it as expect
+        // thread number for all cores.
+        // In below two steps logic, first current thread will Release semaphore for each thread
+        // in current core. Maybe some threads are not valid in this core, but driver don't
+        // care. Second, driver will let current thread wait semaphore for all valid threads in
+        // current core. Because only the valid threads will do release semaphore for this
+        // thread, driver here only need to wait the valid thread count.
+        //
+
+        //
+        // First Notify ALL THREADs in current Core that this thread is ready.
         //
         for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
           S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
         }
         //
-        // Second, check whether all valid threads in current core have ready.
+        // Second, check whether all VALID THREADs (not all threads) in current core are ready.
         //
-        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
           S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
         }
         break;
 
       case PackageDepType:
         SemaphorePtr = CpuFlags->PackageSemaphoreCount;
-        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
+        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
         //
         // Get Offset info for the first thread in the package which current thread belongs to.
         //
@@ -408,18 +425,13 @@ ProgramProcessorRegister (
         //
         // Get the possible threads count for current package.
         //
-        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
         CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
-        //
-        // Get the valid thread count for current package.
-        //
-        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
 
         //
-        // Different packages may have different valid cores in them. If driver maintail clearly
-        // cores number in different packages, the logic will be much complicated.
-        // Here driver just simply records the max core number in all packages and use it as expect
-        // core number for all packages.
+        // Different packages may have different valid threads in them. If driver maintail clearly
+        // thread index in different packages, the logic will be much complicated.
+        // Here driver just simply records the max thread number in all packages and use it as expect
+        // thread number for all packages.
         // In below two steps logic, first current thread will Release semaphore for each thread
         // in current package. Maybe some threads are not valid in this package, but driver don't
         // care. Second, driver will let current thread wait semaphore for all valid threads in
@@ -428,15 +440,15 @@ ProgramProcessorRegister (
         //
 
         //
-        // First Notify all threads in current package that this thread has ready.
+        // First Notify ALL THREADS in current package that this thread is ready.
         //
-        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
+        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
           S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
         }
         //
-        // Second, check whether all valid threads in current package have ready.
+        // Second, check whether VALID THREADS (not all threads) in current package are ready.
         //
-        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
           S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
         }
         break;
@@ -1059,12 +1071,19 @@ GetAcpiCpuData (
 
   CpuStatus = &mAcpiCpuData.CpuStatus;
   CopyMem (CpuStatus, &AcpiCpuData->CpuStatus, sizeof (CPU_STATUS_INFORMATION));
-  if (AcpiCpuData->CpuStatus.ValidCoreCountPerPackage != 0) {
-    CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
+  if (AcpiCpuData->CpuStatus.ThreadCountPerPackage != 0) {
+    CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
                                             sizeof (UINT32) * CpuStatus->PackageCount,
-                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ValidCoreCountPerPackage
+                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerPackage
+                                            );
+    ASSERT (CpuStatus->ThreadCountPerPackage != 0);
+  }
+  if (AcpiCpuData->CpuStatus.ThreadCountPerCore != 0) {
+    CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
+                                            sizeof (UINT8) * (CpuStatus->PackageCount * CpuStatus->MaxCoreCount),
+                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerCore
                                             );
-    ASSERT (CpuStatus->ValidCoreCountPerPackage != 0);
+    ASSERT (CpuStatus->ThreadCountPerCore != 0);
   }
   if (AcpiCpuData->ApLocation != 0) {
     mAcpiCpuData.ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
-- 
2.27.0.windows.1



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#68173): https://edk2.groups.io/g/devel/message/68173
Mute This Topic: https://groups.io/mt/78657549/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-


Re: [edk2-devel] [PATCH] UefiCpuPkg/Feature: Support different thread count per core
Posted by Laszlo Ersek 3 years, 4 months ago
On 12/02/20 12:55, Ray Ni wrote:
> Today's code assumes every core contains the same number of threads.
> It's not always TRUE for certain model.
> Such assumption causes system hang when thread count per core
> is different and there is core or package dependency between CPU
> features (using CPU_FEATURE_CORE_BEFORE/AFTER,
> CPU_FEATURE_PACKAGE_BEFORE/AFTER).
> 
> The change removes such assumption by calculating the actual thread
> count per package and per core.
> 
> Signed-off-by: Ray Ni <ray.ni@intel.com>
> Cc: Eric Dong <eric.dong@intel.com>
> Cc: Yun Lou <yun.lou@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> ---
>  UefiCpuPkg/Include/AcpiCpuData.h              |  16 ++-
>  .../CpuFeaturesInitialize.c                   | 113 ++++++++++--------
>  UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c             |  73 ++++++-----
>  3 files changed, 119 insertions(+), 83 deletions(-)

Acked-by: Laszlo Ersek <lersek@redhat.com>

Thanks
Laszlo

> diff --git a/UefiCpuPkg/Include/AcpiCpuData.h b/UefiCpuPkg/Include/AcpiCpuData.h
> index 77da5d4455..b5a69ad80c 100644
> --- a/UefiCpuPkg/Include/AcpiCpuData.h
> +++ b/UefiCpuPkg/Include/AcpiCpuData.h
> @@ -1,7 +1,7 @@
>  /** @file
>  Definitions for CPU S3 data.
>  
> -Copyright (c) 2013 - 2018, Intel Corporation. All rights reserved.<BR>
> +Copyright (c) 2013 - 2020, Intel Corporation. All rights reserved.<BR>
>  SPDX-License-Identifier: BSD-2-Clause-Patent
>  
>  **/
> @@ -60,14 +60,24 @@ typedef struct {
>    UINT32                      MaxThreadCount;
>    //
>    // This field points to an array.
> -  // This array saves valid core count (type UINT32) of each package.
> +  // This array saves thread count (type UINT32) of each package.
>    // The array has PackageCount elements.
>    //
>    // If the platform does not support MSR setting at S3 resume, and
>    // therefore it doesn't need the dependency semaphores, it should set
>    // this field to 0.
>    //
> -  EFI_PHYSICAL_ADDRESS        ValidCoreCountPerPackage;
> +  EFI_PHYSICAL_ADDRESS        ThreadCountPerPackage;
> +  //
> +  // This field points to an array.
> +  // This array saves thread count (type UINT8) of each core.
> +  // The array has PackageCount * MaxCoreCount elements.
> +  //
> +  // If the platform does not support MSR setting at S3 resume, and
> +  // therefore it doesn't need the dependency semaphores, it should set
> +  // this field to 0.
> +  //
> +  EFI_PHYSICAL_ADDRESS        ThreadCountPerCore;
>  } CPU_STATUS_INFORMATION;
>  
>  //
> diff --git a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> index 5c673fa8cf..0cce909cc0 100644
> --- a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> +++ b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> @@ -103,14 +103,13 @@ CpuInitDataInitialize (
>    UINT32                               Package;
>    UINT32                               Thread;
>    EFI_CPU_PHYSICAL_LOCATION            *Location;
> -  BOOLEAN                              *CoresVisited;
> -  UINTN                                Index;
>    UINT32                               PackageIndex;
>    UINT32                               CoreIndex;
>    UINT32                               First;
>    ACPI_CPU_DATA                        *AcpiCpuData;
>    CPU_STATUS_INFORMATION               *CpuStatus;
> -  UINT32                               *ValidCoreCountPerPackage;
> +  UINT32                               *ThreadCountPerPackage;
> +  UINT8                                *ThreadCountPerCore;
>    UINTN                                NumberOfCpus;
>    UINTN                                NumberOfEnabledProcessors;
>  
> @@ -202,35 +201,32 @@ CpuInitDataInitialize (
>    //
>    // Collect valid core count in each package because not all cores are valid.
>    //
> -  ValidCoreCountPerPackage= AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
> -  ASSERT (ValidCoreCountPerPackage != 0);
> -  CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ValidCoreCountPerPackage;
> -  CoresVisited = AllocatePool (sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
> -  ASSERT (CoresVisited != NULL);
> -
> -  for (Index = 0; Index < CpuStatus->PackageCount; Index ++ ) {
> -    ZeroMem (CoresVisited, sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
> -    //
> -    // Collect valid cores in Current package.
> -    //
> -    for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
> -      Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
> -      if (Location->Package == Index && !CoresVisited[Location->Core] ) {
> -        //
> -        // The ValidCores position for Location->Core is valid.
> -        // The possible values in ValidCores[Index] are 0 or 1.
> -        // FALSE means no valid threads in this Core.
> -        // TRUE means have valid threads in this core, no matter the thead count is 1 or more.
> -        //
> -        CoresVisited[Location->Core] = TRUE;
> -        ValidCoreCountPerPackage[Index]++;
> -      }
> -    }
> +  ThreadCountPerPackage = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
> +  ASSERT (ThreadCountPerPackage != NULL);
> +  CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerPackage;
> +
> +  ThreadCountPerCore = AllocateZeroPool (sizeof (UINT8) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount);
> +  ASSERT (ThreadCountPerCore != NULL);
> +  CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerCore;
> +
> +  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
> +    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
> +    ThreadCountPerPackage[Location->Package]++;
> +    ThreadCountPerCore[Location->Package * CpuStatus->MaxCoreCount + Location->Core]++;
>    }
> -  FreePool (CoresVisited);
>  
> -  for (Index = 0; Index <= Package; Index++) {
> -    DEBUG ((DEBUG_INFO, "Package: %d, Valid Core : %d\n", Index, ValidCoreCountPerPackage[Index]));
> +  for (PackageIndex = 0; PackageIndex < CpuStatus->PackageCount; PackageIndex++) {
> +    if (ThreadCountPerPackage[PackageIndex] != 0) {
> +      DEBUG ((DEBUG_INFO, "P%02d: Thread Count = %d\n", PackageIndex, ThreadCountPerPackage[PackageIndex]));
> +      for (CoreIndex = 0; CoreIndex < CpuStatus->MaxCoreCount; CoreIndex++) {
> +        if (ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex] != 0) {
> +          DEBUG ((
> +            DEBUG_INFO, "  P%02d C%04d, Thread Count = %d\n", PackageIndex, CoreIndex, 
> +            ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex]
> +            ));
> +        }
> +      }
> +    }
>    }
>  
>    CpuFeaturesData->CpuFlags.CoreSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);
> @@ -894,11 +890,11 @@ ProgramProcessorRegister (
>    CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;
>    volatile UINT32           *SemaphorePtr;
>    UINT32                    FirstThread;
> -  UINT32                    PackageThreadsCount;
>    UINT32                    CurrentThread;
> +  UINT32                    CurrentCore;
>    UINTN                     ProcessorIndex;
> -  UINTN                     ValidThreadCount;
> -  UINT32                    *ValidCoreCountPerPackage;
> +  UINT32                    *ThreadCountPerPackage;
> +  UINT8                     *ThreadCountPerCore;
>    EFI_STATUS                Status;
>    UINT64                    CurrentValue;
>  
> @@ -1029,28 +1025,44 @@ ProgramProcessorRegister (
>        switch (RegisterTableEntry->Value) {
>        case CoreDepType:
>          SemaphorePtr = CpuFlags->CoreSemaphoreCount;
> +        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
> +
> +        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
>          //
>          // Get Offset info for the first thread in the core which current thread belongs to.
>          //
> -        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
> +        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;
>          CurrentThread = FirstThread + ApLocation->Thread;
> +
>          //
> -        // First Notify all threads in current Core that this thread has ready.
> +        // Different cores may have different valid threads in them. If driver maintail clearly
> +        // thread index in different cores, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all cores and use it as expect
> +        // thread number for all cores.
> +        // In below two steps logic, first current thread will Release semaphore for each thread
> +        // in current core. Maybe some threads are not valid in this core, but driver don't
> +        // care. Second, driver will let current thread wait semaphore for all valid threads in
> +        // current core. Because only the valid threads will do release semaphore for this
> +        // thread, driver here only need to wait the valid thread count.
> +        //
> +
> +        //
> +        // First Notify ALL THREADs in current Core that this thread is ready.
>          //
>          for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> -          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
> +          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether all valid threads in current core have ready.
> +        // Second, check whether all VALID THREADs (not all threads) in current core are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
>            LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
>  
>        case PackageDepType:
>          SemaphorePtr = CpuFlags->PackageSemaphoreCount;
> -        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
> +        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
>          //
>          // Get Offset info for the first thread in the package which current thread belongs to.
>          //
> @@ -1058,18 +1070,13 @@ ProgramProcessorRegister (
>          //
>          // Get the possible threads count for current package.
>          //
> -        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
>          CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
> -        //
> -        // Get the valid thread count for current package.
> -        //
> -        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
>  
>          //
> -        // Different packages may have different valid cores in them. If driver maintail clearly
> -        // cores number in different packages, the logic will be much complicated.
> -        // Here driver just simply records the max core number in all packages and use it as expect
> -        // core number for all packages.
> +        // Different packages may have different valid threads in them. If driver maintail clearly
> +        // thread index in different packages, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all packages and use it as expect
> +        // thread number for all packages.
>          // In below two steps logic, first current thread will Release semaphore for each thread
>          // in current package. Maybe some threads are not valid in this package, but driver don't
>          // care. Second, driver will let current thread wait semaphore for all valid threads in
> @@ -1078,15 +1085,15 @@ ProgramProcessorRegister (
>          //
>  
>          //
> -        // First Notify ALL THREADS in current package that this thread has ready.
> +        // First Notify ALL THREADS in current package that this thread is ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
> -          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
> +        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
> +          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether VALID THREADS (not all threads) in current package have ready.
> +        // Second, check whether VALID THREADS (not all threads) in current package are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
>            LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> index 29e9ba92b4..9592430636 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> @@ -1,7 +1,7 @@
>  /** @file
>  Code for Processor S3 restoration
>  
> -Copyright (c) 2006 - 2019, Intel Corporation. All rights reserved.<BR>
> +Copyright (c) 2006 - 2020, Intel Corporation. All rights reserved.<BR>
>  SPDX-License-Identifier: BSD-2-Clause-Patent
>  
>  **/
> @@ -235,11 +235,11 @@ ProgramProcessorRegister (
>    CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;
>    volatile UINT32           *SemaphorePtr;
>    UINT32                    FirstThread;
> -  UINT32                    PackageThreadsCount;
>    UINT32                    CurrentThread;
> +  UINT32                    CurrentCore;
>    UINTN                     ProcessorIndex;
> -  UINTN                     ValidThreadCount;
> -  UINT32                    *ValidCoreCountPerPackage;
> +  UINT32                    *ThreadCountPerPackage;
> +  UINT8                     *ThreadCountPerCore;
>    EFI_STATUS                Status;
>    UINT64                    CurrentValue;
>  
> @@ -372,35 +372,52 @@ ProgramProcessorRegister (
>        //
>        ASSERT (
>          (ApLocation != NULL) &&
> -        (CpuStatus->ValidCoreCountPerPackage != 0) &&
> +        (CpuStatus->ThreadCountPerPackage != 0) &&
> +        (CpuStatus->ThreadCountPerCore != 0) &&
>          (CpuFlags->CoreSemaphoreCount != NULL) &&
>          (CpuFlags->PackageSemaphoreCount != NULL)
>          );
>        switch (RegisterTableEntry->Value) {
>        case CoreDepType:
>          SemaphorePtr = CpuFlags->CoreSemaphoreCount;
> +        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
> +
> +        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
>          //
>          // Get Offset info for the first thread in the core which current thread belongs to.
>          //
> -        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
> +        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;
>          CurrentThread = FirstThread + ApLocation->Thread;
> +
>          //
> -        // First Notify all threads in current Core that this thread has ready.
> +        // Different cores may have different valid threads in them. If driver maintail clearly
> +        // thread index in different cores, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all cores and use it as expect
> +        // thread number for all cores.
> +        // In below two steps logic, first current thread will Release semaphore for each thread
> +        // in current core. Maybe some threads are not valid in this core, but driver don't
> +        // care. Second, driver will let current thread wait semaphore for all valid threads in
> +        // current core. Because only the valid threads will do release semaphore for this
> +        // thread, driver here only need to wait the valid thread count.
> +        //
> +
> +        //
> +        // First Notify ALL THREADs in current Core that this thread is ready.
>          //
>          for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
>            S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether all valid threads in current core have ready.
> +        // Second, check whether all VALID THREADs (not all threads) in current core are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
>            S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
>  
>        case PackageDepType:
>          SemaphorePtr = CpuFlags->PackageSemaphoreCount;
> -        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
> +        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
>          //
>          // Get Offset info for the first thread in the package which current thread belongs to.
>          //
> @@ -408,18 +425,13 @@ ProgramProcessorRegister (
>          //
>          // Get the possible threads count for current package.
>          //
> -        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
>          CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
> -        //
> -        // Get the valid thread count for current package.
> -        //
> -        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
>  
>          //
> -        // Different packages may have different valid cores in them. If driver maintail clearly
> -        // cores number in different packages, the logic will be much complicated.
> -        // Here driver just simply records the max core number in all packages and use it as expect
> -        // core number for all packages.
> +        // Different packages may have different valid threads in them. If driver maintail clearly
> +        // thread index in different packages, the logic will be much complicated.
> +        // Here driver just simply records the max thread number in all packages and use it as expect
> +        // thread number for all packages.
>          // In below two steps logic, first current thread will Release semaphore for each thread
>          // in current package. Maybe some threads are not valid in this package, but driver don't
>          // care. Second, driver will let current thread wait semaphore for all valid threads in
> @@ -428,15 +440,15 @@ ProgramProcessorRegister (
>          //
>  
>          //
> -        // First Notify all threads in current package that this thread has ready.
> +        // First Notify ALL THREADS in current package that this thread is ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
>            S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
>          }
>          //
> -        // Second, check whether all valid threads in current package have ready.
> +        // Second, check whether VALID THREADS (not all threads) in current package are ready.
>          //
> -        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
> +        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
>            S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
>          }
>          break;
> @@ -1059,12 +1071,19 @@ GetAcpiCpuData (
>  
>    CpuStatus = &mAcpiCpuData.CpuStatus;
>    CopyMem (CpuStatus, &AcpiCpuData->CpuStatus, sizeof (CPU_STATUS_INFORMATION));
> -  if (AcpiCpuData->CpuStatus.ValidCoreCountPerPackage != 0) {
> -    CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> +  if (AcpiCpuData->CpuStatus.ThreadCountPerPackage != 0) {
> +    CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
>                                              sizeof (UINT32) * CpuStatus->PackageCount,
> -                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ValidCoreCountPerPackage
> +                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerPackage
> +                                            );
> +    ASSERT (CpuStatus->ThreadCountPerPackage != 0);
> +  }
> +  if (AcpiCpuData->CpuStatus.ThreadCountPerCore != 0) {
> +    CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> +                                            sizeof (UINT8) * (CpuStatus->PackageCount * CpuStatus->MaxCoreCount),
> +                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerCore
>                                              );
> -    ASSERT (CpuStatus->ValidCoreCountPerPackage != 0);
> +    ASSERT (CpuStatus->ThreadCountPerCore != 0);
>    }
>    if (AcpiCpuData->ApLocation != 0) {
>      mAcpiCpuData.ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> 



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#68234): https://edk2.groups.io/g/devel/message/68234
Mute This Topic: https://groups.io/mt/78657549/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-


Re: [edk2-devel] [PATCH] UefiCpuPkg/Feature: Support different thread count per core
Posted by Dong, Eric 3 years, 4 months ago
Reviewed-by: Eric Dong <eric.dong@intel.com>

-----Original Message-----
From: Ni, Ray <ray.ni@intel.com> 
Sent: Wednesday, December 2, 2020 7:55 PM
To: devel@edk2.groups.io
Cc: Dong, Eric <eric.dong@intel.com>; Lou, Yun <yun.lou@intel.com>; Laszlo Ersek <lersek@redhat.com>
Subject: [PATCH] UefiCpuPkg/Feature: Support different thread count per core

Today's code assumes every core contains the same number of threads.
It's not always TRUE for certain model.
Such assumption causes system hang when thread count per core
is different and there is core or package dependency between CPU
features (using CPU_FEATURE_CORE_BEFORE/AFTER,
CPU_FEATURE_PACKAGE_BEFORE/AFTER).

The change removes such assumption by calculating the actual thread
count per package and per core.

Signed-off-by: Ray Ni <ray.ni@intel.com>
Cc: Eric Dong <eric.dong@intel.com>
Cc: Yun Lou <yun.lou@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
---
 UefiCpuPkg/Include/AcpiCpuData.h              |  16 ++-
 .../CpuFeaturesInitialize.c                   | 113 ++++++++++--------
 UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c             |  73 ++++++-----
 3 files changed, 119 insertions(+), 83 deletions(-)

diff --git a/UefiCpuPkg/Include/AcpiCpuData.h b/UefiCpuPkg/Include/AcpiCpuData.h
index 77da5d4455..b5a69ad80c 100644
--- a/UefiCpuPkg/Include/AcpiCpuData.h
+++ b/UefiCpuPkg/Include/AcpiCpuData.h
@@ -1,7 +1,7 @@
 /** @file

 Definitions for CPU S3 data.

 

-Copyright (c) 2013 - 2018, Intel Corporation. All rights reserved.<BR>

+Copyright (c) 2013 - 2020, Intel Corporation. All rights reserved.<BR>

 SPDX-License-Identifier: BSD-2-Clause-Patent

 

 **/

@@ -60,14 +60,24 @@ typedef struct {
   UINT32                      MaxThreadCount;

   //

   // This field points to an array.

-  // This array saves valid core count (type UINT32) of each package.

+  // This array saves thread count (type UINT32) of each package.

   // The array has PackageCount elements.

   //

   // If the platform does not support MSR setting at S3 resume, and

   // therefore it doesn't need the dependency semaphores, it should set

   // this field to 0.

   //

-  EFI_PHYSICAL_ADDRESS        ValidCoreCountPerPackage;

+  EFI_PHYSICAL_ADDRESS        ThreadCountPerPackage;

+  //

+  // This field points to an array.

+  // This array saves thread count (type UINT8) of each core.

+  // The array has PackageCount * MaxCoreCount elements.

+  //

+  // If the platform does not support MSR setting at S3 resume, and

+  // therefore it doesn't need the dependency semaphores, it should set

+  // this field to 0.

+  //

+  EFI_PHYSICAL_ADDRESS        ThreadCountPerCore;

 } CPU_STATUS_INFORMATION;

 

 //

diff --git a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
index 5c673fa8cf..0cce909cc0 100644
--- a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
+++ b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
@@ -103,14 +103,13 @@ CpuInitDataInitialize (
   UINT32                               Package;

   UINT32                               Thread;

   EFI_CPU_PHYSICAL_LOCATION            *Location;

-  BOOLEAN                              *CoresVisited;

-  UINTN                                Index;

   UINT32                               PackageIndex;

   UINT32                               CoreIndex;

   UINT32                               First;

   ACPI_CPU_DATA                        *AcpiCpuData;

   CPU_STATUS_INFORMATION               *CpuStatus;

-  UINT32                               *ValidCoreCountPerPackage;

+  UINT32                               *ThreadCountPerPackage;

+  UINT8                                *ThreadCountPerCore;

   UINTN                                NumberOfCpus;

   UINTN                                NumberOfEnabledProcessors;

 

@@ -202,35 +201,32 @@ CpuInitDataInitialize (
   //

   // Collect valid core count in each package because not all cores are valid.

   //

-  ValidCoreCountPerPackage= AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);

-  ASSERT (ValidCoreCountPerPackage != 0);

-  CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ValidCoreCountPerPackage;

-  CoresVisited = AllocatePool (sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);

-  ASSERT (CoresVisited != NULL);

-

-  for (Index = 0; Index < CpuStatus->PackageCount; Index ++ ) {

-    ZeroMem (CoresVisited, sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);

-    //

-    // Collect valid cores in Current package.

-    //

-    for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {

-      Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;

-      if (Location->Package == Index && !CoresVisited[Location->Core] ) {

-        //

-        // The ValidCores position for Location->Core is valid.

-        // The possible values in ValidCores[Index] are 0 or 1.

-        // FALSE means no valid threads in this Core.

-        // TRUE means have valid threads in this core, no matter the thead count is 1 or more.

-        //

-        CoresVisited[Location->Core] = TRUE;

-        ValidCoreCountPerPackage[Index]++;

-      }

-    }

+  ThreadCountPerPackage = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);

+  ASSERT (ThreadCountPerPackage != NULL);

+  CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerPackage;

+

+  ThreadCountPerCore = AllocateZeroPool (sizeof (UINT8) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount);

+  ASSERT (ThreadCountPerCore != NULL);

+  CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerCore;

+

+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {

+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;

+    ThreadCountPerPackage[Location->Package]++;

+    ThreadCountPerCore[Location->Package * CpuStatus->MaxCoreCount + Location->Core]++;

   }

-  FreePool (CoresVisited);

 

-  for (Index = 0; Index <= Package; Index++) {

-    DEBUG ((DEBUG_INFO, "Package: %d, Valid Core : %d\n", Index, ValidCoreCountPerPackage[Index]));

+  for (PackageIndex = 0; PackageIndex < CpuStatus->PackageCount; PackageIndex++) {

+    if (ThreadCountPerPackage[PackageIndex] != 0) {

+      DEBUG ((DEBUG_INFO, "P%02d: Thread Count = %d\n", PackageIndex, ThreadCountPerPackage[PackageIndex]));

+      for (CoreIndex = 0; CoreIndex < CpuStatus->MaxCoreCount; CoreIndex++) {

+        if (ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex] != 0) {

+          DEBUG ((

+            DEBUG_INFO, "  P%02d C%04d, Thread Count = %d\n", PackageIndex, CoreIndex, 

+            ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex]

+            ));

+        }

+      }

+    }

   }

 

   CpuFeaturesData->CpuFlags.CoreSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);

@@ -894,11 +890,11 @@ ProgramProcessorRegister (
   CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;

   volatile UINT32           *SemaphorePtr;

   UINT32                    FirstThread;

-  UINT32                    PackageThreadsCount;

   UINT32                    CurrentThread;

+  UINT32                    CurrentCore;

   UINTN                     ProcessorIndex;

-  UINTN                     ValidThreadCount;

-  UINT32                    *ValidCoreCountPerPackage;

+  UINT32                    *ThreadCountPerPackage;

+  UINT8                     *ThreadCountPerCore;

   EFI_STATUS                Status;

   UINT64                    CurrentValue;

 

@@ -1029,28 +1025,44 @@ ProgramProcessorRegister (
       switch (RegisterTableEntry->Value) {

       case CoreDepType:

         SemaphorePtr = CpuFlags->CoreSemaphoreCount;

+        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;

+

+        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;

         //

         // Get Offset info for the first thread in the core which current thread belongs to.

         //

-        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;

+        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;

         CurrentThread = FirstThread + ApLocation->Thread;

+

         //

-        // First Notify all threads in current Core that this thread has ready.

+        // Different cores may have different valid threads in them. If driver maintail clearly

+        // thread index in different cores, the logic will be much complicated.

+        // Here driver just simply records the max thread number in all cores and use it as expect

+        // thread number for all cores.

+        // In below two steps logic, first current thread will Release semaphore for each thread

+        // in current core. Maybe some threads are not valid in this core, but driver don't

+        // care. Second, driver will let current thread wait semaphore for all valid threads in

+        // current core. Because only the valid threads will do release semaphore for this

+        // thread, driver here only need to wait the valid thread count.

+        //

+

+        //

+        // First Notify ALL THREADs in current Core that this thread is ready.

         //

         for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {

-          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);

+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);

         }

         //

-        // Second, check whether all valid threads in current core have ready.

+        // Second, check whether all VALID THREADs (not all threads) in current core are ready.

         //

-        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {

+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {

           LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);

         }

         break;

 

       case PackageDepType:

         SemaphorePtr = CpuFlags->PackageSemaphoreCount;

-        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;

+        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;

         //

         // Get Offset info for the first thread in the package which current thread belongs to.

         //

@@ -1058,18 +1070,13 @@ ProgramProcessorRegister (
         //

         // Get the possible threads count for current package.

         //

-        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;

         CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;

-        //

-        // Get the valid thread count for current package.

-        //

-        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];

 

         //

-        // Different packages may have different valid cores in them. If driver maintail clearly

-        // cores number in different packages, the logic will be much complicated.

-        // Here driver just simply records the max core number in all packages and use it as expect

-        // core number for all packages.

+        // Different packages may have different valid threads in them. If driver maintail clearly

+        // thread index in different packages, the logic will be much complicated.

+        // Here driver just simply records the max thread number in all packages and use it as expect

+        // thread number for all packages.

         // In below two steps logic, first current thread will Release semaphore for each thread

         // in current package. Maybe some threads are not valid in this package, but driver don't

         // care. Second, driver will let current thread wait semaphore for all valid threads in

@@ -1078,15 +1085,15 @@ ProgramProcessorRegister (
         //

 

         //

-        // First Notify ALL THREADS in current package that this thread has ready.

+        // First Notify ALL THREADS in current package that this thread is ready.

         //

-        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {

-          LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);

+        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {

+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);

         }

         //

-        // Second, check whether VALID THREADS (not all threads) in current package have ready.

+        // Second, check whether VALID THREADS (not all threads) in current package are ready.

         //

-        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {

+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {

           LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);

         }

         break;

diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
index 29e9ba92b4..9592430636 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
@@ -1,7 +1,7 @@
 /** @file

 Code for Processor S3 restoration

 

-Copyright (c) 2006 - 2019, Intel Corporation. All rights reserved.<BR>

+Copyright (c) 2006 - 2020, Intel Corporation. All rights reserved.<BR>

 SPDX-License-Identifier: BSD-2-Clause-Patent

 

 **/

@@ -235,11 +235,11 @@ ProgramProcessorRegister (
   CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;

   volatile UINT32           *SemaphorePtr;

   UINT32                    FirstThread;

-  UINT32                    PackageThreadsCount;

   UINT32                    CurrentThread;

+  UINT32                    CurrentCore;

   UINTN                     ProcessorIndex;

-  UINTN                     ValidThreadCount;

-  UINT32                    *ValidCoreCountPerPackage;

+  UINT32                    *ThreadCountPerPackage;

+  UINT8                     *ThreadCountPerCore;

   EFI_STATUS                Status;

   UINT64                    CurrentValue;

 

@@ -372,35 +372,52 @@ ProgramProcessorRegister (
       //

       ASSERT (

         (ApLocation != NULL) &&

-        (CpuStatus->ValidCoreCountPerPackage != 0) &&

+        (CpuStatus->ThreadCountPerPackage != 0) &&

+        (CpuStatus->ThreadCountPerCore != 0) &&

         (CpuFlags->CoreSemaphoreCount != NULL) &&

         (CpuFlags->PackageSemaphoreCount != NULL)

         );

       switch (RegisterTableEntry->Value) {

       case CoreDepType:

         SemaphorePtr = CpuFlags->CoreSemaphoreCount;

+        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;

+

+        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;

         //

         // Get Offset info for the first thread in the core which current thread belongs to.

         //

-        FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;

+        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;

         CurrentThread = FirstThread + ApLocation->Thread;

+

         //

-        // First Notify all threads in current Core that this thread has ready.

+        // Different cores may have different valid threads in them. If driver maintail clearly

+        // thread index in different cores, the logic will be much complicated.

+        // Here driver just simply records the max thread number in all cores and use it as expect

+        // thread number for all cores.

+        // In below two steps logic, first current thread will Release semaphore for each thread

+        // in current core. Maybe some threads are not valid in this core, but driver don't

+        // care. Second, driver will let current thread wait semaphore for all valid threads in

+        // current core. Because only the valid threads will do release semaphore for this

+        // thread, driver here only need to wait the valid thread count.

+        //

+

+        //

+        // First Notify ALL THREADs in current Core that this thread is ready.

         //

         for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {

           S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);

         }

         //

-        // Second, check whether all valid threads in current core have ready.

+        // Second, check whether all VALID THREADs (not all threads) in current core are ready.

         //

-        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {

+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {

           S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);

         }

         break;

 

       case PackageDepType:

         SemaphorePtr = CpuFlags->PackageSemaphoreCount;

-        ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;

+        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;

         //

         // Get Offset info for the first thread in the package which current thread belongs to.

         //

@@ -408,18 +425,13 @@ ProgramProcessorRegister (
         //

         // Get the possible threads count for current package.

         //

-        PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;

         CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;

-        //

-        // Get the valid thread count for current package.

-        //

-        ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];

 

         //

-        // Different packages may have different valid cores in them. If driver maintail clearly

-        // cores number in different packages, the logic will be much complicated.

-        // Here driver just simply records the max core number in all packages and use it as expect

-        // core number for all packages.

+        // Different packages may have different valid threads in them. If driver maintail clearly

+        // thread index in different packages, the logic will be much complicated.

+        // Here driver just simply records the max thread number in all packages and use it as expect

+        // thread number for all packages.

         // In below two steps logic, first current thread will Release semaphore for each thread

         // in current package. Maybe some threads are not valid in this package, but driver don't

         // care. Second, driver will let current thread wait semaphore for all valid threads in

@@ -428,15 +440,15 @@ ProgramProcessorRegister (
         //

 

         //

-        // First Notify all threads in current package that this thread has ready.

+        // First Notify ALL THREADS in current package that this thread is ready.

         //

-        for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {

+        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {

           S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);

         }

         //

-        // Second, check whether all valid threads in current package have ready.

+        // Second, check whether VALID THREADS (not all threads) in current package are ready.

         //

-        for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {

+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {

           S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);

         }

         break;

@@ -1059,12 +1071,19 @@ GetAcpiCpuData (
 

   CpuStatus = &mAcpiCpuData.CpuStatus;

   CopyMem (CpuStatus, &AcpiCpuData->CpuStatus, sizeof (CPU_STATUS_INFORMATION));

-  if (AcpiCpuData->CpuStatus.ValidCoreCountPerPackage != 0) {

-    CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (

+  if (AcpiCpuData->CpuStatus.ThreadCountPerPackage != 0) {

+    CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (

                                             sizeof (UINT32) * CpuStatus->PackageCount,

-                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ValidCoreCountPerPackage

+                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerPackage

+                                            );

+    ASSERT (CpuStatus->ThreadCountPerPackage != 0);

+  }

+  if (AcpiCpuData->CpuStatus.ThreadCountPerCore != 0) {

+    CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (

+                                            sizeof (UINT8) * (CpuStatus->PackageCount * CpuStatus->MaxCoreCount),

+                                            (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerCore

                                             );

-    ASSERT (CpuStatus->ValidCoreCountPerPackage != 0);

+    ASSERT (CpuStatus->ThreadCountPerCore != 0);

   }

   if (AcpiCpuData->ApLocation != 0) {

     mAcpiCpuData.ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (

-- 
2.27.0.windows.1



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#68224): https://edk2.groups.io/g/devel/message/68224
Mute This Topic: https://groups.io/mt/78657549/1787277
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [importer@patchew.org]
-=-=-=-=-=-=-=-=-=-=-=-