[PATCH 3/8] i386/cpu: Introduce cache model for SapphireRapids

Zhao Liu posted 8 patches 2 months, 1 week ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Zhao Liu <zhao1.liu@intel.com>
There is a newer version of this series
[PATCH 3/8] i386/cpu: Introduce cache model for SapphireRapids
Posted by Zhao Liu 2 months, 1 week ago
Add the cache model to SapphireRapids (v4) to better emulate its
environment.

The cache model is based on SapphireRapids-SP (Scalable Performance):

      --- cache 0 ---
      cache type                         = data cache (1)
      cache level                        = 0x1 (1)
      self-initializing cache level      = true
      fully associative cache            = false
      maximum IDs for CPUs sharing cache = 0x1 (1)
      maximum IDs for cores in pkg       = 0x3f (63)
      system coherency line size         = 0x40 (64)
      physical line partitions           = 0x1 (1)
      ways of associativity              = 0xc (12)
      number of sets                     = 0x40 (64)
      WBINVD/INVD acts on lower caches   = false
      inclusive to lower caches          = false
      complex cache indexing             = false
      number of sets (s)                 = 64
      (size synth)                       = 49152 (48 KB)
      --- cache 1 ---
      cache type                         = instruction cache (2)
      cache level                        = 0x1 (1)
      self-initializing cache level      = true
      fully associative cache            = false
      maximum IDs for CPUs sharing cache = 0x1 (1)
      maximum IDs for cores in pkg       = 0x3f (63)
      system coherency line size         = 0x40 (64)
      physical line partitions           = 0x1 (1)
      ways of associativity              = 0x8 (8)
      number of sets                     = 0x40 (64)
      WBINVD/INVD acts on lower caches   = false
      inclusive to lower caches          = false
      complex cache indexing             = false
      number of sets (s)                 = 64
      (size synth)                       = 32768 (32 KB)
      --- cache 2 ---
      cache type                         = unified cache (3)
      cache level                        = 0x2 (2)
      self-initializing cache level      = true
      fully associative cache            = false
      maximum IDs for CPUs sharing cache = 0x1 (1)
      maximum IDs for cores in pkg       = 0x3f (63)
      system coherency line size         = 0x40 (64)
      physical line partitions           = 0x1 (1)
      ways of associativity              = 0x10 (16)
      number of sets                     = 0x800 (2048)
      WBINVD/INVD acts on lower caches   = false
      inclusive to lower caches          = false
      complex cache indexing             = false
      number of sets (s)                 = 2048
      (size synth)                       = 2097152 (2 MB)
      --- cache 3 ---
      cache type                         = unified cache (3)
      cache level                        = 0x3 (3)
      self-initializing cache level      = true
      fully associative cache            = false
      maximum IDs for CPUs sharing cache = 0x7f (127)
      maximum IDs for cores in pkg       = 0x3f (63)
      system coherency line size         = 0x40 (64)
      physical line partitions           = 0x1 (1)
      ways of associativity              = 0xf (15)
      number of sets                     = 0x10000 (65536)
      WBINVD/INVD acts on lower caches   = false
      inclusive to lower caches          = false
      complex cache indexing             = true
      number of sets (s)                 = 65536
      (size synth)                       = 62914560 (60 MB)
      --- cache 4 ---
      cache type                         = no more caches (0)

Suggested-by: Tejus GK <tejus.gk@nutanix.com>
Suggested-by: Jason Zeng <jason.zeng@intel.com>
Suggested-by: "Daniel P . Berrangé" <berrange@redhat.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
---
 target/i386/cpu.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b40f1a5b6648..a7f2e5dd3fcb 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2886,6 +2886,97 @@ static const CPUCaches epyc_turin_cache_info = {
     }
 };
 
+static const CPUCaches xeon_spr_cache_info = {
+    .l1d_cache = &(CPUCacheInfo) {
+        /* CPUID 0x4.0x0.EAX */
+        .type = DATA_CACHE,
+        .level = 1,
+        .self_init = true,
+
+        /* CPUID 0x4.0x0.EBX */
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 12,
+
+        /* CPUID 0x4.0x0.ECX */
+        .sets = 64,
+
+        /* CPUID 0x4.0x0.EDX */
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = false,
+
+        .size = 48 * KiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l1i_cache = &(CPUCacheInfo) {
+        /* CPUID 0x4.0x1.EAX */
+        .type = INSTRUCTION_CACHE,
+        .level = 1,
+        .self_init = true,
+
+        /* CPUID 0x4.0x1.EBX */
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 8,
+
+        /* CPUID 0x4.0x1.ECX */
+        .sets = 64,
+
+        /* CPUID 0x4.0x1.EDX */
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = false,
+
+        .size = 32 * KiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l2_cache = &(CPUCacheInfo) {
+        /* CPUID 0x4.0x2.EAX */
+        .type = UNIFIED_CACHE,
+        .level = 2,
+        .self_init = true,
+
+        /* CPUID 0x4.0x2.EBX */
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 16,
+
+        /* CPUID 0x4.0x2.ECX */
+        .sets = 2048,
+
+        /* CPUID 0x4.0x2.EDX */
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = false,
+
+        .size = 2 * MiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+    },
+    .l3_cache = &(CPUCacheInfo) {
+        /* CPUID 0x4.0x3.EAX */
+        .type = UNIFIED_CACHE,
+        .level = 3,
+        .self_init = true,
+
+        /* CPUID 0x4.0x3.EBX */
+        .line_size = 64,
+        .partitions = 1,
+        .associativity = 15,
+
+        /* CPUID 0x4.0x3.ECX */
+        .sets = 65536,
+
+        /* CPUID 0x4.0x3.EDX */
+        .no_invd_sharing = false,
+        .inclusive = false,
+        .complex_indexing = true,
+
+        .size = 60 * MiB,
+        .share_level = CPU_TOPOLOGY_LEVEL_SOCKET,
+    },
+};
+
 static const CPUCaches xeon_gnr_cache_info = {
     .l1d_cache = &(CPUCacheInfo) {
         /* CPUID 0x4.0x0.EAX */
@@ -4892,6 +4983,11 @@ static const X86CPUDefinition builtin_x86_defs[] = {
                     { /* end of list */ }
                 }
             },
+            {
+                .version = 4,
+                .note = "with spr-sp cache model",
+                .cache_info = &xeon_spr_cache_info,
+            },
             { /* end of list */ }
         }
     },
-- 
2.34.1


Re: [PATCH 3/8] i386/cpu: Introduce cache model for SapphireRapids
Posted by Tao Su 1 month, 3 weeks ago
On Thu, Jun 26, 2025 at 04:31:00PM +0800, Zhao Liu wrote:
> Add the cache model to SapphireRapids (v4) to better emulate its
> environment.
> 
> The cache model is based on SapphireRapids-SP (Scalable Performance):
> 
>       --- cache 0 ---
>       cache type                         = data cache (1)
>       cache level                        = 0x1 (1)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x1 (1)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0xc (12)
>       number of sets                     = 0x40 (64)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = false
>       number of sets (s)                 = 64
>       (size synth)                       = 49152 (48 KB)
>       --- cache 1 ---
>       cache type                         = instruction cache (2)
>       cache level                        = 0x1 (1)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x1 (1)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0x8 (8)
>       number of sets                     = 0x40 (64)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = false
>       number of sets (s)                 = 64
>       (size synth)                       = 32768 (32 KB)
>       --- cache 2 ---
>       cache type                         = unified cache (3)
>       cache level                        = 0x2 (2)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x1 (1)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0x10 (16)
>       number of sets                     = 0x800 (2048)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = false
>       number of sets (s)                 = 2048
>       (size synth)                       = 2097152 (2 MB)
>       --- cache 3 ---
>       cache type                         = unified cache (3)
>       cache level                        = 0x3 (3)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x7f (127)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0xf (15)
>       number of sets                     = 0x10000 (65536)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = true
>       number of sets (s)                 = 65536
>       (size synth)                       = 62914560 (60 MB)
>       --- cache 4 ---
>       cache type                         = no more caches (0)
> 
> Suggested-by: Tejus GK <tejus.gk@nutanix.com>
> Suggested-by: Jason Zeng <jason.zeng@intel.com>
> Suggested-by: "Daniel P . Berrangé" <berrange@redhat.com>
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> ---
>  target/i386/cpu.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 96 insertions(+)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index b40f1a5b6648..a7f2e5dd3fcb 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -2886,6 +2886,97 @@ static const CPUCaches epyc_turin_cache_info = {
>      }
>  };
>  
> +static const CPUCaches xeon_spr_cache_info = {
> +    .l1d_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x0.EAX */
> +        .type = DATA_CACHE,
> +        .level = 1,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x0.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 12,
> +
> +        /* CPUID 0x4.0x0.ECX */
> +        .sets = 64,
> +
> +        /* CPUID 0x4.0x0.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = false,
> +
> +        .size = 48 * KiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
> +    },
> +    .l1i_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x1.EAX */
> +        .type = INSTRUCTION_CACHE,
> +        .level = 1,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x1.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 8,
> +
> +        /* CPUID 0x4.0x1.ECX */
> +        .sets = 64,
> +
> +        /* CPUID 0x4.0x1.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = false,
> +
> +        .size = 32 * KiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
> +    },
> +    .l2_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x2.EAX */
> +        .type = UNIFIED_CACHE,
> +        .level = 2,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x2.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 16,
> +
> +        /* CPUID 0x4.0x2.ECX */
> +        .sets = 2048,
> +
> +        /* CPUID 0x4.0x2.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = false,
> +
> +        .size = 2 * MiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
> +    },
> +    .l3_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x3.EAX */
> +        .type = UNIFIED_CACHE,
> +        .level = 3,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x3.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 15,
> +
> +        /* CPUID 0x4.0x3.ECX */
> +        .sets = 65536,
> +
> +        /* CPUID 0x4.0x3.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = true,
> +
> +        .size = 60 * MiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_SOCKET,
> +    },
> +};
> +
>  static const CPUCaches xeon_gnr_cache_info = {
>      .l1d_cache = &(CPUCacheInfo) {
>          /* CPUID 0x4.0x0.EAX */
> @@ -4892,6 +4983,11 @@ static const X86CPUDefinition builtin_x86_defs[] = {
>                      { /* end of list */ }
>                  }
>              },
> +            {
> +                .version = 4,
> +                .note = "with spr-sp cache model",
> +                .cache_info = &xeon_spr_cache_info,
> +            },
>              { /* end of list */ }
>          }
>      },

Reviewed-by: Tao Su <tao1.su@linux.intel.com>

> -- 
> 2.34.1
> 
> 
Re: [PATCH 3/8] i386/cpu: Introduce cache model for SapphireRapids
Posted by Mi, Dapeng 2 months ago
On 6/26/2025 4:31 PM, Zhao Liu wrote:
> Add the cache model to SapphireRapids (v4) to better emulate its
> environment.
>
> The cache model is based on SapphireRapids-SP (Scalable Performance):
>
>       --- cache 0 ---
>       cache type                         = data cache (1)
>       cache level                        = 0x1 (1)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x1 (1)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0xc (12)
>       number of sets                     = 0x40 (64)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = false
>       number of sets (s)                 = 64
>       (size synth)                       = 49152 (48 KB)
>       --- cache 1 ---
>       cache type                         = instruction cache (2)
>       cache level                        = 0x1 (1)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x1 (1)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0x8 (8)
>       number of sets                     = 0x40 (64)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = false
>       number of sets (s)                 = 64
>       (size synth)                       = 32768 (32 KB)
>       --- cache 2 ---
>       cache type                         = unified cache (3)
>       cache level                        = 0x2 (2)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x1 (1)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0x10 (16)
>       number of sets                     = 0x800 (2048)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = false
>       number of sets (s)                 = 2048
>       (size synth)                       = 2097152 (2 MB)
>       --- cache 3 ---
>       cache type                         = unified cache (3)
>       cache level                        = 0x3 (3)
>       self-initializing cache level      = true
>       fully associative cache            = false
>       maximum IDs for CPUs sharing cache = 0x7f (127)
>       maximum IDs for cores in pkg       = 0x3f (63)
>       system coherency line size         = 0x40 (64)
>       physical line partitions           = 0x1 (1)
>       ways of associativity              = 0xf (15)
>       number of sets                     = 0x10000 (65536)
>       WBINVD/INVD acts on lower caches   = false
>       inclusive to lower caches          = false
>       complex cache indexing             = true
>       number of sets (s)                 = 65536
>       (size synth)                       = 62914560 (60 MB)
>       --- cache 4 ---
>       cache type                         = no more caches (0)
>
> Suggested-by: Tejus GK <tejus.gk@nutanix.com>
> Suggested-by: Jason Zeng <jason.zeng@intel.com>
> Suggested-by: "Daniel P . Berrangé" <berrange@redhat.com>
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> ---
>  target/i386/cpu.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 96 insertions(+)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index b40f1a5b6648..a7f2e5dd3fcb 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -2886,6 +2886,97 @@ static const CPUCaches epyc_turin_cache_info = {
>      }
>  };
>  
> +static const CPUCaches xeon_spr_cache_info = {
> +    .l1d_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x0.EAX */
> +        .type = DATA_CACHE,
> +        .level = 1,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x0.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 12,
> +
> +        /* CPUID 0x4.0x0.ECX */
> +        .sets = 64,
> +
> +        /* CPUID 0x4.0x0.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = false,
> +
> +        .size = 48 * KiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
> +    },
> +    .l1i_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x1.EAX */
> +        .type = INSTRUCTION_CACHE,
> +        .level = 1,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x1.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 8,
> +
> +        /* CPUID 0x4.0x1.ECX */
> +        .sets = 64,
> +
> +        /* CPUID 0x4.0x1.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = false,
> +
> +        .size = 32 * KiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
> +    },
> +    .l2_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x2.EAX */
> +        .type = UNIFIED_CACHE,
> +        .level = 2,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x2.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 16,
> +
> +        /* CPUID 0x4.0x2.ECX */
> +        .sets = 2048,
> +
> +        /* CPUID 0x4.0x2.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = false,
> +
> +        .size = 2 * MiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_CORE,
> +    },
> +    .l3_cache = &(CPUCacheInfo) {
> +        /* CPUID 0x4.0x3.EAX */
> +        .type = UNIFIED_CACHE,
> +        .level = 3,
> +        .self_init = true,
> +
> +        /* CPUID 0x4.0x3.EBX */
> +        .line_size = 64,
> +        .partitions = 1,
> +        .associativity = 15,
> +
> +        /* CPUID 0x4.0x3.ECX */
> +        .sets = 65536,
> +
> +        /* CPUID 0x4.0x3.EDX */
> +        .no_invd_sharing = false,
> +        .inclusive = false,
> +        .complex_indexing = true,
> +
> +        .size = 60 * MiB,
> +        .share_level = CPU_TOPOLOGY_LEVEL_SOCKET,
> +    },
> +};
> +
>  static const CPUCaches xeon_gnr_cache_info = {
>      .l1d_cache = &(CPUCacheInfo) {
>          /* CPUID 0x4.0x0.EAX */
> @@ -4892,6 +4983,11 @@ static const X86CPUDefinition builtin_x86_defs[] = {
>                      { /* end of list */ }
>                  }
>              },
> +            {
> +                .version = 4,
> +                .note = "with spr-sp cache model",
> +                .cache_info = &xeon_spr_cache_info,
> +            },
>              { /* end of list */ }
>          }
>      },

Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>