From: Zhao Liu <zhao1.liu@intel.com>
CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
Intel CPUs.
After cache models have topology information, we can use
CPUCacheInfo.share_level to decide which topology level to be encoded
into CPUID[4].EAX[bits 25:14].
And since maximum_processor_id (original "num_apic_ids") is parsed
based on cpu topology levels, which are verified when parsing smp,
there's no need to check this value with "assert(num_apic_ids > 0)"
again, so remove this assert.
Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
helper to make the code cleaner.
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
Changes since v1:
* Use "enum CPUTopoLevel share_level" as the parameter in
max_processor_ids_for_cache().
* Make cache_into_passthrough case also use
max_processor_ids_for_cache() and max_core_ids_in_package() to
encode CPUID[4]. (Yanan)
* Rename the title of this patch (the original is "i386: Use
CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
---
target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
1 file changed, 43 insertions(+), 27 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 81e07474acef..b23e8190dc68 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -235,22 +235,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
0 /* Invalid value */)
+static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
+ enum CPUTopoLevel share_level)
+{
+ uint32_t num_ids = 0;
+
+ switch (share_level) {
+ case CPU_TOPO_LEVEL_CORE:
+ num_ids = 1 << apicid_core_offset(topo_info);
+ break;
+ case CPU_TOPO_LEVEL_DIE:
+ num_ids = 1 << apicid_die_offset(topo_info);
+ break;
+ case CPU_TOPO_LEVEL_PACKAGE:
+ num_ids = 1 << apicid_pkg_offset(topo_info);
+ break;
+ default:
+ /*
+ * Currently there is no use case for SMT and MODULE, so use
+ * assert directly to facilitate debugging.
+ */
+ g_assert_not_reached();
+ }
+
+ return num_ids - 1;
+}
+
+static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
+{
+ uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
+ apicid_core_offset(topo_info));
+ return num_cores - 1;
+}
/* Encode cache info for CPUID[4] */
static void encode_cache_cpuid4(CPUCacheInfo *cache,
- int num_apic_ids, int num_cores,
+ X86CPUTopoInfo *topo_info,
uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx)
{
assert(cache->size == cache->line_size * cache->associativity *
cache->partitions * cache->sets);
- assert(num_apic_ids > 0);
*eax = CACHE_TYPE(cache->type) |
CACHE_LEVEL(cache->level) |
(cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
- ((num_cores - 1) << 26) |
- ((num_apic_ids - 1) << 14);
+ (max_core_ids_in_package(topo_info) << 26) |
+ (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
assert(cache->line_size > 0);
assert(cache->partitions > 0);
@@ -6263,56 +6294,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
if (cores_per_pkg > 1) {
- int addressable_cores_offset =
- apicid_pkg_offset(&topo_info) -
- apicid_core_offset(&topo_info);
-
*eax &= ~0xFC000000;
- *eax |= (1 << (addressable_cores_offset - 1)) << 26;
+ *eax |= max_core_ids_in_package(&topo_info) << 26;
}
if (host_vcpus_per_cache > cpus_per_pkg) {
- int pkg_offset = apicid_pkg_offset(&topo_info);
-
*eax &= ~0x3FFC000;
- *eax |= (1 << (pkg_offset - 1)) << 14;
+ *eax |=
+ max_processor_ids_for_cache(&topo_info,
+ CPU_TOPO_LEVEL_PACKAGE) << 14;
}
}
} else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
*eax = *ebx = *ecx = *edx = 0;
} else {
*eax = 0;
- int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
- apicid_core_offset(&topo_info);
- int core_offset, die_offset;
switch (count) {
case 0: /* L1 dcache info */
- core_offset = apicid_core_offset(&topo_info);
encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
- (1 << core_offset),
- (1 << addressable_cores_offset),
+ &topo_info,
eax, ebx, ecx, edx);
break;
case 1: /* L1 icache info */
- core_offset = apicid_core_offset(&topo_info);
encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
- (1 << core_offset),
- (1 << addressable_cores_offset),
+ &topo_info,
eax, ebx, ecx, edx);
break;
case 2: /* L2 cache info */
- core_offset = apicid_core_offset(&topo_info);
encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
- (1 << core_offset),
- (1 << addressable_cores_offset),
+ &topo_info,
eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
- die_offset = apicid_die_offset(&topo_info);
if (cpu->enable_l3_cache) {
encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
- (1 << die_offset),
- (1 << addressable_cores_offset),
+ &topo_info,
eax, ebx, ecx, edx);
break;
}
--
2.34.1
On 1/8/2024 4:27 PM, Zhao Liu wrote:
> From: Zhao Liu <zhao1.liu@intel.com>
>
> CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
> Intel CPUs.
>
> After cache models have topology information, we can use
> CPUCacheInfo.share_level to decide which topology level to be encoded
> into CPUID[4].EAX[bits 25:14].
>
> And since maximum_processor_id (original "num_apic_ids") is parsed
> based on cpu topology levels, which are verified when parsing smp, it's
> no need to check this value by "assert(num_apic_ids > 0)" again, so
> remove this assert.
>
> Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
> helper to make the code cleaner.
>
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> Tested-by: Babu Moger <babu.moger@amd.com>
> Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> Changes since v1:
> * Use "enum CPUTopoLevel share_level" as the parameter in
> max_processor_ids_for_cache().
> * Make cache_into_passthrough case also use
> max_processor_ids_for_cache() and max_core_ids_in_package() to
> encode CPUID[4]. (Yanan)
> * Rename the title of this patch (the original is "i386: Use
> CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
> ---
> target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
> 1 file changed, 43 insertions(+), 27 deletions(-)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 81e07474acef..b23e8190dc68 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -235,22 +235,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
> ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
> 0 /* Invalid value */)
>
> +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
> + enum CPUTopoLevel share_level)
I prefer the name max_lp_ids_share_the_cache().
> +{
> + uint32_t num_ids = 0;
> +
> + switch (share_level) {
> + case CPU_TOPO_LEVEL_CORE:
> + num_ids = 1 << apicid_core_offset(topo_info);
> + break;
> + case CPU_TOPO_LEVEL_DIE:
> + num_ids = 1 << apicid_die_offset(topo_info);
> + break;
> + case CPU_TOPO_LEVEL_PACKAGE:
> + num_ids = 1 << apicid_pkg_offset(topo_info);
> + break;
> + default:
> + /*
> + * Currently there is no use case for SMT and MODULE, so use
> + * assert directly to facilitate debugging.
> + */
> + g_assert_not_reached();
> + }
> +
> + return num_ids - 1;
Suggest just returning num_ids, and letting the caller do the -1 work.
> +}
> +
> +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
> +{
> + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
> + apicid_core_offset(topo_info));
> + return num_cores - 1;
ditto.
> +}
>
> /* Encode cache info for CPUID[4] */
> static void encode_cache_cpuid4(CPUCacheInfo *cache,
> - int num_apic_ids, int num_cores,
> + X86CPUTopoInfo *topo_info,
> uint32_t *eax, uint32_t *ebx,
> uint32_t *ecx, uint32_t *edx)
> {
> assert(cache->size == cache->line_size * cache->associativity *
> cache->partitions * cache->sets);
>
> - assert(num_apic_ids > 0);
> *eax = CACHE_TYPE(cache->type) |
> CACHE_LEVEL(cache->level) |
> (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
> - ((num_cores - 1) << 26) |
> - ((num_apic_ids - 1) << 14);
> + (max_core_ids_in_package(topo_info) << 26) |
> + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
By the way, we can change the order of the two lines. :)
>
> assert(cache->line_size > 0);
> assert(cache->partitions > 0);
> @@ -6263,56 +6294,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
>
> if (cores_per_pkg > 1) {
> - int addressable_cores_offset =
> - apicid_pkg_offset(&topo_info) -
> - apicid_core_offset(&topo_info);
> -
> *eax &= ~0xFC000000;
> - *eax |= (1 << (addressable_cores_offset - 1)) << 26;
> + *eax |= max_core_ids_in_package(&topo_info) << 26;
> }
> if (host_vcpus_per_cache > cpus_per_pkg) {
> - int pkg_offset = apicid_pkg_offset(&topo_info);
> -
> *eax &= ~0x3FFC000;
> - *eax |= (1 << (pkg_offset - 1)) << 14;
> + *eax |=
> + max_processor_ids_for_cache(&topo_info,
> + CPU_TOPO_LEVEL_PACKAGE) << 14;
> }
> }
> } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
> *eax = *ebx = *ecx = *edx = 0;
> } else {
> *eax = 0;
> - int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
> - apicid_core_offset(&topo_info);
> - int core_offset, die_offset;
>
> switch (count) {
> case 0: /* L1 dcache info */
> - core_offset = apicid_core_offset(&topo_info);
> encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
> - (1 << core_offset),
> - (1 << addressable_cores_offset),
> + &topo_info,
> eax, ebx, ecx, edx);
> break;
> case 1: /* L1 icache info */
> - core_offset = apicid_core_offset(&topo_info);
> encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
> - (1 << core_offset),
> - (1 << addressable_cores_offset),
> + &topo_info,
> eax, ebx, ecx, edx);
> break;
> case 2: /* L2 cache info */
> - core_offset = apicid_core_offset(&topo_info);
> encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
> - (1 << core_offset),
> - (1 << addressable_cores_offset),
> + &topo_info,
> eax, ebx, ecx, edx);
> break;
> case 3: /* L3 cache info */
> - die_offset = apicid_die_offset(&topo_info);
> if (cpu->enable_l3_cache) {
> encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
> - (1 << die_offset),
> - (1 << addressable_cores_offset),
> + &topo_info,
> eax, ebx, ecx, edx);
> break;
> }
Hi Xiaoyao,
On Sun, Jan 14, 2024 at 10:31:50PM +0800, Xiaoyao Li wrote:
> Date: Sun, 14 Jan 2024 22:31:50 +0800
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> Subject: Re: [PATCH v7 14/16] i386: Use CPUCacheInfo.share_level to encode
> CPUID[4]
>
> On 1/8/2024 4:27 PM, Zhao Liu wrote:
> > From: Zhao Liu <zhao1.liu@intel.com>
> >
> > CPUID[4].EAX[bits 25:14] is used to represent the cache topology for
> > Intel CPUs.
> >
> > After cache models have topology information, we can use
> > CPUCacheInfo.share_level to decide which topology level to be encoded
> > into CPUID[4].EAX[bits 25:14].
> >
> > And since maximum_processor_id (original "num_apic_ids") is parsed
> > based on cpu topology levels, which are verified when parsing smp, it's
> > no need to check this value by "assert(num_apic_ids > 0)" again, so
> > remove this assert.
> >
> > Additionally, wrap the encoding of CPUID[4].EAX[bits 31:26] into a
> > helper to make the code cleaner.
> >
> > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > Tested-by: Babu Moger <babu.moger@amd.com>
> > Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> > Acked-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > Changes since v1:
> > * Use "enum CPUTopoLevel share_level" as the parameter in
> > max_processor_ids_for_cache().
> > * Make cache_into_passthrough case also use
> > max_processor_ids_for_cache() and max_core_ids_in_package() to
> > encode CPUID[4]. (Yanan)
> > * Rename the title of this patch (the original is "i386: Use
> > CPUCacheInfo.share_level to encode CPUID[4].EAX[bits 25:14]").
> > ---
> > target/i386/cpu.c | 70 +++++++++++++++++++++++++++++------------------
> > 1 file changed, 43 insertions(+), 27 deletions(-)
> >
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 81e07474acef..b23e8190dc68 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -235,22 +235,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache)
> > ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \
> > 0 /* Invalid value */)
> > +static uint32_t max_processor_ids_for_cache(X86CPUTopoInfo *topo_info,
> > + enum CPUTopoLevel share_level)
>
> I prefer the name to max_lp_ids_share_the_cache()
Yes, lp is more accurate.
>
> > +{
> > + uint32_t num_ids = 0;
> > +
> > + switch (share_level) {
> > + case CPU_TOPO_LEVEL_CORE:
> > + num_ids = 1 << apicid_core_offset(topo_info);
> > + break;
> > + case CPU_TOPO_LEVEL_DIE:
> > + num_ids = 1 << apicid_die_offset(topo_info);
> > + break;
> > + case CPU_TOPO_LEVEL_PACKAGE:
> > + num_ids = 1 << apicid_pkg_offset(topo_info);
> > + break;
> > + default:
> > + /*
> > + * Currently there is no use case for SMT and MODULE, so use
> > + * assert directly to facilitate debugging.
> > + */
> > + g_assert_not_reached();
> > + }
> > +
> > + return num_ids - 1;
>
> suggest to just return num_ids, and let the caller to do the -1 work.
Emm, SDM calls the whole "num_ids - 1" (CPUID.0x4.EAX[bits 14-25]) as
"maximum number of addressable IDs for logical processors sharing this
cache"...
So if this helper just names "num_ids" as max_lp_ids_share_the_cache,
I'm not sure there would be ambiguity here?
>
> > +}
> > +
> > +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
> > +{
> > + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
> > + apicid_core_offset(topo_info));
> > + return num_cores - 1;
>
> ditto.
>
> > +}
> > /* Encode cache info for CPUID[4] */
> > static void encode_cache_cpuid4(CPUCacheInfo *cache,
> > - int num_apic_ids, int num_cores,
> > + X86CPUTopoInfo *topo_info,
> > uint32_t *eax, uint32_t *ebx,
> > uint32_t *ecx, uint32_t *edx)
> > {
> > assert(cache->size == cache->line_size * cache->associativity *
> > cache->partitions * cache->sets);
> > - assert(num_apic_ids > 0);
> > *eax = CACHE_TYPE(cache->type) |
> > CACHE_LEVEL(cache->level) |
> > (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) |
> > - ((num_cores - 1) << 26) |
> > - ((num_apic_ids - 1) << 14);
> > + (max_core_ids_in_package(topo_info) << 26) |
> > + (max_processor_ids_for_cache(topo_info, cache->share_level) << 14);
>
> by the way, we can change the order of the two line. :)
Yes!
Thanks,
Zhao
>
> > assert(cache->line_size > 0);
> > assert(cache->partitions > 0);
> > @@ -6263,56 +6294,41 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> > int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
> > if (cores_per_pkg > 1) {
> > - int addressable_cores_offset =
> > - apicid_pkg_offset(&topo_info) -
> > - apicid_core_offset(&topo_info);
> > -
> > *eax &= ~0xFC000000;
> > - *eax |= (1 << (addressable_cores_offset - 1)) << 26;
> > + *eax |= max_core_ids_in_package(&topo_info) << 26;
> > }
> > if (host_vcpus_per_cache > cpus_per_pkg) {
> > - int pkg_offset = apicid_pkg_offset(&topo_info);
> > -
> > *eax &= ~0x3FFC000;
> > - *eax |= (1 << (pkg_offset - 1)) << 14;
> > + *eax |=
> > + max_processor_ids_for_cache(&topo_info,
> > + CPU_TOPO_LEVEL_PACKAGE) << 14;
> > }
> > }
> > } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
> > *eax = *ebx = *ecx = *edx = 0;
> > } else {
> > *eax = 0;
> > - int addressable_cores_offset = apicid_pkg_offset(&topo_info) -
> > - apicid_core_offset(&topo_info);
> > - int core_offset, die_offset;
> > switch (count) {
> > case 0: /* L1 dcache info */
> > - core_offset = apicid_core_offset(&topo_info);
> > encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
> > - (1 << core_offset),
> > - (1 << addressable_cores_offset),
> > + &topo_info,
> > eax, ebx, ecx, edx);
> > break;
> > case 1: /* L1 icache info */
> > - core_offset = apicid_core_offset(&topo_info);
> > encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
> > - (1 << core_offset),
> > - (1 << addressable_cores_offset),
> > + &topo_info,
> > eax, ebx, ecx, edx);
> > break;
> > case 2: /* L2 cache info */
> > - core_offset = apicid_core_offset(&topo_info);
> > encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
> > - (1 << core_offset),
> > - (1 << addressable_cores_offset),
> > + &topo_info,
> > eax, ebx, ecx, edx);
> > break;
> > case 3: /* L3 cache info */
> > - die_offset = apicid_die_offset(&topo_info);
> > if (cpu->enable_l3_cache) {
> > encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
> > - (1 << die_offset),
> > - (1 << addressable_cores_offset),
> > + &topo_info,
> > eax, ebx, ecx, edx);
> > break;
> > }
>
On 1/15/2024 11:40 AM, Zhao Liu wrote:
>>> +{
>>> + uint32_t num_ids = 0;
>>> +
>>> + switch (share_level) {
>>> + case CPU_TOPO_LEVEL_CORE:
>>> + num_ids = 1 << apicid_core_offset(topo_info);
>>> + break;
>>> + case CPU_TOPO_LEVEL_DIE:
>>> + num_ids = 1 << apicid_die_offset(topo_info);
>>> + break;
>>> + case CPU_TOPO_LEVEL_PACKAGE:
>>> + num_ids = 1 << apicid_pkg_offset(topo_info);
>>> + break;
>>> + default:
>>> + /*
>>> + * Currently there is no use case for SMT and MODULE, so use
>>> + * assert directly to facilitate debugging.
>>> + */
>>> + g_assert_not_reached();
>>> + }
>>> +
>>> + return num_ids - 1;
>> suggest to just return num_ids, and let the caller to do the -1 work.
> Emm, SDM calls the whole "num_ids - 1" (CPUID.0x4.EAX[bits 14-25]) as
> "maximum number of addressable IDs for logical processors sharing this
> cache"...
>
> So if this helper just names "num_ids" as max_lp_ids_share_the_cache,
> I'm not sure there would be ambiguity here?
I don't think it will.
If this function is going to be used anywhere else, people will need to
keep in mind to do the +1 work to get the actual number.
Leave the -1 trick to where the CPUID value gets encoded; let's make this
function generic.
Hi Xiaoyao,
On Mon, Jan 15, 2024 at 12:25:19PM +0800, Xiaoyao Li wrote:
> Date: Mon, 15 Jan 2024 12:25:19 +0800
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> Subject: Re: [PATCH v7 14/16] i386: Use CPUCacheInfo.share_level to encode
> CPUID[4]
>
> On 1/15/2024 11:40 AM, Zhao Liu wrote:
> > > > +{
> > > > + uint32_t num_ids = 0;
> > > > +
> > > > + switch (share_level) {
> > > > + case CPU_TOPO_LEVEL_CORE:
> > > > + num_ids = 1 << apicid_core_offset(topo_info);
> > > > + break;
> > > > + case CPU_TOPO_LEVEL_DIE:
> > > > + num_ids = 1 << apicid_die_offset(topo_info);
> > > > + break;
> > > > + case CPU_TOPO_LEVEL_PACKAGE:
> > > > + num_ids = 1 << apicid_pkg_offset(topo_info);
> > > > + break;
> > > > + default:
> > > > + /*
> > > > + * Currently there is no use case for SMT and MODULE, so use
> > > > + * assert directly to facilitate debugging.
> > > > + */
> > > > + g_assert_not_reached();
> > > > + }
> > > > +
> > > > + return num_ids - 1;
> > > suggest to just return num_ids, and let the caller to do the -1 work.
> > Emm, SDM calls the whole "num_ids - 1" (CPUID.0x4.EAX[bits 14-25]) as
> > "maximum number of addressable IDs for logical processors sharing this
> > cache"...
> >
> > So if this helper just names "num_ids" as max_lp_ids_share_the_cache,
> > I'm not sure there would be ambiguity here?
>
> I don't think it will.
>
> if this function is going to used anywhere else, people will need to keep in
> mind to do +1 stuff to get the actual number.
>
> leaving the -1 trick to where CPUID value gets encoded. let's make this
> function generic.
This helper is the complete pattern for getting addressable IDs; that is
to say, the "- 1" is also part of this calculation.
Its own meaning is self-consistent and generic enough to meet the common
definitions of AMD and Intel.
Thanks,
Zhao
On 1/15/2024 2:25 PM, Zhao Liu wrote:
> Hi Xiaoyao,
>
> On Mon, Jan 15, 2024 at 12:25:19PM +0800, Xiaoyao Li wrote:
>> Date: Mon, 15 Jan 2024 12:25:19 +0800
>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>> Subject: Re: [PATCH v7 14/16] i386: Use CPUCacheInfo.share_level to encode
>> CPUID[4]
>>
>> On 1/15/2024 11:40 AM, Zhao Liu wrote:
>>>>> +{
>>>>> + uint32_t num_ids = 0;
>>>>> +
>>>>> + switch (share_level) {
>>>>> + case CPU_TOPO_LEVEL_CORE:
>>>>> + num_ids = 1 << apicid_core_offset(topo_info);
>>>>> + break;
>>>>> + case CPU_TOPO_LEVEL_DIE:
>>>>> + num_ids = 1 << apicid_die_offset(topo_info);
>>>>> + break;
>>>>> + case CPU_TOPO_LEVEL_PACKAGE:
>>>>> + num_ids = 1 << apicid_pkg_offset(topo_info);
>>>>> + break;
>>>>> + default:
>>>>> + /*
>>>>> + * Currently there is no use case for SMT and MODULE, so use
>>>>> + * assert directly to facilitate debugging.
>>>>> + */
>>>>> + g_assert_not_reached();
>>>>> + }
>>>>> +
>>>>> + return num_ids - 1;
>>>> suggest to just return num_ids, and let the caller to do the -1 work.
>>> Emm, SDM calls the whole "num_ids - 1" (CPUID.0x4.EAX[bits 14-25]) as
>>> "maximum number of addressable IDs for logical processors sharing this
>>> cache"...
>>>
>>> So if this helper just names "num_ids" as max_lp_ids_share_the_cache,
>>> I'm not sure there would be ambiguity here?
>>
>> I don't think it will.
>>
>> if this function is going to used anywhere else, people will need to keep in
>> mind to do +1 stuff to get the actual number.
>>
>> leaving the -1 trick to where CPUID value gets encoded. let's make this
>> function generic.
>
> This helper is the complete pattern to get addressable IDs, this is to
> say, the "- 1" is also the part of this calculation.
>
> Its own meaning is self-consistent and generic enough to meet the common
> definitions of AMD and Intel.
OK. I stop bikeshedding on it.
> Thanks,
> Zhao
>
Hi Xiaoyao,
On Mon, Jan 15, 2024 at 03:00:25PM +0800, Xiaoyao Li wrote:
> Date: Mon, 15 Jan 2024 15:00:25 +0800
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> Subject: Re: [PATCH v7 14/16] i386: Use CPUCacheInfo.share_level to encode
> CPUID[4]
>
> On 1/15/2024 2:25 PM, Zhao Liu wrote:
> > Hi Xiaoyao,
> >
> > On Mon, Jan 15, 2024 at 12:25:19PM +0800, Xiaoyao Li wrote:
> > > Date: Mon, 15 Jan 2024 12:25:19 +0800
> > > From: Xiaoyao Li <xiaoyao.li@intel.com>
> > > Subject: Re: [PATCH v7 14/16] i386: Use CPUCacheInfo.share_level to encode
> > > CPUID[4]
> > >
> > > On 1/15/2024 11:40 AM, Zhao Liu wrote:
> > > > > > +{
> > > > > > + uint32_t num_ids = 0;
> > > > > > +
> > > > > > + switch (share_level) {
> > > > > > + case CPU_TOPO_LEVEL_CORE:
> > > > > > + num_ids = 1 << apicid_core_offset(topo_info);
> > > > > > + break;
> > > > > > + case CPU_TOPO_LEVEL_DIE:
> > > > > > + num_ids = 1 << apicid_die_offset(topo_info);
> > > > > > + break;
> > > > > > + case CPU_TOPO_LEVEL_PACKAGE:
> > > > > > + num_ids = 1 << apicid_pkg_offset(topo_info);
> > > > > > + break;
> > > > > > + default:
> > > > > > + /*
> > > > > > + * Currently there is no use case for SMT and MODULE, so use
> > > > > > + * assert directly to facilitate debugging.
> > > > > > + */
> > > > > > + g_assert_not_reached();
> > > > > > + }
> > > > > > +
> > > > > > + return num_ids - 1;
> > > > > suggest to just return num_ids, and let the caller to do the -1 work.
> > > > Emm, SDM calls the whole "num_ids - 1" (CPUID.0x4.EAX[bits 14-25]) as
> > > > "maximum number of addressable IDs for logical processors sharing this
> > > > cache"...
> > > >
> > > > So if this helper just names "num_ids" as max_lp_ids_share_the_cache,
> > > > I'm not sure there would be ambiguity here?
> > >
> > > I don't think it will.
> > >
> > > if this function is going to used anywhere else, people will need to keep in
> > > mind to do +1 stuff to get the actual number.
> > >
> > > leaving the -1 trick to where CPUID value gets encoded. let's make this
> > > function generic.
> >
> > This helper is the complete pattern to get addressable IDs, this is to
> > say, the "- 1" is also the part of this calculation.
> >
> > Its own meaning is self-consistent and generic enough to meet the common
> > definitions of AMD and Intel.
>
> OK. I stop bikeshedding on it.
>
Thanks for your review ;-).
Regards,
Zhao
© 2016 - 2026 Red Hat, Inc.