target/i386/cpu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
When QEMU is started with:
-cpu host,migratable=on,host-cache-info=on,l3-cache=off
-smp 180,sockets=2,dies=1,cores=45,threads=2
When executing "cpuid -1 -l 1 -r" in the guest, we obtain a value of 90 for
CPUID.01H.EBX[23:16], whereas the expected value is 128. Additionally,
executing "cpuid -1 -l 4 -r" in the guest yields a value of 63 for
CPUID.04H.EAX[31:26], which matches the expected result.
As (1+CPUID.04H.EAX[31:26]) is rounded up to the nearest power-of-2 integer,
we'd better round up CPUID.01H.EBX[23:16] to the nearest power-of-2
integer too. Otherwise we may encounter unexpected results in the guest.
For example, when QEMU is started with the CLI above and the Extended Topology
Enumeration Leaf (xtopology) is disabled, guest kernel 5.15.120 uses
CPUID.01H.EBX[23:16]/(1+CPUID.04H.EAX[31:26]) to calculate threads-per-core
in detect_ht(). The guest then gets "90/(1+63)=1" as the result, even though
threads-per-core should actually be 2.
So let us round up CPUID.01H.EBX[23:16] to the nearest power-of-2 integer
to solve the unexpected result.
In addition, we introduce max_thread_number_in_package() instead of
using pow2ceil() to be compatible with smp and hybrid.
Signed-off-by: Guixiong Wei <weiguixiong@bytedance.com>
Signed-off-by: Yipeng Yin <yinyipeng@bytedance.com>
Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
---
target/i386/cpu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 85ef7452c0..1b4e3b6931 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -261,6 +261,12 @@ static uint32_t max_thread_ids_for_cache(X86CPUTopoInfo *topo_info,
return num_ids - 1;
}
+static uint32_t max_thread_number_in_package(X86CPUTopoInfo *topo_info)
+{
+ uint32_t num_threads = 1 << apicid_pkg_offset(topo_info);
+ return num_threads;
+}
+
static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info)
{
uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) -
@@ -6462,7 +6468,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
}
*edx = env->features[FEAT_1_EDX];
if (threads_per_pkg > 1) {
- *ebx |= threads_per_pkg << 16;
+ *ebx |= max_thread_number_in_package(&topo_info) << 16;
*edx |= CPUID_HT;
}
if (!cpu->enable_pmu) {
--
2.20.1
Hi Chuang. Look fine for me, and only some minor nits: On Mon, Oct 07, 2024 at 04:13:44PM +0800, Chuang Xu wrote: > Date: Mon, 7 Oct 2024 16:13:44 +0800 > From: Chuang Xu <xuchuangxclwt@bytedance.com> > Subject: [PATCH v4] i386/cpu: fixup number of addressable IDs for logical > processors in the physical package > X-Mailer: git-send-email 2.24.3 (Apple Git-128) > > When QEMU is started with: > -cpu host,migratable=on,host-cache-info=on,l3-cache=off > -smp 180,sockets=2,dies=1,cores=45,threads=2 > > Try to execute "cpuid -1 -l 1 -r" in guest, we'll obtain a value of 90 for > CPUID.01H.EBX[23:16], while the expected value is 128. And Try to > execute "cpuid -1 -l 4 -r" in guest, we'll obtain a value of 63 for > CPUID.04H.EAX[31:26] as expected. I polished the sentences a bit: When executing "cpuid -1 -l 1 -r" in the guest, we obtain a value of 90 for CPUID.01H.EBX[23:16], whereas the expected value is 128. Additionally, executing "cpuid -1 -l 4 -r" in the guest yields a value of 63 for CPUID.04H.EAX[31:26], which matches the expected result. > As (1+CPUID.04H.EAX[31:26]) round up to the nearest power-of-2 integer, s/round/rounds/ > we'd beter round up CPUID.01H.EBX[23:16] to the nearest power-of-2 > integer too. Otherwise we may encounter unexpected results in guest. > > For example, when QEMU is started with CLI above and xtopology is disabled, What's xtopology? > guest kernel 5.15.120 uses CPUID.01H.EBX[23:16]/(1+CPUID.04H.EAX[31:26]) to > calculate threads-per-core in detect_ht(). Then guest will get "90/(1+63)=1" > as the result, even though theads-per-core should actually be 2. s/theads-per-core/threads-per-core/ > So let us round up CPUID.01H.EBX[23:16] to the nearest power-of-2 integer > to solve the unexpected result. > > In addition, we introduce max_thread_number_in_package() instead of > using pow2ceil() to be compatible with smp and hybrid. 
> > Signed-off-by: Guixiong Wei <weiguixiong@bytedance.com> > Signed-off-by: Yipeng Yin <yinyipeng@bytedance.com> > Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com> > --- > target/i386/cpu.c | 8 +++++++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index 85ef7452c0..1b4e3b6931 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -261,6 +261,12 @@ static uint32_t max_thread_ids_for_cache(X86CPUTopoInfo *topo_info, > return num_ids - 1; > } > > +static uint32_t max_thread_number_in_package(X86CPUTopoInfo *topo_info) > +{ > + uint32_t num_threads = 1 << apicid_pkg_offset(topo_info); > + return num_threads; > +} > + > static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) > { > uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - > @@ -6462,7 +6468,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > } > *edx = env->features[FEAT_1_EDX]; > if (threads_per_pkg > 1) { > - *ebx |= threads_per_pkg << 16; > + *ebx |= max_thread_number_in_package(&topo_info) << 16; This helper has only 1 caller and its name doesn't distinguish the addressable ID, so it's not necessary. I feel it's better to shift the bits directly here: *ebx |= 1 << apicid_pkg_offset(topo_info) << 16; > *edx |= CPUID_HT; > } > if (!cpu->enable_pmu) { > -- > 2.20.1 > Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Hi Zhao. On 10/8/24 上午10:55, Zhao Liu wrote: > Hi Chuang. > > Look fine for me, and only some minor nits: > > On Mon, Oct 07, 2024 at 04:13:44PM +0800, Chuang Xu wrote: >> Date: Mon, 7 Oct 2024 16:13:44 +0800 >> From: Chuang Xu <xuchuangxclwt@bytedance.com> >> Subject: [PATCH v4] i386/cpu: fixup number of addressable IDs for logical >> processors in the physical package >> X-Mailer: git-send-email 2.24.3 (Apple Git-128) >> >> When QEMU is started with: >> -cpu host,migratable=on,host-cache-info=on,l3-cache=off >> -smp 180,sockets=2,dies=1,cores=45,threads=2 >> >> Try to execute "cpuid -1 -l 1 -r" in guest, we'll obtain a value of 90 for >> CPUID.01H.EBX[23:16], while the expected value is 128. And Try to >> execute "cpuid -1 -l 4 -r" in guest, we'll obtain a value of 63 for >> CPUID.04H.EAX[31:26] as expected. > I polished the sentences a bit: > > When executing "cpuid -1 -l 1 -r" in the guest, we obtain a value of 90 for > CPUID.01H.EBX[23:16], whereas the expected value is 128. Additionally, > executing "cpuid -1 -l 4 -r" in the guest yields a value of 63 for > CPUID.04H.EAX[31:26], which matches the expected result. > >> As (1+CPUID.04H.EAX[31:26]) round up to the nearest power-of-2 integer, > s/round/rounds/ > >> we'd beter round up CPUID.01H.EBX[23:16] to the nearest power-of-2 >> integer too. Otherwise we may encounter unexpected results in guest. >> >> For example, when QEMU is started with CLI above and xtopology is disabled, > What's xtopology? What I want to express here is Extended Topology Enumeration Leaf. >> guest kernel 5.15.120 uses CPUID.01H.EBX[23:16]/(1+CPUID.04H.EAX[31:26]) to >> calculate threads-per-core in detect_ht(). Then guest will get "90/(1+63)=1" >> as the result, even though theads-per-core should actually be 2. > s/theads-per-core/threads-per-core/ > >> So let us round up CPUID.01H.EBX[23:16] to the nearest power-of-2 integer >> to solve the unexpected result. 
>> >> In addition, we introduce max_thread_number_in_package() instead of >> using pow2ceil() to be compatible with smp and hybrid. >> >> Signed-off-by: Guixiong Wei <weiguixiong@bytedance.com> >> Signed-off-by: Yipeng Yin <yinyipeng@bytedance.com> >> Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com> >> --- >> target/i386/cpu.c | 8 +++++++- >> 1 file changed, 7 insertions(+), 1 deletion(-) >> >> diff --git a/target/i386/cpu.c b/target/i386/cpu.c >> index 85ef7452c0..1b4e3b6931 100644 >> --- a/target/i386/cpu.c >> +++ b/target/i386/cpu.c >> @@ -261,6 +261,12 @@ static uint32_t max_thread_ids_for_cache(X86CPUTopoInfo *topo_info, >> return num_ids - 1; >> } >> >> +static uint32_t max_thread_number_in_package(X86CPUTopoInfo *topo_info) >> +{ >> + uint32_t num_threads = 1 << apicid_pkg_offset(topo_info); >> + return num_threads; >> +} >> + >> static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) >> { >> uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - >> @@ -6462,7 +6468,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, >> } >> *edx = env->features[FEAT_1_EDX]; >> if (threads_per_pkg > 1) { >> - *ebx |= threads_per_pkg << 16; >> + *ebx |= max_thread_number_in_package(&topo_info) << 16; > This helper has only 1 caller and its name doesn't distinguish the > addressable ID, so it's not necessary. I feel it's better to shift > the bits directly here: > > *ebx |= 1 << apicid_pkg_offset(topo_info) << 16; > >> *edx |= CPUID_HT; >> } >> if (!cpu->enable_pmu) { >> -- >> 2.20.1 >> > Reviewed-by: Zhao Liu <zhao1.liu@intel.com> Thanks, Later I'll send patch v5.
On Mon, 7 Oct 2024 16:13:44 +0800 Chuang Xu <xuchuangxclwt@bytedance.com> wrote: > When QEMU is started with: > -cpu host,migratable=on,host-cache-info=on,l3-cache=off > -smp 180,sockets=2,dies=1,cores=45,threads=2 > > Try to execute "cpuid -1 -l 1 -r" in guest, we'll obtain a value of 90 for > CPUID.01H.EBX[23:16], while the expected value is 128. And Try to > execute "cpuid -1 -l 4 -r" in guest, we'll obtain a value of 63 for > CPUID.04H.EAX[31:26] as expected. > > As (1+CPUID.04H.EAX[31:26]) round up to the nearest power-of-2 integer, > we'd beter round up CPUID.01H.EBX[23:16] to the nearest power-of-2 > integer too. Otherwise we may encounter unexpected results in guest. > > For example, when QEMU is started with CLI above and xtopology is disabled, > guest kernel 5.15.120 uses CPUID.01H.EBX[23:16]/(1+CPUID.04H.EAX[31:26]) to > calculate threads-per-core in detect_ht(). Then guest will get "90/(1+63)=1" > as the result, even though theads-per-core should actually be 2. > > So let us round up CPUID.01H.EBX[23:16] to the nearest power-of-2 integer > to solve the unexpected result. > > In addition, we introduce max_thread_number_in_package() instead of > using pow2ceil() to be compatible with smp and hybrid. though I'm still worried that we can't use pow2ceil() to match spec closer. Probably we are doing something wrong if bit shift works while pow2ceil() as described in spec doesn't. 
Acked-by: Igor Mammedov <imammedo@redhat.com> > > Signed-off-by: Guixiong Wei <weiguixiong@bytedance.com> > Signed-off-by: Yipeng Yin <yinyipeng@bytedance.com> > Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com> > --- > target/i386/cpu.c | 8 +++++++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index 85ef7452c0..1b4e3b6931 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -261,6 +261,12 @@ static uint32_t max_thread_ids_for_cache(X86CPUTopoInfo *topo_info, > return num_ids - 1; > } > > +static uint32_t max_thread_number_in_package(X86CPUTopoInfo *topo_info) > +{ > + uint32_t num_threads = 1 << apicid_pkg_offset(topo_info); > + return num_threads; > +} > + > static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) > { > uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - > @@ -6462,7 +6468,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > } > *edx = env->features[FEAT_1_EDX]; > if (threads_per_pkg > 1) { > - *ebx |= threads_per_pkg << 16; > + *ebx |= max_thread_number_in_package(&topo_info) << 16; > *edx |= CPUID_HT; > } > if (!cpu->enable_pmu) {
On Mon, Oct 07, 2024 at 02:33:39PM +0200, Igor Mammedov wrote: > Date: Mon, 7 Oct 2024 14:33:39 +0200 > From: Igor Mammedov <imammedo@redhat.com> > Subject: Re: [PATCH v4] i386/cpu: fixup number of addressable IDs for > logical processors in the physical package > X-Mailer: Claws Mail 4.3.0 (GTK 3.24.43; x86_64-redhat-linux-gnu) > > On Mon, 7 Oct 2024 16:13:44 +0800 > Chuang Xu <xuchuangxclwt@bytedance.com> wrote: > > > When QEMU is started with: > > -cpu host,migratable=on,host-cache-info=on,l3-cache=off > > -smp 180,sockets=2,dies=1,cores=45,threads=2 > > > > Try to execute "cpuid -1 -l 1 -r" in guest, we'll obtain a value of 90 for > > CPUID.01H.EBX[23:16], while the expected value is 128. And Try to > > execute "cpuid -1 -l 4 -r" in guest, we'll obtain a value of 63 for > > CPUID.04H.EAX[31:26] as expected. > > > > As (1+CPUID.04H.EAX[31:26]) round up to the nearest power-of-2 integer, > > we'd beter round up CPUID.01H.EBX[23:16] to the nearest power-of-2 > > integer too. Otherwise we may encounter unexpected results in guest. > > > > For example, when QEMU is started with CLI above and xtopology is disabled, > > guest kernel 5.15.120 uses CPUID.01H.EBX[23:16]/(1+CPUID.04H.EAX[31:26]) to > > calculate threads-per-core in detect_ht(). Then guest will get "90/(1+63)=1" > > as the result, even though theads-per-core should actually be 2. > > > > So let us round up CPUID.01H.EBX[23:16] to the nearest power-of-2 integer > > to solve the unexpected result. > > > > In addition, we introduce max_thread_number_in_package() instead of > > using pow2ceil() to be compatible with smp and hybrid. > > though I'm still worried that we can't use pow2ceil() to match spec closer. > Probably we are doing something wrong if bit shift works while pow2ceil() > as described in spec doesn't. The addressable ID is the (whole or partial) initial APIC ID, so the bit shift operation on APIC ID is mathematically equivalent to the pow2ceil() on the addressable ID as well. 
I’ll keep a close eye on this. :-) > Acked-by: Igor Mammedov <imammedo@redhat.com>
© 2016 - 2024 Red Hat, Inc.