hw/ppc/spapr.c | 5 ----- target/ppc/excp_helper.c | 24 ++++++++++++++++++++++++ target/ppc/helper.h | 1 + target/ppc/translate.c | 11 +++++++++++ 4 files changed, 36 insertions(+), 5 deletions(-)
It's not possible to specify more than one thread per core for a pseries
guest when running QEMU/TCG. Today, users can have setups like:
... -accel tcg,thread=multi -smp 8,threads=1,cores=8 ...
or
... -accel tcg,thread=multi -smp 8,sockets=2,cores=4,threads=1 ...
However, the following is not possible:
... -accel tcg,thread=multi -smp 16,threads=4,cores=2,sockets=2 ...
qemu-system-ppc64: TCG cannot support more than 1 thread/core on a pseries machine
The reason is the way SMT has been implemented since POWER8. This patch
implements a very basic simulation of the msgsndp instruction, using
external interrupts instead of doorbells. The result is a better user
experience, allowing users to experiment with SMT modes. However, it is
not related to MTTCG threads in any way.
Results:
... -accel tcg,thread=multi -smp 16,threads=4,cores=2,sockets=2 ...
root@ubuntu:~# ppc64_cpu --smt
SMT=4
root@ubuntu:~# ppc64_cpu --info
Core 0: 0* 1* 2* 3*
Core 1: 4* 5* 6* 7*
Core 2: 8* 9* 10* 11*
Core 3: 12* 13* 14* 15*
root@ubuntu:~# ppc64_cpu --smt=2
root@ubuntu:~# ppc64_cpu --info
Core 0: 0* 1* 2 3
Core 1: 4* 5* 6 7
Core 2: 8* 9* 10 11
Core 3: 12* 13* 14 15
root@ubuntu:~# ppc64_cpu --smt=off
root@ubuntu:~# ppc64_cpu --info
Core 0: 0* 1 2 3
Core 1: 4* 5 6 7
Core 2: 8* 9 10 11
Core 3: 12* 13 14 15
root@ubuntu:~# ppc64_cpu --smt
SMT is off
root@ubuntu:~# lscpu
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 16
On-line CPU(s) list: 0,4,8,12
Off-line CPU(s) list: 1-3,5-7,9-11,13-15
Thread(s) per core: 1
Core(s) per socket: 2
Socket(s): 2
NUMA node(s): 1
Model: 2.0 (pvr 004e 1200)
Model name: POWER9 (architected), altivec supported
Hypervisor vendor: KVM
Virtualization type: para
L1d cache: 32K
L1i cache: 32K
NUMA node0 CPU(s): 0,4,8,12
root@ubuntu:~# ppc64_cpu --smt=4
root@ubuntu:~# lscpu
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 16
On-line CPU(s) list: 0-15
Thread(s) per core: 4
Core(s) per socket: 2
Socket(s): 2
NUMA node(s): 1
Model: 2.0 (pvr 004e 1200)
Model name: POWER9 (architected), altivec supported
Hypervisor vendor: KVM
Virtualization type: para
L1d cache: 32K
L1i cache: 32K
NUMA node0 CPU(s): 0-15
Note: it's also possible to simulate SMT in TCG single threaded mode.
Signed-off-by: Jose Ricardo Ziviani <joserz@linux.ibm.com>
---
hw/ppc/spapr.c | 5 -----
target/ppc/excp_helper.c | 24 ++++++++++++++++++++++++
target/ppc/helper.h | 1 +
target/ppc/translate.c | 11 +++++++++++
4 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 8783b43396..3a864dfc7d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2572,11 +2572,6 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
int ret;
unsigned int smp_threads = ms->smp.threads;
- if (!kvm_enabled() && (smp_threads > 1)) {
- error_setg(&local_err, "TCG cannot support more than 1 thread/core "
- "on a pseries machine");
- goto out;
- }
if (!is_power_of_2(smp_threads)) {
error_setg(&local_err, "Cannot support %d threads/core on a pseries "
"machine because it must be a power of 2", smp_threads);
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 50b004d00d..ac5d196641 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1231,6 +1231,30 @@ static int book3s_dbell2irq(target_ulong rb)
return msg == DBELL_TYPE_DBELL_SERVER ? PPC_INTERRUPT_HDOORBELL : -1;
}
+void helper_msgsndp(target_ulong rb)
+{
+ CPUState *cs;
+ int irq = rb & DBELL_TYPE_MASK;
+ int thread_id = rb & 0x3f;
+
+ if (irq != DBELL_TYPE_DBELL_SERVER) {
+ return;
+ }
+
+ qemu_mutex_lock_iothread();
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+ if (cpu->vcpu_id == thread_id) {
+ continue;
+ }
+
+ cpu->env.pending_interrupts |= 1 << PPC_INTERRUPT_EXT;
+ cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb)
{
int irq = book3s_dbell2irq(rb);
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 380c9b1e2a..eadd08324b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -630,6 +630,7 @@ DEF_HELPER_FLAGS_3(store_sr, TCG_CALL_NO_RWG, void, env, tl, tl)
DEF_HELPER_FLAGS_1(602_mfrom, TCG_CALL_NO_RWG_SE, tl, tl)
DEF_HELPER_1(msgsnd, void, tl)
+DEF_HELPER_1(msgsndp, void, tl)
DEF_HELPER_2(msgclr, void, env, tl)
DEF_HELPER_1(book3s_msgsnd, void, tl)
DEF_HELPER_2(book3s_msgclr, void, env, tl)
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 4a5de28036..083731292b 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6657,6 +6657,15 @@ static void gen_msgsnd(DisasContext *ctx)
#endif /* defined(CONFIG_USER_ONLY) */
}
+static void gen_msgsndp(DisasContext *ctx)
+{
+#if defined(CONFIG_USER_ONLY)
+ GEN_PRIV;
+#else
+ gen_helper_msgsndp(cpu_gpr[rB(ctx->opcode)]);
+#endif /* defined(CONFIG_USER_ONLY) */
+}
+
static void gen_msgsync(DisasContext *ctx)
{
#if defined(CONFIG_USER_ONLY)
@@ -7176,6 +7185,8 @@ GEN_HANDLER2_E(tlbilx_booke206, "tlbilx", 0x1F, 0x12, 0x00, 0x03800001,
PPC_NONE, PPC2_BOOKE206),
GEN_HANDLER2_E(msgsnd, "msgsnd", 0x1F, 0x0E, 0x06, 0x03ff0001,
PPC_NONE, PPC2_PRCNTL),
+GEN_HANDLER2_E(msgsndp, "msgsndp", 0x1F, 0x0E, 0x04, 0x03ff0001,
+ PPC_NONE, PPC_POWER),
GEN_HANDLER2_E(msgclr, "msgclr", 0x1F, 0x0E, 0x07, 0x03ff0001,
PPC_NONE, PPC2_PRCNTL),
GEN_HANDLER2_E(msgsync, "msgsync", 0x1F, 0x16, 0x1B, 0x00000000,
--
2.21.0
On Tue, Jul 16, 2019 at 01:02:53AM -0300, Jose Ricardo Ziviani wrote: > It's not possible to specify the number of threads of a guest when > running QEMU/TCG. Today, users can have setups like: > > ... -accel tcg,thread=multi -smp 8,threads=1,cores=8 ... > or > ... -accel tcg,thread=multi -smp 8,sockets=2,cores=4,threads=1 ... > > However, the following is not possible: > > ... -accel tcg,thread=multi -smp 16,threads=4,cores=2,sockets=2 ... > qemu-system-ppc64: TCG cannot support more than 1 thread/core on a pseries machine > > The reason is due to how SMT is implemented since Power8. This patch > implements a very basic simulation of the msgsndp instruction, using ext > interrupt instead of doorbells. The result is a better user experience, > allowing them to play with SMT modes. However, it doesn't relate with > MTTCG threads in any way. This really isn't enough. POWER also has a number of SPRs which are per-core rather than per-thread, but currently TCG treats everything as per-thread. You'd need to properly implement per-core registers before you can advertise support for multiple threads in TCG. > > Results: > ... -accel tcg,thread=multi -smp 16,threads=4,cores=2,sockets=2 ... 
> > root@ubuntu:~# ppc64_cpu --smt > SMT=4 > root@ubuntu:~# ppc64_cpu --info > Core 0: 0* 1* 2* 3* > Core 1: 4* 5* 6* 7* > Core 2: 8* 9* 10* 11* > Core 3: 12* 13* 14* 15* > root@ubuntu:~# ppc64_cpu --smt=2 > root@ubuntu:~# ppc64_cpu --info > Core 0: 0* 1* 2 3 > Core 1: 4* 5* 6 7 > Core 2: 8* 9* 10 11 > Core 3: 12* 13* 14 15 > root@ubuntu:~# ppc64_cpu --smt=off > root@ubuntu:~# ppc64_cpu --info > Core 0: 0* 1 2 3 > Core 1: 4* 5 6 7 > Core 2: 8* 9 10 11 > Core 3: 12* 13 14 15 > > root@ubuntu:~# ppc64_cpu --smt > SMT is off > root@ubuntu:~# lscpu > Architecture: ppc64le > Byte Order: Little Endian > CPU(s): 16 > On-line CPU(s) list: 0,4,8,12 > Off-line CPU(s) list: 1-3,5-7,9-11,13-15 > Thread(s) per core: 1 > Core(s) per socket: 2 > Socket(s): 2 > NUMA node(s): 1 > Model: 2.0 (pvr 004e 1200) > Model name: POWER9 (architected), altivec supported > Hypervisor vendor: KVM > Virtualization type: para > L1d cache: 32K > L1i cache: 32K > NUMA node0 CPU(s): 0,4,8,12 > > root@ubuntu:~# ppc64_cpu --smt=4 > root@ubuntu:~# lscpu > Architecture: ppc64le > Byte Order: Little Endian > CPU(s): 16 > On-line CPU(s) list: 0-15 > Thread(s) per core: 4 > Core(s) per socket: 2 > Socket(s): 2 > NUMA node(s): 1 > Model: 2.0 (pvr 004e 1200) > Model name: POWER9 (architected), altivec supported > Hypervisor vendor: KVM > Virtualization type: para > L1d cache: 32K > L1i cache: 32K > NUMA node0 CPU(s): 0-15 > > Note: it's also possible to simulate SMT in TCG single threaded mode. 
> > Signed-off-by: Jose Ricardo Ziviani <joserz@linux.ibm.com> > --- > hw/ppc/spapr.c | 5 ----- > target/ppc/excp_helper.c | 24 ++++++++++++++++++++++++ > target/ppc/helper.h | 1 + > target/ppc/translate.c | 11 +++++++++++ > 4 files changed, 36 insertions(+), 5 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 8783b43396..3a864dfc7d 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -2572,11 +2572,6 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) > int ret; > unsigned int smp_threads = ms->smp.threads; > > - if (!kvm_enabled() && (smp_threads > 1)) { > - error_setg(&local_err, "TCG cannot support more than 1 thread/core " > - "on a pseries machine"); > - goto out; > - } > if (!is_power_of_2(smp_threads)) { > error_setg(&local_err, "Cannot support %d threads/core on a pseries " > "machine because it must be a power of 2", smp_threads); > diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c > index 50b004d00d..ac5d196641 100644 > --- a/target/ppc/excp_helper.c > +++ b/target/ppc/excp_helper.c > @@ -1231,6 +1231,30 @@ static int book3s_dbell2irq(target_ulong rb) > return msg == DBELL_TYPE_DBELL_SERVER ? 
PPC_INTERRUPT_HDOORBELL : -1; > } > > +void helper_msgsndp(target_ulong rb) > +{ > + CPUState *cs; > + int irq = rb & DBELL_TYPE_MASK; > + int thread_id = rb & 0x3f; > + > + if (irq != DBELL_TYPE_DBELL_SERVER) { > + return; > + } > + > + qemu_mutex_lock_iothread(); > + CPU_FOREACH(cs) { > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + > + if (cpu->vcpu_id == thread_id) { > + continue; > + } > + > + cpu->env.pending_interrupts |= 1 << PPC_INTERRUPT_EXT; > + cpu_interrupt(cs, CPU_INTERRUPT_HARD); > + } > + qemu_mutex_unlock_iothread(); > +} > + > void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb) > { > int irq = book3s_dbell2irq(rb); > diff --git a/target/ppc/helper.h b/target/ppc/helper.h > index 380c9b1e2a..eadd08324b 100644 > --- a/target/ppc/helper.h > +++ b/target/ppc/helper.h > @@ -630,6 +630,7 @@ DEF_HELPER_FLAGS_3(store_sr, TCG_CALL_NO_RWG, void, env, tl, tl) > > DEF_HELPER_FLAGS_1(602_mfrom, TCG_CALL_NO_RWG_SE, tl, tl) > DEF_HELPER_1(msgsnd, void, tl) > +DEF_HELPER_1(msgsndp, void, tl) > DEF_HELPER_2(msgclr, void, env, tl) > DEF_HELPER_1(book3s_msgsnd, void, tl) > DEF_HELPER_2(book3s_msgclr, void, env, tl) > diff --git a/target/ppc/translate.c b/target/ppc/translate.c > index 4a5de28036..083731292b 100644 > --- a/target/ppc/translate.c > +++ b/target/ppc/translate.c > @@ -6657,6 +6657,15 @@ static void gen_msgsnd(DisasContext *ctx) > #endif /* defined(CONFIG_USER_ONLY) */ > } > > +static void gen_msgsndp(DisasContext *ctx) > +{ > +#if defined(CONFIG_USER_ONLY) > + GEN_PRIV; > +#else > + gen_helper_msgsndp(cpu_gpr[rB(ctx->opcode)]); > +#endif /* defined(CONFIG_USER_ONLY) */ > +} > + > static void gen_msgsync(DisasContext *ctx) > { > #if defined(CONFIG_USER_ONLY) > @@ -7176,6 +7185,8 @@ GEN_HANDLER2_E(tlbilx_booke206, "tlbilx", 0x1F, 0x12, 0x00, 0x03800001, > PPC_NONE, PPC2_BOOKE206), > GEN_HANDLER2_E(msgsnd, "msgsnd", 0x1F, 0x0E, 0x06, 0x03ff0001, > PPC_NONE, PPC2_PRCNTL), > +GEN_HANDLER2_E(msgsndp, "msgsndp", 0x1F, 0x0E, 0x04, 0x03ff0001, > + PPC_NONE, 
PPC_POWER), > GEN_HANDLER2_E(msgclr, "msgclr", 0x1F, 0x0E, 0x07, 0x03ff0001, > PPC_NONE, PPC2_PRCNTL), > GEN_HANDLER2_E(msgsync, "msgsync", 0x1F, 0x16, 0x1B, 0x00000000, -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
© 2016 - 2024 Red Hat, Inc.