From: Christoph Müllner <christoph.muellner@vrull.eu>
This patch adds support for the T-Head SYNC instructions.
The patch uses the T-Head specific decoder and translation.
The implementation does not have much functionality (besides accepting
the instructions and not qualifying them as illegal instructions if
the hart executes in the required privilege level for the instruction),
as QEMU does not model CPU caches, or out-of-order execution.
Further the instructions don't have any exception behaviour
(at least not documented).
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
target/riscv/cpu.c | 1 +
target/riscv/cpu.h | 1 +
target/riscv/insn_trans/trans_xthead.c.inc | 6 ++++++
target/riscv/meson.build | 1 +
target/riscv/translate.c | 3 +++
target/riscv/xtheadsync.decode | 25 ++++++++++++++++++++++
6 files changed, 37 insertions(+)
create mode 100644 target/riscv/xtheadsync.decode
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 7718ab0478..a72722cfa6 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -921,6 +921,7 @@ static Property riscv_cpu_extensions[] = {
/* Vendor-specific custom extensions */
DEFINE_PROP_BOOL("xtheadcmo", RISCVCPU, cfg.ext_xtheadcmo, false),
+ DEFINE_PROP_BOOL("xtheadsync", RISCVCPU, cfg.ext_xtheadsync, false),
DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
/* These are experimental so mark with 'x-' */
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index b7ab53b7b8..4ae22cf529 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -441,6 +441,7 @@ struct RISCVCPUConfig {
/* Vendor-specific custom extensions */
bool ext_xtheadcmo;
+ bool ext_xtheadsync;
bool ext_XVentanaCondOps;
uint8_t pmu_num;
diff --git a/target/riscv/insn_trans/trans_xthead.c.inc b/target/riscv/insn_trans/trans_xthead.c.inc
index 1b1e21ab77..0a6719b2e2 100644
--- a/target/riscv/insn_trans/trans_xthead.c.inc
+++ b/target/riscv/insn_trans/trans_xthead.c.inc
@@ -64,3 +64,9 @@ NOP_PRIVCHECK(th_l2cache_call, REQUIRE_PRIV_MHS)
NOP_PRIVCHECK(th_l2cache_ciall, REQUIRE_PRIV_MHS)
NOP_PRIVCHECK(th_l2cache_iall, REQUIRE_PRIV_MHS)
+NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS)
+NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU)
+NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU)
+
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 1d149e05cd..f201cc6997 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -3,6 +3,7 @@ gen = [
decodetree.process('insn16.decode', extra_args: ['--static-decode=decode_insn16', '--insnwidth=16']),
decodetree.process('insn32.decode', extra_args: '--static-decode=decode_insn32'),
decodetree.process('xtheadcmo.decode', extra_args: '--static-decode=decode_xtheadcmo'),
+ decodetree.process('xtheadsync.decode', extra_args: '--static-decode=decode_xtheadsync'),
decodetree.process('XVentanaCondOps.decode', extra_args: '--static-decode=decode_XVentanaCodeOps'),
]
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index d16ae63850..a63cc3de46 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -133,6 +133,7 @@ static bool always_true_p(DisasContext *ctx __attribute__((__unused__)))
}
MATERIALISE_EXT_PREDICATE(xtheadcmo)
+MATERIALISE_EXT_PREDICATE(xtheadsync)
MATERIALISE_EXT_PREDICATE(XVentanaCondOps)
#ifdef TARGET_RISCV32
@@ -720,6 +721,7 @@ static int ex_rvc_shifti(DisasContext *ctx, int imm)
/* Include decoders for factored-out extensions */
#include "decode-xtheadcmo.c.inc"
+#include "decode-xtheadsync.c.inc"
#include "decode-XVentanaCondOps.c.inc"
static bool gen_logic_imm_fn(DisasContext *ctx, arg_i *a,
@@ -1041,6 +1043,7 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
} decoders[] = {
{ always_true_p, decode_insn32 },
{ has_xtheadcmo_p, decode_xtheadcmo },
+ { has_xtheadsync_p, decode_xtheadsync },
{ has_XVentanaCondOps_p, decode_XVentanaCodeOps },
};
diff --git a/target/riscv/xtheadsync.decode b/target/riscv/xtheadsync.decode
new file mode 100644
index 0000000000..d25735cce8
--- /dev/null
+++ b/target/riscv/xtheadsync.decode
@@ -0,0 +1,25 @@
+#
+# RISC-V translation routines for the XTheadSync extension
+#
+# Copyright (c) 2022 Christoph Muellner, christoph.muellner@vrull.eu
+#
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# The XTheadSync extension provides instructions for multi-processor synchronization.
+#
+# It is documented in
+# https://github.com/T-head-Semi/thead-extension-spec/releases/download/2.0.0/xthead-2022-09-05-2.0.0.pdf
+
+# Fields:
+%rs1 15:5
+%rs2 20:5
+
+# Formats
+@rs2_s ....... ..... ..... ... ..... ....... %rs2 %rs1
+
+# *** SYNC instructions
+th_sfence_vmas 0000010 ..... ..... 000 00000 0001011 @rs2_s
+th_sync 0000000 11000 00000 000 00000 0001011
+th_sync_i 0000000 11010 00000 000 00000 0001011
+th_sync_is 0000000 11011 00000 000 00000 0001011
+th_sync_s 0000000 11001 00000 000 00000 0001011
--
2.37.2
On 9/6/22 13:22, Christoph Muellner wrote:
> +NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS)
> +NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU)
> +NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU)
> +NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU)
> +NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU)
These should not be nops: th_sfence_vmas requires a tlb flush; th.sync{,.i} needs to end
the current translation block; th.sync.{s,is} needs multiprocessor sync, which involves a
call to async_safe_run_on_cpu.
r~
On 2022/9/8 15:29, Richard Henderson wrote:
> On 9/6/22 13:22, Christoph Muellner wrote:
>> +NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS)
>> +NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU)
>> +NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU)
>> +NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU)
>> +NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU)
>
> These should not be nops: th_sfence_vmas requires a tlb flush;
> th.sync{,.i} needs to end the current translation block;
> th.sync.{s,is} needs multiprocessor sync, which involves a call to
> async_safe_run_on_cpu.
Hi Richard,
I have fixed the description of T-Head custom synchronization
instructions according to the implementation of hardware. Sorry for the
misleading.
https://github.com/T-head-Semi/thead-extension-spec/tree/master/xtheadsync
The fix is as below:
1)th.sync.s has the same function with th.sync.
2) th.sync.is has the same function with th.sync.i
3) th.sync has the function of memory barrier, but it is stricter than
RISC-V fence instruction as it order all the instructions instead of
load/store instructions.
4) th.sync.i has the function to flush the pipeline besides the function
of th.sync.
On QEMU, I think they should be emulated them as below:
1) th.sync should be emulated as " 'tcg_gen_mb()' and 'end the current
translation block'" on QEMU.
2) th.sync.i should be emulated as " 'tcg_gen_mb()' and 'end the
current translation block'" on QEMU because we don't have the cache
model for guest on QEMU. Thus we don't have to synchronize between the
icache and dcache for guest.
'tcg_gen_mb' is for the function of memory barrier. And 'end the
current translation block' is to reflect the influence of other
instructions, such as taking interrupts which happens only at the end of
a translation block.
Maybe we can also just implement these instructions as 'tcg_gen_mb'
because currently all CSR instructions which may influence the
interrupts taking have ended the TB on QEMU.
Is it right?
Thanks,
Zhiwei
>
>
> r~
On 2022/9/8 15:29, Richard Henderson wrote:
> On 9/6/22 13:22, Christoph Muellner wrote:
>> +NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS)
>> +NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU)
>> +NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU)
>> +NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU)
>> +NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU)
>
> These should not be nops: th_sfence_vmas requires a tlb flush;
> th.sync{,.i} needs to end the current translation block;
> th.sync.{s,is} needs multiprocessor sync, which involves a call to
> async_safe_run_on_cpu.
Hi Richard,
I have fixed the description of T-Head custom synchronization
instructions according to the implementation of hardware. Sorry for the
misleading.
https://github.com/T-head-Semi/thead-extension-spec/tree/master/xtheadsync
The fix is as below:
1)th.sync.s has the same function with th.sync.
2) th.sync. has the same function with th.sync.i
3) th.sync has the function of memory barrier, but it is stricter than
RISC-V fence instruction as it order all the instructions instead of
load/store instructions.
4) th.sync.i has the function to flush the pipeline besides the function
of th.sync.
On QEMU, I think they should be emulated them as below:
1) th.sync should be emulated as " 'tcg_gen_mb()' and 'end the current
translation block'" on QEMU.
2) th.sync should be emulated as " 'tcg_gen_mb()' and 'end the current
translation block'" on QEMU because we don't have the cache model for
guest on QEMU. Thus we don't have to synchronize between the icache and
dcache for guest.
'tcg_gen_mb' is for the function of memory barrier, and 'end the
current translation block' is to reflect the influence of other
instructions, such as to take interrupts which only at the end of
translation block.
Maybe we can also just implement these instructions as 'tcg_gen_mb'
because currently all CSR instructions which may influence the
interrupts taking have ended the TB on QEMU.
Is it right?
Thanks,
Zhiwei
>
>
> r~
On Thu, Sep 8, 2022 at 9:30 AM Richard Henderson <
richard.henderson@linaro.org> wrote:
> On 9/6/22 13:22, Christoph Muellner wrote:
> > +NOP_PRIVCHECK(th_sfence_vmas, REQUIRE_PRIV_MHS)
> > +NOP_PRIVCHECK(th_sync, REQUIRE_PRIV_MHSU)
> > +NOP_PRIVCHECK(th_sync_i, REQUIRE_PRIV_MHSU)
> > +NOP_PRIVCHECK(th_sync_is, REQUIRE_PRIV_MHSU)
> > +NOP_PRIVCHECK(th_sync_s, REQUIRE_PRIV_MHSU)
>
> These should not be nops: th_sfence_vmas requires a tlb flush;
> th.sync{,.i} needs to end
> the current translation block; th.sync.{s,is} needs multiprocessor sync,
> which involves a
> call to async_safe_run_on_cpu.
>
Understood.
For a new revision, I'll do the following:
* th_sfence_vmas: async_safe_run_on_cpu() with run_on_cpu_func which
flushes TLB on all CPUs (similar like trans_sfence_vma())
* th_sync/th_sync_i: end the TB (similar like trans_fence_i())
* th_sync_s/th_sync_is: async_safe_run_on_cpu() with run_on_cpu_func which
ends the TB (similar like trans_fence_i())
Thanks!
>
>
> r~
>
© 2016 - 2026 Red Hat, Inc.