target/riscv/translate.c | 45 ++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-)
Support 4-byte atomic instruction fetch when instruction is natural
aligned.
Current implementation is not atomic because it loads instruction twice
for first and last 2 bytes. We load 4 bytes at once to keep the
atomicity. This instruction preload method only applys when instruction
is 4-byte aligned. If instruction is unaligned, it could be across pages
so that preload will trigger additional page fault.
We encounter this issue when doing pressure test of enabling & disabling
Linux kernel ftrace. Ftrace with kernel preemption requires concurrent
modification and execution of instruction, so non-atomic instruction
fetch will cause the race condition. We may fetch the wrong instruction
which is the mixing of 2 instructions.
Also, RISC-V Profile wants to provide this feature by HW. RVA20U64
Ziccif protects the atomicity of instruction fetch when it is
natural aligned.
Signed-off-by: Jim Shu <jim.shu@sifive.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
---
target/riscv/translate.c | 45 ++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 11 deletions(-)
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 0569224e53..2be8ef63e6 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1133,13 +1133,37 @@ const RISCVDecoder decoder_table[] = {
const size_t decoder_table_size = ARRAY_SIZE(decoder_table);
-static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
+static void decode_opc(CPURISCVState *env, DisasContext *ctx)
{
ctx->virt_inst_excp = false;
+
+ uint32_t opcode;
+ bool is_4byte_align = false;
+
+ if ((ctx->base.pc_next % 4) == 0) {
+ /*
+ * Load 4 bytes at once to make instruction fetch atomically.
+ *
+ * Note: When pc is 4-byte aligned, 4-byte instruction wouldn't be
+ * across pages. We could preload 4 bytes instruction no matter
+ * real one is 2 or 4 bytes. Instruction preload wouldn't trigger
+ * additional page fault.
+ */
+ opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next);
+ is_4byte_align = true;
+ } else {
+ /*
+ * For unaligned pc, instruction preload may trigger additional
+ * page fault so we only load 2 bytes here.
+ */
+ opcode = (uint32_t) translator_lduw(env, &ctx->base, ctx->base.pc_next);
+ }
+ ctx->ol = ctx->xl;
+
ctx->cur_insn_len = insn_len(opcode);
/* Check for compressed insn */
if (ctx->cur_insn_len == 2) {
- ctx->opcode = opcode;
+ ctx->opcode = (uint16_t)opcode;
/*
* The Zca extension is added as way to refer to instructions in the C
* extension that do not include the floating-point loads and stores
@@ -1149,15 +1173,16 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
return;
}
} else {
- uint32_t opcode32 = opcode;
- opcode32 = deposit32(opcode32, 16, 16,
- translator_lduw(env, &ctx->base,
- ctx->base.pc_next + 2));
- ctx->opcode = opcode32;
+ if (!is_4byte_align) {
+ /* Load last 2 bytes of instruction here */
+ opcode = deposit32(opcode, 16, 16,
+ translator_lduw(env, &ctx->base,
+ ctx->base.pc_next + 2));
+ }
for (guint i = 0; i < ctx->decoders->len; ++i) {
riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i);
- if (func(ctx, opcode32)) {
+ if (func(ctx, opcode)) {
return;
}
}
@@ -1226,10 +1251,8 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
CPURISCVState *env = cpu_env(cpu);
- uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);
- ctx->ol = ctx->xl;
- decode_opc(env, ctx, opcode16);
+ decode_opc(env, ctx);
ctx->base.pc_next += ctx->cur_insn_len;
/* Only the first insn within a TB is allowed to cross a page boundary. */
--
2.17.1
On 6/7/24 03:14, Jim Shu wrote: > Support 4-byte atomic instruction fetch when instruction is natural > aligned. > > Current implementation is not atomic because it loads instruction twice > for first and last 2 bytes. We load 4 bytes at once to keep the > atomicity. This instruction preload method only applys when instruction > is 4-byte aligned. If instruction is unaligned, it could be across pages > so that preload will trigger additional page fault. > > We encounter this issue when doing pressure test of enabling & disabling > Linux kernel ftrace. Ftrace with kernel preemption requires concurrent > modification and execution of instruction, so non-atomic instruction > fetch will cause the race condition. We may fetch the wrong instruction > which is the mixing of 2 instructions. Hmm. This is insufficient, as you have no guarantee that translator_ldl itself is atomic. Indeed, it is not: it uses memcpy as the innermost read. Let me think about this... r~
© 2016 - 2024 Red Hat, Inc.