target/loongarch/cpu.c | 4 ++ .../tcg/insn_trans/trans_memory.c.inc | 62 ++++++++++++++++++- target/loongarch/tcg/translate.c | 1 + target/loongarch/translate.h | 3 + 4 files changed, 68 insertions(+), 2 deletions(-)
LoongArch architecture (since LA664) introduces fine-grained dbar
hints that allow controlling which memory accesses are ordered by
the barrier. Previously, all dbar instructions were treated as a
full barrier (TCG_MO_ALL | TCG_BAR_SC).
This patch adds support for decoding dbar hints and emitting the
appropriate TCG memory barrier flags. For CPUs that do not advertise
the DBAR_HINTS feature (cpucfg3.DBAR_HINTS = 0), all dbar hints
fall back to a full barrier, preserving compatibility.
The hint encoding follows the LoongArch v1.10 specification:
* Bit3: barrier for previous read (0: true, 1: false)
* Bit2: barrier for previous write (0: true, 1: false)
* Bit1: barrier for succeeding read (0: true, 1: false)
* Bit0: barrier for succeeding write (0: true, 1: false)
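The mapping to TCG memory order flags is as follows (each hint bit that
is 0 contributes the listed flags; the contributions are OR-ed together):
Bit3, previous read:    TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST
Bit2, previous write:   TCG_BAR_SC | TCG_MO_ST_LD | TCG_MO_ST_ST
Bit1, succeeding read:  TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_ST_LD
Bit0, succeeding write: TCG_BAR_SC | TCG_MO_ST_ST | TCG_MO_LD_ST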
Special hint handling:
- hint 0x700: LL/SC loop barrier, treated as a full barrier as recommended.
- hints 0xf and 0x1f: reserved/no-op; no barrier is emitted.
Signed-off-by: Song Gao <gaosong@loongson.cn>
---
target/loongarch/cpu.c | 4 ++
.../tcg/insn_trans/trans_memory.c.inc | 62 ++++++++++++++++++-
target/loongarch/tcg/translate.c | 1 +
target/loongarch/translate.h | 3 +
4 files changed, 68 insertions(+), 2 deletions(-)
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index e22568c84a..d8d106b07e 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -455,6 +455,10 @@ static void loongarch_max_initfn(Object *obj)
data = FIELD_DP32(data, CPUCFG2, LLACQ_SCREL, 1);
data = FIELD_DP32(data, CPUCFG2, SCQ, 1);
cpu->env.cpucfg[2] = data;
+
+ data = cpu->env.cpucfg[3];
+ data = FIELD_DP32(data, CPUCFG3, DBAR_HINTS, 1);
+ cpu->env.cpucfg[3] = data;
}
}
diff --git a/target/loongarch/tcg/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
index e287d46363..e1146fe9ec 100644
--- a/target/loongarch/tcg/insn_trans/trans_memory.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
@@ -137,11 +137,69 @@ static bool trans_preldx(DisasContext *ctx, arg_preldx * a)
return true;
}
+/*
+ * Decode dbar hint and emit appropriate TCG memory barrier.
+ *
+ * The hint is the dbar immediate (a->imm); it is wider than 5 bits, since
+ * the special LL/SC loop barrier hint 0x700 is treated as a full barrier.
+ *
+ * See LoongArch Reference Manual v1.10, Section 4.2.2 for details.
+ */
static bool trans_dbar(DisasContext *ctx, arg_dbar * a)
{
- tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
+ int hint = a->imm;
+ TCGBar bar_flags = 0;
+
+ /* Reserved/no-op hints: 0xf and 0x1f */
+ if (hint == 0xf || hint == 0x1f) {
+ return true;
+ }
+
+ /* If the CPU does not support fine-grained hints, or for the special LL/SC
+ * loop barrier (0x700), emit a full barrier.
+ */
+ if (!avail_DBAR_HINT(ctx) || hint == 0x700) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ return true;
+ }
+
+ /*
+ * Fine-grained hint decoding:
+ * Bits 3-0 control which accesses must be ordered.
+ * Bit3: barrier for previous read (0: true, 1: false)
+ * Bit2: barrier for previous write (0: true, 1: false)
+ * Bit1: barrier for succeeding read (0: true, 1: false)
+ * Bit0: barrier for succeeding write (0: true, 1: false)
+ *
+ * Each side that requires ordering contributes both TCG_MO_* pairs it takes
+ * part in; the flags are OR-ed per side rather than intersected.
+ */
+
+ bool prev_rd = !(hint & 0x08); /* bit3 */
+ bool prev_wr = !(hint & 0x04); /* bit2 */
+ bool succ_rd = !(hint & 0x02); /* bit1 */
+ bool succ_wr = !(hint & 0x01); /* bit0 */
+
+ if (prev_rd) {
+ bar_flags |= TCG_MO_LD_LD | TCG_MO_LD_ST;
+ }
+ if (prev_wr) {
+ bar_flags |= TCG_MO_ST_LD | TCG_MO_ST_ST;
+ }
+ if (succ_rd) {
+ bar_flags |= TCG_MO_LD_LD | TCG_MO_ST_LD;
+ }
+ if (succ_wr) {
+ bar_flags |= TCG_MO_ST_ST | TCG_MO_LD_ST;
+ }
+
+ if (bar_flags == 0) {
+ bar_flags = TCG_MO_ALL; /* (hint & 0xf) == 0xf, other than 0xf/0x1f */
+ }
+
+ tcg_gen_mb(bar_flags | TCG_BAR_SC);
return true;
-}
+ }
static bool trans_ibar(DisasContext *ctx, arg_ibar *a)
{
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index b9ed13d19c..49280b1dd3 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -149,6 +149,7 @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
ctx->cpucfg1 = env->cpucfg[1];
ctx->cpucfg2 = env->cpucfg[2];
+ ctx->cpucfg3 = env->cpucfg[3];
}
static void loongarch_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index ba1c89e57b..8aa8325dc6 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -43,6 +43,8 @@
#define avail_LLACQ_SCREL(C) (FIELD_EX32((C)->cpucfg2, CPUCFG2, LLACQ_SCREL))
#define avail_LLACQ_SCREL_64(C) (avail_64(C) && avail_LLACQ_SCREL(C))
+#define avail_DBAR_HINT(C) (FIELD_EX32((C)->cpucfg3, CPUCFG3, DBAR_HINTS))
+
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
* it may require the inputs to be sign- or zero-extended; which will
@@ -66,6 +68,7 @@ typedef struct DisasContext {
bool va32; /* 32-bit virtual address */
uint32_t cpucfg1;
uint32_t cpucfg2;
+ uint32_t cpucfg3;
} DisasContext;
void generate_exception(DisasContext *ctx, int excp);
--
2.47.3
© 2016 - 2026 Red Hat, Inc.