[PATCH v3] target/loongarch: Add support for dbar hint variants

Song Gao posted 1 patch 1 month, 2 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20260416114715.3309072-1-gaosong@loongson.cn
Maintainers: Song Gao <gaosong@loongson.cn>
target/loongarch/cpu.c                        |  4 ++
.../tcg/insn_trans/trans_memory.c.inc         | 63 ++++++++++++++++++-
target/loongarch/tcg/translate.c              |  1 +
target/loongarch/translate.h                  |  3 +
4 files changed, 69 insertions(+), 2 deletions(-)
[PATCH v3] target/loongarch: Add support for dbar hint variants
Posted by Song Gao 1 month, 2 weeks ago
LoongArch architecture (since LA664) introduces fine-grained dbar
hints that allow controlling which memory accesses are ordered by
the barrier. Previously, all dbar instructions were treated as a
full barrier (TCG_MO_ALL | TCG_BAR_SC).

This patch adds support for decoding dbar hints and emitting the
appropriate TCG memory barrier flags. For CPUs that do not advertise
the DBAR_HINTS feature (cpucfg3.DBAR_HINTS = 0), all dbar hints except
the no-op hints 0xf and 0x1f fall back to a full barrier, preserving
compatibility.

The hint encoding follows the LoongArch v1.10 specification:
Ordering control uses the low 5 bits (bits 4-0) of the hint. Bit4 is
reserved and currently ignored; only bits 3-0 select what is ordered.
 * Bit3: barrier for previous read (0: true, 1: false)
 * Bit2: barrier for previous write (0: true, 1: false)
 * Bit1: barrier for succeeding read (0: true, 1: false)
 * Bit0: barrier for succeeding write (0: true, 1: false)

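The mapping to TCG memory order flags is as follows (each group is
emitted when the corresponding hint bit is 0):
  Bit3 (previous read):     TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST
  Bit2 (previous write):    TCG_BAR_SC | TCG_MO_ST_LD | TCG_MO_ST_ST
  Bit1 (succeeding read):   TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_ST_LD
  Bit0 (succeeding write):  TCG_BAR_SC | TCG_MO_ST_ST | TCG_MO_LD_ST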

Special hint handling:
- hint 0x700: LL/SC loop barrier, treated as a full barrier as recommended.
- hints 0xf and 0x1f: reserved/no-op, no barrier is emitted.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/cpu.c                        |  4 ++
 .../tcg/insn_trans/trans_memory.c.inc         | 63 ++++++++++++++++++-
 target/loongarch/tcg/translate.c              |  1 +
 target/loongarch/translate.h                  |  3 +
 4 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index e22568c84a..d8d106b07e 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -455,6 +455,10 @@ static void loongarch_max_initfn(Object *obj)
         data = FIELD_DP32(data, CPUCFG2, LLACQ_SCREL, 1);
         data = FIELD_DP32(data, CPUCFG2, SCQ, 1);
         cpu->env.cpucfg[2] = data;
+
+        data = cpu->env.cpucfg[3];
+        data = FIELD_DP32(data, CPUCFG3, DBAR_HINTS, 1);
+        cpu->env.cpucfg[3] = data;
     }
 }
 
diff --git a/target/loongarch/tcg/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
index e287d46363..dcecf02e54 100644
--- a/target/loongarch/tcg/insn_trans/trans_memory.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
@@ -137,11 +137,70 @@ static bool trans_preldx(DisasContext *ctx, arg_preldx * a)
     return true;
 }
 
+/*
+ * Decode dbar hint and emit appropriate TCG memory barrier.
+ *
+ * Ordering control uses the low 5 bits (bits 4-0) of the hint immediate.
+ * For hint 0x700 (special LL/SC loop barrier), treat as full barrier.
+ *
+ * See LoongArch Reference Manual v1.10, Section 4.2.2 for details.
+ */
 static bool trans_dbar(DisasContext *ctx, arg_dbar * a)
 {
-    tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
+    int hint = a->imm;
+    TCGBar bar_flags = 0;
+
+    /* Reserved/no-op hints: 0xf and 0x1f */
+    if (hint == 0xf || hint == 0x1f) {
+        return true;
+    }
+
+    /* If the CPU does not support fine-grained hints, or for the special LL/SC
+     * loop barrier (0x700), emit a full barrier.
+     */
+    if (!avail_DBAR_HINT(ctx) || hint == 0x700) {
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+        return true;
+    }
+
+    /*
+     * Fine-grained hint decoding:
+     * The hint is a 5-bit field (bits 4-0). Bit4 is reserved and currently
+     * ignored/discarded. Only bits 3-0 are used for ordering control.
+     * Bit3: barrier for previous read (0: true, 1: false)
+     * Bit2: barrier for previous write (0: true, 1: false)
+     * Bit1: barrier for succeeding read (0: true, 1: false)
+     * Bit0: barrier for succeeding write (0: true, 1: false)
+     *
+     * Each side that requires ordering contributes both TCG_MO_* flags it
+     * takes part in, so the barrier may be stronger than the hint demands.
+     */
+
+    bool prev_rd = !(hint & 0x08); /* bit3 */
+    bool prev_wr = !(hint & 0x04); /* bit2 */
+    bool succ_rd = !(hint & 0x02); /* bit1 */
+    bool succ_wr = !(hint & 0x01); /* bit0 */
+
+    if (prev_rd) {
+        bar_flags |= TCG_MO_LD_LD | TCG_MO_LD_ST;
+    }
+    if (prev_wr) {
+        bar_flags |= TCG_MO_ST_LD | TCG_MO_ST_ST;
+    }
+    if (succ_rd) {
+        bar_flags |= TCG_MO_LD_LD | TCG_MO_ST_LD;
+    }
+    if (succ_wr) {
+        bar_flags |= TCG_MO_ST_ST | TCG_MO_LD_ST;
+    }
+
+    if (bar_flags == 0) {
+        bar_flags = TCG_MO_ALL;
+    }
+
+    tcg_gen_mb(bar_flags | TCG_BAR_SC);
     return true;
-}
+ }
 
 static bool trans_ibar(DisasContext *ctx, arg_ibar *a)
 {
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index b9ed13d19c..49280b1dd3 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -149,6 +149,7 @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
 
     ctx->cpucfg1 = env->cpucfg[1];
     ctx->cpucfg2 = env->cpucfg[2];
+    ctx->cpucfg3 = env->cpucfg[3];
 }
 
 static void loongarch_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
index ba1c89e57b..8aa8325dc6 100644
--- a/target/loongarch/translate.h
+++ b/target/loongarch/translate.h
@@ -43,6 +43,8 @@
 #define avail_LLACQ_SCREL(C)    (FIELD_EX32((C)->cpucfg2, CPUCFG2, LLACQ_SCREL))
 #define avail_LLACQ_SCREL_64(C) (avail_64(C) && avail_LLACQ_SCREL(C))
 
+#define avail_DBAR_HINT(C) (FIELD_EX32((C)->cpucfg3, CPUCFG3, DBAR_HINTS))
+
 /*
  * If an operation is being performed on less than TARGET_LONG_BITS,
  * it may require the inputs to be sign- or zero-extended; which will
@@ -66,6 +68,7 @@ typedef struct DisasContext {
     bool va32; /* 32-bit virtual address */
     uint32_t cpucfg1;
     uint32_t cpucfg2;
+    uint32_t cpucfg3;
 } DisasContext;
 
 void generate_exception(DisasContext *ctx, int excp);
-- 
2.47.3
Re: [PATCH v3] target/loongarch: Add support for dbar hint variants
Posted by Bibo Mao 1 month, 1 week ago

On 2026/4/16 下午7:47, Song Gao wrote:
> LoongArch architecture (since LA664) introduces fine-grained dbar
> hints that allow controlling which memory accesses are ordered by
> the barrier. Previously, all dbar instructions were treated as a
> full barrier (TCG_MO_ALL | TCG_BAR_SC).
> 
> This patch adds support for decoding dbar hints and emitting the
> appropriate TCG memory barrier flags. For CPUs that do not advertise
> the DBAR_HINTS feature (cpucfg3.DBAR_HINTS = 0), all dbar hints
> fall back to a full barrier, preserving compatibility.
> 
> The hint encoding follows the LoongArch v1.10 specification:
> The hint is a 5-bit field (bits 4-0). Bit4 is reserved and currently
> ignored/discarded. Only bits 3-0 are used for ordering control.
>   * Bit3: barrier for previous read (0: true, 1: false)
>   * Bit2: barrier for previous write (0: true, 1: false)
>   * Bit1: barrier for succeeding read (0: true, 1: false)
>   * Bit0: barrier for succeeding write (0: true, 1: false)
> 
> The mapping to TCG memory order flags is as follows:
>    TCG_BAR_SC |TCG_MO_LD_LD | TCG_MO_LD_ST;
>    TCG_BAR_SC |TCG_MO_ST_LD | TCG_MO_ST_ST;
>    TCG_BAR_SC |TCG_MO_LD_LD | TCG_MO_ST_LD;
>    TCG_BAR_SC |TCG_MO_ST_ST | TCG_MO_LD_ST;
> 
> Special hint handling:
> - hint 0x700: LL/SC loop barrier, treated as a full barrier as recommended.
> - hint 0xf and 0x1f: reserved/no-op, treated as no operation
> 
> Signed-off-by: Song Gao <gaosong@loongson.cn>
> ---
>   target/loongarch/cpu.c                        |  4 ++
>   .../tcg/insn_trans/trans_memory.c.inc         | 63 ++++++++++++++++++-
>   target/loongarch/tcg/translate.c              |  1 +
>   target/loongarch/translate.h                  |  3 +
>   4 files changed, 69 insertions(+), 2 deletions(-)
> 
> diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
> index e22568c84a..d8d106b07e 100644
> --- a/target/loongarch/cpu.c
> +++ b/target/loongarch/cpu.c
> @@ -455,6 +455,10 @@ static void loongarch_max_initfn(Object *obj)
>           data = FIELD_DP32(data, CPUCFG2, LLACQ_SCREL, 1);
>           data = FIELD_DP32(data, CPUCFG2, SCQ, 1);
>           cpu->env.cpucfg[2] = data;
> +
> +        data = cpu->env.cpucfg[3];
> +        data = FIELD_DP32(data, CPUCFG3, DBAR_HINTS, 1);
> +        cpu->env.cpucfg[3] = data;
>       }
>   }
>   
> diff --git a/target/loongarch/tcg/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
> index e287d46363..dcecf02e54 100644
> --- a/target/loongarch/tcg/insn_trans/trans_memory.c.inc
> +++ b/target/loongarch/tcg/insn_trans/trans_memory.c.inc
> @@ -137,11 +137,70 @@ static bool trans_preldx(DisasContext *ctx, arg_preldx * a)
>       return true;
>   }
>   
> +/*
> + * Decode dbar hint and emit appropriate TCG memory barrier.
> + *
> + * The hint is a 5-bit field (0-31) encoded in the instruction.
> + * For hint 0x700 (special LL/SC loop barrier), treat as full barrier.
> + *
> + * See LoongArch Reference Manual v1.10, Section 4.2.2 for details.
> + */
>   static bool trans_dbar(DisasContext *ctx, arg_dbar * a)
>   {
> -    tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
> +    int hint = a->imm;
> +    TCGBar bar_flags = 0;
> +
> +    /* Reserved/no-op hints: 0xf and 0x1f */
> +    if (hint == 0xf || hint == 0x1f) {
> +        return true;
> +    }
> +
> +    /* If the CPU does not support fine-grained hints,or for the special LL/SC
> +     * loop barrier (0x700), emit a full barrier.
> +     */
> +    if (!avail_DBAR_HINT(ctx) || hint == 0x700) {
> +        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
> +        return true;
> +    }
> +
> +    /*
> +     * Fine-grained hint decoding:
> +     * The hint is a 5-bit field (bits 4-0). Bit4 is reserved and currently
> +     * ignored/discarded. Only bits 3-0 are used for ordering control.
> +     * Bit3: barrier for previous read (0: true, 1: false)
> +     * Bit2: barrier for previous write (0: true, 1: false)
> +     * Bit1: barrier for succeeding read (0: true, 1: false)
> +     * Bit0: barrier for succeeding write (0: true, 1: false)
> +     *
> +     * For each combination, we set the corresponding TCG_MO_* flag if both
> +     * sides of the barrier require ordering.
> +     */
> +
> +    bool prev_rd = !(hint & 0x08); /* bit3 */
> +    bool prev_wr = !(hint & 0x04); /* bit2 */
> +    bool succ_rd = !(hint & 0x02); /* bit1 */
> +    bool succ_wr = !(hint & 0x01); /* bit0 */
> +
> +    if (prev_rd) {
> +        bar_flags |= TCG_MO_LD_LD | TCG_MO_LD_ST;
> +    }
> +    if (prev_wr) {
> +        bar_flags |= TCG_MO_ST_LD | TCG_MO_ST_ST;
> +    }
> +    if (succ_rd) {
> +        bar_flags |= TCG_MO_LD_LD | TCG_MO_ST_LD;
> +    }
> +    if (succ_wr) {
> +        bar_flags |= TCG_MO_ST_ST | TCG_MO_LD_ST;
> +    }
> +
> +    if (bar_flags == 0) {
> +        bar_flags = TCG_MO_ALL;
> +    }
> +
> +    tcg_gen_mb(bar_flags | TCG_BAR_SC);
>       return true;
> -}
> + }
>   
>   static bool trans_ibar(DisasContext *ctx, arg_ibar *a)
>   {
> diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
> index b9ed13d19c..49280b1dd3 100644
> --- a/target/loongarch/tcg/translate.c
> +++ b/target/loongarch/tcg/translate.c
> @@ -149,6 +149,7 @@ static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
>   
>       ctx->cpucfg1 = env->cpucfg[1];
>       ctx->cpucfg2 = env->cpucfg[2];
> +    ctx->cpucfg3 = env->cpucfg[3];
>   }
>   
>   static void loongarch_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
> diff --git a/target/loongarch/translate.h b/target/loongarch/translate.h
> index ba1c89e57b..8aa8325dc6 100644
> --- a/target/loongarch/translate.h
> +++ b/target/loongarch/translate.h
> @@ -43,6 +43,8 @@
>   #define avail_LLACQ_SCREL(C)    (FIELD_EX32((C)->cpucfg2, CPUCFG2, LLACQ_SCREL))
>   #define avail_LLACQ_SCREL_64(C) (avail_64(C) && avail_LLACQ_SCREL(C))
>   
> +#define avail_DBAR_HINT(C) (FIELD_EX32((C)->cpucfg3, CPUCFG3, DBAR_HINTS))
> +
>   /*
>    * If an operation is being performed on less than TARGET_LONG_BITS,
>    * it may require the inputs to be sign- or zero-extended; which will
> @@ -66,6 +68,7 @@ typedef struct DisasContext {
>       bool va32; /* 32-bit virtual address */
>       uint32_t cpucfg1;
>       uint32_t cpucfg2;
> +    uint32_t cpucfg3;
>   } DisasContext;
>   
>   void generate_exception(DisasContext *ctx, int excp);
> 
Reviewed-by: Bibo Mao <maobibo@loongson.cn>