[PATCH v5 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi

WANG Xuerui posted 30 patches 1 year ago
There is a newer version of this series
Maintainers: Laurent Vivier <laurent@vivier.eu>, Riku Voipio <riku.voipio@iki.fi>, Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, WANG Xuerui <git@xen0n.name>
[PATCH v5 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi
Posted by WANG Xuerui 1 year ago
Signed-off-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 109 +++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 8f7c556c37..f1d0047f5b 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -247,6 +247,113 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
     tcg_out_opc_dbar(s, 0);
 }
 
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+    if (ret == arg) {
+        return true;
+    }
+    switch (type) {
+    case TCG_TYPE_I32:
+    case TCG_TYPE_I64:
+        /*
+         * Conventional register-register move used in LoongArch is
+         * `or dst, src, zero`.
+         */
+        tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return true;
+}
+
+static bool imm_part_needs_loading(bool high_bits_are_ones,
+                                   tcg_target_long part)
+{
+    if (high_bits_are_ones) {
+        return part != -1;
+    } else {
+        return part != 0;
+    }
+}
+
+/* Loads a 32-bit immediate into rd, sign-extended.  */
+static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
+{
+    /* Single-instruction cases.  */
+    tcg_target_long lo = sextreg(val, 0, 12);
+    if (lo == val) {
+        /* val fits in simm12: addi.w rd, zero, val */
+        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
+        return;
+    }
+    if (0x800 <= val && val <= 0xfff) {
+        /* val fits in uimm12: ori rd, zero, val */
+        tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
+        return;
+    }
+
+    /* High bits must be set; load with lu12i.w + optional ori.  */
+    tcg_target_long hi12 = sextreg(val, 12, 20);
+    tcg_out_opc_lu12i_w(s, rd, hi12);
+    if (lo != 0) {
+        tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
+    }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+                         tcg_target_long val)
+{
+    if (type == TCG_TYPE_I32 || val == (int32_t)val) {
+        tcg_out_movi_i32(s, rd, val);
+        return;
+    }
+
+    /* PC-relative cases.  */
+    intptr_t pc_offset = tcg_pcrel_diff(s, (void *)val);
+    if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) {
+        /* Single pcaddu2i.  */
+        tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
+        return;
+    }
+
+    if (pc_offset == (int32_t)pc_offset) {
+        /* Offset within 32 bits; load with pcalau12i + ori.  */
+        tcg_target_long lo = sextreg(val, 0, 12);
+        tcg_target_long pc_hi = (val - pc_offset) >> 12;
+        tcg_target_long val_hi = val >> 12;
+        tcg_target_long offset_hi = val_hi - pc_hi;
+        tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20));
+        tcg_out_opc_pcalau12i(s, rd, offset_hi);
+        if (lo != 0) {
+            tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
+        }
+        return;
+    }
+
+    /* Single cu52i.d case.  */
+    if (ctz64(val) >= 52) {
+        tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, val >> 52);
+        return;
+    }
+
+    /* Slow path.  Initialize the low 32 bits, then concat high bits.  */
+    tcg_out_movi_i32(s, rd, val);
+
+    bool rd_high_bits_are_ones = (int32_t)val < 0;
+    tcg_target_long hi32 = sextreg(val, 32, 20);
+    tcg_target_long hi52 = sextreg(val, 52, 12);
+
+    if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) {
+        tcg_out_opc_cu32i_d(s, rd, hi32);
+        rd_high_bits_are_ones = hi32 < 0;
+    }
+
+    if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) {
+        tcg_out_opc_cu52i_d(s, rd, rd, hi52);
+    }
+}
+
 /*
  * Entry-points
  */
@@ -262,6 +369,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_mb(s, a0);
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
     default:
         g_assert_not_reached();
     }
-- 
2.33.0


Re: [PATCH v5 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi
Posted by Philippe Mathieu-Daudé 1 year ago
On 9/24/21 19:25, WANG Xuerui wrote:
> Signed-off-by: WANG Xuerui <git@xen0n.name>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/loongarch64/tcg-target.c.inc | 109 +++++++++++++++++++++++++++++++
>   1 file changed, 109 insertions(+)

> +/* Loads a 32-bit immediate into rd, sign-extended.  */
> +static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
> +{
> +    /* Single-instruction cases.  */
> +    tcg_target_long lo = sextreg(val, 0, 12);
> +    if (lo == val) {
> +        /* val fits in simm12: addi.w rd, zero, val */
> +        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
> +        return;
> +    }
> +    if (0x800 <= val && val <= 0xfff) {
> +        /* val fits in uimm12: ori rd, zero, val */
> +        tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
> +        return;
> +    }
> +
> +    /* High bits must be set; load with lu12i.w + optional ori.  */
> +    tcg_target_long hi12 = sextreg(val, 12, 20);

Please declare variables in function prologue.

Maybe name lo12 and hi20?

> +    tcg_out_opc_lu12i_w(s, rd, hi12);
> +    if (lo != 0) {
> +        tcg_out_opc_ori(s, rd, rd, lo & 0xfff);

Isn't lo already 12-bit? Why the mask?

> +    }
> +}
> +
> +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
> +                         tcg_target_long val)
> +{
> +    if (type == TCG_TYPE_I32 || val == (int32_t)val) {
> +        tcg_out_movi_i32(s, rd, val);
> +        return;
> +    }
> +
> +    /* PC-relative cases.  */
> +    intptr_t pc_offset = tcg_pcrel_diff(s, (void *)val);

Declare in prologue.

> +    if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) {
> +        /* Single pcaddu2i.  */
> +        tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
> +        return;
> +    }
> +
> +    if (pc_offset == (int32_t)pc_offset) {
> +        /* Offset within 32 bits; load with pcalau12i + ori.  */
> +        tcg_target_long lo = sextreg(val, 0, 12);

Name this 'val_lo' similarly to val_hi?

> +        tcg_target_long pc_hi = (val - pc_offset) >> 12;
> +        tcg_target_long val_hi = val >> 12;
> +        tcg_target_long offset_hi = val_hi - pc_hi;
> +        tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20));
> +        tcg_out_opc_pcalau12i(s, rd, offset_hi);
> +        if (lo != 0) {
> +            tcg_out_opc_ori(s, rd, rd, lo & 0xfff);

Again, lo is already 12-bit.

> +        }
> +        return;
> +    }
> +
> +    /* Single cu52i.d case.  */
> +    if (ctz64(val) >= 52) {
> +        tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, val >> 52);
> +        return;
> +    }
> +
> +    /* Slow path.  Initialize the low 32 bits, then concat high bits.  */
> +    tcg_out_movi_i32(s, rd, val);
> +
> +    bool rd_high_bits_are_ones = (int32_t)val < 0;

Declare in prologue, however this is hard to read. KISS:

        rd_high_bits_are_ones = (int32_t)val < 0 ? true : false;

> +    tcg_target_long hi32 = sextreg(val, 32, 20);

By 'hi32' I expect the 32 high bits. Maybe explicit as hi32_20?

> +    tcg_target_long hi52 = sextreg(val, 52, 12);

And hi52_12?

> +
> +    if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) {
> +        tcg_out_opc_cu32i_d(s, rd, hi32);
> +        rd_high_bits_are_ones = hi32 < 0;

Again KISS:

            if (hi32 < 0) {
                rd_high_bits_are_ones = true;
            }

> +    }
> +
> +    if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) {
> +        tcg_out_opc_cu52i_d(s, rd, rd, hi52);
> +    }
> +}

With comment addressed:
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

Re: [PATCH v5 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi
Posted by Richard Henderson 1 year ago
On 9/25/21 5:54 AM, Philippe Mathieu-Daudé wrote:
>> +    /* High bits must be set; load with lu12i.w + optional ori.  */
>> +    tcg_target_long hi12 = sextreg(val, 12, 20);
> 
> Please declare variables in function prologue.

Ah, true.  Officially, that's qemu coding style.  I tend to overlook it because I don't 
like the rule.


>> +    tcg_out_opc_lu12i_w(s, rd, hi12);
>> +    if (lo != 0) {
>> +        tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
> 
> Isn't lo already 12-bit? Why the mask?

lo was extracted signed, for addi; ori wants the value unsigned.
The value range asserts in tcg_out_opc_* will notice the difference.

>> +    /* Slow path.  Initialize the low 32 bits, then concat high bits.  */
>> +    tcg_out_movi_i32(s, rd, val);
>> +
>> +    bool rd_high_bits_are_ones = (int32_t)val < 0;
> 
> Declare in prologue, however this is hard to read. KISS:

If anything was to change here, I think I'd simply re-use "lo":

     lo = (int32_t)val;

and then check lo < 0 rather than rd_high_bits_are_ones as a boolean.

>> +        rd_high_bits_are_ones = hi32 < 0;
> 
> Again KISS:
> 
>             if (hi32 < 0) {
>                 rd_high_bits_are_ones = true;
>             }

This is certainly not KISS, and also wrong.  We would want an unconditional write to 
rd_high_bits_are_ones.  Although if there were any change,

     lo = hi32.

so that, again, we test lo < 0 for hi52.


r~

Re: [PATCH v5 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi
Posted by Philippe Mathieu-Daudé 1 year ago
On 9/25/21 16:04, Richard Henderson wrote:
> On 9/25/21 5:54 AM, Philippe Mathieu-Daudé wrote:
>>> +    /* High bits must be set; load with lu12i.w + optional ori.  */
>>> +    tcg_target_long hi12 = sextreg(val, 12, 20);

>>> +    tcg_out_opc_lu12i_w(s, rd, hi12);
>>> +    if (lo != 0) {
>>> +        tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
>>
>> Isn't lo already 12-bit? Why the mask?
> 
> lo was extracted signed, for addi; ori wants the value unsigned.
> The value range asserts in tcg_out_opc_* will notice the difference.

I indeed missed tcg_target_long, thanks.

Re: [PATCH v5 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi
Posted by WANG Xuerui 1 year ago
Hi Philippe,

On 9/25/21 17:54, Philippe Mathieu-Daudé wrote:
> On 9/24/21 19:25, WANG Xuerui wrote:
>> Signed-off-by: WANG Xuerui <git@xen0n.name>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   tcg/loongarch64/tcg-target.c.inc | 109 +++++++++++++++++++++++++++++++
>>   1 file changed, 109 insertions(+)
>
>> +/* Loads a 32-bit immediate into rd, sign-extended.  */
>> +static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
>> +{
>> +    /* Single-instruction cases.  */
>> +    tcg_target_long lo = sextreg(val, 0, 12);
>> +    if (lo == val) {
>> +        /* val fits in simm12: addi.w rd, zero, val */
>> +        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
>> +        return;
>> +    }
>> +    if (0x800 <= val && val <= 0xfff) {
>> +        /* val fits in uimm12: ori rd, zero, val */
>> +        tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
>> +        return;
>> +    }
>> +
>> +    /* High bits must be set; load with lu12i.w + optional ori.  */
>> +    tcg_target_long hi12 = sextreg(val, 12, 20);
>
> Please declare variables in function prologue.
Sure; will fix in v6.
>
> Maybe name lo12 and hi20?
I added ASCII art to hopefully clarify the namings; originally I used 
MIPS R6 terms (low, upper, higher, top) but all the MIPS R6 instructions 
take 16-bit imm, so I figured just naming by bitfield LSB index would be 
best. The Loongson documentation people didn't invent any dedicated name 
for the 4 parts or 3 load-upper instructions, either.
>
>> +    tcg_out_opc_lu12i_w(s, rd, hi12);
>> +    if (lo != 0) {
>> +        tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
>
> Isn't lo already 12-bit? Why the mask?
As Richard explained, lo is signed while ori takes unsigned imm, so this 
is necessary to not trip up the debug assert and overwrite the opcode.
>
>> +    }
>> +}
>> +
>> +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
>> +                         tcg_target_long val)
>> +{
>> +    if (type == TCG_TYPE_I32 || val == (int32_t)val) {
>> +        tcg_out_movi_i32(s, rd, val);
>> +        return;
>> +    }
>> +
>> +    /* PC-relative cases.  */
>> +    intptr_t pc_offset = tcg_pcrel_diff(s, (void *)val);
>
> Declare in prologue.
Ack; will fix.
>
>> +    if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 
>> 0) {
>> +        /* Single pcaddu2i.  */
>> +        tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
>> +        return;
>> +    }
>> +
>> +    if (pc_offset == (int32_t)pc_offset) {
>> +        /* Offset within 32 bits; load with pcalau12i + ori. */
>> +        tcg_target_long lo = sextreg(val, 0, 12);
>
> Name this 'val_lo' similarly to val_hi?
Nice catch; that was leftover from v4 where the tcg_out_movi_i32 logic 
was not factored out. Fixed in v6.
>
>> +        tcg_target_long pc_hi = (val - pc_offset) >> 12;
>> +        tcg_target_long val_hi = val >> 12;
>> +        tcg_target_long offset_hi = val_hi - pc_hi;
>> +        tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20));
>> +        tcg_out_opc_pcalau12i(s, rd, offset_hi);
>> +        if (lo != 0) {
>> +            tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
>
> Again, lo is already 12-bit.
Same as above.
>
>> +        }
>> +        return;
>> +    }
>> +
>> +    /* Single cu52i.d case.  */
>> +    if (ctz64(val) >= 52) {
>> +        tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, val >> 52);
>> +        return;
>> +    }
>> +
>> +    /* Slow path.  Initialize the low 32 bits, then concat high 
>> bits.  */
>> +    tcg_out_movi_i32(s, rd, val);
>> +
>> +    bool rd_high_bits_are_ones = (int32_t)val < 0;
>
> Declare in prologue, however this is hard to read. KISS:
>
>        rd_high_bits_are_ones = (int32_t)val < 0 ? true : false;
Hmm, comparison operators return boolean results already; and I thought 
expressions like `foo ? true : false` are typically considered to have 
"bad smell"? I think I'd want to keep the current way of saying 
things... But I'll of course move the declaration to function prologue.
>
>> +    tcg_target_long hi32 = sextreg(val, 32, 20);
>
> By 'hi32' I expect the 32 high bits. Maybe explicit as hi32_20?
>
>> +    tcg_target_long hi52 = sextreg(val, 52, 12);
>
> And hi52_12?
The names are getting long with addition of bitfield lengths; I hope the 
ASCII art in v6 could alleviate the need for longer names.
>
>> +
>> +    if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) {
>> +        tcg_out_opc_cu32i_d(s, rd, hi32);
>> +        rd_high_bits_are_ones = hi32 < 0;
>
> Again KISS:
>
>            if (hi32 < 0) {
>                rd_high_bits_are_ones = true;
>            }
>
As explained by Richard, this is indeed meant to be an unconditional 
overwrite. After concatenating the 51-to-32 bits, the topmost 12 bits is 
changed to be sign-extension of hi32, so the flag must be updated to 
reflect that.
>> +    }
>> +
>> +    if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) {
>> +        tcg_out_opc_cu52i_d(s, rd, rd, hi52);
>> +    }
>> +}
>
> With comment addressed:
> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>