tcg/loongarch64/tcg-target.c.inc | 53 +++++++++++++++++++++++++++++--- tcg/loongarch64/tcg-target.h | 3 +- 2 files changed, 49 insertions(+), 7 deletions(-)
Similar to the ARM64, LoongArch has PC-relative instructions such as
PCADDU18I. These instructions can be used to support direct jump for
LoongArch. Additionally, if instruction "B offset" can cover the target
address(target is within ±128MB range), a single "B offset" plus a nop
will be used by "tb_target_set_jump_target".
Signed-off-by: Qi Hu <huqi@loongson.cn>
---
tcg/loongarch64/tcg-target.c.inc | 53 +++++++++++++++++++++++++++++---
tcg/loongarch64/tcg-target.h | 3 +-
2 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index f5a214a17f..9f9508836a 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1031,6 +1031,36 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
#endif
}
+/* LoongArch use `andi zero, zero, 0` as NOP. */
+#define NOP OPC_ANDI
+static void tcg_out_nop(TCGContext *s)
+{
+ tcg_out32(s, NOP);
+}
+
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
+{
+ tcg_insn_unit i1, i2;
+ ptrdiff_t upper, lower;
+ ptrdiff_t offset = (addr - jmp_rx) >> 2;
+
+ if (offset == sextreg(offset, 0, 28)) {
+ i1 = encode_sd10k16_insn(OPC_B, offset);
+ i2 = NOP;
+ } else {
+ upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
+ lower = (offset & 0xffff);
+ /* patch pcaddu18i */
+ i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_T0, upper);
+ /* patch jirl */
+ i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_T0, lower);
+ }
+ uint64_t pair = ((uint64_t)i2 << 32) | i1;
+ qatomic_set((uint64_t *)jmp_rw, pair);
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
+}
+
/*
* Entry-points
*/
@@ -1058,11 +1088,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_goto_tb:
- assert(s->tb_jmp_insn_offset == 0);
- /* indirect jump method */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
- (uintptr_t)(s->tb_jmp_target_addr + a0));
- tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+ if (s->tb_jmp_insn_offset != NULL) {
+ /* TCG_TARGET_HAS_direct_jump */
+ /* Ensure that "patch area" are 8-byte aligned so that an
+ atomic write can be used to patch the target address. */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out_nop(s);
+ }
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ /* actual branch destination will be patched by
+ tb_target_set_jmp_target later */
+ tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+ tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+ } else {
+ /* !TCG_TARGET_HAS_direct_jump */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+ (uintptr_t)(s->tb_jmp_target_addr + a0));
+ tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+ }
set_jmp_reset_offset(s, a0);
break;
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 67380b2432..c008d5686d 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -123,7 +123,7 @@ typedef enum {
#define TCG_TARGET_HAS_clz_i32 1
#define TCG_TARGET_HAS_ctz_i32 1
#define TCG_TARGET_HAS_ctpop_i32 0
-#define TCG_TARGET_HAS_direct_jump 0
+#define TCG_TARGET_HAS_direct_jump 1
#define TCG_TARGET_HAS_brcond2 0
#define TCG_TARGET_HAS_setcond2 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
@@ -166,7 +166,6 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1
-/* not defined -- call should be eliminated at compile time */
void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#define TCG_TARGET_DEFAULT_MO (0)
--
2.37.3
On 10/13/22 20:01, Qi Hu wrote:
> Similar to the ARM64, LoongArch has PC-relative instructions such as
> PCADDU18I. These instructions can be used to support direct jump for
> LoongArch. Additionally, if instruction "B offset" can cover the target
> address(target is within ±128MB range), a single "B offset" plus a nop
> will be used by "tb_target_set_jump_target".
>
> Signed-off-by: Qi Hu <huqi@loongson.cn>
First, when sending a v2, do not reply to a different thread.
This confuses our patch tracking tools like patchew.org.
> +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
> + uintptr_t jmp_rw, uintptr_t addr)
> +{
> + tcg_insn_unit i1, i2;
> + ptrdiff_t upper, lower;
> + ptrdiff_t offset = (addr - jmp_rx) >> 2;
> +
> + if (offset == sextreg(offset, 0, 28)) {
> + i1 = encode_sd10k16_insn(OPC_B, offset);
> + i2 = NOP;
> + } else {
> + upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
> + lower = (offset & 0xffff);
This computation is incorrect, cropping the values to unsigned.
This will assert inside
> + /* patch pcaddu18i */
> + i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_T0, upper);
> + /* patch jirl */
> + i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_T0, lower);
these encoding functions. You want
lower = (int16_t)offset;
upper = (offset - lower) >> 16;
Wang Xuerui asked you to remove the redundant comments there, which give no more
information than the code itself.
> @@ -1058,11 +1088,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> break;
>
> case INDEX_op_goto_tb:
> - assert(s->tb_jmp_insn_offset == 0);
> - /* indirect jump method */
> - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
> - (uintptr_t)(s->tb_jmp_target_addr + a0));
> - tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> + if (s->tb_jmp_insn_offset != NULL) {
> + /* TCG_TARGET_HAS_direct_jump */
> + /* Ensure that "patch area" are 8-byte aligned so that an
> + atomic write can be used to patch the target address. */
> + if ((uintptr_t)s->code_ptr & 7) {
> + tcg_out_nop(s);
> + }
> + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
> + /* actual branch destination will be patched by
> + tb_target_set_jmp_target later */
> + tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
> + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> + } else {
> + /* !TCG_TARGET_HAS_direct_jump */
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
> + (uintptr_t)(s->tb_jmp_target_addr + a0));
> + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
> + }
Your comment formatting does not follow our coding style. It must be
/*
* Comment with
* multiple lines.
*/
There is a tool, ./scripts/check_patch.pl, that will diagnose this error.
> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
> index 67380b2432..c008d5686d 100644
> --- a/tcg/loongarch64/tcg-target.h
> +++ b/tcg/loongarch64/tcg-target.h
> @@ -123,7 +123,7 @@ typedef enum {
> #define TCG_TARGET_HAS_clz_i32 1
> #define TCG_TARGET_HAS_ctz_i32 1
> #define TCG_TARGET_HAS_ctpop_i32 0
> -#define TCG_TARGET_HAS_direct_jump 0
> +#define TCG_TARGET_HAS_direct_jump 1
> #define TCG_TARGET_HAS_brcond2 0
> #define TCG_TARGET_HAS_setcond2 0
> #define TCG_TARGET_HAS_qemu_st8_i32 0
> @@ -166,7 +166,6 @@ typedef enum {
> #define TCG_TARGET_HAS_muluh_i64 1
> #define TCG_TARGET_HAS_mulsh_i64 1
>
> -/* not defined -- call should be eliminated at compile time */
> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
>
> #define TCG_TARGET_DEFAULT_MO (0)
You are missing a change to
#define MAX_CODE_GEN_BUFFER_SIZE SIZE_MAX
The branch distance with pcaddu18i is +/- 37 bits (128GB) not 64 bits.
r~
On 2022/10/14 02:52, Richard Henderson wrote:
> On 10/13/22 20:01, Qi Hu wrote:
>> Similar to the ARM64, LoongArch has PC-relative instructions such as
>> PCADDU18I. These instructions can be used to support direct jump for
>> LoongArch. Additionally, if instruction "B offset" can cover the target
>> address(target is within ±128MB range), a single "B offset" plus a nop
>> will be used by "tb_target_set_jump_target".
>>
>> Signed-off-by: Qi Hu <huqi@loongson.cn>
>
> First, when sending a v2, do not reply to a different thread.
> This confuses our patch tracking tools like patchew.org.
>
Ok, I will pay attention about that.
>> +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
>> + uintptr_t jmp_rw, uintptr_t addr)
>> +{
>> + tcg_insn_unit i1, i2;
>> + ptrdiff_t upper, lower;
>> + ptrdiff_t offset = (addr - jmp_rx) >> 2;
>> +
>> + if (offset == sextreg(offset, 0, 28)) {
>> + i1 = encode_sd10k16_insn(OPC_B, offset);
>> + i2 = NOP;
>> + } else {
>> + upper = ((offset + (1 << 15)) >> 16) & 0xfffff;
>> + lower = (offset & 0xffff);
>
> This computation is incorrect, cropping the values to unsigned.
> This will assert inside
>
>> + /* patch pcaddu18i */
>> + i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_T0, upper);
>> + /* patch jirl */
>> + i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_T0,
>> lower);
>
> these encoding functions. You want
>
> lower = (int16_t)offset;
> upper = (offset - lower) >> 16;
Yes, thanks for your advise.
>
> Wang Xuerui asked you to remove the redundant comments there, which
> give no more information than the code itself.
I will remove these comments.
>
>> @@ -1058,11 +1088,24 @@ static void tcg_out_op(TCGContext *s,
>> TCGOpcode opc,
>> break;
>> case INDEX_op_goto_tb:
>> - assert(s->tb_jmp_insn_offset == 0);
>> - /* indirect jump method */
>> - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
>> - (uintptr_t)(s->tb_jmp_target_addr + a0));
>> - tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> + if (s->tb_jmp_insn_offset != NULL) {
>> + /* TCG_TARGET_HAS_direct_jump */
>> + /* Ensure that "patch area" are 8-byte aligned so that an
>> + atomic write can be used to patch the target address. */
>> + if ((uintptr_t)s->code_ptr & 7) {
>> + tcg_out_nop(s);
>> + }
>> + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
>> + /* actual branch destination will be patched by
>> + tb_target_set_jmp_target later */
>> + tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
>> + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> + } else {
>> + /* !TCG_TARGET_HAS_direct_jump */
>> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
>> + (uintptr_t)(s->tb_jmp_target_addr + a0));
>> + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
>> + }
>
> Your comment formatting does not follow our coding style. It must be
>
> /*
> * Comment with
> * multiple lines.
> */
>
> There is a tool, ./scripts/check_patch.pl, that will diagnose this error.
I will modify these comments.
>
>> diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
>> index 67380b2432..c008d5686d 100644
>> --- a/tcg/loongarch64/tcg-target.h
>> +++ b/tcg/loongarch64/tcg-target.h
>> @@ -123,7 +123,7 @@ typedef enum {
>> #define TCG_TARGET_HAS_clz_i32 1
>> #define TCG_TARGET_HAS_ctz_i32 1
>> #define TCG_TARGET_HAS_ctpop_i32 0
>> -#define TCG_TARGET_HAS_direct_jump 0
>> +#define TCG_TARGET_HAS_direct_jump 1
>> #define TCG_TARGET_HAS_brcond2 0
>> #define TCG_TARGET_HAS_setcond2 0
>> #define TCG_TARGET_HAS_qemu_st8_i32 0
>> @@ -166,7 +166,6 @@ typedef enum {
>> #define TCG_TARGET_HAS_muluh_i64 1
>> #define TCG_TARGET_HAS_mulsh_i64 1
>> -/* not defined -- call should be eliminated at compile time */
>> void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t,
>> uintptr_t);
>> #define TCG_TARGET_DEFAULT_MO (0)
>
> You are missing a change to
>
> #define MAX_CODE_GEN_BUFFER_SIZE SIZE_MAX
>
> The branch distance with pcaddu18i is +/- 37 bits (128GB) not 64 bits.
>
Oh, thanks for your reminder. I will add this.
>
> r~
I will send V3 after editing these.
Thanks.
Qi
© 2016 - 2026 Red Hat, Inc.