Add registers and function stubs. The functionality
is disabled via use_neon_instructions defined to 0.
We must still include results for the mandatory opcodes in
tcg_target_op_def, as all opcodes are checked during tcg init.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target-con-set.h | 4 ++
tcg/arm/tcg-target-con-str.h | 1 +
tcg/arm/tcg-target.h | 48 ++++++++++++--
tcg/arm/tcg-target.opc.h | 12 ++++
tcg/arm/tcg-target.c.inc | 124 ++++++++++++++++++++++++++++++-----
5 files changed, 165 insertions(+), 24 deletions(-)
create mode 100644 tcg/arm/tcg-target.opc.h
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
index ab63e089c2..27aced5391 100644
--- a/tcg/arm/tcg-target-con-set.h
+++ b/tcg/arm/tcg-target-con-set.h
@@ -13,11 +13,14 @@ C_O0_I1(r)
C_O0_I2(r, r)
C_O0_I2(r, rIN)
C_O0_I2(s, s)
+C_O0_I2(w, r)
C_O0_I3(s, s, s)
C_O0_I4(r, r, rI, rI)
C_O0_I4(s, s, s, s)
C_O1_I1(r, l)
C_O1_I1(r, r)
+C_O1_I1(w, r)
+C_O1_I1(w, wr)
C_O1_I2(r, 0, rZ)
C_O1_I2(r, l, l)
C_O1_I2(r, r, r)
@@ -26,6 +29,7 @@ C_O1_I2(r, r, rIK)
C_O1_I2(r, r, rIN)
C_O1_I2(r, r, ri)
C_O1_I2(r, rZ, rZ)
+C_O1_I2(w, w, w)
C_O1_I4(r, r, r, rI, rI)
C_O1_I4(r, r, rIN, rIK, 0)
C_O2_I1(r, r, l)
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
index a0ab7747db..255a1ae0e2 100644
--- a/tcg/arm/tcg-target-con-str.h
+++ b/tcg/arm/tcg-target-con-str.h
@@ -11,6 +11,7 @@
REGS('r', ALL_GENERAL_REGS)
REGS('l', ALL_QLOAD_REGS)
REGS('s', ALL_QSTORE_REGS)
+REGS('w', ALL_VECTOR_REGS)
/*
* Define constraint letters for constants:
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 8d1fee6327..a9dc09bd08 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -78,19 +78,38 @@ typedef enum {
TCG_REG_R13,
TCG_REG_R14,
TCG_REG_PC,
+
+ TCG_REG_Q0,
+ TCG_REG_Q1,
+ TCG_REG_Q2,
+ TCG_REG_Q3,
+ TCG_REG_Q4,
+ TCG_REG_Q5,
+ TCG_REG_Q6,
+ TCG_REG_Q7,
+ TCG_REG_Q8,
+ TCG_REG_Q9,
+ TCG_REG_Q10,
+ TCG_REG_Q11,
+ TCG_REG_Q12,
+ TCG_REG_Q13,
+ TCG_REG_Q14,
+ TCG_REG_Q15,
+
+ TCG_AREG0 = TCG_REG_R6,
+ TCG_REG_CALL_STACK = TCG_REG_R13,
} TCGReg;
-#define TCG_TARGET_NB_REGS 16
+#define TCG_TARGET_NB_REGS 32
#ifdef __ARM_ARCH_EXT_IDIV__
#define use_idiv_instructions 1
#else
extern bool use_idiv_instructions;
#endif
-
+#define use_neon_instructions 0
/* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_R13
#define TCG_TARGET_STACK_ALIGN 8
#define TCG_TARGET_CALL_ALIGN_ARGS 1
#define TCG_TARGET_CALL_STACK_OFFSET 0
@@ -128,9 +147,26 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_direct_jump 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
-enum {
- TCG_AREG0 = TCG_REG_R6,
-};
+#define TCG_TARGET_HAS_v64 use_neon_instructions
+#define TCG_TARGET_HAS_v128 use_neon_instructions
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_neg_vec 0
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 0
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_mul_vec 0
+#define TCG_TARGET_HAS_sat_vec 0
+#define TCG_TARGET_HAS_minmax_vec 0
+#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target.opc.h
new file mode 100644
index 0000000000..7a4578e9b4
--- /dev/null
+++ b/tcg/arm/tcg-target.opc.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2019 Linaro
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index eb4f42e53d..9bb354abce 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -40,22 +40,10 @@ bool use_idiv_instructions;
#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
- "%r0",
- "%r1",
- "%r2",
- "%r3",
- "%r4",
- "%r5",
- "%r6",
- "%r7",
- "%r8",
- "%r9",
- "%r10",
- "%r11",
- "%r12",
- "%r13",
- "%r14",
- "%pc",
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%sp", "%r14", "%pc",
+ "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7",
+ "%q8", "%q9", "%q10", "%q11", "%q12", "%q13", "%q14", "%q15",
};
#endif
@@ -75,6 +63,23 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R3,
TCG_REG_R12,
TCG_REG_R14,
+
+ TCG_REG_Q0,
+ TCG_REG_Q1,
+ TCG_REG_Q2,
+ TCG_REG_Q3,
+ TCG_REG_Q4,
+ TCG_REG_Q5,
+ TCG_REG_Q6,
+ TCG_REG_Q7,
+ TCG_REG_Q8,
+ TCG_REG_Q9,
+ TCG_REG_Q10,
+ TCG_REG_Q11,
+ TCG_REG_Q12,
+ TCG_REG_Q13,
+ TCG_REG_Q14,
+ TCG_REG_Q15,
};
static const int tcg_target_call_iarg_regs[4] = {
@@ -85,6 +90,7 @@ static const int tcg_target_call_oarg_regs[2] = {
};
#define TCG_REG_TMP TCG_REG_R12
+#define TCG_VEC_TMP TCG_REG_Q15
enum arm_cond_code_e {
COND_EQ = 0x0,
@@ -238,6 +244,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
#define TCG_CT_CONST_ZERO 0x800
#define ALL_GENERAL_REGS 0xffffu
+#define ALL_VECTOR_REGS 0xffff0000u
/*
* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
@@ -2117,6 +2124,22 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_i64:
return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
+ case INDEX_op_st_vec:
+ return C_O0_I2(w, r);
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(w, r);
+ case INDEX_op_dup_vec:
+ return C_O1_I1(w, wr);
+ case INDEX_op_dup2_vec:
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_cmp_vec:
+ return C_O1_I2(w, w, w);
+
default:
g_assert_not_reached();
}
@@ -2126,12 +2149,18 @@ static void tcg_target_init(TCGContext *s)
{
/* Only probe for the platform and capabilities if we haven't already
determined maximum values at compile time. */
-#ifndef use_idiv_instructions
+#if !defined(use_idiv_instructions) || !defined(use_neon_instructions)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+#ifndef use_idiv_instructions
use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
+#endif
+#ifndef use_neon_instructions
+ use_neon_instructions = (hwcap & HWCAP_ARM_NEON) != 0;
+#endif
}
#endif
+
if (__ARM_ARCH < 7) {
const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
@@ -2139,7 +2168,7 @@ static void tcg_target_init(TCGContext *s)
}
}
- tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
+ tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
tcg_target_call_clobber_regs = 0;
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
@@ -2149,10 +2178,33 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+ if (use_neon_instructions) {
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q7);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q11);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q12);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q13);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q14);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q15);
+ }
+
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
@@ -2186,6 +2238,42 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
tcg_out_movi32(s, COND_AL, ret, arg);
}
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, TCGReg rs)
+{
+ g_assert_not_reached();
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, TCGReg base, intptr_t offset)
+{
+ g_assert_not_reached();
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, int64_t v64)
+{
+ g_assert_not_reached();
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg *args, const int *const_args)
+{
+ g_assert_not_reached();
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ return 0;
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ g_assert_not_reached();
+}
+
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
int i;
--
2.25.1
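For context on the commit message's note about mandatory opcodes: at startup
TCG resolves a constraint set for every opcode present in the build, which is
why even a backend with NEON disabled must answer for the vector opcodes. A
simplified sketch, loosely modeled on process_op_defs() in tcg/tcg.c (an
illustration, not the verbatim implementation):

static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];

        /* Opcodes compiled out of this build are skipped... */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }
        /* ...but every other opcode is queried, so the
           g_assert_not_reached() in tcg_target_op_def() fires at
           init for any opcode the backend fails to handle. */
        TCGConstraintSetIndex con_set = tcg_target_op_def(op);

        /* ... decode con_set into per-argument constraints ... */
        (void)con_set;
    }
}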
On Mon, 8 Feb 2021 at 03:48, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Add registers and function stubs. The functionality
> is disabled via use_neon_instructions defined to 0.
>
> We must still include results for the mandatory opcodes in
> tcg_target_op_def, as all opcodes are checked during tcg init.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> + if (use_neon_instructions) {
> + tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
> + tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
> +
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q0);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q1);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q2);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q3);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q4);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q5);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q6);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q7);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q8);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q9);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q10);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q11);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q12);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q13);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q14);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q15);
> + }
The AAPCS says that q4-q7 are preserved across calls.
Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
thanks
-- PMM
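One way to reflect that constraint in tcg_target_init(), sketched with the
regset helpers already used in the patch (an illustration only, not the
committed fix; as the follow-ups note, clearing clobber bits is only half
the story):

    /* Sketch: honour the AAPCS by not marking q4-q7 call-clobbered.
     * Start from "all vector regs clobbered", then clear the preserved
     * range.  Incomplete on its own -- see the discussion below about
     * tcg_target_qemu_prologue(). */
    if (use_neon_instructions) {
        tcg_target_call_clobber_regs |= ALL_VECTOR_REGS;

        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_Q4);
        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_Q5);
        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_Q6);
        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_Q7);
    }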
On Mon, 8 Feb 2021 at 17:53, Peter Maydell <peter.maydell@linaro.org> wrote:
> The AAPCS says that q4-q7 are preserved across calls.

Speaking of which, doesn't that mean we also need to
save and restore q4-q7 in tcg_target_qemu_prologue()
if we might be generating neon insns? (It doesn't look like
aarch64's prologue does this, which seems like a bug.)

thanks
-- PMM
On 2/8/21 10:28 AM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 17:53, Peter Maydell <peter.maydell@linaro.org> wrote:
>> The AAPCS says that q4-q7 are preserved across calls.
>
> Speaking of which, doesn't that mean we also need to
> save and restore q4-q7 in tcg_target_qemu_prologue()
> if we might be generating neon insns? (It doesn't look like
> aarch64's prologue does this, which seems like a bug.)

I just put them on the reserved list so that they don't get used.

> tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
...
> tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

We have way more vector registers than TCG will currently use, and we
have to assume all helpers can modify env->memory, so avoiding the
call-saved ones is best.

r~
On Mon, 8 Feb 2021 at 18:58, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 2/8/21 10:28 AM, Peter Maydell wrote:
> > On Mon, 8 Feb 2021 at 17:53, Peter Maydell <peter.maydell@linaro.org> wrote:
> >> The AAPCS says that q4-q7 are preserved across calls.
> >
> > Speaking of which, doesn't that mean we also need to
> > save and restore q4-q7 in tcg_target_qemu_prologue()
> > if we might be generating neon insns? (It doesn't look like
> > aarch64's prologue does this, which seems like a bug.)
>
> I just put them on the reserved list so that they don't get used.
>
> > tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
> ...
> > tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

I'm confused. That's not the reserved list, it's the call-clobber
list, and it's not putting the registers on the list, it's taking
them off it.

(The code looks correct for setting up the clobber list: we start
with "everything is clobbered" and then remove the callee-saves
regs. But the AArch64 PCS says that only the bottom 64-bits of
v8-v15 are saved, so in fact I don't think we can mark them as
not-call-clobbers unless we're forcing the vector code to only
use 64-bits of vector registers.)

The only things on the reserved_regs list for aarch64 are integer
regs as far as I can see (sp, fp, x18, x30, v31).

-- PMM
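For readers following the thread, the two lists being conflated here do
different jobs. A short sketch using the calls quoted above (names as in
the QEMU tree):

    /* reserved_regs: the register allocator never hands these out.
     * tcg_regset_set_reg() adds a register to a set. */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    /* call_clobber_regs: allocatable, but assumed destroyed by any
     * call, so live values are spilled around helper calls.
     * tcg_regset_reset_reg() removes a bit, i.e. asserts the callee
     * preserves the register -- which, per the AArch64 PCS, is true
     * only for the low 64 bits of v8-v15. */
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);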
On 2/8/21 11:30 AM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 18:58, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 2/8/21 10:28 AM, Peter Maydell wrote:
>>> On Mon, 8 Feb 2021 at 17:53, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>> The AAPCS says that q4-q7 are preserved across calls.
>>>
>>> Speaking of which, doesn't that mean we also need to
>>> save and restore q4-q7 in tcg_target_qemu_prologue()
>>> if we might be generating neon insns? (It doesn't look like
>>> aarch64's prologue does this, which seems like a bug.)
>>
>> I just put them on the reserved list so that they don't get used.
>>
>>> tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
>> ...
>>> tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
>
> I'm confused. That's not the reserved list, it's the call-clobber
> list
Oops. It's actually done by not adding them to tcg_target_reg_alloc_order.
/* V8 - V15 are call-saved, and skipped. */
Which works as well, I suppose. I dunno which makes more sense.
r~
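The allocation-order approach described here, as an abridged sketch modeled
on the aarch64 backend's tcg_target_reg_alloc_order (register list
shortened):

    /* Registers absent from this array are never chosen by the
     * allocator, so v8-v15 stay untouched without any prologue
     * save/restore. */
    static const int tcg_target_reg_alloc_order[] = {
        /* ... general-purpose registers ... */
        TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
        TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
        /* V8 - V15 are call-saved, and skipped.  */
        TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
        /* ... */
    };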