+Ilya/David
On 10/1/24 23:44, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/s390x/tcg-target.h | 2 +-
> tcg/s390x/tcg-target.c.inc | 139 +++++++++++++++++++++++++------------
> 2 files changed, 97 insertions(+), 44 deletions(-)
>
> diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
> index 53bed8c8d2..ae448c3a3a 100644
> --- a/tcg/s390x/tcg-target.h
> +++ b/tcg/s390x/tcg-target.h
> @@ -138,7 +138,7 @@ extern uint64_t s390_facilities[3];
>
> #define TCG_TARGET_HAS_qemu_ldst_i128 1
>
> -#define TCG_TARGET_HAS_tst 0
> +#define TCG_TARGET_HAS_tst 1
>
> #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
> #define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
> diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
> index 86ec737768..cb1693c9cf 100644
> --- a/tcg/s390x/tcg-target.c.inc
> +++ b/tcg/s390x/tcg-target.c.inc
> @@ -112,6 +112,9 @@ typedef enum S390Opcode {
> RI_OILH = 0xa50a,
> RI_OILL = 0xa50b,
> RI_TMLL = 0xa701,
> + RI_TMLH = 0xa700,
> + RI_TMHL = 0xa703,
> + RI_TMHH = 0xa702,
>
> RIEb_CGRJ = 0xec64,
> RIEb_CLGRJ = 0xec65,
> @@ -404,10 +407,15 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
> #define S390_CC_NEVER 0
> #define S390_CC_ALWAYS 15
>
> +#define S390_TM_EQ 8 /* CC == 0 */
> +#define S390_TM_NE 7 /* CC in {1,2,3} */
> +
> /* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
> -static const uint8_t tcg_cond_to_s390_cond[] = {
> +static const uint8_t tcg_cond_to_s390_cond[16] = {
> [TCG_COND_EQ] = S390_CC_EQ,
> [TCG_COND_NE] = S390_CC_NE,
> + [TCG_COND_TSTEQ] = S390_CC_EQ,
> + [TCG_COND_TSTNE] = S390_CC_NE,
> [TCG_COND_LT] = S390_CC_LT,
> [TCG_COND_LE] = S390_CC_LE,
> [TCG_COND_GT] = S390_CC_GT,
> @@ -421,9 +429,11 @@ static const uint8_t tcg_cond_to_s390_cond[] = {
> /* Condition codes that result from a LOAD AND TEST. Here, we have no
> unsigned instruction variation, however since the test is vs zero we
> can re-map the outcomes appropriately. */
> -static const uint8_t tcg_cond_to_ltr_cond[] = {
> +static const uint8_t tcg_cond_to_ltr_cond[16] = {
> [TCG_COND_EQ] = S390_CC_EQ,
> [TCG_COND_NE] = S390_CC_NE,
> + [TCG_COND_TSTEQ] = S390_CC_ALWAYS,
> + [TCG_COND_TSTNE] = S390_CC_NEVER,
> [TCG_COND_LT] = S390_CC_LT,
> [TCG_COND_LE] = S390_CC_LE,
> [TCG_COND_GT] = S390_CC_GT,
> @@ -542,10 +552,13 @@ static bool risbg_mask(uint64_t c)
> static bool tcg_target_const_match(int64_t val, int ct,
> TCGType type, TCGCond cond, int vece)
> {
> + uint64_t uval = val;
> +
> if (ct & TCG_CT_CONST) {
> return true;
> }
> if (type == TCG_TYPE_I32) {
> + uval = (uint32_t)val;
> val = (int32_t)val;
> }
>
> @@ -567,6 +580,15 @@ static bool tcg_target_const_match(int64_t val, int ct,
> case TCG_COND_GTU:
> ct |= TCG_CT_CONST_U32; /* CLGFI */
> break;
> + case TCG_COND_TSTNE:
> + case TCG_COND_TSTEQ:
> + if (is_const_p16(uval) >= 0) {
> + return true; /* TMxx */
> + }
> + if (risbg_mask(uval)) {
> + return true; /* RISBG */
> + }
> + break;
> default:
> g_assert_not_reached();
> }
> @@ -588,10 +610,6 @@ static bool tcg_target_const_match(int64_t val, int ct,
> if (ct & TCG_CT_CONST_INV) {
> val = ~val;
> }
> - /*
> - * Note that is_const_p16 is a subset of is_const_p32,
> - * so we don't need both constraints.
> - */
> if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
> return true;
> }
> @@ -868,6 +886,9 @@ static const S390Opcode oi_insns[4] = {
> static const S390Opcode lif_insns[2] = {
> RIL_LLILF, RIL_LLIHF,
> };
> +static const S390Opcode tm_insns[4] = {
> + RI_TMLL, RI_TMLH, RI_TMHL, RI_TMHH
> +};
>
> /* load a register with an immediate value */
> static void tcg_out_movi(TCGContext *s, TCGType type,
> @@ -1228,6 +1249,36 @@ static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
> TCGCond inv_c = tcg_invert_cond(c);
> S390Opcode op;
>
> + if (is_tst_cond(c)) {
> + tcg_debug_assert(!need_carry);
> +
> + if (!c2const) {
> + if (type == TCG_TYPE_I32) {
> + tcg_out_insn(s, RRFa, NRK, TCG_REG_R0, r1, c2);
> + } else {
> + tcg_out_insn(s, RRFa, NGRK, TCG_REG_R0, r1, c2);
> + }
> + goto exit;
> + }
> +
> + if (type == TCG_TYPE_I32) {
> + c2 = (uint32_t)c2;
> + }
> +
> + int i = is_const_p16(c2);
> + if (i >= 0) {
> + tcg_out_insn_RI(s, tm_insns[i], r1, c2 >> (i * 16));
> + *inv_cc = c == TCG_COND_TSTEQ ? S390_TM_NE : S390_TM_EQ;
> + return *inv_cc ^ 15;
> + }
> +
> + if (risbg_mask(c2)) {
> + tgen_andi_risbg(s, TCG_REG_R0, r1, c2);
> + goto exit;
> + }
> + g_assert_not_reached();
> + }
> +
> if (c2const) {
> if (c2 == 0) {
> if (!(is_unsigned && need_carry)) {
> @@ -1553,46 +1604,49 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
> TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
> {
> int cc;
> - bool is_unsigned = is_unsigned_cond(c);
> - bool in_range;
> - S390Opcode opc;
>
> - cc = tcg_cond_to_s390_cond[c];
> + if (!is_tst_cond(c)) {
> + bool is_unsigned = is_unsigned_cond(c);
> + bool in_range;
> + S390Opcode opc;
>
> - if (!c2const) {
> - opc = (type == TCG_TYPE_I32
> - ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
> - : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
> - tgen_compare_branch(s, opc, cc, r1, c2, l);
> - return;
> - }
> + cc = tcg_cond_to_s390_cond[c];
>
> - /*
> - * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
> - * If the immediate we've been given does not fit that range, we'll
> - * fall back to separate compare and branch instructions using the
> - * larger comparison range afforded by COMPARE IMMEDIATE.
> - */
> - if (type == TCG_TYPE_I32) {
> - if (is_unsigned) {
> - opc = RIEc_CLIJ;
> - in_range = (uint32_t)c2 == (uint8_t)c2;
> - } else {
> - opc = RIEc_CIJ;
> - in_range = (int32_t)c2 == (int8_t)c2;
> + if (!c2const) {
> + opc = (type == TCG_TYPE_I32
> + ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
> + : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
> + tgen_compare_branch(s, opc, cc, r1, c2, l);
> + return;
> }
> - } else {
> - if (is_unsigned) {
> - opc = RIEc_CLGIJ;
> - in_range = (uint64_t)c2 == (uint8_t)c2;
> +
> + /*
> + * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
> + * If the immediate we've been given does not fit that range, we'll
> + * fall back to separate compare and branch instructions using the
> + * larger comparison range afforded by COMPARE IMMEDIATE.
> + */
> + if (type == TCG_TYPE_I32) {
> + if (is_unsigned) {
> + opc = RIEc_CLIJ;
> + in_range = (uint32_t)c2 == (uint8_t)c2;
> + } else {
> + opc = RIEc_CIJ;
> + in_range = (int32_t)c2 == (int8_t)c2;
> + }
> } else {
> - opc = RIEc_CGIJ;
> - in_range = (int64_t)c2 == (int8_t)c2;
> + if (is_unsigned) {
> + opc = RIEc_CLGIJ;
> + in_range = (uint64_t)c2 == (uint8_t)c2;
> + } else {
> + opc = RIEc_CGIJ;
> + in_range = (int64_t)c2 == (int8_t)c2;
> + }
> + }
> + if (in_range) {
> + tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
> + return;
> }
> - }
> - if (in_range) {
> - tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
> - return;
> }
>
> cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
> @@ -1871,11 +1925,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
> ldst->oi = oi;
> ldst->addrlo_reg = addr_reg;
>
> - /* We are expecting a_bits to max out at 7, much lower than TMLL. */
> tcg_debug_assert(a_mask <= 0xffff);
> tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
>
> - tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
> + tcg_out16(s, RI_BRC | (S390_TM_NE << 4));
> ldst->label_ptr[0] = s->code_ptr++;
> }
>
> @@ -1956,7 +2009,7 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
> l2 = gen_new_label();
>
> tcg_out_insn(s, RI, TMLL, addr_reg, 15);
> - tgen_branch(s, 7, l1); /* CC in {1,2,3} */
> + tgen_branch(s, S390_TM_NE, l1);
> }
>
> tcg_debug_assert(!need_bswap);