On 7/1/25 09:00, Richard Henderson wrote:
> Accept byte and word extensions with the extract opcodes.
> This is preparatory to removing the specialized extracts.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/i386/tcg-target-has.h | 49 +++++++++++++++++++++++++++----
> tcg/tcg-has.h | 12 +++++---
> tcg/optimize.c | 8 +++--
> tcg/tcg-op.c | 12 +++-----
> tcg/i386/tcg-target.c.inc | 62 +++++++++++++++++++++++++++++----------
> 5 files changed, 107 insertions(+), 36 deletions(-)
>
> diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
> index 3ea2eab807..ad69f957a7 100644
> --- a/tcg/i386/tcg-target-has.h
> +++ b/tcg/i386/tcg-target-has.h
> @@ -80,7 +80,7 @@
> #define TCG_TARGET_HAS_ctpop_i64 have_popcnt
> #define TCG_TARGET_HAS_deposit_i64 1
> #define TCG_TARGET_HAS_extract_i64 1
> -#define TCG_TARGET_HAS_sextract_i64 0
> +#define TCG_TARGET_HAS_sextract_i64 1
> #define TCG_TARGET_HAS_extract2_i64 1
> #define TCG_TARGET_HAS_negsetcond_i64 1
> #define TCG_TARGET_HAS_add2_i64 1
> @@ -130,10 +130,47 @@
> (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
> #define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
>
> -/* Check for the possibility of high-byte extraction and, for 64-bit,
> - zero-extending 32-bit right-shift. */
> -#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
> -#define TCG_TARGET_extract_i64_valid(ofs, len) \
> - (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
> +/*
> + * Check for the possibility of low byte/word extraction, high-byte extraction
> + * and zero-extending 32-bit right-shift.
> + *
> + * We cannot sign-extend from high byte to 64-bits without using the
> + * REX prefix that explicitly excludes access to the high-byte registers.
> + */
> +static inline bool
> +tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
> +{
> + switch (ofs) {
> + case 0:
> + switch (len) {
> + case 8:
> + case 16:
> + return true;
> + case 32:
> + return type == TCG_TYPE_I64;
> + }
> + return false;
> + case 8:
> + return len == 8 && type == TCG_TYPE_I32;
> + }
> + return false;
> +}
> +#define TCG_TARGET_sextract_valid tcg_target_sextract_valid
> +
> +static inline bool
> +tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
> +{
> + if (type == TCG_TYPE_I64 && ofs + len == 32) {
> + return true;
> + }
> + switch (ofs) {
> + case 0:
> + return len == 8 || len == 16;
> + case 8:
> + return len == 8;
> + }
> + return false;
> +}
> +#define TCG_TARGET_extract_valid tcg_target_extract_valid
>
> #endif
> diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
> index 65b6a0b0cf..8ed35be8c3 100644
> --- a/tcg/tcg-has.h
> +++ b/tcg/tcg-has.h
> @@ -56,11 +56,15 @@
> #ifndef TCG_TARGET_deposit_i64_valid
> #define TCG_TARGET_deposit_i64_valid(ofs, len) 1
> #endif
> -#ifndef TCG_TARGET_extract_i32_valid
> -#define TCG_TARGET_extract_i32_valid(ofs, len) 1
> +#ifndef TCG_TARGET_extract_valid
> +#define TCG_TARGET_extract_valid(type, ofs, len) \
> + ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
> + : TCG_TARGET_HAS_extract_i64)
> #endif
> -#ifndef TCG_TARGET_extract_i64_valid
> -#define TCG_TARGET_extract_i64_valid(ofs, len) 1
> +#ifndef TCG_TARGET_sextract_valid
> +#define TCG_TARGET_sextract_valid(type, ofs, len) \
> + ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
> + : TCG_TARGET_HAS_sextract_i64)
> #endif
>
> /* Only one of DIV or DIV2 should be defined. */
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index c363c5c04b..cd8ad712c4 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -2362,8 +2362,10 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
> xor_opc = INDEX_op_xor_i32;
> shr_opc = INDEX_op_shr_i32;
> neg_opc = INDEX_op_neg_i32;
> - if (TCG_TARGET_extract_i32_valid(sh, 1)) {
> + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
> uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
> + }
> + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
> sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
> }
> break;
> @@ -2373,8 +2375,10 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
> xor_opc = INDEX_op_xor_i64;
> shr_opc = INDEX_op_shr_i64;
> neg_opc = INDEX_op_neg_i64;
> - if (TCG_TARGET_extract_i64_valid(sh, 1)) {
> + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
> uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
> + }
> + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
> sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
> }
> break;
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index ab5ccd8dcb..d813a7f44e 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -1014,8 +1014,7 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
> return;
> }
>
> - if (TCG_TARGET_HAS_extract_i32
> - && TCG_TARGET_extract_i32_valid(ofs, len)) {
> + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) {
> tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
> return;
> }
> @@ -1077,8 +1076,7 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
> }
> }
>
> - if (TCG_TARGET_HAS_sextract_i32
> - && TCG_TARGET_extract_i32_valid(ofs, len)) {
> + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) {
> tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
> return;
> }
> @@ -2811,8 +2809,7 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
> goto do_shift_and;
> }
>
> - if (TCG_TARGET_HAS_extract_i64
> - && TCG_TARGET_extract_i64_valid(ofs, len)) {
> + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) {
> tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
> return;
> }
> @@ -2917,8 +2914,7 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
> return;
> }
>
> - if (TCG_TARGET_HAS_sextract_i64
> - && TCG_TARGET_extract_i64_valid(ofs, len)) {
> + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) {
> tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
> return;
> }
> diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
> index 047c5da81c..afff56956f 100644
> --- a/tcg/i386/tcg-target.c.inc
> +++ b/tcg/i386/tcg-target.c.inc
> @@ -3036,6 +3036,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
>
> case INDEX_op_extract_i64:
> if (a2 + args[3] == 32) {
> + if (a2 == 0) {
> + tcg_out_ext32u(s, a0, a1);
> + break;
> + }
> /* This is a 32-bit zero-extending right shift. */
> tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
> tcg_out_shifti(s, SHIFT_SHR, a0, a2);
> @@ -3043,28 +3047,53 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
> }
> /* FALLTHRU */
> case INDEX_op_extract_i32:
> - /* On the off-chance that we can use the high-byte registers.
> - Otherwise we emit the same ext16 + shift pattern that we
> - would have gotten from the normal tcg-op.c expansion. */
> - tcg_debug_assert(a2 == 8 && args[3] == 8);
> - if (a1 < 4 && a0 < 8) {
> - tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
> - } else {
> + if (a2 == 0 && args[3] == 8) {
> + tcg_out_ext8u(s, a0, a1);
> + } else if (a2 == 0 && args[3] == 16) {
> tcg_out_ext16u(s, a0, a1);
> - tcg_out_shifti(s, SHIFT_SHR, a0, 8);
> + } else if (a2 == 8 && args[3] == 8) {
> + /*
> + * On the off-chance that we can use the high-byte registers.
> + * Otherwise we emit the same ext16 + shift pattern that we
> + * would have gotten from the normal tcg-op.c expansion.
> + */
> + if (a1 < 4 && a0 < 8) {
> + tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
> + } else {
> + tcg_out_ext16u(s, a0, a1);
> + tcg_out_shifti(s, SHIFT_SHR, a0, 8);
> + }
> + } else {
> + g_assert_not_reached();
> + }
> + break;
> +
> + case INDEX_op_sextract_i64:
> + if (a2 == 0 && args[3] == 8) {
> + tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1);
> + } else if (a2 == 0 && args[3] == 16) {
> + tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1);
> + } else if (a2 == 0 && args[3] == 32) {
> + tcg_out_ext32s(s, a0, a1);
> + } else {
> + g_assert_not_reached();
> }
> break;
>
> case INDEX_op_sextract_i32:
> - /* We don't implement sextract_i64, as we cannot sign-extend to
> - 64-bits without using the REX prefix that explicitly excludes
> - access to the high-byte registers. */
> - tcg_debug_assert(a2 == 8 && args[3] == 8);
> - if (a1 < 4 && a0 < 8) {
> - tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
> - } else {
> + if (a2 == 0 && args[3] == 8) {
> + tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1);
> + } else if (a2 == 0 && args[3] == 16) {
> tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
> - tcg_out_shifti(s, SHIFT_SAR, a0, 8);
> + } else if (a2 == 8 && args[3] == 8) {
> + if (a1 < 4 && a0 < 8) {
> + tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
> + } else {
> + tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
> + tcg_out_shifti(s, SHIFT_SAR, a0, 8);
> + }
> + } else {
> + g_assert_not_reached();
> }
> break;
>
> @@ -3747,6 +3776,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
> case INDEX_op_extract_i32:
> case INDEX_op_extract_i64:
> case INDEX_op_sextract_i32:
> + case INDEX_op_sextract_i64:
> case INDEX_op_ctpop_i32:
> case INDEX_op_ctpop_i64:
> return C_O1_I1(r, r);
To the best of my knowledge, this change is correct:
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>