On 12/19/24 20:10, Richard Henderson wrote:
> Avoid the use of the OptContext slots.
>
> Be careful not to call fold_masks_zs when the memory operation
> is wide enough to require multiple outputs, so split into two
> functions: fold_qemu_ld_1reg and fold_qemu_ld_2reg.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/optimize.c | 28 ++++++++++++++++++++++------
> 1 file changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index da9c8c4669..b01929fccf 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -2127,24 +2127,33 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
>      return fold_masks_s(ctx, op, s_mask);
>  }
>
> -static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
> +static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
>  {
>      const TCGOpDef *def = &tcg_op_defs[op->opc];
>      MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
>      MemOp mop = get_memop(oi);
>      int width = 8 * memop_size(mop);
> +    uint64_t z_mask = -1, s_mask = 0;
>
>      if (width < 64) {
> -        ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
> +        s_mask = MAKE_64BIT_MASK(width, 64 - width);
>          if (!(mop & MO_SIGN)) {
> -            ctx->z_mask = MAKE_64BIT_MASK(0, width);
> -            ctx->s_mask <<= 1;
> +            z_mask = MAKE_64BIT_MASK(0, width);
> +            s_mask <<= 1;
>          }
>      }
>
>      /* Opcodes that touch guest memory stop the mb optimization. */
>      ctx->prev_mb = NULL;
> -    return false;
> +
> +    return fold_masks_zs(ctx, op, z_mask, s_mask);
> +}
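
Just to convince myself of the mask values, I reproduced the computation
above in a standalone program (a minimal sketch of my own; MAKE_64BIT_MASK
is a local copy of QEMU's macro, and the width/sign loop scaffolding is
purely illustrative):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Local stand-in for QEMU's MAKE_64BIT_MASK(shift, length). */
#define MAKE_64BIT_MASK(shift, length) \
    (((~0ULL) >> (64 - (length))) << (shift))

int main(void)
{
    /* Narrow widths a single-register qemu_ld can produce, in bits. */
    for (int width = 8; width < 64; width *= 2) {
        for (int sign = 0; sign <= 1; sign++) {
            /* Same defaults and updates as fold_qemu_ld_1reg. */
            uint64_t z_mask = -1, s_mask = 0;

            s_mask = MAKE_64BIT_MASK(width, 64 - width);
            if (!sign) {
                z_mask = MAKE_64BIT_MASK(0, width);
                s_mask <<= 1;
            }
            printf("width=%2d %s z_mask=%016" PRIx64 " s_mask=%016" PRIx64 "\n",
                   width, sign ? "signed  " : "unsigned", z_mask, s_mask);
        }
    }
    return 0;
}

For an unsigned 8-bit load this prints z_mask=00000000000000ff and
s_mask=fffffffffffffe00; for the signed variant z_mask stays all-ones and
s_mask=ffffffffffffff00, i.e. the same values the old code stored into the
OptContext slots.
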
> +
> +static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
> +{
> +    /* Opcodes that touch guest memory stop the mb optimization. */
> +    ctx->prev_mb = NULL;
> +    return finish_folding(ctx, op);
>  }
>
>  static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
> @@ -3033,11 +3042,18 @@ void tcg_optimize(TCGContext *s)
>              break;
>          case INDEX_op_qemu_ld_a32_i32:
>          case INDEX_op_qemu_ld_a64_i32:
> +            done = fold_qemu_ld_1reg(&ctx, op);
> +            break;
>          case INDEX_op_qemu_ld_a32_i64:
>          case INDEX_op_qemu_ld_a64_i64:
> +            if (TCG_TARGET_REG_BITS == 64) {
> +                done = fold_qemu_ld_1reg(&ctx, op);
> +                break;
> +            }
> +            QEMU_FALLTHROUGH;
>          case INDEX_op_qemu_ld_a32_i128:
>          case INDEX_op_qemu_ld_a64_i128:
> -            done = fold_qemu_ld(&ctx, op);
> +            done = fold_qemu_ld_2reg(&ctx, op);
>              break;
>          case INDEX_op_qemu_st8_a32_i32:
>          case INDEX_op_qemu_st8_a64_i32:
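
The dispatch reads right to me: the _i32 loads, and the _i64 loads on a
64-bit host, produce a single output register and so can feed fold_masks_zs;
on a 32-bit host the _i64 cases fall through and join the _i128 cases, whose
two output registers rule out a single z/s mask, leaving only the prev_mb
reset plus finish_folding.
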
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>