This is a common operation that is executed many times in rep
movs or rep stos loops. It can improve performance by several
percentage points.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 54 ++++++++++++++++++-------------------
1 file changed, 26 insertions(+), 28 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e0f9f7748bc..4b652cc23e1 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -831,16 +831,13 @@ static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
#endif
}
-static void gen_movs(DisasContext *s, MemOp ot)
+static void gen_movs(DisasContext *s, MemOp ot, TCGv dshift)
{
- TCGv dshift;
-
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_string_movl_A0_EDI(s);
gen_op_st_v(s, ot, s->T0, s->A0);
- dshift = gen_compute_Dshift(s, ot);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
@@ -1244,22 +1241,22 @@ static inline void gen_jcc(DisasContext *s, int b, TCGLabel *l1)
}
}
-static void gen_stos(DisasContext *s, MemOp ot)
+static void gen_stos(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
gen_op_st_v(s, ot, s->T0, s->A0);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_lods(DisasContext *s, MemOp ot)
+static void gen_lods(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
- gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_ESI, dshift);
}
-static void gen_scas(DisasContext *s, MemOp ot)
+static void gen_scas(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
gen_op_ld_v(s, ot, s->T1, s->A0);
@@ -1268,13 +1265,11 @@ static void gen_scas(DisasContext *s, MemOp ot)
tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
set_cc_op(s, CC_OP_SUBB + ot);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_cmps(DisasContext *s, MemOp ot)
+static void gen_cmps(DisasContext *s, MemOp ot, TCGv dshift)
{
- TCGv dshift;
-
gen_string_movl_A0_EDI(s);
gen_op_ld_v(s, ot, s->T1, s->A0);
gen_string_movl_A0_ESI(s);
@@ -1284,7 +1279,6 @@ static void gen_cmps(DisasContext *s, MemOp ot)
tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
set_cc_op(s, CC_OP_SUBB + ot);
- dshift = gen_compute_Dshift(s, ot);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
@@ -1303,7 +1297,7 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
}
}
-static void gen_ins(DisasContext *s, MemOp ot)
+static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
/* Note: we must do this dummy write first to be restartable in
@@ -1314,11 +1308,11 @@ static void gen_ins(DisasContext *s, MemOp ot)
tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
gen_helper_in_func(ot, s->T0, s->tmp2_i32);
gen_op_st_v(s, ot, s->T0, s->A0);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
gen_bpt_io(s, s->tmp2_i32, ot);
}
-static void gen_outs(DisasContext *s, MemOp ot)
+static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
@@ -1327,14 +1321,14 @@ static void gen_outs(DisasContext *s, MemOp ot)
tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
- gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_bpt_io(s, s->tmp2_i32, ot);
}
#define REP_MAX 65535
-static void do_gen_rep(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot),
+static void do_gen_rep(DisasContext *s, MemOp ot, TCGv dshift,
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift),
bool is_repz_nz)
{
TCGLabel *last = gen_new_label();
@@ -1399,7 +1393,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
}
gen_set_label(loop);
- fn(s, ot);
+ fn(s, ot, dshift);
tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
@@ -1434,7 +1428,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
*/
gen_set_label(last);
set_cc_op(s, CC_OP_DYNAMIC);
- fn(s, ot);
+ fn(s, ot, dshift);
tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
}
@@ -1449,23 +1443,27 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
}
static void gen_repz(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot))
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
{
+ TCGv dshift = gen_compute_Dshift(s, ot);
+
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
- do_gen_rep(s, ot, fn, false);
+ do_gen_rep(s, ot, dshift, fn, false);
} else {
- fn(s, ot);
+ fn(s, ot, dshift);
}
}
static void gen_repz_nz(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot))
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
{
+ TCGv dshift = gen_compute_Dshift(s, ot);
+
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
- do_gen_rep(s, ot, fn, true);
+ do_gen_rep(s, ot, dshift, fn, true);
} else {
- fn(s, ot);
+ fn(s, ot, dshift);
}
}
--
2.47.1
On 12/15/24 03:06, Paolo Bonzini wrote:
> This is a common operation that is executed many times in rep
> movs or rep stos loops. It can improve performance by several
> percentage points.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target/i386/tcg/translate.c | 54 ++++++++++++++++++-------------------
> 1 file changed, 26 insertions(+), 28 deletions(-)
>
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index e0f9f7748bc..4b652cc23e1 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -831,16 +831,13 @@ static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
> #endif
> }
>
> -static void gen_movs(DisasContext *s, MemOp ot)
> +static void gen_movs(DisasContext *s, MemOp ot, TCGv dshift)
> {
> - TCGv dshift;
> -
> gen_string_movl_A0_ESI(s);
> gen_op_ld_v(s, ot, s->T0, s->A0);
> gen_string_movl_A0_EDI(s);
> gen_op_st_v(s, ot, s->T0, s->A0);
>
> - dshift = gen_compute_Dshift(s, ot);
> gen_op_add_reg(s, s->aflag, R_ESI, dshift);
> gen_op_add_reg(s, s->aflag, R_EDI, dshift);
> }
> @@ -1244,22 +1241,22 @@ static inline void gen_jcc(DisasContext *s, int b, TCGLabel *l1)
> }
> }
>
> -static void gen_stos(DisasContext *s, MemOp ot)
> +static void gen_stos(DisasContext *s, MemOp ot, TCGv dshift)
> {
> gen_string_movl_A0_EDI(s);
> gen_op_st_v(s, ot, s->T0, s->A0);
> - gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
> + gen_op_add_reg(s, s->aflag, R_EDI, dshift);
> }
>
> -static void gen_lods(DisasContext *s, MemOp ot)
> +static void gen_lods(DisasContext *s, MemOp ot, TCGv dshift)
> {
> gen_string_movl_A0_ESI(s);
> gen_op_ld_v(s, ot, s->T0, s->A0);
> gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
> - gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
> + gen_op_add_reg(s, s->aflag, R_ESI, dshift);
> }
>
> -static void gen_scas(DisasContext *s, MemOp ot)
> +static void gen_scas(DisasContext *s, MemOp ot, TCGv dshift)
> {
> gen_string_movl_A0_EDI(s);
> gen_op_ld_v(s, ot, s->T1, s->A0);
> @@ -1268,13 +1265,11 @@ static void gen_scas(DisasContext *s, MemOp ot)
> tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
> set_cc_op(s, CC_OP_SUBB + ot);
>
> - gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
> + gen_op_add_reg(s, s->aflag, R_EDI, dshift);
> }
>
> -static void gen_cmps(DisasContext *s, MemOp ot)
> +static void gen_cmps(DisasContext *s, MemOp ot, TCGv dshift)
> {
> - TCGv dshift;
> -
> gen_string_movl_A0_EDI(s);
> gen_op_ld_v(s, ot, s->T1, s->A0);
> gen_string_movl_A0_ESI(s);
> @@ -1284,7 +1279,6 @@ static void gen_cmps(DisasContext *s, MemOp ot)
> tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
> set_cc_op(s, CC_OP_SUBB + ot);
>
> - dshift = gen_compute_Dshift(s, ot);
> gen_op_add_reg(s, s->aflag, R_ESI, dshift);
> gen_op_add_reg(s, s->aflag, R_EDI, dshift);
> }
> @@ -1303,7 +1297,7 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
> }
> }
>
> -static void gen_ins(DisasContext *s, MemOp ot)
> +static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift)
> {
> gen_string_movl_A0_EDI(s);
> /* Note: we must do this dummy write first to be restartable in
> @@ -1314,11 +1308,11 @@ static void gen_ins(DisasContext *s, MemOp ot)
> tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
> gen_helper_in_func(ot, s->T0, s->tmp2_i32);
> gen_op_st_v(s, ot, s->T0, s->A0);
> - gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
> + gen_op_add_reg(s, s->aflag, R_EDI, dshift);
> gen_bpt_io(s, s->tmp2_i32, ot);
> }
>
> -static void gen_outs(DisasContext *s, MemOp ot)
> +static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift)
> {
> gen_string_movl_A0_ESI(s);
> gen_op_ld_v(s, ot, s->T0, s->A0);
> @@ -1327,14 +1321,14 @@ static void gen_outs(DisasContext *s, MemOp ot)
> tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
> tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
> gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
> - gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
> + gen_op_add_reg(s, s->aflag, R_ESI, dshift);
> gen_bpt_io(s, s->tmp2_i32, ot);
> }
>
> #define REP_MAX 65535
>
> -static void do_gen_rep(DisasContext *s, MemOp ot,
> - void (*fn)(DisasContext *s, MemOp ot),
> +static void do_gen_rep(DisasContext *s, MemOp ot, TCGv dshift,
> + void (*fn)(DisasContext *s, MemOp ot, TCGv dshift),
> bool is_repz_nz)
> {
> TCGLabel *last = gen_new_label();
> @@ -1399,7 +1393,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
> }
>
> gen_set_label(loop);
> - fn(s, ot);
> + fn(s, ot, dshift);
> tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
> gen_update_cc_op(s);
>
> @@ -1434,7 +1428,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
> */
> gen_set_label(last);
> set_cc_op(s, CC_OP_DYNAMIC);
> - fn(s, ot);
> + fn(s, ot, dshift);
> tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
> gen_update_cc_op(s);
> }
> @@ -1449,23 +1443,27 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
> }
>
> static void gen_repz(DisasContext *s, MemOp ot,
> - void (*fn)(DisasContext *s, MemOp ot))
> + void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
>
> {
> + TCGv dshift = gen_compute_Dshift(s, ot);
> +
> if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
> - do_gen_rep(s, ot, fn, false);
> + do_gen_rep(s, ot, dshift, fn, false);
> } else {
> - fn(s, ot);
> + fn(s, ot, dshift);
> }
> }
>
> static void gen_repz_nz(DisasContext *s, MemOp ot,
> - void (*fn)(DisasContext *s, MemOp ot))
> + void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
> {
> + TCGv dshift = gen_compute_Dshift(s, ot);
> +
> if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
> - do_gen_rep(s, ot, fn, true);
> + do_gen_rep(s, ot, dshift, fn, true);
> } else {
> - fn(s, ot);
> + fn(s, ot, dshift);
> }
> }
>
Still not a fan of the repetition. I think this could easily be
static void do_gen_rep(DisasContext *s, MemOp ot,
void (*fn)(DisasContext *s, MemOp ot, TCGv dshift),
bool is_repz_nz)
{
TCGv dshift = gen_compute_Dshift(s, ot);
TCGLabel *last, *loop, *done;
target_ulong cx_mask;
TCGv cx_next;
bool can_loop, had_rf;
if (!(s->prefix & (REPZ | REPNZ))) {
fn(s, ot, dshift);
return;
}
done = gen_new_label();
can_loop = ...
everything else.
}
Or, if you prefer, use two functions gen_repz_nz{0,1}.
r~
© 2016 - 2026 Red Hat, Inc.