This is a common operation that is executed many times in rep
movs or rep stos loops. It can improve performance by several
percentage points.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 54 ++++++++++++++++++-------------------
1 file changed, 26 insertions(+), 28 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e0f9f7748bc..4b652cc23e1 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -831,16 +831,13 @@ static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
#endif
}
-static void gen_movs(DisasContext *s, MemOp ot)
+static void gen_movs(DisasContext *s, MemOp ot, TCGv dshift)
{
- TCGv dshift;
-
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_string_movl_A0_EDI(s);
gen_op_st_v(s, ot, s->T0, s->A0);
- dshift = gen_compute_Dshift(s, ot);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
@@ -1244,22 +1241,22 @@ static inline void gen_jcc(DisasContext *s, int b, TCGLabel *l1)
}
}
-static void gen_stos(DisasContext *s, MemOp ot)
+static void gen_stos(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
gen_op_st_v(s, ot, s->T0, s->A0);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_lods(DisasContext *s, MemOp ot)
+static void gen_lods(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
- gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_ESI, dshift);
}
-static void gen_scas(DisasContext *s, MemOp ot)
+static void gen_scas(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
gen_op_ld_v(s, ot, s->T1, s->A0);
@@ -1268,13 +1265,11 @@ static void gen_scas(DisasContext *s, MemOp ot)
tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
set_cc_op(s, CC_OP_SUBB + ot);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_cmps(DisasContext *s, MemOp ot)
+static void gen_cmps(DisasContext *s, MemOp ot, TCGv dshift)
{
- TCGv dshift;
-
gen_string_movl_A0_EDI(s);
gen_op_ld_v(s, ot, s->T1, s->A0);
gen_string_movl_A0_ESI(s);
@@ -1284,7 +1279,6 @@ static void gen_cmps(DisasContext *s, MemOp ot)
tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
set_cc_op(s, CC_OP_SUBB + ot);
- dshift = gen_compute_Dshift(s, ot);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
@@ -1303,7 +1297,7 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
}
}
-static void gen_ins(DisasContext *s, MemOp ot)
+static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
/* Note: we must do this dummy write first to be restartable in
@@ -1314,11 +1308,11 @@ static void gen_ins(DisasContext *s, MemOp ot)
tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
gen_helper_in_func(ot, s->T0, s->tmp2_i32);
gen_op_st_v(s, ot, s->T0, s->A0);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
gen_bpt_io(s, s->tmp2_i32, ot);
}
-static void gen_outs(DisasContext *s, MemOp ot)
+static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
@@ -1327,14 +1321,14 @@ static void gen_outs(DisasContext *s, MemOp ot)
tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
- gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_bpt_io(s, s->tmp2_i32, ot);
}
#define REP_MAX 65535
-static void do_gen_rep(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot),
+static void do_gen_rep(DisasContext *s, MemOp ot, TCGv dshift,
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift),
bool is_repz_nz)
{
TCGLabel *last = gen_new_label();
@@ -1399,7 +1393,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
}
gen_set_label(loop);
- fn(s, ot);
+ fn(s, ot, dshift);
tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
@@ -1434,7 +1428,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
*/
gen_set_label(last);
set_cc_op(s, CC_OP_DYNAMIC);
- fn(s, ot);
+ fn(s, ot, dshift);
tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
}
@@ -1449,23 +1443,27 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
}
static void gen_repz(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot))
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
{
+ TCGv dshift = gen_compute_Dshift(s, ot);
+
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
- do_gen_rep(s, ot, fn, false);
+ do_gen_rep(s, ot, dshift, fn, false);
} else {
- fn(s, ot);
+ fn(s, ot, dshift);
}
}
static void gen_repz_nz(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot))
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
{
+ TCGv dshift = gen_compute_Dshift(s, ot);
+
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
- do_gen_rep(s, ot, fn, true);
+ do_gen_rep(s, ot, dshift, fn, true);
} else {
- fn(s, ot);
+ fn(s, ot, dshift);
}
}
--
2.47.1
On 12/15/24 03:06, Paolo Bonzini wrote: > This is a common operation that is executed many times in rep > movs or rep stos loops. It can improve performance by several > percentage points. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > target/i386/tcg/translate.c | 54 ++++++++++++++++++------------------- > 1 file changed, 26 insertions(+), 28 deletions(-) > > diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c > index e0f9f7748bc..4b652cc23e1 100644 > --- a/target/i386/tcg/translate.c > +++ b/target/i386/tcg/translate.c > @@ -831,16 +831,13 @@ static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port, > #endif > } > > -static void gen_movs(DisasContext *s, MemOp ot) > +static void gen_movs(DisasContext *s, MemOp ot, TCGv dshift) > { > - TCGv dshift; > - > gen_string_movl_A0_ESI(s); > gen_op_ld_v(s, ot, s->T0, s->A0); > gen_string_movl_A0_EDI(s); > gen_op_st_v(s, ot, s->T0, s->A0); > > - dshift = gen_compute_Dshift(s, ot); > gen_op_add_reg(s, s->aflag, R_ESI, dshift); > gen_op_add_reg(s, s->aflag, R_EDI, dshift); > } > @@ -1244,22 +1241,22 @@ static inline void gen_jcc(DisasContext *s, int b, TCGLabel *l1) > } > } > > -static void gen_stos(DisasContext *s, MemOp ot) > +static void gen_stos(DisasContext *s, MemOp ot, TCGv dshift) > { > gen_string_movl_A0_EDI(s); > gen_op_st_v(s, ot, s->T0, s->A0); > - gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); > + gen_op_add_reg(s, s->aflag, R_EDI, dshift); > } > > -static void gen_lods(DisasContext *s, MemOp ot) > +static void gen_lods(DisasContext *s, MemOp ot, TCGv dshift) > { > gen_string_movl_A0_ESI(s); > gen_op_ld_v(s, ot, s->T0, s->A0); > gen_op_mov_reg_v(s, ot, R_EAX, s->T0); > - gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); > + gen_op_add_reg(s, s->aflag, R_ESI, dshift); > } > > -static void gen_scas(DisasContext *s, MemOp ot) > +static void gen_scas(DisasContext *s, MemOp ot, TCGv dshift) > { > gen_string_movl_A0_EDI(s); > gen_op_ld_v(s, ot, s->T1, s->A0); > @@ -1268,13 +1265,11 @@ static void gen_scas(DisasContext *s, MemOp ot) > tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1); > set_cc_op(s, CC_OP_SUBB + ot); > > - gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); > + gen_op_add_reg(s, s->aflag, R_EDI, dshift); > } > > -static void gen_cmps(DisasContext *s, MemOp ot) > +static void gen_cmps(DisasContext *s, MemOp ot, TCGv dshift) > { > - TCGv dshift; > - > gen_string_movl_A0_EDI(s); > gen_op_ld_v(s, ot, s->T1, s->A0); > gen_string_movl_A0_ESI(s); > @@ -1284,7 +1279,6 @@ static void gen_cmps(DisasContext *s, MemOp ot) > tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1); > set_cc_op(s, CC_OP_SUBB + ot); > > - dshift = gen_compute_Dshift(s, ot); > gen_op_add_reg(s, s->aflag, R_ESI, dshift); > gen_op_add_reg(s, s->aflag, R_EDI, dshift); > } > @@ -1303,7 +1297,7 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) > } > } > > -static void gen_ins(DisasContext *s, MemOp ot) > +static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift) > { > gen_string_movl_A0_EDI(s); > /* Note: we must do this dummy write first to be restartable in > @@ -1314,11 +1308,11 @@ static void gen_ins(DisasContext *s, MemOp ot) > tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); > gen_helper_in_func(ot, s->T0, s->tmp2_i32); > gen_op_st_v(s, ot, s->T0, s->A0); > - gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); > + gen_op_add_reg(s, s->aflag, R_EDI, dshift); > gen_bpt_io(s, s->tmp2_i32, ot); > } > > -static void gen_outs(DisasContext *s, MemOp ot) > +static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift) > { > gen_string_movl_A0_ESI(s); > gen_op_ld_v(s, ot, s->T0, s->A0); > @@ -1327,14 +1321,14 @@ static void gen_outs(DisasContext *s, MemOp ot) > tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); > tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0); > gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); > - gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); > + gen_op_add_reg(s, s->aflag, R_ESI, dshift); > gen_bpt_io(s, s->tmp2_i32, ot); > } > > #define REP_MAX 65535 > > -static void do_gen_rep(DisasContext *s, MemOp ot, > - void (*fn)(DisasContext *s, MemOp ot), > +static void do_gen_rep(DisasContext *s, MemOp ot, TCGv dshift, > + void (*fn)(DisasContext *s, MemOp ot, TCGv dshift), > bool is_repz_nz) > { > TCGLabel *last = gen_new_label(); > @@ -1399,7 +1393,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot, > } > > gen_set_label(loop); > - fn(s, ot); > + fn(s, ot, dshift); > tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next); > gen_update_cc_op(s); > > @@ -1434,7 +1428,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot, > */ > gen_set_label(last); > set_cc_op(s, CC_OP_DYNAMIC); > - fn(s, ot); > + fn(s, ot, dshift); > tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next); > gen_update_cc_op(s); > } > @@ -1449,23 +1443,27 @@ static void do_gen_rep(DisasContext *s, MemOp ot, > } > > static void gen_repz(DisasContext *s, MemOp ot, > - void (*fn)(DisasContext *s, MemOp ot)) > + void (*fn)(DisasContext *s, MemOp ot, TCGv dshift)) > > { > + TCGv dshift = gen_compute_Dshift(s, ot); > + > if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { > - do_gen_rep(s, ot, fn, false); > + do_gen_rep(s, ot, dshift, fn, false); > } else { > - fn(s, ot); > + fn(s, ot, dshift); > } > } > > static void gen_repz_nz(DisasContext *s, MemOp ot, > - void (*fn)(DisasContext *s, MemOp ot)) > + void (*fn)(DisasContext *s, MemOp ot, TCGv dshift)) > { > + TCGv dshift = gen_compute_Dshift(s, ot); > + > if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { > - do_gen_rep(s, ot, fn, true); > + do_gen_rep(s, ot, dshift, fn, true); > } else { > - fn(s, ot); > + fn(s, ot, dshift); > } > } > Still not a fan of the repetition. I think this could easily be static void do_gen_rep(DisasContext *s, MemOp ot, void (*fn)(DisasContext *s, MemOp ot, TCGv dshift), bool is_repz_nz) { TCGv dshift = gen_compute_Dshift(s, ot); TCGLabel *last, *loop, *done; target_ulong cx_mask; TCGv cx_next; bool can_loop, had_rf; if (!(s->prefix & (REPZ | REPNZ))) { fn(s, ot, dshift); return; } done = gen_new_label(); can_loop = ... everything else. } Or, if you prefer, use two functions gen_repz_nz{0,1}. r~
© 2016 - 2025 Red Hat, Inc.