Just use CC_OP_EFLAGS; it is not that likely that the flags computed by
CC_OP_CLR survive the end of the basic block, in which case there is no
need to spill cc_op_src.
cc_op_src now does need spilling if the XOR is followed by a memory
operation, but this only costs 0.2% extra TCG ops. They will be recouped
by simplifications in how QEMU evaluates ZF at runtime, which are even
greater with this change.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.h | 1 -
target/i386/cpu-dump.c | 1 -
target/i386/tcg/cc_helper.c | 3 ---
target/i386/tcg/translate.c | 10 ----------
target/i386/tcg/emit.c.inc | 15 ++++-----------
5 files changed, 4 insertions(+), 26 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e4c947b478b..c89db50eddc 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1297,7 +1297,6 @@ typedef enum {
CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */
CC_OP_ADOX, /* CC_SRC2 = O, CC_SRC = rest. */
CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
- CC_OP_CLR, /* Z and P set, all other flags clear. */
CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
CC_OP_MULW,
diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
index dc6723aedee..a72ed93bd2f 100644
--- a/target/i386/cpu-dump.c
+++ b/target/i386/cpu-dump.c
@@ -91,7 +91,6 @@ static const char * const cc_op_str[] = {
[CC_OP_BMILGQ] = "BMILGQ",
[CC_OP_POPCNT] = "POPCNT",
- [CC_OP_CLR] = "CLR",
};
static void
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index dbddaa2fcb3..40583c04cf9 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -104,8 +104,6 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
case CC_OP_EFLAGS:
return src1;
- case CC_OP_CLR:
- return CC_Z | CC_P;
case CC_OP_POPCNT:
return dst ? 0 : CC_Z;
@@ -243,7 +241,6 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
case CC_OP_LOGICW:
case CC_OP_LOGICL:
case CC_OP_LOGICQ:
- case CC_OP_CLR:
case CC_OP_POPCNT:
return 0;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index dc308f31041..a20fbb019c8 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -309,7 +309,6 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
[CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
[CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
[CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
- [CC_OP_CLR] = 0,
[CC_OP_POPCNT] = USES_CC_DST,
};
@@ -803,10 +802,6 @@ static void gen_mov_eflags(DisasContext *s, TCGv reg)
tcg_gen_mov_tl(reg, cpu_cc_src);
return;
}
- if (s->cc_op == CC_OP_CLR) {
- tcg_gen_movi_tl(reg, CC_Z | CC_P);
- return;
- }
dst = cpu_cc_dst;
src1 = cpu_cc_src;
@@ -897,7 +892,6 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
.reg2 = cpu_cc_src, .use_reg2 = true };
case CC_OP_LOGICB ... CC_OP_LOGICQ:
- case CC_OP_CLR:
case CC_OP_POPCNT:
return (CCPrepare) { .cond = TCG_COND_NEVER };
@@ -969,7 +963,6 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
.imm = CC_S };
- case CC_OP_CLR:
case CC_OP_POPCNT:
return (CCPrepare) { .cond = TCG_COND_NEVER };
default:
@@ -988,7 +981,6 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
.no_setcond = true };
- case CC_OP_CLR:
case CC_OP_POPCNT:
return (CCPrepare) { .cond = TCG_COND_NEVER };
case CC_OP_MULB ... CC_OP_MULQ:
@@ -1013,8 +1005,6 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
.imm = CC_Z };
- case CC_OP_CLR:
- return (CCPrepare) { .cond = TCG_COND_ALWAYS };
default:
{
MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index fd17a9b1eca..790307dbba8 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1452,19 +1452,12 @@ static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv
* C is the result of the test, Z is unchanged, and the others
* are all undefined.
*/
- switch (s->cc_op) {
- case CC_OP_DYNAMIC:
- case CC_OP_CLR:
- case CC_OP_EFLAGS:
- case CC_OP_ADCX:
- case CC_OP_ADOX:
- case CC_OP_ADCOX:
+ if (s->cc_op == CC_OP_DYNAMIC || CC_OP_HAS_EFLAGS(s->cc_op)) {
/* Generate EFLAGS and replace the C bit. */
cf = tcg_temp_new();
tcg_gen_setcond_tl(TCG_COND_TSTNE, cf, src, mask);
prepare_update_cf(decode, s, cf);
- break;
- default:
+ } else {
/*
* Z was going to be computed from the non-zero status of CC_DST.
* We can get that same Z value (and the new C value) by leaving
@@ -1475,7 +1468,6 @@ static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv
decode->cc_dst = cpu_cc_dst;
decode->cc_op = ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB;
tcg_gen_shr_tl(decode->cc_src, src, s->T1);
- break;
}
}
@@ -4724,7 +4716,8 @@ static void gen_XOR(DisasContext *s, X86DecodedInsn *decode)
decode->op[2].unit == X86_OP_INT &&
decode->op[1].n == decode->op[2].n) {
tcg_gen_movi_tl(s->T0, 0);
- decode->cc_op = CC_OP_CLR;
+ decode->cc_op = CC_OP_EFLAGS;
+ decode->cc_src = tcg_constant_tl(CC_Z | CC_P);
} else {
MemOp ot = decode->op[1].ot;
--
2.47.0