[PATCH v2 07/19] target/i386: introduce flags writeback mechanism

Paolo Bonzini posted 19 patches 1 year, 1 month ago
Maintainers: Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, Eduardo Habkost <eduardo@habkost.net>
[PATCH v2 07/19] target/i386: introduce flags writeback mechanism
Posted by Paolo Bonzini 1 year, 1 month ago
ALU instructions can write to both memory and flags.  If the CC_SRC*
and CC_DST locations have been written already when a memory access
causes a fault, the value in CC_SRC* and CC_DST might be interpreted
with the wrong CC_OP (the one that is in effect before the instruction.

Besides just using the wrong result for the flags, something like
subtracting -1 can have disastrous effects if the current CC_OP is
CC_OP_EFLAGS: this is because QEMU does not expect bits outside the ALU
flags to be set in CC_SRC, and env->eflags can end up set to all-ones.
In the case of the attached testcase, this sets IOPL to 3 and would
cause an assertion failure if SUB is moved to the new decoder.

This mechanism is not really needed for BMI instructions, which can
only write to a register, but put it to use anyway for cleanliness.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h                |  1 +
 target/i386/tcg/decode-new.c.inc | 31 ++++++++++++++++++++++++++
 target/i386/tcg/decode-new.h     |  4 ++++
 target/i386/tcg/emit.c.inc       | 19 ++++++++++------
 tests/tcg/i386/Makefile.target   |  2 +-
 tests/tcg/i386/test-flags.c      | 37 ++++++++++++++++++++++++++++++++
 6 files changed, 87 insertions(+), 7 deletions(-)
 create mode 100644 tests/tcg/i386/test-flags.c

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e1875466b9d..94a5137f068 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1285,6 +1285,7 @@ typedef enum {
 
     CC_OP_NB,
 } CCOp;
+QEMU_BUILD_BUG_ON(CC_OP_NB >= 128);
 
 typedef struct SegmentCache {
     uint32_t selector;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index eb2400095f8..0a2aebf2ebb 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1823,6 +1823,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
     }
 
     memset(&decode, 0, sizeof(decode));
+    decode.cc_op = -1;
     decode.b = b;
     if (!decode_insn(s, env, decode_func, &decode)) {
         goto illegal_op;
@@ -1943,6 +1944,36 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
         decode.e.gen(s, env, &decode);
         gen_writeback(s, &decode, 0, s->T0);
     }
+
+    /*
+     * Write back flags after last memory access.  Some newer ALU instructions, as
+     * well as SSE instructions, write flags in the gen_* function, but that can
+     * cause incorrect tracking of CC_OP for instructions that write to both memory
+     * and flags.
+     */
+    if (decode.cc_op != -1) {
+        if (decode.cc_dst) {
+            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
+        }
+        if (decode.cc_src) {
+            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
+        }
+        if (decode.cc_src2) {
+            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
+        }
+        if (decode.cc_op == CC_OP_DYNAMIC) {
+            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
+        } else {
+            assert(!decode.cc_op_dynamic);
+        }
+        set_cc_op(s, decode.cc_op);
+    } else {
+        assert(!decode.cc_dst);
+        assert(!decode.cc_src);
+        assert(!decode.cc_src2);
+        assert(!decode.cc_op_dynamic);
+    }
+
     return;
  gp_fault:
     gen_exception_gpf(s);
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index ab21fa6db97..4258db19899 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -265,6 +265,10 @@ struct X86DecodedInsn {
     target_ulong immediate;
     AddressParts mem;
 
+    TCGv cc_dst, cc_src, cc_src2;
+    TCGv_i32 cc_op_dynamic;
+    int8_t cc_op;
+
     uint8_t b;
 };
 
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 82da5488d47..90da1401745 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -323,6 +323,12 @@ static inline int vector_len(DisasContext *s, X86DecodedInsn *decode)
     return s->vex_l ? 32 : 16;
 }
 
+static void prepare_update1_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
+{
+    decode->cc_dst = s->T0;
+    decode->cc_op = op;
+}
+
 static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs)
 {
     MemOp ot = decode->op[0].ot;
@@ -1011,6 +1017,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
 VSIB_AVX(VPGATHERD, vpgatherd)
 VSIB_AVX(VPGATHERQ, vpgatherq)
 
+/* ADCX/ADOX do not have memory operands and can use set_cc_op.  */
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
     int opposite_cc_op;
@@ -1073,8 +1080,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     MemOp ot = decode->op[0].ot;
 
     tcg_gen_andc_tl(s->T0, s->T1, s->T0);
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_LOGICB + ot);
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
 static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -1105,10 +1111,10 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
     tcg_gen_andc_tl(s->T0, s->T0, s->T1);
 
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_LOGICB + ot);
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
+/* BLSI do not have memory operands and can use set_cc_op.  */
 static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1120,6 +1126,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     set_cc_op(s, CC_OP_BMILGB + ot);
 }
 
+/* BLSMSK do not have memory operands and can use set_cc_op.  */
 static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1131,6 +1138,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
     set_cc_op(s, CC_OP_BMILGB + ot);
 }
 
+/* BLSR do not have memory operands and can use set_cc_op.  */
 static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1161,8 +1169,7 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
     tcg_gen_andc_tl(s->T0, s->T0, s->A0);
 
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    prepare_update1_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
 static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index fdf757c6ce4..ca0f543ef16 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -13,7 +13,7 @@ config-cc.mak: Makefile
 
 I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
 ALL_X86_TESTS=$(I386_SRCS:.c=)
-SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
+SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx test-flags
 X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
 
 test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
diff --git a/tests/tcg/i386/test-flags.c b/tests/tcg/i386/test-flags.c
new file mode 100644
index 00000000000..c379e296275
--- /dev/null
+++ b/tests/tcg/i386/test-flags.c
@@ -0,0 +1,37 @@
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <signal.h>
+#include <stdio.h>
+#include <assert.h>
+
+volatile unsigned long flags;
+volatile unsigned long flags_after;
+int *addr;
+
+void sigsegv(int sig, siginfo_t *info, ucontext_t *uc)
+{
+    flags = uc->uc_mcontext.gregs[REG_EFL];
+    mprotect(addr, 4096, PROT_READ|PROT_WRITE);
+}
+
+int main()
+{
+    struct sigaction sa = { .sa_handler = (void *)sigsegv, .sa_flags = SA_SIGINFO };
+    sigaction(SIGSEGV, &sa, NULL);
+
+    /* fault in the page then protect it */
+    addr = mmap (NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+    *addr = 0x1234;
+    mprotect(addr, 4096, PROT_READ);
+
+    asm("# set flags to all ones    \n"
+        "mov $-1, %%eax             \n"
+        "movq addr, %%rdi           \n"
+        "sahf                       \n"
+        "sub %%eax, (%%rdi)         \n"
+        "pushf                      \n"
+        "pop  flags_after(%%rip)    \n" : : : "eax", "edi", "memory");
+
+    /* OF can have any value before the SUB instruction.  */
+    assert((flags & 0xff) == 0xd7 && (flags_after & 0x8ff) == 0x17);
+}
-- 
2.41.0
Re: [PATCH v2 07/19] target/i386: introduce flags writeback mechanism
Posted by Richard Henderson 1 year, 1 month ago
On 10/19/23 03:46, Paolo Bonzini wrote:
> +    /*
> +     * Write back flags after last memory access.  Some newer ALU instructions, as
> +     * well as SSE instructions, write flags in the gen_* function, but that can
> +     * cause incorrect tracking of CC_OP for instructions that write to both memory
> +     * and flags.
> +     */
> +    if (decode.cc_op != -1) {
> +        if (decode.cc_dst) {
> +            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
> +        }
> +        if (decode.cc_src) {
> +            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
> +        }
> +        if (decode.cc_src2) {
> +            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
> +        }
> +        if (decode.cc_op == CC_OP_DYNAMIC) {
> +            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
> +        } else {
> +            assert(!decode.cc_op_dynamic);
> +        }
> +        set_cc_op(s, decode.cc_op);
> +    } else {
> +        assert(!decode.cc_dst);
> +        assert(!decode.cc_src);
> +        assert(!decode.cc_src2);
> +        assert(!decode.cc_op_dynamic);
> +    }

I suggest you use cc_op_live[] to ensure that each output is present if USES_CC_* is set, 
and absent otherwise. Obviously that's not possible for CC_OP_DYNAMIC, but for everything 
else...


r~
Re: [PATCH v2 07/19] target/i386: introduce flags writeback mechanism
Posted by Paolo Bonzini 1 year, 1 month ago
Il gio 19 ott 2023, 19:44 Richard Henderson <richard.henderson@linaro.org>
ha scritto:

> On 10/19/23 03:46, Paolo Bonzini wrote:
> > +    /*
> > +     * Write back flags after last memory access.  Some newer ALU
> instructions, as
> > +     * well as SSE instructions, write flags in the gen_* function, but
> that can
> > +     * cause incorrect tracking of CC_OP for instructions that write to
> both memory
> > +     * and flags.
> > +     */
> > +    if (decode.cc_op != -1) {
> > +        if (decode.cc_dst) {
> > +            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
> > +        }
> > +        if (decode.cc_src) {
> > +            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
> > +        }
> > +        if (decode.cc_src2) {
> > +            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
> > +        }
> > +        if (decode.cc_op == CC_OP_DYNAMIC) {
> > +            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
> > +        } else {
> > +            assert(!decode.cc_op_dynamic);
> > +        }
> > +        set_cc_op(s, decode.cc_op);
> > +    } else {
> > +        assert(!decode.cc_dst);
> > +        assert(!decode.cc_src);
> > +        assert(!decode.cc_src2);
> > +        assert(!decode.cc_op_dynamic);
> > +    }
>
> I suggest you use cc_op_live[] to ensure that each output is present if
> USES_CC_* is set,
> and absent otherwise. Obviously that's not possible for CC_OP_DYNAMIC, but
> for everything else...
>

I tried but it didn't work very well. I have shuffled things a bit and I
don't remember why :) so I will give it another go.

Paolo


>
> r~
>
>