target/i386/tcg/emit.c.inc | 85 +++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 33 deletions(-)
RCR and RCL instructions with a count of 9 are the same as if the
count was 0, but they generated incorrect code because the can_be_zero
flag is false. This causes 0 to underflow into -1 at
tcg_gen_subi_tl(count, count, 1).
Fix by absorbind the call to gen_shift_count() into gen_rotc_mod(),
so that the new function handles both mask and mod.
Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3452
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/emit.c.inc | 85 +++++++++++++++++++++++---------------
1 file changed, 52 insertions(+), 33 deletions(-)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index ce636b6c56c..c6b0be1f60d 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -3244,12 +3244,10 @@ static void gen_PUSHF(DisasContext *s, X86DecodedInsn *decode)
assume_cc_op(s, CC_OP_EFLAGS);
}
-static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
- bool *can_be_zero, TCGv *count, int unit)
+static void gen_shift_count_1(DisasContext *s, X86DecodedInsn *decode,
+ bool *can_be_zero, TCGv *count, int unit,
+ int mask)
{
- MemOp ot = decode->op[0].ot;
- int mask = (ot <= MO_32 ? 0x1f : 0x3f);
-
*can_be_zero = false;
switch (unit) {
case X86_OP_INT:
@@ -3259,12 +3257,17 @@ static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
break;
case X86_OP_IMM:
- if ((decode->immediate & mask) == 0) {
+ /*
+ * The caller applied the mask (this is awkward; unfortunately the
+ * mask must be applied *before* the modulo operation for RCL/RCR,
+ * and the modulo operation must be before this check for zero).
+ */
+ if (decode->immediate == 0) {
*count = NULL;
break;
}
*count = tcg_temp_new();
- tcg_gen_movi_tl(*count, decode->immediate & mask);
+ tcg_gen_movi_tl(*count, decode->immediate);
break;
case X86_OP_SKIP:
@@ -3275,7 +3278,19 @@ static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
default:
g_assert_not_reached();
}
+}
+static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
+ bool *can_be_zero, TCGv *count, int unit)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask = (ot <= MO_32 ? 0x1f : 0x3f);
+
+ if (unit == X86_OP_IMM) {
+ decode->immediate &= mask;
+ }
+
+ gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);
return ot;
}
@@ -3394,32 +3409,38 @@ static void gen_rot_overflow(X86DecodedInsn *decode, TCGv result, TCGv old,
}
}
-/*
- * RCx operations are invariant modulo 8*operand_size+1. For 8 and 16-bit operands,
- * this is less than 0x1f (the mask applied by gen_shift_count) so reduce further.
- */
-static void gen_rotc_mod(MemOp ot, TCGv count)
+static MemOp gen_rotc_count(DisasContext *s, X86DecodedInsn *decode,
+ bool *can_be_zero, TCGv *count, int unit)
{
TCGv temp;
+ MemOp ot = decode->op[0].ot;
+ int mod = (8 << ot) + 1;
+ int mask = (ot <= MO_32 ? 0x1f : 0x3f);
- switch (ot) {
- case MO_8:
- temp = tcg_temp_new();
- tcg_gen_subi_tl(temp, count, 18);
- tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
- tcg_gen_subi_tl(temp, count, 9);
- tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
- break;
-
- case MO_16:
- temp = tcg_temp_new();
- tcg_gen_subi_tl(temp, count, 17);
- tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
- break;
-
- default:
- break;
+ /*
+ * RCx operations are invariant modulo 8*operand_size+1. For 8 and 16-bit
+ * operands, this is less than 0x1f (the mask applied by gen_shift_count)
+ * so reduce further. Failure to do so results in incorrect shifts
+ * in gen_RCL/gen_RCR.
+ */
+ if (unit == X86_OP_IMM) {
+ decode->immediate &= mask;
+ decode->immediate %= mod;
}
+
+ gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);
+
+ if (unit == X86_OP_INT && mod < mask) {
+ temp = tcg_temp_new();
+ if (mod * 2 < mask) {
+ tcg_gen_subi_tl(temp, *count, mod * 2);
+ tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0), temp, *count);
+ }
+ tcg_gen_subi_tl(temp, *count, mod);
+ tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0), temp, *count);
+ }
+
+ return ot;
}
/*
@@ -3440,7 +3461,7 @@ static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
bool have_1bit_cin, can_be_zero;
TCGv count;
TCGLabel *zero_label = NULL;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
+ MemOp ot = gen_rotc_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
TCGv low, high, low_count;
if (!count) {
@@ -3451,7 +3472,6 @@ static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
high = tcg_temp_new();
low_count = tcg_temp_new();
- gen_rotc_mod(ot, count);
have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero);
if (can_be_zero) {
zero_label = gen_new_label();
@@ -3492,7 +3512,7 @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
bool have_1bit_cin, can_be_zero;
TCGv count;
TCGLabel *zero_label = NULL;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
+ MemOp ot = gen_rotc_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
TCGv low, high, high_count;
if (!count) {
@@ -3503,7 +3523,6 @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
high = tcg_temp_new();
high_count = tcg_temp_new();
- gen_rotc_mod(ot, count);
have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero);
if (can_be_zero) {
zero_label = gen_new_label();
--
2.54.0
On 5/28/26 09:46, Paolo Bonzini wrote:
> RCR and RCL instructions with a count of 9 are the same as if the
> count was 0, but they generated incorrect code because the can_be_zero
> flag is false. This causes 0 to underflow into -1 at
> tcg_gen_subi_tl(count, count, 1).
>
> Fix by absorbind the call to gen_shift_count() into gen_rotc_mod(),
> so that the new function handles both mask and mod.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3452
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target/i386/tcg/emit.c.inc | 85 +++++++++++++++++++++++---------------
> 1 file changed, 52 insertions(+), 33 deletions(-)
>
> diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
> index ce636b6c56c..c6b0be1f60d 100644
> --- a/target/i386/tcg/emit.c.inc
> +++ b/target/i386/tcg/emit.c.inc
> @@ -3244,12 +3244,10 @@ static void gen_PUSHF(DisasContext *s, X86DecodedInsn *decode)
> assume_cc_op(s, CC_OP_EFLAGS);
> }
>
> -static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
> - bool *can_be_zero, TCGv *count, int unit)
> +static void gen_shift_count_1(DisasContext *s, X86DecodedInsn *decode,
> + bool *can_be_zero, TCGv *count, int unit,
> + int mask)
> {
> - MemOp ot = decode->op[0].ot;
> - int mask = (ot <= MO_32 ? 0x1f : 0x3f);
> -
> *can_be_zero = false;
> switch (unit) {
> case X86_OP_INT:
> @@ -3259,12 +3257,17 @@ static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
> break;
>
> case X86_OP_IMM:
> - if ((decode->immediate & mask) == 0) {
> + /*
> + * The caller applied the mask (this is awkward; unfortunately the
> + * mask must be applied *before* the modulo operation for RCL/RCR,
> + * and the modulo operation must be before this check for zero).
> + */
> + if (decode->immediate == 0) {
Perhaps clearer to pass down mod and perform all of the arithmetic in here.
> +static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
> + bool *can_be_zero, TCGv *count, int unit)
> +{
> + MemOp ot = decode->op[0].ot;
> + int mask = (ot <= MO_32 ? 0x1f : 0x3f);
> +
> + if (unit == X86_OP_IMM) {
> + decode->immediate &= mask;
> + }
> +
> + gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);
Here you could pass mod = 64, arbitrarily large, so that we could keep the mask
computation common within gen_shift_count_1.
> -static void gen_rotc_mod(MemOp ot, TCGv count)
> +static MemOp gen_rotc_count(DisasContext *s, X86DecodedInsn *decode,
> + bool *can_be_zero, TCGv *count, int unit)
> {
> TCGv temp;
> + MemOp ot = decode->op[0].ot;
> + int mod = (8 << ot) + 1;
> + int mask = (ot <= MO_32 ? 0x1f : 0x3f);
>
> - switch (ot) {
> - case MO_8:
> - temp = tcg_temp_new();
> - tcg_gen_subi_tl(temp, count, 18);
> - tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
> - tcg_gen_subi_tl(temp, count, 9);
> - tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
> - break;
> -
> - case MO_16:
> - temp = tcg_temp_new();
> - tcg_gen_subi_tl(temp, count, 17);
> - tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
> - break;
> -
> - default:
> - break;
> + /*
> + * RCx operations are invariant modulo 8*operand_size+1. For 8 and 16-bit
> + * operands, this is less than 0x1f (the mask applied by gen_shift_count)
> + * so reduce further. Failure to do so results in incorrect shifts
> + * in gen_RCL/gen_RCR.
> + */
> + if (unit == X86_OP_IMM) {
> + decode->immediate &= mask;
> + decode->immediate %= mod;
> }
> +
> + gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);
> +
> + if (unit == X86_OP_INT && mod < mask) {
And it would avoid having to have code split across these two functions in odd ways, and
reusing the switch within gen_shift_count_1.
> + temp = tcg_temp_new();
> + if (mod * 2 < mask) {
> + tcg_gen_subi_tl(temp, *count, mod * 2);
> + tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0), temp, *count);
> + }
> + tcg_gen_subi_tl(temp, *count, mod);
> + tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0), temp, *count);
> + }
Perhaps worth assert(mask < mod * 4), for documentation's sake?
r~
© 2016 - 2026 Red Hat, Inc.