target/sh4/op_helper.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-)
The saturation arithmetic logic in helper_macw is not correct.
I tested and verified this behavior on a SH7091, the general pattern
is a code sequence such as:
sets
mov.l _mach,r2
lds r2,mach
mov.l _macl,r2
lds r2,macl
mova _n,r0
mov r0,r1
mova _m,r0
mac.w @r0+,@r1+
_mach: .long 0xffffffff
_macl: .long 0xfffffffe
_m: .word 0x0002
.word 0
_n: .word 0x0003
.word 0
test 0:
(mach should not be modified if an overflow did not occur)
given, prior to saturation mac.l:
mach = 0xffffffff ; macl = 0xfffffffe
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0xffffffff (unchanged)
macl = 0x00000004
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
In the context of the helper_macw implementation prior to this
commit, initially this appears to be a surprising result. This is
because (prior to unary negation) the C literal `0x80000000` (due to
being outside the range of a `signed int`) is evaluated as an
`unsigned int` whereas the literal `1` (due to being inside the
range of `signed int`) is evaluated as `signed int`, as in:
static_assert(1 < -0x80000000 == 1);
static_assert(1 < -1 == 0);
This is because the unary negation of an unsigned int is an
unsigned int.
In other words, if the `res < -0x80000000` comparison used
infinite-precision literals, the saturation mac.w result would have
been:
mach = 0x00000000
macl = 0x00000004
Due to this (forgivable) misunderstanding of C literals, the
following behavior also occurs:
test 1:
(`2 * 3 + 0` is not an overflow)
given, prior to saturation mac.l:
mach = 0x00000000 ; macl = 0x00000000
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0x00000000 (unchanged)
macl = 0x00000006
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
test 2:
(mach should not be accumulated in saturation mode)
(16-bit operands are sign-extended)
given, prior to saturation mac.l:
mach = 0x12345678 ; macl = 0x7ffffffe
@r0 = 0x0002 ; @r1 = 0xfffd
expected saturation mac.w result:
mach = 0x12345678 (unchanged)
macl = 0x7ffffff8
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x7fffffff
test 3:
(macl should have the correct saturation value)
given, prior to saturation mac.l:
mach = 0xabcdef12 ; macl = 0x7ffffffa
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0x00000001 (overwritten)
macl = 0x7fffffff
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
All of the above also matches the description of MAC.W as documented
in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
Signed-off-by: Zack Buhman <zack@buhman.org>
---
target/sh4/op_helper.c | 41 +++++++++++++++++++++++++++++++----------
1 file changed, 31 insertions(+), 10 deletions(-)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index ee16524083..b3c1e69f53 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -187,20 +187,41 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
{
- int64_t res;
+ int16_t value0 = (int16_t)arg0;
+ int16_t value1 = (int16_t)arg1;
+ int32_t mul = ((int32_t)value0) * ((int32_t)value1);
- res = ((uint64_t) env->mach << 32) | env->macl;
- res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
- env->mach = (res >> 32) & 0xffffffff;
- env->macl = res & 0xffffffff;
+ /* Perform 32-bit saturation arithmetic if the S flag is set */
if (env->sr & (1u << SR_S)) {
- if (res < -0x80000000) {
- env->mach = 1;
- env->macl = 0x80000000;
- } else if (res > 0x000000007fffffff) {
+ const int64_t upper_bound = ((1ul << 31) - 1);
+ const int64_t lower_bound = -((1ul << 31) - 0);
+
+ /*
+ * In saturation arithmetic mode, the accumulator is 32-bit
+ * with carry. MACH is not considered during the addition
+ * operation nor the 32-bit saturation logic.
+ */
+ int32_t mac = env->macl;
+ int32_t result;
+ bool overflow = sadd32_overflow(mac, mul, &result);
+ if (overflow) {
+ result = (mac < 0) ? lower_bound : upper_bound;
+ /* MACH is set to 1 to denote overflow */
+ env->macl = result;
env->mach = 1;
- env->macl = 0x7fffffff;
+ } else {
+ result = MIN(MAX(result, lower_bound), upper_bound);
+ /* If there was no overflow, MACH is unchanged */
+ env->macl = result;
}
+ } else {
+ /* In non-saturation arithmetic mode, the accumulator is 64-bit */
+ int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
+
+ /* The carry bit of the 64-bit addition is discarded */
+ int64_t result = mac + (int64_t)mul;
+ env->macl = result;
+ env->mach = result >> 32;
}
}
--
2.41.0
On Fri, 5 Apr 2024 at 08:55, Zack Buhman <zack@buhman.org> wrote: > > The saturation arithmetic logic in helper_macw is not correct. > > I tested and verified this behavior on a SH7091, the general pattern > is a code sequence such as: > > sets > > mov.l _mach,r2 > lds r2,mach > mov.l _macl,r2 > lds r2,macl > > mova _n,r0 > mov r0,r1 > mova _m,r0 > mac.w @r0+,@r1+ > > _mach: .long 0xffffffff > _macl: .long 0xfffffffe > _m: .word 0x0002 > .word 0 > _n: .word 0x0003 > .word 0 > > test 0: > (mach should not be modified if an overflow did not occur) > > given, prior to saturation mac.l: > mach = 0xffffffff ; macl = 0xfffffffe > @r0 = 0x0002 ; @r1 = 0x0003 > > expected saturation mac.w result: > mach = 0xffffffff (unchanged) > macl = 0x00000004 > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x80000000 > > In the context of the helper_macw implementation prior to this > commit, initially this appears to be a surprising result. This is > because (prior to unary negation) the C literal `0x80000000` (due to > being outside the range of a `signed int`) is evaluated as an > `unsigned int` whereas the literal `1` (due to being inside the > range of `signed int`) is evaluated as `signed int`, as in: > > static_assert(1 < -0x80000000 == 1); > static_assert(1 < -1 == 0); > > This is because the unary negation of an unsigned int is an > unsigned int. So we could also fix this by getting the C literals right so that they are correctly the signed 64 bit values that the author intended, right? > In other words, if the `res < -0x80000000` comparison used > infinite-precision literals, the saturation mac.w result would have > been: > > mach = 0x00000000 > macl = 0x00000004 > > Due to this (forgivable) misunderstanding of C literals, the > following behavior also occurs: > > test 1: > (`2 * 3 + 0` is not an overflow) > > given, prior to saturation mac.l: > mach = 0x00000000 ; macl = 0x00000000 > @r0 = 0x0002 ; @r1 = 0x0003 > > expected saturation mac.w result: > mach = 0x00000000 (unchanged) > macl = 0x00000006 > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x80000000 > > test 2: > (mach should not be accumulated in saturation mode) > (16-bit operands are sign-extended) > > given, prior to saturation mac.l: > mach = 0x12345678 ; macl = 0x7ffffffe > @r0 = 0x0002 ; @r1 = 0xfffd > > expected saturation mac.w result: > mach = 0x12345678 (unchanged) > macl = 0x7ffffff8 > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x7fffffff > > test 3: > (macl should have the correct saturation value) > > given, prior to saturation mac.l: > mach = 0xabcdef12 ; macl = 0x7ffffffa > @r0 = 0x0002 ; @r1 = 0x0003 > > expected saturation mac.w result: > mach = 0x00000001 (overwritten) > macl = 0x7fffffff > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x80000000 > > All of the above also matches the description of MAC.W as documented > in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf > > Signed-off-by: Zack Buhman <zack@buhman.org> > --- > target/sh4/op_helper.c | 41 +++++++++++++++++++++++++++++++---------- > 1 file changed, 31 insertions(+), 10 deletions(-) > > diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c > index ee16524083..b3c1e69f53 100644 > --- a/target/sh4/op_helper.c > +++ b/target/sh4/op_helper.c > @@ -187,20 +187,41 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) > > void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1) > { > - int64_t res; > + int16_t value0 = (int16_t)arg0; > + int16_t value1 = (int16_t)arg1; > + int32_t mul = ((int32_t)value0) * ((int32_t)value1); > > - res = ((uint64_t) env->mach << 32) | env->macl; > - res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1; > - env->mach = (res >> 32) & 0xffffffff; > - env->macl = res & 0xffffffff; > + /* Perform 32-bit saturation arithmetic if the S flag is set */ > if (env->sr & (1u << SR_S)) { > - if (res < -0x80000000) { > - env->mach = 1; > - env->macl = 0x80000000; > - } else if (res > 0x000000007fffffff) { > + const int64_t upper_bound = ((1ul << 31) - 1); > + const int64_t lower_bound = -((1ul << 31) - 0); UL is usually the wrong suffix to use (and more generally, in QEMU the "long" type is rarely the right type, because it might be either 32 or 64 bits depending on the host). Either we know the value fits in 32 bits and we want a 32-bit type, in which case U is sufficient, or we want a 64-bit type, in which case we need ULL. > + > + /* > + * In saturation arithmetic mode, the accumulator is 32-bit > + * with carry. MACH is not considered during the addition > + * operation nor the 32-bit saturation logic. > + */ > + int32_t mac = env->macl; > + int32_t result; > + bool overflow = sadd32_overflow(mac, mul, &result); > + if (overflow) { > + result = (mac < 0) ? lower_bound : upper_bound; > + /* MACH is set to 1 to denote overflow */ > + env->macl = result; > env->mach = 1; > - env->macl = 0x7fffffff; > + } else { > + result = MIN(MAX(result, lower_bound), upper_bound); Maybe I'm confused, but result is an int32_t, so when can it be lower than lower_bound or higher than upper_bound ? > + /* If there was no overflow, MACH is unchanged */ > + env->macl = result; > } > + } else { > + /* In non-saturation arithmetic mode, the accumulator is 64-bit */ > + int64_t mac = (((uint64_t)env->mach) << 32) | env->macl; > + > + /* The carry bit of the 64-bit addition is discarded */ > + int64_t result = mac + (int64_t)mul; > + env->macl = result; > + env->mach = result >> 32; > } > } > thanks -- PMM
Peter Maydell <peter.maydell@linaro.org> writes: > On Fri, 5 Apr 2024 at 08:55, Zack Buhman <zack@buhman.org> wrote: >> >> The saturation arithmetic logic in helper_macw is not correct. >> >> I tested and verified this behavior on a SH7091, the general pattern >> is a code sequence such as: >> >> sets >> >> mov.l _mach,r2 >> lds r2,mach >> mov.l _macl,r2 >> lds r2,macl >> >> mova _n,r0 >> mov r0,r1 >> mova _m,r0 >> mac.w @r0+,@r1+ >> >> _mach: .long 0xffffffff >> _macl: .long 0xfffffffe >> _m: .word 0x0002 >> .word 0 >> _n: .word 0x0003 >> .word 0 >> >> test 0: >> (mach should not be modified if an overflow did not occur) >> >> given, prior to saturation mac.l: >> mach = 0xffffffff ; macl = 0xfffffffe >> @r0 = 0x0002 ; @r1 = 0x0003 >> >> expected saturation mac.w result: >> mach = 0xffffffff (unchanged) >> macl = 0x00000004 >> >> qemu saturation mac.w result (before this commit): >> mach = 0x00000001 >> macl = 0x80000000 >> >> In the context of the helper_macw implementation prior to this >> commit, initially this appears to be a surprising result. This is >> because (prior to unary negation) the C literal `0x80000000` (due to >> being outside the range of a `signed int`) is evaluated as an >> `unsigned int` whereas the literal `1` (due to being inside the >> range of `signed int`) is evaluated as `signed int`, as in: >> >> static_assert(1 < -0x80000000 == 1); >> static_assert(1 < -1 == 0); >> >> This is because the unary negation of an unsigned int is an >> unsigned int. > > So we could also fix this by getting the C literals right > so that they are correctly the signed 64 bit values that > the author intended, right? Making -0x80000000 signed as intended, as a standalone change without the other aspects of this patch, would not yield a completely correct mac.w implementation. For example, in saturation mode, the following logic does not exist prior to this patch: - MACH must not be overwritten if an overflow did not occur - MACH must not be included in the addition/accumulation operation (it is simply a 32-bit + 32-bit = 33-bit addition of MACL and the multiplication result) > >> In other words, if the `res < -0x80000000` comparison used >> infinite-precision literals, the saturation mac.w result would have >> been: >> >> mach = 0x00000000 >> macl = 0x00000004 >> >> Due to this (forgivable) misunderstanding of C literals, the >> following behavior also occurs: >> >> test 1: >> (`2 * 3 + 0` is not an overflow) >> >> given, prior to saturation mac.l: >> mach = 0x00000000 ; macl = 0x00000000 >> @r0 = 0x0002 ; @r1 = 0x0003 >> >> expected saturation mac.w result: >> mach = 0x00000000 (unchanged) >> macl = 0x00000006 >> >> qemu saturation mac.w result (before this commit): >> mach = 0x00000001 >> macl = 0x80000000 >> >> test 2: >> (mach should not be accumulated in saturation mode) >> (16-bit operands are sign-extended) >> >> given, prior to saturation mac.l: >> mach = 0x12345678 ; macl = 0x7ffffffe >> @r0 = 0x0002 ; @r1 = 0xfffd >> >> expected saturation mac.w result: >> mach = 0x12345678 (unchanged) >> macl = 0x7ffffff8 >> >> qemu saturation mac.w result (before this commit): >> mach = 0x00000001 >> macl = 0x7fffffff >> >> test 3: >> (macl should have the correct saturation value) >> >> given, prior to saturation mac.l: >> mach = 0xabcdef12 ; macl = 0x7ffffffa >> @r0 = 0x0002 ; @r1 = 0x0003 >> >> expected saturation mac.w result: >> mach = 0x00000001 (overwritten) >> macl = 0x7fffffff >> >> qemu saturation mac.w result (before this commit): >> mach = 0x00000001 >> macl = 0x80000000 >> >> All of the above also matches the description of MAC.W as documented >> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf >> >> Signed-off-by: Zack Buhman <zack@buhman.org> >> --- >> target/sh4/op_helper.c | 41 +++++++++++++++++++++++++++++++---------- >> 1 file changed, 31 insertions(+), 10 deletions(-) >> >> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c >> index ee16524083..b3c1e69f53 100644 >> --- a/target/sh4/op_helper.c >> +++ b/target/sh4/op_helper.c >> @@ -187,20 +187,41 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) >> >> void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1) >> { >> - int64_t res; >> + int16_t value0 = (int16_t)arg0; >> + int16_t value1 = (int16_t)arg1; >> + int32_t mul = ((int32_t)value0) * ((int32_t)value1); >> >> - res = ((uint64_t) env->mach << 32) | env->macl; >> - res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1; >> - env->mach = (res >> 32) & 0xffffffff; >> - env->macl = res & 0xffffffff; >> + /* Perform 32-bit saturation arithmetic if the S flag is set */ >> if (env->sr & (1u << SR_S)) { >> - if (res < -0x80000000) { >> - env->mach = 1; >> - env->macl = 0x80000000; >> - } else if (res > 0x000000007fffffff) { >> + const int64_t upper_bound = ((1ul << 31) - 1); >> + const int64_t lower_bound = -((1ul << 31) - 0); > > UL is usually the wrong suffix to use (and more generally, > in QEMU the "long" type is rarely the right type, because > it might be either 32 or 64 bits depending on the host). > Either we know the value fits in 32 bits and we want a 32-bit > type, in which case U is sufficient, or we want a 64-bit type, > in which case we need ULL. > >> + >> + /* >> + * In saturation arithmetic mode, the accumulator is 32-bit >> + * with carry. MACH is not considered during the addition >> + * operation nor the 32-bit saturation logic. >> + */ >> + int32_t mac = env->macl; >> + int32_t result; >> + bool overflow = sadd32_overflow(mac, mul, &result); >> + if (overflow) { >> + result = (mac < 0) ? lower_bound : upper_bound; >> + /* MACH is set to 1 to denote overflow */ >> + env->macl = result; >> env->mach = 1; >> - env->macl = 0x7fffffff; >> + } else { >> + result = MIN(MAX(result, lower_bound), upper_bound); > > Maybe I'm confused, but result is an int32_t, so when can it > be lower than lower_bound or higher than upper_bound ? Correct, this MIN(MAX(...)) is a no-operation in this case. I will send [PATCH v2] with this removed and your other suggestions included. > >> + /* If there was no overflow, MACH is unchanged */ >> + env->macl = result; >> } >> + } else { >> + /* In non-saturation arithmetic mode, the accumulator is 64-bit */ >> + int64_t mac = (((uint64_t)env->mach) << 32) | env->macl; >> + >> + /* The carry bit of the 64-bit addition is discarded */ >> + int64_t result = mac + (int64_t)mul; >> + env->macl = result; >> + env->mach = result >> 32; >> } >> } >> > > thanks > -- PMM
The saturation arithmetic logic in helper_macw is not correct.
I tested and verified this behavior on a SH7091, the general pattern
is a code sequence such as:
sets
mov.l _mach,r2
lds r2,mach
mov.l _macl,r2
lds r2,macl
mova _n,r0
mov r0,r1
mova _m,r0
mac.w @r0+,@r1+
_mach: .long 0xffffffff
_macl: .long 0xfffffffe
_m: .word 0x0002
.word 0
_n: .word 0x0003
.word 0
test 0:
(mach should not be modified if an overflow did not occur)
given, prior to saturation mac.l:
mach = 0xffffffff ; macl = 0xfffffffe
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0xffffffff (unchanged)
macl = 0x00000004
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
In the context of the helper_macw implementation prior to this
commit, initially this appears to be a surprising result. This is
because (prior to unary negation) the C literal `0x80000000` (due to
being outside the range of a `signed int`) is evaluated as an
`unsigned int` whereas the literal `1` (due to being inside the
range of `signed int`) is evaluated as `signed int`, as in:
static_assert(1 < -0x80000000 == 1);
static_assert(1 < -1 == 0);
This is because the unary negation of an unsigned int is an
unsigned int.
In other words, if the `res < -0x80000000` comparison used
infinite-precision literals, the saturation mac.w result would have
been:
mach = 0x00000000
macl = 0x00000004
Due to this (forgivable) misunderstanding of C literals, the
following behavior also occurs:
test 1:
(`2 * 3 + 0` is not an overflow)
given, prior to saturation mac.l:
mach = 0x00000000 ; macl = 0x00000000
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0x00000000 (unchanged)
macl = 0x00000006
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
test 2:
(mach should not be accumulated in saturation mode)
(16-bit operands are sign-extended)
given, prior to saturation mac.l:
mach = 0x12345678 ; macl = 0x7ffffffe
@r0 = 0x0002 ; @r1 = 0xfffd
expected saturation mac.w result:
mach = 0x12345678 (unchanged)
macl = 0x7ffffff8
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x7fffffff
test 3:
(macl should have the correct saturation value)
given, prior to saturation mac.l:
mach = 0xabcdef12 ; macl = 0x7ffffffa
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0x00000001 (overwritten)
macl = 0x7fffffff
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
All of the above also matches the description of MAC.W as documented
in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
Signed-off-by: Zack Buhman <zack@buhman.org>
---
target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index ee16524083..07ff2cf53d 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -187,20 +187,45 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
{
- int64_t res;
+ int16_t value0 = (int16_t)arg0;
+ int16_t value1 = (int16_t)arg1;
+ int32_t mul = ((int32_t)value0) * ((int32_t)value1);
- res = ((uint64_t) env->mach << 32) | env->macl;
- res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
- env->mach = (res >> 32) & 0xffffffff;
- env->macl = res & 0xffffffff;
+ /* Perform 32-bit saturation arithmetic if the S flag is set */
if (env->sr & (1u << SR_S)) {
- if (res < -0x80000000) {
- env->mach = 1;
- env->macl = 0x80000000;
- } else if (res > 0x000000007fffffff) {
+ const int32_t upper_bound = ((1u << 31) - 1);
+ const int32_t lower_bound = -((1u << 31) - 0);
+
+ /*
+ * In saturation arithmetic mode, the accumulator is 32-bit
+ * with carry. MACH is not considered during the addition
+ * operation nor the 32-bit saturation logic.
+ */
+ int32_t mac = env->macl;
+ int32_t result;
+ bool overflow = sadd32_overflow(mac, mul, &result);
+ if (overflow) {
+ result = (mac < 0) ? lower_bound : upper_bound;
+ /* MACH is set to 1 to denote overflow */
+ env->macl = result;
env->mach = 1;
- env->macl = 0x7fffffff;
+ } else {
+ /*
+ * If there is no overflow, the result is already inside
+ * the saturation bounds.
+ *
+ * If there was no overflow, MACH is unchanged.
+ */
+ env->macl = result;
}
+ } else {
+ /* In non-saturation arithmetic mode, the accumulator is 64-bit */
+ int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
+
+ /* The carry bit of the 64-bit addition is discarded */
+ int64_t result = mac + (int64_t)mul;
+ env->macl = result;
+ env->mach = result >> 32;
}
}
--
2.41.0
On Sat, 06 Apr 2024 08:38:04 +0900, Zack Buhman wrote: > > The saturation arithmetic logic in helper_macw is not correct. > > I tested and verified this behavior on a SH7091, the general pattern > is a code sequence such as: > > sets > > mov.l _mach,r2 > lds r2,mach > mov.l _macl,r2 > lds r2,macl > > mova _n,r0 > mov r0,r1 > mova _m,r0 > mac.w @r0+,@r1+ > > _mach: .long 0xffffffff > _macl: .long 0xfffffffe > _m: .word 0x0002 > .word 0 > _n: .word 0x0003 > .word 0 > > test 0: > (mach should not be modified if an overflow did not occur) > > given, prior to saturation mac.l: > mach = 0xffffffff ; macl = 0xfffffffe > @r0 = 0x0002 ; @r1 = 0x0003 > > expected saturation mac.w result: > mach = 0xffffffff (unchanged) > macl = 0x00000004 > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x80000000 > > In the context of the helper_macw implementation prior to this > commit, initially this appears to be a surprising result. This is > because (prior to unary negation) the C literal `0x80000000` (due to > being outside the range of a `signed int`) is evaluated as an > `unsigned int` whereas the literal `1` (due to being inside the > range of `signed int`) is evaluated as `signed int`, as in: > > static_assert(1 < -0x80000000 == 1); > static_assert(1 < -1 == 0); > > This is because the unary negation of an unsigned int is an > unsigned int. > > In other words, if the `res < -0x80000000` comparison used > infinite-precision literals, the saturation mac.w result would have > been: > > mach = 0x00000000 > macl = 0x00000004 > > Due to this (forgivable) misunderstanding of C literals, the > following behavior also occurs: > > test 1: > (`2 * 3 + 0` is not an overflow) > > given, prior to saturation mac.l: > mach = 0x00000000 ; macl = 0x00000000 > @r0 = 0x0002 ; @r1 = 0x0003 > > expected saturation mac.w result: > mach = 0x00000000 (unchanged) > macl = 0x00000006 > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x80000000 > > test 2: > (mach should not be accumulated in saturation mode) > (16-bit operands are sign-extended) > > given, prior to saturation mac.l: > mach = 0x12345678 ; macl = 0x7ffffffe > @r0 = 0x0002 ; @r1 = 0xfffd > > expected saturation mac.w result: > mach = 0x12345678 (unchanged) > macl = 0x7ffffff8 > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x7fffffff > > test 3: > (macl should have the correct saturation value) > > given, prior to saturation mac.l: > mach = 0xabcdef12 ; macl = 0x7ffffffa > @r0 = 0x0002 ; @r1 = 0x0003 > > expected saturation mac.w result: > mach = 0x00000001 (overwritten) > macl = 0x7fffffff > > qemu saturation mac.w result (before this commit): > mach = 0x00000001 > macl = 0x80000000 > > All of the above also matches the description of MAC.W as documented > in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf > > Signed-off-by: Zack Buhman <zack@buhman.org> > --- > target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++---------- > 1 file changed, 35 insertions(+), 10 deletions(-) > > diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c > index ee16524083..07ff2cf53d 100644 > --- a/target/sh4/op_helper.c > +++ b/target/sh4/op_helper.c > @@ -187,20 +187,45 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) > > void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1) > { > - int64_t res; > + int16_t value0 = (int16_t)arg0; > + int16_t value1 = (int16_t)arg1; > + int32_t mul = ((int32_t)value0) * ((int32_t)value1); > > - res = ((uint64_t) env->mach << 32) | env->macl; > - res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1; > - env->mach = (res >> 32) & 0xffffffff; > - env->macl = res & 0xffffffff; > + /* Perform 32-bit saturation arithmetic if the S flag is set */ > if (env->sr & (1u << SR_S)) { > - if (res < -0x80000000) { > - env->mach = 1; > - env->macl = 0x80000000; > - } else if (res > 0x000000007fffffff) { > + const int32_t upper_bound = ((1u << 31) - 1); > + const int32_t lower_bound = -((1u << 31) - 0); > + > + /* > + * In saturation arithmetic mode, the accumulator is 32-bit > + * with carry. MACH is not considered during the addition > + * operation nor the 32-bit saturation logic. > + */ > + int32_t mac = env->macl; > + int32_t result; > + bool overflow = sadd32_overflow(mac, mul, &result); > + if (overflow) { > + result = (mac < 0) ? lower_bound : upper_bound; > + /* MACH is set to 1 to denote overflow */ > + env->macl = result; > env->mach = 1; > - env->macl = 0x7fffffff; > + } else { > + /* > + * If there is no overflow, the result is already inside > + * the saturation bounds. > + * > + * If there was no overflow, MACH is unchanged. > + */ > + env->macl = result; > } > + } else { > + /* In non-saturation arithmetic mode, the accumulator is 64-bit */ > + int64_t mac = (((uint64_t)env->mach) << 32) | env->macl; > + > + /* The carry bit of the 64-bit addition is discarded */ > + int64_t result = mac + (int64_t)mul; > + env->macl = result; > + env->mach = result >> 32; > } > } > > -- > 2.41.0 > Reviewd-by: Yoshinori Sato <ysato@users.sourceforge.jp> -- Yosinori Sato
© 2016 - 2024 Red Hat, Inc.