[PATCH] sh4: mac.l: implement saturation arithmetic logic

Zack Buhman posted 1 patch 3 weeks, 6 days ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20240404151100.24063-1-zack@buhman.org
Maintainers: Yoshinori Sato <ysato@users.sourceforge.jp>
There is a newer version of this series
target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 10 deletions(-)
[PATCH] sh4: mac.l: implement saturation arithmetic logic
Posted by Zack Buhman 3 weeks, 6 days ago
The saturation arithmetic logic in helper_macl is not correct.

I tested and verified this behavior on a SH7091, the general pattern
is a code sequence such as:

	sets

	mov.l _mach,r2
	lds r2,mach
	mov.l _macl,r2
	lds r2,macl

	mova _n,r0
	mov r0,r1
	mova _m,r0
	mac.l @r0+,@r1+

    _mach: .long 0x00007fff
    _macl: .long 0x12345678
    _m:    .long 0x7fffffff
    _n:    .long 0x7fffffff

Test case 0: (no int64_t overflow)
  given; prior to saturation mac.l:
    mach = 0x00007fff macl = 0x12345678
    @r0  = 0x7fffffff @r1  = 0x7fffffff

  expected saturation mac.l result:
    mach = 0x00007fff macl = 0xffffffff

  qemu saturation mac.l result (prior to this commit):
    mach = 0x00007ffe macl = 0x12345678

Test case 1: (no int64_t overflow)
  given; prior to saturation mac.l:
    mach = 0xffff8000 macl = 0x00000000
    @r0  = 0xffffffff @r1  = 0x00000001

  expected saturation mac.l result:
    mach = 0xffff8000 macl = 0x00000000

  qemu saturation mac.l result (prior to this commit):
    mach = 0xffff7fff macl = 0xffffffff

Test case 2: (int64_t addition overflow)
  given; prior to saturation mac.l:
    mach = 0x80000000 macl = 0x00000000
    @r0  = 0xffffffff @r1  = 0x00000001

  expected saturation mac.l result:
    mach = 0xffff8000 macl = 0x00000000

  qemu saturation mac.l result (prior to this commit):
    mach = 0xffff7fff macl = 0xffffffff

Test case 3: (int64_t addition overflow)
  given; prior to saturation mac.l:
    mach = 0x7fffffff macl = 0x00000000
    @r0 = 0x7fffffff @r1 = 0x7fffffff

  expected saturation mac.l result:
    mach = 0x00007fff macl = 0xffffffff

  qemu saturation mac.l result (prior to this commit):
    mach = 0xfffffffe macl = 0x00000001

All of the above also matches the description of MAC.L as documented
in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
---
 target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 4559d0d376..a3eb2f5281 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -160,18 +160,43 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
 
 void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
-    int64_t res;
-
-    res = ((uint64_t) env->mach << 32) | env->macl;
-    res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
-    env->mach = (res >> 32) & 0xffffffff;
-    env->macl = res & 0xffffffff;
+    int32_t value0 = (int32_t)arg0;
+    int32_t value1 = (int32_t)arg1;
+    int64_t mul = ((int64_t)value0) * ((int64_t)value1);
+    int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
+    int64_t result = mac + mul;
+    /* Perform 48-bit saturation arithmetic if the S flag is set */
     if (env->sr & (1u << SR_S)) {
-        if (res < 0)
-            env->mach |= 0xffff0000;
-        else
-            env->mach &= 0x00007fff;
+        /*
+         * The following xor/and expression is necessary to detect an
+         * overflow in MSB of res; this is logic necessary because the
+         * sign bit of `mac + mul` may overflow. The MAC unit on real
+         * SH-4 hardware has carry/saturation logic that is equivalent
+         * to the following:
+         */
+        const int64_t upper_bound =  ((1ull << 47) - 1);
+        const int64_t lower_bound = -((1ull << 47) - 0);
+
+        if (((((result ^ mac) & (result ^ mul)) >> 63) & 1) == 1) {
+            /* An overflow occured during 64-bit addition */
+            if (((mac >> 63) & 1) == 0) {
+                result = upper_bound;
+            } else {
+                result = lower_bound;
+            }
+        } else {
+            /* An overflow did not occur during 64-bit addition */
+            if (result > upper_bound) {
+                result = upper_bound;
+            } else if (result < lower_bound) {
+                result = lower_bound;
+            } else {
+                /* leave result unchanged */
+            }
+        }
     }
+    env->macl = result;
+    env->mach = result >> 32;
 }
 
 void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-- 
2.41.0
Re: [PATCH] sh4: mac.l: implement saturation arithmetic logic
Posted by Peter Maydell 3 weeks, 6 days ago
On Thu, 4 Apr 2024 at 16:12, Zack Buhman <zack@buhman.org> wrote:
>
> The saturation arithmetic logic in helper_macl is not correct.
>
> I tested and verified this behavior on a SH7091, the general pattern
> is a code sequence such as:
>
>         sets
>
>         mov.l _mach,r2
>         lds r2,mach
>         mov.l _macl,r2
>         lds r2,macl
>
>         mova _n,r0
>         mov r0,r1
>         mova _m,r0
>         mac.l @r0+,@r1+
>
>     _mach: .long 0x00007fff
>     _macl: .long 0x12345678
>     _m:    .long 0x7fffffff
>     _n:    .long 0x7fffffff
>
> Test case 0: (no int64_t overflow)
>   given; prior to saturation mac.l:
>     mach = 0x00007fff macl = 0x12345678
>     @r0  = 0x7fffffff @r1  = 0x7fffffff
>
>   expected saturation mac.l result:
>     mach = 0x00007fff macl = 0xffffffff
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0x00007ffe macl = 0x12345678
>
> Test case 1: (no int64_t overflow)
>   given; prior to saturation mac.l:
>     mach = 0xffff8000 macl = 0x00000000
>     @r0  = 0xffffffff @r1  = 0x00000001
>
>   expected saturation mac.l result:
>     mach = 0xffff8000 macl = 0x00000000
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0xffff7fff macl = 0xffffffff
>
> Test case 2: (int64_t addition overflow)
>   given; prior to saturation mac.l:
>     mach = 0x80000000 macl = 0x00000000
>     @r0  = 0xffffffff @r1  = 0x00000001
>
>   expected saturation mac.l result:
>     mach = 0xffff8000 macl = 0x00000000
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0xffff7fff macl = 0xffffffff
>
> Test case 3: (int64_t addition overflow)
>   given; prior to saturation mac.l:
>     mach = 0x7fffffff macl = 0x00000000
>     @r0 = 0x7fffffff @r1 = 0x7fffffff
>
>   expected saturation mac.l result:
>     mach = 0x00007fff macl = 0xffffffff
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0xfffffffe macl = 0x00000001
>
> All of the above also matches the description of MAC.L as documented
> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
> ---
>  target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
>  1 file changed, 35 insertions(+), 10 deletions(-)
>
> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
> index 4559d0d376..a3eb2f5281 100644
> --- a/target/sh4/op_helper.c
> +++ b/target/sh4/op_helper.c
> @@ -160,18 +160,43 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
>
>  void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
>  {
> -    int64_t res;
> -
> -    res = ((uint64_t) env->mach << 32) | env->macl;
> -    res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
> -    env->mach = (res >> 32) & 0xffffffff;
> -    env->macl = res & 0xffffffff;
> +    int32_t value0 = (int32_t)arg0;
> +    int32_t value1 = (int32_t)arg1;
> +    int64_t mul = ((int64_t)value0) * ((int64_t)value1);
> +    int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
> +    int64_t result = mac + mul;
> +    /* Perform 48-bit saturation arithmetic if the S flag is set */
>      if (env->sr & (1u << SR_S)) {
> -        if (res < 0)
> -            env->mach |= 0xffff0000;
> -        else
> -            env->mach &= 0x00007fff;
> +        /*
> +         * The following xor/and expression is necessary to detect an
> +         * overflow in MSB of res; this is logic necessary because the
> +         * sign bit of `mac + mul` may overflow. The MAC unit on real
> +         * SH-4 hardware has carry/saturation logic that is equivalent
> +         * to the following:
> +         */
> +        const int64_t upper_bound =  ((1ull << 47) - 1);
> +        const int64_t lower_bound = -((1ull << 47) - 0);
> +
> +        if (((((result ^ mac) & (result ^ mul)) >> 63) & 1) == 1) {
> +            /* An overflow occured during 64-bit addition */

This is testing whether the "int64_t result = mac + mul"
signed 64-bit arithmetic overflowed, right? That's probably
cleaner written by using the sadd64_overflow() function in
host-utils.h, which does the 64-bit add and returns a bool
to tell you whether it overflowed or not:

   if (sadd64_overflow(mac, mul, &result)) {
       result = (result < 0) ? lower_bound : upper_bound;
   } else {
       result = MIN(MAX(result, lower_bound), upper_bound);
   }



> +            if (((mac >> 63) & 1) == 0) {
> +                result = upper_bound;
> +            } else {
> +                result = lower_bound;
> +            }
> +        } else {
> +            /* An overflow did not occur during 64-bit addition */
> +            if (result > upper_bound) {
> +                result = upper_bound;
> +            } else if (result < lower_bound) {
> +                result = lower_bound;
> +            } else {
> +                /* leave result unchanged */
> +            }
> +        }
>      }
> +    env->macl = result;
> +    env->mach = result >> 32;

thanks
-- PMM
[PATCH v2] sh4: mac.l: implement saturation arithmetic logic
Posted by Zack Buhman 3 weeks, 6 days ago
The saturation arithmetic logic in helper_macl is not correct.

I tested and verified this behavior on a SH7091, the general pattern
is a code sequence such as:

	sets

	mov.l _mach,r2
	lds r2,mach
	mov.l _macl,r2
	lds r2,macl

	mova _n,r0
	mov r0,r1
	mova _m,r0
	mac.l @r0+,@r1+

    _mach: .long 0x00007fff
    _macl: .long 0x12345678
    _m:    .long 0x7fffffff
    _n:    .long 0x7fffffff

Test case 0: (no int64_t overflow)
  given; prior to saturation mac.l:
    mach = 0x00007fff macl = 0x12345678
    @r0  = 0x7fffffff @r1  = 0x7fffffff

  expected saturation mac.l result:
    mach = 0x00007fff macl = 0xffffffff

  qemu saturation mac.l result (prior to this commit):
    mach = 0x00007ffe macl = 0x12345678

Test case 1: (no int64_t overflow)
  given; prior to saturation mac.l:
    mach = 0xffff8000 macl = 0x00000000
    @r0  = 0xffffffff @r1  = 0x00000001

  expected saturation mac.l result:
    mach = 0xffff8000 macl = 0x00000000

  qemu saturation mac.l result (prior to this commit):
    mach = 0xffff7fff macl = 0xffffffff

Test case 2: (int64_t addition overflow)
  given; prior to saturation mac.l:
    mach = 0x80000000 macl = 0x00000000
    @r0  = 0xffffffff @r1  = 0x00000001

  expected saturation mac.l result:
    mach = 0xffff8000 macl = 0x00000000

  qemu saturation mac.l result (prior to this commit):
    mach = 0xffff7fff macl = 0xffffffff

Test case 3: (int64_t addition overflow)
  given; prior to saturation mac.l:
    mach = 0x7fffffff macl = 0x00000000
    @r0 = 0x7fffffff @r1 = 0x7fffffff

  expected saturation mac.l result:
    mach = 0x00007fff macl = 0xffffffff

  qemu saturation mac.l result (prior to this commit):
    mach = 0xfffffffe macl = 0x00000001

All of the above also matches the description of MAC.L as documented
in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
---
 target/sh4/op_helper.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 4559d0d376..ee16524083 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -160,18 +160,29 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
 
 void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
-    int64_t res;
-
-    res = ((uint64_t) env->mach << 32) | env->macl;
-    res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
-    env->mach = (res >> 32) & 0xffffffff;
-    env->macl = res & 0xffffffff;
+    int32_t value0 = (int32_t)arg0;
+    int32_t value1 = (int32_t)arg1;
+    int64_t mul = ((int64_t)value0) * ((int64_t)value1);
+    int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
+    int64_t result;
+    bool overflow = sadd64_overflow(mac, mul, &result);
+    /* Perform 48-bit saturation arithmetic if the S flag is set */
     if (env->sr & (1u << SR_S)) {
-        if (res < 0)
-            env->mach |= 0xffff0000;
-        else
-            env->mach &= 0x00007fff;
+        /*
+         * The sign bit of `mac + mul` may overflow. The MAC unit on
+         * real SH-4 hardware has equivalent carry/saturation logic:
+         */
+        const int64_t upper_bound =  ((1ull << 47) - 1);
+        const int64_t lower_bound = -((1ull << 47) - 0);
+
+        if (overflow) {
+            result = (mac < 0) ? lower_bound : upper_bound;
+        } else {
+            result = MIN(MAX(result, lower_bound), upper_bound);
+        }
     }
+    env->macl = result;
+    env->mach = result >> 32;
 }
 
 void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
-- 
2.41.0
Re: [PATCH v2] sh4: mac.l: implement saturation arithmetic logic
Posted by Richard Henderson 3 weeks, 5 days ago
On 4/4/24 06:26, Zack Buhman wrote:
> The saturation arithmetic logic in helper_macl is not correct.
> 
> I tested and verified this behavior on a SH7091, the general pattern
> is a code sequence such as:
> 
> 	sets
> 
> 	mov.l _mach,r2
> 	lds r2,mach
> 	mov.l _macl,r2
> 	lds r2,macl
> 
> 	mova _n,r0
> 	mov r0,r1
> 	mova _m,r0
> 	mac.l @r0+,@r1+
> 
>      _mach: .long 0x00007fff
>      _macl: .long 0x12345678
>      _m:    .long 0x7fffffff
>      _n:    .long 0x7fffffff
> 
> Test case 0: (no int64_t overflow)
>    given; prior to saturation mac.l:
>      mach = 0x00007fff macl = 0x12345678
>      @r0  = 0x7fffffff @r1  = 0x7fffffff
> 
>    expected saturation mac.l result:
>      mach = 0x00007fff macl = 0xffffffff
> 
>    qemu saturation mac.l result (prior to this commit):
>      mach = 0x00007ffe macl = 0x12345678
> 
> Test case 1: (no int64_t overflow)
>    given; prior to saturation mac.l:
>      mach = 0xffff8000 macl = 0x00000000
>      @r0  = 0xffffffff @r1  = 0x00000001
> 
>    expected saturation mac.l result:
>      mach = 0xffff8000 macl = 0x00000000
> 
>    qemu saturation mac.l result (prior to this commit):
>      mach = 0xffff7fff macl = 0xffffffff
> 
> Test case 2: (int64_t addition overflow)
>    given; prior to saturation mac.l:
>      mach = 0x80000000 macl = 0x00000000
>      @r0  = 0xffffffff @r1  = 0x00000001
> 
>    expected saturation mac.l result:
>      mach = 0xffff8000 macl = 0x00000000
> 
>    qemu saturation mac.l result (prior to this commit):
>      mach = 0xffff7fff macl = 0xffffffff
> 
> Test case 3: (int64_t addition overflow)
>    given; prior to saturation mac.l:
>      mach = 0x7fffffff macl = 0x00000000
>      @r0 = 0x7fffffff @r1 = 0x7fffffff
> 
>    expected saturation mac.l result:
>      mach = 0x00007fff macl = 0xffffffff
> 
>    qemu saturation mac.l result (prior to this commit):
>      mach = 0xfffffffe macl = 0x00000001
> 
> All of the above also matches the description of MAC.L as documented
> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
> ---
>   target/sh4/op_helper.c | 31 +++++++++++++++++++++----------
>   1 file changed, 21 insertions(+), 10 deletions(-)

Queued, thanks.


r~
Re: [PATCH v2] sh4: mac.l: implement saturation arithmetic logic
Posted by Peter Maydell 3 weeks, 6 days ago
On Thu, 4 Apr 2024 at 17:26, Zack Buhman <zack@buhman.org> wrote:
>
> The saturation arithmetic logic in helper_macl is not correct.
>
> I tested and verified this behavior on a SH7091, the general pattern
> is a code sequence such as:
>
>         sets
>
>         mov.l _mach,r2
>         lds r2,mach
>         mov.l _macl,r2
>         lds r2,macl
>
>         mova _n,r0
>         mov r0,r1
>         mova _m,r0
>         mac.l @r0+,@r1+
>
>     _mach: .long 0x00007fff
>     _macl: .long 0x12345678
>     _m:    .long 0x7fffffff
>     _n:    .long 0x7fffffff
>
> Test case 0: (no int64_t overflow)
>   given; prior to saturation mac.l:
>     mach = 0x00007fff macl = 0x12345678
>     @r0  = 0x7fffffff @r1  = 0x7fffffff
>
>   expected saturation mac.l result:
>     mach = 0x00007fff macl = 0xffffffff
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0x00007ffe macl = 0x12345678
>
> Test case 1: (no int64_t overflow)
>   given; prior to saturation mac.l:
>     mach = 0xffff8000 macl = 0x00000000
>     @r0  = 0xffffffff @r1  = 0x00000001
>
>   expected saturation mac.l result:
>     mach = 0xffff8000 macl = 0x00000000
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0xffff7fff macl = 0xffffffff
>
> Test case 2: (int64_t addition overflow)
>   given; prior to saturation mac.l:
>     mach = 0x80000000 macl = 0x00000000
>     @r0  = 0xffffffff @r1  = 0x00000001
>
>   expected saturation mac.l result:
>     mach = 0xffff8000 macl = 0x00000000
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0xffff7fff macl = 0xffffffff
>
> Test case 3: (int64_t addition overflow)
>   given; prior to saturation mac.l:
>     mach = 0x7fffffff macl = 0x00000000
>     @r0 = 0x7fffffff @r1 = 0x7fffffff
>
>   expected saturation mac.l result:
>     mach = 0x00007fff macl = 0xffffffff
>
>   qemu saturation mac.l result (prior to this commit):
>     mach = 0xfffffffe macl = 0x00000001
>
> All of the above also matches the description of MAC.L as documented
> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf

Hi. I just noticed that you didn't include a signed-off-by line
in your commit message. We need these as they're how you say
that you're legally OK to contribute this code to QEMU and
you're happy for it to go into the project:

https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
has links to what exactly this means, but basically the
requirement is that the last line of your commit message should be
"Signed-off-by: Your Name <your@email>"

In this case, if you just reply to this email with that, we
can pick it up and fix up the commit message when we apply the
patch.

> ---
>  target/sh4/op_helper.c | 31 +++++++++++++++++++++----------
>  1 file changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
> index 4559d0d376..ee16524083 100644
> --- a/target/sh4/op_helper.c
> +++ b/target/sh4/op_helper.c
> @@ -160,18 +160,29 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
>
>  void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
>  {
> -    int64_t res;
> -
> -    res = ((uint64_t) env->mach << 32) | env->macl;
> -    res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
> -    env->mach = (res >> 32) & 0xffffffff;
> -    env->macl = res & 0xffffffff;
> +    int32_t value0 = (int32_t)arg0;
> +    int32_t value1 = (int32_t)arg1;
> +    int64_t mul = ((int64_t)value0) * ((int64_t)value1);
> +    int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
> +    int64_t result;
> +    bool overflow = sadd64_overflow(mac, mul, &result);
> +    /* Perform 48-bit saturation arithmetic if the S flag is set */
>      if (env->sr & (1u << SR_S)) {
> -        if (res < 0)
> -            env->mach |= 0xffff0000;
> -        else
> -            env->mach &= 0x00007fff;
> +        /*
> +         * The sign bit of `mac + mul` may overflow. The MAC unit on
> +         * real SH-4 hardware has equivalent carry/saturation logic:
> +         */
> +        const int64_t upper_bound =  ((1ull << 47) - 1);
> +        const int64_t lower_bound = -((1ull << 47) - 0);
> +
> +        if (overflow) {
> +            result = (mac < 0) ? lower_bound : upper_bound;
> +        } else {
> +            result = MIN(MAX(result, lower_bound), upper_bound);
> +        }
>      }
> +    env->macl = result;
> +    env->mach = result >> 32;
>  }

I haven't checked the sh4 docs but the change looks right, so

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
Re: [PATCH v2] sh4: mac.l: implement saturation arithmetic logic
Posted by Philippe Mathieu-Daudé 3 weeks, 6 days ago
Hi Zack,

Cc'ing the maintainer of this file, Yoshinori:

$ ./scripts/get_maintainer.pl -f target/sh4/op_helper.c
Yoshinori Sato <ysato@users.sourceforge.jp> (reviewer:SH4 TCG CPUs)
(https://www.qemu.org/docs/master/devel/submitting-a-patch.html#cc-the-relevant-maintainer)

On 4/4/24 18:39, Peter Maydell wrote:
> On Thu, 4 Apr 2024 at 17:26, Zack Buhman <zack@buhman.org> wrote:
>>
>> The saturation arithmetic logic in helper_macl is not correct.
>>
>> I tested and verified this behavior on a SH7091, the general pattern
>> is a code sequence such as:
>>
>>          sets
>>
>>          mov.l _mach,r2
>>          lds r2,mach
>>          mov.l _macl,r2
>>          lds r2,macl
>>
>>          mova _n,r0
>>          mov r0,r1
>>          mova _m,r0
>>          mac.l @r0+,@r1+
>>
>>      _mach: .long 0x00007fff
>>      _macl: .long 0x12345678
>>      _m:    .long 0x7fffffff
>>      _n:    .long 0x7fffffff
>>
>> Test case 0: (no int64_t overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0x00007fff macl = 0x12345678
>>      @r0  = 0x7fffffff @r1  = 0x7fffffff
>>
>>    expected saturation mac.l result:
>>      mach = 0x00007fff macl = 0xffffffff
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0x00007ffe macl = 0x12345678
>>
>> Test case 1: (no int64_t overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0xffff8000 macl = 0x00000000
>>      @r0  = 0xffffffff @r1  = 0x00000001
>>
>>    expected saturation mac.l result:
>>      mach = 0xffff8000 macl = 0x00000000
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0xffff7fff macl = 0xffffffff
>>
>> Test case 2: (int64_t addition overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0x80000000 macl = 0x00000000
>>      @r0  = 0xffffffff @r1  = 0x00000001
>>
>>    expected saturation mac.l result:
>>      mach = 0xffff8000 macl = 0x00000000
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0xffff7fff macl = 0xffffffff
>>
>> Test case 3: (int64_t addition overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0x7fffffff macl = 0x00000000
>>      @r0 = 0x7fffffff @r1 = 0x7fffffff
>>
>>    expected saturation mac.l result:
>>      mach = 0x00007fff macl = 0xffffffff
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0xfffffffe macl = 0x00000001
>>
>> All of the above also matches the description of MAC.L as documented
>> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
> 
> Hi. I just noticed that you didn't include a signed-off-by line
> in your commit message. We need these as they're how you say
> that you're legally OK to contribute this code to QEMU and
> you're happy for it to go into the project:
> 
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> has links to what exactly this means, but basically the
> requirement is that the last line of your commit message should be
> "Signed-off-by: Your Name <your@email>"
> 
> In this case, if you just reply to this email with that, we
> can pick it up and fix up the commit message when we apply the
> patch.
> 
>> ---
>>   target/sh4/op_helper.c | 31 +++++++++++++++++++++----------
>>   1 file changed, 21 insertions(+), 10 deletions(-)
>>
>> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
>> index 4559d0d376..ee16524083 100644
>> --- a/target/sh4/op_helper.c
>> +++ b/target/sh4/op_helper.c
>> @@ -160,18 +160,29 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
>>
>>   void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
>>   {
>> -    int64_t res;
>> -
>> -    res = ((uint64_t) env->mach << 32) | env->macl;
>> -    res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
>> -    env->mach = (res >> 32) & 0xffffffff;
>> -    env->macl = res & 0xffffffff;
>> +    int32_t value0 = (int32_t)arg0;
>> +    int32_t value1 = (int32_t)arg1;
>> +    int64_t mul = ((int64_t)value0) * ((int64_t)value1);
>> +    int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
>> +    int64_t result;
>> +    bool overflow = sadd64_overflow(mac, mul, &result);
>> +    /* Perform 48-bit saturation arithmetic if the S flag is set */
>>       if (env->sr & (1u << SR_S)) {
>> -        if (res < 0)
>> -            env->mach |= 0xffff0000;
>> -        else
>> -            env->mach &= 0x00007fff;
>> +        /*
>> +         * The sign bit of `mac + mul` may overflow. The MAC unit on
>> +         * real SH-4 hardware has equivalent carry/saturation logic:
>> +         */
>> +        const int64_t upper_bound =  ((1ull << 47) - 1);
>> +        const int64_t lower_bound = -((1ull << 47) - 0);
>> +
>> +        if (overflow) {
>> +            result = (mac < 0) ? lower_bound : upper_bound;
>> +        } else {
>> +            result = MIN(MAX(result, lower_bound), upper_bound);
>> +        }
>>       }
>> +    env->macl = result;
>> +    env->mach = result >> 32;
>>   }
> 
> I haven't checked the sh4 docs but the change looks right, so
> 
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> 
> thanks
> -- PMM
>
Re: [PATCH v2] sh4: mac.l: implement saturation arithmetic logic
Posted by Zack Buhman 3 weeks, 6 days ago
Signed-off-by: Zack Buhman <zack@buhman.org>

----- Original message -----
From: "Philippe Mathieu-Daudé" <philmd@linaro.org>
To: Peter Maydell <peter.maydell@linaro.org>, Zack Buhman <zack@buhman.org>
Cc: qemu-devel@nongnu.org, Yoshinori Sato <ysato@users.sourceforge.jp>
Subject: Re: [PATCH v2] sh4: mac.l: implement saturation arithmetic logic
Date: Friday, April 05, 2024 1:26 AM

Hi Zack,

Cc'ing the maintainer of this file, Yoshinori:

$ ./scripts/get_maintainer.pl -f target/sh4/op_helper.c
Yoshinori Sato <ysato@users.sourceforge.jp> (reviewer:SH4 TCG CPUs)
(https://www.qemu.org/docs/master/devel/submitting-a-patch.html#cc-the-relevant-maintainer)

On 4/4/24 18:39, Peter Maydell wrote:
> On Thu, 4 Apr 2024 at 17:26, Zack Buhman <zack@buhman.org> wrote:
>>
>> The saturation arithmetic logic in helper_macl is not correct.
>>
>> I tested and verified this behavior on a SH7091, the general pattern
>> is a code sequence such as:
>>
>>          sets
>>
>>          mov.l _mach,r2
>>          lds r2,mach
>>          mov.l _macl,r2
>>          lds r2,macl
>>
>>          mova _n,r0
>>          mov r0,r1
>>          mova _m,r0
>>          mac.l @r0+,@r1+
>>
>>      _mach: .long 0x00007fff
>>      _macl: .long 0x12345678
>>      _m:    .long 0x7fffffff
>>      _n:    .long 0x7fffffff
>>
>> Test case 0: (no int64_t overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0x00007fff macl = 0x12345678
>>      @r0  = 0x7fffffff @r1  = 0x7fffffff
>>
>>    expected saturation mac.l result:
>>      mach = 0x00007fff macl = 0xffffffff
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0x00007ffe macl = 0x12345678
>>
>> Test case 1: (no int64_t overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0xffff8000 macl = 0x00000000
>>      @r0  = 0xffffffff @r1  = 0x00000001
>>
>>    expected saturation mac.l result:
>>      mach = 0xffff8000 macl = 0x00000000
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0xffff7fff macl = 0xffffffff
>>
>> Test case 2: (int64_t addition overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0x80000000 macl = 0x00000000
>>      @r0  = 0xffffffff @r1  = 0x00000001
>>
>>    expected saturation mac.l result:
>>      mach = 0xffff8000 macl = 0x00000000
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0xffff7fff macl = 0xffffffff
>>
>> Test case 3: (int64_t addition overflow)
>>    given; prior to saturation mac.l:
>>      mach = 0x7fffffff macl = 0x00000000
>>      @r0 = 0x7fffffff @r1 = 0x7fffffff
>>
>>    expected saturation mac.l result:
>>      mach = 0x00007fff macl = 0xffffffff
>>
>>    qemu saturation mac.l result (prior to this commit):
>>      mach = 0xfffffffe macl = 0x00000001
>>
>> All of the above also matches the description of MAC.L as documented
>> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
> 
> Hi. I just noticed that you didn't include a signed-off-by line
> in your commit message. We need these as they're how you say
> that you're legally OK to contribute this code to QEMU and
> you're happy for it to go into the project:
> 
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> has links to what exactly this means, but basically the
> requirement is that the last line of your commit message should be
> "Signed-off-by: Your Name <your@email>"
> 
> In this case, if you just reply to this email with that, we
> can pick it up and fix up the commit message when we apply the
> patch.
> 
>> ---
>>   target/sh4/op_helper.c | 31 +++++++++++++++++++++----------
>>   1 file changed, 21 insertions(+), 10 deletions(-)
>>
>> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
>> index 4559d0d376..ee16524083 100644
>> --- a/target/sh4/op_helper.c
>> +++ b/target/sh4/op_helper.c
>> @@ -160,18 +160,29 @@ void helper_ocbi(CPUSH4State *env, uint32_t address)
>>
>>   void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
>>   {
>> -    int64_t res;
>> -
>> -    res = ((uint64_t) env->mach << 32) | env->macl;
>> -    res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1;
>> -    env->mach = (res >> 32) & 0xffffffff;
>> -    env->macl = res & 0xffffffff;
>> +    int32_t value0 = (int32_t)arg0;
>> +    int32_t value1 = (int32_t)arg1;
>> +    int64_t mul = ((int64_t)value0) * ((int64_t)value1);
>> +    int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
>> +    int64_t result;
>> +    bool overflow = sadd64_overflow(mac, mul, &result);
>> +    /* Perform 48-bit saturation arithmetic if the S flag is set */
>>       if (env->sr & (1u << SR_S)) {
>> -        if (res < 0)
>> -            env->mach |= 0xffff0000;
>> -        else
>> -            env->mach &= 0x00007fff;
>> +        /*
>> +         * The sign bit of `mac + mul` may overflow. The MAC unit on
>> +         * real SH-4 hardware has equivalent carry/saturation logic:
>> +         */
>> +        const int64_t upper_bound =  ((1ull << 47) - 1);
>> +        const int64_t lower_bound = -((1ull << 47) - 0);
>> +
>> +        if (overflow) {
>> +            result = (mac < 0) ? lower_bound : upper_bound;
>> +        } else {
>> +            result = MIN(MAX(result, lower_bound), upper_bound);
>> +        }
>>       }
>> +    env->macl = result;
>> +    env->mach = result >> 32;
>>   }
> 
> I haven't checked the sh4 docs but the change looks right, so
> 
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> 
> thanks
> -- PMM
>