[PATCH v2] sh4: mac.w: implement saturation arithmetic logic

Zack Buhman posted 1 patch 3 weeks, 5 days ago
target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 10 deletions(-)
[PATCH v2] sh4: mac.w: implement saturation arithmetic logic
Posted by Zack Buhman 3 weeks, 5 days ago
The saturation arithmetic logic in helper_macw is not correct.

I tested and verified this behavior on a SH7091, the general pattern
is a code sequence such as:

	sets

	mov.l _mach,r2
	lds r2,mach
	mov.l _macl,r2
	lds r2,macl

	mova _n,r0
	mov r0,r1
	mova _m,r0
	mac.w @r0+,@r1+

 _mach: .long 0xffffffff
 _macl: .long 0xfffffffe
 _m:    .word 0x0002
        .word 0
 _n:    .word 0x0003
        .word 0

test 0:
  (mach should not be modified if an overflow did not occur)

  given, prior to saturation mac.l:
    mach = 0xffffffff ; macl = 0xfffffffe
    @r0  = 0x0002     ; @r1  = 0x0003

  expected saturation mac.w result:
    mach = 0xffffffff (unchanged)
    macl = 0x00000004

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x80000000

  In the context of the helper_macw implementation prior to this
  commit, initially this appears to be a surprising result. This is
  because (prior to unary negation) the C literal `0x80000000` (due to
  being outside the range of a `signed int`) is evaluated as an
  `unsigned int` whereas the literal `1` (due to being inside the
  range of `signed int`) is evaluated as `signed int`, as in:

    static_assert(1 < -0x80000000 == 1);
    static_assert(1 < -1 == 0);

  This is because the unary negation of an unsigned int is an
  unsigned int.

  In other words, if the `res < -0x80000000` comparison used
  infinite-precision literals, the saturation mac.w result would have
  been:

    mach = 0x00000000
    macl = 0x00000004

  Due to this (forgivable) misunderstanding of C literals, the
  following behavior also occurs:

test 1:
  (`2 * 3 + 0` is not an overflow)

  given, prior to saturation mac.l:
    mach = 0x00000000 ; macl = 0x00000000
    @r0  = 0x0002     ; @r1  = 0x0003

  expected saturation mac.w result:
    mach = 0x00000000 (unchanged)
    macl = 0x00000006

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x80000000

test 2:
  (mach should not be accumulated in saturation mode)
  (16-bit operands are sign-extended)

  given, prior to saturation mac.l:
    mach = 0x12345678 ; macl = 0x7ffffffe
    @r0  = 0x0002     ; @r1  = 0xfffd

  expected saturation mac.w result:
    mach = 0x12345678 (unchanged)
    macl = 0x7ffffff8

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x7fffffff

test 3:
  (macl should have the correct saturation value)

  given, prior to saturation mac.l:
    mach = 0xabcdef12 ; macl = 0x7ffffffa
    @r0  = 0x0002     ; @r1  = 0x0003

  expected saturation mac.w result:
    mach = 0x00000001 (overwritten)
    macl = 0x7fffffff

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x80000000

All of the above also matches the description of MAC.W as documented
in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf

Signed-off-by: Zack Buhman <zack@buhman.org>
---
 target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index ee16524083..07ff2cf53d 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -187,20 +187,45 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 
 void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
-    int64_t res;
+    int16_t value0 = (int16_t)arg0;
+    int16_t value1 = (int16_t)arg1;
+    int32_t mul = ((int32_t)value0) * ((int32_t)value1);
 
-    res = ((uint64_t) env->mach << 32) | env->macl;
-    res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
-    env->mach = (res >> 32) & 0xffffffff;
-    env->macl = res & 0xffffffff;
+    /* Perform 32-bit saturation arithmetic if the S flag is set */
     if (env->sr & (1u << SR_S)) {
-        if (res < -0x80000000) {
-            env->mach = 1;
-            env->macl = 0x80000000;
-        } else if (res > 0x000000007fffffff) {
+        const int32_t upper_bound =  ((1u << 31) - 1);
+        const int32_t lower_bound = -((1u << 31) - 0);
+
+        /*
+         * In saturation arithmetic mode, the accumulator is 32-bit
+         * with carry. MACH is not considered during the addition
+         * operation nor the 32-bit saturation logic.
+         */
+        int32_t mac = env->macl;
+        int32_t result;
+        bool overflow = sadd32_overflow(mac, mul, &result);
+        if (overflow) {
+            result = (mac < 0) ? lower_bound : upper_bound;
+            /* MACH is set to 1 to denote overflow */
+            env->macl = result;
             env->mach = 1;
-            env->macl = 0x7fffffff;
+        } else {
+            /*
+             * If there is no overflow, the result is already inside
+             * the saturation bounds.
+             *
+             * If there was no overflow, MACH is unchanged.
+             */
+            env->macl = result;
         }
+    } else {
+        /* In non-saturation arithmetic mode, the accumulator is 64-bit */
+        int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
+
+        /* The carry bit of the 64-bit addition is discarded */
+        int64_t result = mac + (int64_t)mul;
+        env->macl = result;
+        env->mach = result >> 32;
     }
 }
 
-- 
2.41.0
Re: [PATCH v2] sh4: mac.w: implement saturation arithmetic logic
Posted by Yoshinori Sato 3 weeks, 3 days ago
On Sat, 06 Apr 2024 08:38:04 +0900,
Zack Buhman wrote:
> 
> The saturation arithmetic logic in helper_macw is not correct.
> 
> I tested and verified this behavior on a SH7091, the general pattern
> is a code sequence such as:
> 
> 	sets
> 
> 	mov.l _mach,r2
> 	lds r2,mach
> 	mov.l _macl,r2
> 	lds r2,macl
> 
> 	mova _n,r0
> 	mov r0,r1
> 	mova _m,r0
> 	mac.w @r0+,@r1+
> 
>  _mach: .long 0xffffffff
>  _macl: .long 0xfffffffe
>  _m:    .word 0x0002
>         .word 0
>  _n:    .word 0x0003
>         .word 0
> 
> test 0:
>   (mach should not be modified if an overflow did not occur)
> 
>   given, prior to saturation mac.l:
>     mach = 0xffffffff ; macl = 0xfffffffe
>     @r0  = 0x0002     ; @r1  = 0x0003
> 
>   expected saturation mac.w result:
>     mach = 0xffffffff (unchanged)
>     macl = 0x00000004
> 
>   qemu saturation mac.w result (before this commit):
>     mach = 0x00000001
>     macl = 0x80000000
> 
>   In the context of the helper_macw implementation prior to this
>   commit, initially this appears to be a surprising result. This is
>   because (prior to unary negation) the C literal `0x80000000` (due to
>   being outside the range of a `signed int`) is evaluated as an
>   `unsigned int` whereas the literal `1` (due to being inside the
>   range of `signed int`) is evaluated as `signed int`, as in:
> 
>     static_assert(1 < -0x80000000 == 1);
>     static_assert(1 < -1 == 0);
> 
>   This is because the unary negation of an unsigned int is an
>   unsigned int.
> 
>   In other words, if the `res < -0x80000000` comparison used
>   infinite-precision literals, the saturation mac.w result would have
>   been:
> 
>     mach = 0x00000000
>     macl = 0x00000004
> 
>   Due to this (forgivable) misunderstanding of C literals, the
>   following behavior also occurs:
> 
> test 1:
>   (`2 * 3 + 0` is not an overflow)
> 
>   given, prior to saturation mac.l:
>     mach = 0x00000000 ; macl = 0x00000000
>     @r0  = 0x0002     ; @r1  = 0x0003
> 
>   expected saturation mac.w result:
>     mach = 0x00000000 (unchanged)
>     macl = 0x00000006
> 
>   qemu saturation mac.w result (before this commit):
>     mach = 0x00000001
>     macl = 0x80000000
> 
> test 2:
>   (mach should not be accumulated in saturation mode)
>   (16-bit operands are sign-extended)
> 
>   given, prior to saturation mac.l:
>     mach = 0x12345678 ; macl = 0x7ffffffe
>     @r0  = 0x0002     ; @r1  = 0xfffd
> 
>   expected saturation mac.w result:
>     mach = 0x12345678 (unchanged)
>     macl = 0x7ffffff8
> 
>   qemu saturation mac.w result (before this commit):
>     mach = 0x00000001
>     macl = 0x7fffffff
> 
> test 3:
>   (macl should have the correct saturation value)
> 
>   given, prior to saturation mac.l:
>     mach = 0xabcdef12 ; macl = 0x7ffffffa
>     @r0  = 0x0002     ; @r1  = 0x0003
> 
>   expected saturation mac.w result:
>     mach = 0x00000001 (overwritten)
>     macl = 0x7fffffff
> 
>   qemu saturation mac.w result (before this commit):
>     mach = 0x00000001
>     macl = 0x80000000
> 
> All of the above also matches the description of MAC.W as documented
> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
> 
> Signed-off-by: Zack Buhman <zack@buhman.org>
> ---
>  target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
>  1 file changed, 35 insertions(+), 10 deletions(-)
> 
> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
> index ee16524083..07ff2cf53d 100644
> --- a/target/sh4/op_helper.c
> +++ b/target/sh4/op_helper.c
> @@ -187,20 +187,45 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
>  
>  void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
>  {
> -    int64_t res;
> +    int16_t value0 = (int16_t)arg0;
> +    int16_t value1 = (int16_t)arg1;
> +    int32_t mul = ((int32_t)value0) * ((int32_t)value1);
>  
> -    res = ((uint64_t) env->mach << 32) | env->macl;
> -    res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
> -    env->mach = (res >> 32) & 0xffffffff;
> -    env->macl = res & 0xffffffff;
> +    /* Perform 32-bit saturation arithmetic if the S flag is set */
>      if (env->sr & (1u << SR_S)) {
> -        if (res < -0x80000000) {
> -            env->mach = 1;
> -            env->macl = 0x80000000;
> -        } else if (res > 0x000000007fffffff) {
> +        const int32_t upper_bound =  ((1u << 31) - 1);
> +        const int32_t lower_bound = -((1u << 31) - 0);
> +
> +        /*
> +         * In saturation arithmetic mode, the accumulator is 32-bit
> +         * with carry. MACH is not considered during the addition
> +         * operation nor the 32-bit saturation logic.
> +         */
> +        int32_t mac = env->macl;
> +        int32_t result;
> +        bool overflow = sadd32_overflow(mac, mul, &result);
> +        if (overflow) {
> +            result = (mac < 0) ? lower_bound : upper_bound;
> +            /* MACH is set to 1 to denote overflow */
> +            env->macl = result;
>              env->mach = 1;
> -            env->macl = 0x7fffffff;
> +        } else {
> +            /*
> +             * If there is no overflow, the result is already inside
> +             * the saturation bounds.
> +             *
> +             * If there was no overflow, MACH is unchanged.
> +             */
> +            env->macl = result;
>          }
> +    } else {
> +        /* In non-saturation arithmetic mode, the accumulator is 64-bit */
> +        int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
> +
> +        /* The carry bit of the 64-bit addition is discarded */
> +        int64_t result = mac + (int64_t)mul;
> +        env->macl = result;
> +        env->mach = result >> 32;
>      }
>  }
>  
> -- 
> 2.41.0
> 

Reviewd-by: Yoshinori Sato <ysato@users.sourceforge.jp>

-- 
Yosinori Sato