target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-)
The saturation arithmetic logic in helper_macw is not correct.
I tested and verified this behavior on a SH7091, the general pattern
is a code sequence such as:
sets
mov.l _mach,r2
lds r2,mach
mov.l _macl,r2
lds r2,macl
mova _n,r0
mov r0,r1
mova _m,r0
mac.w @r0+,@r1+
_mach: .long 0xffffffff
_macl: .long 0xfffffffe
_m: .word 0x0002
.word 0
_n: .word 0x0003
.word 0
test 0:
(mach should not be modified if an overflow did not occur)
given, prior to saturation mac.l:
mach = 0xffffffff ; macl = 0xfffffffe
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0xffffffff (unchanged)
macl = 0x00000004
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
In the context of the helper_macw implementation prior to this
commit, initially this appears to be a surprising result. This is
because (prior to unary negation) the C literal `0x80000000` (due to
being outside the range of a `signed int`) is evaluated as an
`unsigned int` whereas the literal `1` (due to being inside the
range of `signed int`) is evaluated as `signed int`, as in:
static_assert(1 < -0x80000000 == 1);
static_assert(1 < -1 == 0);
This is because the unary negation of an unsigned int is an
unsigned int.
In other words, if the `res < -0x80000000` comparison used
infinite-precision literals, the saturation mac.w result would have
been:
mach = 0x00000000
macl = 0x00000004
Due to this (forgivable) misunderstanding of C literals, the
following behavior also occurs:
test 1:
(`2 * 3 + 0` is not an overflow)
given, prior to saturation mac.l:
mach = 0x00000000 ; macl = 0x00000000
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0x00000000 (unchanged)
macl = 0x00000006
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
test 2:
(mach should not be accumulated in saturation mode)
(16-bit operands are sign-extended)
given, prior to saturation mac.l:
mach = 0x12345678 ; macl = 0x7ffffffe
@r0 = 0x0002 ; @r1 = 0xfffd
expected saturation mac.w result:
mach = 0x12345678 (unchanged)
macl = 0x7ffffff8
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x7fffffff
test 3:
(macl should have the correct saturation value)
given, prior to saturation mac.l:
mach = 0xabcdef12 ; macl = 0x7ffffffa
@r0 = 0x0002 ; @r1 = 0x0003
expected saturation mac.w result:
mach = 0x00000001 (overwritten)
macl = 0x7fffffff
qemu saturation mac.w result (before this commit):
mach = 0x00000001
macl = 0x80000000
All of the above also matches the description of MAC.W as documented
in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
Signed-off-by: Zack Buhman <zack@buhman.org>
---
target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index ee16524083..07ff2cf53d 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -187,20 +187,45 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
{
- int64_t res;
+ int16_t value0 = (int16_t)arg0;
+ int16_t value1 = (int16_t)arg1;
+ int32_t mul = ((int32_t)value0) * ((int32_t)value1);
- res = ((uint64_t) env->mach << 32) | env->macl;
- res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
- env->mach = (res >> 32) & 0xffffffff;
- env->macl = res & 0xffffffff;
+ /* Perform 32-bit saturation arithmetic if the S flag is set */
if (env->sr & (1u << SR_S)) {
- if (res < -0x80000000) {
- env->mach = 1;
- env->macl = 0x80000000;
- } else if (res > 0x000000007fffffff) {
+ const int32_t upper_bound = ((1u << 31) - 1);
+ const int32_t lower_bound = -((1u << 31) - 0);
+
+ /*
+ * In saturation arithmetic mode, the accumulator is 32-bit
+ * with carry. MACH is not considered during the addition
+ * operation nor the 32-bit saturation logic.
+ */
+ int32_t mac = env->macl;
+ int32_t result;
+ bool overflow = sadd32_overflow(mac, mul, &result);
+ if (overflow) {
+ result = (mac < 0) ? lower_bound : upper_bound;
+ /* MACH is set to 1 to denote overflow */
+ env->macl = result;
env->mach = 1;
- env->macl = 0x7fffffff;
+ } else {
+ /*
+ * If there is no overflow, the result is already inside
+ * the saturation bounds.
+ *
+ * If there was no overflow, MACH is unchanged.
+ */
+ env->macl = result;
}
+ } else {
+ /* In non-saturation arithmetic mode, the accumulator is 64-bit */
+ int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
+
+ /* The carry bit of the 64-bit addition is discarded */
+ int64_t result = mac + (int64_t)mul;
+ env->macl = result;
+ env->mach = result >> 32;
}
}
--
2.41.0
On Sat, 06 Apr 2024 08:38:04 +0900,
Zack Buhman wrote:
>
> The saturation arithmetic logic in helper_macw is not correct.
>
> I tested and verified this behavior on a SH7091, the general pattern
> is a code sequence such as:
>
> sets
>
> mov.l _mach,r2
> lds r2,mach
> mov.l _macl,r2
> lds r2,macl
>
> mova _n,r0
> mov r0,r1
> mova _m,r0
> mac.w @r0+,@r1+
>
> _mach: .long 0xffffffff
> _macl: .long 0xfffffffe
> _m: .word 0x0002
> .word 0
> _n: .word 0x0003
> .word 0
>
> test 0:
> (mach should not be modified if an overflow did not occur)
>
> given, prior to saturation mac.l:
> mach = 0xffffffff ; macl = 0xfffffffe
> @r0 = 0x0002 ; @r1 = 0x0003
>
> expected saturation mac.w result:
> mach = 0xffffffff (unchanged)
> macl = 0x00000004
>
> qemu saturation mac.w result (before this commit):
> mach = 0x00000001
> macl = 0x80000000
>
> In the context of the helper_macw implementation prior to this
> commit, initially this appears to be a surprising result. This is
> because (prior to unary negation) the C literal `0x80000000` (due to
> being outside the range of a `signed int`) is evaluated as an
> `unsigned int` whereas the literal `1` (due to being inside the
> range of `signed int`) is evaluated as `signed int`, as in:
>
> static_assert(1 < -0x80000000 == 1);
> static_assert(1 < -1 == 0);
>
> This is because the unary negation of an unsigned int is an
> unsigned int.
>
> In other words, if the `res < -0x80000000` comparison used
> infinite-precision literals, the saturation mac.w result would have
> been:
>
> mach = 0x00000000
> macl = 0x00000004
>
> Due to this (forgivable) misunderstanding of C literals, the
> following behavior also occurs:
>
> test 1:
> (`2 * 3 + 0` is not an overflow)
>
> given, prior to saturation mac.l:
> mach = 0x00000000 ; macl = 0x00000000
> @r0 = 0x0002 ; @r1 = 0x0003
>
> expected saturation mac.w result:
> mach = 0x00000000 (unchanged)
> macl = 0x00000006
>
> qemu saturation mac.w result (before this commit):
> mach = 0x00000001
> macl = 0x80000000
>
> test 2:
> (mach should not be accumulated in saturation mode)
> (16-bit operands are sign-extended)
>
> given, prior to saturation mac.l:
> mach = 0x12345678 ; macl = 0x7ffffffe
> @r0 = 0x0002 ; @r1 = 0xfffd
>
> expected saturation mac.w result:
> mach = 0x12345678 (unchanged)
> macl = 0x7ffffff8
>
> qemu saturation mac.w result (before this commit):
> mach = 0x00000001
> macl = 0x7fffffff
>
> test 3:
> (macl should have the correct saturation value)
>
> given, prior to saturation mac.l:
> mach = 0xabcdef12 ; macl = 0x7ffffffa
> @r0 = 0x0002 ; @r1 = 0x0003
>
> expected saturation mac.w result:
> mach = 0x00000001 (overwritten)
> macl = 0x7fffffff
>
> qemu saturation mac.w result (before this commit):
> mach = 0x00000001
> macl = 0x80000000
>
> All of the above also matches the description of MAC.W as documented
> in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf
>
> Signed-off-by: Zack Buhman <zack@buhman.org>
> ---
> target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++----------
> 1 file changed, 35 insertions(+), 10 deletions(-)
>
> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
> index ee16524083..07ff2cf53d 100644
> --- a/target/sh4/op_helper.c
> +++ b/target/sh4/op_helper.c
> @@ -187,20 +187,45 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
>
> void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
> {
> - int64_t res;
> + int16_t value0 = (int16_t)arg0;
> + int16_t value1 = (int16_t)arg1;
> + int32_t mul = ((int32_t)value0) * ((int32_t)value1);
>
> - res = ((uint64_t) env->mach << 32) | env->macl;
> - res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
> - env->mach = (res >> 32) & 0xffffffff;
> - env->macl = res & 0xffffffff;
> + /* Perform 32-bit saturation arithmetic if the S flag is set */
> if (env->sr & (1u << SR_S)) {
> - if (res < -0x80000000) {
> - env->mach = 1;
> - env->macl = 0x80000000;
> - } else if (res > 0x000000007fffffff) {
> + const int32_t upper_bound = ((1u << 31) - 1);
> + const int32_t lower_bound = -((1u << 31) - 0);
> +
> + /*
> + * In saturation arithmetic mode, the accumulator is 32-bit
> + * with carry. MACH is not considered during the addition
> + * operation nor the 32-bit saturation logic.
> + */
> + int32_t mac = env->macl;
> + int32_t result;
> + bool overflow = sadd32_overflow(mac, mul, &result);
> + if (overflow) {
> + result = (mac < 0) ? lower_bound : upper_bound;
> + /* MACH is set to 1 to denote overflow */
> + env->macl = result;
> env->mach = 1;
> - env->macl = 0x7fffffff;
> + } else {
> + /*
> + * If there is no overflow, the result is already inside
> + * the saturation bounds.
> + *
> + * If there was no overflow, MACH is unchanged.
> + */
> + env->macl = result;
> }
> + } else {
> + /* In non-saturation arithmetic mode, the accumulator is 64-bit */
> + int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
> +
> + /* The carry bit of the 64-bit addition is discarded */
> + int64_t result = mac + (int64_t)mul;
> + env->macl = result;
> + env->mach = result >> 32;
> }
> }
>
> --
> 2.41.0
>
Reviewd-by: Yoshinori Sato <ysato@users.sourceforge.jp>
--
Yosinori Sato
© 2016 - 2025 Red Hat, Inc.