The ARM instruction set does not have loads to 128-bit vector registers (q-regs).
Instead, it can load several consecutive 64-bit vector registers (d-regs),
which is what GCC uses to load 128-bit registers from memory.
For vector operations to work, we need to detect such loads (and the matching
stores) and transform them into 128-bit accesses to 128-bit temporaries.
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
target/arm/translate.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 90e14df..5bd0b1c 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4710,6 +4710,21 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
tcg_gen_addi_i32(addr, addr, 1 << size);
}
if (size == 3) {
+#ifdef TCG_TARGET_HAS_REG128
+ if (rd % 2 == 0 && nregs == 2) {
+ TCGv aa32addr = gen_aa32_addr(s, addr, MO_TE | MO_128);
+ /* 128-bit load */
+ if (load) {
+ tcg_gen_qemu_ld_v128(cpu_Q[rd / 2], aa32addr,
+ get_mem_index(s), MO_TE | MO_128);
+ } else {
+ tcg_gen_qemu_st_v128(cpu_Q[rd / 2], aa32addr,
+ get_mem_index(s), MO_TE | MO_128);
+ }
+ tcg_temp_free(aa32addr);
+ break;
+ }
+#endif
tmp64 = tcg_temp_new_i64();
if (load) {
gen_aa32_ld64(s, tmp64, addr, get_mem_index(s));
--
2.1.4