From nobody Thu Jul 3 16:07:37 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zoho.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1486046803645746.5789886661299; Thu, 2 Feb 2017 06:46:43 -0800 (PST) Received: from localhost ([::1]:57033 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cZIen-0006sQ-I4 for importer@patchew.org; Thu, 02 Feb 2017 09:46:41 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:35296) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cZIU3-0005MO-7z for qemu-devel@nongnu.org; Thu, 02 Feb 2017 09:35:37 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1cZITy-0005QP-LT for qemu-devel@nongnu.org; Thu, 02 Feb 2017 09:35:35 -0500 Received: from bran.ispras.ru ([83.149.199.196]:39804 helo=smtp.ispras.ru) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cZITy-0005Ow-9r for qemu-devel@nongnu.org; Thu, 02 Feb 2017 09:35:30 -0500 Received: from bulbul.intra.ispras.ru (spartak.intra.ispras.ru [10.10.3.51]) by smtp.ispras.ru (Postfix) with ESMTP id B182961786; Thu, 2 Feb 2017 17:35:29 +0300 (MSK) From: Kirill Batuzov To: qemu-devel@nongnu.org Date: Thu, 2 Feb 2017 17:34:51 +0300 Message-Id: <1486046099-17726-14-git-send-email-batuzovk@ispras.ru> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1486046099-17726-1-git-send-email-batuzovk@ispras.ru> References: <1486046099-17726-1-git-send-email-batuzovk@ispras.ru> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 83.149.199.196 
Subject: [Qemu-devel] [PATCH v2.1 13/21] tcg/i386: support remaining vector addition operations X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Peter Maydell , Peter Crosthwaite , Kirill Batuzov , Paolo Bonzini , =?UTF-8?q?Alex=20Benn=C3=A9e?= , Richard Henderson Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Signed-off-by: Kirill Batuzov --- I believe the checkpatch warning here is a false positive. --- tcg/i386/tcg-target.h | 10 +++++++++ tcg/i386/tcg-target.inc.c | 54 +++++++++++++++++++++++++++++++++++++++++++= ++-- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 755ebaa..bd6cfe1 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -172,7 +172,17 @@ extern bool have_popcnt; #endif =20 #ifdef TCG_TARGET_HAS_REG128 +#define TCG_TARGET_HAS_add_i8x16 1 +#define TCG_TARGET_HAS_add_i16x8 1 #define TCG_TARGET_HAS_add_i32x4 1 +#define TCG_TARGET_HAS_add_i64x2 1 +#endif + +#ifdef TCG_TARGET_HAS_REGV64 +#define TCG_TARGET_HAS_add_i8x8 1 +#define TCG_TARGET_HAS_add_i16x4 1 +#define TCG_TARGET_HAS_add_i32x2 1 +#define TCG_TARGET_HAS_add_i64x1 1 #endif =20 #define TCG_TARGET_deposit_i32_valid(ofs, len) \ diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 208bb81..d8f0d81 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -168,6 +168,11 @@ static bool have_lzcnt; #else # define have_lzcnt 0 #endif +#if defined(CONFIG_CPUID_H) && defined(bit_AVX) && defined(bit_OSXSAVE) +static bool have_avx; +#else +# define have_avx 0 +#endif =20 static tcg_insn_unit *tb_ret_addr; =20 @@ -393,7 +398,10 @@ static inline int tcg_target_const_match(tcg_target_lo= ng val, TCGType type, 
#define OPC_MOVQ_M2R (0x7e | P_SSE_F30F) #define OPC_MOVQ_R2M (0xd6 | P_SSE_660F) #define OPC_MOVQ_R2R (0x7e | P_SSE_F30F) +#define OPC_PADDB (0xfc | P_SSE_660F) +#define OPC_PADDW (0xfd | P_SSE_660F) #define OPC_PADDD (0xfe | P_SSE_660F) +#define OPC_PADDQ (0xd4 | P_SSE_660F) =20 /* Group 1 opcode extensions for 0x80-0x83. These are also used as modifiers for OPC_ARITH. */ @@ -1963,6 +1971,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpco= de opc, TCGArg a0, a1, a2; int c, const_a2, vexop, rexw =3D 0; =20 + static const int vect_binop[] =3D { + [INDEX_op_add_i8x16] =3D OPC_PADDB, + [INDEX_op_add_i16x8] =3D OPC_PADDW, + [INDEX_op_add_i32x4] =3D OPC_PADDD, + [INDEX_op_add_i64x2] =3D OPC_PADDQ, + + [INDEX_op_add_i8x8] =3D OPC_PADDB, + [INDEX_op_add_i16x4] =3D OPC_PADDW, + [INDEX_op_add_i32x2] =3D OPC_PADDD, + [INDEX_op_add_i64x1] =3D OPC_PADDQ, + }; + + #if TCG_TARGET_REG_BITS =3D=3D 64 # define OP_32_64(x) \ case glue(glue(INDEX_op_, x), _i64): \ @@ -1972,6 +1993,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpco= de opc, # define OP_32_64(x) \ case glue(glue(INDEX_op_, x), _i32) #endif +#define OP_V128_ALL(x) \ + case glue(glue(INDEX_op_, x), _i8x16): \ + case glue(glue(INDEX_op_, x), _i16x8): \ + case glue(glue(INDEX_op_, x), _i32x4): \ + case glue(glue(INDEX_op_, x), _i64x2) + +#define OP_V64_ALL(x) \ + case glue(glue(INDEX_op_, x), _i8x8): \ + case glue(glue(INDEX_op_, x), _i16x4): \ + case glue(glue(INDEX_op_, x), _i32x2): \ + case glue(glue(INDEX_op_, x), _i64x1) =20 /* Hoist the loads of the most common arguments. 
*/ a0 =3D args[0]; @@ -2369,8 +2401,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpco= de opc, tcg_out_mb(s, a0); break; =20 - case INDEX_op_add_i32x4: - tcg_out_modrm(s, OPC_PADDD, args[0], args[2]); + OP_V128_ALL(add): + OP_V64_ALL(add): + if (have_avx) { + tcg_out_vex_modrm(s, vect_binop[opc], args[0], args[1], args[2= ]); + } else { + tcg_out_modrm(s, vect_binop[opc], args[0], args[2]); + } break; =20 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ @@ -2383,6 +2420,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcod= e opc, } =20 #undef OP_32_64 +#undef OP_V128_ALL +#undef OP_V64_ALL } =20 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) @@ -2613,7 +2652,14 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOp= code op) return &s2; } =20 + case INDEX_op_add_i8x16: + case INDEX_op_add_i16x8: case INDEX_op_add_i32x4: + case INDEX_op_add_i64x2: + case INDEX_op_add_i8x8: + case INDEX_op_add_i16x4: + case INDEX_op_add_i32x2: + case INDEX_op_add_i64x1: return &V_0_V; =20 default: @@ -2728,6 +2774,10 @@ static void tcg_target_init(TCGContext *s) #ifdef bit_POPCNT have_popcnt =3D (c & bit_POPCNT) !=3D 0; #endif +#if defined(bit_AVX) && defined(bit_OSXSAVE) + have_avx =3D (c & (bit_AVX | bit_OSXSAVE)) =3D=3D (bit_AVX | bit_O= SXSAVE); +#endif + } =20 if (max >=3D 7) { --=20 2.1.4