Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/812957/?format=api
{ "id": 812957, "url": "http://patchwork.ozlabs.org/api/patches/812957/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20170912162513.21694-7-richard.henderson@linaro.org/", "project": { "id": 14, "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api", "name": "QEMU Development", "link_name": "qemu-devel", "list_id": "qemu-devel.nongnu.org", "list_email": "qemu-devel@nongnu.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20170912162513.21694-7-richard.henderson@linaro.org>", "list_archive_url": null, "date": "2017-09-12T16:25:03", "name": "[v2,06/16] tcg: Add vector infrastructure and ops for add/sub/logic", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "09f3969fa7230de72e77e336b8c4eb1fba40f805", "submitter": { "id": 72104, "url": "http://patchwork.ozlabs.org/api/people/72104/?format=api", "name": "Richard Henderson", "email": "richard.henderson@linaro.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20170912162513.21694-7-richard.henderson@linaro.org/mbox/", "series": [ { "id": 2737, "url": "http://patchwork.ozlabs.org/api/series/2737/?format=api", "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=2737", "date": "2017-09-12T16:24:59", "name": "TCG vectorization and example conversion", "version": 2, "mbox": "http://patchwork.ozlabs.org/series/2737/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/812957/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/812957/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>", "X-Original-To": "incoming@patchwork.ozlabs.org", "Delivered-To": "patchwork-incoming@bilbo.ozlabs.org", "Authentication-Results": [ "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=nongnu.org\n\t(client-ip=2001:4830:134:3::11; helo=lists.gnu.org;\n\tenvelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n\treceiver=<UNKNOWN>)", "ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key;\n\tunprotected) header.d=linaro.org header.i=@linaro.org\n\theader.b=\"M7+F76lk\"; dkim-atps=neutral" ], "Received": [ "from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11])\n\t(using TLSv1 with cipher AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xs9Hq0Ykmz9s7f\n\tfor <incoming@patchwork.ozlabs.org>;\n\tWed, 13 Sep 2017 02:29:59 +1000 (AEST)", "from localhost ([::1]:36897 helo=lists.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.71) (envelope-from\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>)\n\tid 1dro4S-0005mf-Vr\n\tfor incoming@patchwork.ozlabs.org; Tue, 12 Sep 2017 12:29:57 -0400", "from eggs.gnu.org ([2001:4830:134:3::10]:37947)\n\tby lists.gnu.org with esmtp (Exim 4.71)\n\t(envelope-from <richard.henderson@linaro.org>) id 1dro09-0001kT-7j\n\tfor qemu-devel@nongnu.org; Tue, 12 Sep 2017 12:25:32 -0400", "from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)\n\t(envelope-from <richard.henderson@linaro.org>) id 1dro06-00072Z-4d\n\tfor qemu-devel@nongnu.org; Tue, 12 Sep 2017 12:25:29 -0400", "from mail-pf0-x236.google.com ([2607:f8b0:400e:c00::236]:33034)\n\tby eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16)\n\t(Exim 4.71) (envelope-from <richard.henderson@linaro.org>)\n\tid 1dro05-000722-QC\n\tfor qemu-devel@nongnu.org; Tue, 12 Sep 2017 12:25:26 -0400", "by mail-pf0-x236.google.com with SMTP id y29so18668673pff.0\n\tfor <qemu-devel@nongnu.org>; Tue, 12 Sep 2017 09:25:25 -0700 (PDT)", "from bigtime.twiddle.net (97-126-103-167.tukw.qwest.net.\n\t[97.126.103.167]) by smtp.gmail.com with ESMTPSA id\n\tb22sm20382140pfh.175.2017.09.12.09.25.23\n\t(version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256);\n\tTue, 12 Sep 2017 09:25:23 -0700 (PDT)" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google;\n\th=from:to:cc:subject:date:message-id:in-reply-to:references;\n\tbh=0JcS9EA47A/X+tvVNHzf7WGLdo5Pi/Td5FVLvgIbWrg=;\n\tb=M7+F76lkVtlzvLmbfmBSCr0lPcyK4xeh8aezYb9W9O0aK0lOV2rkJ0TzxhtNGTjx9e\n\tAbIFASZSaN44kmo4XUY9w7LekjbtG/+ErYL/clxlih852xYcrkQH3bwOnaoNINTbRUZP\n\tuZHb+MmztNbk3cxDi/dBD956gMZi9BlGCVlmQ=", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n\t:references;\n\tbh=0JcS9EA47A/X+tvVNHzf7WGLdo5Pi/Td5FVLvgIbWrg=;\n\tb=ltGoiNdtkULhpjTVcFo4vswCOk2fVG7OaIqOvYmA42gCUPbuhFjoQCEmy4mORxwgNe\n\tZxHS7GFEHdI8Yb6d4KJZ1chNK+GYS6sSeqQsjOYTZBN8d2d5C/RpnvrxLmW78G460aeN\n\tdla0SQ+PcN3sao0+8bzoqBRPD0h4XAepuRqzodYJtafkwj1G34kNkhuaV1QSerceScXa\n\thtgq3waoG7jmYP8qh2WSfcWf5Qeg5BGkiFfhDTHIPoz0VJz7xUQNh/B1OAVSJMQA3BPL\n\t/YXS87mOyAXlgCOWVSC43OT9P2THqFW0yGfHY8T8sInwBU/HsDTC8VVjw9GfvxF4bzTD\n\tezVA==", "X-Gm-Message-State": "AHPjjUi54S0nJ6NY+q+Gm3xpfUAV1RGMczokQtJvTPF3C6fXl/9EgftW\n\tukgjcLtSQrzHaI4iebku2Q==", "X-Google-Smtp-Source": "ADKCNb6Cl2/cmQ/FvWWU3ZZi619ZtYBI900nVUH62HuJYG0JfNRFX+vJwlf67/A4AkEn+gFR74hmUw==", "X-Received": "by 10.98.155.220 with SMTP id e89mr16134570pfk.120.1505233524206;\n\tTue, 12 Sep 2017 09:25:24 -0700 (PDT)", "From": "Richard Henderson <richard.henderson@linaro.org>", "To": "qemu-devel@nongnu.org", "Date": "Tue, 12 Sep 2017 09:25:03 -0700", "Message-Id": "<20170912162513.21694-7-richard.henderson@linaro.org>", "X-Mailer": "git-send-email 2.13.5", "In-Reply-To": "<20170912162513.21694-1-richard.henderson@linaro.org>", "References": "<20170912162513.21694-1-richard.henderson@linaro.org>", "X-detected-operating-system": "by eggs.gnu.org: Genre and OS details not\n\trecognized.", "X-Received-From": "2607:f8b0:400e:c00::236", "Subject": "[Qemu-devel] [PATCH v2 06/16] tcg: Add vector infrastructure and\n\tops for add/sub/logic", "X-BeenThere": "qemu-devel@nongnu.org", "X-Mailman-Version": "2.1.21", "Precedence": "list", "List-Id": "<qemu-devel.nongnu.org>", "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>", "List-Archive": "<http://lists.nongnu.org/archive/html/qemu-devel/>", "List-Post": "<mailto:qemu-devel@nongnu.org>", "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>", "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=subscribe>", "Cc": "alex.bennee@linaro.org, f4bug@amsat.org", "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org", "Sender": "\"Qemu-devel\"\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>" }, "content": "Signed-off-by: Richard Henderson <richard.henderson@linaro.org>\n---\n Makefile.target | 2 +-\n tcg/tcg-op-gvec.h | 61 ++++++\n tcg/tcg-runtime.h | 16 ++\n tcg/tcg.h | 2 +\n tcg/tcg-op-gvec.c | 489 +++++++++++++++++++++++++++++++++++++++++++++++++\n tcg/tcg-runtime-gvec.c | 192 +++++++++++++++++++\n tcg/tcg.c | 4 +-\n 7 files changed, 763 insertions(+), 3 deletions(-)\n create mode 100644 tcg/tcg-runtime-gvec.c", "diff": "diff --git a/Makefile.target b/Makefile.target\nindex e647b6e2cb..9eefe7cbd7 100644\n--- a/Makefile.target\n+++ b/Makefile.target\n@@ -95,7 +95,7 @@ obj-y += exec.o\n obj-y += accel/\n obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-gvec.o\n obj-$(CONFIG_TCG) += tcg/optimize.o tcg/tcg-common.o\n-obj-$(CONFIG_TCG) += tcg/tcg-runtime.o\n+obj-$(CONFIG_TCG) += tcg/tcg-runtime.o tcg/tcg-runtime-gvec.o\n obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o\n obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o\n obj-y += fpu/softfloat.o\ndiff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h\nindex affb7c2e89..11d04342b6 100644\n--- a/tcg/tcg-op-gvec.h\n+++ b/tcg/tcg-op-gvec.h\n@@ -41,3 +41,64 @@ typedef void (gen_helper_gvec_3_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr,\n void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,\n uint32_t data, gen_helper_gvec_3_ptr *fn);\n+\n+/* Expand a gvec operation. Either inline or out-of-line depending on\n+ the actual vector size and the operations supported by the host. */\n+typedef struct {\n+ /* \"Small\" sizes: expand inline as a 64-bit or 32-bit lane.\n+ Only one of these will be non-NULL. */\n+ void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);\n+ void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);\n+ /* Larger sizes: expand out-of-line helper w/descriptor. */\n+ gen_helper_gvec_3 *fno;\n+ /* Host vector operations. */\n+ TCGOpcode op_v64;\n+ TCGOpcode op_v128;\n+ TCGOpcode op_v256;\n+} GVecGen3;\n+\n+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz, const GVecGen3 *);\n+\n+/* Expand a specific vector operation. */\n+\n+#define DEF(X) \\\n+ void tcg_gen_gvec_##X(uint32_t dofs, uint32_t aofs, uint32_t bofs, \\\n+ uint32_t opsz, uint32_t clsz)\n+\n+DEF(add8);\n+DEF(add16);\n+DEF(add32);\n+DEF(add64);\n+\n+DEF(sub8);\n+DEF(sub16);\n+DEF(sub32);\n+DEF(sub64);\n+\n+DEF(and);\n+DEF(or);\n+DEF(xor);\n+DEF(andc);\n+DEF(orc);\n+\n+#undef DEF\n+\n+/*\n+ * 64-bit vector operations. Use these when the register has been allocated\n+ * with tcg_global_mem_new_i64, and so we cannot also address it via pointer.\n+ * OPRSZ = MAXSZ = 8.\n+ */\n+\n+#define DEF(X) \\\n+ void tcg_gen_vec_##X(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+\n+DEF(add8);\n+DEF(add16);\n+DEF(add32);\n+\n+DEF(sub8);\n+DEF(sub16);\n+DEF(sub32);\n+\n+#undef DEF\ndiff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h\nindex c41d38a557..befb0fa659 100644\n--- a/tcg/tcg-runtime.h\n+++ b/tcg/tcg-runtime.h\n@@ -134,3 +134,19 @@ GEN_ATOMIC_HELPERS(xor_fetch)\n GEN_ATOMIC_HELPERS(xchg)\n \n #undef GEN_ATOMIC_HELPERS\n+\n+DEF_HELPER_FLAGS_4(gvec_add8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_add16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_add32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_add64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+\n+DEF_HELPER_FLAGS_4(gvec_sub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_sub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_sub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_sub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+\n+DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_xor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_andc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_orc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\ndiff --git a/tcg/tcg.h b/tcg/tcg.h\nindex b81c67a754..37ad9fddab 100644\n--- a/tcg/tcg.h\n+++ b/tcg/tcg.h\n@@ -824,9 +824,11 @@ int tcg_global_mem_new_internal(TCGType, TCGv_ptr, intptr_t, const char *);\n TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name);\n TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name);\n \n+int tcg_temp_new_internal(TCGType type, int temp_local);\n TCGv_i32 tcg_temp_new_internal_i32(int temp_local);\n TCGv_i64 tcg_temp_new_internal_i64(int temp_local);\n \n+void tcg_temp_free_internal(int idx);\n void tcg_temp_free_i32(TCGv_i32 arg);\n void tcg_temp_free_i64(TCGv_i64 arg);\n \ndiff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c\nindex f48415020d..4b39617682 100644\n--- a/tcg/tcg-op-gvec.c\n+++ b/tcg/tcg-op-gvec.c\n@@ -24,6 +24,30 @@\n #include \"tcg-op-gvec.h\"\n #include \"tcg-gvec-desc.h\"\n \n+#define REP8(x) ((x) * 0x0101010101010101ull)\n+#define REP16(x) ((x) * 0x0001000100010001ull)\n+\n+#define MAX_UNROLL 4\n+\n+/* Verify vector size and alignment rules. OFS should be the OR of all\n+ of the operand offsets so that we can check them all at once. */\n+static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)\n+{\n+ uint32_t align = maxsz > 16 || oprsz >= 16 ? 15 : 7;\n+ tcg_debug_assert(oprsz > 0);\n+ tcg_debug_assert(oprsz <= maxsz);\n+ tcg_debug_assert((oprsz & align) == 0);\n+ tcg_debug_assert((maxsz & align) == 0);\n+ tcg_debug_assert((ofs & align) == 0);\n+}\n+\n+/* Verify vector overlap rules for three operands. */\n+static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)\n+{\n+ tcg_debug_assert(d == a || d + s <= a || a + s <= d);\n+ tcg_debug_assert(d == b || d + s <= b || b + s <= d);\n+ tcg_debug_assert(a == b || a + s <= b || b + s <= a);\n+}\n \n /* Create a descriptor from components. */\n uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)\n@@ -91,3 +115,468 @@ void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n tcg_temp_free_ptr(a2);\n tcg_temp_free_i32(desc);\n }\n+\n+/* Return true if we want to implement something of OPRSZ bytes\n+ in units of LNSZ. This limits the expansion of inline code. */\n+static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)\n+{\n+ uint32_t lnct = oprsz / lnsz;\n+ return lnct >= 1 && lnct <= MAX_UNROLL;\n+}\n+\n+/* Clear MAXSZ bytes at DOFS using elements of TYPE. LNSZ = sizeof(TYPE);\n+ OPC_MV is the opcode that zeros; OPC_ST is the opcode that stores. */\n+static void expand_clr_v(uint32_t dofs, uint32_t maxsz, uint32_t lnsz,\n+ TCGType type, TCGOpcode opc_mv, TCGOpcode opc_st)\n+{\n+ TCGArg t0 = tcg_temp_new_internal(type, 0);\n+ TCGArg env = GET_TCGV_PTR(tcg_ctx.tcg_env);\n+ uint32_t i;\n+\n+ tcg_gen_op2(&tcg_ctx, opc_mv, t0, 0);\n+ for (i = 0; i < maxsz; i += lnsz) {\n+ tcg_gen_op3(&tcg_ctx, opc_st, t0, env, dofs + i);\n+ }\n+ tcg_temp_free_internal(t0);\n+}\n+\n+/* Clear MAXSZ bytes at DOFS. */\n+static void expand_clr(uint32_t dofs, uint32_t maxsz)\n+{\n+ if (maxsz >= 32 && TCG_TARGET_HAS_v256) {\n+ uint32_t done = QEMU_ALIGN_DOWN(maxsz, 32);\n+ expand_clr_v(dofs, done, 32, TCG_TYPE_V256,\n+ INDEX_op_movi_v256, INDEX_op_st_v256);\n+ dofs += done;\n+ maxsz -= done;\n+ }\n+\n+ if (maxsz >= 16 && TCG_TARGET_HAS_v128) {\n+ uint16_t done = QEMU_ALIGN_DOWN(maxsz, 16);\n+ expand_clr_v(dofs, done, 16, TCG_TYPE_V128,\n+ INDEX_op_movi_v128, INDEX_op_st_v128);\n+ dofs += done;\n+ maxsz -= done;\n+ }\n+\n+ if (TCG_TARGET_REG_BITS == 64) {\n+ expand_clr_v(dofs, maxsz, 8, TCG_TYPE_I64,\n+ INDEX_op_movi_i64, INDEX_op_st_i64);\n+ } else if (TCG_TARGET_HAS_v64) {\n+ expand_clr_v(dofs, maxsz, 8, TCG_TYPE_V64,\n+ INDEX_op_movi_v64, INDEX_op_st_v64);\n+ } else {\n+ expand_clr_v(dofs, maxsz, 4, TCG_TYPE_I32,\n+ INDEX_op_movi_i32, INDEX_op_st_i32);\n+ }\n+}\n+\n+/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */\n+static void expand_3x4(uint32_t dofs, uint32_t aofs,\n+ uint32_t bofs, uint32_t opsz,\n+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))\n+{\n+ TCGv_i32 t0 = tcg_temp_new_i32();\n+ TCGv_i32 t1 = tcg_temp_new_i32();\n+ uint32_t i;\n+\n+ for (i = 0; i < opsz; i += 4) {\n+ tcg_gen_ld_i32(t0, tcg_ctx.tcg_env, aofs + i);\n+ tcg_gen_ld_i32(t1, tcg_ctx.tcg_env, bofs + i);\n+ fni(t0, t0, t1);\n+ tcg_gen_st_i32(t0, tcg_ctx.tcg_env, dofs + i);\n+ }\n+ tcg_temp_free_i32(t1);\n+ tcg_temp_free_i32(t0);\n+}\n+\n+/* Expand OPSZ bytes worth of three-operand operations using i64 elements. */\n+static void expand_3x8(uint32_t dofs, uint32_t aofs,\n+ uint32_t bofs, uint32_t opsz,\n+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))\n+{\n+ TCGv_i64 t0 = tcg_temp_new_i64();\n+ TCGv_i64 t1 = tcg_temp_new_i64();\n+ uint32_t i;\n+\n+ for (i = 0; i < opsz; i += 8) {\n+ tcg_gen_ld_i64(t0, tcg_ctx.tcg_env, aofs + i);\n+ tcg_gen_ld_i64(t1, tcg_ctx.tcg_env, bofs + i);\n+ fni(t0, t0, t1);\n+ tcg_gen_st_i64(t0, tcg_ctx.tcg_env, dofs + i);\n+ }\n+ tcg_temp_free_i64(t1);\n+ tcg_temp_free_i64(t0);\n+}\n+\n+/* Expand OPSZ bytes worth of three-operand operations using vector elements.\n+ OPC_OP is the operation, OPC_LD is the load, OPC_ST is the store. */\n+static void expand_3_v(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t oprsz, uint32_t lnsz, TCGType type,\n+ TCGOpcode opc_op, TCGOpcode opc_ld, TCGOpcode opc_st)\n+{\n+ TCGArg t0 = tcg_temp_new_internal(type, 0);\n+ TCGArg env = GET_TCGV_PTR(tcg_ctx.tcg_env);\n+ uint32_t i;\n+\n+ if (aofs == bofs) {\n+ for (i = 0; i < oprsz; i += lnsz) {\n+ tcg_gen_op3(&tcg_ctx, opc_ld, t0, env, aofs + i);\n+ tcg_gen_op3(&tcg_ctx, opc_op, t0, t0, t0);\n+ tcg_gen_op3(&tcg_ctx, opc_st, t0, env, dofs + i);\n+ }\n+ } else {\n+ TCGArg t1 = tcg_temp_new_internal(type, 0);\n+ for (i = 0; i < oprsz; i += lnsz) {\n+ tcg_gen_op3(&tcg_ctx, opc_ld, t0, env, aofs + i);\n+ tcg_gen_op3(&tcg_ctx, opc_ld, t1, env, bofs + i);\n+ tcg_gen_op3(&tcg_ctx, opc_op, t0, t0, t1);\n+ tcg_gen_op3(&tcg_ctx, opc_st, t0, env, dofs + i);\n+ }\n+ tcg_temp_free_internal(t1);\n+ }\n+ tcg_temp_free_internal(t0);\n+}\n+\n+/* Expand a vector three-operand operation. */\n+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)\n+{\n+ check_size_align(oprsz, maxsz, dofs | aofs | bofs);\n+ check_overlap_3(dofs, aofs, bofs, maxsz);\n+\n+ /* Quick check for sizes we won't support inline. */\n+ if (oprsz > MAX_UNROLL * 32 || maxsz > MAX_UNROLL * 32) {\n+ goto do_ool;\n+ }\n+\n+ /* Recall that ARM SVE allows vector sizes that are not a power of 2.\n+ Expand with successively smaller host vector sizes. The intent is\n+ that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */\n+ /* ??? For maxsz > oprsz, the host may be able to use an op-sized\n+ operation, zeroing the balance of the register. We can then\n+ use a cl-sized store to implement the clearing without an extra\n+ store operation. This is true for aarch64 and x86_64 hosts. */\n+\n+ if (check_size_impl(oprsz, 32) && tcg_op_supported(g->op_v256)) {\n+ uint32_t done = QEMU_ALIGN_DOWN(oprsz, 32);\n+ expand_3_v(dofs, aofs, bofs, done, 32, TCG_TYPE_V256,\n+ g->op_v256, INDEX_op_ld_v256, INDEX_op_st_v256);\n+ dofs += done;\n+ aofs += done;\n+ bofs += done;\n+ oprsz -= done;\n+ maxsz -= done;\n+ }\n+\n+ if (check_size_impl(oprsz, 16) && tcg_op_supported(g->op_v128)) {\n+ uint32_t done = QEMU_ALIGN_DOWN(oprsz, 16);\n+ expand_3_v(dofs, aofs, bofs, done, 16, TCG_TYPE_V128,\n+ g->op_v128, INDEX_op_ld_v128, INDEX_op_st_v128);\n+ dofs += done;\n+ aofs += done;\n+ bofs += done;\n+ oprsz -= done;\n+ maxsz -= done;\n+ }\n+\n+ if (check_size_impl(oprsz, 8)) {\n+ uint32_t done = QEMU_ALIGN_DOWN(oprsz, 8);\n+ if (tcg_op_supported(g->op_v64)) {\n+ expand_3_v(dofs, aofs, bofs, done, 8, TCG_TYPE_V64,\n+ g->op_v64, INDEX_op_ld_v64, INDEX_op_st_v64);\n+ } else if (g->fni8) {\n+ expand_3x8(dofs, aofs, bofs, done, g->fni8);\n+ } else {\n+ done = 0;\n+ }\n+ dofs += done;\n+ aofs += done;\n+ bofs += done;\n+ oprsz -= done;\n+ maxsz -= done;\n+ }\n+\n+ if (check_size_impl(oprsz, 4)) {\n+ uint32_t done = QEMU_ALIGN_DOWN(oprsz, 4);\n+ expand_3x4(dofs, aofs, bofs, done, g->fni4);\n+ dofs += done;\n+ aofs += done;\n+ bofs += done;\n+ oprsz -= done;\n+ maxsz -= done;\n+ }\n+\n+ if (oprsz == 0) {\n+ if (maxsz != 0) {\n+ expand_clr(dofs, maxsz);\n+ }\n+ return;\n+ }\n+\n+ do_ool:\n+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, g->fno);\n+}\n+\n+/*\n+ * Expand specific vector operations.\n+ */\n+\n+static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)\n+{\n+ TCGv_i64 t1 = tcg_temp_new_i64();\n+ TCGv_i64 t2 = tcg_temp_new_i64();\n+ TCGv_i64 t3 = tcg_temp_new_i64();\n+\n+ tcg_gen_andc_i64(t1, a, m);\n+ tcg_gen_andc_i64(t2, b, m);\n+ tcg_gen_xor_i64(t3, a, b);\n+ tcg_gen_add_i64(d, t1, t2);\n+ tcg_gen_and_i64(t3, t3, m);\n+ tcg_gen_xor_i64(d, d, t3);\n+\n+ tcg_temp_free_i64(t1);\n+ tcg_temp_free_i64(t2);\n+ tcg_temp_free_i64(t3);\n+}\n+\n+void tcg_gen_vec_add8(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+ TCGv_i64 m = tcg_const_i64(REP8(0x80));\n+ gen_addv_mask(d, a, b, m);\n+ tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_add16(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+ TCGv_i64 m = tcg_const_i64(REP16(0x8000));\n+ gen_addv_mask(d, a, b, m);\n+ tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_add32(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+ TCGv_i64 t1 = tcg_temp_new_i64();\n+ TCGv_i64 t2 = tcg_temp_new_i64();\n+\n+ tcg_gen_andi_i64(t1, a, ~0xffffffffull);\n+ tcg_gen_add_i64(t2, a, b);\n+ tcg_gen_add_i64(t1, t1, b);\n+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);\n+\n+ tcg_temp_free_i64(t1);\n+ tcg_temp_free_i64(t2);\n+}\n+\n+void tcg_gen_gvec_add8(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_vec_add8,\n+ .fno = gen_helper_gvec_add8,\n+ .op_v64 = INDEX_op_add8_v64,\n+ .op_v128 = INDEX_op_add8_v128,\n+ .op_v256 = INDEX_op_add8_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_add16(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_vec_add16,\n+ .fno = gen_helper_gvec_add16,\n+ .op_v64 = INDEX_op_add16_v64,\n+ .op_v128 = INDEX_op_add16_v128,\n+ .op_v256 = INDEX_op_add16_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_add32(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni4 = tcg_gen_add_i32,\n+ .fno = gen_helper_gvec_add32,\n+ .op_v64 = INDEX_op_add32_v64,\n+ .op_v128 = INDEX_op_add32_v128,\n+ .op_v256 = INDEX_op_add32_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_add64(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_add_i64,\n+ .fno = gen_helper_gvec_add64,\n+ .op_v128 = INDEX_op_add64_v128,\n+ .op_v256 = INDEX_op_add64_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)\n+{\n+ TCGv_i64 t1 = tcg_temp_new_i64();\n+ TCGv_i64 t2 = tcg_temp_new_i64();\n+ TCGv_i64 t3 = tcg_temp_new_i64();\n+\n+ tcg_gen_or_i64(t1, a, m);\n+ tcg_gen_andc_i64(t2, b, m);\n+ tcg_gen_eqv_i64(t3, a, b);\n+ tcg_gen_sub_i64(d, t1, t2);\n+ tcg_gen_and_i64(t3, t3, m);\n+ tcg_gen_xor_i64(d, d, t3);\n+\n+ tcg_temp_free_i64(t1);\n+ tcg_temp_free_i64(t2);\n+ tcg_temp_free_i64(t3);\n+}\n+\n+void tcg_gen_vec_sub8(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+ TCGv_i64 m = tcg_const_i64(REP8(0x80));\n+ gen_subv_mask(d, a, b, m);\n+ tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_sub16(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+ TCGv_i64 m = tcg_const_i64(REP16(0x8000));\n+ gen_subv_mask(d, a, b, m);\n+ tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_sub32(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+ TCGv_i64 t1 = tcg_temp_new_i64();\n+ TCGv_i64 t2 = tcg_temp_new_i64();\n+\n+ tcg_gen_andi_i64(t1, b, ~0xffffffffull);\n+ tcg_gen_sub_i64(t2, a, b);\n+ tcg_gen_sub_i64(t1, a, t1);\n+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);\n+\n+ tcg_temp_free_i64(t1);\n+ tcg_temp_free_i64(t2);\n+}\n+\n+void tcg_gen_gvec_sub8(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_vec_sub8,\n+ .fno = gen_helper_gvec_sub8,\n+ .op_v64 = INDEX_op_sub8_v64,\n+ .op_v128 = INDEX_op_sub8_v128,\n+ .op_v256 = INDEX_op_sub8_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_sub16(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_vec_sub16,\n+ .fno = gen_helper_gvec_sub16,\n+ .op_v64 = INDEX_op_sub16_v64,\n+ .op_v128 = INDEX_op_sub16_v128,\n+ .op_v256 = INDEX_op_sub16_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_sub32(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni4 = tcg_gen_sub_i32,\n+ .fno = gen_helper_gvec_sub32,\n+ .op_v64 = INDEX_op_sub32_v64,\n+ .op_v128 = INDEX_op_sub32_v128,\n+ .op_v256 = INDEX_op_sub32_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_sub64(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_sub_i64,\n+ .fno = gen_helper_gvec_sub64,\n+ .op_v128 = INDEX_op_sub64_v128,\n+ .op_v256 = INDEX_op_sub64_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_and(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_and_i64,\n+ .fno = gen_helper_gvec_and,\n+ .op_v64 = INDEX_op_and_v64,\n+ .op_v128 = INDEX_op_and_v128,\n+ .op_v256 = INDEX_op_and_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_or(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_or_i64,\n+ .fno = gen_helper_gvec_or,\n+ .op_v64 = INDEX_op_or_v64,\n+ .op_v128 = INDEX_op_or_v128,\n+ .op_v256 = INDEX_op_or_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_xor(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_xor_i64,\n+ .fno = gen_helper_gvec_xor,\n+ .op_v64 = INDEX_op_xor_v64,\n+ .op_v128 = INDEX_op_xor_v128,\n+ .op_v256 = INDEX_op_xor_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_andc(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_andc_i64,\n+ .fno = gen_helper_gvec_andc,\n+ .op_v64 = INDEX_op_andc_v64,\n+ .op_v128 = INDEX_op_andc_v128,\n+ .op_v256 = INDEX_op_andc_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_orc(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+ uint32_t opsz, uint32_t clsz)\n+{\n+ static const GVecGen3 g = {\n+ .fni8 = tcg_gen_orc_i64,\n+ .fno = gen_helper_gvec_orc,\n+ .op_v64 = INDEX_op_orc_v64,\n+ .op_v128 = INDEX_op_orc_v128,\n+ .op_v256 = INDEX_op_orc_v256,\n+ };\n+ tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\ndiff --git a/tcg/tcg-runtime-gvec.c b/tcg/tcg-runtime-gvec.c\nnew file mode 100644\nindex 0000000000..ff0f896629\n--- /dev/null\n+++ b/tcg/tcg-runtime-gvec.c\n@@ -0,0 +1,192 @@\n+/*\n+ * Generic vectorized operation runtime\n+ *\n+ * Copyright (c) 2017 Linaro\n+ *\n+ * This library is free software; you can redistribute it and/or\n+ * modify it under the terms of the GNU Lesser General Public\n+ * License as published by the Free Software Foundation; either\n+ * version 2 of the License, or (at your option) any later version.\n+ *\n+ * This library is distributed in the hope that it will be useful,\n+ * but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n+ * Lesser General Public License for more details.\n+ *\n+ * You should have received a copy of the GNU Lesser General Public\n+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.\n+ */\n+\n+#include \"qemu/osdep.h\"\n+#include \"qemu/host-utils.h\"\n+#include \"cpu.h\"\n+#include \"exec/helper-proto.h\"\n+#include \"tcg-gvec-desc.h\"\n+\n+\n+/* Virtually all hosts support 16-byte vectors. Those that don't can emulate\n+ them via GCC's generic vector extension. This turns out to be simpler and\n+ more reliable than getting the compiler to autovectorize.\n+\n+ In tcg-op-gvec.c, we asserted that both the size and alignment\n+ of the data are multiples of 16. */\n+\n+typedef uint8_t vec8 __attribute__((vector_size(16)));\n+typedef uint16_t vec16 __attribute__((vector_size(16)));\n+typedef uint32_t vec32 __attribute__((vector_size(16)));\n+typedef uint64_t vec64 __attribute__((vector_size(16)));\n+\n+static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)\n+{\n+ intptr_t maxsz = simd_maxsz(desc);\n+ intptr_t i;\n+\n+ if (unlikely(maxsz > oprsz)) {\n+ for (i = oprsz; i < maxsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = (vec64){ 0 };\n+ }\n+ }\n+}\n+\n+void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec8)) {\n+ *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec16)) {\n+ *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec32)) {\n+ *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec8)) {\n+ *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec16)) {\n+ *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec32)) {\n+ *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)\n+{\n+ intptr_t oprsz = simd_oprsz(desc);\n+ intptr_t i;\n+\n+ for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+ *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);\n+ }\n+ clear_high(d, oprsz, desc);\n+}\ndiff --git a/tcg/tcg.c b/tcg/tcg.c\nindex 8fca202bec..240bcaa8d5 100644\n--- a/tcg/tcg.c\n+++ b/tcg/tcg.c\n@@ -602,7 +602,7 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,\n return temp_idx(s, ts);\n }\n \n-static int tcg_temp_new_internal(TCGType type, int temp_local)\n+int tcg_temp_new_internal(TCGType type, int temp_local)\n {\n TCGContext *s = &tcg_ctx;\n TCGTemp *ts;\n@@ -664,7 +664,7 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local)\n return MAKE_TCGV_I64(idx);\n }\n \n-static void tcg_temp_free_internal(int idx)\n+void tcg_temp_free_internal(int idx)\n {\n TCGContext *s = &tcg_ctx;\n TCGTemp *ts;\n", "prefixes": [ "v2", "06/16" ] }