Patch Detail

GET /api/patches/812957/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 812957,
    "url": "http://patchwork.ozlabs.org/api/patches/812957/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20170912162513.21694-7-richard.henderson@linaro.org/",
    "project": {
        "id": 14,
        "url": "http://patchwork.ozlabs.org/api/projects/14/?format=api",
        "name": "QEMU Development",
        "link_name": "qemu-devel",
        "list_id": "qemu-devel.nongnu.org",
        "list_email": "qemu-devel@nongnu.org",
        "web_url": "",
        "scm_url": "",
        "webscm_url": "",
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<20170912162513.21694-7-richard.henderson@linaro.org>",
    "list_archive_url": null,
    "date": "2017-09-12T16:25:03",
    "name": "[v2,06/16] tcg: Add vector infrastructure and ops for add/sub/logic",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "09f3969fa7230de72e77e336b8c4eb1fba40f805",
    "submitter": {
        "id": 72104,
        "url": "http://patchwork.ozlabs.org/api/people/72104/?format=api",
        "name": "Richard Henderson",
        "email": "richard.henderson@linaro.org"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/qemu-devel/patch/20170912162513.21694-7-richard.henderson@linaro.org/mbox/",
    "series": [
        {
            "id": 2737,
            "url": "http://patchwork.ozlabs.org/api/series/2737/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/qemu-devel/list/?series=2737",
            "date": "2017-09-12T16:24:59",
            "name": "TCG vectorization and example conversion",
            "version": 2,
            "mbox": "http://patchwork.ozlabs.org/series/2737/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/812957/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/812957/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>",
        "X-Original-To": "incoming@patchwork.ozlabs.org",
        "Delivered-To": "patchwork-incoming@bilbo.ozlabs.org",
        "Authentication-Results": [
            "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=nongnu.org\n\t(client-ip=2001:4830:134:3::11; helo=lists.gnu.org;\n\tenvelope-from=qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org;\n\treceiver=<UNKNOWN>)",
            "ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key;\n\tunprotected) header.d=linaro.org header.i=@linaro.org\n\theader.b=\"M7+F76lk\"; dkim-atps=neutral"
        ],
        "Received": [
            "from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11])\n\t(using TLSv1 with cipher AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xs9Hq0Ykmz9s7f\n\tfor <incoming@patchwork.ozlabs.org>;\n\tWed, 13 Sep 2017 02:29:59 +1000 (AEST)",
            "from localhost ([::1]:36897 helo=lists.gnu.org)\n\tby lists.gnu.org with esmtp (Exim 4.71) (envelope-from\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>)\n\tid 1dro4S-0005mf-Vr\n\tfor incoming@patchwork.ozlabs.org; Tue, 12 Sep 2017 12:29:57 -0400",
            "from eggs.gnu.org ([2001:4830:134:3::10]:37947)\n\tby lists.gnu.org with esmtp (Exim 4.71)\n\t(envelope-from <richard.henderson@linaro.org>) id 1dro09-0001kT-7j\n\tfor qemu-devel@nongnu.org; Tue, 12 Sep 2017 12:25:32 -0400",
            "from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)\n\t(envelope-from <richard.henderson@linaro.org>) id 1dro06-00072Z-4d\n\tfor qemu-devel@nongnu.org; Tue, 12 Sep 2017 12:25:29 -0400",
            "from mail-pf0-x236.google.com ([2607:f8b0:400e:c00::236]:33034)\n\tby eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16)\n\t(Exim 4.71) (envelope-from <richard.henderson@linaro.org>)\n\tid 1dro05-000722-QC\n\tfor qemu-devel@nongnu.org; Tue, 12 Sep 2017 12:25:26 -0400",
            "by mail-pf0-x236.google.com with SMTP id y29so18668673pff.0\n\tfor <qemu-devel@nongnu.org>; Tue, 12 Sep 2017 09:25:25 -0700 (PDT)",
            "from bigtime.twiddle.net (97-126-103-167.tukw.qwest.net.\n\t[97.126.103.167]) by smtp.gmail.com with ESMTPSA id\n\tb22sm20382140pfh.175.2017.09.12.09.25.23\n\t(version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256);\n\tTue, 12 Sep 2017 09:25:23 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google;\n\th=from:to:cc:subject:date:message-id:in-reply-to:references;\n\tbh=0JcS9EA47A/X+tvVNHzf7WGLdo5Pi/Td5FVLvgIbWrg=;\n\tb=M7+F76lkVtlzvLmbfmBSCr0lPcyK4xeh8aezYb9W9O0aK0lOV2rkJ0TzxhtNGTjx9e\n\tAbIFASZSaN44kmo4XUY9w7LekjbtG/+ErYL/clxlih852xYcrkQH3bwOnaoNINTbRUZP\n\tuZHb+MmztNbk3cxDi/dBD956gMZi9BlGCVlmQ=",
        "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n\t:references;\n\tbh=0JcS9EA47A/X+tvVNHzf7WGLdo5Pi/Td5FVLvgIbWrg=;\n\tb=ltGoiNdtkULhpjTVcFo4vswCOk2fVG7OaIqOvYmA42gCUPbuhFjoQCEmy4mORxwgNe\n\tZxHS7GFEHdI8Yb6d4KJZ1chNK+GYS6sSeqQsjOYTZBN8d2d5C/RpnvrxLmW78G460aeN\n\tdla0SQ+PcN3sao0+8bzoqBRPD0h4XAepuRqzodYJtafkwj1G34kNkhuaV1QSerceScXa\n\thtgq3waoG7jmYP8qh2WSfcWf5Qeg5BGkiFfhDTHIPoz0VJz7xUQNh/B1OAVSJMQA3BPL\n\t/YXS87mOyAXlgCOWVSC43OT9P2THqFW0yGfHY8T8sInwBU/HsDTC8VVjw9GfvxF4bzTD\n\tezVA==",
        "X-Gm-Message-State": "AHPjjUi54S0nJ6NY+q+Gm3xpfUAV1RGMczokQtJvTPF3C6fXl/9EgftW\n\tukgjcLtSQrzHaI4iebku2Q==",
        "X-Google-Smtp-Source": "ADKCNb6Cl2/cmQ/FvWWU3ZZi619ZtYBI900nVUH62HuJYG0JfNRFX+vJwlf67/A4AkEn+gFR74hmUw==",
        "X-Received": "by 10.98.155.220 with SMTP id e89mr16134570pfk.120.1505233524206;\n\tTue, 12 Sep 2017 09:25:24 -0700 (PDT)",
        "From": "Richard Henderson <richard.henderson@linaro.org>",
        "To": "qemu-devel@nongnu.org",
        "Date": "Tue, 12 Sep 2017 09:25:03 -0700",
        "Message-Id": "<20170912162513.21694-7-richard.henderson@linaro.org>",
        "X-Mailer": "git-send-email 2.13.5",
        "In-Reply-To": "<20170912162513.21694-1-richard.henderson@linaro.org>",
        "References": "<20170912162513.21694-1-richard.henderson@linaro.org>",
        "X-detected-operating-system": "by eggs.gnu.org: Genre and OS details not\n\trecognized.",
        "X-Received-From": "2607:f8b0:400e:c00::236",
        "Subject": "[Qemu-devel] [PATCH v2 06/16] tcg: Add vector infrastructure and\n\tops for add/sub/logic",
        "X-BeenThere": "qemu-devel@nongnu.org",
        "X-Mailman-Version": "2.1.21",
        "Precedence": "list",
        "List-Id": "<qemu-devel.nongnu.org>",
        "List-Unsubscribe": "<https://lists.nongnu.org/mailman/options/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>",
        "List-Archive": "<http://lists.nongnu.org/archive/html/qemu-devel/>",
        "List-Post": "<mailto:qemu-devel@nongnu.org>",
        "List-Help": "<mailto:qemu-devel-request@nongnu.org?subject=help>",
        "List-Subscribe": "<https://lists.nongnu.org/mailman/listinfo/qemu-devel>,\n\t<mailto:qemu-devel-request@nongnu.org?subject=subscribe>",
        "Cc": "alex.bennee@linaro.org, f4bug@amsat.org",
        "Errors-To": "qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org",
        "Sender": "\"Qemu-devel\"\n\t<qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org>"
    },
    "content": "Signed-off-by: Richard Henderson <richard.henderson@linaro.org>\n---\n Makefile.target        |   2 +-\n tcg/tcg-op-gvec.h      |  61 ++++++\n tcg/tcg-runtime.h      |  16 ++\n tcg/tcg.h              |   2 +\n tcg/tcg-op-gvec.c      | 489 +++++++++++++++++++++++++++++++++++++++++++++++++\n tcg/tcg-runtime-gvec.c | 192 +++++++++++++++++++\n tcg/tcg.c              |   4 +-\n 7 files changed, 763 insertions(+), 3 deletions(-)\n create mode 100644 tcg/tcg-runtime-gvec.c",
    "diff": "diff --git a/Makefile.target b/Makefile.target\nindex e647b6e2cb..9eefe7cbd7 100644\n--- a/Makefile.target\n+++ b/Makefile.target\n@@ -95,7 +95,7 @@ obj-y += exec.o\n obj-y += accel/\n obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-gvec.o\n obj-$(CONFIG_TCG) += tcg/optimize.o tcg/tcg-common.o\n-obj-$(CONFIG_TCG) += tcg/tcg-runtime.o\n+obj-$(CONFIG_TCG) += tcg/tcg-runtime.o tcg/tcg-runtime-gvec.o\n obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o\n obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o\n obj-y += fpu/softfloat.o\ndiff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h\nindex affb7c2e89..11d04342b6 100644\n--- a/tcg/tcg-op-gvec.h\n+++ b/tcg/tcg-op-gvec.h\n@@ -41,3 +41,64 @@ typedef void (gen_helper_gvec_3_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr,\n void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n                         TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,\n                         uint32_t data, gen_helper_gvec_3_ptr *fn);\n+\n+/* Expand a gvec operation.  Either inline or out-of-line depending on\n+   the actual vector size and the operations supported by the host.  */\n+typedef struct {\n+    /* \"Small\" sizes: expand inline as a 64-bit or 32-bit lane.\n+       Only one of these will be non-NULL.  */\n+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);\n+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);\n+    /* Larger sizes: expand out-of-line helper w/descriptor.  */\n+    gen_helper_gvec_3 *fno;\n+    /* Host vector operations.  */\n+    TCGOpcode op_v64;\n+    TCGOpcode op_v128;\n+    TCGOpcode op_v256;\n+} GVecGen3;\n+\n+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                    uint32_t opsz, uint32_t clsz, const GVecGen3 *);\n+\n+/* Expand a specific vector operation.  */\n+\n+#define DEF(X) \\\n+    void tcg_gen_gvec_##X(uint32_t dofs, uint32_t aofs, uint32_t bofs, \\\n+                          uint32_t opsz, uint32_t clsz)\n+\n+DEF(add8);\n+DEF(add16);\n+DEF(add32);\n+DEF(add64);\n+\n+DEF(sub8);\n+DEF(sub16);\n+DEF(sub32);\n+DEF(sub64);\n+\n+DEF(and);\n+DEF(or);\n+DEF(xor);\n+DEF(andc);\n+DEF(orc);\n+\n+#undef DEF\n+\n+/*\n+ * 64-bit vector operations.  Use these when the register has been allocated\n+ * with tcg_global_mem_new_i64, and so we cannot also address it via pointer.\n+ * OPRSZ = MAXSZ = 8.\n+ */\n+\n+#define DEF(X) \\\n+    void tcg_gen_vec_##X(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+\n+DEF(add8);\n+DEF(add16);\n+DEF(add32);\n+\n+DEF(sub8);\n+DEF(sub16);\n+DEF(sub32);\n+\n+#undef DEF\ndiff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h\nindex c41d38a557..befb0fa659 100644\n--- a/tcg/tcg-runtime.h\n+++ b/tcg/tcg-runtime.h\n@@ -134,3 +134,19 @@ GEN_ATOMIC_HELPERS(xor_fetch)\n GEN_ATOMIC_HELPERS(xchg)\n \n #undef GEN_ATOMIC_HELPERS\n+\n+DEF_HELPER_FLAGS_4(gvec_add8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_add16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_add32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_add64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+\n+DEF_HELPER_FLAGS_4(gvec_sub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_sub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_sub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_sub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+\n+DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_xor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_andc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\n+DEF_HELPER_FLAGS_4(gvec_orc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)\ndiff --git a/tcg/tcg.h b/tcg/tcg.h\nindex b81c67a754..37ad9fddab 100644\n--- a/tcg/tcg.h\n+++ b/tcg/tcg.h\n@@ -824,9 +824,11 @@ int tcg_global_mem_new_internal(TCGType, TCGv_ptr, intptr_t, const char *);\n TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name);\n TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name);\n \n+int tcg_temp_new_internal(TCGType type, int temp_local);\n TCGv_i32 tcg_temp_new_internal_i32(int temp_local);\n TCGv_i64 tcg_temp_new_internal_i64(int temp_local);\n \n+void tcg_temp_free_internal(int idx);\n void tcg_temp_free_i32(TCGv_i32 arg);\n void tcg_temp_free_i64(TCGv_i64 arg);\n \ndiff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c\nindex f48415020d..4b39617682 100644\n--- a/tcg/tcg-op-gvec.c\n+++ b/tcg/tcg-op-gvec.c\n@@ -24,6 +24,30 @@\n #include \"tcg-op-gvec.h\"\n #include \"tcg-gvec-desc.h\"\n \n+#define REP8(x)    ((x) * 0x0101010101010101ull)\n+#define REP16(x)   ((x) * 0x0001000100010001ull)\n+\n+#define MAX_UNROLL  4\n+\n+/* Verify vector size and alignment rules.  OFS should be the OR of all\n+   of the operand offsets so that we can check them all at once.  */\n+static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)\n+{\n+    uint32_t align = maxsz > 16 || oprsz >= 16 ? 15 : 7;\n+    tcg_debug_assert(oprsz > 0);\n+    tcg_debug_assert(oprsz <= maxsz);\n+    tcg_debug_assert((oprsz & align) == 0);\n+    tcg_debug_assert((maxsz & align) == 0);\n+    tcg_debug_assert((ofs & align) == 0);\n+}\n+\n+/* Verify vector overlap rules for three operands.  */\n+static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)\n+{\n+    tcg_debug_assert(d == a || d + s <= a || a + s <= d);\n+    tcg_debug_assert(d == b || d + s <= b || b + s <= d);\n+    tcg_debug_assert(a == b || a + s <= b || b + s <= a);\n+}\n \n /* Create a descriptor from components.  */\n uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)\n@@ -91,3 +115,468 @@ void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n     tcg_temp_free_ptr(a2);\n     tcg_temp_free_i32(desc);\n }\n+\n+/* Return true if we want to implement something of OPRSZ bytes\n+   in units of LNSZ.  This limits the expansion of inline code.  */\n+static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)\n+{\n+    uint32_t lnct = oprsz / lnsz;\n+    return lnct >= 1 && lnct <= MAX_UNROLL;\n+}\n+\n+/* Clear MAXSZ bytes at DOFS using elements of TYPE.  LNSZ = sizeof(TYPE);\n+   OPC_MV is the opcode that zeros; OPC_ST is the opcode that stores.  */\n+static void expand_clr_v(uint32_t dofs, uint32_t maxsz, uint32_t lnsz,\n+                         TCGType type, TCGOpcode opc_mv, TCGOpcode opc_st)\n+{\n+    TCGArg t0 = tcg_temp_new_internal(type, 0);\n+    TCGArg env = GET_TCGV_PTR(tcg_ctx.tcg_env);\n+    uint32_t i;\n+\n+    tcg_gen_op2(&tcg_ctx, opc_mv, t0, 0);\n+    for (i = 0; i < maxsz; i += lnsz) {\n+        tcg_gen_op3(&tcg_ctx, opc_st, t0, env, dofs + i);\n+    }\n+    tcg_temp_free_internal(t0);\n+}\n+\n+/* Clear MAXSZ bytes at DOFS.  */\n+static void expand_clr(uint32_t dofs, uint32_t maxsz)\n+{\n+    if (maxsz >= 32 && TCG_TARGET_HAS_v256) {\n+        uint32_t done = QEMU_ALIGN_DOWN(maxsz, 32);\n+        expand_clr_v(dofs, done, 32, TCG_TYPE_V256,\n+                     INDEX_op_movi_v256, INDEX_op_st_v256);\n+        dofs += done;\n+        maxsz -= done;\n+    }\n+\n+    if (maxsz >= 16 && TCG_TARGET_HAS_v128) {\n+        uint16_t done = QEMU_ALIGN_DOWN(maxsz, 16);\n+        expand_clr_v(dofs, done, 16, TCG_TYPE_V128,\n+                     INDEX_op_movi_v128, INDEX_op_st_v128);\n+        dofs += done;\n+        maxsz -= done;\n+    }\n+\n+    if (TCG_TARGET_REG_BITS == 64) {\n+        expand_clr_v(dofs, maxsz, 8, TCG_TYPE_I64,\n+                     INDEX_op_movi_i64, INDEX_op_st_i64);\n+    } else if (TCG_TARGET_HAS_v64) {\n+        expand_clr_v(dofs, maxsz, 8, TCG_TYPE_V64,\n+                     INDEX_op_movi_v64, INDEX_op_st_v64);\n+    } else {\n+        expand_clr_v(dofs, maxsz, 4, TCG_TYPE_I32,\n+                     INDEX_op_movi_i32, INDEX_op_st_i32);\n+    }\n+}\n+\n+/* Expand OPSZ bytes worth of three-operand operations using i32 elements.  */\n+static void expand_3x4(uint32_t dofs, uint32_t aofs,\n+                       uint32_t bofs, uint32_t opsz,\n+                       void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))\n+{\n+    TCGv_i32 t0 = tcg_temp_new_i32();\n+    TCGv_i32 t1 = tcg_temp_new_i32();\n+    uint32_t i;\n+\n+    for (i = 0; i < opsz; i += 4) {\n+        tcg_gen_ld_i32(t0, tcg_ctx.tcg_env, aofs + i);\n+        tcg_gen_ld_i32(t1, tcg_ctx.tcg_env, bofs + i);\n+        fni(t0, t0, t1);\n+        tcg_gen_st_i32(t0, tcg_ctx.tcg_env, dofs + i);\n+    }\n+    tcg_temp_free_i32(t1);\n+    tcg_temp_free_i32(t0);\n+}\n+\n+/* Expand OPSZ bytes worth of three-operand operations using i64 elements.  */\n+static void expand_3x8(uint32_t dofs, uint32_t aofs,\n+                       uint32_t bofs, uint32_t opsz,\n+                       void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))\n+{\n+    TCGv_i64 t0 = tcg_temp_new_i64();\n+    TCGv_i64 t1 = tcg_temp_new_i64();\n+    uint32_t i;\n+\n+    for (i = 0; i < opsz; i += 8) {\n+        tcg_gen_ld_i64(t0, tcg_ctx.tcg_env, aofs + i);\n+        tcg_gen_ld_i64(t1, tcg_ctx.tcg_env, bofs + i);\n+        fni(t0, t0, t1);\n+        tcg_gen_st_i64(t0, tcg_ctx.tcg_env, dofs + i);\n+    }\n+    tcg_temp_free_i64(t1);\n+    tcg_temp_free_i64(t0);\n+}\n+\n+/* Expand OPSZ bytes worth of three-operand operations using vector elements.\n+   OPC_OP is the operation, OPC_LD is the load, OPC_ST is the store.  */\n+static void expand_3_v(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                       uint32_t oprsz, uint32_t lnsz, TCGType type,\n+                       TCGOpcode opc_op, TCGOpcode opc_ld, TCGOpcode opc_st)\n+{\n+    TCGArg t0 = tcg_temp_new_internal(type, 0);\n+    TCGArg env = GET_TCGV_PTR(tcg_ctx.tcg_env);\n+    uint32_t i;\n+\n+    if (aofs == bofs) {\n+        for (i = 0; i < oprsz; i += lnsz) {\n+            tcg_gen_op3(&tcg_ctx, opc_ld, t0, env, aofs + i);\n+            tcg_gen_op3(&tcg_ctx, opc_op, t0, t0, t0);\n+            tcg_gen_op3(&tcg_ctx, opc_st, t0, env, dofs + i);\n+        }\n+    } else {\n+        TCGArg t1 = tcg_temp_new_internal(type, 0);\n+        for (i = 0; i < oprsz; i += lnsz) {\n+            tcg_gen_op3(&tcg_ctx, opc_ld, t0, env, aofs + i);\n+            tcg_gen_op3(&tcg_ctx, opc_ld, t1, env, bofs + i);\n+            tcg_gen_op3(&tcg_ctx, opc_op, t0, t0, t1);\n+            tcg_gen_op3(&tcg_ctx, opc_st, t0, env, dofs + i);\n+        }\n+        tcg_temp_free_internal(t1);\n+    }\n+    tcg_temp_free_internal(t0);\n+}\n+\n+/* Expand a vector three-operand operation.  */\n+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                    uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)\n+{\n+    check_size_align(oprsz, maxsz, dofs | aofs | bofs);\n+    check_overlap_3(dofs, aofs, bofs, maxsz);\n+\n+    /* Quick check for sizes we won't support inline.  */\n+    if (oprsz > MAX_UNROLL * 32 || maxsz > MAX_UNROLL * 32) {\n+        goto do_ool;\n+    }\n+\n+    /* Recall that ARM SVE allows vector sizes that are not a power of 2.\n+       Expand with successively smaller host vector sizes.  The intent is\n+       that e.g. oprsz == 80 would be expanded with 2x32 + 1x16.  */\n+    /* ??? For maxsz > oprsz, the host may be able to use an op-sized\n+       operation, zeroing the balance of the register.  We can then\n+       use a cl-sized store to implement the clearing without an extra\n+       store operation.  This is true for aarch64 and x86_64 hosts.  */\n+\n+    if (check_size_impl(oprsz, 32) && tcg_op_supported(g->op_v256)) {\n+        uint32_t done = QEMU_ALIGN_DOWN(oprsz, 32);\n+        expand_3_v(dofs, aofs, bofs, done, 32, TCG_TYPE_V256,\n+                   g->op_v256, INDEX_op_ld_v256, INDEX_op_st_v256);\n+        dofs += done;\n+        aofs += done;\n+        bofs += done;\n+        oprsz -= done;\n+        maxsz -= done;\n+    }\n+\n+    if (check_size_impl(oprsz, 16) && tcg_op_supported(g->op_v128)) {\n+        uint32_t done = QEMU_ALIGN_DOWN(oprsz, 16);\n+        expand_3_v(dofs, aofs, bofs, done, 16, TCG_TYPE_V128,\n+                   g->op_v128, INDEX_op_ld_v128, INDEX_op_st_v128);\n+        dofs += done;\n+        aofs += done;\n+        bofs += done;\n+        oprsz -= done;\n+        maxsz -= done;\n+    }\n+\n+    if (check_size_impl(oprsz, 8)) {\n+        uint32_t done = QEMU_ALIGN_DOWN(oprsz, 8);\n+        if (tcg_op_supported(g->op_v64)) {\n+            expand_3_v(dofs, aofs, bofs, done, 8, TCG_TYPE_V64,\n+                       g->op_v64, INDEX_op_ld_v64, INDEX_op_st_v64);\n+        } else if (g->fni8) {\n+            expand_3x8(dofs, aofs, bofs, done, g->fni8);\n+        } else {\n+            done = 0;\n+        }\n+        dofs += done;\n+        aofs += done;\n+        bofs += done;\n+        oprsz -= done;\n+        maxsz -= done;\n+    }\n+\n+    if (check_size_impl(oprsz, 4)) {\n+        uint32_t done = QEMU_ALIGN_DOWN(oprsz, 4);\n+        expand_3x4(dofs, aofs, bofs, done, g->fni4);\n+        dofs += done;\n+        aofs += done;\n+        bofs += done;\n+        oprsz -= done;\n+        maxsz -= done;\n+    }\n+\n+    if (oprsz == 0) {\n+        if (maxsz != 0) {\n+            expand_clr(dofs, maxsz);\n+        }\n+        return;\n+    }\n+\n+ do_ool:\n+    tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, g->fno);\n+}\n+\n+/*\n+ * Expand specific vector operations.\n+ */\n+\n+static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)\n+{\n+    TCGv_i64 t1 = tcg_temp_new_i64();\n+    TCGv_i64 t2 = tcg_temp_new_i64();\n+    TCGv_i64 t3 = tcg_temp_new_i64();\n+\n+    tcg_gen_andc_i64(t1, a, m);\n+    tcg_gen_andc_i64(t2, b, m);\n+    tcg_gen_xor_i64(t3, a, b);\n+    tcg_gen_add_i64(d, t1, t2);\n+    tcg_gen_and_i64(t3, t3, m);\n+    tcg_gen_xor_i64(d, d, t3);\n+\n+    tcg_temp_free_i64(t1);\n+    tcg_temp_free_i64(t2);\n+    tcg_temp_free_i64(t3);\n+}\n+\n+void tcg_gen_vec_add8(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+    TCGv_i64 m = tcg_const_i64(REP8(0x80));\n+    gen_addv_mask(d, a, b, m);\n+    tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_add16(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+    TCGv_i64 m = tcg_const_i64(REP16(0x8000));\n+    gen_addv_mask(d, a, b, m);\n+    tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_add32(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+    TCGv_i64 t1 = tcg_temp_new_i64();\n+    TCGv_i64 t2 = tcg_temp_new_i64();\n+\n+    tcg_gen_andi_i64(t1, a, ~0xffffffffull);\n+    tcg_gen_add_i64(t2, a, b);\n+    tcg_gen_add_i64(t1, t1, b);\n+    tcg_gen_deposit_i64(d, t1, t2, 0, 32);\n+\n+    tcg_temp_free_i64(t1);\n+    tcg_temp_free_i64(t2);\n+}\n+\n+void tcg_gen_gvec_add8(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                       uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_vec_add8,\n+        .fno = gen_helper_gvec_add8,\n+        .op_v64 = INDEX_op_add8_v64,\n+        .op_v128 = INDEX_op_add8_v128,\n+        .op_v256 = INDEX_op_add8_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_add16(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                        uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_vec_add16,\n+        .fno = gen_helper_gvec_add16,\n+        .op_v64 = INDEX_op_add16_v64,\n+        .op_v128 = INDEX_op_add16_v128,\n+        .op_v256 = INDEX_op_add16_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_add32(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                        uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni4 = tcg_gen_add_i32,\n+        .fno = gen_helper_gvec_add32,\n+        .op_v64 = INDEX_op_add32_v64,\n+        .op_v128 = INDEX_op_add32_v128,\n+        .op_v256 = INDEX_op_add32_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_add64(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                        uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_add_i64,\n+        .fno = gen_helper_gvec_add64,\n+        .op_v128 = INDEX_op_add64_v128,\n+        .op_v256 = INDEX_op_add64_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)\n+{\n+    TCGv_i64 t1 = tcg_temp_new_i64();\n+    TCGv_i64 t2 = tcg_temp_new_i64();\n+    TCGv_i64 t3 = tcg_temp_new_i64();\n+\n+    tcg_gen_or_i64(t1, a, m);\n+    tcg_gen_andc_i64(t2, b, m);\n+    tcg_gen_eqv_i64(t3, a, b);\n+    tcg_gen_sub_i64(d, t1, t2);\n+    tcg_gen_and_i64(t3, t3, m);\n+    tcg_gen_xor_i64(d, d, t3);\n+\n+    tcg_temp_free_i64(t1);\n+    tcg_temp_free_i64(t2);\n+    tcg_temp_free_i64(t3);\n+}\n+\n+void tcg_gen_vec_sub8(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+    TCGv_i64 m = tcg_const_i64(REP8(0x80));\n+    gen_subv_mask(d, a, b, m);\n+    tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_sub16(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+    TCGv_i64 m = tcg_const_i64(REP16(0x8000));\n+    gen_subv_mask(d, a, b, m);\n+    tcg_temp_free_i64(m);\n+}\n+\n+void tcg_gen_vec_sub32(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)\n+{\n+    TCGv_i64 t1 = tcg_temp_new_i64();\n+    TCGv_i64 t2 = tcg_temp_new_i64();\n+\n+    tcg_gen_andi_i64(t1, b, ~0xffffffffull);\n+    tcg_gen_sub_i64(t2, a, b);\n+    tcg_gen_sub_i64(t1, a, t1);\n+    tcg_gen_deposit_i64(d, t1, t2, 0, 32);\n+\n+    tcg_temp_free_i64(t1);\n+    tcg_temp_free_i64(t2);\n+}\n+\n+void tcg_gen_gvec_sub8(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                       uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_vec_sub8,\n+        .fno = gen_helper_gvec_sub8,\n+        .op_v64 = INDEX_op_sub8_v64,\n+        .op_v128 = INDEX_op_sub8_v128,\n+        .op_v256 = INDEX_op_sub8_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_sub16(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                        uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_vec_sub16,\n+        .fno = gen_helper_gvec_sub16,\n+        .op_v64 = INDEX_op_sub16_v64,\n+        .op_v128 = INDEX_op_sub16_v128,\n+        .op_v256 = INDEX_op_sub16_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_sub32(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                        uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni4 = tcg_gen_sub_i32,\n+        .fno = gen_helper_gvec_sub32,\n+        .op_v64 = INDEX_op_sub32_v64,\n+        .op_v128 = INDEX_op_sub32_v128,\n+        .op_v256 = INDEX_op_sub32_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_sub64(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                        uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_sub_i64,\n+        .fno = gen_helper_gvec_sub64,\n+        .op_v128 = INDEX_op_sub64_v128,\n+        .op_v256 = INDEX_op_sub64_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_and(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                      uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_and_i64,\n+        .fno = gen_helper_gvec_and,\n+        .op_v64 = INDEX_op_and_v64,\n+        .op_v128 = INDEX_op_and_v128,\n+        .op_v256 = INDEX_op_and_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_or(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                     uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_or_i64,\n+        .fno = gen_helper_gvec_or,\n+        .op_v64 = INDEX_op_or_v64,\n+        .op_v128 = INDEX_op_or_v128,\n+        .op_v256 = INDEX_op_or_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_xor(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                      uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_xor_i64,\n+        .fno = gen_helper_gvec_xor,\n+        .op_v64 = INDEX_op_xor_v64,\n+        .op_v128 = INDEX_op_xor_v128,\n+        .op_v256 = INDEX_op_xor_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_andc(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                       uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_andc_i64,\n+        .fno = gen_helper_gvec_andc,\n+        .op_v64 = INDEX_op_andc_v64,\n+        .op_v128 = INDEX_op_andc_v128,\n+        .op_v256 = INDEX_op_andc_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\n+\n+void tcg_gen_gvec_orc(uint32_t dofs, uint32_t aofs, uint32_t bofs,\n+                      uint32_t opsz, uint32_t clsz)\n+{\n+    static const GVecGen3 g = {\n+        .fni8 = tcg_gen_orc_i64,\n+        .fno = gen_helper_gvec_orc,\n+        .op_v64 = INDEX_op_orc_v64,\n+        .op_v128 = INDEX_op_orc_v128,\n+        .op_v256 = INDEX_op_orc_v256,\n+    };\n+    tcg_gen_gvec_3(dofs, aofs, bofs, opsz, clsz, &g);\n+}\ndiff --git a/tcg/tcg-runtime-gvec.c b/tcg/tcg-runtime-gvec.c\nnew file mode 100644\nindex 0000000000..ff0f896629\n--- /dev/null\n+++ b/tcg/tcg-runtime-gvec.c\n@@ -0,0 +1,192 @@\n+/*\n+ *  Generic vectorized operation runtime\n+ *\n+ *  Copyright (c) 2017 Linaro\n+ *\n+ * This library is free software; you can redistribute it and/or\n+ * modify it under the terms of the GNU Lesser General Public\n+ * License as published by the Free Software Foundation; either\n+ * version 2 of the License, or (at your option) any later version.\n+ *\n+ * This library is distributed in the hope that it will be useful,\n+ * but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n+ * Lesser General Public License for more details.\n+ *\n+ * You should have received a copy of the GNU Lesser General Public\n+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.\n+ */\n+\n+#include \"qemu/osdep.h\"\n+#include \"qemu/host-utils.h\"\n+#include \"cpu.h\"\n+#include \"exec/helper-proto.h\"\n+#include \"tcg-gvec-desc.h\"\n+\n+\n+/* Virtually all hosts support 16-byte vectors.  Those that don't can emulate\n+   them via GCC's generic vector extension.  This turns out to be simpler and\n+   more reliable than getting the compiler to autovectorize.\n+\n+   In tcg-op-gvec.c, we asserted that both the size and alignment\n+   of the data are multiples of 16.  */\n+\n+typedef uint8_t vec8 __attribute__((vector_size(16)));\n+typedef uint16_t vec16 __attribute__((vector_size(16)));\n+typedef uint32_t vec32 __attribute__((vector_size(16)));\n+typedef uint64_t vec64 __attribute__((vector_size(16)));\n+\n+static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)\n+{\n+    intptr_t maxsz = simd_maxsz(desc);\n+    intptr_t i;\n+\n+    if (unlikely(maxsz > oprsz)) {\n+        for (i = oprsz; i < maxsz; i += sizeof(vec64)) {\n+            *(vec64 *)(d + i) = (vec64){ 0 };\n+        }\n+    }\n+}\n+\n+void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec8)) {\n+        *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec16)) {\n+        *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec32)) {\n+        *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+        *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec8)) {\n+        *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec16)) {\n+        *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec32)) {\n+        *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+        *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+        *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+        *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+        *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+        *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\n+\n+void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)\n+{\n+    intptr_t oprsz = simd_oprsz(desc);\n+    intptr_t i;\n+\n+    for (i = 0; i < oprsz; i += sizeof(vec64)) {\n+        *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);\n+    }\n+    clear_high(d, oprsz, desc);\n+}\ndiff --git a/tcg/tcg.c b/tcg/tcg.c\nindex 8fca202bec..240bcaa8d5 100644\n--- a/tcg/tcg.c\n+++ b/tcg/tcg.c\n@@ -602,7 +602,7 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,\n     return temp_idx(s, ts);\n }\n \n-static int tcg_temp_new_internal(TCGType type, int temp_local)\n+int tcg_temp_new_internal(TCGType type, int temp_local)\n {\n     TCGContext *s = &tcg_ctx;\n     TCGTemp *ts;\n@@ -664,7 +664,7 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local)\n     return MAKE_TCGV_I64(idx);\n }\n \n-static void tcg_temp_free_internal(int idx)\n+void tcg_temp_free_internal(int idx)\n {\n     TCGContext *s = &tcg_ctx;\n     TCGTemp *ts;\n",
    "prefixes": [
        "v2",
        "06/16"
    ]
}