Patch Detail
get:
Show a patch.
patch:
Partially update a patch (only the fields provided in the request are changed).
put:
Fully update a patch (the request replaces the patch's editable fields).
GET /api/patches/2226263/?format=api
{ "id": 2226263, "url": "http://patchwork.ozlabs.org/api/patches/2226263/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/patch/bmm.hhubd62wbc.gcc.gcc-TEST.pinskia.20.1.1@forge-stage.sourceware.org/", "project": { "id": 17, "url": "http://patchwork.ozlabs.org/api/projects/17/?format=api", "name": "GNU Compiler Collection", "link_name": "gcc", "list_id": "gcc-patches.gcc.gnu.org", "list_email": "gcc-patches@gcc.gnu.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<bmm.hhubd62wbc.gcc.gcc-TEST.pinskia.20.1.1@forge-stage.sourceware.org>", "list_archive_url": null, "date": "2026-04-22T10:29:46", "name": "[v1,01/11] RFC: aarch64: Start to support v4qi modes for SLP", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "1742de785804b57c816b155843108ee236b2ca99", "submitter": { "id": 93219, "url": "http://patchwork.ozlabs.org/api/people/93219/?format=api", "name": "Andrew Pinski via Sourceware Forge", "email": "forge-bot+pinskia@forge-stage.sourceware.org" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/gcc/patch/bmm.hhubd62wbc.gcc.gcc-TEST.pinskia.20.1.1@forge-stage.sourceware.org/mbox/", "series": [ { "id": 500972, "url": "http://patchwork.ozlabs.org/api/series/500972/?format=api", "web_url": "http://patchwork.ozlabs.org/project/gcc/list/?series=500972", "date": "2026-04-22T10:29:49", "name": "WIP: v2hiv4qi", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/500972/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2226263/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2226263/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "gcc-patches@gcc.gnu.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", 
"gcc-patches@gcc.gnu.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org\n (client-ip=38.145.34.32; helo=vm01.sourceware.org;\n envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;\n receiver=patchwork.ozlabs.org)", "sourceware.org; dmarc=none (p=none dis=none)\n header.from=forge-stage.sourceware.org", "sourceware.org;\n spf=pass smtp.mailfrom=forge-stage.sourceware.org", "server2.sourceware.org;\n arc=none smtp.remote-ip=38.145.34.39" ], "Received": [ "from vm01.sourceware.org (vm01.sourceware.org [38.145.34.32])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g0wzl2N59z1yD5\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 22 Apr 2026 20:52:51 +1000 (AEST)", "from vm01.sourceware.org (localhost [127.0.0.1])\n\tby sourceware.org (Postfix) with ESMTP id DA9564422B5F\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 22 Apr 2026 10:52:48 +0000 (GMT)", "from forge-stage.sourceware.org (vm08.sourceware.org [38.145.34.39])\n by sourceware.org (Postfix) with ESMTPS id 4718048FE0BC\n for <gcc-patches@gcc.gnu.org>; Wed, 22 Apr 2026 10:31:06 +0000 (GMT)", "from forge-stage.sourceware.org (localhost [IPv6:::1])\n (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n key-exchange x25519 server-signature ECDSA (prime256v1) server-digest SHA256)\n (No client certificate requested)\n by forge-stage.sourceware.org (Postfix) with ESMTPS id 1A33D42604\n for <gcc-patches@gcc.gnu.org>; Wed, 22 Apr 2026 10:31:06 +0000 (UTC)" ], "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 sourceware.org DA9564422B5F", "OpenDKIM Filter v2.11.0 sourceware.org 4718048FE0BC" ], "DMARC-Filter": "OpenDMARC Filter v1.4.2 sourceware.org 4718048FE0BC", "ARC-Filter": "OpenARC Filter v1.0.0 sourceware.org 4718048FE0BC", "ARC-Seal": "i=1; 
a=rsa-sha256; d=sourceware.org; s=key; t=1776853866; cv=none;\n b=cUS93B/9IJkx5izCzcSPCejvSSnuyz3rVeOdRx7SmqN5kkBNIqrpY2zGowRhpo2OvA/N5LN4eLjHC2Q2zqQ0jM69e84iCx8G9ojdzbYwCjCOcCf7Old1sJ0txF3HWAjBzD341YhVYdisW3s1knDXLuvkgcZZkvrn4C0UKL8lZRE=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=sourceware.org; s=key;\n t=1776853866; c=relaxed/simple;\n bh=Duj2+dFKbYodQYI3psuIV9BpY7OZmZ/UTc6j7d0hIxk=;\n h=From:Date:Subject:To:Message-ID;\n b=sKBhOEPszML7m7PKEaa0OHMFCKqo0uZDhM+oV40OTZL5eWjwdDWHmlD40zSDKCDVfq3VpOSJ1zBsQlWZpfDYXclYIJJMmaidPKIifs5uFOjuhqV6t+3ji0NCR59my3WZVUtFfbf/69xiobeLWgFDcRZdA6x6wvnLTKQm7XnkLn4=", "ARC-Authentication-Results": "i=1; server2.sourceware.org", "From": "Andrew Pinski via Sourceware Forge\n <forge-bot+pinskia@forge-stage.sourceware.org>", "Date": "Wed, 22 Apr 2026 10:29:46 +0000", "Subject": "[PATCH v1 01/11] RFC: aarch64: Start to support v4qi modes for SLP", "To": "gcc-patches mailing list <gcc-patches@gcc.gnu.org>", "Message-ID": "\n <bmm.hhubd62wbc.gcc.gcc-TEST.pinskia.20.1.1@forge-stage.sourceware.org>", "X-Mailer": "batrachomyomachia", "X-Pull-Request-Organization": "gcc", "X-Pull-Request-Repository": "gcc-TEST", "X-Pull-Request": "https://forge.sourceware.org/gcc/gcc-TEST/pulls/20", "References": "\n <bmm.hhubd62wbc.gcc.gcc-TEST.pinskia.20.1.0@forge-stage.sourceware.org>", "In-Reply-To": "\n <bmm.hhubd62wbc.gcc.gcc-TEST.pinskia.20.1.0@forge-stage.sourceware.org>", "X-Patch-URL": "\n https://forge.sourceware.org/pinskia/gcc-TEST/commit/5572fe1804f164f923e4d2fb770dd34c82f4e560", "X-BeenThere": "gcc-patches@gcc.gnu.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "Gcc-patches mailing list <gcc-patches.gcc.gnu.org>", "List-Unsubscribe": "<https://gcc.gnu.org/mailman/options/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>", "List-Archive": "<https://gcc.gnu.org/pipermail/gcc-patches/>", "List-Post": "<mailto:gcc-patches@gcc.gnu.org>", "List-Help": 
"<mailto:gcc-patches-request@gcc.gnu.org?subject=help>", "List-Subscribe": "<https://gcc.gnu.org/mailman/listinfo/gcc-patches>,\n <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>", "Reply-To": "gcc-patches mailing list <gcc-patches@gcc.gnu.org>,\n pinskia@gcc.gnu.org", "Errors-To": "gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org" }, "content": "From: Andrew Pinski <quic_apinski@quicinc.com>\n\nThis is the start of adding V4QI mode to the aarch64 backend to support SLP vectorization.\nCurrently we support addition, subtraction, extend, and truncate for the types.\n\nSigned-off-by: Andrew Pinski <quic_apinski@quicinc.com>\n---\n gcc/config/aarch64/aarch64-modes.def | 1 +\n gcc/config/aarch64/aarch64-simd.md | 569 +++++++++++-------\n gcc/config/aarch64/aarch64.cc | 217 ++++++-\n gcc/config/aarch64/aarch64.opt | 4 +\n gcc/config/aarch64/iterators.md | 129 ++--\n .../gcc.target/aarch64/vect_mixed_sizes_3.c | 6 +-\n .../gcc.target/aarch64/vect_mixed_sizes_6.c | 3 +-\n .../gcc.target/aarch64/vect_mixed_sizes_7.c | 3 +-\n gcc/testsuite/lib/target-supports.exp | 2 +-\n 9 files changed, 643 insertions(+), 291 deletions(-)", "diff": "diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def\nindex 25a22c1195e1..940c57db5739 100644\n--- a/gcc/config/aarch64/aarch64-modes.def\n+++ b/gcc/config/aarch64/aarch64-modes.def\n@@ -70,6 +70,7 @@ ADJUST_ALIGNMENT (VNx2BI, 2);\n FLOAT_MODE (BF, 2, 0);\n ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);\n \n+VECTOR_MODES (INT, 4); /* V4QI V2HI V1SI. */\n VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */\n VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */\n VECTOR_MODES (FLOAT, 8); /* V2SF. 
*/\ndiff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md\nindex cfe95bd4c316..13e632550cd0 100644\n--- a/gcc/config/aarch64/aarch64-simd.md\n+++ b/gcc/config/aarch64/aarch64-simd.md\n@@ -47,8 +47,8 @@\n (define_subst_attr \"vczbe\" \"add_vec_concat_subst_be\" \"\" \"_vec_concatz_be\")\n \n (define_expand \"mov<mode>\"\n- [(set (match_operand:VALL_F16 0 \"nonimmediate_operand\")\n-\t(match_operand:VALL_F16 1 \"general_operand\"))]\n+ [(set (match_operand:VALLS_F16 0 \"nonimmediate_operand\")\n+\t(match_operand:VALLS_F16 1 \"general_operand\"))]\n \"TARGET_FLOAT\"\n \"\n /* Force the operand into a register if it is not an\n@@ -78,8 +78,8 @@\n )\n \n (define_expand \"movmisalign<mode>\"\n- [(set (match_operand:VALL_F16 0 \"nonimmediate_operand\")\n- (match_operand:VALL_F16 1 \"general_operand\"))]\n+ [(set (match_operand:VALLS_F16 0 \"nonimmediate_operand\")\n+ (match_operand:VALLS_F16 1 \"general_operand\"))]\n \"TARGET_FLOAT && !STRICT_ALIGNMENT\"\n {\n /* This pattern is not permitted to fail during expansion: if both arguments\n@@ -91,8 +91,8 @@\n })\n \n (define_insn \"aarch64_simd_dup<mode>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n-\t(vec_duplicate:VDQ_I\n+ [(set (match_operand:VDQS_I 0 \"register_operand\")\n+\t(vec_duplicate:VDQS_I\n \t (match_operand:<VEL> 1 \"register_operand\")))]\n \"TARGET_SIMD\"\n {@ [ cons: =0 , 1 ; attrs: type ]\n@@ -142,26 +142,26 @@\n [(set_attr \"type\" \"neon_dup<q>\")]\n )\n \n-(define_insn_and_split \"*aarch64_simd_mov<VDMOV:mode>\"\n- [(set (match_operand:VDMOV 0 \"nonimmediate_operand\")\n-\t(match_operand:VDMOV 1 \"general_operand\"))]\n+(define_insn_and_split \"*aarch64_simd_mov<VDHMOV:mode>\"\n+ [(set (match_operand:VDHMOV 0 \"nonimmediate_operand\")\n+\t(match_operand:VDHMOV 1 \"general_operand\"))]\n \"TARGET_FLOAT\n && (register_operand (operands[0], <MODE>mode)\n || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))\"\n {@ [cons: =0, 1; attrs: type, arch, length]\n- [w , 
m ; neon_load1_1reg<q> , * , *] ldr\\t%d0, %1\n- [r , m ; load_8 , * , *] ldr\\t%x0, %1\n- [m , Dz; store_8 , * , *] str\\txzr, %0\n- [m , w ; neon_store1_1reg<q>, * , *] str\\t%d1, %0\n- [m , r ; store_8 , * , *] str\\t%x1, %0\n+ [w , m ; neon_load1_1reg<q> , * , *] ldr\\t%<single_type>0, %1\n+ [r , m ; load_8 , * , *] ldr\\t%<single_wx>0, %1\n+ [m , Dz; store_8 , * , *] str\\t<single_wx>zr, %0\n+ [m , w ; neon_store1_1reg<q>, * , *] str\\t%<single_type>1, %0\n+ [m , r ; store_8 , * , *] str\\t%<single_wx>1, %0\n [w , w ; neon_logic<q> , simd , *] mov\\t%0.<Vbtype>, %1.<Vbtype>\n- [w , w ; neon_logic<q> , * , *] fmov\\t%d0, %d1\n- [?r, w ; neon_to_gp<q> , base_simd, *] umov\\t%0, %1.d[0]\n- [?r, w ; neon_to_gp<q> , * , *] fmov\\t%x0, %d1\n- [?w, r ; f_mcr , * , *] fmov\\t%d0, %1\n- [?r, r ; mov_reg , * , *] mov\\t%0, %1\n+ [w , w ; neon_logic<q> , * , *] fmov\\t%<single_type>0, %<single_type>1\n+ [?r, w ; neon_to_gp<q> , base_simd, *] umov\\t%<single_wx>0, %1.<single_type>[0]\n+ [?r, w ; neon_to_gp<q> , * , *] fmov\\t%<single_wx>0, %<single_type>1\n+ [?w, r ; f_mcr , * , *] fmov\\t%<single_type>0, %<single_wx>1\n+ [?r, r ; mov_reg , * , *] mov\\t%<single_wx>0, %<single_wx>1\n [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_imm (operands[1], 64);\n- [w , Dz; f_mcr , * , *] fmov\\t%d0, xzr\n+ [w , Dz; f_mcr , * , *] fmov\\t%<single_type>0, xzr\n [w , Dx; neon_move , simd , 8] #\n }\n \"CONST_INT_P (operands[1])\n@@ -322,45 +322,45 @@\n )\n \n (define_insn \"iorn<mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 \"register_operand\" \"w\"))\n-\t\t(match_operand:VDQ_I 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (ior:VDQ_I (not:VDQS_I (match_operand:VDQ_I 2 \"register_operand\" \"w\"))\n+\t\t(match_operand:VDQS_I 1 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"orn\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>\"\n 
[(set_attr \"type\" \"neon_logic<q>\")]\n )\n \n (define_insn \"andn<mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 \"register_operand\" \"w\"))\n-\t\t(match_operand:VDQ_I 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (and:VDQ_I (not:VDQS_I (match_operand:VDQ_I 2 \"register_operand\" \"w\"))\n+\t\t(match_operand:VDQS_I 1 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"bic\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>\"\n [(set_attr \"type\" \"neon_logic<q>\")]\n )\n \n (define_insn \"add<mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (plus:VDQ_I (match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_I 2 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (plus:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_I 2 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"add\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n [(set_attr \"type\" \"neon_add<q>\")]\n )\n \n (define_insn \"sub<mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (minus:VDQ_I (match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_I 2 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (minus:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_I 2 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"sub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n [(set_attr \"type\" \"neon_sub<q>\")]\n )\n \n (define_insn \"mul<mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n- (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 \"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_BHSI 2 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+ 
(mult:VDQS_BHSI (match_operand:VDQS_BHSI 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_BHSI 2 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n [(set_attr \"type\" \"neon_mul_<Vetype><q>\")]\n@@ -777,16 +777,16 @@\n )\n \n (define_insn \"neg<mode>2<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n-\t(neg:VDQ_I (match_operand:VDQ_I 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+\t(neg:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"neg\\t%0.<Vtype>, %1.<Vtype>\"\n [(set_attr \"type\" \"neon_neg<q>\")]\n )\n \n (define_insn \"abs<mode>2<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (abs:VDQ_I (match_operand:VDQ_I 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (abs:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"abs\\t%0.<Vtype>, %1.<Vtype>\"\n [(set_attr \"type\" \"neon_abs<q>\")]\n@@ -796,9 +796,9 @@\n ;; combine with any operation with an integrated ABS step, such\n ;; as SABD.\n (define_insn \"aarch64_abs<mode><vczle><vczbe>\"\n- [(set (match_operand:VSDQ_I_DI 0 \"register_operand\" \"=w\")\n-\t (unspec:VSDQ_I_DI\n-\t [(match_operand:VSDQ_I_DI 1 \"register_operand\" \"w\")]\n+ [(set (match_operand:VSDQS_I_DI 0 \"register_operand\" \"=w\")\n+\t (unspec:VSDQS_I_DI\n+\t [(match_operand:VSDQS_I_DI 1 \"register_operand\" \"w\")]\n \t UNSPEC_ABS))]\n \"TARGET_SIMD\"\n \"abs\\t%<v>0<Vmtype>, %<v>1<Vmtype>\"\n@@ -811,12 +811,12 @@\n ;; Whereas SABD would return 192 (-64 signed) on the above example.\n ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.\n (define_insn \"aarch64_<su>abd<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n-\t(minus:VDQ_BHSI\n-\t (USMAX:VDQ_BHSI\n-\t (match_operand:VDQ_BHSI 1 \"register_operand\" \"w\")\n-\t 
(match_operand:VDQ_BHSI 2 \"register_operand\" \"w\"))\n-\t (<max_opp>:VDQ_BHSI\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+\t(minus:VDQS_BHSI\n+\t (USMAX:VDQS_BHSI\n+\t (match_operand:VDQS_BHSI 1 \"register_operand\" \"w\")\n+\t (match_operand:VDQS_BHSI 2 \"register_operand\" \"w\"))\n+\t (<max_opp>:VDQS_BHSI\n \t (match_dup 1)\n \t (match_dup 2))))]\n \"TARGET_SIMD\"\n@@ -825,10 +825,10 @@\n )\n \n (define_expand \"<su>abd<mode>3\"\n- [(match_operand:VDQ_BHSI 0 \"register_operand\")\n- (USMAX:VDQ_BHSI\n- (match_operand:VDQ_BHSI 1 \"register_operand\")\n- (match_operand:VDQ_BHSI 2 \"register_operand\"))]\n+ [(match_operand:VDQS_BHSI 0 \"register_operand\")\n+ (USMAX:VDQS_BHSI\n+ (match_operand:VDQS_BHSI 1 \"register_operand\")\n+ (match_operand:VDQS_BHSI 2 \"register_operand\"))]\n \"TARGET_SIMD\"\n {\n emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));\n@@ -1092,15 +1092,15 @@\n )\n \n (define_insn \"aarch64_<su>aba<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n-\t(plus:VDQ_BHSI (minus:VDQ_BHSI\n-\t\t\t (USMAX:VDQ_BHSI\n-\t\t\t (match_operand:VDQ_BHSI 2 \"register_operand\" \"w\")\n-\t\t\t (match_operand:VDQ_BHSI 3 \"register_operand\" \"w\"))\n-\t\t\t (<max_opp>:VDQ_BHSI\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+\t(plus:VDQS_BHSI (minus:VDQS_BHSI\n+\t\t\t (USMAX:VDQS_BHSI\n+\t\t\t (match_operand:VDQS_BHSI 2 \"register_operand\" \"w\")\n+\t\t\t (match_operand:VDQS_BHSI 3 \"register_operand\" \"w\"))\n+\t\t\t (<max_opp>:VDQS_BHSI\n \t\t\t (match_dup 2)\n \t\t\t (match_dup 3)))\n-\t\t (match_operand:VDQ_BHSI 1 \"register_operand\" \"0\")))]\n+\t\t (match_operand:VDQS_BHSI 1 \"register_operand\" \"0\")))]\n \"TARGET_SIMD\"\n \"<su>aba\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>\"\n [(set_attr \"type\" \"neon_arith_acc<q>\")]\n@@ -1119,9 +1119,9 @@\n \n ;; For AND (vector, register) and BIC (vector, immediate)\n (define_insn \"and<mode>3<vczle><vczbe>\"\n- 
[(set (match_operand:VDQ_I 0 \"register_operand\")\n-\t(and:VDQ_I (match_operand:VDQ_I 1 \"register_operand\")\n-\t\t (match_operand:VDQ_I 2 \"aarch64_reg_or_and_imm\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\")\n+\t(and:VDQS_I (match_operand:VDQS_I 1 \"register_operand\")\n+\t\t (match_operand:VDQS_I 2 \"aarch64_reg_or_and_imm\")))]\n \"TARGET_SIMD\"\n {@ [ cons: =0 , 1 , 2 ]\n [ w , w , w ] and\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>\n@@ -1132,9 +1132,9 @@\n \n ;; For ORR (vector, register) and ORR (vector, immediate)\n (define_insn \"ior<mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n-\t(ior:VDQ_I (match_operand:VDQ_I 1 \"register_operand\")\n-\t\t (match_operand:VDQ_I 2 \"aarch64_reg_or_orr_imm\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\")\n+\t(ior:VDQS_I (match_operand:VDQS_I 1 \"register_operand\")\n+\t\t (match_operand:VDQS_I 2 \"aarch64_reg_or_orr_imm\")))]\n \"TARGET_SIMD\"\n {@ [ cons: =0 , 1 , 2 ]\n [ w , w , w ] orr\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>\n@@ -1145,9 +1145,9 @@\n \n ;; For EOR (vector, register) and SVE EOR (vector, immediate)\n (define_insn \"xor<mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n- (xor:VDQ_I (match_operand:VDQ_I 1 \"register_operand\")\n- (match_operand:VDQ_I 2 \"aarch64_reg_or_xor_imm\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (xor:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_I 2 \"aarch64_reg_or_xor_imm\" \"w\")))]\n \"TARGET_SIMD\"\n {@ [ cons: =0 , 1 , 2 ]\n [ w , w , w ] eor\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>\n@@ -1157,19 +1157,19 @@\n )\n \n (define_insn \"one_cmpl<mode>2<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (not:VDQ_I (match_operand:VDQ_I 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (not:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")))]\n 
\"TARGET_SIMD\"\n \"not\\t%0.<Vbtype>, %1.<Vbtype>\"\n [(set_attr \"type\" \"neon_logic<q>\")]\n )\n \n (define_insn \"aarch64_simd_vec_set<mode>\"\n- [(set (match_operand:VALL_F16 0 \"register_operand\" \"=w,w,w\")\n-\t(vec_merge:VALL_F16\n-\t (vec_duplicate:VALL_F16\n+ [(set (match_operand:VALLS_F16 0 \"register_operand\" \"=w,w,w\")\n+\t(vec_merge:VALLS_F16\n+\t (vec_duplicate:VALLS_F16\n \t\t(match_operand:<VEL> 1 \"aarch64_simd_nonimmediate_operand\" \"w,?r,Utv\"))\n-\t (match_operand:VALL_F16 3 \"register_operand\" \"0,0,0\")\n+\t (match_operand:VALLS_F16 3 \"register_operand\" \"0,0,0\")\n \t (match_operand:SI 2 \"immediate_operand\" \"i,i,i\")))]\n \"TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0\"\n {\n@@ -1191,10 +1191,10 @@\n )\n \n (define_insn \"aarch64_simd_vec_set_zero<mode>\"\n- [(set (match_operand:VALL_F16 0 \"register_operand\" \"=w\")\n-\t(vec_merge:VALL_F16\n-\t (match_operand:VALL_F16 1 \"aarch64_simd_imm_zero\" \"\")\n-\t (match_operand:VALL_F16 3 \"register_operand\" \"0\")\n+ [(set (match_operand:VALLS_F16 0 \"register_operand\" \"=w\")\n+\t(vec_merge:VALLS_F16\n+\t (match_operand:VALLS_F16 1 \"aarch64_simd_imm_zero\" \"\")\n+\t (match_operand:VALLS_F16 3 \"register_operand\" \"0\")\n \t (match_operand:SI 2 \"immediate_operand\" \"i\")))]\n \"TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0\"\n {\n@@ -1205,14 +1205,14 @@\n )\n \n (define_insn \"@aarch64_simd_vec_copy_lane<mode>\"\n- [(set (match_operand:VALL_F16 0 \"register_operand\" \"=w\")\n-\t(vec_merge:VALL_F16\n-\t (vec_duplicate:VALL_F16\n+ [(set (match_operand:VALLS_F16 0 \"register_operand\" \"=w\")\n+\t(vec_merge:VALLS_F16\n+\t (vec_duplicate:VALLS_F16\n \t (vec_select:<VEL>\n-\t\t(match_operand:VALL_F16 3 \"register_operand\" \"w\")\n+\t\t(match_operand:VALLS_F16 3 \"register_operand\" \"w\")\n \t\t(parallel\n \t\t [(match_operand:SI 4 \"immediate_operand\" \"i\")])))\n-\t (match_operand:VALL_F16 1 \"register_operand\" \"0\")\n+\t (match_operand:VALLS_F16 1 
\"register_operand\" \"0\")\n \t (match_operand:SI 2 \"immediate_operand\" \"i\")))]\n \"TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0\"\n {\n@@ -1263,18 +1263,18 @@\n })\n \n (define_insn \"aarch64_simd_lshr<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (lshiftrt:VDQ_I (match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_I 2 \"aarch64_simd_rshift_imm\" \"Dr\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (lshiftrt:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_I 2 \"aarch64_simd_rshift_imm\" \"Dr\")))]\n \"TARGET_SIMD\"\n \"ushr\\t%0.<Vtype>, %1.<Vtype>, %2\"\n [(set_attr \"type\" \"neon_shift_imm<q>\")]\n )\n \n (define_insn \"aarch64_simd_ashr<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n- (ashiftrt:VDQ_I (match_operand:VDQ_I 1 \"register_operand\")\n-\t\t (match_operand:VDQ_I 2 \"aarch64_simd_rshift_imm\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\")\n+ (ashiftrt:VDQS_I (match_operand:VDQS_I 1 \"register_operand\")\n+\t\t (match_operand:VDQS_I 2 \"aarch64_simd_rshift_imm\")))]\n \"TARGET_SIMD\"\n {@ [ cons: =0 , 1 , 2 ; attrs: type ]\n [ w , w , D1 ; neon_compare<q> ] cmlt\\t%0.<Vtype>, %1.<Vtype>, #0\n@@ -1283,12 +1283,12 @@\n )\n \n (define_insn \"aarch64_<sra_op>sra_n<mode>_insn\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n-\t(plus:VDQ_I\n-\t (SHIFTRT:VDQ_I\n-\t\t(match_operand:VDQ_I 2 \"register_operand\" \"w\")\n-\t\t(match_operand:VDQ_I 3 \"aarch64_simd_rshift_imm\"))\n-\t (match_operand:VDQ_I 1 \"register_operand\" \"0\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+\t(plus:VDQS_I\n+\t (SHIFTRT:VDQS_I\n+\t\t(match_operand:VDQS_I 2 \"register_operand\" \"w\")\n+\t\t(match_operand:VDQS_I 3 \"aarch64_simd_rshift_imm\"))\n+\t (match_operand:VDQS_I 1 \"register_operand\" \"0\")))]\n \"TARGET_SIMD\"\n \"<sra_op>sra\\t%<v>0<Vmtype>, 
%<v>2<Vmtype>, %3\"\n [(set_attr \"type\" \"neon_shift_acc<q>\")]\n@@ -1346,12 +1346,12 @@\n )\n \n (define_expand \"aarch64_<sra_op>sra_n<mode>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n-\t(plus:VDQ_I\n-\t (SHIFTRT:VDQ_I\n-\t\t(match_operand:VDQ_I 2 \"register_operand\")\n+ [(set (match_operand:VDQS_I 0 \"register_operand\")\n+\t(plus:VDQS_I\n+\t (SHIFTRT:VDQS_I\n+\t\t(match_operand:VDQS_I 2 \"register_operand\")\n \t\t(match_operand:SI 3 \"aarch64_simd_shift_imm_offset_<ve_mode>\"))\n-\t (match_operand:VDQ_I 1 \"register_operand\")))]\n+\t (match_operand:VDQS_I 1 \"register_operand\")))]\n \"TARGET_SIMD\"\n {\n operands[3]\n@@ -1387,9 +1387,9 @@\n )\n \n (define_insn \"aarch64_simd_imm_shl<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n- (ashift:VDQ_I (match_operand:VDQ_I 1 \"register_operand\")\n-\t\t (match_operand:VDQ_I 2 \"aarch64_simd_lshift_imm\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\")\n+ (ashift:VDQS_I (match_operand:VDQS_I 1 \"register_operand\")\n+\t\t (match_operand:VDQS_I 2 \"aarch64_simd_lshift_imm\")))]\n \"TARGET_SIMD\"\n {@ [ cons: =0, 1, 2 ; attrs: type ]\n [ w , w, vs1 ; neon_add<q> ] add\\t%0.<Vtype>, %1.<Vtype>, %1.<Vtype>\n@@ -1398,18 +1398,18 @@\n )\n \n (define_insn \"aarch64_simd_reg_sshl<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (ashift:VDQ_I (match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_I 2 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (ashift:VDQS_I (match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_I 2 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"sshl\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n [(set_attr \"type\" \"neon_shift_reg<q>\")]\n )\n \n (define_insn \"aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (unspec:VDQ_I [(match_operand:VDQ_I 1 
\"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_I 2 \"register_operand\" \"w\")]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (unspec:VDQS_I [(match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_I 2 \"register_operand\" \"w\")]\n \t\t UNSPEC_ASHIFT_UNSIGNED))]\n \"TARGET_SIMD\"\n \"ushl\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n@@ -1417,9 +1417,9 @@\n )\n \n (define_insn \"aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\" \"=w\")\n- (unspec:VDQ_I [(match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_I 2 \"register_operand\" \"w\")]\n+ [(set (match_operand:VDQS_I 0 \"register_operand\" \"=w\")\n+ (unspec:VDQS_I [(match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_I 2 \"register_operand\" \"w\")]\n \t\t UNSPEC_ASHIFT_SIGNED))]\n \"TARGET_SIMD\"\n \"sshl\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n@@ -1427,8 +1427,8 @@\n )\n \n (define_expand \"ashl<mode>3\"\n- [(match_operand:VDQ_I 0 \"register_operand\")\n- (match_operand:VDQ_I 1 \"register_operand\")\n+ [(match_operand:VDQS_I 0 \"register_operand\")\n+ (match_operand:VDQS_I 1 \"register_operand\")\n (match_operand:SI 2 \"general_operand\")]\n \"TARGET_SIMD\"\n {\n@@ -1460,8 +1460,8 @@\n })\n \n (define_expand \"lshr<mode>3\"\n- [(match_operand:VDQ_I 0 \"register_operand\")\n- (match_operand:VDQ_I 1 \"register_operand\")\n+ [(match_operand:VDQS_I 0 \"register_operand\")\n+ (match_operand:VDQS_I 1 \"register_operand\")\n (match_operand:SI 2 \"general_operand\")]\n \"TARGET_SIMD\"\n {\n@@ -1495,8 +1495,8 @@\n })\n \n (define_expand \"ashr<mode>3\"\n- [(match_operand:VDQ_I 0 \"register_operand\")\n- (match_operand:VDQ_I 1 \"register_operand\")\n+ [(match_operand:VDQS_I 0 \"register_operand\")\n+ (match_operand:VDQS_I 1 \"register_operand\")\n (match_operand:SI 2 \"general_operand\")]\n \"TARGET_SIMD\"\n {\n@@ -1530,9 +1530,9 @@\n })\n \n (define_expand \"vashl<mode>3\"\n- 
[(match_operand:VDQ_I 0 \"register_operand\")\n- (match_operand:VDQ_I 1 \"register_operand\")\n- (match_operand:VDQ_I 2 \"register_operand\")]\n+ [(match_operand:VDQS_I 0 \"register_operand\")\n+ (match_operand:VDQS_I 1 \"register_operand\")\n+ (match_operand:VDQS_I 2 \"register_operand\")]\n \"TARGET_SIMD\"\n {\n emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],\n@@ -1541,9 +1541,9 @@\n })\n \n (define_expand \"vashr<mode>3\"\n- [(match_operand:VDQ_I 0 \"register_operand\")\n- (match_operand:VDQ_I 1 \"register_operand\")\n- (match_operand:VDQ_I 2 \"register_operand\")]\n+ [(match_operand:VDQS_I 0 \"register_operand\")\n+ (match_operand:VDQS_I 1 \"register_operand\")\n+ (match_operand:VDQS_I 2 \"register_operand\")]\n \"TARGET_SIMD\"\n {\n rtx neg = gen_reg_rtx (<MODE>mode);\n@@ -1571,9 +1571,9 @@\n )\n \n (define_expand \"vlshr<mode>3\"\n- [(match_operand:VDQ_I 0 \"register_operand\")\n- (match_operand:VDQ_I 1 \"register_operand\")\n- (match_operand:VDQ_I 2 \"register_operand\")]\n+ [(match_operand:VDQS_I 0 \"register_operand\")\n+ (match_operand:VDQS_I 1 \"register_operand\")\n+ (match_operand:VDQS_I 2 \"register_operand\")]\n \"TARGET_SIMD\"\n {\n rtx neg = gen_reg_rtx (<MODE>mode);\n@@ -1614,7 +1614,7 @@\n )\n \n (define_expand \"vec_set<mode>\"\n- [(match_operand:VALL_F16 0 \"register_operand\")\n+ [(match_operand:VALLS_F16 0 \"register_operand\")\n (match_operand:<VEL> 1 \"aarch64_simd_nonimmediate_operand\")\n (match_operand:SI 2 \"immediate_operand\")]\n \"TARGET_SIMD\"\n@@ -1628,11 +1628,11 @@\n \n \n (define_insn \"aarch64_mla<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n- (plus:VDQ_BHSI (mult:VDQ_BHSI\n-\t\t\t(match_operand:VDQ_BHSI 2 \"register_operand\" \"w\")\n-\t\t\t(match_operand:VDQ_BHSI 3 \"register_operand\" \"w\"))\n-\t\t (match_operand:VDQ_BHSI 1 \"register_operand\" \"0\")))]\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+ (plus:VDQS_BHSI 
(mult:VDQS_BHSI\n+\t\t\t(match_operand:VDQS_BHSI 2 \"register_operand\" \"w\")\n+\t\t\t(match_operand:VDQS_BHSI 3 \"register_operand\" \"w\"))\n+\t\t (match_operand:VDQS_BHSI 1 \"register_operand\" \"0\")))]\n \"TARGET_SIMD\"\n \"mla\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>\"\n [(set_attr \"type\" \"neon_mla_<Vetype><q>\")]\n@@ -1688,10 +1688,10 @@\n )\n \n (define_insn \"aarch64_mls<mode><vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n- (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 \"register_operand\" \"0\")\n-\t\t (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 \"register_operand\" \"w\")\n-\t\t\t (match_operand:VDQ_BHSI 3 \"register_operand\" \"w\"))))]\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+ (minus:VDQS_BHSI (match_operand:VDQS_BHSI 1 \"register_operand\" \"0\")\n+\t\t (mult:VDQS_BHSI (match_operand:VDQS_BHSI 2 \"register_operand\" \"w\")\n+\t\t\t (match_operand:VDQS_BHSI 3 \"register_operand\" \"w\"))))]\n \"TARGET_SIMD\"\n \"mls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>\"\n [(set_attr \"type\" \"neon_mla_<Vetype><q>\")]\n@@ -1748,9 +1748,9 @@\n \n ;; Max/Min operations.\n (define_insn \"<su><maxmin><mode>3<vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n- (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 \"register_operand\" \"w\")\n-\t\t (match_operand:VDQ_BHSI 2 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+ (MAXMIN:VDQS_BHSI (match_operand:VDQS_BHSI 1 \"register_operand\" \"w\")\n+\t\t (match_operand:VDQS_BHSI 2 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"<su><maxmin>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n [(set_attr \"type\" \"neon_minmax<q>\")]\n@@ -3526,24 +3526,24 @@\n )\n \n (define_insn \"clrsb<mode>2<vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n- (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+ 
(clrsb:VDQS_BHSI (match_operand:VDQS_BHSI 1 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"cls\\\\t%0.<Vtype>, %1.<Vtype>\"\n [(set_attr \"type\" \"neon_cls<q>\")]\n )\n \n (define_insn \"clz<mode>2<vczle><vczbe>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n- (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+ (clz:VDQS_BHSI (match_operand:VDQS_BHSI 1 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"clz\\\\t%0.<Vtype>, %1.<Vtype>\"\n [(set_attr \"type\" \"neon_cls<q>\")]\n )\n \n (define_insn \"popcount<mode>2<vczle><vczbe>\"\n- [(set (match_operand:VB 0 \"register_operand\" \"=w\")\n- (popcount:VB (match_operand:VB 1 \"register_operand\" \"w\")))]\n+ [(set (match_operand:VB_WS 0 \"register_operand\" \"=w\")\n+ (popcount:VB_WS (match_operand:VB_WS 1 \"register_operand\" \"w\")))]\n \"TARGET_SIMD\"\n \"cnt\\\\t%0.<Vbtype>, %1.<Vbtype>\"\n [(set_attr \"type\" \"neon_cnt<q>\")]\n@@ -3701,13 +3701,13 @@\n ;; in *aarch64_simd_bsl<mode>_alt.\n \n (define_insn \"aarch64_simd_bsl<mode>_internal<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n-\t(xor:VDQ_I\n-\t (and:VDQ_I\n-\t (xor:VDQ_I\n+ [(set (match_operand:VDQS_I 0 \"register_operand\")\n+\t(xor:VDQS_I\n+\t (and:VDQS_I\n+\t (xor:VDQS_I\n \t (match_operand:<V_INT_EQUIV> 3 \"register_operand\")\n-\t (match_operand:VDQ_I 2 \"register_operand\"))\n-\t (match_operand:VDQ_I 1 \"register_operand\"))\n+\t (match_operand:VDQS_I 2 \"register_operand\"))\n+\t (match_operand:VDQS_I 1 \"register_operand\"))\n \t (match_dup:<V_INT_EQUIV> 3)\n \t))]\n \"TARGET_SIMD\"\n@@ -3726,13 +3726,13 @@\n ;; permutations of commutative operations, we have to have a separate pattern.\n \n (define_insn \"*aarch64_simd_bsl<mode>_alt<vczle><vczbe>\"\n- [(set (match_operand:VDQ_I 0 \"register_operand\")\n-\t(xor:VDQ_I\n-\t (and:VDQ_I\n-\t (xor:VDQ_I\n-\t (match_operand:VDQ_I 3 \"register_operand\")\n+ [(set 
(match_operand:VDQS_I 0 \"register_operand\")\n+\t(xor:VDQS_I\n+\t (and:VDQS_I\n+\t (xor:VDQS_I\n+\t (match_operand:VDQS_I 3 \"register_operand\")\n \t (match_operand:<V_INT_EQUIV> 2 \"register_operand\"))\n-\t (match_operand:VDQ_I 1 \"register_operand\"))\n+\t (match_operand:VDQS_I 1 \"register_operand\"))\n \t (match_dup:<V_INT_EQUIV> 2)))]\n \"TARGET_SIMD\"\n {@ [ cons: =0 , 1 , 2 , 3 ]\n@@ -3836,10 +3836,10 @@\n )\n \n (define_expand \"aarch64_simd_bsl<mode>\"\n- [(match_operand:VALLDIF 0 \"register_operand\")\n+ [(match_operand:VALLSDIF 0 \"register_operand\")\n (match_operand:<V_INT_EQUIV> 1 \"register_operand\")\n- (match_operand:VALLDIF 2 \"register_operand\")\n- (match_operand:VALLDIF 3 \"register_operand\")]\n+ (match_operand:VALLSDIF 2 \"register_operand\")\n+ (match_operand:VALLSDIF 3 \"register_operand\")]\n \"TARGET_SIMD\"\n {\n /* We can't alias operands together if they have different modes. */\n@@ -3862,9 +3862,9 @@\n })\n \n (define_expand \"vcond_mask_<mode><v_int_equiv>\"\n- [(match_operand:VALLDI 0 \"register_operand\")\n- (match_operand:VALLDI 1 \"nonmemory_operand\")\n- (match_operand:VALLDI 2 \"nonmemory_operand\")\n+ [(match_operand:VALLSDI 0 \"register_operand\")\n+ (match_operand:VALLSDI 1 \"nonmemory_operand\")\n+ (match_operand:VALLSDI 2 \"nonmemory_operand\")\n (match_operand:<V_INT_EQUIV> 3 \"register_operand\")]\n \"TARGET_SIMD\"\n {\n@@ -3896,8 +3896,8 @@\n [(set (pc)\n (if_then_else\n (match_operator 0 \"aarch64_equality_operator\"\n- [(match_operand:VDQ_I 1 \"register_operand\")\n- (match_operand:VDQ_I 2 \"aarch64_simd_reg_or_zero\")])\n+ [(match_operand:VDQS_I 1 \"register_operand\")\n+ (match_operand:VDQS_I 2 \"aarch64_simd_reg_or_zero\")])\n (label_ref (match_operand 3 \"\"))\n (pc)))]\n \"TARGET_SIMD\"\n@@ -3922,12 +3922,15 @@\n emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));\n emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));\n }\n+ auto mode = DImode;\n+ if (known_eq (32, GET_MODE_BITSIZE (<MODE>mode)))\n+ mode = 
SImode;\n \n- rtx val = gen_reg_rtx (DImode);\n- emit_move_insn (val, gen_lowpart (DImode, tmp));\n+ rtx val = gen_reg_rtx (mode);\n+ emit_move_insn (val, gen_lowpart (mode, tmp));\n \n rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);\n- rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);\n+ rtx cmp_rtx = gen_rtx_fmt_ee (code, mode, cc_reg, const0_rtx);\n emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));\n DONE;\n })\n@@ -3935,10 +3938,10 @@\n ;; Patterns comparing two vectors to produce a mask.\n \n (define_expand \"vec_cmp<mode><mode>\"\n- [(set (match_operand:VSDQ_I_DI 0 \"register_operand\")\n+ [(set (match_operand:VSDQS_I_DI 0 \"register_operand\")\n \t (match_operator 1 \"comparison_operator\"\n-\t [(match_operand:VSDQ_I_DI 2 \"register_operand\")\n-\t (match_operand:VSDQ_I_DI 3 \"nonmemory_operand\")]))]\n+\t [(match_operand:VSDQS_I_DI 2 \"register_operand\")\n+\t (match_operand:VSDQS_I_DI 3 \"nonmemory_operand\")]))]\n \"TARGET_SIMD\"\n {\n rtx mask = operands[0];\n@@ -4188,10 +4191,10 @@\n })\n \n (define_expand \"vec_cmpu<mode><mode>\"\n- [(set (match_operand:VSDQ_I_DI 0 \"register_operand\")\n+ [(set (match_operand:VSDQS_I_DI 0 \"register_operand\")\n \t (match_operator 1 \"comparison_operator\"\n-\t [(match_operand:VSDQ_I_DI 2 \"register_operand\")\n-\t (match_operand:VSDQ_I_DI 3 \"nonmemory_operand\")]))]\n+\t [(match_operand:VSDQS_I_DI 2 \"register_operand\")\n+\t (match_operand:VSDQS_I_DI 3 \"nonmemory_operand\")]))]\n \"TARGET_SIMD\"\n {\n emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],\n@@ -4200,13 +4203,13 @@\n })\n \n (define_expand \"vcond<mode><mode>\"\n- [(set (match_operand:VALLDI 0 \"register_operand\")\n-\t(if_then_else:VALLDI\n+ [(set (match_operand:VALLSDI 0 \"register_operand\")\n+\t(if_then_else:VALLSDI\n \t (match_operator 3 \"comparison_operator\"\n-\t [(match_operand:VALLDI 4 \"register_operand\")\n-\t (match_operand:VALLDI 5 \"nonmemory_operand\")])\n-\t (match_operand:VALLDI 1 
\"nonmemory_operand\")\n-\t (match_operand:VALLDI 2 \"nonmemory_operand\")))]\n+\t [(match_operand:VALLSDI 4 \"register_operand\")\n+\t (match_operand:VALLSDI 5 \"nonmemory_operand\")])\n+\t (match_operand:VALLSDI 1 \"nonmemory_operand\")\n+\t (match_operand:VALLSDI 2 \"nonmemory_operand\")))]\n \"TARGET_SIMD\"\n {\n rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);\n@@ -4360,7 +4363,7 @@\n (define_insn_and_split \"aarch64_get_lane<mode>\"\n [(set (match_operand:<VEL> 0 \"aarch64_simd_nonimmediate_operand\" \"=?r, w, Utv\")\n \t(vec_select:<VEL>\n-\t (match_operand:VALL_F16 1 \"register_operand\" \"w, w, w\")\n+\t (match_operand:VALLS_F16 1 \"register_operand\" \"w, w, w\")\n \t (parallel [(match_operand:SI 2 \"immediate_operand\" \"i, i, i\")])))]\n \"TARGET_SIMD\"\n {\n@@ -4859,14 +4862,14 @@\n ;; <su><r>h<addsub>.\n \n (define_expand \"<su_optab>avg<mode>3_floor\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\")\n-\t(truncate:VDQ_BHSI\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\")\n+\t(truncate:VDQS_BHSI\n \t (ashiftrt:<V2XWIDE>\n \t (plus:<V2XWIDE>\n \t (ANY_EXTEND:<V2XWIDE>\n-\t\t(match_operand:VDQ_BHSI 1 \"register_operand\"))\n+\t\t(match_operand:VDQS_BHSI 1 \"register_operand\"))\n \t (ANY_EXTEND:<V2XWIDE>\n-\t\t(match_operand:VDQ_BHSI 2 \"register_operand\")))\n+\t\t(match_operand:VDQS_BHSI 2 \"register_operand\")))\n \t (match_dup 3))))]\n \"TARGET_SIMD\"\n {\n@@ -4875,15 +4878,15 @@\n )\n \n (define_expand \"<su_optab>avg<mode>3_ceil\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\")\n-\t(truncate:VDQ_BHSI\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\")\n+\t(truncate:VDQS_BHSI\n \t (ashiftrt:<V2XWIDE>\n \t (plus:<V2XWIDE>\n \t (plus:<V2XWIDE>\n \t\t(ANY_EXTEND:<V2XWIDE>\n-\t\t (match_operand:VDQ_BHSI 1 \"register_operand\"))\n+\t\t (match_operand:VDQS_BHSI 1 \"register_operand\"))\n \t\t(ANY_EXTEND:<V2XWIDE>\n-\t\t (match_operand:VDQ_BHSI 2 \"register_operand\")))\n+\t\t (match_operand:VDQS_BHSI 2 \"register_operand\")))\n 
\t (match_dup 3))\n \t (match_dup 3))))]\n \"TARGET_SIMD\"\n@@ -4893,14 +4896,14 @@\n )\n \n (define_expand \"aarch64_<su>hsub<mode>\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\")\n-\t(truncate:VDQ_BHSI\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\")\n+\t(truncate:VDQS_BHSI\n \t (ashiftrt:<V2XWIDE>\n \t (minus:<V2XWIDE>\n \t (ANY_EXTEND:<V2XWIDE>\n-\t\t(match_operand:VDQ_BHSI 1 \"register_operand\"))\n+\t\t(match_operand:VDQS_BHSI 1 \"register_operand\"))\n \t (ANY_EXTEND:<V2XWIDE>\n-\t\t(match_operand:VDQ_BHSI 2 \"register_operand\")))\n+\t\t(match_operand:VDQS_BHSI 2 \"register_operand\")))\n \t (match_dup 3))))]\n \"TARGET_SIMD\"\n {\n@@ -4909,14 +4912,14 @@\n )\n \n (define_insn \"*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n-\t(truncate:VDQ_BHSI\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+\t(truncate:VDQS_BHSI\n \t (ashiftrt:<V2XWIDE>\n \t (ADDSUB:<V2XWIDE>\n \t (ANY_EXTEND:<V2XWIDE>\n-\t\t(match_operand:VDQ_BHSI 1 \"register_operand\" \"w\"))\n+\t\t(match_operand:VDQS_BHSI 1 \"register_operand\" \"w\"))\n \t (ANY_EXTEND:<V2XWIDE>\n-\t\t(match_operand:VDQ_BHSI 2 \"register_operand\" \"w\")))\n+\t\t(match_operand:VDQS_BHSI 2 \"register_operand\" \"w\")))\n \t (match_operand:<V2XWIDE> 3 \"aarch64_simd_imm_one\"))))]\n \"TARGET_SIMD\"\n \"<su>h<ADDSUB:optab>\\\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>\"\n@@ -4924,15 +4927,15 @@\n )\n \n (define_insn \"*aarch64_<su>rhadd<mode><vczle><vczbe>_insn\"\n- [(set (match_operand:VDQ_BHSI 0 \"register_operand\" \"=w\")\n-\t(truncate:VDQ_BHSI\n+ [(set (match_operand:VDQS_BHSI 0 \"register_operand\" \"=w\")\n+\t(truncate:VDQS_BHSI\n \t (ashiftrt:<V2XWIDE>\n \t (plus:<V2XWIDE>\n \t (plus:<V2XWIDE>\n \t\t(ANY_EXTEND:<V2XWIDE>\n-\t\t (match_operand:VDQ_BHSI 1 \"register_operand\" \"w\"))\n+\t\t (match_operand:VDQS_BHSI 1 \"register_operand\" \"w\"))\n \t\t(ANY_EXTEND:<V2XWIDE>\n-\t\t (match_operand:VDQ_BHSI 2 
\"register_operand\" \"w\")))\n+\t\t (match_operand:VDQS_BHSI 2 \"register_operand\" \"w\")))\n \t (match_operand:<V2XWIDE> 3 \"aarch64_simd_imm_one\"))\n \t (match_dup 3))))]\n \"TARGET_SIMD\"\n@@ -7093,8 +7096,8 @@\n [(set (match_operand:<V_INT_EQUIV> 0 \"register_operand\")\n \t(neg:<V_INT_EQUIV>\n \t (COMPARISONS:<V_INT_EQUIV>\n-\t (match_operand:VDQ_I 1 \"register_operand\")\n-\t (match_operand:VDQ_I 2 \"aarch64_simd_reg_or_zero\")\n+\t (match_operand:VDQS_I 1 \"register_operand\")\n+\t (match_operand:VDQS_I 2 \"aarch64_simd_reg_or_zero\")\n \t )))]\n \"TARGET_SIMD\"\n {@ [ cons: =0 , 1 , 2 ; attrs: type ]\n@@ -7158,8 +7161,8 @@\n [(set (match_operand:<V_INT_EQUIV> 0 \"register_operand\" \"=w\")\n \t(neg:<V_INT_EQUIV>\n \t (UCOMPARISONS:<V_INT_EQUIV>\n-\t (match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t (match_operand:VDQ_I 2 \"register_operand\" \"w\")\n+\t (match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t (match_operand:VDQS_I 2 \"register_operand\" \"w\")\n \t )))]\n \"TARGET_SIMD\"\n \"cm<n_optab>\\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>\"\n@@ -7225,10 +7228,10 @@\n [(set (match_operand:<V_INT_EQUIV> 0 \"register_operand\" \"=w\")\n \t(plus:<V_INT_EQUIV>\n \t (eq:<V_INT_EQUIV>\n-\t (and:VDQ_I\n-\t (match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t (match_operand:VDQ_I 2 \"register_operand\" \"w\"))\n-\t (match_operand:VDQ_I 3 \"aarch64_simd_imm_zero\"))\n+\t (and:VDQS_I\n+\t (match_operand:VDQS_I 1 \"register_operand\" \"w\")\n+\t (match_operand:VDQS_I 2 \"register_operand\" \"w\"))\n+\t (match_operand:VDQS_I 3 \"aarch64_simd_imm_zero\"))\n \t (match_operand:<V_INT_EQUIV> 4 \"aarch64_simd_imm_minus_one\")))\n ]\n \"TARGET_SIMD\"\n@@ -7244,8 +7247,8 @@\n [(set (match_operand:<V_INT_EQUIV> 0 \"register_operand\" \"=w\")\n \t(plus:<V_INT_EQUIV>\n \t (eq:<V_INT_EQUIV>\n-\t (match_operand:VDQ_I 1 \"register_operand\" \"w\")\n-\t (match_operand:VDQ_I 2 \"aarch64_simd_imm_zero\"))\n+\t (match_operand:VDQS_I 1 
\"register_operand\" \"w\")\n+\t (match_operand:VDQS_I 2 \"aarch64_simd_imm_zero\"))\n \t (match_operand:<V_INT_EQUIV> 3 \"aarch64_simd_imm_minus_one\")))\n ]\n \"TARGET_SIMD\"\n@@ -8462,10 +8465,10 @@\n ;; vec_perm support\n \n (define_expand \"vec_perm<mode>\"\n- [(match_operand:VB 0 \"register_operand\")\n- (match_operand:VB 1 \"register_operand\")\n- (match_operand:VB 2 \"register_operand\")\n- (match_operand:VB 3 \"register_operand\")]\n+ [(match_operand:VB_WS 0 \"register_operand\")\n+ (match_operand:VB_WS 1 \"register_operand\")\n+ (match_operand:VB_WS 2 \"register_operand\")\n+ (match_operand:VB_WS 3 \"register_operand\")]\n \"TARGET_SIMD\"\n {\n aarch64_expand_vec_perm (operands[0], operands[1],\n@@ -8749,7 +8752,7 @@\n ;; Standard pattern name vec_init<mode><Vel>.\n \n (define_expand \"vec_init<mode><Vel>\"\n- [(match_operand:VALL_F16 0 \"register_operand\")\n+ [(match_operand:VALLS_F16 0 \"register_operand\")\n (match_operand 1 \"\" \"\")]\n \"TARGET_SIMD\"\n {\n@@ -8766,9 +8769,18 @@\n DONE;\n })\n \n+(define_expand \"vec_init<mode><Vhalf>\"\n+ [(match_operand:VD_NO2E 0 \"register_operand\")\n+ (match_operand 1 \"\" \"\")]\n+ \"TARGET_SIMD\"\n+{\n+ aarch64_expand_vector_init (operands[0], operands[1]);\n+ DONE;\n+})\n+\n (define_insn \"*aarch64_simd_ld1r<mode>\"\n- [(set (match_operand:VALL_F16 0 \"register_operand\" \"=w\")\n-\t(vec_duplicate:VALL_F16\n+ [(set (match_operand:VALLS_F16 0 \"register_operand\" \"=w\")\n+\t(vec_duplicate:VALLS_F16\n \t (match_operand:<VEL> 1 \"aarch64_simd_struct_operand\" \"Utv\")))]\n \"TARGET_SIMD\"\n \"ld1r\\\\t{%0.<Vtype>}, %1\"\n@@ -8828,7 +8840,7 @@\n \n (define_expand \"vec_extract<mode><Vel>\"\n [(match_operand:<VEL> 0 \"aarch64_simd_nonimmediate_operand\")\n- (match_operand:VALL_F16 1 \"register_operand\")\n+ (match_operand:VALLS_F16 1 \"register_operand\")\n (match_operand:SI 2 \"immediate_operand\")]\n \"TARGET_SIMD\"\n {\n@@ -9999,3 +10011,120 @@\n \"TARGET_FAMINMAX\"\n \"<faminmax_op>\\t%0.<Vtype>, 
%1.<Vtype>, %2.<Vtype>\"\n )\n+\n+;; V4QI and V2HI modes special patterns\n+\n+;; TODO: vec_extract V8QI->V4QI/V4HI->V2HI\n+\n+/* Extends */\n+/* V2HI -> V2SI, V4QI -> V4HI\n+ via V4HI -> V4SI, V8QI -> V8HI */\n+(define_expand \"<optab><mode><Vwide>2\"\n+ [(set (match_operand:<VWIDE> 0 \"register_operand\" \"=w\")\n+\t(ANY_EXTEND:<VWIDE> (match_operand:VH_I 1 \"register_operand\" \"w\")))]\n+ \"TARGET_SIMD\"\n+ {\n+ auto widemode = (<MODE>mode == V2HImode) ? V4SImode : V8HImode;\n+ auto op1mode = (<MODE>mode == V2HImode) ? V4HImode : V8QImode;\n+ rtx op1 = gen_lowpart (op1mode, operands[1]);\n+ rtx op0 = gen_reg_rtx (widemode);\n+ if (<MODE>mode == V2HImode)\n+ emit_insn (gen_<optab>v4hiv4si2 (op0, op1));\n+ else\n+ emit_insn (gen_<optab>v8qiv8hi2 (op0, op1));\n+ emit_move_insn (operands[0], gen_lowpart (<VWIDE>mode, op0));\n+ DONE;\n+ }\n+)\n+\n+(define_expand \"<optab>v4qiv4si2\"\n+ [(set (match_operand:V4SI 0 \"register_operand\")\n+\t(ANY_EXTEND:V4SI (match_operand:V4QI 1 \"register_operand\")))]\n+ \"TARGET_SIMD\"\n+ {\n+ rtx tmp = gen_reg_rtx (V4HImode);\n+ emit_insn (gen_<optab>v4qiv4hi2 (tmp, operands[1]));\n+ emit_insn (gen_<optab>v4hiv4si2 (operands[0], tmp));\n+ DONE;\n+ }\n+)\n+\n+/* Truncates */\n+(define_insn \"truncv4hiv4qi2\"\n+ [(set (match_operand:V4QI 0 \"register_operand\" \"=w\")\n+\t(truncate:V4QI (match_operand:V4HI 1 \"register_operand\" \"w\")))]\n+ \"TARGET_SIMD\"\n+ \"xtn\\t%0.8b, %1.8h\"\n+ [(set_attr \"type\" \"neon_move_narrow_q\")]\n+)\n+\n+(define_expand \"truncv4siv4qi2\"\n+ [(set (match_operand:V4QI 0 \"register_operand\")\n+\t(truncate:V4QI (match_operand:V4SI 1 \"register_operand\")))]\n+ \"TARGET_SIMD\"\n+ {\n+ rtx tmp = gen_reg_rtx (V4HImode);\n+ emit_insn (gen_truncv4siv4hi2 (tmp, operands[1]));\n+ emit_insn (gen_truncv4hiv4qi2 (operands[0], tmp));\n+ DONE;\n+ }\n+)\n+\n+/* Widening sum */\n+(define_insn \"widen_<su>sumv4qi3\"\n+ [(set (match_operand:V4HI 0 \"register_operand\" \"=w\")\n+\t(plus:V4HI (ANY_EXTEND:V4HI\n+\t\t 
(match_operand:V4QI 1 \"register_operand\" \"w\"))\n+\t\t (match_operand:V4HI 2 \"register_operand\" \"w\")))]\n+ \"TARGET_SIMD\"\n+ \"<su>addw\\t%0.8h, %2.8h, %1.8b\"\n+ [(set_attr \"type\" \"neon_add_widen\")]\n+)\n+\n+/* Reductions */\n+(define_expand \"reduc_plus_scal_<mode>\"\n+ [(set (match_operand:<VEL> 0 \"register_operand\")\n+ (unspec:<VEL> [(match_operand:VH_I 1 \"register_operand\")]\n+\t\t UNSPEC_ADDV))]\n+ \"TARGET_SIMD\"\n+ {\n+ rtx double_reg = gen_reg_rtx (<VDBL>mode);\n+ emit_insn (gen_aarch64_vec_concat<mode> (double_reg, operands[1], CONST0_RTX (<MODE>mode)));\n+ emit_insn (gen_reduc_plus_scal_<Vdbl> (operands[0], double_reg));\n+ DONE;\n+ }\n+)\n+\n+(define_expand \"reduc_<optab>_scal_<mode>\"\n+ [(match_operand:<VEL> 0 \"register_operand\")\n+ (unspec:VH_I [(match_operand:VH_I 1 \"register_operand\")]\n+\t\t MAXMINV)]\n+ \"TARGET_SIMD\"\n+ {\n+ rtx double_reg = gen_reg_rtx (<VDBL>mode);\n+ rtx other = operands[1];\n+ /* umax can be optimized to use the upper part being 0 rather than a dup,\n+ This allows using fmov in some cases. 
*/\n+ if (<CODE> == UMAX)\n+ other = CONST0_RTX (<MODE>mode);\n+ emit_insn (gen_aarch64_vec_concat<mode> (double_reg, operands[1], other));\n+ emit_insn (gen_reduc_<optab>_scal_<Vdbl> (operands[0], double_reg));\n+ DONE;\n+ }\n+)\n+;; For 32-bit modes we use ushl/r, as this does not require a SIMD zero.\n+;; Using vN.2s though\n+(define_insn \"vec_shr_<mode>\"\n+ [(set (match_operand:VH_I 0 \"register_operand\" \"=w\")\n+ (unspec:VH_I [(match_operand:VH_I 1 \"register_operand\" \"w\")\n+\t\t (match_operand:SI 2 \"immediate_operand\" \"i\")]\n+\t\t UNSPEC_VEC_SHR))]\n+ \"TARGET_SIMD\"\n+ {\n+ if (BYTES_BIG_ENDIAN)\n+ return \"shl %0.2s, %1.2s, %2\";\n+ else\n+ return \"ushr %0.2s, %1.2s, %2\";\n+ }\n+ [(set_attr \"type\" \"neon_shift_imm\")]\n+)\ndiff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc\nindex f2b53475adbe..75505828d14e 100644\n--- a/gcc/config/aarch64/aarch64.cc\n+++ b/gcc/config/aarch64/aarch64.cc\n@@ -1663,6 +1663,12 @@ aarch64_classify_vector_mode (machine_mode mode, bool any_target_p = false)\n case E_V4x2DFmode:\n return (TARGET_FLOAT || any_target_p) ? VEC_ADVSIMD | VEC_STRUCT : 0;\n \n+ /* 32-bit Advanced SIMD vectors. */\n+ case E_V4QImode:\n+ case E_V2HImode:\n+ if (BYTES_BIG_ENDIAN)\n+\treturn 0;\n+ /* FALLTHRU */\n /* 64-bit Advanced SIMD vectors. */\n case E_V8QImode:\n case E_V4HImode:\n@@ -2023,11 +2029,12 @@ aarch64_vectorize_related_mode (machine_mode vector_mode,\n \t}\n }\n \n- /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */\n+ /* Prefer to use 1 128-bit vector instead of 2 64-bit or 4 32-bit vectors. 
*/\n if (TARGET_SIMD\n && (vec_flags & VEC_ADVSIMD)\n && known_eq (nunits, 0U)\n- && known_eq (GET_MODE_BITSIZE (vector_mode), 64U)\n+ && (known_eq (GET_MODE_BITSIZE (vector_mode), 64U)\n+\t || known_eq (GET_MODE_BITSIZE (vector_mode), 32U))\n && maybe_ge (GET_MODE_BITSIZE (element_mode)\n \t\t * GET_MODE_NUNITS (vector_mode), 128U))\n {\n@@ -2036,6 +2043,19 @@ aarch64_vectorize_related_mode (machine_mode vector_mode,\n \treturn res;\n }\n \n+ /* Prefer to use 1 64-bit vector instead of 2 32-bit vectors. */\n+ if (TARGET_SIMD\n+ && (vec_flags & VEC_ADVSIMD)\n+ && known_eq (nunits, 0U)\n+ && known_eq (GET_MODE_BITSIZE (vector_mode), 32U)\n+ && maybe_ge (GET_MODE_BITSIZE (element_mode)\n+\t\t * GET_MODE_NUNITS (vector_mode), 64U))\n+ {\n+ machine_mode res = aarch64_simd_container_mode (element_mode, 64);\n+ if (VECTOR_MODE_P (res))\n+\treturn res;\n+ }\n+\n return default_vectorize_related_mode (vector_mode, element_mode, nunits);\n }\n \n@@ -22434,9 +22454,23 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)\n && known_eq (width, BITS_PER_SVE_VECTOR))\n return aarch64_full_sve_mode (mode).else_mode (word_mode);\n \n- gcc_assert (known_eq (width, 64) || known_eq (width, 128));\n+ gcc_assert (known_eq (width, 32)\n+\t || known_eq (width, 64)\n+\t || known_eq (width, 128));\n if (TARGET_BASE_SIMD)\n {\n+ if (!BYTES_BIG_ENDIAN && known_eq (width, 32))\n+\t{\n+\t switch (mode)\n+\t {\n+\t case E_HImode:\n+\t return V2HImode;\n+\t case E_QImode:\n+\t return V4QImode;\n+\t default:\n+\t break;\n+\t }\n+\t}\n if (known_eq (width, 128))\n \treturn aarch64_vq_mode (mode).else_mode (word_mode);\n else\n@@ -22550,18 +22584,23 @@ aarch64_autovectorize_vector_modes (vector_modes *modes, bool)\n V8QImode,\n \n /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors\n- for wider elements.\n-\n- TODO: We could support a limited form of V4QImode too, so that\n- we use 32-bit vectors for 8-bit elements. */\n+ for wider elements. 
*/\n V4HImode,\n \n /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors\n for 64-bit elements.\n \n- TODO: We could similarly support limited forms of V2QImode and V2HImode\n- for this case. */\n- V2SImode\n+ TODO: We could similarly support limited forms of V2QImode for\n+ this case. */\n+ V2SImode,\n+\n+ /* Try using 32-bit vectors for 8-bit elements and 128-bit vectors\n+ for wider elements. */\n+ V4QImode,\n+\n+ /* Try using 32-bit vectors for 16-bit elements and 128-bit vectors\n+ for wider elements. */\n+ V2HImode,\n };\n \n /* Try using N-byte SVE modes only after trying N-byte Advanced SIMD mode.\n@@ -22592,7 +22631,13 @@ aarch64_autovectorize_vector_modes (vector_modes *modes, bool)\n \t\t\t\t\tadvsimd_modes[advsimd_i]))\n \tmodes->safe_push (sve_modes[sve_i++]);\n else\n-\tmodes->safe_push (advsimd_modes[advsimd_i++]);\n+\t{\n+\t if ((aarch64_32bit_auto_vec\n+\t || (advsimd_modes[advsimd_i] != V4QImode\n+\t\t && advsimd_modes[advsimd_i] != V2HImode)))\n+\t modes->safe_push (advsimd_modes[advsimd_i]);\n+\t advsimd_i++;\n+\t}\n }\n while (sve_i < ARRAY_SIZE (sve_modes))\n modes->safe_push (sve_modes[sve_i++]);\n@@ -25781,7 +25826,7 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)\n machine_mode vmode = GET_MODE (target);\n bool one_vector_p = rtx_equal_p (op0, op1);\n \n- gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);\n+ gcc_checking_assert (vmode == V8QImode || vmode == V16QImode || vmode == V4QImode);\n gcc_checking_assert (GET_MODE (op0) == vmode);\n gcc_checking_assert (GET_MODE (op1) == vmode);\n gcc_checking_assert (GET_MODE (sel) == vmode);\n@@ -25789,7 +25834,16 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)\n \n if (one_vector_p)\n {\n- if (vmode == V8QImode)\n+ if (vmode == V4QImode)\n+\t{\n+\t /* Expand the argument to a V16QI mode by duplicating it. 
*/\n+\t rtx quad = gen_reg_rtx (V16QImode);\n+\t emit_insn (gen_aarch64_simd_dupv4si (gen_lowpart (V4SImode, quad), gen_lowpart (SImode, op0)));\n+\t sel = gen_lowpart (V8QImode, sel);\n+\t target = gen_lowpart (V8QImode, target);\n+\t emit_insn (gen_aarch64_qtbl1v8qi (target, quad, sel));\n+\t}\n+ else if (vmode == V8QImode)\n \t{\n \t /* Expand the argument to a V16QI mode by duplicating it. */\n \t rtx pair = gen_reg_rtx (V16QImode);\n@@ -25805,7 +25859,18 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)\n {\n rtx pair;\n \n- if (vmode == V8QImode)\n+ if (vmode == V4QImode)\n+\t{\n+\t rtx p8 = gen_reg_rtx (V8QImode);\n+\t emit_insn (gen_aarch64_vec_concatv4qi (p8, op0, op1));\n+\t pair = gen_reg_rtx (V16QImode);\n+\t emit_insn (gen_aarch64_combinev8qi (pair, p8, p8));\n+\t sel = gen_lowpart (V8QImode, sel);\n+\t target = gen_lowpart (V8QImode, target);\n+\t emit_insn (gen_aarch64_qtbl1v8qi (target, pair, sel));\n+\t}\n+\n+ else if (vmode == V8QImode)\n \t{\n \t pair = gen_reg_rtx (V16QImode);\n \t emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));\n@@ -25953,6 +26018,16 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d)\n }\n out = d->target;\n \n+ /* For emulated 4byte vectors, just use a paradoxical subreg\n+ of the 8byte vector. */\n+ if (d->vmode == V2HImode || d->vmode == V4QImode)\n+ {\n+ vmode = vmode == V2HImode ? V4HImode : V8QImode;\n+ out = gen_lowpart (vmode, out);\n+ in0 = gen_lowpart (vmode, in0);\n+ in1 = gen_lowpart (vmode, in1);\n+ }\n+\n emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),\n \t\t\t\t odd ? UNSPEC_TRN2 : UNSPEC_TRN1));\n return true;\n@@ -26028,6 +26103,20 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d)\n odd = !odd;\n }\n out = d->target;\n+ /* For emulated 4byte vectors, use 8byte vectors\n+ and concat the regs together before and then do the ext\n+ that way. */\n+ if (d->vmode == V2HImode || d->vmode == V4QImode)\n+ {\n+ vmode = d->vmode == V2HImode ? 
V4HImode : V8QImode;\n+ out = gen_lowpart (vmode, out);\n+ rtx double_reg = gen_reg_rtx (vmode);\n+ in0 = gen_lowpart (V4QImode, in0);\n+ in1 = gen_lowpart (V4QImode, in1);\n+ emit_insn (gen_aarch64_vec_concatv4qi (gen_lowpart (V8QImode, double_reg), in0, in1));\n+ in0 = double_reg;\n+ in1 = double_reg;\n+ }\n \n emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),\n \t\t\t\t odd ? UNSPEC_UZP2 : UNSPEC_UZP1));\n@@ -26042,6 +26131,7 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)\n poly_uint64 nelt = d->perm.length ();\n rtx out, in0, in1;\n machine_mode vmode = d->vmode;\n+ bool highpart32 = false;\n \n if (GET_MODE_UNIT_SIZE (vmode) > 8)\n return false;\n@@ -26070,8 +26160,28 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)\n }\n out = d->target;\n \n+ /* For emulated 4byte vectors, just use a paradoxical subreg\n+ of the 8byte vector. The zip2 like is zip1 followed by a slection of the \"top\" half. */\n+ if (d->vmode == V2HImode || d->vmode == V4QImode)\n+ {\n+ vmode = vmode == V2HImode ? V4HImode : V8QImode;\n+ out = gen_reg_rtx (vmode);\n+ in0 = gen_lowpart (vmode, in0);\n+ in1 = gen_lowpart (vmode, in1);\n+ highpart32 = high;\n+ high = false;\n+ }\n+\n emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),\n \t\t\t\t high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));\n+\n+\n+ if (d->vmode == V2HImode || d->vmode == V4QImode)\n+ {\n+ gcc_assert (!high);\n+ gcc_assert (vmode == V4HImode || vmode == V8QImode);\n+ emit_insn (gen_aarch64_get_lanev2si (gen_lowpart (SImode, d->target), gen_lowpart (V2SImode, out), highpart32 ? const1_rtx : const0_rtx));\n+ }\n return true;\n }\n \n@@ -26111,10 +26221,32 @@ aarch64_evpc_ext (struct expand_vec_perm_d *d)\n location = d->perm.length ().to_constant () - location;\n }\n \n+ rtx op0 = d->op0;\n+ rtx op1 = d->op1;\n+ rtx target = d->target;\n+ auto vmode = d->vmode;\n+ /* For emulated 4byte vectors, use 8byte vectors\n+ and concat the regs together before and then do the ext\n+ that way. 
*/\n+ if (d->vmode == V2HImode || d->vmode == V4QImode)\n+ {\n+ /* Convert V2HI into V4QI locations as using V8QI here is simplier. */\n+ if (vmode == V2HImode)\n+\tlocation *= 2;\n+ vmode = V8QImode;\n+ target = gen_lowpart (V8QImode, target);\n+ rtx double_reg = gen_reg_rtx (V8QImode);\n+ op0 = gen_lowpart (V4QImode, op0);\n+ op1 = gen_lowpart (V4QImode, op1);\n+ emit_insn (gen_aarch64_vec_concatv4qi (double_reg, op0, op1));\n+ op0 = double_reg;\n+ op1 = double_reg;\n+ }\n+\n offset = GEN_INT (location);\n- emit_set_insn (d->target,\n-\t\t gen_rtx_UNSPEC (d->vmode,\n-\t\t\t\t gen_rtvec (3, d->op0, d->op1, offset),\n+ emit_set_insn (target,\n+\t\t gen_rtx_UNSPEC (vmode,\n+\t\t\t\t gen_rtvec (3, op0, op1, offset),\n \t\t\t\t UNSPEC_EXT));\n return true;\n }\n@@ -26173,8 +26305,20 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d)\n \t\t\t\t\t d->target, pred, d->op0));\n return true;\n }\n- rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);\n- emit_set_insn (d->target, src);\n+\n+ auto vmode = d->vmode;\n+ rtx out = d->target;\n+ rtx in0 = d->op0;\n+ /* For emulated 4byte vectors, just use a paradoxical subreg\n+ of the 8byte vector. */\n+ if (vmode == V2HImode || vmode == V4QImode)\n+ {\n+ vmode = vmode == V2HImode ? V4HImode : V8QImode;\n+ out = gen_lowpart (vmode, out);\n+ in0 = gen_lowpart (vmode, in0);\n+ }\n+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec);\n+ emit_set_insn (out, src);\n return true;\n }\n \n@@ -26255,7 +26399,7 @@ aarch64_evpc_tbl (struct expand_vec_perm_d *d)\n /* Generic code will try constant permutation twice. Once with the\n original mode and again with the elements lowered to QImode.\n So wait and don't do the selector expansion ourselves. 
*/\n- if (vmode != V8QImode && vmode != V16QImode)\n+ if (vmode != V8QImode && vmode != V16QImode && vmode != V4QImode)\n return false;\n \n /* to_constant is safe since this routine is specific to Advanced SIMD\n@@ -26294,10 +26438,11 @@ aarch64_evpc_tbl (struct expand_vec_perm_d *d)\n \t}\n }\n \n+ rtx target = d->target;\n sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));\n sel = force_reg (vmode, sel);\n \n- aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);\n+ aarch64_expand_vec_perm_1 (target, d->op0, d->op1, sel);\n return true;\n }\n \n@@ -26460,9 +26605,21 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)\n }\n gcc_assert (extractindex < nelt);\n \n+ rtx target = d->target;\n+\n+ /* For emulated 4byte vectors, just use a paradoxical subreg\n+ of the 8byte vector. */\n+ if (d->vmode == V2HImode || d->vmode == V4QImode)\n+ {\n+ mode = mode == V2HImode ? V4HImode : V8QImode;\n+ target = gen_lowpart (mode, target);\n+ insv = gen_lowpart (mode, insv);\n+ extractv = gen_lowpart (mode, extractv);\n+ }\n+\n insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);\n expand_operand ops[5];\n- create_output_operand (&ops[0], d->target, mode);\n+ create_output_operand (&ops[0], target, mode);\n create_input_operand (&ops[1], insv, mode);\n create_integer_operand (&ops[2], 1 << idx);\n create_input_operand (&ops[3], extractv, mode);\n@@ -26537,7 +26694,8 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,\n \t\t\t\t rtx target, rtx op0, rtx op1,\n \t\t\t\t const vec_perm_indices &sel)\n {\n- struct expand_vec_perm_d d;\n+ expand_vec_perm_d d;\n+ bool truncate = false;\n \n /* Check whether the mask can be applied to a single vector. 
*/\n if (sel.ninputs () == 1\n@@ -26555,6 +26713,7 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,\n }\n else\n d.one_vector_p = false;\n+ rtx_insn *last = get_last_insn ();\n \n d.zero_op0_p = op0 == CONST0_RTX (op_mode);\n d.zero_op1_p = op1 == CONST0_RTX (op_mode);\n@@ -26572,12 +26731,14 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,\n d.op1 = op1 ? force_reg (op_mode, op1) : NULL_RTX;\n d.testing_p = !target;\n \n- if (!d.testing_p)\n- return aarch64_expand_vec_perm_const_1 (&d);\n-\n- rtx_insn *last = get_last_insn ();\n bool ret = aarch64_expand_vec_perm_const_1 (&d);\n- gcc_assert (last == get_last_insn ());\n+ if (truncate && target)\n+ {\n+ gcc_assert (target != d.target);\n+ emit_insn (gen_truncv4hiv4qi2 (target, gen_lowpart (V4HImode, d.target)));\n+ }\n+ if (d.testing_p)\n+ gcc_assert (last == get_last_insn ());\n \n return ret;\n }\ndiff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt\nindex c2c9965b0625..02fafcee955f 100644\n--- a/gcc/config/aarch64/aarch64.opt\n+++ b/gcc/config/aarch64/aarch64.opt\n@@ -75,6 +75,10 @@ Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL)\n EnumValue\n Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE)\n \n+mautovector32bits\n+Target Optimization Var(aarch64_32bit_auto_vec) Init(1)\n+Emulate a vector of 4 element of 1byte (32bits) and 2 elements of 2 bytes using the 64bits register.\n+\n mbig-endian\n Target RejectNegative Mask(BIG_END)\n Assume target CPU is configured as big endian.\ndiff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md\nindex 8269b0cdcd92..719e5e91168c 100644\n--- a/gcc/config/aarch64/iterators.md\n+++ b/gcc/config/aarch64/iterators.md\n@@ -86,6 +86,9 @@\n ;; Integer Advanced SIMD modes.\n (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])\n \n+;; Integer Advanced SIMD modes plus 32bit modes\n+(define_mode_iterator VDQS_I [V4QI V2HI V8QI V16QI V4HI V8HI V2SI V4SI 
V2DI])\n+\n ;; Advanced SIMD and scalar, 64 & 128-bit container, all integer modes.\n (define_mode_iterator VSDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI QI HI SI DI])\n \n@@ -93,12 +96,18 @@\n ;; integer modes; 64-bit scalar integer mode.\n (define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI])\n \n+;; Same as above plus the 32bit modes\n+(define_mode_iterator VSDQS_I_DI [V4QI V2HI V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI])\n+\n ;; Double vector modes.\n (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF V4BF])\n \n ;; Double vector modes suitable for moving. Includes BFmode.\n (define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF])\n \n+;; Double and Single vector modes suitable for moving; Includes BFmode.\n+(define_mode_iterator VDHMOV [V4QI V2HI V8QI V4HI V4HF V4BF V2SI V2SF])\n+\n ;; 64-bit modes for operations that implicitly clear the top bits of a Q reg.\n (define_mode_iterator VDZ [V8QI V4HI V4HF V4BF V2SI V2SF DI DF])\n \n@@ -117,6 +126,9 @@\n ;; 128 and 64-bit container; 8, 16, 32-bit vector integer modes\n (define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI])\n \n+;; 128, 64, and 32-bit container; 8, 16, 32-bit vector integer modes\n+(define_mode_iterator VDQS_BHSI [V4QI V2HI V8QI V16QI V4HI V8HI V2SI V4SI])\n+\n ;; Quad vector modes.\n (define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF])\n \n@@ -132,6 +144,9 @@\n ;; Double integer vector modes.\n (define_mode_iterator VD_I [V8QI V4HI V2SI DI])\n \n+;; VD without 2 element modes.\n+(define_mode_iterator VD_NO2E [V8QI V4HI])\n+\n ;; Quad integer vector modes.\n (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])\n \n@@ -144,6 +159,9 @@\n ;; BFmode vector modes.\n (define_mode_iterator VBF [V4BF V8BF])\n \n+;; 32bit vector integer modes.\n+(define_mode_iterator VH_I [V4QI V2HI])\n+\n ;; This mode iterator allows :P to be used for patterns that operate on\n ;; addresses in different modes. 
In LP64, only DI will match, while in\n ;; ILP32, either can match.\n@@ -200,13 +218,21 @@\n (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI\n \t\t\t\tV4HF V8HF V4BF V8BF V2SF V4SF V2DF])\n \n+;; All Advanced SIMD modes suitable for moving, loading, and storing.\n+;; Includes 32bit vectors V4QI and V2HI\n+(define_mode_iterator VALLS_F16 [V4QI V2HI V8QI V16QI V4HI V8HI V2SI V4SI V2DI\n+\t\t\t\t V4HF V8HF V4BF V8BF V2SF V4SF V2DF])\n+\n ;; The VALL_F16 modes except the 128-bit 2-element ones.\n (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI\n-\t\t\t\tV4HF V8HF V2SF V4SF])\n+\t\t\t\t V4HF V8HF V2SF V4SF])\n \n ;; All Advanced SIMD modes barring HF modes, plus DI.\n (define_mode_iterator VALLDI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI])\n \n+\n+(define_mode_iterator VALLSDI [V4QI V2HI V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI])\n+\n ;; All Advanced SIMD modes and DI.\n (define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI\n \t\t\t\t V4HF V8HF V4BF V8BF V2SF V4SF V2DF DI])\n@@ -215,6 +241,10 @@\n (define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI V4BF V8BF\n \t\t\t V2DI V4HF V8HF V2SF V4SF V2DF DI DF])\n \n+;; All Advanced SIMD modes (including 32bit), plus DI and DF.\n+(define_mode_iterator VALLSDIF [V4QI V2HI V8QI V16QI V4HI V8HI V2SI V4SI V4BF V8BF\n+\t\t\t V2DI V4HF V8HF V2SF V4SF V2DF DI DF])\n+\n ;; All Advanced SIMD polynomial modes and DI.\n (define_mode_iterator VALLP [V8QI V16QI V4HI V8HI V2DI DI])\n \n@@ -250,7 +280,7 @@\n (define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF])\n \n ;; VDC plus SI and SF.\n-(define_mode_iterator VDCSIF [V8QI V4HI V4BF V4HF V2SI V2SF SI SF DI DF])\n+(define_mode_iterator VDCSIF [V4QI V2HI V8QI V4HI V4BF V4HF V2SI V2SF SI SF DI DF])\n \n ;; Polynomial modes for vector combines.\n (define_mode_iterator VDC_P [V8QI V4HI DI])\n@@ -313,6 +343,9 @@\n ;; All byte modes.\n (define_mode_iterator VB [V8QI V16QI])\n \n+;; All byte 
modes including the 32bit modes.\n+(define_mode_iterator VB_WS [V4QI V8QI V16QI])\n+\n ;; 1 and 2 lane DI and DF modes.\n (define_mode_iterator V12DIF [V1DI V1DF V2DI V2DF])\n \n@@ -1193,8 +1226,8 @@\n ;; For scalar usage of vector/FP registers\n (define_mode_attr v [(QI \"b\") (HI \"h\") (SI \"s\") (DI \"d\")\n \t\t (HF \"h\") (SF \"s\") (DF \"d\")\n-\t\t (V8QI \"\") (V16QI \"\")\n-\t\t (V4HI \"\") (V8HI \"\")\n+\t\t (V4QI \"\") (V8QI \"\") (V16QI \"\")\n+\t\t (V2HI \"\") (V4HI \"\") (V8HI \"\")\n \t\t (V2SI \"\") (V4SI \"\")\n \t\t (V2DI \"\") (V2SF \"\")\n \t\t (V4SF \"\") (V4HF \"\")\n@@ -1224,8 +1257,10 @@\n \n ;; Map a vector to the number of units in it, if the size of the mode\n ;; is constant.\n-(define_mode_attr nunits [(V8QI \"8\") (V16QI \"16\")\n-\t\t\t (V4HI \"4\") (V8HI \"8\")\n+;; Note V4QI/V2HI even though there are 4/2 units in them, this is\n+;; talking about the total units in the underlying form\n+(define_mode_attr nunits [(V4QI \"8\") (V8QI \"8\") (V16QI \"16\")\n+\t\t\t (V2HI \"4\") (V4HI \"4\") (V8HI \"8\")\n \t\t\t (V2SI \"2\") (V4SI \"4\")\n \t\t\t (V1DI \"1\") (V2DI \"2\")\n \t\t\t (V4HF \"4\") (V8HF \"8\")\n@@ -1236,9 +1271,10 @@\n \t\t\t (V8DI \"8\")])\n \n ;; Map a mode to the number of bits in it, if the size of the mode\n-;; is constant.\n-(define_mode_attr bitsize [(V8QI \"64\") (V16QI \"128\")\n-\t\t\t (V4HI \"64\") (V8HI \"128\")\n+;; is constant. 
Except for V4QI and V2HI which uses the 64bit as\n+;; emulating of 32bit modes\n+(define_mode_attr bitsize [(V4QI \"64\") (V8QI \"64\") (V16QI \"128\")\n+\t\t\t (V2HI \"64\") (V4HI \"64\") (V8HI \"128\")\n \t\t\t (V2SI \"64\") (V4SI \"128\")\n \t\t\t (V1DI \"64\") (V2DI \"128\")])\n \n@@ -1293,8 +1329,8 @@\n ;; Map modes to Usg and Usj constraints for SISD right shifts\n (define_mode_attr cmode_simd [(SI \"g\") (DI \"j\")])\n \n-(define_mode_attr Vtype [(V8QI \"8b\") (V16QI \"16b\")\n-\t\t\t (V4HI \"4h\") (V8HI \"8h\")\n+(define_mode_attr Vtype [(V4QI \"8b\") (V8QI \"8b\") (V16QI \"16b\")\n+\t\t\t (V2HI \"4h\") (V4HI \"4h\") (V8HI \"8h\")\n \t\t\t (V4BF \"4h\") (V8BF \"8h\")\n (V2SI \"2s\") (V4SI \"4s\")\n (DI \"1d\") (DF \"1d\")\n@@ -1336,8 +1372,8 @@\n (define_mode_attr Vrevsuff [(V4HI \"16\") (V8HI \"16\") (V2SI \"32\")\n (V4SI \"32\") (V2DI \"64\")])\n \n-(define_mode_attr Vmtype [(V8QI \".8b\") (V16QI \".16b\")\n-\t\t\t (V4HI \".4h\") (V8HI \".8h\")\n+(define_mode_attr Vmtype [(V4QI \".8b\") (V8QI \".8b\") (V16QI \".16b\")\n+\t\t\t (V2HI \".4h\") (V4HI \".4h\") (V8HI \".8h\")\n \t\t\t (V2SI \".2s\") (V4SI \".4s\")\n \t\t\t (V2DI \".2d\") (V4HF \".4h\")\n \t\t\t (V8HF \".8h\") (V4BF \".4h\")\n@@ -1355,8 +1391,8 @@\n \t\t\t (HI \"\")])\n \n ;; Mode-to-individual element type mapping.\n-(define_mode_attr Vetype [(V8QI \"b\") (V16QI \"b\")\n-\t\t\t (V4HI \"h\") (V8HI \"h\")\n+(define_mode_attr Vetype [(V4QI \"b\") (V8QI \"b\") (V16QI \"b\")\n+\t\t\t (V2HI \"h\") (V4HI \"h\") (V8HI \"h\")\n \t\t\t (V2SI \"s\") (V4SI \"s\")\n \t\t\t (V2DI \"d\") (V1DI \"d\")\n \t\t\t (V4HF \"h\") (V8HF \"h\")\n@@ -1465,8 +1501,8 @@\n \t\t\t (SI \"s\") (DI \"d\")])\n \n ;; Mode-to-bitwise operation type mapping.\n-(define_mode_attr Vbtype [(V8QI \"8b\") (V16QI \"16b\")\n-\t\t\t (V4HI \"8b\") (V8HI \"16b\")\n+(define_mode_attr Vbtype [(V4QI \"8b\") (V8QI \"8b\") (V16QI \"16b\")\n+\t\t\t (V2HI \"8b\") (V4HI \"8b\") (V8HI \"16b\")\n \t\t\t (V2SI \"8b\") (V4SI \"16b\")\n \t\t\t (V2DI 
\"16b\") (V4HF \"8b\")\n \t\t\t (V8HF \"16b\") (V2SF \"8b\")\n@@ -1529,8 +1565,8 @@\n \t\t\t (V4x2DF \"v2df\") (V4x8BF \"v8bf\")])\n \n ;; Define element mode for each vector mode.\n-(define_mode_attr VEL [(V8QI \"QI\") (V16QI \"QI\")\n-\t\t (V4HI \"HI\") (V8HI \"HI\")\n+(define_mode_attr VEL [(V4QI \"QI\") (V8QI \"QI\") (V16QI \"QI\")\n+\t\t (V2HI \"HI\") (V4HI \"HI\") (V8HI \"HI\")\n \t\t (V2SI \"SI\") (V4SI \"SI\")\n \t\t (DI \"DI\") (V1DI \"DI\")\n \t\t (V2DI \"DI\")\n@@ -1551,8 +1587,8 @@\n \t\t (VNx2DF \"DF\")])\n \n ;; Define element mode for each vector mode (lower case).\n-(define_mode_attr Vel [(V8QI \"qi\") (V16QI \"qi\")\n-\t\t (V4HI \"hi\") (V8HI \"hi\")\n+(define_mode_attr Vel [(V4QI \"qi\") (V8QI \"qi\") (V16QI \"qi\")\n+\t\t (V2HI \"hi\") (V4HI \"hi\") (V8HI \"hi\")\n \t\t (V2SI \"si\") (V4SI \"si\")\n \t\t (DI \"di\") (V1DI \"si\")\n \t\t (V2DI \"di\")\n@@ -1635,7 +1671,8 @@\n (define_mode_attr V1half [(V2DI \"v1di\") (V2DF \"v1df\")])\n \n ;; Double modes of vector modes.\n-(define_mode_attr VDBL [(V8QI \"V16QI\") (V4HI \"V8HI\")\n+(define_mode_attr VDBL [(V4QI \"V8QI\") (V2HI \"V4HI\")\n+\t\t\t(V8QI \"V16QI\") (V4HI \"V8HI\")\n \t\t\t(V4HF \"V8HF\") (V4BF \"V8BF\")\n \t\t\t(V2SI \"V4SI\") (V2SF \"V4SF\")\n \t\t\t(SI \"V2SI\") (SF \"V2SF\")\n@@ -1648,7 +1685,8 @@\n (define_mode_attr Vdtype [(V4HF \"8h\") (V2SF \"4s\")])\n \n ;; Double modes of vector modes (lower case).\n-(define_mode_attr Vdbl [(V8QI \"v16qi\") (V4HI \"v8hi\")\n+(define_mode_attr Vdbl [(V4QI \"v8qi\") (V2HI \"v4hi\")\n+\t\t\t(V8QI \"v16qi\") (V4HI \"v8hi\")\n \t\t\t(V4HF \"v8hf\") (V4BF \"v8bf\")\n \t\t\t(V2SI \"v4si\") (V2SF \"v4sf\")\n \t\t\t(SI \"v2si\") (DI \"v2di\")\n@@ -1698,7 +1736,8 @@\n \t\t\t (V2DI \"4s\")])\n \n ;; Widened modes of vector modes.\n-(define_mode_attr VWIDE [(V8QI \"V8HI\") (V4HI \"V4SI\")\n+(define_mode_attr VWIDE [(V4QI \"V4HI\") (V2HI \"V2SI\")\n+\t\t\t (V8QI \"V8HI\") (V4HI \"V4SI\")\n \t\t\t (V2SI \"V2DI\") (V16QI \"V8HI\")\n \t\t\t (V8HI \"V4SI\") 
(V4SI \"V2DI\")\n \t\t\t (HI \"SI\") (SI \"DI\")\n@@ -1711,7 +1750,8 @@\n \t\t\t (VNx4BI \"VNx2BI\")])\n \n ;; Modes with the same number of elements but strictly 2x the width.\n-(define_mode_attr V2XWIDE [(V8QI \"V8HI\") (V4HI \"V4SI\")\n+(define_mode_attr V2XWIDE [(V4QI \"V4HI\") (V2HI \"V4SI\")\n+\t\t\t (V8QI \"V8HI\") (V4HI \"V4SI\")\n \t\t\t (V16QI \"V16HI\") (V8HI \"V8SI\")\n \t\t\t (V2SI \"V2DI\") (V4SI \"V4DI\")\n \t\t\t (V2DI \"V2TI\") (DI \"TI\")\n@@ -1739,7 +1779,8 @@\n (define_mode_attr VWIDE_PRED [(VNx8HF \"VNx4BI\") (VNx4SF \"VNx2BI\")])\n \n ;; Widened modes of vector modes, lowercase\n-(define_mode_attr Vwide [(V2SF \"v2df\") (V4HF \"v4sf\")\n+(define_mode_attr Vwide [(V4QI \"v4hi\") (V2HI \"v2si\")\n+\t\t\t (V2SF \"v2df\") (V4HF \"v4sf\")\n \t\t\t (VNx16QI \"vnx8hi\") (VNx8HI \"vnx4si\")\n \t\t\t (VNx4SI \"vnx2di\")\n \t\t\t (VNx8HF \"vnx4sf\") (VNx4SF \"vnx2df\")\n@@ -1762,7 +1803,8 @@\n \t\t\t (V8HI \"\") (V4SI \"\")])\n \n ;; Scalar mode of widened vector reduction.\n-(define_mode_attr VWIDE_S [(V8QI \"HI\") (V4HI \"SI\")\n+(define_mode_attr VWIDE_S [(V4QI \"HI\") (V2HI \"SI\")\n+\t\t\t (V8QI \"HI\") (V4HI \"SI\")\n \t\t\t (V2SI \"DI\") (V16QI \"HI\")\n \t\t\t (V8HI \"SI\") (V4SI \"DI\")])\n \n@@ -1788,7 +1830,8 @@\n \t\t\t (VNx2DI \"q\")])\n \n ;; Widened mode register suffixes for VDW/VQW.\n-(define_mode_attr Vmwtype [(V8QI \".8h\") (V4HI \".4s\")\n+(define_mode_attr Vmwtype [(V4QI \".8h\") (V2HI \".4s\")\n+\t\t\t (V8QI \".8h\") (V4HI \".4s\")\n \t\t\t (V2SI \".2d\") (V16QI \".8h\")\n \t\t\t (V8HI \".4s\") (V4SI \".2d\")\n \t\t\t (V4HF \".4s\") (V2SF \".2d\")\n@@ -1802,6 +1845,7 @@\n ;; Whether a mode fits in W or X registers (i.e. 
\"w\" for 32-bit modes\n ;; and \"x\" for 64-bit modes).\n (define_mode_attr single_wx [(SI \"w\") (SF \"w\")\n+\t\t\t (V4QI \"w\") (V2HI \"w\")\n \t\t\t (V8QI \"x\") (V4HI \"x\")\n \t\t\t (V4HF \"x\") (V4BF \"x\")\n \t\t\t (V2SI \"x\") (V2SF \"x\")\n@@ -1810,6 +1854,7 @@\n ;; Whether a mode fits in S or D registers (i.e. \"s\" for 32-bit modes\n ;; and \"d\" for 64-bit modes).\n (define_mode_attr single_type [(SI \"s\") (SF \"s\")\n+\t\t\t (V4QI \"s\") (V2HI \"s\")\n \t\t\t (V8QI \"d\") (V4HI \"d\")\n \t\t\t (V4HF \"d\") (V4BF \"d\")\n \t\t\t (V2SI \"d\") (V2SF \"d\")\n@@ -1818,6 +1863,7 @@\n ;; Whether a double-width mode fits in D or Q registers (i.e. \"d\" for\n ;; 32-bit modes and \"q\" for 64-bit modes).\n (define_mode_attr single_dtype [(SI \"d\") (SF \"d\")\n+\t\t\t (V4QI \"q\") (V2HI \"q\")\n \t\t\t (V8QI \"q\") (V4HI \"q\")\n \t\t\t (V4HF \"q\") (V4BF \"q\")\n \t\t\t (V2SI \"q\") (V2SF \"q\")\n@@ -1833,8 +1879,8 @@\n \n ;; Corresponding core element mode for each vector mode. This is a\n ;; variation on <vw> mapping FP modes to GP regs.\n-(define_mode_attr vwcore [(V8QI \"w\") (V16QI \"w\")\n-\t\t\t (V4HI \"w\") (V8HI \"w\")\n+(define_mode_attr vwcore [(V4QI \"w\") (V8QI \"w\") (V16QI \"w\")\n+\t\t\t (V2HI \"w\") (V4HI \"w\") (V8HI \"w\")\n \t\t\t (V2SI \"w\") (V4SI \"w\")\n \t\t\t (DI \"x\") (V2DI \"x\")\n \t\t\t (V4HF \"w\") (V8HF \"w\")\n@@ -1860,8 +1906,8 @@\n (define_mode_attr Vallxd [(QI \"8b\") (HI \"4h\") (SI \"2s\")])\n \n ;; Mode with floating-point values replaced by like-sized integers.\n-(define_mode_attr V_INT_EQUIV [(V8QI \"V8QI\") (V16QI \"V16QI\")\n-\t\t\t (V4HI \"V4HI\") (V8HI \"V8HI\")\n+(define_mode_attr V_INT_EQUIV [(V4QI \"V4QI\") (V8QI \"V8QI\") (V16QI \"V16QI\")\n+\t\t\t (V2HI \"V2HI\") (V4HI \"V4HI\") (V8HI \"V8HI\")\n \t\t\t (V2SI \"V2SI\") (V4SI \"V4SI\")\n \t\t\t (DI \"DI\") (V2DI \"V2DI\")\n \t\t\t (V4HF \"V4HI\") (V8HF \"V8HI\")\n@@ -1879,8 +1925,8 @@\n ])\n \n ;; Lower case mode with floating-point values replaced by 
like-sized integers.\n-(define_mode_attr v_int_equiv [(V8QI \"v8qi\") (V16QI \"v16qi\")\n-\t\t\t (V4HI \"v4hi\") (V8HI \"v8hi\")\n+(define_mode_attr v_int_equiv [(V4QI \"v4qi\") (V8QI \"v8qi\") (V16QI \"v16qi\")\n+\t\t\t (V2HI \"v2hi\") (V4HI \"v4hi\") (V8HI \"v8hi\")\n \t\t\t (V2SI \"v2si\") (V4SI \"v4si\")\n \t\t\t (DI \"di\") (V2DI \"v2di\")\n \t\t\t (V4HF \"v4hi\") (V8HF \"v8hi\")\n@@ -1954,8 +2000,8 @@\n \t\t\t (V4SF \"v4si\") (V2DF \"v2di\")])\n \n ;; Lower case element modes (as used in shift immediate patterns).\n-(define_mode_attr ve_mode [(V8QI \"qi\") (V16QI \"qi\")\n-\t\t\t (V4HI \"hi\") (V8HI \"hi\")\n+(define_mode_attr ve_mode [(V4QI \"qi\") (V8QI \"qi\") (V16QI \"qi\")\n+\t\t\t (V2HI \"hi\") (V4HI \"hi\") (V8HI \"hi\")\n \t\t\t (V2SI \"si\") (V4SI \"si\")\n \t\t\t (DI \"di\") (V2DI \"di\")\n \t\t\t (QI \"qi\") (HI \"hi\")\n@@ -2090,8 +2136,8 @@\n \t\t (SF \"_fp\")])\n \n ;; Defined to '_q' for 128-bit types.\n-(define_mode_attr q [(V8QI \"\") (V16QI \"_q\")\n-\t\t (V4HI \"\") (V8HI \"_q\")\n+(define_mode_attr q [(V4QI \"\") (V8QI \"\") (V16QI \"_q\")\n+\t\t (V2HI \"\") (V4HI \"\") (V8HI \"_q\")\n \t\t (V4BF \"\") (V8BF \"_q\")\n \t\t (V2SI \"\") (V4SI \"_q\")\n \t\t (DI \"\") (V2DI \"_q\")\n@@ -2127,6 +2173,7 @@\n \n ;; Equivalent of the \"q\" attribute for the <VDBL> mode.\n (define_mode_attr dblq [(SI \"\") (SF \"\")\n+\t\t (V4QI \"\") (V2HI \"\")\n \t\t (V8QI \"_q\") (V4HI \"_q\")\n \t\t (V4HF \"_q\") (V4BF \"_q\")\n \t\t (V2SI \"_q\") (V2SF \"_q\")\n@@ -3789,6 +3836,12 @@\n \t\t\t\t (UNSPEC_FMAXNM \"fmaxnm\")\n \t\t\t\t (UNSPEC_FMINNM \"fminnm\")])\n \n+(define_int_attr CODE \t\t [(UNSPEC_UMAXV \"UMAX\")\n+\t\t\t\t (UNSPEC_UMINV \"UMIN\")\n+\t\t\t\t (UNSPEC_SMAXV \"SMAX\")\n+\t\t\t\t (UNSPEC_SMINV \"SMIN\")])\n+\n+\n (define_code_attr binqops_op [(ss_plus \"sqadd\")\n \t\t\t (us_plus \"uqadd\")\n \t\t\t (ss_minus \"sqsub\")\ndiff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c 
b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c\nindex 1290772216eb..13092b6a3f10 100644\n--- a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c\n+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c\n@@ -14,6 +14,8 @@ f (int16_t *x, int16_t *y, int8_t *z, int n)\n }\n }\n \n+/* The inner vectorized part of the loop will use q for 16bit and d for 8bit addition.\n+ And then when in the peeled off final part, will use emulated 32byte vectors V4HI and V2QI for the peeled part */\n /* { dg-final { scan-assembler-times {\\tadd\\tv[0-9]+\\.8h,} 1 } } */\n-/* { dg-final { scan-assembler-times {\\tadd\\tv[0-9]+\\.8b,} 1 } } */\n-/* { dg-final { scan-assembler-not {\\tadd\\tv[0-9]+\\.4h,} } } */\n+/* { dg-final { scan-assembler-times {\\tadd\\tv[0-9]+\\.8b,} 2 } } */\n+/* { dg-final { scan-assembler-times {\\tadd\\tv[0-9]+\\.4h,} 1 } } */\ndiff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c\nindex 6c09b5b146bf..defb564650e0 100644\n--- a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c\n+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c\n@@ -14,5 +14,6 @@ f (int32_t *x, int32_t *y, int16_t *z, int n)\n }\n }\n \n-/* { dg-final { scan-assembler-times {\\tsxtl\\tv[0-9]+\\.4s, v[0-9]+\\.4h\\n} 1 } } */\n+/* The second sxtl is the peeled off part that does V2HI->V2SI conversion. 
*/\n+/* { dg-final { scan-assembler-times {\\tsxtl\\tv[0-9]+\\.4s, v[0-9]+\\.4h\\n} 2 } } */\n /* { dg-final { scan-assembler-times {\\tadd\\tv[0-9]+\\.4s,} 1 } } */\ndiff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c\nindex 94a66c545efe..e885f2973dc0 100644\n--- a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c\n+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c\n@@ -14,5 +14,6 @@ f (int16_t *x, int16_t *y, int8_t *z, int n)\n }\n }\n \n-/* { dg-final { scan-assembler-times {\\tsxtl\\tv[0-9]+\\.8h, v[0-9]+\\.8b\\n} 1 } } */\n+/* The second sxtl is the peeled off part that does V4QI->V4HI conversion. */\n+/* { dg-final { scan-assembler-times {\\tsxtl\\tv[0-9]+\\.8h, v[0-9]+\\.8b\\n} 2 } } */\n /* { dg-final { scan-assembler-times {\\tadd\\tv[0-9]+\\.8h,} 1 } } */\ndiff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp\nindex 0c2fd83f45c8..946ac7cff140 100644\n--- a/gcc/testsuite/lib/target-supports.exp\n+++ b/gcc/testsuite/lib/target-supports.exp\n@@ -9365,7 +9365,7 @@ proc available_vector_sizes { } {\n \tif { [check_effective_target_aarch64_sve] } {\n \t lappend result [aarch64_sve_bits]\n \t}\n-\tlappend result 128 64\n+\tlappend result 128 64 32\n } elseif { [istarget arm*-*-*]\n \t\t&& [check_effective_target_arm_neon_ok] } {\n \tlappend result 128 64\n", "prefixes": [ "v1", "01/11" ] }