@@ -206,3 +206,12 @@
BUILTIN_VDQ_BHSI (BINOP, smin)
BUILTIN_VDQ_BHSI (BINOP, umax)
BUILTIN_VDQ_BHSI (BINOP, umin)
+
+ /* Implemented by
+ aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
+ BUILTIN_VALL (BINOP, zip1)
+ BUILTIN_VALL (BINOP, zip2)
+ BUILTIN_VALL (BINOP, uzp1)
+ BUILTIN_VALL (BINOP, uzp2)
+ BUILTIN_VALL (BINOP, trn1)
+ BUILTIN_VALL (BINOP, trn2)
@@ -128,7 +128,8 @@
; simd_store4s store single structure from one lane for four registers (ST4 [index]).
; simd_tbl table lookup.
; simd_trn transpose.
-; simd_zip zip/unzip.
+; simd_uzp unzip.
+; simd_zip zip.
(define_attr "simd_type"
"simd_abd,\
@@ -230,6 +231,7 @@
simd_store4s,\
simd_tbl,\
simd_trn,\
+ simd_uzp,\
simd_zip,\
none"
(const_string "none"))
@@ -3366,6 +3368,17 @@
DONE;
})
+(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
+ [(set (match_operand:VALL 0 "register_operand" "=w")
+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
+ (match_operand:VALL 2 "register_operand" "w")]
+ PERMUTE))]
+ "TARGET_SIMD"
+ "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "simd_type" "simd_<PERMUTE:perm_insn>")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
(define_insn "aarch64_st2<mode>_dreg"
[(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:TI [(match_operand:OI 1 "register_operand" "w")
@@ -6815,6 +6815,261 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
+/* Recognize patterns suitable for the TRN instructions. */
+static bool
+aarch64_evpc_trn (struct expand_vec_perm_d *d)
+{
+ unsigned int i, odd, mask, nelt = d->nelt;
+ rtx out, in0, in1, x;
+ rtx (*gen) (rtx, rtx, rtx);
+ enum machine_mode vmode = d->vmode;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 8)
+ return false;
+
+ /* Note that these are little-endian tests.
+ We correct for big-endian later. */
+ if (d->perm[0] == 0)
+ odd = 0;
+ else if (d->perm[0] == 1)
+ odd = 1;
+ else
+ return false;
+ mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
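+  /* Match the TRN pattern: each even result lane I must be lane I + ODD
+     of the first input, and result lane I + 1 must be the same lane of
+     the second input.  */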
+ for (i = 0; i < nelt; i += 2)
+ {
+ if (d->perm[i] != i + odd)
+ return false;
+ if (d->perm[i + 1] != ((i + nelt + odd) & mask))
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ if (BYTES_BIG_ENDIAN)
+ {
+ x = in0, in0 = in1, in1 = x;
+ odd = !odd;
+ }
+ out = d->target;
+
+ if (odd)
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_trn2v16qi; break;
+ case V8QImode: gen = gen_aarch64_trn2v8qi; break;
+ case V8HImode: gen = gen_aarch64_trn2v8hi; break;
+ case V4HImode: gen = gen_aarch64_trn2v4hi; break;
+ case V4SImode: gen = gen_aarch64_trn2v4si; break;
+ case V2SImode: gen = gen_aarch64_trn2v2si; break;
+ case V2DImode: gen = gen_aarch64_trn2v2di; break;
+ case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
+ case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
+ case V2DFmode: gen = gen_aarch64_trn2v2df; break;
+ default:
+ return false;
+ }
+ }
+ else
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_trn1v16qi; break;
+ case V8QImode: gen = gen_aarch64_trn1v8qi; break;
+ case V8HImode: gen = gen_aarch64_trn1v8hi; break;
+ case V4HImode: gen = gen_aarch64_trn1v4hi; break;
+ case V4SImode: gen = gen_aarch64_trn1v4si; break;
+ case V2SImode: gen = gen_aarch64_trn1v2si; break;
+ case V2DImode: gen = gen_aarch64_trn1v2di; break;
+ case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
+ case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
+ case V2DFmode: gen = gen_aarch64_trn1v2df; break;
+ default:
+ return false;
+ }
+ }
+
+ emit_insn (gen (out, in0, in1));
+ return true;
+}
+
+/* Recognize patterns suitable for the UZP instructions. */
+static bool
+aarch64_evpc_uzp (struct expand_vec_perm_d *d)
+{
+ unsigned int i, odd, mask, nelt = d->nelt;
+ rtx out, in0, in1, x;
+ rtx (*gen) (rtx, rtx, rtx);
+ enum machine_mode vmode = d->vmode;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 8)
+ return false;
+
+ /* Note that these are little-endian tests.
+ We correct for big-endian later. */
+ if (d->perm[0] == 0)
+ odd = 0;
+ else if (d->perm[0] == 1)
+ odd = 1;
+ else
+ return false;
+ mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
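+  /* Match the UZP pattern: result lane I must be lane 2 * I + ODD of the
+     concatenated inputs, i.e. UZP1 collects the even-numbered lanes and
+     UZP2 the odd-numbered ones.  */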
+ for (i = 0; i < nelt; i++)
+ {
+ unsigned elt = (i * 2 + odd) & mask;
+ if (d->perm[i] != elt)
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ if (BYTES_BIG_ENDIAN)
+ {
+ x = in0, in0 = in1, in1 = x;
+ odd = !odd;
+ }
+ out = d->target;
+
+ if (odd)
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
+ case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
+ case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
+ case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
+ case V4SImode: gen = gen_aarch64_uzp2v4si; break;
+ case V2SImode: gen = gen_aarch64_uzp2v2si; break;
+ case V2DImode: gen = gen_aarch64_uzp2v2di; break;
+ case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
+ case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
+ case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
+ default:
+ return false;
+ }
+ }
+ else
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
+ case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
+ case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
+ case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
+ case V4SImode: gen = gen_aarch64_uzp1v4si; break;
+ case V2SImode: gen = gen_aarch64_uzp1v2si; break;
+ case V2DImode: gen = gen_aarch64_uzp1v2di; break;
+ case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
+ case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
+ case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
+ default:
+ return false;
+ }
+ }
+
+ emit_insn (gen (out, in0, in1));
+ return true;
+}
+
+/* Recognize patterns suitable for the ZIP instructions. */
+static bool
+aarch64_evpc_zip (struct expand_vec_perm_d *d)
+{
+ unsigned int i, high, mask, nelt = d->nelt;
+ rtx out, in0, in1, x;
+ rtx (*gen) (rtx, rtx, rtx);
+ enum machine_mode vmode = d->vmode;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 8)
+ return false;
+
+ /* Note that these are little-endian tests.
+ We correct for big-endian later. */
+ high = nelt / 2;
+ if (d->perm[0] == high)
+ /* Do Nothing. */
+ ;
+ else if (d->perm[0] == 0)
+ high = 0;
+ else
+ return false;
+ mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
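+  /* Match the ZIP pattern: result lanes 2 * I and 2 * I + 1 must be lane
+     I + HIGH of the first and second inputs respectively, so ZIP1
+     interleaves the low halves and ZIP2 the high halves.  */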
+ for (i = 0; i < nelt / 2; i++)
+ {
+ unsigned elt = (i + high) & mask;
+ if (d->perm[i * 2] != elt)
+ return false;
+ elt = (elt + nelt) & mask;
+ if (d->perm[i * 2 + 1] != elt)
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ if (BYTES_BIG_ENDIAN)
+ {
+ x = in0, in0 = in1, in1 = x;
+ high = !high;
+ }
+ out = d->target;
+
+ if (high)
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_zip2v16qi; break;
+ case V8QImode: gen = gen_aarch64_zip2v8qi; break;
+ case V8HImode: gen = gen_aarch64_zip2v8hi; break;
+ case V4HImode: gen = gen_aarch64_zip2v4hi; break;
+ case V4SImode: gen = gen_aarch64_zip2v4si; break;
+ case V2SImode: gen = gen_aarch64_zip2v2si; break;
+ case V2DImode: gen = gen_aarch64_zip2v2di; break;
+ case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
+ case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
+ case V2DFmode: gen = gen_aarch64_zip2v2df; break;
+ default:
+ return false;
+ }
+ }
+ else
+ {
+ switch (vmode)
+ {
+ case V16QImode: gen = gen_aarch64_zip1v16qi; break;
+ case V8QImode: gen = gen_aarch64_zip1v8qi; break;
+ case V8HImode: gen = gen_aarch64_zip1v8hi; break;
+ case V4HImode: gen = gen_aarch64_zip1v4hi; break;
+ case V4SImode: gen = gen_aarch64_zip1v4si; break;
+ case V2SImode: gen = gen_aarch64_zip1v2si; break;
+ case V2DImode: gen = gen_aarch64_zip1v2di; break;
+ case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
+ case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
+ case V2DFmode: gen = gen_aarch64_zip1v2df; break;
+ default:
+ return false;
+ }
+ }
+
+ emit_insn (gen (out, in0, in1));
+ return true;
+}
+
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
@@ -6865,7 +7120,15 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
}
if (TARGET_SIMD)
- return aarch64_evpc_tbl (d);
+ {
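+      /* Try the specialized zip/uzp/trn patterns first; fall back to the
+         generic TBL expansion if none of them match.  */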
+ if (aarch64_evpc_zip (d))
+ return true;
+ else if (aarch64_evpc_uzp (d))
+ return true;
+ else if (aarch64_evpc_trn (d))
+ return true;
+ return aarch64_evpc_tbl (d);
+ }
return false;
}
@@ -230,6 +230,12 @@
UNSPEC_BSL ; Used in aarch64-simd.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_CONCAT ; Used in vector permute patterns.
+ UNSPEC_ZIP1 ; Used in vector permute patterns.
+ UNSPEC_ZIP2 ; Used in vector permute patterns.
+ UNSPEC_UZP1 ; Used in vector permute patterns.
+ UNSPEC_UZP2 ; Used in vector permute patterns.
+ UNSPEC_TRN1 ; Used in vector permute patterns.
+ UNSPEC_TRN2 ; Used in vector permute patterns.
])
;; -------------------------------------------------------------------
@@ -649,6 +655,9 @@
(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
+(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
+ UNSPEC_TRN1 UNSPEC_TRN2
+ UNSPEC_UZP1 UNSPEC_UZP2])
;; -------------------------------------------------------------------
;; Int Iterators Attributes.
@@ -732,3 +741,10 @@
(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
(UNSPEC_SSRI "0") (UNSPEC_USRI "0")])
+(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
+ (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
+ (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
+
+(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
+ (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
+ (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])