===================================================================
@@ -447,15 +447,17 @@ psadbh (uint8x8_t s, uint8x8_t t)
/* Shuffle halfwords. */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
+pshufh_u (uint16x4_t dest __attribute__((__unused__)), /* DEST is ignored. */
+ uint16x4_t s, uint8_t order)
{
- return __builtin_loongson_pshufh_u (dest, s, order);
+ return __builtin_loongson_pshufh_u (s, order); /* Only S and ORDER reach the builtin. */
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
+pshufh_s (int16x4_t dest __attribute__((__unused__)), /* DEST is ignored. */
+ int16x4_t s, uint8_t order)
{
- return __builtin_loongson_pshufh_s (dest, s, order);
+ return __builtin_loongson_pshufh_s (s, order); /* Only S and ORDER reach the builtin. */
}
/* Shift left logical. */
===================================================================
@@ -403,12 +403,11 @@ (define_insn "loongson_psadbh"
;; Shuffle halfwords.
(define_insn "loongson_pshufh"
[(set (match_operand:VH 0 "register_operand" "=f")
- (unspec:VH [(match_operand:VH 1 "register_operand" "0")
- (match_operand:VH 2 "register_operand" "f")
- (match_operand:SI 3 "register_operand" "f")]
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f") ; vector to shuffle
+ (match_operand:SI 2 "register_operand" "f")] ; packed selector (see mips_expand_vpc_loongson_pshufh)
UNSPEC_LOONGSON_PSHUFH))]
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
- "pshufh\t%0,%2,%3"
+ "pshufh\t%0,%1,%2"
[(set_attr "type" "fmul")])
;; Shift left logical.
@@ -479,7 +478,7 @@ (define_insn "ussub<mode>3"
[(set_attr "type" "fadd")])
;; Unpack high data.
-(define_insn "vec_interleave_high<mode>"
+(define_insn "loongson_punpckh<V_stretch_half_suffix>"
[(set (match_operand:VWHB 0 "register_operand" "=f")
(unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
(match_operand:VWHB 2 "register_operand" "f")]
@@ -489,7 +488,7 @@ (define_insn "vec_interleave_high<mode>"
[(set_attr "type" "fdiv")])
;; Unpack low data.
-(define_insn "vec_interleave_low<mode>"
+(define_insn "loongson_punpckl<V_stretch_half_suffix>"
[(set (match_operand:VWHB 0 "register_operand" "=f")
(unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
(match_operand:VWHB 2 "register_operand" "f")]
@@ -498,6 +497,19 @@ (define_insn "vec_interleave_low<mode>"
"punpckl<V_stretch_half_suffix>\t%0,%1,%2"
[(set_attr "type" "fdiv")])
+(define_expand "vec_perm_const<mode>" ; constant permutation for the VWHB modes
+ [(match_operand:VWHB 0 "register_operand" "") ; result vector
+ (match_operand:VWHB 1 "register_operand" "") ; first input vector
+ (match_operand:VWHB 2 "register_operand" "") ; second input vector
+ (match_operand:VWHB 3 "" "")] ; constant selector
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ if (mips_expand_vec_perm_const (operands)) /* True once a recognizer has handled the selector. */
+ DONE;
+ else
+ FAIL; /* No recognizer accepted the selector. */
+})
+
;; Integer division and modulus. For integer multiplication, see mips.md.
(define_insn "<u>div<mode>3"
===================================================================
@@ -29,6 +29,7 @@ FLOAT_MODE (TF, 16, mips_quad_format);
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (FLOAT, 16); /* Presumably provides V4SF, which vec_perm_const_ps uses in its vec_concat. */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
===================================================================
@@ -328,6 +328,7 @@ extern void mips_expand_atomic_qihi (uni
rtx, rtx, rtx, rtx);
extern void mips_expand_vector_init (rtx, rtx);
+extern bool mips_expand_vec_perm_const (rtx op[4]);
extern bool mips_eh_uses (unsigned int);
extern bool mips_epilogue_uses (unsigned int);
===================================================================
@@ -89,61 +89,167 @@ (define_expand "movv2sfcc"
DONE;
})
-; pul.ps - Pair Upper Lower
-(define_insn "mips_pul_ps"
+(define_insn "vec_perm_const_ps"
[(set (match_operand:V2SF 0 "register_operand" "=f")
- (vec_merge:V2SF
- (match_operand:V2SF 1 "register_operand" "f")
- (match_operand:V2SF 2 "register_operand" "f")
- (const_int 2)))]
+ (vec_select:V2SF
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f"))
+ (parallel [(match_operand:SI 3 "const_0_or_1_operand" "")
+ (match_operand:SI 4 "const_2_or_3_operand" "")])))]
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
- "pul.ps\t%0,%1,%2"
+{
+ /* Let <op>L be the lower part of operand <op> and <op>U be the upper part.
+ The P[UL][UL].PS instruction always specifies the upper part of the
+ result first, so the instruction is:
+
+ P<aUL><bUL>.PS %0,<aop>,<bop>
+
+ where 0U == <aop><aUL> and 0L == <bop><bUL>.
+
+ GCC's vector indices are specified in memory order, which means
+ that vector element 0 is the lower part (L) on little-endian targets
+ and the upper part (U) on big-endian targets. vec_concat likewise
+ concatenates in memory order, which means that operand 3 (being
+ 0 or 1) selects part of operand 1 and operand 4 (being 2 or 3)
+ selects part of operand 2.
+
+ Let:
+
+ I3 = INTVAL (operands[3])
+ I4 = INTVAL (operands[4]) - 2
+
+ Taking the two endiannesses in turn:
+
+ Little-endian:
+
+ The semantics of the RTL pattern are:
+
+ { 0L, 0U } = { X[I3], X[I4 + 2] }, where X = { 1L, 1U, 2L, 2U }
+
+ so: 0L = { 1L, 1U }[I3] (= <bop><bUL>)
+ 0U = { 2L, 2U }[I4] (= <aop><aUL>)
+
+ <aop> = 2, <aUL> = I4 ? U : L
+ <bop> = 1, <bUL> = I3 ? U : L
+
+ [LL] !I4 && !I3 [UL] I4 && !I3
+ [LU] !I4 && I3 [UU] I4 && I3
+
+ Big-endian:
+
+ The semantics of the RTL pattern are:
+
+ { 0U, 0L } = { X[I3], X[I4 + 2] }, where X = { 1U, 1L, 2U, 2L }
+
+ so: 0U = { 1U, 1L }[I3] (= <aop><aUL>)
+ 0L = { 2U, 2L }[I4] (= <bop><bUL>)
+
+ <aop> = 1, <aUL> = I3 ? L : U
+ <bop> = 2, <bUL> = I4 ? L : U
+
+ [UU] !I3 && !I4 [UL] !I3 && I4
+ [LU] I3 && !I4 [LL] I3 && I4.
+
+ The table below holds one row per endianness (little-endian first)
+ and is indexed within a row by I3 * 2 + I4. */
+ static const char *const mnemonics[2][4] = {
+ /* LE */ { "pll.ps\t%0,%2,%1", "pul.ps\t%0,%2,%1",
+ "plu.ps\t%0,%2,%1", "puu.ps\t%0,%2,%1" },
+ /* BE */ { "puu.ps\t%0,%1,%2", "pul.ps\t%0,%1,%2",
+ "plu.ps\t%0,%1,%2", "pll.ps\t%0,%1,%2" },
+ };
+
+ unsigned mask = INTVAL (operands[3]) * 2 + (INTVAL (operands[4]) - 2); /* 0..3 given the operand predicates. */
+ return mnemonics[TARGET_BIG_ENDIAN][mask];
+}
[(set_attr "type" "fmove")
(set_attr "mode" "SF")])
-; puu.ps - Pair upper upper
-(define_insn "mips_puu_ps"
- [(set (match_operand:V2SF 0 "register_operand" "=f")
- (vec_merge:V2SF
- (match_operand:V2SF 1 "register_operand" "f")
- (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
- (parallel [(const_int 1)
- (const_int 0)]))
- (const_int 2)))]
+(define_expand "vec_perm_constv2sf" ; constant permutation of two V2SF vectors
+ [(match_operand:V2SF 0 "register_operand" "") ; result vector
+ (match_operand:V2SF 1 "register_operand" "") ; first input vector
+ (match_operand:V2SF 2 "register_operand" "") ; second input vector
+ (match_operand:V2SI 3 "" "")] ; constant selector
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
- "puu.ps\t%0,%1,%2"
- [(set_attr "type" "fmove")
- (set_attr "mode" "SF")])
+{
+ if (mips_expand_vec_perm_const (operands)) /* True once a recognizer has handled the selector. */
+ DONE;
+ else
+ FAIL; /* No recognizer accepted the selector. */
+})
-; pll.ps - Pair Lower Lower
-(define_insn "mips_pll_ps"
- [(set (match_operand:V2SF 0 "register_operand" "=f")
- (vec_merge:V2SF
- (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
- (parallel [(const_int 1)
- (const_int 0)]))
- (match_operand:V2SF 2 "register_operand" "f")
- (const_int 2)))]
+;; Expanders for builtins. The instruction:
+;;
+;; P[UL][UL].PS <result>, <a>, <b>
+;;
+;; says that the upper part of <result> is taken from half of <a> and
+;; the lower part of <result> is taken from half of <b>. This means
+;; that the P[UL][UL].PS operand order matches memory order on big-endian
+;; targets; <a> is element 0 of the V2SF result while <b> is element 1.
+;; However, the P[UL][UL].PS operand order is the reverse of memory order
+;; on little-endian targets; <a> is element 1 of the V2SF result while
+;; <b> is element 0. The arguments to vec_perm_const_ps are always in
+;; memory order.
+;;
+;; Similarly, "U" corresponds to element 0 on big-endian targets but
+;; to element 1 on little-endian targets.
+(define_expand "mips_puu_ps" ; operand 0 = <result>, 1 = <a>, 2 = <b>
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V2SF 2 "register_operand" "")]
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
- "pll.ps\t%0,%1,%2"
- [(set_attr "type" "fmove")
- (set_attr "mode" "SF")])
+{
+ if (TARGET_BIG_ENDIAN) /* <a>, <b> are already in memory order. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+ /* U */ const0_rtx, /* U */ const2_rtx));
+ else /* Little-endian: <b> is element 0 of the result, so it is passed first. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+ /* U */ const1_rtx, /* U */ GEN_INT (3)));
+ DONE;
+})
-; plu.ps - Pair Lower Upper
-(define_insn "mips_plu_ps"
- [(set (match_operand:V2SF 0 "register_operand" "=f")
- (vec_merge:V2SF
- (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
- (parallel [(const_int 1)
- (const_int 0)]))
- (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
- (parallel [(const_int 1)
- (const_int 0)]))
- (const_int 2)))]
+(define_expand "mips_pul_ps" ; operand 0 = <result>, 1 = <a>, 2 = <b>
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V2SF 2 "register_operand" "")]
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
- "plu.ps\t%0,%1,%2"
- [(set_attr "type" "fmove")
- (set_attr "mode" "SF")])
+{
+ if (TARGET_BIG_ENDIAN) /* <a>, <b> are already in memory order. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+ /* U */ const0_rtx, /* L */ GEN_INT (3)));
+ else /* Little-endian: <b> is element 0 of the result, so it is passed first. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+ /* L */ const0_rtx, /* U */ GEN_INT (3)));
+ DONE;
+})
+
+(define_expand "mips_plu_ps" ; operand 0 = <result>, 1 = <a>, 2 = <b>
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V2SF 2 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ if (TARGET_BIG_ENDIAN) /* <a>, <b> are already in memory order. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+ /* L */ const1_rtx, /* U */ const2_rtx));
+ else /* Little-endian: <b> is element 0 of the result, so it is passed first. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+ /* U */ const1_rtx, /* L */ const2_rtx));
+ DONE;
+})
+
+(define_expand "mips_pll_ps" ; operand 0 = <result>, 1 = <a>, 2 = <b>
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V2SF 2 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ if (TARGET_BIG_ENDIAN) /* <a>, <b> are already in memory order. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2],
+ /* L */ const1_rtx, /* L */ GEN_INT (3)));
+ else /* Little-endian: <b> is element 0 of the result, so it is passed first. */
+ emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1],
+ /* L */ const0_rtx, /* L */ const2_rtx));
+ DONE;
+})
; vec_init
(define_expand "vec_initv2sf"
@@ -206,10 +312,10 @@ (define_expand "vec_setv2sf"
then use a PUL instruction. */
temp = gen_reg_rtx (V2SFmode);
emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1]));
- if (INTVAL (operands[2]) == !BYTES_BIG_ENDIAN)
- emit_insn (gen_mips_pul_ps (operands[0], temp, operands[0]));
- else
- emit_insn (gen_mips_pul_ps (operands[0], operands[0], temp));
+
+ emit_insn (gen_vec_perm_const_ps (operands[0], temp, operands[0],
+ operands[2],
+ GEN_INT (1 - INTVAL (operands[2]) + 2)));
DONE;
})
===================================================================
@@ -12774,12 +12774,6 @@ #define CODE_FOR_loongson_psubsh CODE_FO
#define CODE_FOR_loongson_psubsb CODE_FOR_sssubv8qi3
#define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3
#define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3
-#define CODE_FOR_loongson_punpckhbh CODE_FOR_vec_interleave_highv8qi
-#define CODE_FOR_loongson_punpckhhw CODE_FOR_vec_interleave_highv4hi
-#define CODE_FOR_loongson_punpckhwd CODE_FOR_vec_interleave_highv2si
-#define CODE_FOR_loongson_punpcklbh CODE_FOR_vec_interleave_lowv8qi
-#define CODE_FOR_loongson_punpcklhw CODE_FOR_vec_interleave_lowv4hi
-#define CODE_FOR_loongson_punpcklwd CODE_FOR_vec_interleave_lowv2si
static const struct mips_builtin_description mips_builtins[] = {
DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single),
@@ -13021,8 +13015,8 @@ static const struct mips_builtin_descrip
LOONGSON_BUILTIN (pasubub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
LOONGSON_BUILTIN (biadd, MIPS_UV4HI_FTYPE_UV8QI),
LOONGSON_BUILTIN (psadbh, MIPS_UV4HI_FTYPE_UV8QI_UV8QI),
- LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI_UQI),
- LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_V4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
LOONGSON_BUILTIN_SUFFIX (psllh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
LOONGSON_BUILTIN_SUFFIX (psllh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
LOONGSON_BUILTIN_SUFFIX (psllw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI),
@@ -16326,6 +16320,262 @@ mips_shift_truncation_mask (enum machine
}
+/* Generate or test for an insn that supports a constant permutation. */
+
+#define MAX_VECT_LEN 8 /* Capacity of the perm array below. */
+
+struct expand_vec_perm_d
+{
+ rtx target, op0, op1; /* Destination and the two input vectors. */
+ unsigned char perm[MAX_VECT_LEN]; /* Selector: one source element index per result element. */
+ enum machine_mode vmode; /* Vector mode being permuted. */
+ unsigned char nelt; /* GET_MODE_NUNITS (vmode). */
+ bool one_vector_p; /* True if the selector refers to a single input. */
+ bool testing_p; /* True to test for support only; recognizers return before emitting. */
+};
+
+/* Recognize patterns for the MIPS3D P[UL][UL].PS instructions.
+ Accept any V2SF permutation on a paired-single target. Unless
+ D->testing_p is set, emit one vec_perm_const_ps insn for it,
+ exchanging D->op0 and D->op1 first when element 0 of the result
+ comes from the second input. Return true if D was accepted. */
+
+static bool
+mips_expand_vpc_ps (struct expand_vec_perm_d *d)
+{
+ unsigned perm0, perm1;
+
+ if (!(TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT))
+ return false;
+ if (d->vmode != V2SFmode)
+ return false;
+ if (d->testing_p)
+ return true;
+
+ perm0 = d->perm[0];
+ perm1 = d->perm[1];
+
+ if (d->one_vector_p)
+ perm1 += 2; /* Select element 1 of the result from the op1 half of the concatenation. */
+ else if (perm0 & 2) /* Element 0 of the result comes from op1. */
+ {
+ rtx x;
+ perm0 -= 2;
+ perm1 += 2;
+ x = d->op0, d->op0 = d->op1, d->op1 = x; /* Exchange the inputs to match the new indices. */
+ }
+ gcc_assert ((perm0 & 2) == 0); /* perm0 must now index the first input (0 or 1)... */
+ gcc_assert (perm1 & 2); /* ...and perm1 the second (2 or 3). */
+
+ emit_insn (gen_vec_perm_const_ps (d->target, d->op0, d->op1,
+ GEN_INT (perm0), GEN_INT (perm1)));
+
+ return true;
+}
+
+/* Recognize patterns for the Loongson PUNPCK* instructions.
+
+   Return true if D->perm interleaves corresponding halves of the two
+   inputs in a way that one PUNPCKL* or PUNPCKH* instruction provides.
+   Unless D->testing_p is set, also emit that instruction to set
+   D->target; D->op0 and D->op1 may be exchanged in the process.
+   Return false without emitting anything otherwise.  */
+
+static bool
+mips_expand_vpc_loongson_interleave (struct expand_vec_perm_d *d)
+{
+  unsigned int i, low, swap, nelt = d->nelt, mask;
+  rtx x;
+
+  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
+    return false;
+  if (GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT)
+    return false;
+  /* The PUNPCK* patterns work on the 8-byte integer vector modes
+     (V8QI, V4HI and V2SI) only.  */
+  if (GET_MODE_SIZE (d->vmode) != 8)
+    return false;
+
+  /* Note that these are big-endian tests.  Adjust for little-endian later.
+     LOW is the index of the first element taken from each input and
+     SWAP is NELT if the second input supplies the even result elements;
+     both are deduced from the first selector entry.  */
+  low = nelt / 2;
+  swap = nelt;
+  if (d->perm[0] == swap + low)
+    ;
+  else if (d->perm[0] == swap)
+    low = 0;
+  else if (d->perm[0] == low)
+    swap = 0;
+  else if (d->perm[0] == 0)
+    low = 0, swap = 0;
+  else
+    return false;
+  /* With a single input every index stays below NELT.  */
+  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
+
+  /* Check that each pair of result elements takes element I + LOW
+     from one input followed by the same element of the other.  */
+  for (i = 0; i < nelt / 2; i++)
+    {
+      unsigned elt;
+      elt = i + low + swap;
+      if (d->perm[i * 2] != elt)
+        return false;
+      elt = (elt + nelt) & mask;
+      if (d->perm[i * 2 + 1] != elt)
+        return false;
+    }
+
+  /* Success!  */
+  if (d->testing_p)
+    return true;
+
+  /* Adjust for little-endian.  */
+  if (TARGET_LITTLE_ENDIAN)
+    swap = !swap, low = !low;
+  /* Adjust for matched swapped operand pattern.  */
+  if (swap)
+    x = d->op0, d->op0 = d->op1, d->op1 = x;
+
+  /* Generate one of the loongson_punpck* instructions.  */
+  /* ??? We should consider using standard (vec_select (vec_concat)) form.  */
+  x = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, d->op0, d->op1),
+                      low ? UNSPEC_LOONGSON_PUNPCKL : UNSPEC_LOONGSON_PUNPCKH);
+  emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
+  return true;
+}
+
+/* Recognize patterns for the Loongson PSHUFH instruction.
+ Accept any single-input V4HI permutation. Unless D->testing_p is
+ set, emit a loongson_pshufh insn whose selector is D->perm packed
+ two bits per element and forced into an SImode register.
+ Return true if D was accepted. */
+
+static bool
+mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d)
+{
+ unsigned i, mask, ec;
+
+ if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
+ return false;
+ if (d->vmode != V4HImode)
+ return false;
+ if (!d->one_vector_p)
+ return false;
+ if (d->testing_p)
+ return true;
+
+ /* Convert the selector into the packed 8-bit form for PSHUFH.
+ The bottom two bits of the mask always control the bottom
+ 16 bits of the result; this is element 3 on big-endian targets
+ and element 0 on little-endian targets. Each pair of bits X
+ specifies a right shift by X*16; again, this means that X==0
+ refers to element 3 on big-endian targets and element 0
+ on little-endian targets. */
+ ec = TARGET_BIG_ENDIAN ? 3 : 0; /* XOR with 3 reverses the element numbering; XOR with 0 keeps it. */
+ for (i = mask = 0; i < 4; i++)
+ mask |= ((d->perm[i ^ ec] ^ ec) & 3) << (i * 2); /* Field I of the mask describes result halfword I, counted from the bottom. */
+
+ emit_insn (gen_loongson_pshufh (d->target, d->op0,
+ force_reg (SImode, GEN_INT (mask))));
+ return true;
+}
+
+/* Try each permutation recognizer on D in turn, stopping at the first
+   one that accepts it.  Return true if some recognizer accepted D.  */
+
+static bool
+mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
+{
+  return (mips_expand_vpc_ps (d)
+          || mips_expand_vpc_loongson_interleave (d)
+          || mips_expand_vpc_loongson_pshufh (d));
+}
+
+/* Expand a vec_perm_const pattern. OPERANDS[0] is the target,
+ OPERANDS[1] and OPERANDS[2] are the input vectors and OPERANDS[3]
+ is the constant selector. Return true if one of the recognizers
+ handled the permutation and false otherwise. */
+
+bool
+mips_expand_vec_perm_const (rtx operands[4])
+{
+ struct expand_vec_perm_d d;
+ int i, nelt, which;
+ rtx sel;
+
+ d.target = operands[0];
+ d.op0 = operands[1];
+ d.op1 = operands[2];
+ sel = operands[3];
+
+ d.vmode = GET_MODE (d.target);
+ gcc_assert (VECTOR_MODE_P (d.vmode));
+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+ d.testing_p = false;
+
+ for (i = which = 0; i < nelt; ++i)
+ {
+ rtx e = XVECEXP (sel, 0, i);
+ int ei = INTVAL (e) & (2 * nelt - 1); /* Keep only the bits that index the two inputs. */
+ which |= (ei < nelt ? 1 : 2); /* Bit 0: op0 is referenced; bit 1: op1 is referenced. */
+ d.perm[i] = ei;
+ }
+
+ switch (which)
+ {
+ default:
+ gcc_unreachable();
+
+ case 3: /* Both inputs are referenced. */
+ d.one_vector_p = false;
+ if (!rtx_equal_p (d.op0, d.op1))
+ break;
+
+ /* The elements of PERM do not suggest that only the first operand
+ is used, but both operands are identical. Allow easier matching
+ of the permutation by folding the permutation into the single
+ input vector. */
+ for (i = 0; i < nelt; ++i)
+ if (d.perm[i] >= nelt)
+ d.perm[i] -= nelt;
+ /* FALLTHRU */
+
+ case 1: /* Only op0 is referenced. */
+ d.op1 = d.op0;
+ d.one_vector_p = true;
+ break;
+
+ case 2: /* Only op1 is referenced: renumber its elements from 0 and use it as op0. */
+ for (i = 0; i < nelt; ++i)
+ d.perm[i] -= nelt;
+ d.op0 = d.op1;
+ d.one_vector_p = true;
+ break;
+ }
+
+ return mips_expand_vec_perm_const_1 (&d);
+}
+
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. Return true if one
+ of the recognizers accepts the constant selector SEL for vectors of
+ mode VMODE. The recognizers are run with testing_p set and with
+ null operands, inside a start_sequence/end_sequence pair whose
+ contents are not used. */
+
+static bool
+mips_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+ const unsigned char *sel)
+{
+ struct expand_vec_perm_d d;
+ unsigned int i, nelt, which;
+ bool ret;
+
+ d.target = NULL_RTX;
+ d.op0 = NULL_RTX;
+ d.op1 = NULL_RTX;
+ d.vmode = vmode;
+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+ d.testing_p = true;
+ memcpy (d.perm, sel, nelt);
+
+ /* Categorize the set of elements in the selector. */
+ for (i = which = 0; i < nelt; ++i)
+ {
+ unsigned char e = d.perm[i];
+ gcc_assert (e < 2 * nelt);
+ which |= (e < nelt ? 1 : 2); /* Bit 0: first input referenced; bit 1: second input referenced. */
+ }
+
+ /* For all elements from second vector, fold the elements to first. */
+ if (which == 2)
+ for (i = 0; i < nelt; ++i)
+ d.perm[i] -= nelt;
+
+ /* Check whether the mask can be applied to the vector type. */
+ d.one_vector_p = (which != 3);
+
+ start_sequence ();
+ ret = mips_expand_vec_perm_const_1 (&d);
+ end_sequence ();
+
+ return ret;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -16544,6 +16794,9 @@ #define TARGET_ASM_OUTPUT_SOURCE_FILENAM
#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK mips_shift_truncation_mask
+#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
+#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-mips.h"
===================================================================
@@ -73,8 +73,11 @@ (define_predicate "reg_or_1_operand"
;; This is used for indexing into vectors, and hence only accepts const_int.
(define_predicate "const_0_or_1_operand"
(and (match_code "const_int")
- (ior (match_test "op == CONST0_RTX (GET_MODE (op))")
- (match_test "op == CONST1_RTX (GET_MODE (op))"))))
+ (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) ; accepts a const_int whose value is 0 or 1
+
+(define_predicate "const_2_or_3_operand" ; used for operand 4 of vec_perm_const_ps
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) ; accepts a const_int whose value is 2 or 3
(define_predicate "qi_mask_operand"
(and (match_code "const_int")