@@ -26,15 +26,15 @@ RESET_FLOAT_FORMAT (DF, mips_double_format);
FLOAT_MODE (TF, 16, mips_quad_format);
/* Vector modes. */
-VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
-VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
-VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
/* Double-sized vector modes for vec_concat. */
-VECTOR_MODE (INT, QI, 16);
-VECTOR_MODE (INT, HI, 8);
-VECTOR_MODE (INT, SI, 4);
-VECTOR_MODE (FLOAT, SF, 4);
+VECTOR_MODE (INT, QI, 16); /* V16QI */
+VECTOR_MODE (INT, HI, 8); /* V8HI */
+VECTOR_MODE (INT, SI, 4); /* V4SI */
+VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
@@ -4638,7 +4638,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
/* The EABI conventions have traditionally been defined in terms
of TYPE_MODE, regardless of the actual type. */
info->fpr_p = ((GET_MODE_CLASS (mode) == MODE_FLOAT
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ || mode == V2SFmode)
&& GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
break;
@@ -4653,7 +4653,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
|| SCALAR_FLOAT_TYPE_P (type)
|| VECTOR_FLOAT_TYPE_P (type))
&& (GET_MODE_CLASS (mode) == MODE_FLOAT
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ || mode == V2SFmode)
&& GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
break;
@@ -4666,7 +4666,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
&& (type == 0 || FLOAT_TYPE_P (type))
&& (GET_MODE_CLASS (mode) == MODE_FLOAT
|| GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ || mode == V2SFmode)
&& GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FPVALUE);
/* ??? According to the ABI documentation, the real and imaginary
@@ -5103,7 +5103,7 @@ static bool
mips_return_mode_in_fpr_p (enum machine_mode mode)
{
return ((GET_MODE_CLASS (mode) == MODE_FLOAT
- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || mode == V2SFmode
|| GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
&& GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_HWFPVALUE);
}
@@ -10786,8 +10786,14 @@ mips_cannot_change_mode_class (enum machine_mode from,
enum machine_mode to,
enum reg_class rclass)
{
- /* There are several problems with changing the modes of values in
- floating-point registers:
+ /* Allow conversions between different Loongson integer vectors,
+ and between those vectors and DImode. */
+ if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
+ && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
+ return false;
+
+ /* Otherwise, there are several problems with changing the modes of
+ values in floating-point registers:
- When a multi-word value is stored in paired floating-point
registers, the first register always holds the low word. We
@@ -10809,12 +10815,6 @@ mips_cannot_change_mode_class (enum machine_mode from,
We therefore disallow all mode changes involving FPRs. */
- /* Except for Loongson and its integral vectors. We need to be able
- to change between those modes easily. */
- if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
- && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
- return false;
-
return reg_classes_intersect_p (FP_REGS, rclass);
}
@@ -16352,7 +16352,8 @@ struct expand_vec_perm_d
return true if that's a valid instruction in the active ISA. */
static bool
-expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
+mips_expand_vselect (rtx target, rtx op0,
+ const unsigned char *perm, unsigned nelt)
{
rtx rperm[MAX_VECT_LEN], x;
unsigned i;
@@ -16376,15 +16377,15 @@ expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
/* Similar, but generate a vec_concat from op0 and op1 as well. */
static bool
-expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
- const unsigned char *perm, unsigned nelt)
+mips_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+ const unsigned char *perm, unsigned nelt)
{
enum machine_mode v2mode;
rtx x;
v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
- return expand_vselect (target, x, perm, nelt);
+ return mips_expand_vselect (target, x, perm, nelt);
}
/* Recognize patterns for even-odd extraction. */
@@ -16525,18 +16526,19 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
memcpy (perm2, d->perm, sizeof(perm2));
for (i = 1; i < nelt; i += 2)
perm2[i] += nelt;
- if (expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt))
+ if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt))
return true;
}
else
{
- if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
+ if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1,
+ d->perm, nelt))
return true;
/* Try again with swapped operands. */
for (i = 0; i < nelt; ++i)
perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
- if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
+ if (mips_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
return true;
}
@@ -16556,7 +16558,9 @@ mips_expand_vec_perm_const (rtx operands[4])
{
struct expand_vec_perm_d d;
int i, nelt, which;
+ unsigned char orig_perm[MAX_VECT_LEN];
rtx sel;
+ bool ok;
d.target = operands[0];
d.op0 = operands[1];
@@ -16573,8 +16577,9 @@ mips_expand_vec_perm_const (rtx operands[4])
rtx e = XVECEXP (sel, 0, i);
int ei = INTVAL (e) & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
- d.perm[i] = ei;
+ orig_perm[i] = ei;
}
+ memcpy (d.perm, orig_perm, MAX_VECT_LEN);
switch (which)
{
@@ -16585,14 +16590,8 @@ mips_expand_vec_perm_const (rtx operands[4])
d.one_vector_p = false;
if (!rtx_equal_p (d.op0, d.op1))
break;
-
- /* The backend (vec_select (vec_concat)) patterns are not duplicated
- for single-operand. Try once with the original un-folded selector. */
- if (mips_expand_vec_perm_const_1 (&d))
- return true;
-
- /* Try again after folding the selector to a single operand. */
/* FALLTHRU */
+
case 2:
for (i = 0; i < nelt; ++i)
d.perm[i] &= nelt - 1;
@@ -16606,7 +16605,25 @@ mips_expand_vec_perm_const (rtx operands[4])
break;
}
- return mips_expand_vec_perm_const_1 (&d);
+ ok = mips_expand_vec_perm_const_1 (&d);
+
+ /* If we were given a two-vector permutation which just happened to
+ have both input vectors equal, we folded this into a one-vector
+ permutation. There are several Loongson patterns that are matched
+ via direct vec_select+vec_concat expansion, but we do not have
+ support in mips_expand_vec_perm_const_1 to guess the adjustment
+ that should be made for a single operand. Just try again with
+ the original permutation. */
+ if (!ok && which == 3)
+ {
+ d.op0 = operands[1];
+ d.op1 = operands[2];
+ d.one_vector_p = false;
+ memcpy (d.perm, orig_perm, MAX_VECT_LEN);
+ ok = mips_expand_vec_perm_const_1 (&d);
+ }
+
+ return ok;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
@@ -16732,7 +16749,7 @@ mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt)
gcc_unreachable ();
}
- memset (&d, 0, sizeof(d));
+ memset (&d, 0, sizeof (d));
d.target = target;
d.op0 = t1;
d.op1 = t1;
@@ -16862,47 +16879,68 @@ mips_expand_vec_reduc (rtx target, rtx in, rtx (*gen)(rtx, rtx, rtx))
{
enum machine_mode vmode = GET_MODE (in);
unsigned char perm2[2];
- rtx tmp;
+ rtx last, next, fold, x;
bool ok;
- tmp = gen_reg_rtx (vmode);
+ last = in;
+ fold = gen_reg_rtx (vmode);
switch (vmode)
{
case V2SFmode:
/* Use PUL/PLU to produce { L, H } op { H, L }.
- By reversing the pair order, rather a pure interleave high,
- we don't produce erroneous exceptional conditions. */
+ By reversing the pair order, rather than a pure interleave high,
+ we avoid erroneous exceptional conditions that we might otherwise
+ produce from the computation of H op H. */
perm2[0] = 1;
perm2[1] = 2;
- ok = expand_vselect_vconcat (tmp, in, in, perm2, 2);
+ ok = mips_expand_vselect_vconcat (fold, last, last, perm2, 2);
gcc_assert (ok);
break;
case V2SImode:
/* Use interleave to produce { H, L } op { H, H }. */
- emit_insn (gen_loongson_punpckhwd (tmp, in, in));
+ emit_insn (gen_loongson_punpckhwd (fold, last, last));
break;
case V4HImode:
/* Perform the first reduction with interleave,
and subsequent reductions with shifts. */
- emit_insn (gen_loongson_punpckhwd_hi (tmp, in, in));
- emit_insn (gen (in, in, tmp));
- emit_insn (gen_vec_shr_v4hi (tmp, in, force_reg (SImode, GEN_INT (16))));
+ emit_insn (gen_loongson_punpckhwd_hi (fold, last, last));
+
+ next = gen_reg_rtx (vmode);
+ emit_insn (gen (next, last, fold));
+ last = next;
+
+ fold = gen_reg_rtx (vmode);
+ x = force_reg (SImode, GEN_INT (16));
+ emit_insn (gen_vec_shr_v4hi (fold, last, x));
break;
case V8QImode:
- emit_insn (gen_loongson_punpckhwd_qi (tmp, in, in));
- emit_insn (gen (in, in, tmp));
- emit_insn (gen_vec_shr_v8qi (tmp, in, force_reg (SImode, GEN_INT (16))));
- emit_insn (gen (in, in, tmp));
- emit_insn (gen_vec_shr_v8qi (tmp, in, force_reg (SImode, GEN_INT (8))));
+ emit_insn (gen_loongson_punpckhwd_qi (fold, last, last));
+
+ next = gen_reg_rtx (vmode);
+ emit_insn (gen (next, last, fold));
+ last = next;
+
+ fold = gen_reg_rtx (vmode);
+ x = force_reg (SImode, GEN_INT (16));
+ emit_insn (gen_vec_shr_v8qi (fold, last, x));
+
+ next = gen_reg_rtx (vmode);
+ emit_insn (gen (next, last, fold));
+ last = next;
+
+ fold = gen_reg_rtx (vmode);
+ x = force_reg (SImode, GEN_INT (8));
+ emit_insn (gen_vec_shr_v8qi (fold, last, x));
break;
default:
gcc_unreachable ();
}
- emit_insn (gen (target, in, tmp));
+
+ emit_insn (gen (target, last, fold));
}
/* Expand a vector minimum/maximum. */