Patchwork [v3,00/10] MIPS vectorization improvements

login
register
mail settings
Submitter Richard Henderson
Date Dec. 23, 2011, 5:41 p.m.
Message ID <4EF4BD2C.1070407@redhat.com>
Download mbox | patch
Permalink /patch/133103/
State New
Headers show

Comments

Richard Henderson - Dec. 23, 2011, 5:41 p.m.
On 12/22/2011 12:44 PM, Richard Sandiford wrote:
> Woah, thanks, that's quite some work.  OK for the patches I didn't
> respond to.

Here's a combined follow-on patch that I believe addresses all of
the comments you had.

Ok?


r~
commit 824b5ca31ea21bb02cedabf79bb98e4348c34366
Author: Richard Henderson <rth@redhat.com>
Date:   Thu Dec 22 12:23:03 2011 -0800

    mips: Feedback from rsandiford.
Richard Sandiford - Dec. 23, 2011, 6 p.m.
Richard Henderson <rth@redhat.com> writes:
> On 12/22/2011 12:44 PM, Richard Sandiford wrote:
>> Woah, thanks, that's quite some work.  OK for the patches I didn't
>> respond to.
>
> Here's a combined follow-on patch that I believe addresses all of
> the comments you had.
>
> Ok?

Yeah, looks good, thanks.

Richard
Richard Henderson - Dec. 23, 2011, 6:49 p.m.
On 12/23/2011 10:00 AM, Richard Sandiford wrote:
> Richard Henderson <rth@redhat.com> writes:
>> On 12/22/2011 12:44 PM, Richard Sandiford wrote:
>>> Woah, thanks, that's quite some work.  OK for the patches I didn't
>>> respond to.
>>
>> Here's a combined follow-on patch that I believe addresses all of
>> the comments you had.
>>
>> Ok?
> 
> Yeah, looks good, thanks.

Thanks for the review.  I've committed the squash of all those patches.


r~

Patch

diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def
index 85861a9..187c651 100644
--- a/gcc/config/mips/mips-modes.def
+++ b/gcc/config/mips/mips-modes.def
@@ -26,15 +26,15 @@  RESET_FLOAT_FORMAT (DF, mips_double_format);
 FLOAT_MODE (TF, 16, mips_quad_format);
 
 /* Vector modes.  */
-VECTOR_MODES (INT, 8);        /*       V8QI  V4HI V2SI */
-VECTOR_MODES (FLOAT, 8);      /*             V4HF V2SF */
-VECTOR_MODES (INT, 4);        /*             V4QI V2HI */
+VECTOR_MODES (INT, 4);        /* V4QI  V2HI      */
+VECTOR_MODES (INT, 8);        /* V8QI  V4HI V2SI */
+VECTOR_MODES (FLOAT, 8);      /*       V4HF V2SF */
 
 /* Double-sized vector modes for vec_concat.  */
-VECTOR_MODE (INT, QI, 16);
-VECTOR_MODE (INT, HI, 8);
-VECTOR_MODE (INT, SI, 4);
-VECTOR_MODE (FLOAT, SF, 4);
+VECTOR_MODE (INT, QI, 16);    /* V16QI           */
+VECTOR_MODE (INT, HI, 8);     /*       V8HI      */
+VECTOR_MODE (INT, SI, 4);     /*            V4SI */
+VECTOR_MODE (FLOAT, SF, 4);   /*            V4SF */
 
 VECTOR_MODES (FRACT, 4);	/* V4QQ  V2HQ */
 VECTOR_MODES (UFRACT, 4);	/* V4UQQ V2UHQ */
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index bc76078..94d2c2f 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -4638,7 +4638,7 @@  mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
       /* The EABI conventions have traditionally been defined in terms
 	 of TYPE_MODE, regardless of the actual type.  */
       info->fpr_p = ((GET_MODE_CLASS (mode) == MODE_FLOAT
-		      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+		      || mode == V2SFmode)
 		     && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
       break;
 
@@ -4653,7 +4653,7 @@  mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
 			 || SCALAR_FLOAT_TYPE_P (type)
 			 || VECTOR_FLOAT_TYPE_P (type))
 		     && (GET_MODE_CLASS (mode) == MODE_FLOAT
-			 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+			 || mode == V2SFmode)
 		     && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
       break;
 
@@ -4666,7 +4666,7 @@  mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
 		     && (type == 0 || FLOAT_TYPE_P (type))
 		     && (GET_MODE_CLASS (mode) == MODE_FLOAT
 			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
-			 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+			 || mode == V2SFmode)
 		     && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FPVALUE);
 
       /* ??? According to the ABI documentation, the real and imaginary
@@ -5103,7 +5103,7 @@  static bool
 mips_return_mode_in_fpr_p (enum machine_mode mode)
 {
   return ((GET_MODE_CLASS (mode) == MODE_FLOAT
-	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+	   || mode == V2SFmode
 	   || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
 	  && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_HWFPVALUE);
 }
@@ -10786,8 +10786,14 @@  mips_cannot_change_mode_class (enum machine_mode from,
 			       enum machine_mode to,
 			       enum reg_class rclass)
 {
-  /* There are several problems with changing the modes of values in
-     floating-point registers:
+  /* Allow conversions between different Loongson integer vectors,
+     and between those vectors and DImode.  */
+  if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
+      && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
+    return false;
+
+  /* Otherwise, there are several problems with changing the modes of
+     values in floating-point registers:
 
      - When a multi-word value is stored in paired floating-point
        registers, the first register always holds the low word.  We
@@ -10809,12 +10815,6 @@  mips_cannot_change_mode_class (enum machine_mode from,
 
      We therefore disallow all mode changes involving FPRs.  */
 
-  /* Except for Loongson and its integral vectors.  We need to be able
-     to change between those modes easily.  */
-  if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
-      && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
-    return false;
-
   return reg_classes_intersect_p (FP_REGS, rclass);
 }
 
@@ -16352,7 +16352,8 @@  struct expand_vec_perm_d
    return true if that's a valid instruction in the active ISA.  */
 
 static bool
-expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
+mips_expand_vselect (rtx target, rtx op0,
+		     const unsigned char *perm, unsigned nelt)
 {
   rtx rperm[MAX_VECT_LEN], x;
   unsigned i;
@@ -16376,15 +16377,15 @@  expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
 
 static bool
-expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
-			const unsigned char *perm, unsigned nelt)
+mips_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+			     const unsigned char *perm, unsigned nelt)
 {
   enum machine_mode v2mode;
   rtx x;
 
   v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
   x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
-  return expand_vselect (target, x, perm, nelt);
+  return mips_expand_vselect (target, x, perm, nelt);
 }
 
 /* Recognize patterns for even-odd extraction.  */
@@ -16525,18 +16526,19 @@  mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
       memcpy (perm2, d->perm, sizeof(perm2));
       for (i = 1; i < nelt; i += 2)
 	perm2[i] += nelt;
-      if (expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt))
+      if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt))
 	return true;
     }
   else
     {
-      if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
+      if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1,
+				       d->perm, nelt))
 	return true;
 
       /* Try again with swapped operands.  */
       for (i = 0; i < nelt; ++i)
 	perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
-      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
+      if (mips_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
 	return true;
     }
 
@@ -16556,7 +16558,9 @@  mips_expand_vec_perm_const (rtx operands[4])
 {
   struct expand_vec_perm_d d;
   int i, nelt, which;
+  unsigned char orig_perm[MAX_VECT_LEN];
   rtx sel;
+  bool ok;
 
   d.target = operands[0];
   d.op0 = operands[1];
@@ -16573,8 +16577,9 @@  mips_expand_vec_perm_const (rtx operands[4])
       rtx e = XVECEXP (sel, 0, i);
       int ei = INTVAL (e) & (2 * nelt - 1);
       which |= (ei < nelt ? 1 : 2);
-      d.perm[i] = ei;
+      orig_perm[i] = ei;
     }
+  memcpy (d.perm, orig_perm, MAX_VECT_LEN);
 
   switch (which)
     {
@@ -16585,14 +16590,8 @@  mips_expand_vec_perm_const (rtx operands[4])
       d.one_vector_p = false;
       if (!rtx_equal_p (d.op0, d.op1))
 	break;
-
-      /* The backend (vec_select (vec_concat)) patterns are not duplicated
-	 for single-operand.  Try once with the original un-folded selector. */
-      if (mips_expand_vec_perm_const_1 (&d))
-	return true;
-
-      /* Try again after folding the selector to a single operand.  */
       /* FALLTHRU */
+
     case 2:
       for (i = 0; i < nelt; ++i)
         d.perm[i] &= nelt - 1;
@@ -16606,7 +16605,25 @@  mips_expand_vec_perm_const (rtx operands[4])
       break;
     }
 
-  return mips_expand_vec_perm_const_1 (&d);
+  ok = mips_expand_vec_perm_const_1 (&d);
+
+  /* If we were given a two-vector permutation which just happened to
+     have both input vectors equal, we folded this into a one-vector
+     permutation.  There are several Loongson patterns that are matched
+     via direct vec_select+vec_concat expansion, but we do not have
+     support in mips_expand_vec_perm_const_1 to guess the adjustment
+     that should be made for a single operand.  Just try again with
+     the original permutation.  */
+  if (!ok && which == 3)
+    {
+      d.op0 = operands[1];
+      d.op1 = operands[2];
+      d.one_vector_p = false;
+      memcpy (d.perm, orig_perm, MAX_VECT_LEN);
+      ok = mips_expand_vec_perm_const_1 (&d);
+    }
+
+  return ok;
 }
 
 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */
@@ -16732,7 +16749,7 @@  mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt)
       gcc_unreachable ();
     }
 
-  memset (&d, 0, sizeof(d));
+  memset (&d, 0, sizeof (d));
   d.target = target;
   d.op0 = t1;
   d.op1 = t1;
@@ -16862,47 +16879,68 @@  mips_expand_vec_reduc (rtx target, rtx in, rtx (*gen)(rtx, rtx, rtx))
 {
   enum machine_mode vmode = GET_MODE (in);
   unsigned char perm2[2];
-  rtx tmp;
+  rtx last, next, fold, x;
   bool ok;
 
-  tmp = gen_reg_rtx (vmode);
+  last = in;
+  fold = gen_reg_rtx (vmode);
   switch (vmode)
     {
     case V2SFmode:
       /* Use PUL/PLU to produce { L, H } op { H, L }.
-         By reversing the pair order, rather a pure interleave high,
-	 we don't produce erroneous exceptional conditions.  */
+	 By reversing the pair order, rather than a pure interleave high,
+	 we avoid erroneous exceptional conditions that we might otherwise
+	 produce from the computation of H op H.  */
       perm2[0] = 1;
       perm2[1] = 2;
-      ok = expand_vselect_vconcat (tmp, in, in, perm2, 2);
+      ok = mips_expand_vselect_vconcat (fold, last, last, perm2, 2);
       gcc_assert (ok);
       break;
 
     case V2SImode:
       /* Use interleave to produce { H, L } op { H, H }.  */
-      emit_insn (gen_loongson_punpckhwd (tmp, in, in));
+      emit_insn (gen_loongson_punpckhwd (fold, last, last));
       break;
 
     case V4HImode:
       /* Perform the first reduction with interleave,
 	 and subsequent reductions with shifts.  */
-      emit_insn (gen_loongson_punpckhwd_hi (tmp, in, in));
-      emit_insn (gen (in, in, tmp));
-      emit_insn (gen_vec_shr_v4hi (tmp, in, force_reg (SImode, GEN_INT (16))));
+      emit_insn (gen_loongson_punpckhwd_hi (fold, last, last));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (16));
+      emit_insn (gen_vec_shr_v4hi (fold, last, x));
       break;
 
     case V8QImode:
-      emit_insn (gen_loongson_punpckhwd_qi (tmp, in, in));
-      emit_insn (gen (in, in, tmp));
-      emit_insn (gen_vec_shr_v8qi (tmp, in, force_reg (SImode, GEN_INT (16))));
-      emit_insn (gen (in, in, tmp));
-      emit_insn (gen_vec_shr_v8qi (tmp, in, force_reg (SImode, GEN_INT (8))));
+      emit_insn (gen_loongson_punpckhwd_qi (fold, last, last));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (16));
+      emit_insn (gen_vec_shr_v8qi (fold, last, x));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (8));
+      emit_insn (gen_vec_shr_v8qi (fold, last, x));
       break;
 
     default:
       gcc_unreachable ();
     }
-  emit_insn (gen (target, in, tmp));
+
+  emit_insn (gen (target, last, fold));
 }
 
 /* Expand a vector minimum/maximum.  */