Patchwork [06/10] mips: Improve support for vec_init.

login
register
mail settings
Submitter Richard Henderson
Date Dec. 21, 2011, 5 p.m.
Message ID <1324486822-18225-7-git-send-email-rth@redhat.com>
Download mbox | patch
Permalink /patch/132694/
State New
Headers show

Comments

Richard Henderson - Dec. 21, 2011, 5 p.m.
---
 gcc/config/mips/loongson.md   |   26 +++++
 gcc/config/mips/mips-ps-3d.md |   14 +--
 gcc/config/mips/mips.c        |  226 ++++++++++++++++++++++++++++++++++++-----
 3 files changed, 233 insertions(+), 33 deletions(-)
Richard Sandiford - Dec. 22, 2011, 8:05 p.m.
Richard Henderson <rth@redhat.com> writes:
> +/* Recognize broadcast patterns for the Loongson.  */
> +
> +static bool
> +mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d)
> +{
> +  unsigned i, elt;
> +  rtx t0, t1;
> +
> +  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
> +    return false;
> +  /* Note that we've already matched V2SI via punpck and V4HI via pshufh.  */
> +  if (d->vmode != V8QImode)
> +    return false;
> +  if (!d->one_vector_p)
> +    return false;
> +
> +  elt = d->perm[0];
> +  for (i = 1; i < 8; ++i)
> +    if (d->perm[i] != elt)
> +      return false;
> +
> +  if (d->testing_p)
> +    return true;
> +
> +  /* With one interleave we put two of the desired element adjacent.  */
> +  t0 = gen_reg_rtx (V8QImode);
> +  if (elt < 4)
> +    emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0));
> +  else
> +    emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0));
> +
> +  /* Shuffle that one HImode element into all locations.  */
> +  elt &= 3;
> +  elt *= 0x55;
> +  t1 = gen_reg_rtx (V4HImode);
> +  emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0),
> +				  force_reg (SImode, GEN_INT (elt))));
> +
> +  emit_move_insn (d->target, gen_lowpart (V8QImode, t1));
> +  return true;

Probably one of those where you had to stop following the rathole,
but could this be generalised to handle pairs in which perm[1]
== perm[0] + 8?  Something like:

> +  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
> +    return false;
> +  /* Note that we've already matched V2SI via punpck and V4HI via pshufh.  */
> +  if (d->vmode != V8QImode)
> +    return false;
> +  if (d->perm[1] != (d->one_vector_p ? : d->perm[0] : d->perm[0] + 8))
> +    return false;
> +
> +  for (i = 2; i < 8; ++i)
> +    if (d->perm[i] != d->perm[i & 1])
> +      return false;

then use both d->op0 and d->op1 in the pack?

> +  memset (&d, 0, sizeof(d));

missing space

Richard
Richard Henderson - Dec. 22, 2011, 8:28 p.m.
On 12/22/2011 12:05 PM, Richard Sandiford wrote:
> Probably one of those where you had to stop following the rathole,
> but could this be generalised to handle pairs in which perm[1]
> == perm[0] + 8?  Something like:
> 
>> +  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
>> +    return false;
>> +  /* Note that we've already matched V2SI via punpck and V4HI via pshufh.  */
>> +  if (d->vmode != V8QImode)
>> +    return false;
>> +  if (d->perm[1] != (d->one_vector_p ? : d->perm[0] : d->perm[0] + 8))
>> +    return false;
>> +
>> +  for (i = 2; i < 8; ++i)
>> +    if (d->perm[i] != d->perm[i & 1])
>> +      return false;
> 
> then use both d->op0 and d->op1 in the pack?

Hum.  I suppose.  But that's starting to look more like the i386
expand_vec_perm_interleave2 + valid_perm_using_mode_p, or something.

I'd rather leave this patch alone and do a follow-up if you want.


r~
Richard Sandiford - Dec. 22, 2011, 8:45 p.m.
Richard Henderson <rth@redhat.com> writes:
> On 12/22/2011 12:05 PM, Richard Sandiford wrote:
>> Probably one of those where you had to stop following the rathole,
>> but could this be generalised to handle pairs in which perm[1]
>> == perm[0] + 8?  Something like:
>> 
>>> +  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
>>> +    return false;
>>> +  /* Note that we've already matched V2SI via punpck and V4HI via pshufh.  */
>>> +  if (d->vmode != V8QImode)
>>> +    return false;
>>> +  if (d->perm[1] != (d->one_vector_p ? : d->perm[0] : d->perm[0] + 8))
>>> +    return false;
>>> +
>>> +  for (i = 2; i < 8; ++i)
>>> +    if (d->perm[i] != d->perm[i & 1])
>>> +      return false;
>> 
>> then use both d->op0 and d->op1 in the pack?
>
> Hum.  I suppose.  But that's starting to look more like the i386
> expand_vec_perm_interleave2 + valid_perm_using_mode_p, or something.
>
> I'd rather leave this patch alone and do a follow-up if you want.

Sure, and no need for the follow-up.  I was just curious.

Richard

Patch

diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md
index 8404bf0..c80a45a 100644
--- a/gcc/config/mips/loongson.md
+++ b/gcc/config/mips/loongson.md
@@ -25,6 +25,7 @@ 
   UNSPEC_LOONGSON_PCMPGT
   UNSPEC_LOONGSON_PEXTR
   UNSPEC_LOONGSON_PINSRH
+  UNSPEC_LOONGSON_VINIT
   UNSPEC_LOONGSON_PMADD
   UNSPEC_LOONGSON_PMOVMSK
   UNSPEC_LOONGSON_PMULHU
@@ -83,6 +84,9 @@ 
 ;; but with twice as many elements.
 (define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")])
 
+;; Given a vector type T, the inner mode.
+(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
+
 ;; The Loongson instruction suffixes corresponding to the conversions
 ;; specified by V_half_width.
 (define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")])
@@ -119,6 +123,28 @@ 
   DONE;
 })
 
+;; Helper for vec_init.  Initialize element 0 of the output from the input.
+;; All other elements are undefined.
+(define_insn "loongson_vec_init1_<mode>"
+  [(set (match_operand:VHB 0 "register_operand" "=f")
+	(unspec:VHB [(truncate:<V_inner>
+		       (match_operand:DI 1 "reg_or_0_operand" "Jd"))]
+		    UNSPEC_LOONGSON_VINIT))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "dmtc1\t%z1,%0"
+  [(set_attr "move_type" "mtc")
+   (set_attr "mode" "DI")])
+
+;; Helper for vec_initv2si.
+(define_insn "*vec_concatv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=f")
+	(vec_concat:V2SI
+	  (match_operand:SI 1 "register_operand" "f")
+	  (match_operand:SI 2 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+  "punpcklwd\t%0,%1,%2"
+  [(set_attr "type" "fdiv")])
+
 ;; Instruction patterns for SIMD instructions.
 
 ;; Pack with signed saturation.
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index fbbb7b0..7c3fe85 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -259,13 +259,11 @@ 
    (match_operand:V2SF 1 "")]
   "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
 {
-  rtx op0 = force_reg (SFmode, XVECEXP (operands[1], 0, 0));
-  rtx op1 = force_reg (SFmode, XVECEXP (operands[1], 0, 1));
-  emit_insn (gen_vec_initv2sf_internal (operands[0], op0, op1));
+  mips_expand_vector_init (operands[0], operands[1]);
   DONE;
 })
 
-(define_insn "vec_initv2sf_internal"
+(define_insn "vec_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand" "=f")
 	(vec_concat:V2SF
 	 (match_operand:SF 1 "register_operand" "f")
@@ -315,7 +313,7 @@ 
   /* We don't have an insert instruction, so we duplicate the float, and
      then use a PUL instruction.  */
   rtx temp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1]));
+  emit_insn (gen_vec_concatv2sf (temp, operands[1], operands[1]));
   operands[1] = temp;
   operands[3] = GEN_INT (1 - INTVAL (operands[2]) + 2);
 })
@@ -328,11 +326,9 @@ 
   "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
 {
   if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_vec_initv2sf_internal (operands[0], operands[1],
-	       operands[2]));
+    emit_insn (gen_vec_concatv2sf (operands[0], operands[1], operands[2]));
   else
-    emit_insn (gen_vec_initv2sf_internal (operands[0], operands[2],
-	       operands[1]));
+    emit_insn (gen_vec_concatv2sf (operands[0], operands[2], operands[1]));
   DONE;
 })
 
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index b3a3ad0..45b8454 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -15932,30 +15932,6 @@  mips_conditional_register_usage (void)
     }
 }
 
-/* Initialize vector TARGET to VALS.  */
-
-void
-mips_expand_vector_init (rtx target, rtx vals)
-{
-  enum machine_mode mode;
-  enum machine_mode inner;
-  unsigned int i, n_elts;
-  rtx mem;
-
-  mode = GET_MODE (target);
-  inner = GET_MODE_INNER (mode);
-  n_elts = GET_MODE_NUNITS (mode);
-
-  gcc_assert (VECTOR_MODE_P (mode));
-
-  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
-  for (i = 0; i < n_elts; i++)
-    emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
-                    XVECEXP (vals, 0, i));
-
-  emit_move_insn (target, mem);
-}
-
 /* When generating MIPS16 code, we want to allocate $24 (T_REG) before
    other registers for instructions for which it is possible.  This
    encourages the compiler to use CMP in cases where an XOR would
@@ -16475,6 +16451,48 @@  mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Recognize broadcast patterns for the Loongson.  */
+
+static bool
+mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d)
+{
+  unsigned i, elt;
+  rtx t0, t1;
+
+  if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
+    return false;
+  /* Note that we've already matched V2SI via punpck and V4HI via pshufh.  */
+  if (d->vmode != V8QImode)
+    return false;
+  if (!d->one_vector_p)
+    return false;
+
+  elt = d->perm[0];
+  for (i = 1; i < 8; ++i)
+    if (d->perm[i] != elt)
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  /* With one interleave we put two of the desired element adjacent.  */
+  t0 = gen_reg_rtx (V8QImode);
+  if (elt < 4)
+    emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0));
+  else
+    emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0));
+
+  /* Shuffle that one HImode element into all locations.  */
+  elt &= 3;
+  elt *= 0x55;
+  t1 = gen_reg_rtx (V4HImode);
+  emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0),
+				  force_reg (SImode, GEN_INT (elt))));
+
+  emit_move_insn (d->target, gen_lowpart (V8QImode, t1));
+  return true;
+}
+
 static bool
 mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
 {
@@ -16506,6 +16524,8 @@  mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
     return true;
   if (mips_expand_vpc_loongson_pshufh (d))
     return true;
+  if (mips_expand_vpc_loongson_bcast (d))
+    return true;
   return false;
 }
 
@@ -16656,6 +16676,164 @@  mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
 
   emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
 }
+
+/* A subroutine of mips_expand_vector_init, match constant vector elements.  */
+
+static inline bool
+mips_constant_elt_p (rtx x)
+{
+  return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE;
+}
+
+/* A subroutine of mips_expand_vector_init, expand via broadcast.  */
+
+static void
+mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt)
+{
+  struct expand_vec_perm_d d;
+  rtx t1;
+  bool ok;
+
+  if (elt != const0_rtx)
+    elt = force_reg (GET_MODE_INNER (vmode), elt);
+  if (REG_P (elt))
+    elt = gen_lowpart (DImode, elt);
+
+  t1 = gen_reg_rtx (vmode);
+  switch (vmode)
+    {
+    case V8QImode:
+      emit_insn (gen_loongson_vec_init1_v8qi (t1, elt));
+      break;
+    case V4HImode:
+      emit_insn (gen_loongson_vec_init1_v4hi (t1, elt));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  memset (&d, 0, sizeof(d));
+  d.target = target;
+  d.op0 = t1;
+  d.op1 = t1;
+  d.vmode = vmode;
+  d.nelt = GET_MODE_NUNITS (vmode);
+  d.one_vector_p = true;
+
+  ok = mips_expand_vec_perm_const_1 (&d);
+  gcc_assert (ok);
+}
+
+/* A subroutine of mips_expand_vector_init.  Replace all of the non-constant
+   elements of VALS with zeros and copy the constant vector to TARGET.  */
+
+static void
+mips_expand_vi_constant (enum machine_mode vmode, unsigned nelt,
+			 rtx target, rtx vals)
+{
+  rtvec vec = shallow_copy_rtvec (XVEC (vals, 0));
+  unsigned i;
+
+  for (i = 0; i < nelt; ++i)
+    {
+      if (!mips_constant_elt_p (RTVEC_ELT (vec, i)))
+	RTVEC_ELT (vec, i) = const0_rtx;
+    }
+
+  emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec));
+}
+
+
+/* A subroutine of mips_expand_vector_init, expand via pinsrh.  */
+
+static void
+mips_expand_vi_loongson_one_pinsrh (rtx target, rtx vals, unsigned one_var)
+{
+  mips_expand_vi_constant (V4HImode, 4, target, vals);
+
+  emit_insn (gen_vec_setv4hi (target, target, XVECEXP (vals, 0, one_var),
+			      GEN_INT (one_var)));
+}
+
+/* A subroutine of mips_expand_vector_init, expand anything via memory.  */
+
+static void
+mips_expand_vi_general (enum machine_mode vmode, enum machine_mode imode,
+			unsigned nelt, unsigned nvar, rtx target, rtx vals)
+{
+  rtx mem = assign_stack_temp (vmode, GET_MODE_SIZE (vmode), 0);
+  unsigned int i, isize = GET_MODE_SIZE (imode);
+
+  if (nvar < nelt)
+    mips_expand_vi_constant (vmode, nelt, mem, vals);
+
+  for (i = 0; i < nelt; ++i)
+    {
+      rtx x = XVECEXP (vals, 0, i);
+      if (!mips_constant_elt_p (x))
+	emit_move_insn (adjust_address (mem, imode, i * isize), x);
+    }
+
+  emit_move_insn (target, mem);
+}
+
+/* Expand a vector initialization.  */
+
+void
+mips_expand_vector_init (rtx target, rtx vals)
+{
+  enum machine_mode vmode = GET_MODE (target);
+  enum machine_mode imode = GET_MODE_INNER (vmode);
+  unsigned i, nelt = GET_MODE_NUNITS (vmode);
+  unsigned nvar = 0, one_var = -1u;
+  bool all_same = true;
+  rtx x;
+
+  for (i = 0; i < nelt; ++i)
+    {
+      x = XVECEXP (vals, 0, i);
+      if (!mips_constant_elt_p (x))
+	nvar++, one_var = i;
+      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+	all_same = false;
+    }
+
+  /* Load constants from the pool, or whatever's handy.  */
+  if (nvar == 0)
+    {
+      emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)));
+      return;
+    }
+
+  /* For two-part initialization, always use CONCAT.  */
+  if (nelt == 2)
+    {
+      rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0));
+      rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1));
+      x = gen_rtx_VEC_CONCAT (vmode, op0, op1);
+      emit_insn (gen_rtx_SET (VOIDmode, target, x));
+      return;
+    }
+
+  /* Loongson is the only cpu with vectors with more elements.  */
+  gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS);
+
+  /* If all values are identical, broadcast the value.  */
+  if (all_same)
+    {
+      mips_expand_vi_broadcast (vmode, target, XVECEXP (vals, 0, 0));
+      return;
+    }
+
+  /* If we've only got one variable V4HImode element, use PINSRH.  */
+  if (nvar == 1 && vmode == V4HImode)
+    {
+      mips_expand_vi_loongson_one_pinsrh (target, vals, one_var);
+      return;
+    }
+
+  mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals);
+}
 
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP