[ia64] Support vec_perm_const

Message ID 4EF130FF.8070802@redhat.com
State New

Commit Message

Richard Henderson Dec. 21, 2011, 1:06 a.m. UTC
Some improvements over the previous version: this one handles more of
the permutation patterns actually generated by the vectorizer over the
course of the testsuite.

Tested on ia64-linux and committed.
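
As an illustration (not part of the patch), a narrowing loop of the
shape below is the kind of code that can exercise the new paths: when
vectorized, it packs V4HI results down to V8QI via vec_pack_trunc_v4hi,
which this patch expands through ia64_expand_vec_perm_even_odd instead
of the removed vec_extract_even/odd patterns.

  /* Illustrative only -- not part of the patch.  */
  void
  pack (unsigned char *restrict dst, const unsigned short *restrict src,
        int n)
  {
    int i;
    for (i = 0; i < n; i++)
      dst[i] = (unsigned char) src[i];
  }
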
commit b155a608c2332b4bf64bcfe07ed059178044f97a
Author: rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Wed Dec 21 01:03:00 2011 +0000

    ia64: Implement vec_perm_const.
    
            * config/ia64/ia64.c (MAX_VECT_LEN): New.
            (struct expand_vec_perm_d): New.
            (TARGET_VECTORIZE_VEC_PERM_CONST_OK): New.
            (ia64_unpack_assemble): Use ia64_expand_vec_perm_const_1.
            (expand_vselect, expand_vselect_vconcat): New.
            (expand_vec_perm_identity, expand_vec_perm_shrp): New.
            (expand_vec_perm_1, expand_vec_perm_broadcast): New.
            (expand_vec_perm_interleave_2, expand_vec_perm_v4hi_5): New.
            (ia64_expand_vec_perm_const_1, ia64_expand_vec_perm_const): New.
            (ia64_vectorize_vec_perm_const_ok): New.
            (ia64_expand_vec_setv2sf, ia64_expand_vec_perm_even_odd): New.
            * config/ia64/ia64-protos.h: Update.
            * config/ia64/vect.md (VEC): New mode iterator.
            (vecint): New mode attribute.
            (vec_interleave_lowv8qi, vec_interleave_highv8qi): Privatize with '*'.
            (vec_interleave_lowv4hi, vec_interleave_highv4hi): Likewise.
            (vec_interleave_lowv2si, vec_interleave_highv2si): Likewise.
            (vec_interleave_lowv2sf, vec_interleave_highv2sf): Likewise.
            (mix1_even, mix1_odd, mux1_alt): Likewise.
            (mux1_brcst_qi): Remove '*' from name.
            (vec_extract_evenv8qi, vec_extract_oddv8qi): Remove.
            (vec_extract_evenv4hi, vec_extract_oddv4hi): Remove.
            (vec_extract_evenv2si, vec_extract_oddv2si): Remove.
            (vec_extract_evenv2sf, vec_extract_oddv2sf): Remove.
            (vec_extract_evenodd_helper): Remove.
            (vec_setv2sf): Use ia64_expand_vec_setv2sf.
            (vec_pack_trunc_v4hi): Use ia64_expand_vec_perm_even_odd.
            (vec_pack_trunc_v2si): Likewise.
            (vec_perm_const<VEC>): New.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@182564 138bc75d-0d04-0410-961f-82ee72b054a4
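
For reference (again illustrative, not part of the patch), the new
vec_perm_const<mode> expander receives a CONST_VECTOR selector whose
elements index into the concatenation of the two input vectors, with
indices >= nelt selecting from operand 2.  A minimal sketch of building
such a selector by hand and handing it to ia64_expand_vec_perm_const --
essentially what the patch's ia64_expand_vec_perm_even_odd does
internally -- might look like:

  static void
  emit_even_perm_v4hi (rtx target, rtx op0, rtx op1)
  {
    /* Select elements 0, 2, 4, 6 of the V8HI concatenation of OP0 and
       OP1.  ia64_expand_vec_perm_const emits a single mix/mux insn, a
       shrp, or a short sequence, and returns false if it cannot handle
       the mask.  */
    rtvec v = rtvec_alloc (4);
    rtx operands[4];
    int i;

    for (i = 0; i < 4; ++i)
      RTVEC_ELT (v, i) = GEN_INT (i * 2);

    operands[0] = target;
    operands[1] = op0;
    operands[2] = op1;
    operands[3] = gen_rtx_CONST_VECTOR (V4HImode, v);

    if (!ia64_expand_vec_perm_const (operands))
      gcc_unreachable ();
  }
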

Patch

diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index a680c31..f7bd4c6 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -61,6 +61,10 @@  extern int ia64_hard_regno_rename_ok (int, int);
 extern enum reg_class ia64_secondary_reload_class (enum reg_class,
 						   enum machine_mode, rtx);
 extern const char *get_bundle_name (int);
+
+extern void ia64_expand_vec_perm_even_odd (rtx, rtx, rtx, int);
+extern bool ia64_expand_vec_perm_const (rtx op[4]);
+extern void ia64_expand_vec_setv2sf (rtx op[3]);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index b970607..1635a7e 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -330,6 +330,24 @@  static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
 static enum machine_mode ia64_get_reg_raw_mode (int regno);
 static section * ia64_hpux_function_section (tree, enum node_frequency,
 					     bool, bool);
+
+static bool ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+					      const unsigned char *sel);
+
+#define MAX_VECT_LEN	8
+
+struct expand_vec_perm_d
+{
+  rtx target, op0, op1;
+  unsigned char perm[MAX_VECT_LEN];
+  enum machine_mode vmode;
+  unsigned char nelt;
+  bool one_operand_p;
+  bool testing_p; 
+};
+
+static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
+
 
 /* Table of valid machine attributes.  */
 static const struct attribute_spec ia64_attribute_table[] =
@@ -626,6 +644,9 @@  static const struct attribute_spec ia64_attribute_table[] =
 #undef TARGET_DELAY_VARTRACK
 #define TARGET_DELAY_VARTRACK true
 
+#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
+#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 typedef enum
@@ -2027,28 +2048,28 @@  ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
 void
 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
 {
-  enum machine_mode mode = GET_MODE (lo);
-  rtx (*gen) (rtx, rtx, rtx);
-  rtx x;
+  enum machine_mode vmode = GET_MODE (lo);
+  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
+  struct expand_vec_perm_d d;
+  bool ok;
 
-  switch (mode)
+  d.target = gen_lowpart (vmode, out);
+  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
+  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
+  d.vmode = vmode;
+  d.nelt = nelt;
+  d.one_operand_p = false;
+  d.testing_p = false;
+
+  high = (highp ? nelt / 2 : 0);
+  for (i = 0; i < nelt / 2; ++i)
     {
-    case V8QImode:
-      gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
-      break;
-    case V4HImode:
-      gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
-      break;
-    default:
-      gcc_unreachable ();
+      d.perm[i * 2] = i + high;
+      d.perm[i * 2 + 1] = i + high + nelt;
     }
 
-  x = gen_lowpart (mode, out);
-  if (TARGET_BIG_ENDIAN)
-    x = gen (x, hi, lo);
-  else
-    x = gen (x, lo, hi);
-  emit_insn (x);
+  ok = ia64_expand_vec_perm_const_1 (&d);
+  gcc_assert (ok);
 }
 
 /* Return a vector of the sign-extension of VEC.  */
@@ -11046,5 +11067,557 @@  ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
 {
   return NULL;
 }
+
+/* Construct (set target (vec_select op0 (parallel perm))) and
+   return true if that's a valid instruction in the active ISA.  */
+
+static bool
+expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
+{
+  rtx rperm[MAX_VECT_LEN], x;
+  unsigned i;
+
+  for (i = 0; i < nelt; ++i)
+    rperm[i] = GEN_INT (perm[i]);
+
+  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
+  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
+  x = gen_rtx_SET (VOIDmode, target, x);
+
+  x = emit_insn (x);
+  if (recog_memoized (x) < 0)
+    {
+      remove_insn (x);
+      return false;
+    }
+  return true;
+}
+
+/* Similar, but generate a vec_concat from op0 and op1 as well.  */
+
+static bool
+expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+			const unsigned char *perm, unsigned nelt)
+{
+  enum machine_mode v2mode;
+  rtx x;
+
+  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
+  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
+  return expand_vselect (target, x, perm, nelt);
+}
+
+/* Try to expand a no-op permutation.  */
+
+static bool
+expand_vec_perm_identity (struct expand_vec_perm_d *d)
+{
+  unsigned i, nelt = d->nelt;
+
+  for (i = 0; i < nelt; ++i)
+    if (d->perm[i] != i)
+      return false;
+
+  if (!d->testing_p)
+    emit_move_insn (d->target, d->op0);
+
+  return true;
+}
+
+/* Try to expand D via a shrp instruction.  */
+
+static bool
+expand_vec_perm_shrp (struct expand_vec_perm_d *d)
+{
+  unsigned i, nelt = d->nelt, shift, mask;
+  rtx tmp, op0, op1;
+
+  /* ??? Don't force V2SFmode into the integer registers.  */
+  if (d->vmode == V2SFmode)
+    return false;
+
+  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
+
+  shift = d->perm[0];
+  for (i = 1; i < nelt; ++i)
+    if (d->perm[i] != ((shift + i) & mask))
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
+
+  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
+  gcc_assert (IN_RANGE (shift, 1, 63));
+
+  /* Recall that big-endian elements are numbered starting at the top of
+     the register.  Ideally we'd have a shift-left-pair.  But since we
+     don't, convert to a shift the other direction.  */
+  if (BYTES_BIG_ENDIAN)
+    shift = 64 - shift;
+
+  tmp = gen_reg_rtx (DImode);
+  op0 = (shift < nelt ? d->op0 : d->op1);
+  op1 = (shift < nelt ? d->op1 : d->op0);
+  op0 = gen_lowpart (DImode, op0);
+  op1 = gen_lowpart (DImode, op1);
+  emit_insn (gen_shrp (tmp, op0, op1, GEN_INT (shift)));
+
+  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
+  return true;
+}
+
+/* Try to instantiate D in a single instruction.  */
+
+static bool
+expand_vec_perm_1 (struct expand_vec_perm_d *d)
+{     
+  unsigned i, nelt = d->nelt;
+  unsigned char perm2[MAX_VECT_LEN];
+
+  /* Try single-operand selections.  */
+  if (d->one_operand_p)
+    {
+      if (expand_vec_perm_identity (d))
+	return true;
+      if (expand_vselect (d->target, d->op0, d->perm, nelt))
+	return true;
+    }
+
+  /* Try two operand selections.  */
+  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
+    return true;
+
+  /* Recognize interleave style patterns with reversed operands.  */
+  if (!d->one_operand_p)
+    {
+      for (i = 0; i < nelt; ++i)
+	{
+	  unsigned e = d->perm[i];
+	  if (e >= nelt)
+	    e -= nelt;
+	  else
+	    e += nelt;
+	  perm2[i] = e;
+	}
+
+      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
+	return true;
+    }
+
+  if (expand_vec_perm_shrp (d))
+    return true;
+
+  /* ??? Look for deposit-like permutations where most of the result 
+     comes from one vector unchanged and the rest comes from a 
+     sequential hunk of the other vector.  */
+
+  return false;
+}
+
+/* Pattern match broadcast permutations.  */
+
+static bool
+expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
+{
+  unsigned i, elt, nelt = d->nelt;
+  unsigned char perm2[2];
+  rtx temp;
+  bool ok;
+
+  if (!d->one_operand_p)
+    return false;
+
+  elt = d->perm[0];
+  for (i = 1; i < nelt; ++i)
+    if (d->perm[i] != elt)
+      return false;
+
+  switch (d->vmode)
+    {
+    case V2SImode:
+    case V2SFmode:
+      /* Implementable by interleave.  */
+      perm2[0] = elt;
+      perm2[1] = elt + 2;
+      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
+      gcc_assert (ok);
+      break;
+
+    case V8QImode:
+      /* Implementable by extract + broadcast.  */
+      if (BYTES_BIG_ENDIAN)
+	elt = 7 - elt;
+      elt *= BITS_PER_UNIT;
+      temp = gen_reg_rtx (DImode);
+      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
+			    GEN_INT (elt), GEN_INT (8)));
+      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
+      break;
+
+    case V4HImode:
+      /* Should have been matched directly by vec_select.  */
+    default:
+      gcc_unreachable ();
+    }
+
+  return true;
+}
+
+/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
+   two vector permutation into a single vector permutation by using
+   an interleave operation to merge the vectors.  */
+
+static bool
+expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
+{
+  struct expand_vec_perm_d dremap, dfinal;
+  unsigned char remap[2 * MAX_VECT_LEN];
+  unsigned contents, i, nelt, nelt2;
+  unsigned h0, h1, h2, h3;
+  rtx seq;
+  bool ok;
+
+  if (d->one_operand_p)
+    return false;
+
+  nelt = d->nelt;
+  nelt2 = nelt / 2;
+
+  /* Examine from whence the elements come.  */
+  contents = 0;
+  for (i = 0; i < nelt; ++i)
+    contents |= 1u << d->perm[i];
+
+  memset (remap, 0xff, sizeof (remap));
+  dremap = *d;
+
+  h0 = (1u << nelt2) - 1;
+  h1 = h0 << nelt2;
+  h2 = h0 << nelt;
+  h3 = h0 << (nelt + nelt2);
+  
+  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
+    {
+      for (i = 0; i < nelt; ++i)
+	{
+	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
+	  remap[which] = i;
+	  dremap.perm[i] = which;
+	}
+    }
+  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
+    {
+      for (i = 0; i < nelt; ++i)
+	{
+	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
+	  remap[which] = i;
+	  dremap.perm[i] = which;
+	}
+    }
+  else if ((contents & 0x5555) == contents)	/* mix even elements */
+    {
+      for (i = 0; i < nelt; ++i)
+	{
+	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
+	  remap[which] = i;
+	  dremap.perm[i] = which;
+	}
+    }
+  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
+    {
+      for (i = 0; i < nelt; ++i)
+	{
+	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
+	  remap[which] = i;
+	  dremap.perm[i] = which;
+	}
+    }
+  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
+    {
+      unsigned shift = ctz_hwi (contents);
+      for (i = 0; i < nelt; ++i)
+	{
+	  unsigned which = (i + shift) & (2 * nelt - 1);
+	  remap[which] = i;
+	  dremap.perm[i] = which;
+	}
+    }
+  else
+    return false;
+
+  /* Use the remapping array set up above to move the elements from their
+     swizzled locations into their final destinations.  */
+  dfinal = *d;
+  for (i = 0; i < nelt; ++i)
+    {
+      unsigned e = remap[d->perm[i]];
+      gcc_assert (e < nelt);
+      dfinal.perm[i] = e;
+    }
+  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
+  dfinal.op1 = dfinal.op0;
+  dfinal.one_operand_p = true;
+  dremap.target = dfinal.op0;
+
+  /* Test if the final remap can be done with a single insn.  For V4HImode
+     this *will* succeed.  For V8QImode or V2SImode it may not.  */
+  start_sequence ();
+  ok = expand_vec_perm_1 (&dfinal);
+  seq = get_insns ();
+  end_sequence ();
+  if (!ok)
+    return false;
+  if (d->testing_p)
+    return true;
+
+  ok = expand_vec_perm_1 (&dremap);
+  gcc_assert (ok);
+
+  emit_insn (seq);
+  return true;
+}
+
+/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
+   constant permutation via two mux2 and a merge.  */
+
+static bool
+expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
+{
+  unsigned char perm2[4];
+  rtx rmask[4];
+  unsigned i;
+  rtx t0, t1, mask, x;
+  bool ok;
+
+  if (d->vmode != V4HImode || d->one_operand_p)
+    return false;
+  if (d->testing_p)
+    return true;
+
+  for (i = 0; i < 4; ++i)
+    {
+      perm2[i] = d->perm[i] & 3;
+      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
+    }
+  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
+  mask = force_reg (V4HImode, mask);
+
+  t0 = gen_reg_rtx (V4HImode);
+  t1 = gen_reg_rtx (V4HImode);
+
+  ok = expand_vselect (t0, d->op0, perm2, 4);
+  gcc_assert (ok);
+  ok = expand_vselect (t1, d->op1, perm2, 4);
+  gcc_assert (ok);
+
+  x = gen_rtx_AND (V4HImode, mask, t0);
+  emit_insn (gen_rtx_SET (VOIDmode, t0, x));
+
+  x = gen_rtx_NOT (V4HImode, mask);
+  x = gen_rtx_AND (V4HImode, x, t1);
+  emit_insn (gen_rtx_SET (VOIDmode, t1, x));
+
+  x = gen_rtx_IOR (V4HImode, t0, t1);
+  emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
+
+  return true;
+}
+
+/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
+   With all of the interface bits taken care of, perform the expansion
+   in D and return true on success.  */
+
+static bool
+ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
+{
+  if (expand_vec_perm_1 (d))
+    return true;
+  if (expand_vec_perm_broadcast (d))
+    return true;
+  if (expand_vec_perm_interleave_2 (d))
+    return true;
+  if (expand_vec_perm_v4hi_5 (d))
+    return true;
+  return false;
+}
+
+bool
+ia64_expand_vec_perm_const (rtx operands[4])
+{
+  struct expand_vec_perm_d d;
+  unsigned char perm[MAX_VECT_LEN];
+  int i, nelt, which;
+  rtx sel;
+
+  d.target = operands[0];
+  d.op0 = operands[1];
+  d.op1 = operands[2];
+  sel = operands[3];
+
+  d.vmode = GET_MODE (d.target);
+  gcc_assert (VECTOR_MODE_P (d.vmode));
+  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+  d.testing_p = false;
+
+  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
+  gcc_assert (XVECLEN (sel, 0) == nelt);
+  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
+
+  for (i = which = 0; i < nelt; ++i)
+    {
+      rtx e = XVECEXP (sel, 0, i);
+      int ei = INTVAL (e) & (2 * nelt - 1);
+
+      which |= (ei < nelt ? 1 : 2);
+      d.perm[i] = ei;
+      perm[i] = ei;
+    }
+
+  switch (which)
+    {
+    default:
+      gcc_unreachable();
+
+    case 3:
+      if (!rtx_equal_p (d.op0, d.op1))
+	{
+	  d.one_operand_p = false;
+	  break;
+	}
+
+      /* The elements of PERM do not suggest that only the first operand
+	 is used, but both operands are identical.  Allow easier matching
+	 of the permutation by folding the permutation into the single
+	 input vector.  */
+      for (i = 0; i < nelt; ++i)
+	if (d.perm[i] >= nelt)
+	  d.perm[i] -= nelt;
+      /* FALLTHRU */
+
+    case 1:
+      d.op1 = d.op0;
+      d.one_operand_p = true;
+      break;
+
+    case 2:
+      for (i = 0; i < nelt; ++i)
+        d.perm[i] -= nelt;
+      d.op0 = d.op1;
+      d.one_operand_p = true;
+      break;
+    }
+
+  if (ia64_expand_vec_perm_const_1 (&d))
+    return true;
+
+  /* If the mask says both arguments are needed, but they are the same,
+     the above tried to expand with one_operand_p true.  If that didn't
+     work, retry with one_operand_p false, as that's what we used in _ok.  */
+  if (which == 3 && d.one_operand_p)
+    {
+      memcpy (d.perm, perm, sizeof (perm));
+      d.one_operand_p = false;
+      return ia64_expand_vec_perm_const_1 (&d);
+    }
+
+  return false;
+}
+
+/* Implement targetm.vectorize.vec_perm_const_ok.  */
+
+static bool
+ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+				  const unsigned char *sel)
+{
+  struct expand_vec_perm_d d;
+  unsigned int i, nelt, which;
+  bool ret;
+
+  d.vmode = vmode;
+  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+  d.testing_p = true;
+
+  /* Extract the values from the vector CST into the permutation
+     array in D.  */
+  memcpy (d.perm, sel, nelt);
+  for (i = which = 0; i < nelt; ++i)
+    {
+      unsigned char e = d.perm[i];
+      gcc_assert (e < 2 * nelt);
+      which |= (e < nelt ? 1 : 2);
+    }
+
+  /* For all elements from second vector, fold the elements to first.  */
+  if (which == 2)
+    for (i = 0; i < nelt; ++i)
+      d.perm[i] -= nelt;
+
+  /* Check whether the mask can be applied to the vector type.  */
+  d.one_operand_p = (which != 3);
+
+  /* Otherwise we have to go through the motions and see if we can
+     figure out how to generate the requested permutation.  */
+  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
+  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+  if (!d.one_operand_p)
+    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+
+  start_sequence ();
+  ret = ia64_expand_vec_perm_const_1 (&d);
+  end_sequence ();
+
+  return ret;
+}
+
+void
+ia64_expand_vec_setv2sf (rtx operands[3])
+{
+  struct expand_vec_perm_d d;
+  unsigned int which;
+  bool ok;
+  
+  d.target = operands[0];
+  d.op0 = operands[0];
+  d.op1 = gen_reg_rtx (V2SFmode);
+  d.vmode = V2SFmode;
+  d.nelt = 2;
+  d.one_operand_p = false;
+  d.testing_p = false;
+
+  which = INTVAL (operands[2]);
+  gcc_assert (which <= 1);
+  d.perm[0] = 1 - which;
+  d.perm[1] = which + 2;
+
+  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
+
+  ok = ia64_expand_vec_perm_const_1 (&d);
+  gcc_assert (ok);
+}
+
+void
+ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
+{
+  struct expand_vec_perm_d d;
+  enum machine_mode vmode = GET_MODE (target);
+  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
+  bool ok;
+
+  d.target = target;
+  d.op0 = op0;
+  d.op1 = op1;
+  d.vmode = vmode;
+  d.nelt = nelt;
+  d.one_operand_p = false;
+  d.testing_p = false;
+
+  for (i = 0; i < nelt; ++i)
+    d.perm[i] = i * 2 + odd;
+
+  ok = ia64_expand_vec_perm_const_1 (&d);
+  gcc_assert (ok);
+}
 
 #include "gt-ia64.h"
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index 2f068eb..aa77b01 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -20,11 +20,14 @@ 
 
 ;; Integer vector operations
 
+(define_mode_iterator VEC [V8QI V4HI V2SI V2SF])
 (define_mode_iterator VECINT [V8QI V4HI V2SI])
 (define_mode_iterator VECINT12 [V8QI V4HI])
 (define_mode_iterator VECINT24 [V4HI V2SI])
 (define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")])
 (define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")])
+(define_mode_attr vecint
+  [(V8QI "V8QI") (V4HI "V4HI") (V2SI "V2SI") (V2SF "V2SI")])
 
 (define_expand "mov<mode>"
   [(set (match_operand:VECINT 0 "general_operand" "")
@@ -756,7 +759,7 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "vec_interleave_lowv8qi"
+(define_insn "*vec_interleave_lowv8qi"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
@@ -776,7 +779,7 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "vec_interleave_highv8qi"
+(define_insn "*vec_interleave_highv8qi"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
@@ -796,7 +799,7 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix1_even"
+(define_insn "*mix1_even"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
@@ -816,7 +819,7 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix1_odd"
+(define_insn "*mix1_odd"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
@@ -872,7 +875,7 @@ 
   "mux1 %0 = %1, @shuf"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mux1_alt"
+(define_insn "*mux1_alt"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
@@ -900,7 +903,7 @@ 
   "mux1 %0 = %1, @brcst"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "*mux1_brcst_qi"
+(define_insn "mux1_brcst_qi"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_duplicate:V8QI
 	  (match_operand:QI 1 "gr_register_operand" "r")))]
@@ -908,31 +911,7 @@ 
   "mux1 %0 = %1, @brcst"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_expand "vec_extract_evenv8qi"
-  [(match_operand:V8QI 0 "gr_register_operand" "")
-   (match_operand:V8QI 1 "gr_register_operand" "")
-   (match_operand:V8QI 2 "gr_register_operand" "")]
-  ""
-{
-  rtx temp = gen_reg_rtx (V8QImode);
-  emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
-  emit_insn (gen_mux1_alt (operands[0], temp));
-  DONE;
-})
-
-(define_expand "vec_extract_oddv8qi"
-  [(match_operand:V8QI 0 "gr_register_operand" "")
-   (match_operand:V8QI 1 "gr_register_operand" "")
-   (match_operand:V8QI 2 "gr_register_operand" "")]
-  ""
-{
-  rtx temp = gen_reg_rtx (V8QImode);
-  emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
-  emit_insn (gen_mux1_alt (operands[0], temp));
-  DONE;
-})
-
-(define_insn "vec_interleave_lowv4hi"
+(define_insn "*vec_interleave_lowv4hi"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
@@ -950,7 +929,7 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "vec_interleave_highv4hi"
+(define_insn "*vec_interleave_highv4hi"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
@@ -1034,38 +1013,6 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_expand "vec_extract_evenodd_helper"
-  [(set (match_operand:V4HI 0 "gr_register_operand" "")
-	(vec_select:V4HI
-	  (match_operand:V4HI 1 "gr_register_operand" "")
-	  (parallel [(const_int 0) (const_int 2)
-		     (const_int 1) (const_int 3)])))]
-  "")
-
-(define_expand "vec_extract_evenv4hi"
-  [(match_operand:V4HI 0 "gr_register_operand")
-   (match_operand:V4HI 1 "gr_reg_or_0_operand")
-   (match_operand:V4HI 2 "gr_reg_or_0_operand")]
-  ""
-{
-  rtx temp = gen_reg_rtx (V4HImode);
-  emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
-  emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
-  DONE;
-})
-
-(define_expand "vec_extract_oddv4hi"
-  [(match_operand:V4HI 0 "gr_register_operand")
-   (match_operand:V4HI 1 "gr_reg_or_0_operand")
-   (match_operand:V4HI 2 "gr_reg_or_0_operand")]
-  ""
-{
-  rtx temp = gen_reg_rtx (V4HImode);
-  emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
-  emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
-  DONE;
-})
-
 (define_insn "*mux2_brcst_hi"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_duplicate:V4HI
@@ -1074,7 +1021,7 @@ 
   "mux2 %0 = %1, 0"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "vec_interleave_lowv2si"
+(define_insn "*vec_interleave_lowv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
@@ -1091,7 +1038,7 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "vec_interleave_highv2si"
+(define_insn "*vec_interleave_highv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
@@ -1108,36 +1055,6 @@ 
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_expand "vec_extract_evenv2si"
-  [(match_operand:V2SI 0 "gr_register_operand" "")
-   (match_operand:V2SI 1 "gr_register_operand" "")
-   (match_operand:V2SI 2 "gr_register_operand" "")]
-  ""
-{
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
-					    operands[2]));
-  else
-    emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
-					   operands[2]));
-  DONE;
-})
-
-(define_expand "vec_extract_oddv2si"
-  [(match_operand:V2SI 0 "gr_register_operand" "")
-   (match_operand:V2SI 1 "gr_register_operand" "")
-   (match_operand:V2SI 2 "gr_register_operand" "")]
-  ""
-{
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
-					   operands[2]));
-  else
-    emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
-					    operands[2]));
-  DONE;
-})
-
 (define_expand "vec_initv2si"
   [(match_operand:V2SI 0 "gr_register_operand" "")
    (match_operand 1 "" "")]
@@ -1479,7 +1396,7 @@ 
 }
   [(set_attr "itanium_class" "fmisc")])
 
-(define_insn "vec_interleave_highv2sf"
+(define_insn "*vec_interleave_highv2sf"
   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
 	(vec_select:V2SF
 	  (vec_concat:V4SF
@@ -1496,7 +1413,7 @@ 
 }
   [(set_attr "itanium_class" "fmisc")])
 
-(define_insn "vec_interleave_lowv2sf"
+(define_insn "*vec_interleave_lowv2sf"
   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
 	(vec_select:V2SF
 	  (vec_concat:V4SF
@@ -1530,58 +1447,13 @@ 
 }
   [(set_attr "itanium_class" "fmisc")])
 
-(define_expand "vec_extract_evenv2sf"
-  [(match_operand:V2SF 0 "gr_register_operand" "")
-   (match_operand:V2SF 1 "gr_register_operand" "")
-   (match_operand:V2SF 2 "gr_register_operand" "")]
-  ""
-{
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
-					    operands[2]));
-  else
-    emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
-					   operands[2]));
-  DONE;
-})
-
-(define_expand "vec_extract_oddv2sf"
-  [(match_operand:V2SF 0 "gr_register_operand" "")
-   (match_operand:V2SF 1 "gr_register_operand" "")
-   (match_operand:V2SF 2 "gr_register_operand" "")]
-  ""
-{
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
-					   operands[2]));
-  else
-    emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
-					    operands[2]));
-  DONE;
-})
-
 (define_expand "vec_setv2sf"
   [(match_operand:V2SF 0 "fr_register_operand" "")
    (match_operand:SF 1 "fr_register_operand" "")
    (match_operand 2 "const_int_operand" "")]
   ""
 {
-  rtx op0 = operands[0];
-  rtx tmp = gen_reg_rtx (V2SFmode);
-
-  emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
-
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_insn (gen_fmix_lr (op0, tmp, op0));
-      break;
-    case 1:
-      emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
-      break;
-    default:
-      gcc_unreachable ();
-    }
+  ia64_expand_vec_setv2sf (operands);
   DONE;
 })
 
@@ -1703,10 +1575,7 @@ 
 {
   rtx op1 = gen_lowpart (V8QImode, operands[1]);
   rtx op2 = gen_lowpart (V8QImode, operands[2]);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
-  else
-    emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
+  ia64_expand_vec_perm_even_odd (operands[0], op1, op2, TARGET_BIG_ENDIAN);
   DONE;
 })
 
@@ -1718,13 +1587,23 @@ 
 {
   rtx op1 = gen_lowpart (V4HImode, operands[1]);
   rtx op2 = gen_lowpart (V4HImode, operands[2]);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
-  else
-    emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
+  ia64_expand_vec_perm_even_odd (operands[0], op1, op2, TARGET_BIG_ENDIAN);
   DONE;
 })
 
+(define_expand "vec_perm_const<mode>"
+  [(match_operand:VEC 0 "register_operand" "")
+   (match_operand:VEC 1 "register_operand" "")
+   (match_operand:VEC 2 "register_operand" "")
+   (match_operand:<vecint> 3 "" "")]
+  ""
+{
+  if (ia64_expand_vec_perm_const (operands))
+    DONE;
+  else
+    FAIL;
+})
+
 ;; Missing operations
 ;; fprcpa
 ;; fpsqrta