Patchwork Add sparc vec_perm patterns when VIS2.

login
register
mail settings
Submitter David Miller
Date Oct. 18, 2011, 12:29 a.m.
Message ID <20111017.202918.252824034880155678.davem@davemloft.net>
Download mbox | patch
Permalink /patch/120347/
State New
Headers show

Comments

David Miller - Oct. 18, 2011, 12:29 a.m.
This is Richard's patch to add vec_perm patterns for VIS2. I verified
that the patterns do get used in some of the slp test cases and that
such test cases pass.

I only had to make a minor adjustment due to a conflict with the
vector segregation patch, namely the mode iterator had to be changed
from V64I to VM64.

Regstrapped on sparc-linux and committed to trunk.

gcc/

	* config/sparc/sparc.md (vec_perm_constv8qi, vec_perm<mode>): New
	patterns.
	* config/sparc/sparc.c (sparc_expand_vec_perm_bmask): New function.
	* config/sparc/sparc-protos.h (sparc_expand_vec_perm_bmask): Declare.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180119 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                   |    5 ++
 gcc/config/sparc/sparc-protos.h |    1 +
 gcc/config/sparc/sparc.c        |  107 +++++++++++++++++++++++++++++++++++++++
 gcc/config/sparc/sparc.md       |   37 +++++++++++++
 4 files changed, 150 insertions(+), 0 deletions(-)

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 79a0f90..ddc5149 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -53,6 +53,11 @@ 
 	* config/sparc/sparc.md: Use register_or_zero_operand where rJ
 	is the constraint.
 
+	* config/sparc/sparc.md (vec_perm_constv8qi, vec_perm<mode>): New
+	patterns.
+	* config/sparc/sparc.c (sparc_expand_vec_perm_bmask): New function.
+	* config/sparc/sparc-protos.h (sparc_expand_vec_perm_bmask): Declare.
+
 2011-10-17  David S. Miller  <davem@davemloft.net>
 
 	* config/sparc/sparc-modes.def: Add single entry vector modes for
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
index 744747a..4b15b82 100644
--- a/gcc/config/sparc/sparc-protos.h
+++ b/gcc/config/sparc/sparc-protos.h
@@ -107,6 +107,7 @@  extern rtx gen_df_reg (rtx, int);
 extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx);
 extern const char *output_v8plus_mult (rtx, rtx *, const char *);
 extern void sparc_expand_vector_init (rtx, rtx);
+extern void sparc_expand_vec_perm_bmask(enum machine_mode, rtx);
 #endif /* RTX_CODE */
 
 #endif /* __SPARC_PROTOS_H__ */
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index b495690..a6601de 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -10863,6 +10863,113 @@  sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
   emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
 }
 
+void
+sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
+{
+  rtx t_1, t_2, t_3;
+
+  sel = gen_lowpart (DImode, sel);
+  switch (vmode)
+    {
+    case V2SImode:
+      /* inp = xxxxxxxAxxxxxxxB */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ....xxxxxxxAxxxx */
+      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
+				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
+      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
+				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .......B */
+      /* t_1 = ...A.... */
+      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
+      /* sel = ...A...B */
+      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
+      /* sel = AAAABBBB * 4 */
+      t_1 = force_reg (SImode, GEN_INT (0x01230123));
+      /* sel = { A*4, A*4+1, A*4+2, ... } */
+      break;
+
+    case V4HImode:
+      /* inp = xxxAxxxBxxxCxxxD */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ..xxxAxxxBxxxCxx */
+      /* t_2 = ....xxxAxxxBxxxC */
+      /* t_3 = ......xxxAxxxBxx */
+      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
+				 GEN_INT (0x07),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
+				 GEN_INT (0x0700),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
+				 GEN_INT (0x070000),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
+				 GEN_INT (0x07000000),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .......D */
+      /* t_1 = .....C.. */
+      /* t_2 = ...B.... */
+      /* t_3 = .A...... */
+      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
+      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
+      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
+      /* sel = .A.B.C.D */
+      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
+      /* sel = AABBCCDD * 2 */
+      t_1 = force_reg (SImode, GEN_INT (0x01010101));
+      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
+      break;
+  
+    case V8QImode:
+      /* input = xAxBxCxDxExFxGxH */
+      sel = expand_simple_binop (DImode, AND, sel,
+				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
+					  | 0x0f0f0f0f),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .A.B.C.D.E.F.G.H */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ..A.B.C.D.E.F.G. */
+      sel = expand_simple_binop (DImode, IOR, sel, t_1,
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = .AABBCCDDEEFFGGH */
+      sel = expand_simple_binop (DImode, AND, sel,
+				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
+					  | 0xff00ff),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = ..AB..CD..EF..GH */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ....AB..CD..EF.. */
+      sel = expand_simple_binop (DImode, IOR, sel, t_1,
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = ..ABABCDCDEFEFGH */
+      sel = expand_simple_binop (DImode, AND, sel,
+				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* sel = ....ABCD....EFGH */
+      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+				 NULL_RTX, 1, OPTAB_DIRECT);
+      /* t_1 = ........ABCD.... */
+      sel = gen_lowpart (SImode, sel);
+      t_1 = gen_lowpart (SImode, t_1);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Always perform the final addition/merge within the bmask insn.  */
+  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
+}
+
 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
 
 static bool
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 75238c5..d89f6f9 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -8350,6 +8350,43 @@ 
   [(set_attr "type" "fga")
    (set_attr "fptype" "double")])
 
+;; The rtl expanders will happily convert constant permutations on other
+;; modes down to V8QI.  Rely on this to avoid the complexity of the byte
+;; order of the permutation.
+(define_expand "vec_perm_constv8qi"
+  [(match_operand:V8QI 0 "register_operand" "")
+   (match_operand:V8QI 1 "register_operand" "")
+   (match_operand:V8QI 2 "register_operand" "")
+   (match_operand:V8QI 3 "" "")]
+  "TARGET_VIS2"
+{
+  unsigned int i, mask;
+  rtx sel = operands[3];
+
+  for (i = mask = 0; i < 8; ++i)
+    mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4);
+  sel = force_reg (SImode, gen_int_mode (mask, SImode));
+
+  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
+  emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; Unlike constant permutation, we can vastly simplify the compression of
+;; the 64-bit selector input to the 32-bit %gsr value by knowing what the
+;; width of the input is.
+(define_expand "vec_perm<mode>"
+  [(match_operand:VM64 0 "register_operand" "")
+   (match_operand:VM64 1 "register_operand" "")
+   (match_operand:VM64 2 "register_operand" "")
+   (match_operand:VM64 3 "register_operand" "")]
+  "TARGET_VIS2"
+{
+  sparc_expand_vec_perm_bmask (<MODE>mode, operands[3]);
+  emit_insn (gen_bshuffle<mode>_vis (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 ;; VIS 2.0 adds edge variants which do not set the condition codes
 (define_insn "edge8n<P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")