Patchwork [ia64,rfa] vector pattern improvements

login
register
mail settings
Submitter Steve Ellcey
Date Jan. 18, 2011, 9:06 p.m.
Message ID <1295384783.21802.207.camel@hpsje.cup.hp.com>
Download mbox | patch
Permalink /patch/79362/
State New
Headers show

Comments

Steve Ellcey - Jan. 18, 2011, 9:06 p.m.
On Mon, 2011-01-10 at 17:47 -0800, Richard Henderson wrote:

> I'm not sure how to debug this problem, exactly.  How about
> stepping through vect-9.c in the debugger and logging the
> whole session?  Clearly something is going wrong, but I
> don't think we've really identified what that is.
> 
> 
> r~

I don't have very good debugger support on IA64 HP-UX so stepping
through things doesn't work very well.  Here is a patch I have, based on the
current top of tree (ToT), that fixes all the execution failures on HP-UX and doesn't
break anything on Linux.  I haven't gotten the mulv2si3 and the
vec_extract_even* and vec_extract_odd* instructions working on HP-UX yet
so I just disabled them for TARGET_BIG_ENDIAN for now.  I also disabled
vec_pack_trunc_v4hi and vec_pack_trunc_v2si because they are implemented
using the even/odd extract instructions.  I get some tests failing due
to non-vectorization on HP-UX, but none due to bad results.

I'd like to check this in so that we are at least not generating any bad
code while I continue to look at the instructions I haven't gotten
working, but I wanted to see if you had any comments on the changes
first.  Any input?

Steve Ellcey
sje@cup.hp.com
Richard Henderson - Jan. 18, 2011, 9:20 p.m.
On 01/18/2011 01:06 PM, Steve Ellcey wrote:
> I don't have very good debugger support on IA64 HP-UX so stepping
> through things doesn't work very well.

Fooey.

> I'd like to check this in so that we are at least not generating any bad
> code while I continue to look at the instructions I haven't gotten
> working, but I wanted to see if you had any comments on the changes
> first.  Any input?

Looks good.


r~

Patch

Index: config/ia64/ia64.c
===================================================================
--- config/ia64/ia64.c	(revision 168941)
+++ config/ia64/ia64.c	(working copy)
@@ -2007,7 +2007,10 @@  ia64_expand_unpack (rtx operands[3], boo
       gcc_assert (!neg);
     }
 
-  emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1]));
+  else
+    emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
 }
 
 /* Emit an integral vector widening sum operations.  */
@@ -2058,8 +2061,16 @@  ia64_expand_widen_sum (rtx operands[3], 
   h = gen_reg_rtx (wmode);
   s = gen_reg_rtx (wmode);
 
-  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
-  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1]));
+      emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1]));
+    }
+  else
+    {
+      emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
+      emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
+    }
   emit_insn (plus (s, l, operands[2]));
   emit_insn (plus (operands[0], h, s));
 }
@@ -2082,7 +2093,10 @@  ia64_expand_widen_mul_v4hi (rtx operands
   emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
 
   interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
-  emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l));
+  else
+    emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
 }
 
 /* Emit a signed or unsigned V8QI dot product operation.  */
@@ -2115,14 +2129,28 @@  ia64_expand_dot_prod_v8qi (rtx operands[
   h1 = gen_reg_rtx (V4HImode);
   h2 = gen_reg_rtx (V4HImode);
 
-  emit_insn (gen_vec_interleave_lowv8qi
-	     (gen_lowpart (V8QImode, l1), operands[1], x1));
-  emit_insn (gen_vec_interleave_lowv8qi
-	     (gen_lowpart (V8QImode, l2), operands[2], x2));
-  emit_insn (gen_vec_interleave_highv8qi
-	     (gen_lowpart (V8QImode, h1), operands[1], x1));
-  emit_insn (gen_vec_interleave_highv8qi
-	     (gen_lowpart (V8QImode, h2), operands[2], x2));
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_interleave_lowv8qi
+		 (gen_lowpart (V8QImode, l1), x1, operands[1]));
+      emit_insn (gen_vec_interleave_lowv8qi
+		 (gen_lowpart (V8QImode, l2), x2, operands[2]));
+      emit_insn (gen_vec_interleave_highv8qi
+		 (gen_lowpart (V8QImode, h1), x1, operands[1]));
+      emit_insn (gen_vec_interleave_highv8qi
+		 (gen_lowpart (V8QImode, h2), x2, operands[2]));
+    }
+  else
+    {
+      emit_insn (gen_vec_interleave_lowv8qi
+		 (gen_lowpart (V8QImode, l1), operands[1], x1));
+      emit_insn (gen_vec_interleave_lowv8qi
+		 (gen_lowpart (V8QImode, l2), operands[2], x2));
+      emit_insn (gen_vec_interleave_highv8qi
+		 (gen_lowpart (V8QImode, h1), operands[1], x1));
+      emit_insn (gen_vec_interleave_highv8qi
+		 (gen_lowpart (V8QImode, h2), operands[2], x2));
+    }
 
   p1 = gen_reg_rtx (V2SImode);
   p2 = gen_reg_rtx (V2SImode);
Index: config/ia64/vect.md
===================================================================
--- config/ia64/vect.md	(revision 168941)
+++ config/ia64/vect.md	(working copy)
@@ -370,7 +370,7 @@  (define_expand "mulv2si3"
   [(set (match_operand:V2SI 0 "gr_register_operand" "")
 	(mult:V2SI (match_operand:V2SI 1 "gr_register_operand" "r")
 		   (match_operand:V2SI 2 "gr_register_operand" "r")))]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   rtx t0, t1, t2, t3, t4, t5, t6, t7, x;
   rtx op1h = gen_lowpart (V4HImode, operands[1]);
@@ -709,7 +709,13 @@  (define_insn "vec_interleave_lowv8qi"
 		     (const_int 2) (const_int 10)
 		     (const_int 3) (const_int 11)])))]
   ""
-  "unpack1.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack1.l %0 = %r1, %r2";
+  else
+    return "%,unpack1.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_interleave_highv8qi"
@@ -723,7 +729,13 @@  (define_insn "vec_interleave_highv8qi"
 		     (const_int 6) (const_int 14)
 		     (const_int 7) (const_int 15)])))]
   ""
-  "unpack1.h %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack1.h %0 = %r1, %r2";
+  else
+    return "%,unpack1.h %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "mix1_r"
@@ -857,7 +869,10 @@  (define_expand "vec_extract_evenv8qi"
   ""
 {
   rtx temp = gen_reg_rtx (V8QImode);
-  emit_insn (gen_mix1_r (temp, operands[1], operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mix1_l (temp, operands[2], operands[1]));
+  else
+    emit_insn (gen_mix1_r (temp, operands[1], operands[2]));
   emit_insn (gen_mux1_alt (operands[0], temp));
   DONE;
 })
@@ -869,7 +884,10 @@  (define_expand "vec_extract_oddv8qi"
   ""
 {
   rtx temp = gen_reg_rtx (V8QImode);
-  emit_insn (gen_mix1_l (temp, operands[1], operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_mix1_r (temp, operands[2], operands[1]));
+  else
+    emit_insn (gen_mix1_l (temp, operands[1], operands[2]));
   emit_insn (gen_mux1_alt (operands[0], temp));
   DONE;
 })
@@ -885,7 +903,13 @@  (define_insn "vec_interleave_lowv4hi"
 		     (const_int 1)
 		     (const_int 5)])))]
   ""
-  "unpack2.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack2.l %0 = %r1, %r2";
+  else
+    return "%,unpack2.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_interleave_highv4hi"
@@ -899,7 +923,13 @@  (define_insn "vec_interleave_highv4hi"
 		     (const_int 3)
 		     (const_int 7)])))]
   ""
-  "unpack2.h %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack2.h %0 = %r1, %r2";
+  else
+    return "%,unpack2.h %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "mix2_r"
@@ -958,13 +988,13 @@  (define_expand "vec_extract_evenodd_help
 		     (const_int 2)
 		     (const_int 1)
 		     (const_int 3)])))]
-  "")
+  "!TARGET_BIG_ENDIAN")
 
 (define_expand "vec_extract_evenv4hi"
   [(match_operand:V4HI 0 "gr_register_operand")
    (match_operand:V4HI 1 "gr_reg_or_0_operand")
    (match_operand:V4HI 2 "gr_reg_or_0_operand")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   rtx temp = gen_reg_rtx (V4HImode);
   emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
@@ -976,7 +1006,7 @@  (define_expand "vec_extract_oddv4hi"
   [(match_operand:V4HI 0 "gr_register_operand")
    (match_operand:V4HI 1 "gr_reg_or_0_operand")
    (match_operand:V4HI 2 "gr_reg_or_0_operand")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   rtx temp = gen_reg_rtx (V4HImode);
   emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
@@ -1002,7 +1032,13 @@  (define_insn "vec_interleave_lowv2si"
 	  (parallel [(const_int 0)
 		     (const_int 2)])))]
   ""
-  "unpack4.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack4.h %0 = %r1, %r2";
+  else
+    return "%,unpack4.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 ;; Note that mix4.l performs the exact same operation.
@@ -1015,14 +1051,20 @@  (define_insn "vec_interleave_highv2si"
 	  (parallel [(const_int 1)
 		     (const_int 3)])))]
   ""
-  "unpack4.h %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack4.l %0 = %r1, %r2";
+  else
+    return "%,unpack4.h %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_expand "vec_extract_evenv2si"
   [(match_operand:V2SI 0 "gr_register_operand" "")
    (match_operand:V2SI 1 "gr_register_operand" "")
    (match_operand:V2SI 2 "gr_register_operand" "")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
 					 operands[2]));
@@ -1033,7 +1075,7 @@  (define_expand "vec_extract_oddv2si"
   [(match_operand:V2SI 0 "gr_register_operand" "")
    (match_operand:V2SI 1 "gr_register_operand" "")
    (match_operand:V2SI 2 "gr_register_operand" "")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
 					  operands[2]));
@@ -1397,7 +1439,7 @@  (define_expand "vec_extract_evenv2sf"
   [(match_operand:V2SF 0 "gr_register_operand" "")
    (match_operand:V2SF 1 "gr_register_operand" "")
    (match_operand:V2SF 2 "gr_register_operand" "")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
 					 operands[2]));
@@ -1408,7 +1450,7 @@  (define_expand "vec_extract_oddv2sf"
   [(match_operand:V2SF 0 "gr_register_operand" "")
    (match_operand:V2SF 1 "gr_register_operand" "")
    (match_operand:V2SF 2 "gr_register_operand" "")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
 					  operands[2]));
@@ -1540,7 +1582,7 @@  (define_expand "vec_pack_trunc_v4hi"
   [(match_operand:V8QI 0 "gr_register_operand" "")
    (match_operand:V4HI 1 "gr_register_operand" "")
    (match_operand:V4HI 2 "gr_register_operand" "")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   rtx op1 = gen_lowpart(V8QImode, operands[1]);
   rtx op2 = gen_lowpart(V8QImode, operands[2]);
@@ -1552,7 +1594,7 @@  (define_expand "vec_pack_trunc_v2si"
   [(match_operand:V4HI 0 "gr_register_operand" "")
    (match_operand:V2SI 1 "gr_register_operand" "")
    (match_operand:V2SI 2 "gr_register_operand" "")]
-  ""
+  "!TARGET_BIG_ENDIAN"
 {
   rtx op1 = gen_lowpart(V4HImode, operands[1]);
   rtx op2 = gen_lowpart(V4HImode, operands[2]);