diff mbox series

[committed] amdgcn: Align VGPR pairs

Message ID 2177fb7a-9d28-d7c2-1e23-ca8e26eba664@codesourcery.com
State New
Headers show
Series [committed] amdgcn: Align VGPR pairs | expand

Commit Message

Andrew Stubbs Feb. 21, 2020, 11:57 a.m. UTC
This patch changes the way VGPR register pairs (for 64-bit values) are 
allocated.

There are no hardware restrictions on the alignment of such pairs 
(unlike for scalar registers), but there's also not a full set of 64-bit 
instructions, meaning that many operations get decomposed into two or 
more real instructions.

This creates an early-clobber problem when the inputs and outputs 
partially overlap, so up to now we've been adding the early-clobber 
constraint and fixing it that way.

To complicate matters, most of the patterns don't have any trouble if 
the inputs and output registers match exactly, and often having them do 
so reduces register pressure, so we've been adding '0' match-constraints 
to allow that.

All this works, but there have been several bugs with missed 
early-clobber cases, and several more where the match-constraints 
conflict with other "real" match constraints, or generally confuse LRA. 
The fix is usually to explode the number of alternatives. The presence 
of these constraints also tends to mess up the alternative scoring 
system, which can make for suboptimal decisions. To make things worse 
the exact effects tend to change over time, creating an ongoing 
maintenance burden.

This patch forces registers pairs to be allocated aligned, and removes 
all the early-clobber work-arounds, leaving only actual early-clobber cases.

Andrew
diff mbox series

Patch

amdgcn: Align VGPR pairs

Aligning the registers is not needed by the architecture, but doing so
allows us to remove the requirement for bug-prone early-clobber
constraints from many split patterns (and avoid adding more in future).

2020-02-20  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
	* config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
	(addv64di3_exec): Likewise.
	(subv64di3): Likewise.
	(subv64di3_exec): Likewise.
	(addv64di3_zext): Likewise.
	(addv64di3_zext_exec): Likewise.
	(addv64di3_zext_dup): Likewise.
	(addv64di3_zext_dup_exec): Likewise.
	(addv64di3_zext_dup2): Likewise.
	(addv64di3_zext_dup2_exec): Likewise.
	(addv64di3_sext_dup2): Likewise.
	(addv64di3_sext_dup2_exec): Likewise.
	(<expander>v64di3): Likewise.
	(<expander>v64di3_exec): Likewise.
	(*<reduc_op>_dpp_shr_v64di): Likewise.
	(*plus_carry_dpp_shr_v64di): Likewise.
	* config/gcn/gcn.md (adddi3): Likewise.
	(addptrdi3): Likewise.
	(<expander>di3): Likewise.

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 6b774b1ef4c..5519c12d03c 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1143,10 +1143,10 @@ 
    (set_attr "length" "4,8,4,8")])
 
 (define_insn_and_split "addv64di3"
-  [(set (match_operand:V64DI 0 "register_operand"   "= &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"   "=  v")
 	(plus:V64DI
-	  (match_operand:V64DI 1 "register_operand" "%vDb,vDb0")
-	  (match_operand:V64DI 2 "gcn_alu_operand"  "vDb0, vDb")))
+	  (match_operand:V64DI 1 "register_operand" "%vDb")
+	  (match_operand:V64DI 2 "gcn_alu_operand"  " vDb")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
@@ -1172,14 +1172,13 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_exec"
-  [(set (match_operand:V64DI 0 "register_operand"	     "= &v,  &v, &v")
+  [(set (match_operand:V64DI 0 "register_operand"		  "=  v")
 	(vec_merge:V64DI
 	  (plus:V64DI
-	    (match_operand:V64DI 1 "register_operand"	     "%vDb,vDb0,vDb")
-	    (match_operand:V64DI 2 "gcn_alu_operand"	     "vDb0, vDb,vDb"))
-	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
-							     "   U,   U,  0")
-	  (match_operand:DI 4 "gcn_exec_reg_operand"	     "   e,   e,  e")))
+	    (match_operand:V64DI 1 "register_operand"		  "%vDb")
+	    (match_operand:V64DI 2 "gcn_alu_operand"		  " vDb"))
+	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
+	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "   e")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
@@ -1210,10 +1209,10 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "subv64di3"
-  [(set (match_operand:V64DI 0 "register_operand"  "=&v,  &v,  &v, &v")
-	(minus:V64DI                                                 
-	  (match_operand:V64DI 1 "gcn_alu_operand" "vDb,vDb0,   v, v0")
-	  (match_operand:V64DI 2 "gcn_alu_operand" " v0,   v,vDb0,vDb")))
+  [(set (match_operand:V64DI 0 "register_operand"  "= v,  v")
+	(minus:V64DI                                        
+	  (match_operand:V64DI 1 "gcn_alu_operand" "vDb,  v")
+	  (match_operand:V64DI 2 "gcn_alu_operand" "  v,vDb")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
@@ -1239,14 +1238,13 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "subv64di3_exec"
-  [(set (match_operand:V64DI 0 "register_operand"    "= &v,   &v,   &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"		 "=  v,   v")
 	(vec_merge:V64DI                                                         
 	  (minus:V64DI                                                           
-	    (match_operand:V64DI 1 "gcn_alu_operand" "vSvB,vSvB0,    v,  v0")
-	    (match_operand:V64DI 2 "gcn_alu_operand" "  v0,    v,vSvB0,vSvB"))
-	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
-						     "  U0,   U0,   U0,  U0")
-	  (match_operand:DI 4 "gcn_exec_reg_operand" "   e,    e,    e,   e")))
+	    (match_operand:V64DI 1 "gcn_alu_operand"		 "vSvB,   v")
+	    (match_operand:V64DI 2 "gcn_alu_operand"		 "   v,vSvB"))
+	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0,  U0")
+	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "   e,   e")))
    (clobber (reg:DI VCC_REG))]
   "register_operand (operands[1], VOIDmode)
    || register_operand (operands[2], VOIDmode)"
@@ -1278,11 +1276,11 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext"
-  [(set (match_operand:V64DI 0 "register_operand"    "=&v, &v,  &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"    "= v,  v")
 	(plus:V64DI
 	  (zero_extend:V64DI
-	    (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB,  vA,  vB"))
-	  (match_operand:V64DI 2 "gcn_alu_operand"   "vDb,vDA,0vDb,0vDA")))
+	    (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
+	  (match_operand:V64DI 2 "gcn_alu_operand"   "vDb,vDA")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
@@ -1306,15 +1304,14 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_exec"
-  [(set (match_operand:V64DI 0 "register_operand"	 "=&v,  &v, &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"		  "= v,  v")
 	(vec_merge:V64DI
 	  (plus:V64DI
 	    (zero_extend:V64DI
-	      (match_operand:V64SI 1 "gcn_alu_operand"	 "0vA,  vA,0vB,  vB"))
-	    (match_operand:V64DI 2 "gcn_alu_operand"	 "vDb,0vDb,vDA,0vDA"))
-	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
-							 " U0,  U0, U0,  U0")
-	  (match_operand:DI 4 "gcn_exec_reg_operand"	 "  e,   e,  e,   e")))
+	      (match_operand:V64SI 1 "gcn_alu_operand"		  " vA, vB"))
+	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vDb,vDA"))
+	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e,  e")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
@@ -1343,12 +1340,12 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup"
-  [(set (match_operand:V64DI 0 "register_operand"   "= &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"   "= v,  v")
 	(plus:V64DI
 	  (zero_extend:V64DI
 	    (vec_duplicate:V64SI
-	      (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
-	  (match_operand:V64DI 2 "gcn_alu_operand"  "vDA0,vDb0")))
+	      (match_operand:SI 1 "gcn_alu_operand" "BSv,ASv")))
+	  (match_operand:V64DI 2 "gcn_alu_operand"  "vDA,vDb")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
@@ -1372,15 +1369,15 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup_exec"
-  [(set (match_operand:V64DI 0 "register_operand"		 "= &v,  &v")
+  [(set (match_operand:V64DI 0 "register_operand"		  "= v,  v")
 	(vec_merge:V64DI
 	  (plus:V64DI
 	    (zero_extend:V64DI
 	      (vec_duplicate:V64SI
-		(match_operand:SI 1 "gcn_alu_operand"		 " ASv, BSv")))
-	    (match_operand:V64DI 2 "gcn_alu_operand"		 "vDb0,vDA0"))
-	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0,  U0")
-	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "   e,   e")))
+		(match_operand:SI 1 "gcn_alu_operand"		  "ASv,BSv")))
+	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vDb,vDA"))
+	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e,  e")))
    (clobber (reg:DI VCC_REG))]
   ""
   "#"
@@ -1409,7 +1406,7 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup2"
-  [(set (match_operand:V64DI 0 "register_operand"		     "= &v")
+  [(set (match_operand:V64DI 0 "register_operand"		     "=  v")
 	(plus:V64DI
 	  (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
 	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv"))))
@@ -1435,7 +1432,7 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_zext_dup2_exec"
-  [(set (match_operand:V64DI 0 "register_operand"		       "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"		       "= v")
 	(vec_merge:V64DI
 	  (plus:V64DI
 	    (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
@@ -1472,7 +1469,7 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_sext_dup2"
-  [(set (match_operand:V64DI 0 "register_operand"		      "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
 	(plus:V64DI
 	  (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
 	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
@@ -1500,7 +1497,7 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "addv64di3_sext_dup2_exec"
-  [(set (match_operand:V64DI 0 "register_operand"		       "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"		       "= v")
 	(vec_merge:V64DI
 	  (plus:V64DI
 	    (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
@@ -1907,10 +1904,10 @@ 
    (set_attr "length" "8,8")])
 
 (define_insn_and_split "<expander>v64di3"
-  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand"	    "=  v,RD")
 	(bitop:V64DI
-	  (match_operand:V64DI 1 "gcn_valu_src0_operand"	  "%  v,RD")
-	  (match_operand:V64DI 2 "gcn_valu_src1com_operand"	  "vSvB, v")))]
+	  (match_operand:V64DI 1 "gcn_valu_src0_operand"    "%  v,RD")
+	  (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
   ""
   "@
    #
@@ -1932,7 +1929,7 @@ 
    (set_attr "length" "16,8")])
 
 (define_insn_and_split "<expander>v64di3_exec"
-  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand"		  "=  v,RD")
 	(vec_merge:V64DI
 	  (bitop:V64DI
 	    (match_operand:V64DI 1 "gcn_valu_src0_operand"	  "%  v,RD")
@@ -2963,11 +2960,11 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
-  [(set (match_operand:V64DI 0 "register_operand"   "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"   "=v")
 	(unspec:V64DI
-	  [(match_operand:V64DI 1 "register_operand" "v0")
-	   (match_operand:V64DI 2 "register_operand" "v0")
-	   (match_operand:SI 3 "const_int_operand"    "n")]
+	  [(match_operand:V64DI 1 "register_operand" "v")
+	   (match_operand:V64DI 2 "register_operand" "v")
+	   (match_operand:SI 3 "const_int_operand"   "n")]
 	  REDUC_2REG_UNSPEC))]
   ""
   "#"
@@ -3029,11 +3026,11 @@ 
    (set_attr "length" "8")])
 
 (define_insn_and_split "*plus_carry_dpp_shr_v64di"
-  [(set (match_operand:V64DI 0 "register_operand"   "=&v")
+  [(set (match_operand:V64DI 0 "register_operand"   "=v")
 	(unspec:V64DI
-	  [(match_operand:V64DI 1 "register_operand" "v0")
-	   (match_operand:V64DI 2 "register_operand" "v0")
-	   (match_operand:SI 3 "const_int_operand"    "n")]
+	  [(match_operand:V64DI 1 "register_operand" "v")
+	   (match_operand:V64DI 2 "register_operand" "v")
+	   (match_operand:SI 3 "const_int_operand"   "n")]
 	  UNSPEC_PLUS_CARRY_DPP_SHR))
    (clobber (reg:DI VCC_REG))]
   ""
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 0b80ee299bf..e6ac3796a58 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -458,7 +458,15 @@  gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 	    || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
 	    || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
   if (VGPR_REGNO_P (regno))
-    return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode)
+    /* Vector instructions do not care about the alignment of register
+       pairs, but where there is no 64-bit instruction, many of the
+       define_split do not work if the input and output registers partially
+       overlap.  We tried to fix this with early clobber and match
+       constraints, but it was bug prone, added complexity, and conflicts
+       with the 'U0' constraints on vec_merge.
+       Therefore, we restrict ourselved to aligned registers.  */
+    return (vgpr_1reg_mode_p (mode)
+	    || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
 	    /* TImode is used by DImode compare_and_swap.  */
 	    || mode == TImode);
   return false;
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index a4705361d4a..b527d9a7a8b 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1065,22 +1065,16 @@ 
 ; through some RTL optimisation passes, and means the CC reg we set isn't
 ; dependent on the constraint alternative (which doesn't seem to work well).
 
-; There's an early clobber in the case where "v[0:1]=v[1:2]+?" but
-; "v[0:1]=v[0:1]+?" is fine (as is "v[1:2]=v[0:1]+?", but that's trickier).
-
 ; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
 ; used as an operand due to the read of VCC, so we restrict constants to the
 ; inlinable range for that alternative.
 
 (define_insn_and_split "adddi3"
-  [(set (match_operand:DI 0 "register_operand"		
-					      "=&Sg,&Sg,&Sg,&Sg,&v,&v,&v,&v")
-	(plus:DI (match_operand:DI 1 "register_operand" 
-					      "  Sg,  0,  0, Sg, v, 0, 0, v")
-		 (match_operand:DI 2 "nonmemory_operand"
-					      "   0,SgB,  0,SgB, 0,vA, 0,vA")))
-   (clobber (match_scratch:BI 3		      "= cs, cs, cs, cs, X, X, X, X"))
-   (clobber (match_scratch:DI 4		      "=  X,  X,  X,  X,cV,cV,cV,cV"))]
+  [(set (match_operand:DI 0 "register_operand"		 "=Sg, v")
+	(plus:DI (match_operand:DI 1 "register_operand"  " Sg, v")
+		 (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
+   (clobber (match_scratch:BI 3				 "=cs, X"))
+   (clobber (match_scratch:DI 4				 "= X,cV"))]
   ""
   "#"
   "&& reload_completed"
@@ -1109,7 +1103,7 @@ 
 		  cc));
     DONE;
   }
-  [(set_attr "type" "mult,mult,mult,mult,vmult,vmult,vmult,vmult")
+  [(set_attr "type" "mult,vmult")
    (set_attr "length" "8")])
 
 (define_expand "adddi3_scc"
@@ -1196,11 +1190,14 @@ 
 ; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
 ; Note that it is not safe to save/clobber/restore SCC because doing so will
 ; break data-flow analysis, so this must use vector registers.
+;
+; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
+; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway.
 
 (define_insn "addptrdi3"
-  [(set (match_operand:DI 0 "register_operand"		 "= &v")
-	(plus:DI (match_operand:DI 1 "register_operand"	 "  v0")
-		 (match_operand:DI 2 "nonmemory_operand" "vDA0")))]
+  [(set (match_operand:DI 0 "register_operand"		 "= v")
+	(plus:DI (match_operand:DI 1 "register_operand"	 " v0")
+		 (match_operand:DI 2 "nonmemory_operand" "vDA")))]
   ""
   {
     rtx new_operands[4] = { operands[0], operands[1], operands[2],
@@ -1470,15 +1467,14 @@ 
 (define_code_iterator vec_and_scalar64_com [and ior xor])
 
 (define_insn_and_split "<expander>di3"
-   [(set (match_operand:DI 0 "register_operand"  "= Sg,   &v,   &v")
+   [(set (match_operand:DI 0 "register_operand"  "= Sg,    v")
 	 (vec_and_scalar64_com:DI
-	  (match_operand:DI 1 "gcn_alu_operand"  "%SgA,vSvDB,vSvDB")
-	   (match_operand:DI 2 "gcn_alu_operand" " SgC,    v,    0")))
-   (clobber (match_scratch:BI 3			 "= cs,    X,    X"))]
+	  (match_operand:DI 1 "gcn_alu_operand"  "%SgA,vSvDB")
+	   (match_operand:DI 2 "gcn_alu_operand" " SgC,    v")))
+   (clobber (match_scratch:BI 3			 "= cs,    X"))]
   ""
   "@
    s_<mnemonic>0\t%0, %1, %2
-   #
    #"
   "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
   [(parallel [(set (match_dup 4)
@@ -1495,7 +1491,7 @@ 
     operands[8] = gcn_operand_part (DImode, operands[1], 1);
     operands[9] = gcn_operand_part (DImode, operands[2], 1);
   }
-  [(set_attr "type" "sop2,vop2,vop2")
+  [(set_attr "type" "sop2,vop2")
    (set_attr "length" "8")])
 
 (define_insn "<expander>di3"