diff mbox

Fix IRA issue, PR79728

Message ID 17fdd1e6-2cfc-2506-84fd-8b04f1b18ec3@redhat.com
State New
Headers show

Commit Message

Bernd Schmidt March 3, 2017, 1:51 p.m. UTC
This is an ICE where setup_pressure_classes fails if xmm0 is a global 
reg. Instead of GENERAL/FLOAT/SSE/MMX_REGS, it computes only 
SSE_FIRST_REG as the third register class. The problem is that the costs 
for moving between SSE_FIRST_REG and SSE_REGS are inflated because we 
think we have no available registers in SSE_FIRST_REG (since the only 
register in that class is global), and that somewhat confuses the algorithm.

The following fixes it by tweaking contains_regs_of_mode. Out of 
caution, I've retained the old meaning for code in reload which uses this.

Bootstrapped and tested on x86_64-linux. Ok?


Bernd

Comments

Bernd Schmidt March 10, 2017, 9:18 p.m. UTC | #1
Ping (minus the require-effective-target line, as Uros pointed out).


Bernd

On 03/03/2017 02:51 PM, Bernd Schmidt wrote:
> This is an ICE where setup_pressure_classes fails if xmm0 is a global
> reg. Instead of GENERAL/FLOAT/SSE/MMX_REGS, it computes only
> SSE_FIRST_REG as the third register class. The problem is that the costs
> for moving between SSE_FIRST_REG and SSE_REGS are inflated because we
> think we have no available registers in SSE_FIRST_REG (since the only
> register in that class is global), and that somewhat confuses the
> algorithm.
>
> The following fixes it by tweaking contains_regs_of_mode. Out of
> caution, I've retained the old meaning for code in reload which uses this.
>
> Bootstrapped and tested on x86_64-linux. Ok?
>
>
> Bernd
Jeff Law March 14, 2017, 5:44 p.m. UTC | #2
On 03/03/2017 06:51 AM, Bernd Schmidt wrote:
> This is an ICE where setup_pressure_classes fails if xmm0 is a global
> reg. Instead of GENERAL/FLOAT/SSE/MMX_REGS, it computes only
> SSE_FIRST_REG as the third register class. The problem is that the costs
> for moving between SSE_FIRST_REG and SSE_REGS are inflated because we
> think we have no available registers in SSE_FIRST_REG (since the only
> register in that class is global), and that somewhat confuses the
> algorithm.
>
> The following fixes it by tweaking contains_regs_of_mode. Out of
> caution, I've retained the old meaning for code in reload which uses this.
>
> Bootstrapped and tested on x86_64-linux. Ok?
>
>
> Bernd
>
> global-reg-cost.diff
>
>
> 	PR rtl-optimization/79728
> 	* regs.h (struct target_regs): New field
> 	x_contains_allocatable_regs_of_mode.
> 	(contains_allocatable_regs_of_mode): New macro.
> 	* reginfo.c (init_reg_sets_1): Initialize it, and change
> 	contains_reg_of_mode so it includes global regs as well.
> 	* reload.c (push_reload): Use contains_allocatable_regs_of_mode
> 	rather thanc ontains_regs_of_mode.
>
> 	PR rtl-optimization/79728
> 	* gcc.target/i386/sse-globalreg.c: New test.
OK.
jeff
Jeff Law March 14, 2017, 5:50 p.m. UTC | #3
On 03/03/2017 06:51 AM, Bernd Schmidt wrote:
> This is an ICE where setup_pressure_classes fails if xmm0 is a global
> reg. Instead of GENERAL/FLOAT/SSE/MMX_REGS, it computes only
> SSE_FIRST_REG as the third register class. The problem is that the costs
> for moving between SSE_FIRST_REG and SSE_REGS are inflated because we
> think we have no available registers in SSE_FIRST_REG (since the only
> register in that class is global), and that somewhat confuses the
> algorithm.
>
> The following fixes it by tweaking contains_regs_of_mode. Out of
> caution, I've retained the old meaning for code in reload which uses this.
>
> Bootstrapped and tested on x86_64-linux. Ok?
>
>
> Bernd
>
> global-reg-cost.diff
>
>
> 	PR rtl-optimization/79728
> 	* regs.h (struct target_regs): New field
> 	x_contains_allocatable_regs_of_mode.
> 	(contains_allocatable_regs_of_mode): New macro.
> 	* reginfo.c (init_reg_sets_1): Initialize it, and change
> 	contains_reg_of_mode so it includes global regs as well.
> 	* reload.c (push_reload): Use contains_allocatable_regs_of_mode
> 	rather thanc ontains_regs_of_mode.
>
> 	PR rtl-optimization/79728
> 	* gcc.target/i386/sse-globalreg.c: New test.
And I went ahead and installed on the trunk.

jeff
diff mbox

Patch

	PR rtl-optimization/79728
	* regs.h (struct target_regs): New field
	x_contains_allocatable_regs_of_mode.
	(contains_allocatable_regs_of_mode): New macro.
	* reginfo.c (init_reg_sets_1): Initialize it, and change
	contains_reg_of_mode so it includes global regs as well.
	* reload.c (push_reload): Use contains_allocatable_regs_of_mode
	rather thanc ontains_regs_of_mode.

	PR rtl-optimization/79728
	* gcc.target/i386/sse-globalreg.c: New test.

Index: gcc/reginfo.c
===================================================================
--- gcc/reginfo.c	(revision 245685)
+++ gcc/reginfo.c	(working copy)
@@ -468,19 +468,29 @@  init_reg_sets_1 (void)
   memset (contains_reg_of_mode, 0, sizeof (contains_reg_of_mode));
   for (m = 0; m < (unsigned int) MAX_MACHINE_MODE; m++)
     {
-      HARD_REG_SET ok_regs;
+      HARD_REG_SET ok_regs, ok_regs2;
       CLEAR_HARD_REG_SET (ok_regs);
+      CLEAR_HARD_REG_SET (ok_regs2);
       for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
-	if (!fixed_regs [j] && HARD_REGNO_MODE_OK (j, (machine_mode) m))
-	  SET_HARD_REG_BIT (ok_regs, j);
+	if (!TEST_HARD_REG_BIT (fixed_nonglobal_reg_set, j)
+	    && HARD_REGNO_MODE_OK (j, (machine_mode) m))
+	  {
+	    SET_HARD_REG_BIT (ok_regs, j);
+	    if (!fixed_regs[j])
+	      SET_HARD_REG_BIT (ok_regs2, j);
+	  }
 
       for (i = 0; i < N_REG_CLASSES; i++)
 	if ((targetm.class_max_nregs ((reg_class_t) i, (machine_mode) m)
 	     <= reg_class_size[i])
 	    && hard_reg_set_intersect_p (ok_regs, reg_class_contents[i]))
 	  {
-	     contains_reg_of_mode [i][m] = 1;
-	     have_regs_of_mode [m] = 1;
+	     contains_reg_of_mode[i][m] = 1;
+	     if (hard_reg_set_intersect_p (ok_regs2, reg_class_contents[i]))
+	       {
+		 have_regs_of_mode[m] = 1;
+		 contains_allocatable_reg_of_mode[i][m] = 1;
+	       }
 	  }
      }
 }
Index: gcc/regs.h
===================================================================
--- gcc/regs.h	(revision 245685)
+++ gcc/regs.h	(working copy)
@@ -218,6 +218,10 @@  struct target_regs {
   /* 1 if the corresponding class contains a register of the given mode.  */
   char x_contains_reg_of_mode[N_REG_CLASSES][MAX_MACHINE_MODE];
 
+  /* 1 if the corresponding class contains a register of the given mode
+     which is not global and can therefore be allocated.  */
+  char x_contains_allocatable_reg_of_mode[N_REG_CLASSES][MAX_MACHINE_MODE];
+
   /* Record for each mode whether we can move a register directly to or
      from an object of that mode in memory.  If we can't, we won't try
      to use that mode directly when accessing a field of that mode.  */
@@ -243,6 +247,8 @@  extern struct target_regs *this_target_r
   (this_target_regs->x_have_regs_of_mode)
 #define contains_reg_of_mode \
   (this_target_regs->x_contains_reg_of_mode)
+#define contains_allocatable_reg_of_mode \
+  (this_target_regs->x_contains_allocatable_reg_of_mode)
 #define direct_load \
   (this_target_regs->x_direct_load)
 #define direct_store \
Index: gcc/reload.c
===================================================================
--- gcc/reload.c	(revision 245685)
+++ gcc/reload.c	(working copy)
@@ -1055,7 +1055,7 @@  push_reload (rtx in, rtx out, rtx *inloc
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass)
 #endif
-      && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))]
+      && contains_allocatable_reg_of_mode[rclass][GET_MODE (SUBREG_REG (in))]
       && (CONSTANT_P (SUBREG_REG (in))
 	  || GET_CODE (SUBREG_REG (in)) == PLUS
 	  || strict_low
@@ -1164,7 +1164,7 @@  push_reload (rtx in, rtx out, rtx *inloc
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass)
 #endif
-      && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))]
+      && contains_allocatable_reg_of_mode[rclass][GET_MODE (SUBREG_REG (out))]
       && (CONSTANT_P (SUBREG_REG (out))
 	  || strict_low
 	  || (((REG_P (SUBREG_REG (out))
Index: gcc/testsuite/gcc.target/i386/sse-globalreg.c
===================================================================
--- gcc/testsuite/gcc.target/i386/sse-globalreg.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/sse-globalreg.c	(working copy)
@@ -0,0 +1,6 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -w" } */
+/* { dg-require-effective-target sse2 } */
+
+register int a __asm__("xmm0");
+void fn1() {}