diff mbox series

Support multiple registers for the frame pointer

Message ID 3a1551a1-b44e-29c6-841f-24d8a54a002c@codesourcery.com
State New
Headers show
Series Support multiple registers for the frame pointer | expand

Commit Message

Kwok Cheung Yeung Nov. 2, 2019, 5:28 p.m. UTC
The AMD GCN architecture uses 64-bit pointers, but the scalar registers 
are 32-bit wide, so pointers must reside in a pair of registers.

The two hard registers holding the frame pointer are currently fixed, 
but if they are changed to unfixed (so that the FP can be eliminated), 
GCC would sometimes allocate the second register to a pseudo while the 
frame pointer was in use, clobbering the value of the FP and crashing 
the program.

GCC currently does not handle multi-register hard frame pointers 
properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only set 
for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers that 
may be used, which means that the register allocators consider 
HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
registers needed to store the frame pointer using hard_regno_nregs, and 
sets the required variables for HARD_FRAME_POINTER_REGNUM and however 
many adjacent registers are needed (which on most architectures should 
be zero).

Bootstrapped on x86_64 and tested with no regressions, which is not 
surprising as nothing different happens when the FP fits into a single 
register. I believe this is true for the 64-bit variants of the more 
popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any 
other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)?

I have not included any specific testcases for this issue as it can 
affect pretty much everything not using -fomit-frame-pointer on AMD GCN.

Okay for trunk?

Kwok Yeung


     Add support for using multiple registers to hold the frame pointer

     2019-11-02  Kwok Cheung Yeung  <kcy@codesourcery.com>

     	gcc/
     	* ira.c (setup_alloc_regs): Setup no_unit_alloc_regs for
     	frame pointer in multiple registers.
     	(ira_setup_eliminable_regset): Setup eliminable_regset,
     	ira_no_alloc_regs and regs_ever_live for frame pointer in
     	multiple registers.


@@ -2248,6 +2254,7 @@ ira_setup_eliminable_regset (void)
  {
    int i;
    static const struct {const int from, to; } eliminables[] = 
ELIMINABLE_REGS;
+  int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM, Pmode);

    /* Setup is_leaf as frame_pointer_required may use it.  This function
       is called by sched_init before ira if scheduling is enabled.  */
@@ -2276,7 +2283,8 @@ ira_setup_eliminable_regset (void)
         frame pointer in LRA.  */

    if (frame_pointer_needed)
-    df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
+    for (i = 0; i < fp_reg_count; i++)
+      df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);

    ira_no_alloc_regs = no_unit_alloc_regs;
    CLEAR_HARD_REG_SET (eliminable_regset);
@@ -2306,17 +2314,21 @@ ira_setup_eliminable_regset (void)
      }
    if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
      {
-      if (!TEST_HARD_REG_BIT (crtl->asm_clobbers, 
HARD_FRAME_POINTER_REGNUM))
-	{
-	  SET_HARD_REG_BIT (eliminable_regset, HARD_FRAME_POINTER_REGNUM);
-	  if (frame_pointer_needed)
-	    SET_HARD_REG_BIT (ira_no_alloc_regs, HARD_FRAME_POINTER_REGNUM);
-	}
-      else if (frame_pointer_needed)
-	error ("%s cannot be used in %<asm%> here",
-	       reg_names[HARD_FRAME_POINTER_REGNUM]);
-      else
-	df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
+      for (i = 0; i < fp_reg_count; i++)
+	if (!TEST_HARD_REG_BIT (crtl->asm_clobbers,
+				HARD_FRAME_POINTER_REGNUM + i))
+	  {
+	    SET_HARD_REG_BIT (eliminable_regset,
+			      HARD_FRAME_POINTER_REGNUM + i);
+	    if (frame_pointer_needed)
+	      SET_HARD_REG_BIT (ira_no_alloc_regs,
+				HARD_FRAME_POINTER_REGNUM + i);
+	  }
+	else if (frame_pointer_needed)
+	  error ("%s cannot be used in %<asm%> here",
+		 reg_names[HARD_FRAME_POINTER_REGNUM + i]);
+	else
+	  df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);
      }
  }

Comments

Vladimir Makarov Nov. 4, 2019, 3:22 p.m. UTC | #1
On 2019-11-02 1:28 p.m., Kwok Cheung Yeung wrote:
> The AMD GCN architecture uses 64-bit pointers, but the scalar 
> registers are 32-bit wide, so pointers must reside in a pair of 
> registers.
>
> The two hard registers holding the frame pointer are currently fixed, 
> but if they are changed to unfixed (so that the FP can be eliminated), 
> GCC would sometimes allocate the second register to a pseudo while the 
> frame pointer was in use, clobbering the value of the FP and crashing 
> the program.
>
> GCC currently does not handle multi-register hard frame pointers 
> properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
> ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only 
> set for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers 
> that may be used, which means that the register allocators consider 
> HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
> registers needed to store the frame pointer using hard_regno_nregs, 
> and sets the required variables for HARD_FRAME_POINTER_REGNUM and 
> however many adjacent registers are needed (which on most 
> architectures should be zero).
>
> Bootstrapped on x86_64 and tested with no regressions, which is not 
> surprising as nothing different happens when the FP fits into a single 
> register. I believe this is true for the 64-bit variants of the more 
> popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there 
> any other architectures similar to GCN (i.e. 64-bit pointers with 
> 32-bit GPRs)?
>
> I have not included any specific testcases for this issue as it can 
> affect pretty much everything not using -fomit-frame-pointer on AMD GCN.
>
> Okay for trunk?
>
>
Yes.  You can commit the patch to the trunk.

Thank you.
Georg-Johann Lay Nov. 4, 2019, 4:22 p.m. UTC | #2
Am 04.11.19 um 16:22 schrieb Vladimir Makarov:
> 
> On 2019-11-02 1:28 p.m., Kwok Cheung Yeung wrote:
>> The AMD GCN architecture uses 64-bit pointers, but the scalar 
>> registers are 32-bit wide, so pointers must reside in a pair of 
>> registers.
>>
>> The two hard registers holding the frame pointer are currently fixed, 
>> but if they are changed to unfixed (so that the FP can be eliminated), 
>> GCC would sometimes allocate the second register to a pseudo while the 
>> frame pointer was in use, clobbering the value of the FP and crashing 
>> the program.
>>
>> GCC currently does not handle multi-register hard frame pointers 
>> properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
>> ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only 
>> set for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers 
>> that may be used, which means that the register allocators consider 
>> HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
>> registers needed to store the frame pointer using hard_regno_nregs, 
>> and sets the required variables for HARD_FRAME_POINTER_REGNUM and 
>> however many adjacent registers are needed (which on most 
>> architectures should be zero).
>>
>> Bootstrapped on x86_64 and tested with no regressions, which is not 
>> surprising as nothing different happens when the FP fits into a single 
>> register. I believe this is true for the 64-bit variants of the more 
>> popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there 
>> any other architectures similar to GCN (i.e. 64-bit pointers with 
>> 32-bit GPRs)?
>>
>> I have not included any specific testcases for this issue as it can 
>> affect pretty much everything not using -fomit-frame-pointer on AMD GCN.
>>
>> Okay for trunk?
>>
>>
> Yes.  You can commit the patch to the trunk.
> 
> Thank you.

The avr port already uses 2 hard-reg frame pointer ever since...

Does this patch has an impact on the avr port and its handling of
the frame pointer?

Johann
Dimitar Dimitrov Nov. 4, 2019, 8:02 p.m. UTC | #3
On Sat, 2 Nov 2019, 19:28:38 EET Kwok Cheung Yeung wrote:
> The AMD GCN architecture uses 64-bit pointers, but the scalar registers
> are 32-bit wide, so pointers must reside in a pair of registers.
...
> Bootstrapped on x86_64 and tested with no regressions, which is not
> surprising as nothing different happens when the FP fits into a single
> register. I believe this is true for the 64-bit variants of the more
> popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any
> other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)?
Yes. PRU uses four 8-bit HW registers to hold 32-bit pointers. 

> 
...
> diff --git a/gcc/ira.c b/gcc/ira.c
> index 9f8da67..25e9359 100644
> --- a/gcc/ira.c
> +++ b/gcc/ira.c
> @@ -515,7 +515,13 @@ setup_alloc_regs (bool use_hard_frame_p)
>   #endif
>     no_unit_alloc_regs = fixed_nonglobal_reg_set;
>     if (! use_hard_frame_p)
> -    SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
> +    {
> +      int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM,
> Pmode);
> +      for (int reg = HARD_FRAME_POINTER_REGNUM;
> +	   reg < HARD_FRAME_POINTER_REGNUM + fp_reg_count;
> +	   reg++)
> +	SET_HARD_REG_BIT (no_unit_alloc_regs, reg);
> +    }
Please consider using the existing helper function instead:
   add_to_hard_reg_set (&no_unit_alloc_regs, Pmode, reg);


Regards,
Dimitar
Georg-Johann Lay Nov. 4, 2019, 9:53 p.m. UTC | #4
Kwok Cheung Yeung schrieb:
> The AMD GCN architecture uses 64-bit pointers, but the scalar registers 
> are 32-bit wide, so pointers must reside in a pair of registers.
> 
> The two hard registers holding the frame pointer are currently fixed, 
> but if they are changed to unfixed (so that the FP can be eliminated), 
> GCC would sometimes allocate the second register to a pseudo while the 
> frame pointer was in use, clobbering the value of the FP and crashing 
> the program.
> 
> GCC currently does not handle multi-register hard frame pointers 
> properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
> ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only set 
> for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers that 
> may be used, which means that the register allocators consider 
> HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
> registers needed to store the frame pointer using hard_regno_nregs, and 
> sets the required variables for HARD_FRAME_POINTER_REGNUM and however 
> many adjacent registers are needed (which on most architectures should 
> be zero).
> 
> Bootstrapped on x86_64 and tested with no regressions, which is not 
> surprising as nothing different happens when the FP fits into a single 
> register. I believe this is true for the 64-bit variants of the more 
> popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any 
> other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)?

If 16-bit pointers with 8-bit GPRs is similar enough: The avr port.

Johann

> I have not included any specific testcases for this issue as it can 
> affect pretty much everything not using -fomit-frame-pointer on AMD GCN.
> 
> Okay for trunk?
> 
> Kwok Yeung
Richard Sandiford Nov. 5, 2019, 5:14 p.m. UTC | #5
Dimitar Dimitrov <dimitar@dinux.eu> writes:
> On Sat, 2 Nov 2019, 19:28:38 EET Kwok Cheung Yeung wrote:
>> The AMD GCN architecture uses 64-bit pointers, but the scalar registers
>> are 32-bit wide, so pointers must reside in a pair of registers.
> ...
>> Bootstrapped on x86_64 and tested with no regressions, which is not
>> surprising as nothing different happens when the FP fits into a single
>> register. I believe this is true for the 64-bit variants of the more
>> popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any
>> other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)?
> Yes. PRU uses four 8-bit HW registers to hold 32-bit pointers. 
>
>> 
> ...
>> diff --git a/gcc/ira.c b/gcc/ira.c
>> index 9f8da67..25e9359 100644
>> --- a/gcc/ira.c
>> +++ b/gcc/ira.c
>> @@ -515,7 +515,13 @@ setup_alloc_regs (bool use_hard_frame_p)
>>   #endif
>>     no_unit_alloc_regs = fixed_nonglobal_reg_set;
>>     if (! use_hard_frame_p)
>> -    SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
>> +    {
>> +      int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM,
>> Pmode);
>> +      for (int reg = HARD_FRAME_POINTER_REGNUM;
>> +	   reg < HARD_FRAME_POINTER_REGNUM + fp_reg_count;
>> +	   reg++)
>> +	SET_HARD_REG_BIT (no_unit_alloc_regs, reg);
>> +    }
> Please consider using the existing helper function instead:
>    add_to_hard_reg_set (&no_unit_alloc_regs, Pmode, reg);

+1
Kwok Cheung Yeung Nov. 5, 2019, 11:09 p.m. UTC | #6
Hello

On 04/11/2019 04:22 pm, Georg-Johann Lay wrote:
 > The avr port already uses 2 hard-reg frame pointer ever since...
 >
 > Does this patch has an impact on the avr port and its handling of
 > the frame pointer?

I am not familiar with the AVR port, but looking at the source, it looks 
like it prevents the clobbering by defining FRAME_POINTER_REGNUM+1 as 
eliminable in ELIMINABLE_REGS, so it will be added to eliminable_regset 
and ira_no_alloc_regs (when using the FP) in ira_setup_eliminable_regset 
anyway. It does not have a separate HARD_FRAME_POINTER_REGNUM, so the 
code in the 'if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)' block does not 
affect this port.

I don't understand how the elimination from FRAME_POINTER_REGNUM+1 to 
STACK_POINTER_REGNUM+1 works though - avr_initial_elimination_offset 
doesn't appear to deal with the second word at all...

All in all, I don't believe that it will affect the AVR port - at worst, 
it is simply redundant for that platform. I don't have an AVR board to 
confirm that on though. In any case, I don't think it is ever wrong to 
prevent all the constituent registers of the FP from being allocated 
when the frame pointer is in use.

Thanks,

Kwok
Kwok Cheung Yeung Nov. 6, 2019, 12:21 p.m. UTC | #7
On 04/11/2019 08:02 pm, Dimitar Dimitrov wrote:
> On Sat, 2 Nov 2019, 19:28:38 EET Kwok Cheung Yeung wrote:
>> diff --git a/gcc/ira.c b/gcc/ira.c
>> index 9f8da67..25e9359 100644
>> --- a/gcc/ira.c
>> +++ b/gcc/ira.c
>> @@ -515,7 +515,13 @@ setup_alloc_regs (bool use_hard_frame_p)
>>    #endif
>>      no_unit_alloc_regs = fixed_nonglobal_reg_set;
>>      if (! use_hard_frame_p)
>> -    SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
>> +    {
>> +      int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM,
>> Pmode);
>> +      for (int reg = HARD_FRAME_POINTER_REGNUM;
>> +	   reg < HARD_FRAME_POINTER_REGNUM + fp_reg_count;
>> +	   reg++)
>> +	SET_HARD_REG_BIT (no_unit_alloc_regs, reg);
>> +    }
> Please consider using the existing helper function instead:
>     add_to_hard_reg_set (&no_unit_alloc_regs, Pmode, reg);
> 

Thank you for the suggestion - I have applied the change. As Vladimir 
has already given his approval for the patch, I will commit it later 
today if there are no objections.

Best regards

Kwok Yeung


     Add support for using multiple registers to hold the frame pointer

     2019-11-06  Kwok Cheung Yeung  <kcy@codesourcery.com>

     	gcc/
     	* ira.c (setup_alloc_regs): Setup no_unit_alloc_regs for
     	frame pointer in multiple registers.
     	(ira_setup_eliminable_regset): Setup eliminable_regset,
     	ira_no_alloc_regs and regs_ever_live for frame pointer in
     	multiple registers.

diff --git a/gcc/ira.c b/gcc/ira.c
index 9f8da67..5df9953 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -515,7 +515,8 @@ setup_alloc_regs (bool use_hard_frame_p)
  #endif
    no_unit_alloc_regs = fixed_nonglobal_reg_set;
    if (! use_hard_frame_p)
-    SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
+    add_to_hard_reg_set (&no_unit_alloc_regs, Pmode,
+			 HARD_FRAME_POINTER_REGNUM);
    setup_class_hard_regs ();
  }

@@ -2248,6 +2249,7 @@ ira_setup_eliminable_regset (void)
  {
    int i;
    static const struct {const int from, to; } eliminables[] = 
ELIMINABLE_REGS;
+  int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM, Pmode);

    /* Setup is_leaf as frame_pointer_required may use it.  This function
       is called by sched_init before ira if scheduling is enabled.  */
@@ -2276,7 +2278,8 @@ ira_setup_eliminable_regset (void)
         frame pointer in LRA.  */

    if (frame_pointer_needed)
-    df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
+    for (i = 0; i < fp_reg_count; i++)
+      df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);

    ira_no_alloc_regs = no_unit_alloc_regs;
    CLEAR_HARD_REG_SET (eliminable_regset);
@@ -2306,17 +2309,21 @@ ira_setup_eliminable_regset (void)
      }
    if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
      {
-      if (!TEST_HARD_REG_BIT (crtl->asm_clobbers, 
HARD_FRAME_POINTER_REGNUM))
-	{
-	  SET_HARD_REG_BIT (eliminable_regset, HARD_FRAME_POINTER_REGNUM);
-	  if (frame_pointer_needed)
-	    SET_HARD_REG_BIT (ira_no_alloc_regs, HARD_FRAME_POINTER_REGNUM);
-	}
-      else if (frame_pointer_needed)
-	error ("%s cannot be used in %<asm%> here",
-	       reg_names[HARD_FRAME_POINTER_REGNUM]);
-      else
-	df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
+      for (i = 0; i < fp_reg_count; i++)
+	if (!TEST_HARD_REG_BIT (crtl->asm_clobbers,
+				HARD_FRAME_POINTER_REGNUM + i))
+	  {
+	    SET_HARD_REG_BIT (eliminable_regset,
+			      HARD_FRAME_POINTER_REGNUM + i);
+	    if (frame_pointer_needed)
+	      SET_HARD_REG_BIT (ira_no_alloc_regs,
+				HARD_FRAME_POINTER_REGNUM + i);
+	  }
+	else if (frame_pointer_needed)
+	  error ("%s cannot be used in %<asm%> here",
+		 reg_names[HARD_FRAME_POINTER_REGNUM + i]);
+	else
+	  df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);
      }
  }
diff mbox series

Patch

diff --git a/gcc/ira.c b/gcc/ira.c
index 9f8da67..25e9359 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -515,7 +515,13 @@  setup_alloc_regs (bool use_hard_frame_p)
  #endif
    no_unit_alloc_regs = fixed_nonglobal_reg_set;
    if (! use_hard_frame_p)
-    SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
+    {
+      int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM, 
Pmode);
+      for (int reg = HARD_FRAME_POINTER_REGNUM;
+	   reg < HARD_FRAME_POINTER_REGNUM + fp_reg_count;
+	   reg++)
+	SET_HARD_REG_BIT (no_unit_alloc_regs, reg);
+    }
    setup_class_hard_regs ();
  }