Patchwork [RFC] Fix PR rtl-optimization/54315 (partially)

login
register
mail settings
Submitter Eric Botcazou
Date Oct. 8, 2012, 11:28 a.m.
Message ID <9002359.9Aq014m8Xc@polaris>
Download mbox | patch
Permalink /patch/189997/
State New
Headers show

Comments

Eric Botcazou - Oct. 8, 2012, 11:28 a.m.
Hi,

this PR is about the other path in the RTL expander where a temporary is 
created on the stack for a value returned in registers: BLKmode structures 
returned in registers without PARALLELs, i.e. for which the back-end is 
permitted (but not required) to create (REG:BLK).  The canonical example is 
the ARM (and not the PA anymore, as it returns in PARALLELs these days) but 
x86-64 also returns small unions by means of this mechanism.

The idea is the same as for PARALLELs in:
  http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00811.html
i.e. to keep the returned value in pseudo-registers as long as possible.
copy_blkmode_from_reg is changed to require an explicit target and isn't 
invoked from expand_call, but from expand_assignment/store_expr/store_field 
instead where a target is available.  The following assertion is added:

  /* BLKmode registers created in the back-end shouldn't have survived.  */
  gcc_assert (mode != BLKmode);

despite the code in expand_call.  The GET_MODE (valreg) != BLKmode test in 
expand_call was added recently by Jakub, but is very likely superfluous since

  /* Register in which non-BLKmode value will be returned,
     or 0 if no value or if value is BLKmode.  */
  rtx valreg;

and hard_function_value fixes up (REG:BLK) coming from the back-ends.

The patch was fully tested on x86_64-suse-linux, where it removes half of the 
useless stores in the original testcase for PR rtl-optimization/54315, and 
manually tested for arm-linux-gnueabi (for now), where it also removes stores 
for small structures.  Comments?


2012-10-08  Eric Botcazou  <ebotcazou@adacore.com>

	* calls.c (expand_call): Don't deal specifically with BLKmode values
	returned in naked registers.
	* expr.h (copy_blkmode_from_reg): Adjust prototype.
	* expr.c (copy_blkmode_from_reg): Rename first parameter into TARGET and
	make it required.  Assert that SRCREG hasn't BLKmode.  Add a couple of 
	short-circuits for common cases and be prepared for sub-word registers.
	(expand_assignment): Call copy_blkmode_from_reg for BLKmode values
	returned in naked registers.
	(store_expr): Likewise.
	(store_field): Likewise.
Eric Botcazou - Oct. 20, 2012, 8:56 p.m.
> The patch was fully tested on x86_64-suse-linux, where it removes half of
> the useless stores in the original testcase for PR rtl-optimization/54315,
> and manually tested for arm-linux-gnueabi (for now), where it also removes
> stores for small structures.  Comments?
>
> 2012-10-08  Eric Botcazou  <ebotcazou@adacore.com>
> 
> 	* calls.c (expand_call): Don't deal specifically with BLKmode values
> 	returned in naked registers.
> 	* expr.h (copy_blkmode_from_reg): Adjust prototype.
> 	* expr.c (copy_blkmode_from_reg): Rename first parameter into TARGET and
> 	make it required.  Assert that SRCREG hasn't BLKmode.  Add a couple of
> 	short-circuits for common cases and be prepared for sub-word registers.
> 	(expand_assignment): Call copy_blkmode_from_reg for BLKmode values
> 	returned in naked registers.
> 	(store_expr): Likewise.
> 	(store_field): Likewise.

Applied after testing on x86_64-suse-linux and mips64el-linux-gnu, where it 
also removes stores for small structures (at least for n32).

Patch

Index: expr.h
===================================================================
--- expr.h	(revision 192137)
+++ expr.h	(working copy)
@@ -335,7 +335,7 @@  extern rtx emit_group_move_into_temps (r
 extern void emit_group_store (rtx, rtx, tree, int);
 
 /* Copy BLKmode object from a set of registers.  */
-extern rtx copy_blkmode_from_reg (rtx, rtx, tree);
+extern void copy_blkmode_from_reg (rtx, rtx, tree);
 
 /* Mark REG as holding a parameter for the next CALL_INSN.
    Mode is TYPE_MODE of the non-promoted parameter, or VOIDmode.  */
Index: expr.c
===================================================================
--- expr.c	(revision 192137)
+++ expr.c	(working copy)
@@ -2086,39 +2086,23 @@  emit_group_store (rtx orig_dst, rtx src,
     emit_move_insn (orig_dst, dst);
 }
 
-/* Generate code to copy a BLKmode object of TYPE out of a
-   set of registers starting with SRCREG into TGTBLK.  If TGTBLK
-   is null, a stack temporary is created.  TGTBLK is returned.
-
-   The purpose of this routine is to handle functions that return
-   BLKmode structures in registers.  Some machines (the PA for example)
-   want to return all small structures in registers regardless of the
-   structure's alignment.  */
+/* Copy a BLKmode object of TYPE out of a register SRCREG into TARGET.
 
-rtx
-copy_blkmode_from_reg (rtx tgtblk, rtx srcreg, tree type)
+   This is used on targets that return BLKmode values in registers.  */
+
+void
+copy_blkmode_from_reg (rtx target, rtx srcreg, tree type)
 {
   unsigned HOST_WIDE_INT bytes = int_size_in_bytes (type);
   rtx src = NULL, dst = NULL;
   unsigned HOST_WIDE_INT bitsize = MIN (TYPE_ALIGN (type), BITS_PER_WORD);
   unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0;
+  enum machine_mode mode = GET_MODE (srcreg);
+  enum machine_mode tmode = GET_MODE (target);
   enum machine_mode copy_mode;
 
-  if (tgtblk == 0)
-    {
-      tgtblk = assign_temp (build_qualified_type (type,
-						  (TYPE_QUALS (type)
-						   | TYPE_QUAL_CONST)),
-			    1, 1);
-      preserve_temp_slots (tgtblk);
-    }
-
-  /* This code assumes srcreg is at least a full word.  If it isn't, copy it
-     into a new pseudo which is a full word.  */
-
-  if (GET_MODE (srcreg) != BLKmode
-      && GET_MODE_SIZE (GET_MODE (srcreg)) < UNITS_PER_WORD)
-    srcreg = convert_to_mode (word_mode, srcreg, TYPE_UNSIGNED (type));
+  /* BLKmode registers created in the back-end shouldn't have survived.  */
+  gcc_assert (mode != BLKmode);
 
   /* If the structure doesn't take up a whole number of words, see whether
      SRCREG is padded on the left or on the right.  If it's on the left,
@@ -2136,22 +2120,54 @@  copy_blkmode_from_reg (rtx tgtblk, rtx s
     padding_correction
       = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) * BITS_PER_UNIT));
 
+  /* We can use a single move if we have an exact mode for the size.  */
+  else if (MEM_P (target)
+	   && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (target))
+	       || MEM_ALIGN (target) >= GET_MODE_ALIGNMENT (mode))
+	   && bytes == GET_MODE_SIZE (mode))
+  {
+    emit_move_insn (adjust_address (target, mode, 0), srcreg);
+    return;
+  }
+
+  /* And if we additionally have the same mode for a register.  */
+  else if (REG_P (target)
+	   && GET_MODE (target) == mode
+	   && bytes == GET_MODE_SIZE (mode))
+  {
+    emit_move_insn (target, srcreg);
+    return;
+  }
+
+  /* This code assumes srcreg is at least a full word.  If it isn't, copy it
+     into a new pseudo which is a full word.  */
+  if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+    {
+      srcreg = convert_to_mode (word_mode, srcreg, TYPE_UNSIGNED (type));
+      mode = word_mode;
+    }
+
   /* Copy the structure BITSIZE bits at a time.  If the target lives in
      memory, take care of not reading/writing past its end by selecting
      a copy mode suited to BITSIZE.  This should always be possible given
      how it is computed.
 
+     If the target lives in register, make sure not to select a copy mode
+     larger than the mode of the register.
+
      We could probably emit more efficient code for machines which do not use
      strict alignment, but it doesn't seem worth the effort at the current
      time.  */
 
   copy_mode = word_mode;
-  if (MEM_P (tgtblk))
+  if (MEM_P (target))
     {
       enum machine_mode mem_mode = mode_for_size (bitsize, MODE_INT, 1);
       if (mem_mode != BLKmode)
 	copy_mode = mem_mode;
     }
+  else if (REG_P (target) && GET_MODE_BITSIZE (tmode) < BITS_PER_WORD)
+    copy_mode = tmode;
 
   for (bitpos = 0, xbitpos = padding_correction;
        bitpos < bytes * BITS_PER_UNIT;
@@ -2160,15 +2176,15 @@  copy_blkmode_from_reg (rtx tgtblk, rtx s
       /* We need a new source operand each time xbitpos is on a
 	 word boundary and when xbitpos == padding_correction
 	 (the first time through).  */
-      if (xbitpos % BITS_PER_WORD == 0
-	  || xbitpos == padding_correction)
-	src = operand_subword_force (srcreg, xbitpos / BITS_PER_WORD,
-				     GET_MODE (srcreg));
+      if (xbitpos % BITS_PER_WORD == 0 || xbitpos == padding_correction)
+	src = operand_subword_force (srcreg, xbitpos / BITS_PER_WORD, mode);
 
       /* We need a new destination operand each time bitpos is on
 	 a word boundary.  */
-      if (bitpos % BITS_PER_WORD == 0)
-	dst = operand_subword (tgtblk, bitpos / BITS_PER_WORD, 1, BLKmode);
+      if (REG_P (target) && GET_MODE_BITSIZE (tmode) < BITS_PER_WORD)
+	dst = target;
+      else if (bitpos % BITS_PER_WORD == 0)
+	dst = operand_subword (target, bitpos / BITS_PER_WORD, 1, tmode);
 
       /* Use xbitpos for the source extraction (right justified) and
 	 bitpos for the destination store (left justified).  */
@@ -2177,8 +2193,6 @@  copy_blkmode_from_reg (rtx tgtblk, rtx s
 					  xbitpos % BITS_PER_WORD, 1, false,
 					  NULL_RTX, copy_mode, copy_mode));
     }
-
-  return tgtblk;
 }
 
 /* Copy BLKmode value SRC into a register of mode MODE.  Return the
@@ -4883,7 +4897,12 @@  expand_assignment (tree to, tree from, b
 	emit_group_store (to_rtx, value, TREE_TYPE (from),
 			  int_size_in_bytes (TREE_TYPE (from)));
       else if (GET_MODE (to_rtx) == BLKmode)
-	emit_block_move (to_rtx, value, expr_size (from), BLOCK_OP_NORMAL);
+	{
+	  if (REG_P (value))
+	    copy_blkmode_from_reg (to_rtx, value, TREE_TYPE (from));
+	  else
+	    emit_block_move (to_rtx, value, expr_size (from), BLOCK_OP_NORMAL);
+	}
       else
 	{
 	  if (POINTER_TYPE_P (TREE_TYPE (to)))
@@ -5225,21 +5244,26 @@  store_expr (tree exp, rtx target, int ca
 	 supposed to be bit-copied or bit-initialized.  */
       && expr_size (exp) != const0_rtx)
     {
-      if (GET_MODE (temp) != GET_MODE (target)
-	  && GET_MODE (temp) != VOIDmode)
+      if (GET_MODE (temp) != GET_MODE (target) && GET_MODE (temp) != VOIDmode)
 	{
-	  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (exp));
-	  if (GET_MODE (target) == BLKmode
-	      && GET_MODE (temp) == BLKmode)
-	    emit_block_move (target, temp, expr_size (exp),
-			     (call_param_p
-			      ? BLOCK_OP_CALL_PARM
-			      : BLOCK_OP_NORMAL));
-	  else if (GET_MODE (target) == BLKmode)
-	    store_bit_field (target, INTVAL (expr_size (exp)) * BITS_PER_UNIT,
-			     0, 0, 0, GET_MODE (temp), temp);
+	  if (GET_MODE (target) == BLKmode)
+	    {
+	      if (REG_P (temp))
+	        {
+		  if (TREE_CODE (exp) == CALL_EXPR)
+		    copy_blkmode_from_reg (target, temp, TREE_TYPE (exp));
+		  else
+		    store_bit_field (target,
+				     INTVAL (expr_size (exp)) * BITS_PER_UNIT,
+				     0, 0, 0, GET_MODE (temp), temp);
+		}
+	      else
+		emit_block_move (target, temp, expr_size (exp),
+				 (call_param_p
+				  ? BLOCK_OP_CALL_PARM : BLOCK_OP_NORMAL));
+	    }
 	  else
-	    convert_move (target, temp, unsignedp);
+	    convert_move (target, temp, TYPE_UNSIGNED (TREE_TYPE (exp)));
 	}
 
       else if (GET_MODE (temp) == BLKmode && TREE_CODE (exp) == STRING_CST)
@@ -6328,7 +6352,8 @@  store_field (rtx target, HOST_WIDE_INT b
      twice, once with emit_move_insn and once via store_field.  */
 
   if (mode == BLKmode
-      && (REG_P (target) || GET_CODE (target) == SUBREG))
+      && (REG_P (target) || GET_CODE (target) == SUBREG)
+      && TREE_CODE (exp) != CALL_EXPR)
     {
       rtx object = assign_temp (type, 1, 1);
       rtx blk_object = adjust_address (object, BLKmode, 0);
@@ -6477,6 +6502,16 @@  store_field (rtx target, HOST_WIDE_INT b
 	  temp = temp_target;
 	}
 
+      /* Handle calls that return BLKmode values in registers.  */
+      else if (mode == BLKmode
+	       && REG_P (temp)
+	       && TREE_CODE (exp) == CALL_EXPR)
+	{
+	  rtx temp_target = gen_reg_rtx (GET_MODE (temp));
+	  copy_blkmode_from_reg (temp_target, temp, TREE_TYPE (exp));
+	  temp = temp_target;
+	}
+
       /* Store the value in the bitfield.  */
       store_bit_field (target, bitsize, bitpos,
 		       bitregion_start, bitregion_end,
Index: calls.c
===================================================================
--- calls.c	(revision 192137)
+++ calls.c	(working copy)
@@ -3332,16 +3332,6 @@  expand_call (tree exp, rtx target, int i
 		sibcall_failure = 1;
 	    }
 	}
-      else if (TYPE_MODE (rettype) == BLKmode)
-	{
-	  rtx val = valreg;
-	  if (GET_MODE (val) != BLKmode)
-	    val = avoid_likely_spilled_reg (val);
-	  target = copy_blkmode_from_reg (target, val, rettype);
-
-	  /* We can not support sibling calls for this case.  */
-	  sibcall_failure = 1;
-	}
       else
 	target = copy_to_reg (avoid_likely_spilled_reg (valreg));