diff mbox

Handle ASHIFTRT with constant shift count >= BITS_PER_WORD in subreg lowering (PR rtl-optimization/50339)

Message ID 20130221164240.GE1215@tucnak.zalov.cz
State New
Headers show

Commit Message

Jakub Jelinek Feb. 21, 2013, 4:42 p.m. UTC
Hi!

This patch teaches lower-subreg pass to also handle ASHIFTRTs with
BITS_PER_WORD to 2*BITS_PER_WORD-1 constant shift counts, like it already
handles similar LSHIFTRTs.
While for LSHIFTRT we should zero the upper half, for ASHIFTRT we should
either set it to the upper source half >> (BITS_PER_WORD-1), or, for
shifts by exactly 2*BITS_PER_WORD-1, optimize that to one shift
followed by copying the result from the lower to the upper half.

On the testcase from the PR this removes 3 unnecessary moves, so we are now
one better than 4.7 (thus fixing a regression), and on the other testcases
we generate code of either the same or better quality.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-02-21  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/50339
	* lower-subreg.h (struct lower_subreg_choices): Add splitting_ashiftrt
	field.
	* lower-subreg.c (compute_splitting_shift): Handle ASHIFTRT.
	(compute_costs): Call compute_splitting_shift also for ASHIFTRT
	into splitting_ashiftrt field.
	(find_decomposable_shift_zext, resolve_shift_zext): Handle also
	ASHIFTRT.
	(dump_choices): Fix up printing LSHIFTRT choices, print ASHIFTRT
	choices.


	Jakub

Comments

Richard Henderson Feb. 21, 2013, 6:40 p.m. UTC | #1
On 02/21/2013 08:42 AM, Jakub Jelinek wrote:
> @@ -1243,12 +1258,20 @@ resolve_shift_zext (rtx insn)
>    dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
>                                            GET_MODE (SET_DEST (set)),
>                                            offset1);
> -  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
> -                                           GET_MODE (SET_DEST (set)),
> -                                           offset2);
> +  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
> +					    GET_MODE (SET_DEST (set)),
> +					    offset2);
>    src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
>                                           GET_MODE (op_operand),
>                                           src_offset);
> +  if (GET_CODE (op) == ASHIFTRT
> +      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
> +    {
> +      rtx tem = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
> +			      BITS_PER_WORD - 1, dest_upper, 0);
> +      if (dest_upper != tem)
> +	emit_move_insn (dest_upper, tem);
> +    }
>    if (GET_CODE (op) != ZERO_EXTEND)
>      {
>        int shift_count = INTVAL (XEXP (op, 1));
> @@ -1257,12 +1280,15 @@ resolve_shift_zext (rtx insn)
>  				LSHIFT_EXPR : RSHIFT_EXPR,
>  				word_mode, src_reg,
>  				shift_count - BITS_PER_WORD,
> -				dest_reg, 1);
> +				dest_reg, GET_CODE (op) != ASHIFTRT);
>      }
>  
>    if (dest_reg != src_reg)
>      emit_move_insn (dest_reg, src_reg);
> -  emit_move_insn (dest_zero, CONST0_RTX (word_mode));
> +  if (GET_CODE (op) != ASHIFTRT)
> +    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
> +  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
> +    emit_move_insn (dest_upper, copy_rtx (src_reg));
>    insns = get_insns ();

Am I missing something?  This looks like it would clobber the input too
early in the case of

	(set (reg:DI x)	(ashiftrt (reg:DI x) (const_int 60)))

where src_reg and dest_upper could resolve to the same concatn, and thus
the same SImode registers underneath?

Don't you need to delay that upper copy until the final block?


r~
diff mbox

Patch

--- gcc/lower-subreg.h.jj	2013-02-21 14:10:39.033592663 +0100
+++ gcc/lower-subreg.h	2013-02-21 15:26:18.773634801 +0100
@@ -34,6 +34,7 @@  struct lower_subreg_choices {
      should be split.  */
   bool splitting_ashift[MAX_BITS_PER_WORD];
   bool splitting_lshiftrt[MAX_BITS_PER_WORD];
+  bool splitting_ashiftrt[MAX_BITS_PER_WORD];
 
   /* True if there is at least one mode that is worth splitting.  */
   bool something_to_do;
--- gcc/lower-subreg.c.jj	2013-02-21 14:10:38.975592966 +0100
+++ gcc/lower-subreg.c	2013-02-21 15:27:15.114316148 +0100
@@ -57,9 +57,9 @@  along with GCC; see the file COPYING3.
    to do this.
 
    This pass only splits moves with modes that are wider than
-   word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
-   modes that are twice the width of word_mode.  The latter could be
-   generalized if there was a need to do this, but the trend in
+   word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
+   integer modes that are twice the width of word_mode.  The latter
+   could be generalized if there was a need to do this, but the trend in
    architectures is to not need this.
 
    There are two useful preprocessor defines for use by maintainers:
@@ -152,7 +152,7 @@  compute_splitting_shift (bool speed_p, s
 			 bool *splitting, enum rtx_code code,
 			 int word_move_zero_cost, int word_move_cost)
 {
-  int wide_cost, narrow_cost, i;
+  int wide_cost, narrow_cost, upper_cost, i;
 
   for (i = 0; i < BITS_PER_WORD; i++)
     {
@@ -163,13 +163,20 @@  compute_splitting_shift (bool speed_p, s
       else
 	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
 
+      if (code != ASHIFTRT)
+	upper_cost = word_move_zero_cost;
+      else if (i == BITS_PER_WORD - 1)
+	upper_cost = word_move_cost;
+      else
+	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
+				 BITS_PER_WORD - 1);
+
       if (LOG_COSTS)
 	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
-		 i + BITS_PER_WORD, wide_cost, narrow_cost,
-		 word_move_zero_cost);
+		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
 
-      if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
+      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
 	splitting[i] = true;
     }
 }
@@ -248,6 +255,9 @@  compute_costs (bool speed_p, struct cost
       compute_splitting_shift (speed_p, rtxes,
 			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
 			       word_move_zero_cost, word_move_cost);
+      compute_splitting_shift (speed_p, rtxes,
+			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
+			       word_move_zero_cost, word_move_cost);
     }
 }
 
@@ -1153,6 +1163,7 @@  find_decomposable_shift_zext (rtx insn,
   op = SET_SRC (set);
   if (GET_CODE (op) != ASHIFT
       && GET_CODE (op) != LSHIFTRT
+      && GET_CODE (op) != ASHIFTRT
       && GET_CODE (op) != ZERO_EXTEND)
     return false;
 
@@ -1173,6 +1184,8 @@  find_decomposable_shift_zext (rtx insn,
     {
       bool *splitting = (GET_CODE (op) == ASHIFT
 			 ? choices[speed_p].splitting_ashift
+			 : GET_CODE (op) == ASHIFTRT
+			 ? choices[speed_p].splitting_ashiftrt
 			 : choices[speed_p].splitting_lshiftrt);
       if (!CONST_INT_P (XEXP (op, 1))
 	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
@@ -1200,7 +1213,7 @@  resolve_shift_zext (rtx insn)
   rtx op;
   rtx op_operand;
   rtx insns;
-  rtx src_reg, dest_reg, dest_zero;
+  rtx src_reg, dest_reg, dest_upper;
   int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
 
   set = single_set (insn);
@@ -1210,6 +1223,7 @@  resolve_shift_zext (rtx insn)
   op = SET_SRC (set);
   if (GET_CODE (op) != ASHIFT
       && GET_CODE (op) != LSHIFTRT
+      && GET_CODE (op) != ASHIFTRT
       && GET_CODE (op) != ZERO_EXTEND)
     return NULL_RTX;
 
@@ -1223,7 +1237,8 @@  resolve_shift_zext (rtx insn)
   /* src_reg_num is the number of the word mode register which we
      are operating on.  For a left shift and a zero_extend on little
      endian machines this is register 0.  */
-  src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
+  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
+		? 1 : 0;
 
   if (WORDS_BIG_ENDIAN
       && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
@@ -1243,12 +1258,20 @@  resolve_shift_zext (rtx insn)
   dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                           GET_MODE (SET_DEST (set)),
                                           offset1);
-  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
-                                           GET_MODE (SET_DEST (set)),
-                                           offset2);
+  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
+					    GET_MODE (SET_DEST (set)),
+					    offset2);
   src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                          GET_MODE (op_operand),
                                          src_offset);
+  if (GET_CODE (op) == ASHIFTRT
+      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
+    {
+      rtx tem = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
+			      BITS_PER_WORD - 1, dest_upper, 0);
+      if (dest_upper != tem)
+	emit_move_insn (dest_upper, tem);
+    }
   if (GET_CODE (op) != ZERO_EXTEND)
     {
       int shift_count = INTVAL (XEXP (op, 1));
@@ -1257,12 +1280,15 @@  resolve_shift_zext (rtx insn)
 				LSHIFT_EXPR : RSHIFT_EXPR,
 				word_mode, src_reg,
 				shift_count - BITS_PER_WORD,
-				dest_reg, 1);
+				dest_reg, GET_CODE (op) != ASHIFTRT);
     }
 
   if (dest_reg != src_reg)
     emit_move_insn (dest_reg, src_reg);
-  emit_move_insn (dest_zero, CONST0_RTX (word_mode));
+  if (GET_CODE (op) != ASHIFTRT)
+    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
+  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
+    emit_move_insn (dest_upper, copy_rtx (src_reg));
   insns = get_insns ();
 
   end_sequence ();
@@ -1328,7 +1354,8 @@  dump_choices (bool speed_p, const char *
 	   GET_MODE_NAME (twice_word_mode));
 
   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
-  dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
+  dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
+  dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
   fprintf (dump_file, "\n");
 }