Patchwork ARM generate constants for non load scheduling arch patch

login
register
mail settings
Submitter sarah@hederstierna.com
Date April 4, 2011, 8:20 p.m.
Message ID <CE36BD26828FA5408B9F87E4DD2ACB0B92B7726494@MBXVS01.HMC.local>
Download mbox | patch
Permalink /patch/89706/
State New
Headers show

Comments

sarah@hederstierna.com - April 4, 2011, 8:20 p.m.
Hi!

I was recently cleaning out my attic of old sources, and I found this old GCC patch for ARM that was never contributed. At first I was going to just delete it, but then I thought that maybe someone could make use of it, or be inspired to do something with it.

The patch is 5-6 years old, so I really think it needs to be checked again for correctness by an ARM maintainer.

The idea is that, when generating constants for ARM on architectures where LDR is very expensive and we are optimizing for speed, it can be faster to allow a few more instructions in order to avoid LDR instructions.

For example, on an ARM7TDMI-S core without load scheduling support, when optimizing with -O3 we could allow a few more instructions to get better pipeline efficiency and faster code without LDR.
(Also, some instructions such as shifts could potentially be merged into the following instructions.)

Please check the patch if you are interested. It first contains a section where I added a check for non-load-scheduling targets, followed by three new, separate constant-generating algorithms that could each be added stand-alone.

And again, the patches are very old, so I cannot guarantee that they are 100% up to date.
Check them out if you are interested, and use with care :)

Thanks and Best Regards,

Fredrik Hederstierna
Securitas Direct AB
Malmoe SWEDEN

Patch

Index: gcc/config/arm/arm.c
===================================================================
*** gcc/config/arm/arm.c	(revision 171910)
--- gcc/config/arm/arm.c	(working copy)
*************** arm_split_constant (enum rtx_code code, 
*** 2535,2576 ****
  	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
        */
        if (!after_arm_reorg
! 	  && !cond
! 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
! 				1, 0)
! 	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
! 		 + (code != SET))))
! 	{
! 	  if (code == SET)
! 	    {
! 	      /* Currently SET is the only monadic value for CODE, all
! 		 the rest are diadic.  */
! 	      if (TARGET_USE_MOVT)
! 		arm_emit_movpair (target, GEN_INT (val));
! 	      else
! 		emit_set_insn (target, GEN_INT (val));
! 
! 	      return 1;
! 	    }
! 	  else
! 	    {
! 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
! 
! 	      if (TARGET_USE_MOVT)
! 		arm_emit_movpair (temp, GEN_INT (val));
! 	      else
! 		emit_set_insn (temp, GEN_INT (val));
  
! 	      /* For MINUS, the value is subtracted from, since we never
! 		 have subtraction of a constant.  */
! 	      if (code == MINUS)
! 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
! 	      else
! 		emit_set_insn (target,
! 			       gen_rtx_fmt_ee (code, mode, source, temp));
! 	      return 2;
! 	    }
! 	}
      }
  
    return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
--- 2535,2589 ----
  	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
        */
        if (!after_arm_reorg
! 	  && !cond)
!         {
!           bool size_p = optimize_function_for_size_p (cfun);
!           int constant_insn_limit = arm_constant_limit (size_p) + (code != SET);
!           
!           /* When optimizing a SET for speed (not size) at -O3 or above on a
!              core without load scheduling, allow two extra insns.  */
!           if ((!size_p) && (optimize > 2)
!               && (code == SET)
!               && (constant_insn_limit == 1)
!               && !arm_ld_sched)
!             {
!               constant_insn_limit += 2;
!             }
  
! 	  if (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
! 				1, 0) > constant_insn_limit)
!             {
!               if (code == SET)
!                 {
!                   /* Currently SET is the only monadic value for CODE, all
!                      the rest are diadic.  */
!                   if (TARGET_USE_MOVT)
!                     arm_emit_movpair (target, GEN_INT (val));
!                   else
!                     emit_set_insn (target, GEN_INT (val));
!                   
!                   return 1;
!                 }
!               else
!                 {
!                   rtx temp = subtargets ? gen_reg_rtx (mode) : target;
!                   
!                   if (TARGET_USE_MOVT)
!                     arm_emit_movpair (temp, GEN_INT (val));
!                   else
!                     emit_set_insn (temp, GEN_INT (val));
!                   
!                   /* For MINUS, the value is subtracted from, since we never
!                      have subtraction of a constant.  */
!                   if (code == MINUS)
!                     emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
!                   else
!                     emit_set_insn (target,
!                                    gen_rtx_fmt_ee (code, mode, source, temp));
!                   return 2;
!                 }
!             }
!         }
      }
  
    return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
*************** arm_gen_constant (enum rtx_code code, en
*** 2884,2889 ****
--- 2897,2929 ----
  	  return 1;
  	}
  
+       /* See if we can generate this by setting 8 bits and then shifting.  */
+ 
+       /* For ARM we can generate 8 bit value with even number of
+          shift steps, but we have a special case where we must need
+          an odd number of steps, example 0x7f800000 or 0x40800000.
+          Constants of the form eg. (0x00FF0000 >> 1).
+          Also the shift may well merge into a subsequent insn. */
+ 
+       /* If odd number leading zeros and 8 bit constant */
+       if ((clear_sign_bit_copies & 1) &&
+           ((clear_sign_bit_copies + clear_zero_bit_copies) == 24))
+         {
+           temp1 = ARM_SIGN_EXTEND (remainder << clear_sign_bit_copies);
+           
+           if (generate)
+             {
+               rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+               emit_constant_insn (cond,
+                                   gen_rtx_SET (VOIDmode, new_src,
+                                                GEN_INT (temp1)));
+               emit_constant_insn (cond,
+                                   gen_lshrsi3 (target, new_src,
+                                                GEN_INT (clear_sign_bit_copies)));
+             }
+           return 2;                      
+         }
+ 
        /* See if we can do this by sign_extending a constant that is known
  	 to be negative.  This is a good, way of doing it, since the shift
  	 may well merge into a subsequent insn.  */
*************** arm_gen_constant (enum rtx_code code, en
*** 2959,2964 ****
--- 2999,3152 ----
  	    }
  	}
  
+       /* See if we can generate the constant by setting 8 bits and then shifting in zeros
+          from left or right.
+          Eg. 0xfffefdf8 (using lsl) or 0x1ffefdff (using lsr)
+       */
+ 
+       /* If any leading zeros */
+       if (clear_sign_bit_copies > 0)
+         {
+           /* Shift up leading zeros */
+           temp1 = remainder << clear_sign_bit_copies;
+           /* Set lowest bits */
+           temp2 = (1 << clear_sign_bit_copies) - 1;
+           temp1 |= temp2;
+           
+           /* Try negate and check if value can be loaded into 8 bits */
+           temp2 = ~temp1;
+           /* Count and shift down trailing zeros */
+           for (i = 0; i <= 31; i++)
+             {
+               if (temp2 & 1)
+                 break;
+               else
+                 temp2 >>= 1;
+             }
+           
+           if ((temp2 < 256) || (((temp2 & 0xffff0000) == 0) &&
+                                 (temp2 & 0x8000)))
+             {
+ 	      if (generate)
+ 		{
+ 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+                   
+ 		  insns = arm_gen_constant (code, /* SET */
+                                             mode,
+                                             cond,
+ 					    temp1,
+ 					    new_src,
+                                             source,
+                                             subtargets,
+                                             1);
+ 		  source = new_src;
+ 		}
+ 	      else
+ 		{
+ 		  rtx new_src = subtargets ? NULL_RTX : target;
+ 
+ 		  insns = arm_gen_constant (code, /* SET */
+                                             mode,
+                                             cond,
+ 					    temp1,
+ 					    new_src,
+                                             source,
+                                             subtargets,
+                                             0);
+ 		  source = new_src;
+ 		}
+ 
+               if (generate)
+                 {
+                   /* rtx new_src = subtargets ? gen_reg_rtx (mode) : target; */
+                   rtx shift = GEN_INT (clear_sign_bit_copies);
+                   
+                   
+                   emit_constant_insn (cond,
+                                       gen_rtx_SET (VOIDmode,
+                                                    target,
+                                                    gen_rtx_LSHIFTRT (mode,
+                                                                      source,
+                                                                      shift)));
+                   
+                   /* emit_constant_insn (cond, gen_lshrsi3 (target, new_src, shift)); */
+                 }
+               
+               return insns + 1;
+             }
+         }
+       
+       /* If any trailing zeros */
+       if (clear_zero_bit_copies > 0)
+         {
+           /* Shift down trailing zeros */
+           temp1 = remainder >> clear_zero_bit_copies;
+           /* Set highest bits */
+           temp2 = (1 << clear_zero_bit_copies) - 1;
+           temp1 |= (temp2 << (32 - clear_zero_bit_copies));
+           
+           /* Try negate and check if value can be loaded into 8 bits */
+           temp2 = ~temp1;
+           /* Count and shift down trailing zeros */
+           for (i = 0; i <= 31; i++)
+             {
+               if (temp2 & 1)
+                 break;
+               else
+                 temp2 >>= 1;
+             }
+ 
+           if ((temp2 < 256) || (((temp2 & 0xffff0000) == 0) &&
+                                 (temp2 & 0x8000)))
+             {
+ 	      if (generate)
+ 		{
+ 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+                   
+ 		  insns = arm_gen_constant (code, /* SET */
+                                             mode,
+                                             cond,
+ 					    temp1,
+ 					    new_src,
+                                             source,
+                                             subtargets,
+                                             1);
+ 		  source = new_src;
+ 		}
+ 	      else
+ 		{
+ 		  rtx new_src = subtargets ? NULL_RTX : target;
+                   
+ 		  insns = arm_gen_constant (code, /* SET */
+                                             mode,
+                                             cond,
+ 					    temp1,
+ 					    new_src,
+                                             source,
+                                             subtargets,
+                                             0);
+ 		  source = new_src;
+ 		}
+ 
+               if (generate)
+                 {
+                   /* rtx new_src = subtargets ? gen_reg_rtx (mode) : target; */
+                   rtx shift = GEN_INT (clear_zero_bit_copies);
+                   
+                   emit_constant_insn (cond,
+                                       gen_rtx_SET (VOIDmode,
+                                                    target,
+                                                    gen_rtx_ASHIFT (mode,
+                                                                    source,
+                                                                    shift)));
+ 
+                   /* emit_constant_insn (cond, gen_lshrsi3 (target, new_src, shift)); */
+                 }
+               
+               return insns + 1;
+             }
+         }
+       
        /* See if we can generate this by setting the bottom (or the top)
  	 16 bits, and then shifting these into the other half of the
  	 word.  We only look for the simplest cases, to do more would cost
*************** arm_gen_constant (enum rtx_code code, en
*** 2991,2998 ****
  						     GEN_INT (i)),
  				     source)));
  		  return insns + 1;
! 		}
! 	    }
  
  	  /* Don't duplicate cases already considered.  */
  	  for (i = 17; i < 24; i++)
--- 3179,3211 ----
  						     GEN_INT (i)),
  				     source)));
  		  return insns + 1;
! 		}	    
! 
!               /* Also check cases where we could generate constant using XOR.
!                  eg. (0x11110000 + ~0xffff1111) */
!               else if (((((temp2 | 0xffff0000) ^ ((temp2 | 0xffff0000) << i))
!                          & 0xffffffff) == remainder)
!                        && !const_ok_for_arm (temp2 | 0xffff0000))
!                 {
!                   rtx new_src = (subtargets
!                                  ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
!                                  : target);
!                   insns = arm_gen_constant (code, mode, cond,
!                                             (temp2 | 0xffff0000), new_src,
!                                             source, subtargets, generate);
!                   source = new_src;
!                   if (generate)
!                     emit_constant_insn
!                       (cond,
!                        gen_rtx_SET
!                        (VOIDmode, target,
!                         gen_rtx_XOR (mode,
!                                      gen_rtx_ASHIFT (mode, source,
!                                                      GEN_INT (i)),
!                                      source)));
!                   return insns + 1;
!                 }                            
!             }
  
  	  /* Don't duplicate cases already considered.  */
  	  for (i = 17; i < 24; i++)