diff mbox

[AArch64,5/5] Improve immediate generation

Message ID 000d01d0e57b$fd70a360$f851ea20$@com
State New
Headers show

Commit Message

Wilco Sept. 2, 2015, 12:36 p.m. UTC
Clean up the remainder of aarch64_internal_mov_immediate. Compute the number of 16-bit aligned 16-bit
masks that are all-zeroes or all-ones, and emit the smallest sequence using a single loop that skips
either the all-ones or the all-zeroes masks.

Passes the GCC regression tests and bootstrap. There are minor changes in the generated code for some
special cases, but code size is identical.

ChangeLog:
2015-09-02  Wilco Dijkstra  <wdijkstr@arm.com>

	* gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
	Cleanup immediate generation code.

---
 gcc/config/aarch64/aarch64.c | 137 ++++++++++++-------------------------------
 1 file changed, 39 insertions(+), 98 deletions(-)

Comments

James Greenhalgh Sept. 18, 2015, 2:32 p.m. UTC | #1
On Wed, Sep 02, 2015 at 01:36:28PM +0100, Wilco Dijkstra wrote:
> Clean up the remainder of aarch64_internal_mov_immediate. Compute the number
> of 16-bit aligned 16-bit masks that are all-zeroes or all-ones, and emit the
> smallest sequence using a single loop that skips either the all-ones or the
> all-zeroes masks.
> 
> Passes the GCC regression tests and bootstrap. There are minor changes in the
> generated code for some special cases, but code size is identical.

OK.

Thanks,
James

> 
> ChangeLog:
> 2015-09-02  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
> 	Cleanup immediate generation code.
> 
> ---
>  gcc/config/aarch64/aarch64.c | 137 ++++++++++++-------------------------------
>  1 file changed, 39 insertions(+), 98 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index af9a3d3..ca4428a 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1367,75 +1367,42 @@ static int
>  aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>  				machine_mode mode)
>  {
> -  unsigned HOST_WIDE_INT mask;
>    int i;
> -  bool first;
> -  unsigned HOST_WIDE_INT val, val2;
> -  int one_match, zero_match, first_not_ffff_match;
> -  int num_insns = 0;
> +  unsigned HOST_WIDE_INT val, val2, mask;
> +  int one_match, zero_match;
> +  int num_insns;
>  
> -  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
> +  val = INTVAL (imm);
> +
> +  if (aarch64_move_imm (val, mode))
>      {
>        if (generate)
>  	emit_insn (gen_rtx_SET (dest, imm));
> -      num_insns++;
> -      return num_insns;
> +      return 1;
>      }
>  
> -  if (mode == SImode)
> +  if ((val >> 32) == 0 || mode == SImode)
>      {
> -      /* We know we can't do this in 1 insn, and we must be able to do it
> -	 in two; so don't mess around looking for sequences that don't buy
> -	 us anything.  */
>        if (generate)
>  	{
> -	  emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
> -	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
> -				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
> +	  emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
> +	  if (mode == SImode)
> +	    emit_insn (gen_insv_immsi (dest, GEN_INT (16),
> +				       GEN_INT ((val >> 16) & 0xffff)));
> +	  else
> +	    emit_insn (gen_insv_immdi (dest, GEN_INT (16),
> +				       GEN_INT ((val >> 16) & 0xffff)));
>  	}
> -      num_insns += 2;
> -      return num_insns;
> +      return 2;
>      }
>  
>    /* Remaining cases are all for DImode.  */
>  
> -  val = INTVAL (imm);
> -
> -  one_match = 0;
> -  zero_match = 0;
>    mask = 0xffff;
> -  first_not_ffff_match = -1;
> -
> -  for (i = 0; i < 64; i += 16, mask <<= 16)
> -    {
> -      if ((val & mask) == mask)
> -	one_match++;
> -      else
> -	{
> -	  if (first_not_ffff_match < 0)
> -	    first_not_ffff_match = i;
> -	  if ((val & mask) == 0)
> -	    zero_match++;
> -	}
> -    }
> -
> -  if (one_match == 2)
> -    {
> -      /* Set one of the quarters and then insert back into result.  */
> -      mask = 0xffffll << first_not_ffff_match;
> -      if (generate)
> -	{
> -	  emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
> -	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
> -				     GEN_INT ((val >> first_not_ffff_match)
> -					      & 0xffff)));
> -	}
> -      num_insns += 2;
> -      return num_insns;
> -    }
> -
> -  if (zero_match == 2)
> -    goto simple_sequence;
> +  zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
> +    ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
> +  one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
> +    ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
>  
>    if (zero_match != 2 && one_match != 2)
>      {
> @@ -1463,58 +1430,32 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>  	    {
>  	      emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
>  	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> -			 GEN_INT ((val >> i) & 0xffff)));
> +					 GEN_INT ((val >> i) & 0xffff)));
>  	    }
> -	  return 2;
>  	}
>      }
>  
> -  if (one_match > zero_match)
> -    {
> -      /* Set either first three quarters or all but the third.	 */
> -      mask = 0xffffll << (16 - first_not_ffff_match);
> -      if (generate)
> -	emit_insn (gen_rtx_SET (dest,
> -				GEN_INT (val | mask | 0xffffffff00000000ull)));
> -      num_insns ++;
> +  /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
> +     are emitted by the initial mov.  If one_match > zero_match, skip set bits,
> +     otherwise skip zero bits.  */
>  
> -      /* Now insert other two quarters.	 */
> -      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
> -	   i < 64; i += 16, mask <<= 16)
> -	{
> -	  if ((val & mask) != mask)
> -	    {
> -	      if (generate)
> -		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> -					   GEN_INT ((val >> i) & 0xffff)));
> -	      num_insns ++;
> -	    }
> -	}
> -      return num_insns;
> -    }
> -
> - simple_sequence:
> -  first = true;
> +  num_insns = 1;
>    mask = 0xffff;
> -  for (i = 0; i < 64; i += 16, mask <<= 16)
> +  val2 = one_match > zero_match ? ~val : val;
> +  i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
> +
> +  if (generate)
> +    emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
> +					   ? (val | ~(mask << i))
> +					   : (val & (mask << i)))));
> +  for (i += 16; i < 64; i += 16)
>      {
> -      if ((val & mask) != 0)
> -	{
> -	  if (first)
> -	    {
> -	      if (generate)
> -		emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
> -	      num_insns ++;
> -	      first = false;
> -	    }
> -	  else
> -	    {
> -	      if (generate)
> -		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> -					   GEN_INT ((val >> i) & 0xffff)));
> -	      num_insns ++;
> -	    }
> -	}
> +      if ((val2 & (mask << i)) == 0)
> +	continue;
> +      if (generate)
> +	emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> +				   GEN_INT ((val >> i) & 0xffff)));
> +      num_insns ++;
>      }
>  
>    return num_insns;
> -- 
> 1.8.3
> 
> 
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index af9a3d3..ca4428a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1367,75 +1367,42 @@  static int
 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
 				machine_mode mode)
 {
-  unsigned HOST_WIDE_INT mask;
   int i;
-  bool first;
-  unsigned HOST_WIDE_INT val, val2;
-  int one_match, zero_match, first_not_ffff_match;
-  int num_insns = 0;
+  unsigned HOST_WIDE_INT val, val2, mask;
+  int one_match, zero_match;
+  int num_insns;
 
-  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
+  val = INTVAL (imm);
+
+  if (aarch64_move_imm (val, mode))
     {
       if (generate)
 	emit_insn (gen_rtx_SET (dest, imm));
-      num_insns++;
-      return num_insns;
+      return 1;
     }
 
-  if (mode == SImode)
+  if ((val >> 32) == 0 || mode == SImode)
     {
-      /* We know we can't do this in 1 insn, and we must be able to do it
-	 in two; so don't mess around looking for sequences that don't buy
-	 us anything.  */
       if (generate)
 	{
-	  emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
-	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
+	  emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
+	  if (mode == SImode)
+	    emit_insn (gen_insv_immsi (dest, GEN_INT (16),
+				       GEN_INT ((val >> 16) & 0xffff)));
+	  else
+	    emit_insn (gen_insv_immdi (dest, GEN_INT (16),
+				       GEN_INT ((val >> 16) & 0xffff)));
 	}
-      num_insns += 2;
-      return num_insns;
+      return 2;
     }
 
   /* Remaining cases are all for DImode.  */
 
-  val = INTVAL (imm);
-
-  one_match = 0;
-  zero_match = 0;
   mask = 0xffff;
-  first_not_ffff_match = -1;
-
-  for (i = 0; i < 64; i += 16, mask <<= 16)
-    {
-      if ((val & mask) == mask)
-	one_match++;
-      else
-	{
-	  if (first_not_ffff_match < 0)
-	    first_not_ffff_match = i;
-	  if ((val & mask) == 0)
-	    zero_match++;
-	}
-    }
-
-  if (one_match == 2)
-    {
-      /* Set one of the quarters and then insert back into result.  */
-      mask = 0xffffll << first_not_ffff_match;
-      if (generate)
-	{
-	  emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
-	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-				     GEN_INT ((val >> first_not_ffff_match)
-					      & 0xffff)));
-	}
-      num_insns += 2;
-      return num_insns;
-    }
-
-  if (zero_match == 2)
-    goto simple_sequence;
+  zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
+    ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
+  one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
+    ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
 
   if (zero_match != 2 && one_match != 2)
     {
@@ -1463,58 +1430,32 @@  aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
 	    {
 	      emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
 	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-			 GEN_INT ((val >> i) & 0xffff)));
+					 GEN_INT ((val >> i) & 0xffff)));
 	    }
-	  return 2;
 	}
     }
 
-  if (one_match > zero_match)
-    {
-      /* Set either first three quarters or all but the third.	 */
-      mask = 0xffffll << (16 - first_not_ffff_match);
-      if (generate)
-	emit_insn (gen_rtx_SET (dest,
-				GEN_INT (val | mask | 0xffffffff00000000ull)));
-      num_insns ++;
+  /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
+     are emitted by the initial mov.  If one_match > zero_match, skip set bits,
+     otherwise skip zero bits.  */
 
-      /* Now insert other two quarters.	 */
-      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
-	   i < 64; i += 16, mask <<= 16)
-	{
-	  if ((val & mask) != mask)
-	    {
-	      if (generate)
-		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-					   GEN_INT ((val >> i) & 0xffff)));
-	      num_insns ++;
-	    }
-	}
-      return num_insns;
-    }
-
- simple_sequence:
-  first = true;
+  num_insns = 1;
   mask = 0xffff;
-  for (i = 0; i < 64; i += 16, mask <<= 16)
+  val2 = one_match > zero_match ? ~val : val;
+  i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
+
+  if (generate)
+    emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
+					   ? (val | ~(mask << i))
+					   : (val & (mask << i)))));
+  for (i += 16; i < 64; i += 16)
     {
-      if ((val & mask) != 0)
-	{
-	  if (first)
-	    {
-	      if (generate)
-		emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
-	      num_insns ++;
-	      first = false;
-	    }
-	  else
-	    {
-	      if (generate)
-		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-					   GEN_INT ((val >> i) & 0xffff)));
-	      num_insns ++;
-	    }
-	}
+      if ((val2 & (mask << i)) == 0)
+	continue;
+      if (generate)
+	emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+				   GEN_INT ((val >> i) & 0xffff)));
+      num_insns ++;
     }
 
   return num_insns;