diff mbox

[AArch64] Split X-reg UBFIZ into W-reg LSL when possible

Message ID 5849294D.6040003@foss.arm.com
State New
Headers show

Commit Message

Kyrill Tkachov Dec. 8, 2016, 9:35 a.m. UTC
Hi all,

Similar to the previous patch, this transforms X-reg UBFIZ instructions into W-reg LSL instructions
when the UBFIZ operands (bit position and field width) add up to 32, so we can take advantage of the
implicit zero-extension to DImode when writing to a W-register.

This is done by splitting the existing *andim_ashift<mode>_bfiz pattern into its two SImode and DImode
specialisations and changing the DImode pattern into a define_insn_and_split that splits into a
zero-extended SImode ashift when the operands match up.

So for the code in the testcase we generate:
LSL     W0, W0, 5

instead of:
UBFIZ   X0, X0, 5, 27

Bootstrapped and tested on aarch64-none-linux-gnu.

Since we're in stage 3 perhaps this is not for GCC 7, but it is fairly low risk.
I'm happy for it to wait for the next release if necessary.

Thanks,
Kyrill

2016-12-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.md (*andim_ashift<mode>_bfiz): Split into...
     (*andim_ashiftsi_bfiz): ...This...
     (*andim_ashiftdi_bfiz): ...And this.  Add split to ashift when
     possible.

2016-12-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * gcc.target/aarch64/ubfiz_lsl_1.c: New test.

Comments

James Greenhalgh Dec. 15, 2016, 11:56 a.m. UTC | #1
On Thu, Dec 08, 2016 at 09:35:09AM +0000, Kyrill Tkachov wrote:
> Hi all,
> 
> Similar to the previous patch this transforms X-reg UBFIZ instructions into
> W-reg LSL instructions when the UBFIZ operands add up to 32, so we can take
> advantage of the implicit zero-extension to DImode
> when writing to a W-register.
> 
> This is done by splitting the existing *andim_ashift<mode>_bfiz pattern into
> its two SImode and DImode specialisations and changing the DImode pattern
> into a define_insn_and_split that splits into a
> zero-extended SImode ashift when the operands match up.
> 
> So for the code in the testcase we generate:
> LSL     W0, W0, 5
> 
> instead of:
> UBFIZ   X0, X0, 5, 27
> 
> Bootstrapped and tested on aarch64-none-linux-gnu.
> 
> Since we're in stage 3 perhaps this is not for GCC 7, but it is fairly low
> risk.  I'm happy for it to wait for the next release if necessary.

My comments on the previous patch also apply here. This patch should only
need to add one new split pattern.

Thanks,
James

> 
> Thanks,
> Kyrill
> 
> 2016-12-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     * config/aarch64/aarch64.md (*andim_ashift<mode>_bfiz): Split into...
>     (*andim_ashiftsi_bfiz): ...This...
>     (*andim_ashiftdi_bfiz): ...And this.  Add split to ashift when
>     possible.
> 
> 2016-12-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     * gcc.target/aarch64/ubfiz_lsl_1.c: New test.
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a6f659c26bb5156d652b6c1f09123e682e9ff648..d1083381876572616a61f8f59d523f258dd077f4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4459,13 +4459,33 @@  (define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>"
 
 ;; XXX We should match (any_extend (ashift)) here, like (and (ashift)) below
 
-(define_insn "*andim_ashift<mode>_bfiz"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-	(and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+(define_insn "*andim_ashiftsi_bfiz"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand 2 "const_int_operand" "n"))
+		 (match_operand 3 "const_int_operand" "n")))]
+  "aarch64_mask_and_shift_for_ubfiz_p (SImode, operands[3], operands[2])"
+  "ubfiz\\t%w0, %w1, %2, %P3"
+  [(set_attr "type" "bfx")]
+)
+
+(define_insn_and_split "*andim_ashiftdi_bfiz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
 			     (match_operand 2 "const_int_operand" "n"))
 		 (match_operand 3 "const_int_operand" "n")))]
-  "aarch64_mask_and_shift_for_ubfiz_p (<MODE>mode, operands[3], operands[2])"
-  "ubfiz\\t%<w>0, %<w>1, %2, %P3"
+  "aarch64_mask_and_shift_for_ubfiz_p (DImode, operands[3], operands[2])"
+  "ubfiz\\t%x0, %x1, %2, %P3"
+  ;; When the bit position and width of the equivalent extraction add up to 32
+  ;; we can use a W-reg LSL instruction, taking advantage of the implicit
+  ;; zero-extension of the upper 32 bits of the X-reg on a W-reg write.
+  "&& (INTVAL (operands[2]) + popcount_hwi (INTVAL (operands[3])))
+      == GET_MODE_BITSIZE (SImode)"
+  [(set (match_dup 0)
+	(zero_extend:DI (ashift:SI (match_dup 4) (match_dup 2))))]
+  {
+    operands[4] = gen_lowpart (SImode, operands[1]);
+  }
   [(set_attr "type" "bfx")]
 )
 
diff --git a/gcc/testsuite/gcc.target/aarch64/ubfiz_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/ubfiz_lsl_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..d3fd3f234f2324d71813298210fdcf0660ac45b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ubfiz_lsl_1.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Check that an X-reg UBFIZ can be simplified into a W-reg LSL.  */
+
+long long
+f2 (long long x)
+{
+  return (x << 5) & 0xffffffff;
+}
+
+/* { dg-final { scan-assembler "lsl\tw" } } */
+/* { dg-final { scan-assembler-not "ubfiz\tx" } } */