diff mbox

[ARM] Optimise NotDI AND/OR ZeroExtendSI for ARMv7A

Message ID 20140319165354.GB28292@e104535-lin.arm.com
State New
Headers show

Commit Message

Ian Bolton March 19, 2014, 4:53 p.m. UTC
This is a follow-on patch to one already committed:
http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01128.html

It implements patterns to simplify our RTL as follows:

OR (Not:DI (A:DI), ZeroExtend:DI (B:SI))
  -->  the top half can be done with a MVN

AND (Not:DI (A:DI), ZeroExtend:DI (B:SI))
  -->  the top half becomes zero.

I've added test cases for both of these and also the existing
anddi_notdi patterns.  The tests all pass.

Full regression runs passed.

OK for stage 1?

Cheers,
Ian


2014-03-19  Ian Bolton  <ian.bolton@arm.com>

gcc/
	* config/arm/arm.md (*anddi_notdi_zesidi): New pattern.
	* config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern.

testsuite/
	* gcc.target/arm/anddi_notdi-1.c: New test.
	* gcc.target/arm/iordi_notdi-1.c: Rename existing test functions and
	add a test for *iordi_notdi_zesidi.

Comments

Richard Earnshaw March 21, 2014, 1:57 p.m. UTC | #1
On 19/03/14 16:53, Ian Bolton wrote:
> This is a follow-on patch to one already committed:
> http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01128.html
> 
> It implements patterns to simplify our RTL as follows:
> 
> OR (Not:DI (A:DI), ZeroExtend:DI (B:SI))
>   -->  the top half can be done with a MVN
> 
> AND (Not:DI (A:DI), ZeroExtend:DI (B:SI))
>   -->  the top half becomes zero.
> 
> I've added test cases for both of these and also the existing
> anddi_notdi patterns.  The tests all pass.
> 
> Full regression runs passed.
> 
> OK for stage 1?
> 
> Cheers,
> Ian
> 
> 
> 2014-03-19  Ian Bolton  <ian.bolton@arm.com>
> 
> gcc/
> 	* config/arm/arm.md (*anddi_notdi_zesidi): New pattern
> 	* config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern.
> 
> testsuite/
> 	* gcc.target/arm/anddi_notdi-1.c: New test.
> 	* gcc.target/arm/iordi_notdi-1.c: New test case.
> 
> 
> arm-and-ior-notdi-zeroextend-patch-v1.txt
> 
> 
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index 2ddda02..d2d85ee 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -2962,6 +2962,28 @@
>     (set_attr "type" "multiple")]
>  )
>  
> +(define_insn_and_split "*anddi_notdi_zesidi"
> +  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
> +        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
> +                (zero_extend:DI
> +                 (match_operand:SI 1 "s_register_operand" "r,r"))))]

The early clobber and register tying here are unnecessary.  All of the
input operands are consumed in the first instruction, so you can
eliminate the ties and the restriction on the overlap.  Something like
(untested):

+(define_insn_and_split "*anddi_notdi_zesidi"
+  [(set (match_operand:DI 0 "s_register_operand" "=r")
+        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r"))
+                (zero_extend:DI
+                 (match_operand:SI 1 "s_register_operand" "r"))))]

Ok for stage-1 with that change (though I'd recommend another test run
to validate the above).

R.

> +  "TARGET_32BIT"
> +  "#"
> +  "TARGET_32BIT && reload_completed"
> +  [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1)))
> +   (set (match_dup 3) (const_int 0))]
> +  "
> +  {
> +    operands[3] = gen_highpart (SImode, operands[0]);
> +    operands[0] = gen_lowpart (SImode, operands[0]);
> +    operands[2] = gen_lowpart (SImode, operands[2]);
> +  }"
> +  [(set_attr "length" "8")
> +   (set_attr "predicable" "yes")
> +   (set_attr "predicable_short_it" "no")
> +   (set_attr "type" "multiple")]
> +)
> +
>  (define_insn_and_split "*anddi_notsesidi_di"
>    [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
>  	(and:DI (not:DI (sign_extend:DI
> diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
> index 467c619..10bc8b1 100644
> --- a/gcc/config/arm/thumb2.md
> +++ b/gcc/config/arm/thumb2.md
> @@ -1418,6 +1418,30 @@
>     (set_attr "type" "multiple")]
>  )
>  
> +(define_insn_and_split "*iordi_notdi_zesidi"
> +  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
> +	(ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
> +		(zero_extend:DI
> +		 (match_operand:SI 1 "s_register_operand" "r,r"))))]
> +  "TARGET_THUMB2"
> +  "#"
> +  "TARGET_THUMB2 && reload_completed"
> +  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
> +   (set (match_dup 3) (not:SI (match_dup 4)))]
> +  "
> +  {
> +    operands[3] = gen_highpart (SImode, operands[0]);
> +    operands[0] = gen_lowpart (SImode, operands[0]);
> +    operands[1] = gen_lowpart (SImode, operands[1]);
> +    operands[4] = gen_highpart (SImode, operands[2]);
> +    operands[2] = gen_lowpart (SImode, operands[2]);
> +  }"
> +  [(set_attr "length" "8")
> +   (set_attr "predicable" "yes")
> +   (set_attr "predicable_short_it" "no")
> +   (set_attr "type" "multiple")]
> +)
> +
>  (define_insn_and_split "*iordi_notsesidi_di"
>    [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
>  	(ior:DI (not:DI (sign_extend:DI
> diff --git a/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
> new file mode 100644
> index 0000000..cfb33fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
> @@ -0,0 +1,65 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-inline --save-temps" } */
> +
> +extern void abort (void);
> +
> +typedef long long s64int;
> +typedef int s32int;
> +typedef unsigned long long u64int;
> +typedef unsigned int u32int;
> +
> +s64int
> +anddi_di_notdi (s64int a, s64int b)
> +{
> +  return (a & ~b);
> +}
> +
> +s64int
> +anddi_di_notzesidi (s64int a, u32int b)
> +{
> +  return (a & ~(u64int) b);
> +}
> +
> +s64int
> +anddi_notdi_zesidi (s64int a, u32int b)
> +{
> +  return (~a & (u64int) b);
> +}
> +
> +s64int
> +anddi_di_notsesidi (s64int a, s32int b)
> +{
> +  return (a & ~(s64int) b);
> +}
> +
> +int main ()
> +{
> +  s64int a64 = 0xdeadbeef0000ffffll;
> +  s64int b64 = 0x000000005f470112ll;
> +  s64int c64 = 0xdeadbeef300f0000ll;
> +
> +  u32int c32 = 0x01124f4f;
> +  s32int d32 = 0xabbaface;
> +
> +  s64int z = anddi_di_notdi (c64, b64);
> +  if (z != 0xdeadbeef20080000ll)
> +    abort ();
> +
> +  z = anddi_di_notzesidi (a64, c32);
> +  if (z != 0xdeadbeef0000b0b0ll)
> +    abort ();
> +
> +  z = anddi_notdi_zesidi (c64, c32);
> +  if (z != 0x0000000001104f4fll)
> +    abort ();
> +
> +  z = anddi_di_notsesidi (a64, d32);
> +  if (z != 0x0000000000000531ll)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "bic\t" 6 } } */
> +
> +/* { dg-final { cleanup-saved-temps } } */
> diff --git a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
> index cda9c0e..249f080 100644
> --- a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
> +++ b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
> @@ -9,19 +9,25 @@ typedef unsigned long long u64int;
>  typedef unsigned int u32int;
>  
>  s64int
> -iordi_notdi (s64int a, s64int b)
> +iordi_di_notdi (s64int a, s64int b)
>  {
>    return (a | ~b);
>  }
>  
>  s64int
> -iordi_notzesidi (s64int a, u32int b)
> +iordi_di_notzesidi (s64int a, u32int b)
>  {
>    return (a | ~(u64int) b);
>  }
>  
>  s64int
> -iordi_notsesidi (s64int a, s32int b)
> +iordi_notdi_zesidi (s64int a, u32int b)
> +{
> +  return (~a | (u64int) b);
> +}
> +
> +s64int
> +iordi_di_notsesidi (s64int a, s32int b)
>  {
>    return (a | ~(s64int) b);
>  }
> @@ -30,25 +36,30 @@ int main ()
>  {
>    s64int a64 = 0xdeadbeef00000000ll;
>    s64int b64 = 0x000000004f4f0112ll;
> +  s64int c64 = 0xdeadbeef000f0000ll;
>  
>    u32int c32 = 0x01124f4f;
>    s32int d32 = 0xabbaface;
>  
> -  s64int z = iordi_notdi (a64, b64);
> +  s64int z = iordi_di_notdi (a64, b64);
>    if (z != 0xffffffffb0b0feedll)
>      abort ();
>  
> -  z = iordi_notzesidi (a64, c32);
> +  z = iordi_di_notzesidi (a64, c32);
>    if (z != 0xfffffffffeedb0b0ll)
>      abort ();
>  
> -  z = iordi_notsesidi (a64, d32);
> +  z = iordi_notdi_zesidi (c64, c32);
> +  if (z != 0x21524110fff2ffffll)
> +    abort ();
> +
> +  z = iordi_di_notsesidi (a64, d32);
>    if (z != 0xdeadbeef54450531ll)
>      abort ();
>  
>    return 0;
>  }
>  
> -/* { dg-final { scan-assembler-times "orn\t" 5 { target arm_thumb2 } } } */
> +/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */
>  
>  /* { dg-final { cleanup-saved-temps } } */
>
diff mbox

Patch

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 2ddda02..d2d85ee 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -2962,6 +2962,28 @@ 
    (set_attr "type" "multiple")]
 )
 
+(define_insn_and_split "*anddi_notdi_zesidi"
+  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
+                (zero_extend:DI
+                 (match_operand:SI 1 "s_register_operand" "r,r"))))]
+  "TARGET_32BIT"
+  "#"
+  "TARGET_32BIT && reload_completed"
+  [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (const_int 0))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
 (define_insn_and_split "*anddi_notsesidi_di"
   [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
 	(and:DI (not:DI (sign_extend:DI
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 467c619..10bc8b1 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1418,6 +1418,30 @@ 
    (set_attr "type" "multiple")]
 )
 
+(define_insn_and_split "*iordi_notdi_zesidi"
+  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+	(ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
+		(zero_extend:DI
+		 (match_operand:SI 1 "s_register_operand" "r,r"))))]
+  "TARGET_THUMB2"
+  "#"
+  "TARGET_THUMB2 && reload_completed"
+  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (not:SI (match_dup 4)))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[4] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
 (define_insn_and_split "*iordi_notsesidi_di"
   [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
 	(ior:DI (not:DI (sign_extend:DI
diff --git a/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
new file mode 100644
index 0000000..cfb33fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
@@ -0,0 +1,65 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline --save-temps" } */
+
+extern void abort (void);
+
+typedef long long s64int;
+typedef int s32int;
+typedef unsigned long long u64int;
+typedef unsigned int u32int;
+
+s64int
+anddi_di_notdi (s64int a, s64int b)
+{
+  return (a & ~b);
+}
+
+s64int
+anddi_di_notzesidi (s64int a, u32int b)
+{
+  return (a & ~(u64int) b);
+}
+
+s64int
+anddi_notdi_zesidi (s64int a, u32int b)
+{
+  return (~a & (u64int) b);
+}
+
+s64int
+anddi_di_notsesidi (s64int a, s32int b)
+{
+  return (a & ~(s64int) b);
+}
+
+int main ()
+{
+  s64int a64 = 0xdeadbeef0000ffffll;
+  s64int b64 = 0x000000005f470112ll;
+  s64int c64 = 0xdeadbeef300f0000ll;
+
+  u32int c32 = 0x01124f4f;
+  s32int d32 = 0xabbaface;
+
+  s64int z = anddi_di_notdi (c64, b64);
+  if (z != 0xdeadbeef20080000ll)
+    abort ();
+
+  z = anddi_di_notzesidi (a64, c32);
+  if (z != 0xdeadbeef0000b0b0ll)
+    abort ();
+
+  z = anddi_notdi_zesidi (c64, c32);
+  if (z != 0x0000000001104f4fll)
+    abort ();
+
+  z = anddi_di_notsesidi (a64, d32);
+  if (z != 0x0000000000000531ll)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bic\t" 6 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
index cda9c0e..249f080 100644
--- a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
+++ b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
@@ -9,19 +9,25 @@  typedef unsigned long long u64int;
 typedef unsigned int u32int;
 
 s64int
-iordi_notdi (s64int a, s64int b)
+iordi_di_notdi (s64int a, s64int b)
 {
   return (a | ~b);
 }
 
 s64int
-iordi_notzesidi (s64int a, u32int b)
+iordi_di_notzesidi (s64int a, u32int b)
 {
   return (a | ~(u64int) b);
 }
 
 s64int
-iordi_notsesidi (s64int a, s32int b)
+iordi_notdi_zesidi (s64int a, u32int b)
+{
+  return (~a | (u64int) b);
+}
+
+s64int
+iordi_di_notsesidi (s64int a, s32int b)
 {
   return (a | ~(s64int) b);
 }
@@ -30,25 +36,30 @@  int main ()
 {
   s64int a64 = 0xdeadbeef00000000ll;
   s64int b64 = 0x000000004f4f0112ll;
+  s64int c64 = 0xdeadbeef000f0000ll;
 
   u32int c32 = 0x01124f4f;
   s32int d32 = 0xabbaface;
 
-  s64int z = iordi_notdi (a64, b64);
+  s64int z = iordi_di_notdi (a64, b64);
   if (z != 0xffffffffb0b0feedll)
     abort ();
 
-  z = iordi_notzesidi (a64, c32);
+  z = iordi_di_notzesidi (a64, c32);
   if (z != 0xfffffffffeedb0b0ll)
     abort ();
 
-  z = iordi_notsesidi (a64, d32);
+  z = iordi_notdi_zesidi (c64, c32);
+  if (z != 0x21524110fff2ffffll)
+    abort ();
+
+  z = iordi_di_notsesidi (a64, d32);
   if (z != 0xdeadbeef54450531ll)
     abort ();
 
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "orn\t" 5 { target arm_thumb2 } } } */
+/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */
 
 /* { dg-final { cleanup-saved-temps } } */