Patchwork [ARM] Optimise NotDI AND/OR ZeroExtendSI for ARMv7A

login
register
mail settings
Submitter Ian Bolton
Date March 27, 2014, 10:55 a.m.
Message ID <000601cf49ab$1d75fa20$5861ee60$@bolton@arm.com>
Download mbox | patch
Permalink /patch/334292/
State New
Headers show

Comments

Ian Bolton - March 27, 2014, 10:55 a.m.
> -----Original Message-----
> From: Richard Earnshaw
> Sent: 21 March 2014 13:57
> To: Ian Bolton
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH, ARM] Optimise NotDI AND/OR ZeroExtendSI for ARMv7A
> 
> On 19/03/14 16:53, Ian Bolton wrote:
> > This is a follow-on patch to one already committed:
> > http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01128.html
> >
> > It implements patterns to simplify our RTL as follows:
> >
> > OR (Not:DI (A:DI), ZeroExtend:DI (B:SI))
> >   -->  the top half can be done with a MVN
> >
> > AND (Not:DI (A:DI), ZeroExtend:DI (B:SI))
> >   -->  the top half becomes zero.
> >
> > I've added test cases for both of these and also the existing
> > anddi_notdi patterns.  The tests all pass.
> >
> > Full regression runs passed.
> >
> > OK for stage 1?
> >
> > Cheers,
> > Ian
> >
> >
> > 2014-03-19  Ian Bolton  <ian.bolton@arm.com>
> >
> > gcc/
> > 	* config/arm/arm.md (*anddi_notdi_zesidi): New pattern.
> > 	* config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern.
> >
> > testsuite/
> > 	* gcc.target/arm/anddi_notdi-1.c: New test.
> > 	* gcc.target/arm/iordi_notdi-1.c: New test case.
> >
> >
> > arm-and-ior-notdi-zeroextend-patch-v1.txt
> >
> >
> > diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> > index 2ddda02..d2d85ee 100644
> > --- a/gcc/config/arm/arm.md
> > +++ b/gcc/config/arm/arm.md
> > @@ -2962,6 +2962,28 @@
> >     (set_attr "type" "multiple")]
> >  )
> >
> > +(define_insn_and_split "*anddi_notdi_zesidi"
> > +  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
> > +        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
> > +                (zero_extend:DI
> > +                 (match_operand:SI 1 "s_register_operand" "r,r"))))]
> 
> The early clobber and register tying here is unnecessary.  All of the
> input operands are consumed in the first instruction, so you can
> eliminate the ties and the restriction on the overlap.  Something like
> (untested):
> 
> +(define_insn_and_split "*anddi_notdi_zesidi"
> +  [(set (match_operand:DI 0 "s_register_operand" "=r")
> +        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r"))
> +                (zero_extend:DI
> +                 (match_operand:SI 1 "s_register_operand" "r"))))]
> 
> Ok for stage-1 with that change (though I'd recommend another test run
> to validate the above).
> 
> R.

Thanks, Richard.  Regression runs came back OK with that change, so
I will consider this ready for stage 1.

The patch is attached for reference.
 
Cheers,
Ian

Patch

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 2ddda02..4176b7ff 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -2962,6 +2962,28 @@ 
    (set_attr "type" "multiple")]
 )
 
+(define_insn_and_split "*anddi_notdi_zesidi"
+  [(set (match_operand:DI 0 "s_register_operand" "=r")
+        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r"))
+                (zero_extend:DI
+                 (match_operand:SI 1 "s_register_operand" "r"))))]
+  "TARGET_32BIT"
+  "#"
+  "TARGET_32BIT && reload_completed"
+  [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (const_int 0))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
 (define_insn_and_split "*anddi_notsesidi_di"
   [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
 	(and:DI (not:DI (sign_extend:DI
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 467c619..10bc8b1 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1418,6 +1418,30 @@ 
    (set_attr "type" "multiple")]
 )
 
+(define_insn_and_split "*iordi_notdi_zesidi"
+  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+	(ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
+		(zero_extend:DI
+		 (match_operand:SI 1 "s_register_operand" "r,r"))))]
+  "TARGET_THUMB2"
+  "#"
+  "TARGET_THUMB2 && reload_completed"
+  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (not:SI (match_dup 4)))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[4] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
 (define_insn_and_split "*iordi_notsesidi_di"
   [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
 	(ior:DI (not:DI (sign_extend:DI
diff --git a/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
new file mode 100644
index 0000000..cfb33fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
@@ -0,0 +1,65 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline --save-temps" } */
+
+extern void abort (void);
+
+typedef long long s64int;
+typedef int s32int;
+typedef unsigned long long u64int;
+typedef unsigned int u32int;
+
+s64int
+anddi_di_notdi (s64int a, s64int b)
+{
+  return (a & ~b);
+}
+
+s64int
+anddi_di_notzesidi (s64int a, u32int b)
+{
+  return (a & ~(u64int) b);
+}
+
+s64int
+anddi_notdi_zesidi (s64int a, u32int b)
+{
+  return (~a & (u64int) b);
+}
+
+s64int
+anddi_di_notsesidi (s64int a, s32int b)
+{
+  return (a & ~(s64int) b);
+}
+
+int main ()
+{
+  s64int a64 = 0xdeadbeef0000ffffll;
+  s64int b64 = 0x000000005f470112ll;
+  s64int c64 = 0xdeadbeef300f0000ll;
+
+  u32int c32 = 0x01124f4f;
+  s32int d32 = 0xabbaface;
+
+  s64int z = anddi_di_notdi (c64, b64);
+  if (z != 0xdeadbeef20080000ll)
+    abort ();
+
+  z = anddi_di_notzesidi (a64, c32);
+  if (z != 0xdeadbeef0000b0b0ll)
+    abort ();
+
+  z = anddi_notdi_zesidi (c64, c32);
+  if (z != 0x0000000001104f4fll)
+    abort ();
+
+  z = anddi_di_notsesidi (a64, d32);
+  if (z != 0x0000000000000531ll)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bic\t" 6 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
index cda9c0e..249f080 100644
--- a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
+++ b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
@@ -9,19 +9,25 @@  typedef unsigned long long u64int;
 typedef unsigned int u32int;
 
 s64int
-iordi_notdi (s64int a, s64int b)
+iordi_di_notdi (s64int a, s64int b)
 {
   return (a | ~b);
 }
 
 s64int
-iordi_notzesidi (s64int a, u32int b)
+iordi_di_notzesidi (s64int a, u32int b)
 {
   return (a | ~(u64int) b);
 }
 
 s64int
-iordi_notsesidi (s64int a, s32int b)
+iordi_notdi_zesidi (s64int a, u32int b)
+{
+  return (~a | (u64int) b);
+}
+
+s64int
+iordi_di_notsesidi (s64int a, s32int b)
 {
   return (a | ~(s64int) b);
 }
@@ -30,25 +36,30 @@  int main ()
 {
   s64int a64 = 0xdeadbeef00000000ll;
   s64int b64 = 0x000000004f4f0112ll;
+  s64int c64 = 0xdeadbeef000f0000ll;
 
   u32int c32 = 0x01124f4f;
   s32int d32 = 0xabbaface;
 
-  s64int z = iordi_notdi (a64, b64);
+  s64int z = iordi_di_notdi (a64, b64);
   if (z != 0xffffffffb0b0feedll)
     abort ();
 
-  z = iordi_notzesidi (a64, c32);
+  z = iordi_di_notzesidi (a64, c32);
   if (z != 0xfffffffffeedb0b0ll)
     abort ();
 
-  z = iordi_notsesidi (a64, d32);
+  z = iordi_notdi_zesidi (c64, c32);
+  if (z != 0x21524110fff2ffffll)
+    abort ();
+
+  z = iordi_di_notsesidi (a64, d32);
   if (z != 0xdeadbeef54450531ll)
     abort ();
 
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "orn\t" 5 { target arm_thumb2 } } } */
+/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */
 
 /* { dg-final { cleanup-saved-temps } } */