diff mbox

Add {u,}mulvhi4 patterns on i?86 (PR target/66112)

Message ID 20150512184327.GB1751@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek May 12, 2015, 6:43 p.m. UTC
Hi!

This patch improves expansion of __builtin_mul_overflow for HImode, both
signed and unsigned, on x86_64/i686.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2015-05-12  Jakub Jelinek  <jakub@redhat.com>

	PR target/66112
	* config/i386/i386.md (mulv<mode>4, umulv<mode>4, *umulv<mode>4):
	Use SWI248 iterator instead of SWI48.
	(*mulv<mode>4_1): Use SWI48 instead of SWI.  Simplify output template.
	Use eq_attr "alternative" "0" instead of match_test in
	length_immediate attribute computation.
	(*mulvhi4, *mulvhi4_1): New define_insns.

	* gcc.target/i386/pr66112-2.c: New test.


	Jakub

Comments

Ramana Radhakrishnan May 12, 2015, 7:26 p.m. UTC | #1
On Tue, May 12, 2015 at 7:43 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch improves expansion of __builtin_mul_overflow for HImode, both
> signed and unsigned, on x86_64/i686.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2015-05-12  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/66112
>         * config/i386/i386.md (mulv<mode>4, umulv<mode>4, *umulv<mode>4):
>         Use SWI248 iterator instead of SWI.

These names along with the other *v<mode>4 patterns need documentation
in md.texi.


Ramana

>         (*mulv<mode>4_1): Use SWI48 instead of SWI.  Simplify output template.
>         Use eq_attr "alternative" "0" instead of match_test in
>         length_immediate attribute computation.
>         (*mulvhi4, *mulvhi4_1): New define_insns.
>
>         * gcc.target/i386/pr66112-2.c: New test.
>
> --- gcc/config/i386/i386.md.jj  2015-05-11 09:08:21.000000000 +0200
> +++ gcc/config/i386/i386.md     2015-05-12 11:26:55.642794479 +0200
> @@ -6602,14 +6602,14 @@
>    [(parallel [(set (reg:CCO FLAGS_REG)
>                    (eq:CCO (mult:<DWI>
>                               (sign_extend:<DWI>
> -                                (match_operand:SWI48 1 "register_operand"))
> +                                (match_operand:SWI248 1 "register_operand"))
>                               (match_dup 4))
>                            (sign_extend:<DWI>
> -                             (mult:SWI48 (match_dup 1)
> -                                         (match_operand:SWI48 2
> -                                            "<general_operand>")))))
> -             (set (match_operand:SWI48 0 "register_operand")
> -                  (mult:SWI48 (match_dup 1) (match_dup 2)))])
> +                             (mult:SWI248 (match_dup 1)
> +                                          (match_operand:SWI248 2
> +                                             "<general_operand>")))))
> +             (set (match_operand:SWI248 0 "register_operand")
> +                  (mult:SWI248 (match_dup 1) (match_dup 2)))])
>     (set (pc) (if_then_else
>                (eq (reg:CCO FLAGS_REG) (const_int 0))
>                (label_ref (match_operand 3))
> @@ -6665,16 +6665,14 @@
>                    (match_operand:<DWI> 3 "const_int_operand" "K,i"))
>                 (sign_extend:<DWI>
>                    (mult:SWI48 (match_dup 1)
> -                              (match_operand:SWI 2 "x86_64_immediate_operand"
> -                                                   "K,<i>")))))
> +                              (match_operand:SWI48 2
> +                                 "x86_64_immediate_operand" "K,<i>")))))
>     (set (match_operand:SWI48 0 "register_operand" "=r,r")
>         (mult:SWI48 (match_dup 1) (match_dup 2)))]
>    "!(MEM_P (operands[1]) && MEM_P (operands[2]))
>     && CONST_INT_P (operands[2])
>     && INTVAL (operands[2]) == INTVAL (operands[3])"
> -  "@
> -   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
> -   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> +  "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
>    [(set_attr "type" "imul")
>     (set (attr "athlon_decode")
>         (cond [(eq_attr "cpu" "athlon")
> @@ -6689,26 +6687,78 @@
>     (set_attr "bdver1_decode" "direct")
>     (set_attr "mode" "<MODE>")
>     (set (attr "length_immediate")
> -       (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
> +       (cond [(eq_attr "alternative" "0")
>                   (const_string "1")
>                (match_test "<MODE_SIZE> == 8")
>                   (const_string "4")]
>               (const_string "<MODE_SIZE>")))])
>
> +(define_insn "*mulvhi4"
> +  [(set (reg:CCO FLAGS_REG)
> +       (eq:CCO (mult:SI
> +                  (sign_extend:SI
> +                     (match_operand:HI 1 "nonimmediate_operand" "0"))
> +                  (sign_extend:SI
> +                     (match_operand:HI 2 "general_operand" "mr")))
> +               (sign_extend:SI
> +                  (mult:HI (match_dup 1) (match_dup 2)))))
> +   (set (match_operand:HI 0 "register_operand" "=r")
> +       (mult:HI (match_dup 1) (match_dup 2)))]
> +  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
> +  "imul{w}\t{%2, %0|%0, %2}"
> +  [(set_attr "type" "imul")
> +   (set_attr "prefix_0f" "1")
> +   (set_attr "athlon_decode" "vector")
> +   (set_attr "amdfam10_decode" "direct")
> +   (set_attr "bdver1_decode" "double")
> +   (set_attr "mode" "HI")])
> +
> +(define_insn "*mulvhi4_1"
> +  [(set (reg:CCO FLAGS_REG)
> +       (eq:CCO (mult:SI
> +                  (sign_extend:SI
> +                     (match_operand:HI 1 "nonimmediate_operand" "rm,rm"))
> +                  (match_operand:SI 3 "const_int_operand" "K,i"))
> +               (sign_extend:SI
> +                  (mult:HI (match_dup 1)
> +                              (match_operand:HI 2
> +                                 "x86_64_immediate_operand" "K,n")))))
> +   (set (match_operand:HI 0 "register_operand" "=r,r")
> +       (mult:HI (match_dup 1) (match_dup 2)))]
> +  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
> +   && CONST_INT_P (operands[2])
> +   && INTVAL (operands[2]) == INTVAL (operands[3])"
> +  "imul{w}\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "type" "imul")
> +   (set_attr "prefix_0f" "0")
> +   (set (attr "athlon_decode")
> +       (cond [(eq_attr "cpu" "athlon")
> +                 (const_string "vector")
> +              (eq_attr "alternative" "1")
> +                 (const_string "vector")]
> +             (const_string "direct")))
> +   (set_attr "amdfam10_decode" "vector")
> +   (set_attr "bdver1_decode" "double")
> +   (set_attr "mode" "HI")
> +   (set (attr "length_immediate")
> +       (cond [(eq_attr "alternative" "0")
> +                 (const_string "1")]
> +             (const_string "2")))])
> +
>  (define_expand "umulv<mode>4"
>    [(parallel [(set (reg:CCO FLAGS_REG)
>                    (eq:CCO (mult:<DWI>
>                               (zero_extend:<DWI>
> -                                (match_operand:SWI48 1
> +                                (match_operand:SWI248 1
>                                                       "nonimmediate_operand"))
>                               (zero_extend:<DWI>
> -                                (match_operand:SWI48 2
> +                                (match_operand:SWI248 2
>                                                       "nonimmediate_operand")))
>                            (zero_extend:<DWI>
> -                             (mult:SWI48 (match_dup 1) (match_dup 2)))))
> -             (set (match_operand:SWI48 0 "register_operand")
> -                  (mult:SWI48 (match_dup 1) (match_dup 2)))
> -             (clobber (match_scratch:SWI48 4))])
> +                             (mult:SWI248 (match_dup 1) (match_dup 2)))))
> +             (set (match_operand:SWI248 0 "register_operand")
> +                  (mult:SWI248 (match_dup 1) (match_dup 2)))
> +             (clobber (match_scratch:SWI248 4))])
>     (set (pc) (if_then_else
>                (eq (reg:CCO FLAGS_REG) (const_int 0))
>                (label_ref (match_operand 3))
> @@ -6723,14 +6773,14 @@
>    [(set (reg:CCO FLAGS_REG)
>         (eq:CCO (mult:<DWI>
>                    (zero_extend:<DWI>
> -                     (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
> +                     (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
>                    (zero_extend:<DWI>
> -                     (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
> +                     (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
>                 (zero_extend:<DWI>
> -                  (mult:SWI48 (match_dup 1) (match_dup 2)))))
> -   (set (match_operand:SWI48 0 "register_operand" "=a")
> -       (mult:SWI48 (match_dup 1) (match_dup 2)))
> -   (clobber (match_scratch:SWI48 3 "=d"))]
> +                  (mult:SWI248 (match_dup 1) (match_dup 2)))))
> +   (set (match_operand:SWI248 0 "register_operand" "=a")
> +       (mult:SWI248 (match_dup 1) (match_dup 2)))
> +   (clobber (match_scratch:SWI248 3 "=d"))]
>    "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "mul{<imodesuffix>}\t%2"
>    [(set_attr "type" "imul")
> --- gcc/testsuite/gcc.target/i386/pr66112-2.c.jj        2015-05-12 10:46:18.565328732 +0200
> +++ gcc/testsuite/gcc.target/i386/pr66112-2.c   2015-05-12 10:50:16.203437790 +0200
> @@ -0,0 +1,29 @@
> +/* PR target/66112 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +unsigned short int
> +foo (int a, int b)
> +{
> +  unsigned short int res;
> +  a &= 0xffff;
> +  b &= 0xffff;
> +  if (__builtin_mul_overflow (a, b, &res))
> +    res = 0x123;
> +  return res;
> +}
> +
> +short int
> +bar (int a, int b)
> +{
> +  short int res;
> +  a = (short int) a;
> +  b = (short int) b;
> +  if (__builtin_mul_overflow (a, b, &res))
> +    res = 0x123;
> +  return res;
> +}
> +
> +/* { dg-final { scan-assembler-times "jn?o\[ \t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "mulw\[ \t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "imulw\[ \t\]" 1 } } */
>
>         Jakub
Uros Bizjak May 12, 2015, 7:59 p.m. UTC | #2
On Tue, May 12, 2015 at 8:43 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch improves expansion of __builtin_mul_overflow for HImode, both
> signed and unsigned, on x86_64/i686.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2015-05-12  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/66112
>         * config/i386/i386.md (mulv<mode>4, umulv<mode>4, *umulv<mode>4):
>         Use SWI248 iterator instead of SWI.
>         (*mulv<mode>4_1): Use SWI48 instead of SWI.  Simplify output template.
>         Use eq_attr "alternative" "0" instead of match_test in
>         length_immediate attribute computation.
>         (*mulvhi4, *mulvhi4_1): New define_insns.
>
>         * gcc.target/i386/pr66112-2.c: New test.

OK for mainline.

Thanks,
Uros.
diff mbox

Patch

--- gcc/config/i386/i386.md.jj	2015-05-11 09:08:21.000000000 +0200
+++ gcc/config/i386/i386.md	2015-05-12 11:26:55.642794479 +0200
@@ -6602,14 +6602,14 @@ 
   [(parallel [(set (reg:CCO FLAGS_REG)
 		   (eq:CCO (mult:<DWI>
 			      (sign_extend:<DWI>
-				 (match_operand:SWI48 1 "register_operand"))
+				 (match_operand:SWI248 1 "register_operand"))
 			      (match_dup 4))
 			   (sign_extend:<DWI>
-			      (mult:SWI48 (match_dup 1)
-					  (match_operand:SWI48 2
-					     "<general_operand>")))))
-	      (set (match_operand:SWI48 0 "register_operand")
-		   (mult:SWI48 (match_dup 1) (match_dup 2)))])
+			      (mult:SWI248 (match_dup 1)
+					   (match_operand:SWI248 2
+					      "<general_operand>")))))
+	      (set (match_operand:SWI248 0 "register_operand")
+		   (mult:SWI248 (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
 	       (eq (reg:CCO FLAGS_REG) (const_int 0))
 	       (label_ref (match_operand 3))
@@ -6665,16 +6665,14 @@ 
 		   (match_operand:<DWI> 3 "const_int_operand" "K,i"))
 		(sign_extend:<DWI>
 		   (mult:SWI48 (match_dup 1)
-			       (match_operand:SWI 2 "x86_64_immediate_operand"
-						    "K,<i>")))))
+			       (match_operand:SWI48 2
+				  "x86_64_immediate_operand" "K,<i>")))))
    (set (match_operand:SWI48 0 "register_operand" "=r,r")
 	(mult:SWI48 (match_dup 1) (match_dup 2)))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))
    && CONST_INT_P (operands[2])
    && INTVAL (operands[2]) == INTVAL (operands[3])"
-  "@
-   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
-   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "imul")
    (set (attr "athlon_decode")
 	(cond [(eq_attr "cpu" "athlon")
@@ -6689,26 +6687,78 @@ 
    (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "<MODE>")
    (set (attr "length_immediate")
-	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+	(cond [(eq_attr "alternative" "0")
 		  (const_string "1")
 	       (match_test "<MODE_SIZE> == 8")
 		  (const_string "4")]
 	      (const_string "<MODE_SIZE>")))])
 
+(define_insn "*mulvhi4"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (mult:SI
+		   (sign_extend:SI
+		      (match_operand:HI 1 "nonimmediate_operand" "0"))
+		   (sign_extend:SI
+		      (match_operand:HI 2 "general_operand" "mr")))
+		(sign_extend:SI
+		   (mult:HI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:HI 0 "register_operand" "=r")
+	(mult:HI (match_dup 1) (match_dup 2)))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "1")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "mode" "HI")])
+
+(define_insn "*mulvhi4_1"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (mult:SI
+		   (sign_extend:SI
+		      (match_operand:HI 1 "nonimmediate_operand" "rm,rm"))
+		   (match_operand:SI 3 "const_int_operand" "K,i"))
+		(sign_extend:SI
+		   (mult:HI (match_dup 1)
+			       (match_operand:HI 2
+				  "x86_64_immediate_operand" "K,n")))))
+   (set (match_operand:HI 0 "register_operand" "=r,r")
+	(mult:HI (match_dup 1) (match_dup 2)))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "imul{w}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "mode" "HI")
+   (set (attr "length_immediate")
+	(cond [(eq_attr "alternative" "0")
+		  (const_string "1")]
+	      (const_string "2")))])
+
 (define_expand "umulv<mode>4"
   [(parallel [(set (reg:CCO FLAGS_REG)
 		   (eq:CCO (mult:<DWI>
 			      (zero_extend:<DWI>
-				 (match_operand:SWI48 1
+				 (match_operand:SWI248 1
 						      "nonimmediate_operand"))
 			      (zero_extend:<DWI>
-				 (match_operand:SWI48 2
+				 (match_operand:SWI248 2
 						      "nonimmediate_operand")))
 			   (zero_extend:<DWI>
-			      (mult:SWI48 (match_dup 1) (match_dup 2)))))
-	      (set (match_operand:SWI48 0 "register_operand")
-		   (mult:SWI48 (match_dup 1) (match_dup 2)))
-	      (clobber (match_scratch:SWI48 4))])
+			      (mult:SWI248 (match_dup 1) (match_dup 2)))))
+	      (set (match_operand:SWI248 0 "register_operand")
+		   (mult:SWI248 (match_dup 1) (match_dup 2)))
+	      (clobber (match_scratch:SWI248 4))])
    (set (pc) (if_then_else
 	       (eq (reg:CCO FLAGS_REG) (const_int 0))
 	       (label_ref (match_operand 3))
@@ -6723,14 +6773,14 @@ 
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (mult:<DWI>
 		   (zero_extend:<DWI>
-		      (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
+		      (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
 		   (zero_extend:<DWI>
-		      (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
+		      (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
 		(zero_extend:<DWI>
-		   (mult:SWI48 (match_dup 1) (match_dup 2)))))
-   (set (match_operand:SWI48 0 "register_operand" "=a")
-	(mult:SWI48 (match_dup 1) (match_dup 2)))
-   (clobber (match_scratch:SWI48 3 "=d"))]
+		   (mult:SWI248 (match_dup 1) (match_dup 2)))))
+   (set (match_operand:SWI248 0 "register_operand" "=a")
+	(mult:SWI248 (match_dup 1) (match_dup 2)))
+   (clobber (match_scratch:SWI248 3 "=d"))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "mul{<imodesuffix>}\t%2"
   [(set_attr "type" "imul")
--- gcc/testsuite/gcc.target/i386/pr66112-2.c.jj	2015-05-12 10:46:18.565328732 +0200
+++ gcc/testsuite/gcc.target/i386/pr66112-2.c	2015-05-12 10:50:16.203437790 +0200
@@ -0,0 +1,29 @@ 
+/* PR target/66112 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned short int
+foo (int a, int b)
+{
+  unsigned short int res;
+  a &= 0xffff;
+  b &= 0xffff;
+  if (__builtin_mul_overflow (a, b, &res))
+    res = 0x123;
+  return res;
+}
+
+short int
+bar (int a, int b)
+{
+  short int res;
+  a = (short int) a;
+  b = (short int) b;
+  if (__builtin_mul_overflow (a, b, &res))
+    res = 0x123;
+  return res;
+}
+
+/* { dg-final { scan-assembler-times "jn?o\[ \t\]" 2 } } */
+/* { dg-final { scan-assembler-times "mulw\[ \t\]" 2 } } */
+/* { dg-final { scan-assembler-times "imulw\[ \t\]" 1 } } */