diff mbox series

Fix various arithmetic patterns with %[abcd]h destination (PR target/82524)

Message ID 20171012194919.GD14653@tucnak
State New
Headers show
Series Fix various arithmetic patterns with %[abcd]h destination (PR target/82524) | expand

Commit Message

Jakub Jelinek Oct. 12, 2017, 7:49 p.m. UTC
Hi!

As mentioned in the PR, there are two bugs in these.  One is that
the zero_extract destination is effectively another input operand (for the
remaining bits that are unchanged) and thus the constraint can't be =Q,
but has to be +Q.
And the other problem is that then LRA ICEs whenever it has 3 different
operands, because it is unable to reload it properly.  Uros mentioned
that it could be reloaded by using *insvqi_2-like insn to move the
8 bits from the operand that should use "0" constraint into the destination
register, but LRA isn't able to do that right now.
So this patch instead adds insn conditions that either the destination
is the same as the first input operand or as one of the input operands
(the latter for commutative patterns).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.3?

2017-10-12  Jakub Jelinek  <jakub@redhat.com>

	PR target/82524
	* config/i386/i386.md (addqi_ext_1, andqi_ext_1,
	*andqi_ext_1_cc, *<code>qi_ext_1, *xorqi_ext_1_cc): Change
	=Q constraints to +Q and into insn condition add check
	that operands[0] and operands[1] are equal.
	(*addqi_ext_2, *andqi_ext_2, *<code>qi_ext_2): Change
	=Q constraints to +Q and into insn condition add check
	that operands[0] is equal to either operands[1] or operands[2].

	* gcc.c-torture/execute/pr82524.c: New test.


	Jakub

Comments

Uros Bizjak Oct. 13, 2017, 6:48 a.m. UTC | #1
On Thu, Oct 12, 2017 at 9:49 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, there are two bugs in these.  One is that
> the zero_extract destination is effectively another input operand (for the
> remaining bits that are unchanged) and thus the constraint can't be =Q,
> but has to be +Q.
> And the other problem is that then LRA ICEs whenever it has 3 different
> operands, because it is unable to reload it properly.  Uros mentioned
> that it could be reloaded by using *insvqi_2-like insn to move the
> 8 bits from the operand that should use "0" constraint into the destination
> register, but LRA isn't able to do that right now.
> So this patch instead adds insn conditions that either the destination
> is the same as the first input operand or as one of the input operands
> (the latter for commutative patterns).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.3?
>
> 2017-10-12  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/82524
>         * config/i386/i386.md (addqi_ext_1, andqi_ext_1,
>         *andqi_ext_1_cc, *<code>qi_ext_1, *xorqi_ext_1_cc): Change
>         =Q constraints to +Q and into insn condition add check
>         that operands[0] and operands[1] are equal.
>         (*addqi_ext_2, *andqi_ext_2, *<code>qi_ext_2): Change
>         =Q constraints to +Q and into insn condition add check
>         that operands[0] is equal to either operands[1] or operands[2].
>
>         * gcc.c-torture/execute/pr82524.c: New test.

OK for mainline and gcc-7 branch.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2017-10-12 14:05:15.000000000 +0200
> +++ gcc/config/i386/i386.md     2017-10-12 17:07:11.723151868 +0200
> @@ -6264,7 +6264,7 @@ (define_insn "*add<mode>_5"
>     (set_attr "mode" "<MODE>")])
>
>  (define_insn "addqi_ext_1"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -6275,7 +6275,8 @@ (define_insn "addqi_ext_1"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])"
>  {
>    switch (get_attr_type (insn))
>      {
> @@ -6300,7 +6301,7 @@ (define_insn "addqi_ext_1"
>     (set_attr "mode" "QI")])
>
>  (define_insn "*addqi_ext_2"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -6314,7 +6315,9 @@ (define_insn "*addqi_ext_2"
>                                (const_int 8)
>                                (const_int 8)) 0)) 0))
>    (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])
> +   || rtx_equal_p (operands[0], operands[2])"
>    "add{b}\t{%h2, %h0|%h0, %h2}"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "QI")])
> @@ -8998,7 +9001,7 @@ (define_insn "*andqi_2_slp"
>     (set_attr "mode" "QI")])
>
>  (define_insn "andqi_ext_1"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9009,7 +9012,8 @@ (define_insn "andqi_ext_1"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])"
>    "and{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
> @@ -9027,7 +9031,7 @@ (define_insn "*andqi_ext_1_cc"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m"))
>           (const_int 0)))
> -   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9037,14 +9041,16 @@ (define_insn "*andqi_ext_1_cc"
>                                (const_int 8)
>                                (const_int 8)) 0)
>             (match_dup 2)) 0))]
> -  "ix86_match_ccmode (insn, CCNOmode)"
> +  "ix86_match_ccmode (insn, CCNOmode)
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && rtx_equal_p (operands[0], operands[1])"
>    "and{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
>     (set_attr "mode" "QI")])
>
>  (define_insn "*andqi_ext_2"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9058,7 +9064,9 @@ (define_insn "*andqi_ext_2"
>                                (const_int 8)
>                                (const_int 8)) 0)) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])
> +   || rtx_equal_p (operands[0], operands[2])"
>    "and{b}\t{%h2, %h0|%h0, %h2}"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "QI")])
> @@ -9431,7 +9439,7 @@ (define_insn "*<code><mode>_3"
>     (set_attr "mode" "<MODE>")])
>
>  (define_insn "*<code>qi_ext_1"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9442,14 +9450,16 @@ (define_insn "*<code>qi_ext_1"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
> +  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && rtx_equal_p (operands[0], operands[1])"
>    "<logic>{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
>     (set_attr "mode" "QI")])
>
>  (define_insn "*<code>qi_ext_2"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9463,7 +9473,10 @@ (define_insn "*<code>qi_ext_2"
>                                (const_int 8)
>                                (const_int 8)) 0)) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
> +  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && (rtx_equal_p (operands[0], operands[1])
> +       || rtx_equal_p (operands[0], operands[2]))"
>    "<logic>{b}\t{%h2, %h0|%h0, %h2}"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "QI")])
> @@ -9552,7 +9565,7 @@ (define_insn "*xorqi_ext_1_cc"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m"))
>           (const_int 0)))
> -   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9562,7 +9575,9 @@ (define_insn "*xorqi_ext_1_cc"
>                                (const_int 8)
>                                (const_int 8)) 0)
>           (match_dup 2)) 0))]
> -  "ix86_match_ccmode (insn, CCNOmode)"
> +  "ix86_match_ccmode (insn, CCNOmode)
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && rtx_equal_p (operands[0], operands[1])"
>    "xor{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
> --- gcc/testsuite/gcc.c-torture/execute/pr82524.c.jj    2017-10-12 17:39:28.244825800 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr82524.c       2017-10-12 17:39:12.000000000 +0200
> @@ -0,0 +1,37 @@
> +/* PR target/82524 */
> +
> +struct S { unsigned char b, g, r, a; };
> +union U { struct S c; unsigned v; };
> +
> +static inline unsigned char
> +foo (unsigned char a, unsigned char b)
> +{
> +  return ((a + 1) * b) >> 8;
> +}
> +
> +__attribute__((noinline, noclone)) unsigned
> +bar (union U *x, union U *y)
> +{
> +  union U z;
> +  unsigned char v = x->c.a;
> +  unsigned char w = foo (y->c.a, 255 - v);
> +  z.c.r = foo (x->c.r, v) + foo (y->c.r, w);
> +  z.c.g = foo (x->c.g, v) + foo (y->c.g, w);
> +  z.c.b = foo (x->c.b, v) + foo (y->c.b, w);
> +  z.c.a = 0;
> +  return z.v;
> +}
> +
> +int
> +main ()
> +{
> +  union U a, b, c;
> +  if ((unsigned char) ~0 != 255 || sizeof (unsigned) != 4)
> +    return 0;
> +  a.c = (struct S) { 255, 255, 255, 0 };
> +  b.c = (struct S) { 255, 255, 255, 255 };
> +  c.v = bar (&a, &b);
> +  if (c.c.b != 255 || c.c.g != 255 || c.c.r != 255 || c.c.a != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
>
>         Jakub
diff mbox series

Patch

--- gcc/config/i386/i386.md.jj	2017-10-12 14:05:15.000000000 +0200
+++ gcc/config/i386/i386.md	2017-10-12 17:07:11.723151868 +0200
@@ -6264,7 +6264,7 @@  (define_insn "*add<mode>_5"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "addqi_ext_1"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -6275,7 +6275,8 @@  (define_insn "addqi_ext_1"
 			       (const_int 8)) 0)
 	    (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])"
 {
   switch (get_attr_type (insn))
     {
@@ -6300,7 +6301,7 @@  (define_insn "addqi_ext_1"
    (set_attr "mode" "QI")])
 
 (define_insn "*addqi_ext_2"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -6314,7 +6315,9 @@  (define_insn "*addqi_ext_2"
 			       (const_int 8)
 			       (const_int 8)) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])
+   || rtx_equal_p (operands[0], operands[2])"
   "add{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
@@ -8998,7 +9001,7 @@  (define_insn "*andqi_2_slp"
    (set_attr "mode" "QI")])
 
 (define_insn "andqi_ext_1"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -9009,7 +9012,8 @@  (define_insn "andqi_ext_1"
 			       (const_int 8)) 0)
 	    (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])"
   "and{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
@@ -9027,7 +9031,7 @@  (define_insn "*andqi_ext_1_cc"
 			       (const_int 8)) 0)
 	    (match_operand:QI 2 "general_operand" "QnBc,m"))
 	  (const_int 0)))
-   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -9037,14 +9041,16 @@  (define_insn "*andqi_ext_1_cc"
 			       (const_int 8)
 			       (const_int 8)) 0)
 	    (match_dup 2)) 0))]
-  "ix86_match_ccmode (insn, CCNOmode)"
+  "ix86_match_ccmode (insn, CCNOmode)
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && rtx_equal_p (operands[0], operands[1])"
   "and{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
 (define_insn "*andqi_ext_2"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -9058,7 +9064,9 @@  (define_insn "*andqi_ext_2"
 			       (const_int 8)
 			       (const_int 8)) 0)) 0))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])
+   || rtx_equal_p (operands[0], operands[2])"
   "and{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
@@ -9431,7 +9439,7 @@  (define_insn "*<code><mode>_3"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<code>qi_ext_1"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -9442,14 +9450,16 @@  (define_insn "*<code>qi_ext_1"
 			       (const_int 8)) 0)
 	    (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && rtx_equal_p (operands[0], operands[1])"
   "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
 (define_insn "*<code>qi_ext_2"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -9463,7 +9473,10 @@  (define_insn "*<code>qi_ext_2"
 			       (const_int 8)
 			       (const_int 8)) 0)) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && (rtx_equal_p (operands[0], operands[1])
+       || rtx_equal_p (operands[0], operands[2]))"
   "<logic>{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
@@ -9552,7 +9565,7 @@  (define_insn "*xorqi_ext_1_cc"
 			       (const_int 8)) 0)
 	    (match_operand:QI 2 "general_operand" "QnBc,m"))
 	  (const_int 0)))
-   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
 			 (const_int 8)
 			 (const_int 8))
 	(subreg:SI
@@ -9562,7 +9575,9 @@  (define_insn "*xorqi_ext_1_cc"
 			       (const_int 8)
 			       (const_int 8)) 0)
 	  (match_dup 2)) 0))]
-  "ix86_match_ccmode (insn, CCNOmode)"
+  "ix86_match_ccmode (insn, CCNOmode)
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && rtx_equal_p (operands[0], operands[1])"
   "xor{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
--- gcc/testsuite/gcc.c-torture/execute/pr82524.c.jj	2017-10-12 17:39:28.244825800 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr82524.c	2017-10-12 17:39:12.000000000 +0200
@@ -0,0 +1,37 @@ 
+/* PR target/82524 */
+
+struct S { unsigned char b, g, r, a; };
+union U { struct S c; unsigned v; };
+
+static inline unsigned char
+foo (unsigned char a, unsigned char b)
+{
+  return ((a + 1) * b) >> 8;
+}
+
+__attribute__((noinline, noclone)) unsigned
+bar (union U *x, union U *y)
+{
+  union U z;
+  unsigned char v = x->c.a;
+  unsigned char w = foo (y->c.a, 255 - v);
+  z.c.r = foo (x->c.r, v) + foo (y->c.r, w);
+  z.c.g = foo (x->c.g, v) + foo (y->c.g, w);
+  z.c.b = foo (x->c.b, v) + foo (y->c.b, w);
+  z.c.a = 0;
+  return z.v;
+}
+
+int
+main ()
+{
+  union U a, b, c;
+  if ((unsigned char) ~0 != 255 || sizeof (unsigned) != 4)
+    return 0;
+  a.c = (struct S) { 255, 255, 255, 0 };
+  b.c = (struct S) { 255, 255, 255, 255 };
+  c.v = bar (&a, &b);
+  if (c.c.b != 255 || c.c.g != 255 || c.c.r != 255 || c.c.a != 0)
+    __builtin_abort ();
+  return 0;
+}