diff mbox series

s390x: Fix popcounthi2_z196 expander [PR93533]

Message ID 20200201204153.GD17695@tucnak
State New
Headers show
Series s390x: Fix popcounthi2_z196 expander [PR93533] | expand

Commit Message

Jakub Jelinek Feb. 1, 2020, 8:41 p.m. UTC
Hi!

The following testcase started to ICE when .POPCOUNT matching has been added
to match.pd; we had __builtin_popcount*, but nothing would use the
popcounthi2 expander before.

The problem is that the popcounthi2_z196 expander doesn't emit valid RTL:
error: unrecognizable insn:
(insn 138 137 139 27 (set (reg:SI 190)
        (ashift:SI (reg:HI 95 [ _105 ])
            (const_int 8 [0x8]))) -1
     (nil))
during RTL pass: vregs
The following patch is an attempt to fix that, furthermore I've tried to
slightly simplify it as well, it makes no sense to me to perform
(x + (x << 8)) >> 8 when we need to either zero extend or mask the result
at the end in order to avoid bits from above HImode to affect it, when we
can do
(x + (x >> 8)) & 0xff (or zero extension).

Bootstrapped/regtested on s390x-linux, ok for trunk?

2020-02-01  Jakub jelinek  <jakub@redhat.com>

	PR target/93533
	* config/s390/s390.md (popcounthi2_z196): Fix up expander to emit
	valid RTL to sum up the lowest and second lowest bytes of the popcnt
	result.


	Jakub

Comments

Andreas Krebbel Feb. 3, 2020, 7:40 a.m. UTC | #1
On 2/1/20 9:41 PM, Jakub Jelinek wrote:
> Hi!
> 
> The following testcase started to ICE when .POPCOUNT matching has been added
> to match.pd; we had __builtin_popcount*, but nothing would use the
> popcounthi2 expander before.
> 
> The problem is that the popcounthi2_z196 expander doesn't emit valid RTL:
> error: unrecognizable insn:
> (insn 138 137 139 27 (set (reg:SI 190)
>         (ashift:SI (reg:HI 95 [ _105 ])
>             (const_int 8 [0x8]))) -1
>      (nil))
> during RTL pass: vregs
> The following patch is an attempt to fix that, furthermore I've tried to
> slightly simplify it as well, it makes no sense to me to perform
> (x + (x << 8)) >> 8 when we need to either zero extend or mask the result
> at the end in order to avoid bits from above HImode to affect it, when we
> can do
> (x + (x >> 8)) & 0xff (or zero extension).
> 
> Bootstrapped/regtested on s390x-linux, ok for trunk?
Ok. Thanks for the fix!

Andreas

> 
> 2020-02-01  Jakub jelinek  <jakub@redhat.com>
> 
> 	PR target/93533
> 	* config/s390/s390.md (popcounthi2_z196): Fix up expander to emit
> 	valid RTL to sum up the lowest and second lowest bytes of the popcnt
> 	result.
> 
> --- gcc/config/s390/s390.md.jj	2020-01-12 11:54:36.412413424 +0100
> +++ gcc/config/s390/s390.md	2020-02-01 13:34:21.671431689 +0100
> @@ -11670,21 +11670,28 @@ (define_expand "popcountsi2"
>  })
>  
>  (define_expand "popcounthi2_z196"
> -  [; popcnt op0, op1
> -   (parallel [(set (match_operand:HI 0 "register_operand" "")
> +  [; popcnt op2, op1
> +   (parallel [(set (match_dup 2)
>  		   (unspec:HI [(match_operand:HI 1 "register_operand")]
>  			      UNSPEC_POPCNT))
>  	      (clobber (reg:CC CC_REGNUM))])
> -   ; sllk op2, op0, 8
> -   (set (match_dup 2)
> -	(ashift:SI (match_dup 0) (const_int 8)))
> -   ; ar op0, op2
> -   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
> +   ; lr op3, op2
> +   (set (match_dup 3) (subreg:SI (match_dup 2) 0))
> +   ; srl op4, op3, 8
> +   (set (match_dup 4) (lshiftrt:SI (match_dup 3) (const_int 8)))
> +   ; ar op3, op4
> +   (parallel [(set (match_dup 3) (plus:SI (match_dup 3) (match_dup 4)))
>  	      (clobber (reg:CC CC_REGNUM))])
> -   ; srl op0, op0, 8
> -   (set (match_dup 0) (lshiftrt:HI (match_dup 0) (const_int 8)))]
> +   ; llgc op0, op3
> +   (parallel [(set (match_operand:HI 0 "register_operand" "")
> +		   (and:HI (subreg:HI (match_dup 3) 2) (const_int 255)))
> +	      (clobber (reg:CC CC_REGNUM))])]
>    "TARGET_Z196"
> -  "operands[2] = gen_reg_rtx (SImode);")
> +{
> +  operands[2] = gen_reg_rtx (HImode);
> +  operands[3] = gen_reg_rtx (SImode);
> +  operands[4] = gen_reg_rtx (SImode);
> +})
>  
>  (define_expand "popcounthi2"
>    [(set (match_dup 2)
> --- gcc/testsuite/gcc.c-torture/compile/pr93533.c.jj	2020-02-01 13:44:16.296681108 +0100
> +++ gcc/testsuite/gcc.c-torture/compile/pr93533.c	2020-02-01 13:43:52.034038073 +0100
> @@ -0,0 +1,9 @@
> +/* PR target/93533 */
> +
> +unsigned
> +foo (unsigned short a)
> +{
> +  a = a - (a >> 1 & 21845);
> +  a = (a & 13107) + (a >> 2 & 13107);
> +  return (unsigned short) ((a + (a >> 4) & 3855) * 257) >> 8;
> +}
> --- gcc/testsuite/gcc.target/s390/pr93533.c.jj	2020-02-01 13:45:41.433428499 +0100
> +++ gcc/testsuite/gcc.target/s390/pr93533.c	2020-02-01 13:45:32.984552824 +0100
> @@ -0,0 +1,5 @@
> +/* PR target/93533 */
> +/* { dg-do compile } */
> +/* { dg-options "-march=z196 -O2" } */
> +
> +#include "../../gcc.c-torture/compile/pr93533.c"
> 
> 	Jakub
>
diff mbox series

Patch

--- gcc/config/s390/s390.md.jj	2020-01-12 11:54:36.412413424 +0100
+++ gcc/config/s390/s390.md	2020-02-01 13:34:21.671431689 +0100
@@ -11670,21 +11670,28 @@  (define_expand "popcountsi2"
 })
 
 (define_expand "popcounthi2_z196"
-  [; popcnt op0, op1
-   (parallel [(set (match_operand:HI 0 "register_operand" "")
+  [; popcnt op2, op1
+   (parallel [(set (match_dup 2)
 		   (unspec:HI [(match_operand:HI 1 "register_operand")]
 			      UNSPEC_POPCNT))
 	      (clobber (reg:CC CC_REGNUM))])
-   ; sllk op2, op0, 8
-   (set (match_dup 2)
-	(ashift:SI (match_dup 0) (const_int 8)))
-   ; ar op0, op2
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+   ; lr op3, op2
+   (set (match_dup 3) (subreg:SI (match_dup 2) 0))
+   ; srl op4, op3, 8
+   (set (match_dup 4) (lshiftrt:SI (match_dup 3) (const_int 8)))
+   ; ar op3, op4
+   (parallel [(set (match_dup 3) (plus:SI (match_dup 3) (match_dup 4)))
 	      (clobber (reg:CC CC_REGNUM))])
-   ; srl op0, op0, 8
-   (set (match_dup 0) (lshiftrt:HI (match_dup 0) (const_int 8)))]
+   ; llgc op0, op3
+   (parallel [(set (match_operand:HI 0 "register_operand" "")
+		   (and:HI (subreg:HI (match_dup 3) 2) (const_int 255)))
+	      (clobber (reg:CC CC_REGNUM))])]
   "TARGET_Z196"
-  "operands[2] = gen_reg_rtx (SImode);")
+{
+  operands[2] = gen_reg_rtx (HImode);
+  operands[3] = gen_reg_rtx (SImode);
+  operands[4] = gen_reg_rtx (SImode);
+})
 
 (define_expand "popcounthi2"
   [(set (match_dup 2)
--- gcc/testsuite/gcc.c-torture/compile/pr93533.c.jj	2020-02-01 13:44:16.296681108 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr93533.c	2020-02-01 13:43:52.034038073 +0100
@@ -0,0 +1,9 @@ 
+/* PR target/93533 */
+
+unsigned
+foo (unsigned short a)
+{
+  a = a - (a >> 1 & 21845);
+  a = (a & 13107) + (a >> 2 & 13107);
+  return (unsigned short) ((a + (a >> 4) & 3855) * 257) >> 8;
+}
--- gcc/testsuite/gcc.target/s390/pr93533.c.jj	2020-02-01 13:45:41.433428499 +0100
+++ gcc/testsuite/gcc.target/s390/pr93533.c	2020-02-01 13:45:32.984552824 +0100
@@ -0,0 +1,5 @@ 
+/* PR target/93533 */
+/* { dg-do compile } */
+/* { dg-options "-march=z196 -O2" } */
+
+#include "../../gcc.c-torture/compile/pr93533.c"