diff mbox series

IBM Z: Fix testcase vcond-shift.c

Message ID 20210301160014.1823864-1-stefansf@linux.ibm.com
State New
Headers show
Series IBM Z: Fix testcase vcond-shift.c | expand

Commit Message

Stefan Schulze Frielinghaus March 1, 2021, 4 p.m. UTC
As of commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 expressions
x CMP y ? -1 : 0 are fold into x CMP y.  Due to this we do not see
shifts anymore after expand in our testcases but comparisons.  Thus
replace instructions vesraX by corresponding vchX.  Keep testcases
vchX_{lt,gt} where only a relational comparison is done and no shift in
order to keep test coverage for vectorization.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/vector/vcond-shift.c: Replace vesraX
	instructions by corresponding vchX instructions.
---
 .../gcc.target/s390/vector/vcond-shift.c      | 31 ++++++++++---------
 1 file changed, 17 insertions(+), 14 deletions(-)

Comments

Andreas Krebbel March 2, 2021, 7:08 a.m. UTC | #1
On 3/1/21 5:00 PM, Stefan Schulze Frielinghaus wrote:
> As of commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 expressions
> x CMP y ? -1 : 0 are fold into x CMP y.  Due to this we do not see
> shifts anymore after expand in our testcases but comparisons.  Thus
> replace instructions vesraX by corresponding vchX.  Keep testcases
> vchX_{lt,gt} where only a relational comparison is done and no shift in
> order to keep test coverage for vectorization.

The vcond-shift optimization verified by the testcase is currently implemented in s390_expand_vcond
but due to the common code change we go the vec_cmp route now. So we probably should do the same
also in s390_expand_vec_compare now. Perhaps like this ... it appears to fix the testcase for me:

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 9d2cee950d0b..9d9f5a0f6f4e 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -6562,6 +6562,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,

   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
     {
+      cmp_op2 = force_operand (cmp_op2, 0);
       switch (cond)
        {
          /* NE a != b -> !(a == b) */
@@ -6600,6 +6601,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
     }
   else
     {
+      /* Turn x < 0 into x >> (bits - )  */
+      if (cond == LT && cmp_op2 == CONST0_RTX (mode))
+       {
+         int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
+         rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
+                                        GEN_INT (shift), target,
+                                        0, OPTAB_DIRECT);
+         if (res != target)
+           emit_move_insn (target, res);
+         return;
+       }
+      cmp_op2 = force_operand (cmp_op2, 0);
+
       switch (cond)
        {
          /* NE: a != b -> !(a == b) */
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index bc52211c55e5..c80d582a300d 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -1589,7 +1589,7 @@
   [(set (match_operand:<TOINTVEC>  0 "register_operand" "")
        (match_operator:<TOINTVEC> 1 "vcond_comparison_operator"
          [(match_operand:V_HW     2 "register_operand" "")
-          (match_operand:V_HW     3 "register_operand" "")]))]
+          (match_operand:V_HW     3 "nonmemory_operand" "")]))]
   "TARGET_VX"
 {
   s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]);

Andreas


> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/s390/vector/vcond-shift.c: Replace vesraX
> 	instructions by corresponding vchX instructions.
> ---
>  .../gcc.target/s390/vector/vcond-shift.c      | 31 ++++++++++---------
>  1 file changed, 17 insertions(+), 14 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> index a6b4e97aa50..9e472aef960 100644
> --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> @@ -3,10 +3,13 @@
>  /* { dg-do compile { target { s390*-*-* } } } */
>  /* { dg-options "-O3 -march=z13 -mzarch" } */
>  
> -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
> -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
> -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
> -/* { dg-final { scan-assembler-not "vzero\t*" } } */
> +/* { dg-final { scan-assembler-times "vzero\t" 9 } } */
> +/* { dg-final { scan-assembler-times "vchf\t" 6 } } */
> +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,1" 2 } } */
> +/* { dg-final { scan-assembler-times "vchh\t" 6 } } */
> +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,1" 2 } } */
> +/* { dg-final { scan-assembler-times "vchb\t" 6 } } */
> +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,1" 2 } } */
>  /* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
>  /* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
>  /* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
> @@ -15,19 +18,19 @@
>  #define ITER(X) (2 * (16 / sizeof (X[1])))
>  
>  void
> -vesraf_div (int *x)
> +vchf_vesraf_div (int *x)
>  {
>    int i;
>    int *xx = __builtin_assume_aligned (x, 8);
>  
>    /* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1
> -     which in turn should get simplified to (xx + (xx >> 31)) >> 1.  */
> +     which in turn should get simplified to (xx - (xx < 0)) >> 1.  */
>    for (i = 0; i < ITER (xx); i++)
>      xx[i] = xx[i] / 2;
>  }
>  
>  void
> -vesrah_div (short *x)
> +vchh_vesrah_div (short *x)
>  {
>    int i;
>    short *xx = __builtin_assume_aligned (x, 8);
> @@ -38,7 +41,7 @@ vesrah_div (short *x)
>  
>  
>  void
> -vesrab_div (signed char *x)
> +vchb_vesrab_div (signed char *x)
>  {
>    int i;
>    signed char *xx = __builtin_assume_aligned (x, 8);
> @@ -50,7 +53,7 @@ vesrab_div (signed char *x)
>  
>  
>  int
> -vesraf_lt (int *x)
> +vchf_lt (int *x)
>  {
>    int i;
>    int *xx = __builtin_assume_aligned (x, 8);
> @@ -60,7 +63,7 @@ vesraf_lt (int *x)
>  }
>  
>  int
> -vesrah_lt (short *x)
> +vchh_lt (short *x)
>  {
>    int i;
>    short *xx = __builtin_assume_aligned (x, 8);
> @@ -70,7 +73,7 @@ vesrah_lt (short *x)
>  }
>  
>  int
> -vesrab_lt (signed char *x)
> +vchb_lt (signed char *x)
>  {
>    int i;
>    signed char *xx = __builtin_assume_aligned (x, 8);
> @@ -82,7 +85,7 @@ vesrab_lt (signed char *x)
>  
>  
>  int
> -vesraf_ge (int *x)
> +vchf_ge (int *x)
>  {
>    int i;
>    int *xx = __builtin_assume_aligned (x, 8);
> @@ -92,7 +95,7 @@ vesraf_ge (int *x)
>  }
>  
>  int
> -vesrah_ge (short *x)
> +vchh_ge (short *x)
>  {
>    int i;
>    short *xx = __builtin_assume_aligned (x, 8);
> @@ -102,7 +105,7 @@ vesrah_ge (short *x)
>  }
>  
>  int
> -vesrab_ge (signed char *x)
> +vchb_ge (signed char *x)
>  {
>    int i;
>    signed char *xx = __builtin_assume_aligned (x, 8);
>
Stefan Schulze Frielinghaus March 2, 2021, 7:42 a.m. UTC | #2
On Tue, Mar 02, 2021 at 08:08:14AM +0100, Andreas Krebbel wrote:
> On 3/1/21 5:00 PM, Stefan Schulze Frielinghaus wrote:
> > As of commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 expressions
> > x CMP y ? -1 : 0 are fold into x CMP y.  Due to this we do not see
> > shifts anymore after expand in our testcases but comparisons.  Thus
> > replace instructions vesraX by corresponding vchX.  Keep testcases
> > vchX_{lt,gt} where only a relational comparison is done and no shift in
> > order to keep test coverage for vectorization.
> 
> The vcond-shift optimization verified by the testcase is currently implemented in s390_expand_vcond
> but due to the common code change we go the vec_cmp route now. So we probably should do the same
> also in s390_expand_vec_compare now. Perhaps like this ... it appears to fix the testcase for me:
> 
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 9d2cee950d0b..9d9f5a0f6f4e 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -6562,6 +6562,7 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
> 
>    if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
>      {
> +      cmp_op2 = force_operand (cmp_op2, 0);
>        switch (cond)
>         {
>           /* NE a != b -> !(a == b) */
> @@ -6600,6 +6601,19 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
>      }
>    else
>      {
> +      /* Turn x < 0 into x >> (bits - )  */
> +      if (cond == LT && cmp_op2 == CONST0_RTX (mode))
> +       {
> +         int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
> +         rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
> +                                        GEN_INT (shift), target,
> +                                        0, OPTAB_DIRECT);
> +         if (res != target)
> +           emit_move_insn (target, res);
> +         return;
> +       }
> +      cmp_op2 = force_operand (cmp_op2, 0);
> +
>        switch (cond)
>         {
>           /* NE: a != b -> !(a == b) */
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index bc52211c55e5..c80d582a300d 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -1589,7 +1589,7 @@
>    [(set (match_operand:<TOINTVEC>  0 "register_operand" "")
>         (match_operator:<TOINTVEC> 1 "vcond_comparison_operator"
>           [(match_operand:V_HW     2 "register_operand" "")
> -          (match_operand:V_HW     3 "register_operand" "")]))]
> +          (match_operand:V_HW     3 "nonmemory_operand" "")]))]
>    "TARGET_VX"
>  {
>    s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]);

Sounds great to me.  Also eliminates the extra vzero :)

Cheers,
Stefan

> 
> Andreas
> 
> 
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 	* gcc.target/s390/vector/vcond-shift.c: Replace vesraX
> > 	instructions by corresponding vchX instructions.
> > ---
> >  .../gcc.target/s390/vector/vcond-shift.c      | 31 ++++++++++---------
> >  1 file changed, 17 insertions(+), 14 deletions(-)
> > 
> > diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > index a6b4e97aa50..9e472aef960 100644
> > --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > @@ -3,10 +3,13 @@
> >  /* { dg-do compile { target { s390*-*-* } } } */
> >  /* { dg-options "-O3 -march=z13 -mzarch" } */
> >  
> > -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
> > -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
> > -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
> > -/* { dg-final { scan-assembler-not "vzero\t*" } } */
> > +/* { dg-final { scan-assembler-times "vzero\t" 9 } } */
> > +/* { dg-final { scan-assembler-times "vchf\t" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,1" 2 } } */
> > +/* { dg-final { scan-assembler-times "vchh\t" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,1" 2 } } */
> > +/* { dg-final { scan-assembler-times "vchb\t" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,1" 2 } } */
> >  /* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
> >  /* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
> >  /* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
> > @@ -15,19 +18,19 @@
> >  #define ITER(X) (2 * (16 / sizeof (X[1])))
> >  
> >  void
> > -vesraf_div (int *x)
> > +vchf_vesraf_div (int *x)
> >  {
> >    int i;
> >    int *xx = __builtin_assume_aligned (x, 8);
> >  
> >    /* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1
> > -     which in turn should get simplified to (xx + (xx >> 31)) >> 1.  */
> > +     which in turn should get simplified to (xx - (xx < 0)) >> 1.  */
> >    for (i = 0; i < ITER (xx); i++)
> >      xx[i] = xx[i] / 2;
> >  }
> >  
> >  void
> > -vesrah_div (short *x)
> > +vchh_vesrah_div (short *x)
> >  {
> >    int i;
> >    short *xx = __builtin_assume_aligned (x, 8);
> > @@ -38,7 +41,7 @@ vesrah_div (short *x)
> >  
> >  
> >  void
> > -vesrab_div (signed char *x)
> > +vchb_vesrab_div (signed char *x)
> >  {
> >    int i;
> >    signed char *xx = __builtin_assume_aligned (x, 8);
> > @@ -50,7 +53,7 @@ vesrab_div (signed char *x)
> >  
> >  
> >  int
> > -vesraf_lt (int *x)
> > +vchf_lt (int *x)
> >  {
> >    int i;
> >    int *xx = __builtin_assume_aligned (x, 8);
> > @@ -60,7 +63,7 @@ vesraf_lt (int *x)
> >  }
> >  
> >  int
> > -vesrah_lt (short *x)
> > +vchh_lt (short *x)
> >  {
> >    int i;
> >    short *xx = __builtin_assume_aligned (x, 8);
> > @@ -70,7 +73,7 @@ vesrah_lt (short *x)
> >  }
> >  
> >  int
> > -vesrab_lt (signed char *x)
> > +vchb_lt (signed char *x)
> >  {
> >    int i;
> >    signed char *xx = __builtin_assume_aligned (x, 8);
> > @@ -82,7 +85,7 @@ vesrab_lt (signed char *x)
> >  
> >  
> >  int
> > -vesraf_ge (int *x)
> > +vchf_ge (int *x)
> >  {
> >    int i;
> >    int *xx = __builtin_assume_aligned (x, 8);
> > @@ -92,7 +95,7 @@ vesraf_ge (int *x)
> >  }
> >  
> >  int
> > -vesrah_ge (short *x)
> > +vchh_ge (short *x)
> >  {
> >    int i;
> >    short *xx = __builtin_assume_aligned (x, 8);
> > @@ -102,7 +105,7 @@ vesrah_ge (short *x)
> >  }
> >  
> >  int
> > -vesrab_ge (signed char *x)
> > +vchb_ge (signed char *x)
> >  {
> >    int i;
> >    signed char *xx = __builtin_assume_aligned (x, 8);
> > 
>
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
index a6b4e97aa50..9e472aef960 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
@@ -3,10 +3,13 @@ 
 /* { dg-do compile { target { s390*-*-* } } } */
 /* { dg-options "-O3 -march=z13 -mzarch" } */
 
-/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
-/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
-/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
-/* { dg-final { scan-assembler-not "vzero\t*" } } */
+/* { dg-final { scan-assembler-times "vzero\t" 9 } } */
+/* { dg-final { scan-assembler-times "vchf\t" 6 } } */
+/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,1" 2 } } */
+/* { dg-final { scan-assembler-times "vchh\t" 6 } } */
+/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,1" 2 } } */
+/* { dg-final { scan-assembler-times "vchb\t" 6 } } */
+/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,1" 2 } } */
 /* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
 /* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
 /* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
@@ -15,19 +18,19 @@ 
 #define ITER(X) (2 * (16 / sizeof (X[1])))
 
 void
-vesraf_div (int *x)
+vchf_vesraf_div (int *x)
 {
   int i;
   int *xx = __builtin_assume_aligned (x, 8);
 
   /* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1
-     which in turn should get simplified to (xx + (xx >> 31)) >> 1.  */
+     which in turn should get simplified to (xx - (xx < 0)) >> 1.  */
   for (i = 0; i < ITER (xx); i++)
     xx[i] = xx[i] / 2;
 }
 
 void
-vesrah_div (short *x)
+vchh_vesrah_div (short *x)
 {
   int i;
   short *xx = __builtin_assume_aligned (x, 8);
@@ -38,7 +41,7 @@  vesrah_div (short *x)
 
 
 void
-vesrab_div (signed char *x)
+vchb_vesrab_div (signed char *x)
 {
   int i;
   signed char *xx = __builtin_assume_aligned (x, 8);
@@ -50,7 +53,7 @@  vesrab_div (signed char *x)
 
 
 int
-vesraf_lt (int *x)
+vchf_lt (int *x)
 {
   int i;
   int *xx = __builtin_assume_aligned (x, 8);
@@ -60,7 +63,7 @@  vesraf_lt (int *x)
 }
 
 int
-vesrah_lt (short *x)
+vchh_lt (short *x)
 {
   int i;
   short *xx = __builtin_assume_aligned (x, 8);
@@ -70,7 +73,7 @@  vesrah_lt (short *x)
 }
 
 int
-vesrab_lt (signed char *x)
+vchb_lt (signed char *x)
 {
   int i;
   signed char *xx = __builtin_assume_aligned (x, 8);
@@ -82,7 +85,7 @@  vesrab_lt (signed char *x)
 
 
 int
-vesraf_ge (int *x)
+vchf_ge (int *x)
 {
   int i;
   int *xx = __builtin_assume_aligned (x, 8);
@@ -92,7 +95,7 @@  vesraf_ge (int *x)
 }
 
 int
-vesrah_ge (short *x)
+vchh_ge (short *x)
 {
   int i;
   short *xx = __builtin_assume_aligned (x, 8);
@@ -102,7 +105,7 @@  vesrah_ge (short *x)
 }
 
 int
-vesrab_ge (signed char *x)
+vchb_ge (signed char *x)
 {
   int i;
   signed char *xx = __builtin_assume_aligned (x, 8);