diff mbox

[5.1,rs6000] Fix PR65787

Message ID 1429220775.20720.4.camel@gnopaine
State New
Headers show

Commit Message

Bill Schmidt April 16, 2015, 9:46 p.m. UTC
Hi,

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65787 identifies an issue
where the powerpc64le vector swap optimization miscompiles some code.
The code for handling vector extract operations did not expect to find
those operations wrapped in a PARALLEL with a CLOBBER, but this test
shows that this can now happen.  This patch adds that possibility to the
handling for vector extract.  I've added a small test case to verify
that we now catch this.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk and gcc-5-branch?

After this is complete I will investigate whether we need to backport
this to 4.8 and 4.9 also.

Thanks,
Bill


[gcc]

2015-04-16  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR target/65787
	* config/rs6000/rs6000.c (rtx_is_swappable_p): Handle case where
	vec_extract operation is wrapped in a PARALLEL with a CLOBBER.
	(adjust_extract): Likewise.

[gcc/testsuite]

2015-04-16  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR target/65787
	* gcc.target/powerpc/pr65787.c: New.

Comments

Bill Schmidt April 17, 2015, 12:27 p.m. UTC | #1
Note that Jakub requested a small change in the bugzilla commentary,
which I've implemented.  I'm doing a regstrap now.

Bill

On Thu, 2015-04-16 at 16:46 -0500, Bill Schmidt wrote:
> Hi,
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65787 identifies an issue
> where the powerpc64le vector swap optimization miscompiles some code.
> The code for handling vector extract operations did not expect to find
> those operations wrapped in a PARALLEL with a CLOBBER, but this test
> shows that this can now happen.  This patch adds that possibility to the
> handling for vector extract.  I've added a small test case to verify
> that we now catch this.
> 
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions.  Is this ok for trunk and gcc-5-branch?
> 
> After this is complete I will investigate whether we need to backport
> this to 4.8 and 4.9 also.
> 
> Thanks,
> Bill
> 
> 
> [gcc]
> 
> 2015-04-16  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR target/65787
> 	* config/rs6000/rs6000.c (rtx_is_swappable_p): Handle case where
> 	vec_extract operation is wrapped in a PARALLEL with a CLOBBER.
> 	(adjust_extract): Likewise.
> 
> [gcc/testsuite]
> 
> 2015-04-16  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR target/65787
> 	* gcc.target/powerpc/pr65787.c: New.
> 
> 
> Index: gcc/config/rs6000/rs6000.c
> ===================================================================
> --- gcc/config/rs6000/rs6000.c	(revision 222158)
> +++ gcc/config/rs6000/rs6000.c	(working copy)
> @@ -34204,6 +34204,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
>        else
>  	return 0;
> 
> +    case PARALLEL: {
> +      /* A vec_extract operation may be wrapped in a PARALLEL with a
> +	 clobber, so account for that possibility.  */
> +      unsigned int len = XVECLEN (op, 0);
> +
> +      if (len != 2)
> +	return 0;
> +
> +      if (GET_CODE (XVECEXP (op, 0, 1)) != CLOBBER)
> +	return 0;
> +
> +      return rtx_is_swappable_p (XVECEXP (op, 0, 0), special);
> +    }
> +
>      case UNSPEC:
>        {
>  	/* Various operations are unsafe for this optimization, at least
> @@ -34603,7 +34617,10 @@ permute_store (rtx_insn *insn)
>  static void
>  adjust_extract (rtx_insn *insn)
>  {
> -  rtx src = SET_SRC (PATTERN (insn));
> +  rtx pattern = PATTERN (insn);
> +  if (GET_CODE (pattern) == PARALLEL)
> +    pattern = XVECEXP (pattern, 0, 0);
> +  rtx src = SET_SRC (pattern);
>    /* The vec_select may be wrapped in a vec_duplicate for a splat, so
>       account for that.  */
>    rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
> Index: gcc/testsuite/gcc.target/powerpc/pr65787.c
> ===================================================================
> --- gcc/testsuite/gcc.target/powerpc/pr65787.c	(revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/pr65787.c	(working copy)
> @@ -0,0 +1,21 @@
> +/* { dg-do compile { target { powerpc64le-*-* } } } */
> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
> +/* { dg-options "-mcpu=power8 -O3" } */
> +/* { dg-final { scan-assembler "xxsldwi \[0-9\]*,\[0-9\]*,\[0-9\]*,3" } } */
> +/* { dg-final { scan-assembler-not "xxpermdi" } } */
> +
> +/* This test verifies that a vector extract operand properly has its
> +   lane changed by the swap optimization.  Element 2 of LE corresponds
> +   to element 1 of BE.  When doublewords are swapped, this becomes
> +   element 3 of BE, so we need to shift the vector left by 3 words
> +   to be able to extract the correct value from BE element zero.  */
> +
> +typedef float  v4f32 __attribute__ ((__vector_size__ (16)));
> +
> +void foo (float);
> +extern v4f32 x, y;
> +
> +int main() {
> +  v4f32 z = x + y;
> +  foo (z[2]);
> +}
> 
>
diff mbox

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 222158)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -34204,6 +34204,20 @@  rtx_is_swappable_p (rtx op, unsigned int *special)
       else
 	return 0;
 
+    case PARALLEL: {
+      /* A vec_extract operation may be wrapped in a PARALLEL with a
+	 clobber, so account for that possibility.  */
+      unsigned int len = XVECLEN (op, 0);
+
+      if (len != 2)
+	return 0;
+
+      if (GET_CODE (XVECEXP (op, 0, 1)) != CLOBBER)
+	return 0;
+
+      return rtx_is_swappable_p (XVECEXP (op, 0, 0), special);
+    }
+
     case UNSPEC:
       {
 	/* Various operations are unsafe for this optimization, at least
@@ -34603,7 +34617,10 @@  permute_store (rtx_insn *insn)
 static void
 adjust_extract (rtx_insn *insn)
 {
-  rtx src = SET_SRC (PATTERN (insn));
+  rtx pattern = PATTERN (insn);
+  if (GET_CODE (pattern) == PARALLEL)
+    pattern = XVECEXP (pattern, 0, 0);
+  rtx src = SET_SRC (pattern);
   /* The vec_select may be wrapped in a vec_duplicate for a splat, so
      account for that.  */
   rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
Index: gcc/testsuite/gcc.target/powerpc/pr65787.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr65787.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr65787.c	(working copy)
@@ -0,0 +1,21 @@ 
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3" } */
+/* { dg-final { scan-assembler "xxsldwi \[0-9\]*,\[0-9\]*,\[0-9\]*,3" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* This test verifies that a vector extract operand properly has its
+   lane changed by the swap optimization.  Element 2 of LE corresponds
+   to element 1 of BE.  When doublewords are swapped, this becomes
+   element 3 of BE, so we need to shift the vector left by 3 words
+   to be able to extract the correct value from BE element zero.  */
+
+typedef float  v4f32 __attribute__ ((__vector_size__ (16)));
+
+void foo (float);
+extern v4f32 x, y;
+
+int main() {
+  v4f32 z = x + y;
+  foo (z[2]);
+}