diff mbox series

[rs6000] Gimple folding of splat_uX

Message ID 1512578218.11602.59.camel@brimstone.rchland.ibm.com
State New
Headers show
Series [rs6000] Gimple folding of splat_uX | expand

Commit Message

will schmidt Dec. 6, 2017, 4:36 p.m. UTC
Hi,
Add support for gimple folding of splat_u{8,16,32}.
Testcase coverage is primarily handled by existing tests
testsuite/gcc.target/powerpc/fold-vec-splat_*.c
    
One new test added to verify we continue to receive
an 'invalid argument, must be a 5-bit immediate' error
when we try to splat a non-constant value.
    
Regtests currently running across assorted power systems.
OK for trunk with successful results?
   
Thanks
-Will
    
[gcc]

2017-12-05  Will Schmidt  <will_schmidt@vnet.ibm.com>
    
	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
	early folding of splat_u{8,16,32}.
    
[testsuite]

2017-12-05  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* gcc.target/powerpc/fold-vec-splat-misc-invalid.c: New.

Comments

Richard Biener Dec. 7, 2017, 11:21 a.m. UTC | #1
On Wed, Dec 6, 2017 at 5:36 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> Hi,
> Add support for gimple folding of splat_u{8,16,32}.
> Testcase coverage is primarily handled by existing tests
> testsuite/gcc.target/powerpc/fold-vec-splat_*.c
>
> One new test added to verify we continue to receive
> an 'invalid argument, must be a 5-bit immediate' error
> when we try to splat a non-constant value.
>
> Regtests currently running across assorted power systems.
> OK for trunk with successful results?
>
> Thanks
> -Will
>
> [gcc]
>
> 2017-12-05  Will Schmidt  <will_schmidt@vnet.ibm.com>
>
>         * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>         early folding of splat_u{8,16,32}.
>
> [testsuite]
>
> 2017-12-05  Will Schmidt  <will_schmidt@vnet.ibm.com>
>
>         * gcc.target/powerpc/fold-vec-splat-misc-invalid.c: New.
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 045a014..1470557 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16614,10 +16614,33 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case VSX_BUILTIN_CMPLE_2DI:
>      case VSX_BUILTIN_CMPLE_U2DI:
>        fold_compare_helper (gsi, LE_EXPR, stmt);
>        return true;
>
> +    /* flavors of vec_splat_[us]{8,16,32}.  */
> +    case ALTIVEC_BUILTIN_VSPLTISB:
> +    case ALTIVEC_BUILTIN_VSPLTISH:
> +    case ALTIVEC_BUILTIN_VSPLTISW:
> +      {
> +        arg0 = gimple_call_arg (stmt, 0);
> +        lhs = gimple_call_lhs (stmt);
> +        /* Only fold the vec_splat_*() if arg0 is constant.  */
> +        if ( TREE_CODE (arg0) != INTEGER_CST)
> +          return false;

Is there a reason to not do this for non-constants?  (even not for
float constants?)
You should probably double-check there is a LHS, folding runs before DCE.

> +        tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> +                                          TREE_INT_CST_LOW (arg0));
> +        vec<constructor_elt, va_gc> *ctor_elts = NULL;
> +        unsigned int n_elts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs));
> +        for (unsigned int i=0; i < n_elts ; i++)
> +          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, splat_value);
> +        tree splat_tree = build_constructor (TREE_TYPE (lhs), ctor_elts);

Just use

           tree slat_tree = build_vector_from_val (splat_value);

that would also work for non-constants btw.

> +        g = gimple_build_assign (lhs, splat_tree);
> +        gimple_set_location (g, gimple_location (stmt));
> +        gsi_replace (gsi, g, true);
> +        return true;
> +      }
> +
>      default:
>        if (TARGET_DEBUG_BUILTIN)
>         fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>                  fn_code, fn_name1, fn_name2);
>        break;
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> new file mode 100644
> index 0000000..20f5b05
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> @@ -0,0 +1,33 @@
> +/* Verify that overloaded built-ins for vec_splat_s8 and vec_splat_s16
> +   generate errors as expected when we attempt to use invalid inputs.  */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-mvsx -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed short
> +testss_1 (unsigned int ui)
> +{
> +  return vec_splat_s16 (ui);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
> +}
> +
> +vector unsigned short
> +testss_2 (signed int si)
> +{
> +  return vec_splat_u16 (si);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
> +}
> +
> +vector signed char
> +testsc_1 (unsigned int ui)
> +{
> +  return vec_splat_s8 (ui); /* { dg-error "argument 1 must be a 5-bit signed literal" } */
> +}
> +
> +vector unsigned char
> +testsc_2 (signed int si)
> +{
> +  return vec_splat_u8 (si);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
> +}
> +
>
>
Jakub Jelinek Dec. 7, 2017, 11:26 a.m. UTC | #2
On Thu, Dec 07, 2017 at 12:21:03PM +0100, Richard Biener wrote:
> > +        if ( TREE_CODE (arg0) != INTEGER_CST)

Also watch formatting, the space after ( doesn't belong there.

> > +          return false;
> 
> Is there a reason to not do this for non-constants?  (even not for
> float constants?)
> You should probably double-check there is a LHS, folding runs before DCE.
> 
> > +        tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> > +                                          TREE_INT_CST_LOW (arg0));

	tree splat_value = fold_convert (TREE_TYPE (TREE_TYPE (lhs)), arg0);
?
TREE_INT_CST_LOW should not be used unless necessary, e.g. it can throw
the upper bits of large constants, even if the type is wide.

	Jakub
Richard Biener Dec. 7, 2017, 11:35 a.m. UTC | #3
On Thu, Dec 7, 2017 at 12:26 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Dec 07, 2017 at 12:21:03PM +0100, Richard Biener wrote:
>> > +        if ( TREE_CODE (arg0) != INTEGER_CST)
>
> Also watch formatting, the space after ( doesn't belong there.
>
>> > +          return false;
>>
>> Is there a reason to not do this for non-constants?  (even not for
>> float constants?)
>> You should probably double-check there is a LHS, folding runs before DCE.
>>
>> > +        tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
>> > +                                          TREE_INT_CST_LOW (arg0));
>
>         tree splat_value = fold_convert (TREE_TYPE (TREE_TYPE (lhs)), arg0);
> ?
> TREE_INT_CST_LOW should not be used unless necessary, e.g. it can throw
> the upper bits of large constants, even if the type is wide.

Indeed.  For non-constants even better use gimple_convert () which will emit
a separate stmt when necessary.

Richard.

>         Jakub
Segher Boessenkool Dec. 7, 2017, 11:53 a.m. UTC | #4
Hi!

On Wed, Dec 06, 2017 at 10:36:58AM -0600, Will Schmidt wrote:
> Add support for gimple folding of splat_u{8,16,32}.
> Testcase coverage is primarily handled by existing tests
> testsuite/gcc.target/powerpc/fold-vec-splat_*.c
>     
> One new test added to verify we continue to receive
> an 'invalid argument, must be a 5-bit immediate' error
> when we try to splat a non-constant value.

I don't have much to add, maybe Bill does?  On the next version then I
guess.  Some more formatting stuff:

> +	 arg0 = gimple_call_arg (stmt, 0);
> +	 lhs = gimple_call_lhs (stmt);
> +	 /* Only fold the vec_splat_*() if arg0 is constant.  */
> +	 if ( TREE_CODE (arg0) != INTEGER_CST)
> +	   return false;
> +	 tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> +					   TREE_INT_CST_LOW (arg0));
> +	 vec<constructor_elt, va_gc> *ctor_elts = NULL;
> +	 unsigned int n_elts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs));
> +	 for (unsigned int i=0; i < n_elts ; i++)

+	 for (unsigned int i = 0; i < n_elts; i++)

Well that's all ;-)


Segher
will schmidt Dec. 7, 2017, 3:13 p.m. UTC | #5
On Thu, 2017-12-07 at 12:21 +0100, Richard Biener wrote:
> On Wed, Dec 6, 2017 at 5:36 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> > Hi,
> > Add support for gimple folding of splat_u{8,16,32}.
> > Testcase coverage is primarily handled by existing tests
> > testsuite/gcc.target/powerpc/fold-vec-splat_*.c
> >
> > One new test added to verify we continue to receive
> > an 'invalid argument, must be a 5-bit immediate' error
> > when we try to splat a non-constant value.
> >
> > Regtests currently running across assorted power systems.
> > OK for trunk with successful results?
> >
> > Thanks
> > -Will
> >
> > [gcc]
> >
> > 2017-12-05  Will Schmidt  <will_schmidt@vnet.ibm.com>
> >
> >         * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
> >         early folding of splat_u{8,16,32}.
> >
> > [testsuite]
> >
> > 2017-12-05  Will Schmidt  <will_schmidt@vnet.ibm.com>
> >
> >         * gcc.target/powerpc/fold-vec-splat-misc-invalid.c: New.
> >
> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > index 045a014..1470557 100644
> > --- a/gcc/config/rs6000/rs6000.c
> > +++ b/gcc/config/rs6000/rs6000.c
> > @@ -16614,10 +16614,33 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >      case VSX_BUILTIN_CMPLE_2DI:
> >      case VSX_BUILTIN_CMPLE_U2DI:
> >        fold_compare_helper (gsi, LE_EXPR, stmt);
> >        return true;
> >
> > +    /* flavors of vec_splat_[us]{8,16,32}.  */
> > +    case ALTIVEC_BUILTIN_VSPLTISB:
> > +    case ALTIVEC_BUILTIN_VSPLTISH:
> > +    case ALTIVEC_BUILTIN_VSPLTISW:
> > +      {
> > +        arg0 = gimple_call_arg (stmt, 0);
> > +        lhs = gimple_call_lhs (stmt);
> > +        /* Only fold the vec_splat_*() if arg0 is constant.  */
> > +        if ( TREE_CODE (arg0) != INTEGER_CST)
> > +          return false;
> 
> Is there a reason to not do this for non-constants?  (even not for
> float constants?)

The full regtest failed elsewhere in the suite if I didn't limit this to
just constants, which is part of why I also added the dg-error checking
test.  [Something in the area of gcc/config/rs6000/emmintrin.h
_mm_slli_epi16() which calls vec_splat_s16(__B), iirc.]  The net of that
seemed to be that I had folded something I shouldn't have.  I'll
incorporate the other suggested changes from you and Jakub and Segher,
and double-check the results.

> You should probably double-check there is a LHS, folding runs before DCE.
Anything without a LHS gets rejected early in
rs6000_gimple_fold_builtin() , so I should be covered there.  :-)


> > +        tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> > +                                          TREE_INT_CST_LOW (arg0));
> > +        vec<constructor_elt, va_gc> *ctor_elts = NULL;
> > +        unsigned int n_elts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs));
> > +        for (unsigned int i=0; i < n_elts ; i++)
> > +          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, splat_value);
> > +        tree splat_tree = build_constructor (TREE_TYPE (lhs), ctor_elts);
> 
> Just use
> 
>            tree slat_tree = build_vector_from_val (splat_value);
> 
> that would also work for non-constants btw.

Alright, thanks. :-)

-Will

> 
> > +        g = gimple_build_assign (lhs, splat_tree);
> > +        gimple_set_location (g, gimple_location (stmt));
> > +        gsi_replace (gsi, g, true);
> > +        return true;
> > +      }
> > +
> >      default:
> >        if (TARGET_DEBUG_BUILTIN)
> >         fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> >                  fn_code, fn_name1, fn_name2);
> >        break;
> > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> > new file mode 100644
> > index 0000000..20f5b05
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> > @@ -0,0 +1,33 @@
> > +/* Verify that overloaded built-ins for vec_splat_s8 and vec_splat_s16
> > +   generate errors as expected when we attempt to use invalid inputs.  */
> > +
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target powerpc_vsx_ok } */
> > +/* { dg-options "-mvsx -O2" } */
> > +
> > +#include <altivec.h>
> > +
> > +vector signed short
> > +testss_1 (unsigned int ui)
> > +{
> > +  return vec_splat_s16 (ui);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
> > +}
> > +
> > +vector unsigned short
> > +testss_2 (signed int si)
> > +{
> > +  return vec_splat_u16 (si);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
> > +}
> > +
> > +vector signed char
> > +testsc_1 (unsigned int ui)
> > +{
> > +  return vec_splat_s8 (ui); /* { dg-error "argument 1 must be a 5-bit signed literal" } */
> > +}
> > +
> > +vector unsigned char
> > +testsc_2 (signed int si)
> > +{
> > +  return vec_splat_u8 (si);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
> > +}
> > +
> >
> >
>
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 045a014..1470557 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16614,10 +16614,33 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_CMPLE_2DI:
     case VSX_BUILTIN_CMPLE_U2DI:
       fold_compare_helper (gsi, LE_EXPR, stmt);
       return true;
 
+    /* flavors of vec_splat_[us]{8,16,32}.  */
+    case ALTIVEC_BUILTIN_VSPLTISB:
+    case ALTIVEC_BUILTIN_VSPLTISH:
+    case ALTIVEC_BUILTIN_VSPLTISW:
+      {
+	 arg0 = gimple_call_arg (stmt, 0);
+	 lhs = gimple_call_lhs (stmt);
+	 /* Only fold the vec_splat_*() if arg0 is constant.  */
+	 if ( TREE_CODE (arg0) != INTEGER_CST)
+	   return false;
+	 tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
+					   TREE_INT_CST_LOW (arg0));
+	 vec<constructor_elt, va_gc> *ctor_elts = NULL;
+	 unsigned int n_elts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs));
+	 for (unsigned int i=0; i < n_elts ; i++)
+	   CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, splat_value);
+	 tree splat_tree = build_constructor (TREE_TYPE (lhs), ctor_elts);
+	 g = gimple_build_assign (lhs, splat_tree);
+	 gimple_set_location (g, gimple_location (stmt));
+	 gsi_replace (gsi, g, true);
+	 return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
 		 fn_code, fn_name1, fn_name2);
       break;
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
new file mode 100644
index 0000000..20f5b05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
@@ -0,0 +1,33 @@ 
+/* Verify that overloaded built-ins for vec_splat_s8 and vec_splat_s16
+   generate errors as expected when we attempt to use invalid inputs.  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mvsx -O2" } */
+
+#include <altivec.h>
+
+vector signed short
+testss_1 (unsigned int ui)
+{
+  return vec_splat_s16 (ui);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
+}
+
+vector unsigned short
+testss_2 (signed int si)
+{
+  return vec_splat_u16 (si);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
+}
+
+vector signed char
+testsc_1 (unsigned int ui)
+{
+  return vec_splat_s8 (ui); /* { dg-error "argument 1 must be a 5-bit signed literal" } */
+}
+
+vector unsigned char
+testsc_2 (signed int si)
+{
+  return vec_splat_u8 (si);/* { dg-error "argument 1 must be a 5-bit signed literal" } */
+}
+