diff mbox series

Fix PR90006

Message ID alpine.LSU.2.20.1904081512380.27537@zhemvz.fhfr.qr
State New
Headers show
Series Fix PR90006 | expand

Commit Message

Richard Biener April 8, 2019, 1:13 p.m. UTC
The following fixes SLP vectorization to properly consider
calls like lrint demoting on ilp32 targets.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2019-04-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/90006
	* tree-vect-data-refs.c (vect_get_smallest_scalar_type): Handle
	calls like lrint.

	* gcc.dg/vect/bb-slp-pr90006.c: New testcase.

Comments

Richard Sandiford April 8, 2019, 8:59 p.m. UTC | #1
Richard Biener <rguenther@suse.de> writes:
> The following fixes SLP vectorization to properly consider
> calls like lrint demoting on ilp32 targets.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Richard.
>
> 2019-04-08  Richard Biener  <rguenther@suse.de>
>
> 	PR tree-optimization/90006
> 	* tree-vect-data-refs.c (vect_get_smallest_scalar_type): Handle
> 	calls like lrint.
>
> 	* gcc.dg/vect/bb-slp-pr90006.c: New testcase.
>
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c	(revision 270202)
> +++ gcc/tree-vect-data-refs.c	(working copy)
> @@ -144,6 +144,15 @@ vect_get_smallest_scalar_type (gimple *s
>        if (rhs < lhs)
>          scalar_type = rhs_type;
>      }
> +  else if (is_gimple_call (stmt)
> +	   && gimple_call_num_args (stmt) > 0)
> +    {
> +      tree rhs_type = TREE_TYPE (gimple_call_arg (stmt, 0));
> +
> +      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
> +      if (rhs < lhs)
> +        scalar_type = rhs_type;
> +    }
>  
>    *lhs_size_unit = lhs;
>    *rhs_size_unit = rhs;

Looks like this causes a few regressions on SVE.  One is that we reach
here before doing much sanity checking, so for SLP we can see vectorised
calls with variable-length parameters.  That's easily fixed with the
same tree_fits_uhwi_p as for lhs.

The more difficult one is that we have various internal functions
whose first parameter isn't interesting for finding vector types.
E.g. for conditional internal functions like IFN_COND_DIV, the first
parameter is the boolean condition.  Using that here meant we ended up
thinking we needed vectors of 8-bit data, and so had multiple copies for
wider elements.  (The idea instead is that the condition width adapts
to match the data.)

I think the same thing could happen for masked loads and stores,
where the first parameter is a pointer.  E.g. if we have a 64-bit
load or store on an ILP32 target, we might end up thinking that we
need vectors of 32-bit elements when we don't.

The patch below fixes the cases caught by the testsuite, but I'm
not sure whether there are others lurking.  I guess the problem is
that returning a narrower type than necessary is really a QoI thing:
we just end up unrolling/interleaving the loop more times than
necessary, and most tests wouldn't pick that up.  (Given the PRs
about unrolling, it might accidentally be a good thing. :-))

Tested on aarch64-linux-gnu (with and without SVE) and x86_64-linux-gnu.
OK to install?

Richard


2019-04-08  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-data-refs.c (vect_get_smallest_scalar_type): Always
	use gimple_expr_type for load and store calls.  Skip over the
	condition argument in a conditional internal function.
	Protect use of TREE_INT_CST_LOW.

Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	2019-04-08 21:55:28.062370229 +0100
+++ gcc/tree-vect-data-refs.c	2019-04-08 21:55:34.382348514 +0100
@@ -145,14 +145,29 @@ vect_get_smallest_scalar_type (stmt_vec_
       if (rhs < lhs)
         scalar_type = rhs_type;
     }
-  else if (is_gimple_call (stmt_info->stmt)
-	   && gimple_call_num_args (stmt_info->stmt) > 0)
+  else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
     {
-      tree rhs_type = TREE_TYPE (gimple_call_arg (stmt_info->stmt, 0));
-
-      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
-      if (rhs < lhs)
-        scalar_type = rhs_type;
+      unsigned int i = 0;
+      if (gimple_call_internal_p (call))
+	{
+	  internal_fn ifn = gimple_call_internal_fn (call);
+	  if (internal_load_fn_p (ifn) || internal_store_fn_p (ifn))
+	    /* gimple_expr_type already picked the type of the loaded
+	       or stored data.  */
+	    i = ~0U;
+	  else if (internal_fn_mask_index (ifn) == 0)
+	    i = 1;
+	}
+      if (i < gimple_call_num_args (call))
+	{
+	  tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
+	  if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
+	    {
+	      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
+	      if (rhs < lhs)
+		scalar_type = rhs_type;
+	    }
+	}
     }
 
   *lhs_size_unit = lhs;
Richard Biener April 9, 2019, 6:42 a.m. UTC | #2
On April 8, 2019 10:59:49 PM GMT+02:00, Richard Sandiford <richard.sandiford@arm.com> wrote:
>Richard Biener <rguenther@suse.de> writes:
>> The following fixes SLP vectorization to properly consider
>> calls like lrint demoting on ilp32 targets.
>>
>> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>>
>> Richard.
>>
>> 2019-04-08  Richard Biener  <rguenther@suse.de>
>>
>> 	PR tree-optimization/90006
>> 	* tree-vect-data-refs.c (vect_get_smallest_scalar_type): Handle
>> 	calls like lrint.
>>
>> 	* gcc.dg/vect/bb-slp-pr90006.c: New testcase.
>>
>> Index: gcc/tree-vect-data-refs.c
>> ===================================================================
>> --- gcc/tree-vect-data-refs.c	(revision 270202)
>> +++ gcc/tree-vect-data-refs.c	(working copy)
>> @@ -144,6 +144,15 @@ vect_get_smallest_scalar_type (gimple *s
>>        if (rhs < lhs)
>>          scalar_type = rhs_type;
>>      }
>> +  else if (is_gimple_call (stmt)
>> +	   && gimple_call_num_args (stmt) > 0)
>> +    {
>> +      tree rhs_type = TREE_TYPE (gimple_call_arg (stmt, 0));
>> +
>> +      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
>> +      if (rhs < lhs)
>> +        scalar_type = rhs_type;
>> +    }
>>  
>>    *lhs_size_unit = lhs;
>>    *rhs_size_unit = rhs;
>
>Looks like this causes a few regressions on SVE.  One is that we reach
>here before doing much sanity checking, so for SLP we can see
>vectorised
>calls with variable-length parameters.  That's easily fixed with the
>same tree_fits_uhwi_p as for lhs.
>
>The more difficult one is that we have various internal functions
>whose first parameter isn't interesting for finding vector types.
>E.g. for conditional internal functions like IFN_COND_DIV, the first
>parameter is the boolean condition.  Using that here meant we ended up
>thinking we needed vectors of 8-bit data, and so had multiple copies
>for
>wider elements.  (The idea instead is that the condition width adapts
>to match the data.)
>
>I think the same thing could happen for masked loads and stores,
>where the first parameter is a pointer.  E.g. if we have a 64-bit
>load or store on an ILP32 target, we might end up thinking that we
>need vectors of 32-bit elements when we don't.

EH... 

>The patch below fixes the cases caught by the testsuite, but I'm
>not sure whether there are others lurking.  I guess the problem is
>that returning a narrower type than necessary is really a QoI thing:
>we just end up unrolling/interleaving the loop more times than
>necessary, and most tests wouldn't pick that up.  (Given the PRs
>about unrolling, it might accidentally be a good thing. :-))
>
>Tested on aarch64-linux-gnu (with and without SVE) and
>x86_64-linux-gnu.
>OK to install?

OK. 

Richard. 

>Richard
>
>
>2019-04-08  Richard Sandiford  <richard.sandiford@arm.com>
>
>gcc/
>	* tree-vect-data-refs.c (vect_get_smallest_scalar_type): Always
>	use gimple_expr_type for load and store calls.  Skip over the
>	condition argument in a conditional internal function.
>	Protect use of TREE_INT_CST_LOW.
>
>Index: gcc/tree-vect-data-refs.c
>===================================================================
>--- gcc/tree-vect-data-refs.c	2019-04-08 21:55:28.062370229 +0100
>+++ gcc/tree-vect-data-refs.c	2019-04-08 21:55:34.382348514 +0100
>@@ -145,14 +145,29 @@ vect_get_smallest_scalar_type (stmt_vec_
>       if (rhs < lhs)
>         scalar_type = rhs_type;
>     }
>-  else if (is_gimple_call (stmt_info->stmt)
>-	   && gimple_call_num_args (stmt_info->stmt) > 0)
>+  else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
>     {
>-      tree rhs_type = TREE_TYPE (gimple_call_arg (stmt_info->stmt,
>0));
>-
>-      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
>-      if (rhs < lhs)
>-        scalar_type = rhs_type;
>+      unsigned int i = 0;
>+      if (gimple_call_internal_p (call))
>+	{
>+	  internal_fn ifn = gimple_call_internal_fn (call);
>+	  if (internal_load_fn_p (ifn) || internal_store_fn_p (ifn))
>+	    /* gimple_expr_type already picked the type of the loaded
>+	       or stored data.  */
>+	    i = ~0U;
>+	  else if (internal_fn_mask_index (ifn) == 0)
>+	    i = 1;
>+	}
>+      if (i < gimple_call_num_args (call))
>+	{
>+	  tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
>+	  if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
>+	    {
>+	      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
>+	      if (rhs < lhs)
>+		scalar_type = rhs_type;
>+	    }
>+	}
>     }
> 
>   *lhs_size_unit = lhs;
diff mbox series

Patch

Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	(revision 270202)
+++ gcc/tree-vect-data-refs.c	(working copy)
@@ -144,6 +144,15 @@  vect_get_smallest_scalar_type (gimple *s
       if (rhs < lhs)
         scalar_type = rhs_type;
     }
+  else if (is_gimple_call (stmt)
+	   && gimple_call_num_args (stmt) > 0)
+    {
+      tree rhs_type = TREE_TYPE (gimple_call_arg (stmt, 0));
+
+      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
+      if (rhs < lhs)
+        scalar_type = rhs_type;
+    }
 
   *lhs_size_unit = lhs;
   *rhs_size_unit = rhs;
Index: gcc/testsuite/gcc.dg/vect/bb-slp-pr90006.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/bb-slp-pr90006.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/bb-slp-pr90006.c	(working copy)
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-math-errno" } */
+/* { dg-additional-options "-march=x86-64" { target x86_64-*-* i?86-*-* } } */
+
+long int lrint(double x);
+
+int a, b;
+union c {
+    int d;
+};
+
+int e()
+{
+  int f, g, h;
+  long i, j, k;
+  double l, m = b = lrint(0.3127);
+  a = b >> 16 >> 8 & 255;
+  ((union c *)e)->d = a;
+  k = m;
+  h = k >> 16 >> 8 & 255;
+  ((union c *)(e + 4))->d = h;
+  j = lrint(l);
+  g = j >> 16 >> 8 & 255;
+  ((union c *)(e + 8))->d = g;
+  i = lrint(0.292);
+  f = i >> 16 >> 8 & 255;
+  ((union c *)(e + 12))->d = f;
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target { { x86_64-*-* i?86-*-* } && ilp32 } } } } */