Patchwork -ftree-loop-linear fixes (PR tree-optimization/46970) (take 2)

login
register
mail settings
Submitter Jakub Jelinek
Date Dec. 20, 2010, 10:28 p.m.
Message ID <20101220222822.GB16156@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/76239/
State New
Headers show

Comments

Jakub Jelinek - Dec. 20, 2010, 10:28 p.m.
On Sun, Dec 19, 2010 at 11:05:07PM -0600, Sebastian Pop wrote:
> The patch looks good to me, but I cannot approve it.
> 
> >   /* We might have some leftover.  */
> > -  if (gimple_cond_code (exit_cond) == LT_EXPR)
> > -    extra = -1 * stepint;
> > -  else if (gimple_cond_code (exit_cond) == NE_EXPR)
> > -    extra = -1 * stepint;
> > -  else if (gimple_cond_code (exit_cond) == GT_EXPR)
> > -    extra = -1 * stepint;
> > -  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
> > -    extra = 1 * stepint;
> > +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
> > +    {
> > +      if (gimple_cond_code (exit_cond) == LT_EXPR)
> > +       extra = -1 * stepint;
> > +      else if (gimple_cond_code (exit_cond) == NE_EXPR)
> > +       extra = -1 * stepint;
> > +      else if (gimple_cond_code (exit_cond) == GT_EXPR)
> > +       extra = -1 * stepint;
> > +      else if (gimple_cond_code (exit_cond) == EQ_EXPR)
> > +       extra = 1 * stepint;
> > +    }
> > +  else
> > +    {
> > +      if (gimple_cond_code (exit_cond) == LE_EXPR)
> > +       extra = 1 * stepint;
> > +      else if (gimple_cond_code (exit_cond) == GE_EXPR)
> > +       extra = 1 * stepint;
> > +    }
> 
> It would be easier to read the following equivalent code:

Or a switch, as done in this patch, bootstrapped/regtested on x86_64-linux
and i686-linux:

2010-12-20  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/46970
	* lambda-code.c (gcc_loop_to_lambda_loop): Give up if
	step doesn't fit into host int or if def stmt of inductionvar
	is neither PHI nor increment by step.  If exit condition
	compares induction variable before increment, adjust ubound
	differently.

	* gcc.dg/pr46970-1.c: New test.
	* gcc.dg/pr46970-2.c: New test.


	Jakub
Sebastian Pop - Dec. 20, 2010, 10:36 p.m.
On Mon, Dec 20, 2010 at 16:28, Jakub Jelinek <jakub@redhat.com> wrote:
> On Sun, Dec 19, 2010 at 11:05:07PM -0600, Sebastian Pop wrote:
>> The patch looks good to me, but I cannot approve it.
>>
>> >   /* We might have some leftover.  */
>> > -  if (gimple_cond_code (exit_cond) == LT_EXPR)
>> > -    extra = -1 * stepint;
>> > -  else if (gimple_cond_code (exit_cond) == NE_EXPR)
>> > -    extra = -1 * stepint;
>> > -  else if (gimple_cond_code (exit_cond) == GT_EXPR)
>> > -    extra = -1 * stepint;
>> > -  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
>> > -    extra = 1 * stepint;
>> > +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
>> > +    {
>> > +      if (gimple_cond_code (exit_cond) == LT_EXPR)
>> > +       extra = -1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == NE_EXPR)
>> > +       extra = -1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == GT_EXPR)
>> > +       extra = -1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == EQ_EXPR)
>> > +       extra = 1 * stepint;
>> > +    }
>> > +  else
>> > +    {
>> > +      if (gimple_cond_code (exit_cond) == LE_EXPR)
>> > +       extra = 1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == GE_EXPR)
>> > +       extra = 1 * stepint;
>> > +    }
>>
>> It would be easier to read the following equivalent code:
>
> Or a switch, as done in this patch, bootstrapped/regtested on x86_64-linux
> and i686-linux:

I like the switch, thanks.
Richi, could you have a look at this patch as well.

Thanks,
Sebastian

>
> 2010-12-20  Jakub Jelinek  <jakub@redhat.com>
>
>        PR tree-optimization/46970
>        * lambda-code.c (gcc_loop_to_lambda_loop): Give up if
>        step doesn't fit into host int or if def stmt of inductionvar
>        is neither PHI nor increment by step.  If exit condition
>        compares induction variable before increment, adjust ubound
>        differently.
>
>        * gcc.dg/pr46970-1.c: New test.
>        * gcc.dg/pr46970-2.c: New test.
>
> --- gcc/lambda-code.c.jj        2010-08-20 16:05:41.000000000 +0200
> +++ gcc/lambda-code.c   2010-12-20 21:19:12.000000000 +0100
> @@ -1289,7 +1289,7 @@ gcc_loop_to_lambda_loop (struct loop *lo
>   if (gimple_code (phi) != GIMPLE_PHI)
>     {
>       tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
> -      if (!op)
> +      if (!op || !is_gimple_assign (phi))
>        {
>
>          if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo
>
>       return NULL;
>     }
> -  if (TREE_CODE (step) != INTEGER_CST)
> +  if (!host_integerp (step, 0)
> +      || (HOST_WIDE_INT) TREE_INT_CST_LOW (step)
> +         != (int) TREE_INT_CST_LOW (step))
>     {
>
>       if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo
>       return NULL;
>     }
>
> +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
> +    {
> +      gimple stmt = SSA_NAME_DEF_STMT (inductionvar);
> +      tree rhs2;
> +      int stepintval = stepint;
> +      switch (gimple_assign_rhs_code (stmt))
> +       {
> +       case MINUS_EXPR:
> +         stepintval = -stepint;
> +         /* FALLTHRU */
> +       case PLUS_EXPR:
> +       case POINTER_PLUS_EXPR:
> +         rhs2 = gimple_assign_rhs2 (stmt);
> +         if (TREE_CODE (rhs2) == INTEGER_CST
> +             && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval
> +             && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1))
> +           break;
> +         /* FALLTHRU */
> +       default:
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           fprintf (dump_file,
> +                    "Unable to convert loop: Cannot find PHI node for induction variable\n");
> +         return NULL;
> +       }
> +    }
> +
>   if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
>     {
>       lboundvar = PHI_ARG_DEF (phi, 1);
> @@ -1417,14 +1445,30 @@ gcc_loop_to_lambda_loop (struct loop *lo
>
>
>   /* We might have some leftover.  */
> -  if (gimple_cond_code (exit_cond) == LT_EXPR)
> -    extra = -1 * stepint;
> -  else if (gimple_cond_code (exit_cond) == NE_EXPR)
> -    extra = -1 * stepint;
> -  else if (gimple_cond_code (exit_cond) == GT_EXPR)
> -    extra = -1 * stepint;
> -  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
> -    extra = 1 * stepint;
> +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
> +    switch (gimple_cond_code (exit_cond))
> +      {
> +      case LT_EXPR:
> +      case NE_EXPR:
> +      case GT_EXPR:
> +       extra = -1 * stepint;
> +       break;
> +      case EQ_EXPR:
> +       extra = 1 * stepint;
> +       break;
> +      default:
> +       break;
> +      }
> +  else
> +    switch (gimple_cond_code (exit_cond))
> +      {
> +      case LE_EXPR:
> +      case GE_EXPR:
> +       extra = 1 * stepint;
> +       break;
> +      default:
> +       break;
> +      }
>
>   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
>                                          outerinductionvars,
> --- gcc/testsuite/gcc.dg/pr46970-1.c.jj 2010-12-17 16:08:01.000000000 +0100
> +++ gcc/testsuite/gcc.dg/pr46970-1.c    2010-12-17 16:07:46.000000000 +0100
> @@ -0,0 +1,27 @@
> +/* PR tree-optimization/46970 */
> +/* { dg-do run } */
> +/* { dg-options "-Os -ftree-loop-linear" } */
> +
> +int
> +foo (int n, int *a)
> +{
> +  int i, j;
> +
> +  for (i = 0; i < n; i++)
> +    for (j = 0; j < n; j++)
> +      a[j] = i + n;
> +
> +  for (j = 0; j < n; j++)
> +    if (a[j] != i + n - 1)
> +      __builtin_abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  int a[16];
> +  foo (16, a);
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr46970-2.c.jj 2010-12-17 18:01:05.000000000 +0100
> +++ gcc/testsuite/gcc.dg/pr46970-2.c    2010-12-17 16:43:46.000000000 +0100
> @@ -0,0 +1,28 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -ftree-loop-linear" } */
> +/* { dg-require-effective-target size32plus } */
> +
> +double u[1782225];
> +
> +__attribute__((noinline, noclone)) int
> +foo (int N, int *res)
> +{
> +  unsigned int i, j;
> +  double sum = 0;
> +  for (i = 0; i < N; i++)
> +    for (j = 0; j < N; j++)
> +      sum = sum + u[i + 1335 * j];
> +  *res = sum + N;
> +}
> +
> +int
> +main (void)
> +{
> +  int i;
> +  for (i = 0; i < 1782225; i++)
> +    u[i] = 1.0;
> +  foo (64, &i);
> +  if (i != 4160)
> +    __builtin_abort ();
> +  return 0;
> +}
>
>        Jakub
>
Richard Guenther - Dec. 22, 2010, 11:45 a.m.
On Mon, Dec 20, 2010 at 11:28 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Sun, Dec 19, 2010 at 11:05:07PM -0600, Sebastian Pop wrote:
>> The patch looks good to me, but I cannot approve it.
>>
>> >   /* We might have some leftover.  */
>> > -  if (gimple_cond_code (exit_cond) == LT_EXPR)
>> > -    extra = -1 * stepint;
>> > -  else if (gimple_cond_code (exit_cond) == NE_EXPR)
>> > -    extra = -1 * stepint;
>> > -  else if (gimple_cond_code (exit_cond) == GT_EXPR)
>> > -    extra = -1 * stepint;
>> > -  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
>> > -    extra = 1 * stepint;
>> > +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
>> > +    {
>> > +      if (gimple_cond_code (exit_cond) == LT_EXPR)
>> > +       extra = -1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == NE_EXPR)
>> > +       extra = -1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == GT_EXPR)
>> > +       extra = -1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == EQ_EXPR)
>> > +       extra = 1 * stepint;
>> > +    }
>> > +  else
>> > +    {
>> > +      if (gimple_cond_code (exit_cond) == LE_EXPR)
>> > +       extra = 1 * stepint;
>> > +      else if (gimple_cond_code (exit_cond) == GE_EXPR)
>> > +       extra = 1 * stepint;
>> > +    }
>>
>> It would be easier to read the following equivalent code:
>
> Or a switch, as done in this patch, bootstrapped/regtested on x86_64-linux
> and i686-linux:
>
> 2010-12-20  Jakub Jelinek  <jakub@redhat.com>
>
>        PR tree-optimization/46970
>        * lambda-code.c (gcc_loop_to_lambda_loop): Give up if
>        step doesn't fit into host int or if def stmt of inductionvar
>        is neither PHI nor increment by step.  If exit condition
>        compares induction variable before increment, adjust ubound
>        differently.
>
>        * gcc.dg/pr46970-1.c: New test.
>        * gcc.dg/pr46970-2.c: New test.
>
> --- gcc/lambda-code.c.jj        2010-08-20 16:05:41.000000000 +0200
> +++ gcc/lambda-code.c   2010-12-20 21:19:12.000000000 +0100
> @@ -1289,7 +1289,7 @@ gcc_loop_to_lambda_loop (struct loop *lo
>   if (gimple_code (phi) != GIMPLE_PHI)
>     {
>       tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
> -      if (!op)
> +      if (!op || !is_gimple_assign (phi))
>        {
>
>          if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo
>
>       return NULL;
>     }
> -  if (TREE_CODE (step) != INTEGER_CST)
> +  if (!host_integerp (step, 0)
> +      || (HOST_WIDE_INT) TREE_INT_CST_LOW (step)
> +         != (int) TREE_INT_CST_LOW (step))

That casting looks odd.  Why isn't the (supposed) use of step as 'int' not
changed to a HOST_WIDE_INT?

>     {
>
>       if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo
>       return NULL;
>     }
>
> +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
> +    {
> +      gimple stmt = SSA_NAME_DEF_STMT (inductionvar);
> +      tree rhs2;
> +      int stepintval = stepint;
> +      switch (gimple_assign_rhs_code (stmt))
> +       {
> +       case MINUS_EXPR:
> +         stepintval = -stepint;
> +         /* FALLTHRU */
> +       case PLUS_EXPR:
> +       case POINTER_PLUS_EXPR:
> +         rhs2 = gimple_assign_rhs2 (stmt);
> +         if (TREE_CODE (rhs2) == INTEGER_CST
> +             && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval
> +             && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1))

That would also simplify this code - why not tree_int_cst_equal (step,
rhs2) btw?

The rest of the patch looks sensible, though I know nothing about
tree-loop-linear.

Richard.

> +           break;
> +         /* FALLTHRU */
> +       default:
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           fprintf (dump_file,
> +                    "Unable to convert loop: Cannot find PHI node for induction variable\n");
> +         return NULL;
> +       }
> +    }
> +
>   if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
>     {
>       lboundvar = PHI_ARG_DEF (phi, 1);
> @@ -1417,14 +1445,30 @@ gcc_loop_to_lambda_loop (struct loop *lo
>
>
>   /* We might have some leftover.  */
> -  if (gimple_cond_code (exit_cond) == LT_EXPR)
> -    extra = -1 * stepint;
> -  else if (gimple_cond_code (exit_cond) == NE_EXPR)
> -    extra = -1 * stepint;
> -  else if (gimple_cond_code (exit_cond) == GT_EXPR)
> -    extra = -1 * stepint;
> -  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
> -    extra = 1 * stepint;
> +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
> +    switch (gimple_cond_code (exit_cond))
> +      {
> +      case LT_EXPR:
> +      case NE_EXPR:
> +      case GT_EXPR:
> +       extra = -1 * stepint;
> +       break;
> +      case EQ_EXPR:
> +       extra = 1 * stepint;
> +       break;
> +      default:
> +       break;
> +      }
> +  else
> +    switch (gimple_cond_code (exit_cond))
> +      {
> +      case LE_EXPR:
> +      case GE_EXPR:
> +       extra = 1 * stepint;
> +       break;
> +      default:
> +       break;
> +      }
>
>   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
>                                          outerinductionvars,
> --- gcc/testsuite/gcc.dg/pr46970-1.c.jj 2010-12-17 16:08:01.000000000 +0100
> +++ gcc/testsuite/gcc.dg/pr46970-1.c    2010-12-17 16:07:46.000000000 +0100
> @@ -0,0 +1,27 @@
> +/* PR tree-optimization/46970 */
> +/* { dg-do run } */
> +/* { dg-options "-Os -ftree-loop-linear" } */
> +
> +int
> +foo (int n, int *a)
> +{
> +  int i, j;
> +
> +  for (i = 0; i < n; i++)
> +    for (j = 0; j < n; j++)
> +      a[j] = i + n;
> +
> +  for (j = 0; j < n; j++)
> +    if (a[j] != i + n - 1)
> +      __builtin_abort ();
> +
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  int a[16];
> +  foo (16, a);
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr46970-2.c.jj 2010-12-17 18:01:05.000000000 +0100
> +++ gcc/testsuite/gcc.dg/pr46970-2.c    2010-12-17 16:43:46.000000000 +0100
> @@ -0,0 +1,28 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -ftree-loop-linear" } */
> +/* { dg-require-effective-target size32plus } */
> +
> +double u[1782225];
> +
> +__attribute__((noinline, noclone)) int
> +foo (int N, int *res)
> +{
> +  unsigned int i, j;
> +  double sum = 0;
> +  for (i = 0; i < N; i++)
> +    for (j = 0; j < N; j++)
> +      sum = sum + u[i + 1335 * j];
> +  *res = sum + N;
> +}
> +
> +int
> +main (void)
> +{
> +  int i;
> +  for (i = 0; i < 1782225; i++)
> +    u[i] = 1.0;
> +  foo (64, &i);
> +  if (i != 4160)
> +    __builtin_abort ();
> +  return 0;
> +}
>
>        Jakub
>
Jakub Jelinek - Jan. 4, 2011, 12:45 p.m.
On Wed, Dec 22, 2010 at 12:45:31PM +0100, Richard Guenther wrote:
> > @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo
> >
> >       return NULL;
> >     }
> > -  if (TREE_CODE (step) != INTEGER_CST)
> > +  if (!host_integerp (step, 0)
> > +      || (HOST_WIDE_INT) TREE_INT_CST_LOW (step)
> > +         != (int) TREE_INT_CST_LOW (step))
> 
> That casting looks odd.  Why isn't the (supposed) use of step as 'int' not
> changed to a HOST_WIDE_INT?

I've briefly looked at it, but changing int to HOST_WIDE_INT would be a huge
change, int is used basically everywhere in lambda-code, starting from
*lambda_vector and lot of other places.  Changing just LL_STEP to
HOST_WIDE_INT wouldn't help.  lambda-code looks very suspicious in many
places, e.g. I don't see anywhere where it would bother with checking for
overflows etc., wonder how it ever could get through patch review.
Niceties like:
  switch (TREE_CODE (expr))
    {
    case INTEGER_CST:
      {
        lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
        LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
        if (extra != 0)
          LLE_CONSTANT (lle) += extra;

        LLE_DENOMINATOR (lle) = 1;
      }
      break;
where it both doesn't bother to check that expr is host_integerp and doesn't
care if expr + extra doesn't overflow.

> > @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo
> >       return NULL;
> >     }
> >
> > +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
> > +    {
> > +      gimple stmt = SSA_NAME_DEF_STMT (inductionvar);
> > +      tree rhs2;
> > +      int stepintval = stepint;
> > +      switch (gimple_assign_rhs_code (stmt))
> > +       {
> > +       case MINUS_EXPR:
> > +         stepintval = -stepint;
> > +         /* FALLTHRU */
> > +       case PLUS_EXPR:
> > +       case POINTER_PLUS_EXPR:
> > +         rhs2 = gimple_assign_rhs2 (stmt);
> > +         if (TREE_CODE (rhs2) == INTEGER_CST
> > +             && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval
> > +             && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1))
> 
> That would also simplify this code - why not tree_int_cst_equal (step,
> rhs2) btw?

Because for MINUS_EXPR we don't want to check step, but - step ?
And compare_tree_int is an UHWI comparison instead of HWI.

	Jakub
Richard Guenther - Jan. 4, 2011, 4:22 p.m.
On Tue, Jan 4, 2011 at 1:45 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Dec 22, 2010 at 12:45:31PM +0100, Richard Guenther wrote:
>> > @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo
>> >
>> >       return NULL;
>> >     }
>> > -  if (TREE_CODE (step) != INTEGER_CST)
>> > +  if (!host_integerp (step, 0)
>> > +      || (HOST_WIDE_INT) TREE_INT_CST_LOW (step)
>> > +         != (int) TREE_INT_CST_LOW (step))
>>
>> That casting looks odd.  Why isn't the (supposed) use of step as 'int' not
>> changed to a HOST_WIDE_INT?
>
> I've briefly looked at it, but changing int to HOST_WIDE_INT would be a huge
> change, int is used basically everywhere in lambda-code, starting from
> *lambda_vector and lot of other places.  Changing just LL_STEP to
> HOST_WIDE_INT wouldn't help.  lambda-code looks very suspicious in many
> places, e.g. I don't see anywhere where it would bother with checking for
> overflows etc., wonder how it ever could get through patch review.
> Niceties like:
>  switch (TREE_CODE (expr))
>    {
>    case INTEGER_CST:
>      {
>        lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
>        LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
>        if (extra != 0)
>          LLE_CONSTANT (lle) += extra;
>
>        LLE_DENOMINATOR (lle) = 1;
>      }
>      break;
> where it both doesn't bother to check that expr is host_integerp and doesn't
> care if expr + extra doesn't overflow.
>
>> > @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo
>> >       return NULL;
>> >     }
>> >
>> > +  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
>> > +    {
>> > +      gimple stmt = SSA_NAME_DEF_STMT (inductionvar);
>> > +      tree rhs2;
>> > +      int stepintval = stepint;
>> > +      switch (gimple_assign_rhs_code (stmt))
>> > +       {
>> > +       case MINUS_EXPR:
>> > +         stepintval = -stepint;
>> > +         /* FALLTHRU */
>> > +       case PLUS_EXPR:
>> > +       case POINTER_PLUS_EXPR:
>> > +         rhs2 = gimple_assign_rhs2 (stmt);
>> > +         if (TREE_CODE (rhs2) == INTEGER_CST
>> > +             && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval
>> > +             && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1))
>>
>> That would also simplify this code - why not tree_int_cst_equal (step,
>> rhs2) btw?
>
> Because for MINUS_EXPR we don't want to check step, but - step ?
> And compare_tree_int is an UHWI comparison instead of HWI.

Ugh.  Sebastian - can we nuke tree-loop-linear compeltely and
make -ftree-loop-linear an alias for -floop-interchange without
regressions?  I'd like to reduce the number of broken passes from
2 to 1 this way ...

Richard.

>        Jakub
>
Sebastian Pop - Jan. 4, 2011, 4:54 p.m.
On Tue, Jan 4, 2011 at 10:22, Richard Guenther
<richard.guenther@gmail.com> wrote:
> Ugh.  Sebastian - can we nuke tree-loop-linear compeltely and
> make -ftree-loop-linear an alias for -floop-interchange without
> regressions?  I'd like to reduce the number of broken passes from
> 2 to 1 this way ...

I wouldn't mind removing tree-loop-linear, although other people
should also give their opinion on this matter: tree-loop-linear has no
external dependences whereas -floop-interchange depends on cloog and ppl.

Also we should get all the testsuite/gcc.dg/tree-ssa/ltrans-*.c
passing with -floop-interchange.  I will add all these testcases to
the graphite testsuite and see where we stand.

Sebastian
Sebastian Pop - Jan. 7, 2011, 12:24 a.m.
On Tue, Jan 4, 2011 at 10:54, Sebastian Pop <sebpop@gmail.com> wrote:
> On Tue, Jan 4, 2011 at 10:22, Richard Guenther
> <richard.guenther@gmail.com> wrote:
>> Ugh.  Sebastian - can we nuke tree-loop-linear compeltely and
>> make -ftree-loop-linear an alias for -floop-interchange without
>> regressions?  I'd like to reduce the number of broken passes from
>> 2 to 1 this way ...
>
> I wouldn't mind removing tree-loop-linear, although other people
> should also give their opinion on this matter: tree-loop-linear has no
> external dependences whereas -floop-interchange depends on cloog and ppl.
>
> Also we should get all the testsuite/gcc.dg/tree-ssa/ltrans-*.c
> passing with -floop-interchange.  I will add all these testcases to
> the graphite testsuite and see where we stand.

I already included the ltrans-{1,2,3,4,5,6,8}.c testcases in the graphite
testsuite as interchange-{1,2,3,4,5,6,7}.c respectively, out of which
all except interchange-{1,2}.c are interchanged.

interchange-1.c is the equivalent in C of the critical loop of
CFP2000/171.swim: with N = 1335

  for (i = 0; i < N; i++)
    {
      for (j = 0; j < N; j++)
        sum = sum + u[i + 1335 * j];

      u[1336 * i] *= 2;
    }

it looks like the data dependence analysis of graphite does not
validate the loop distribution before interchange:

  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      sum = sum + u[i + 1335 * j];

  for (i = 0; i < N; i++)
    u[1336 * i] *= 2;

Sebastian

Patch

--- gcc/lambda-code.c.jj	2010-08-20 16:05:41.000000000 +0200
+++ gcc/lambda-code.c	2010-12-20 21:19:12.000000000 +0100
@@ -1289,7 +1289,7 @@  gcc_loop_to_lambda_loop (struct loop *lo
   if (gimple_code (phi) != GIMPLE_PHI)
     {
       tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
-      if (!op)
+      if (!op || !is_gimple_assign (phi))
 	{
 
 	  if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1332,7 +1332,9 @@  gcc_loop_to_lambda_loop (struct loop *lo
 
       return NULL;
     }
-  if (TREE_CODE (step) != INTEGER_CST)
+  if (!host_integerp (step, 0)
+      || (HOST_WIDE_INT) TREE_INT_CST_LOW (step)
+	  != (int) TREE_INT_CST_LOW (step))
     {
 
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1366,6 +1368,32 @@  gcc_loop_to_lambda_loop (struct loop *lo
       return NULL;
     }
 
+  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
+    {
+      gimple stmt = SSA_NAME_DEF_STMT (inductionvar);
+      tree rhs2;
+      int stepintval = stepint;
+      switch (gimple_assign_rhs_code (stmt))
+	{
+	case MINUS_EXPR:
+	  stepintval = -stepint;
+	  /* FALLTHRU */
+	case PLUS_EXPR:
+	case POINTER_PLUS_EXPR:
+	  rhs2 = gimple_assign_rhs2 (stmt);
+	  if (TREE_CODE (rhs2) == INTEGER_CST
+	      && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval
+	      && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1))
+	    break;
+	  /* FALLTHRU */
+	default:
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file,
+		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
+	  return NULL;
+	}
+    }
+
   if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
     {
       lboundvar = PHI_ARG_DEF (phi, 1);
@@ -1417,14 +1445,30 @@  gcc_loop_to_lambda_loop (struct loop *lo
 
 
   /* We might have some leftover.  */
-  if (gimple_cond_code (exit_cond) == LT_EXPR)
-    extra = -1 * stepint;
-  else if (gimple_cond_code (exit_cond) == NE_EXPR)
-    extra = -1 * stepint;
-  else if (gimple_cond_code (exit_cond) == GT_EXPR)
-    extra = -1 * stepint;
-  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
-    extra = 1 * stepint;
+  if (SSA_NAME_DEF_STMT (inductionvar) != phi)
+    switch (gimple_cond_code (exit_cond))
+      {
+      case LT_EXPR:
+      case NE_EXPR:
+      case GT_EXPR:
+	extra = -1 * stepint;
+	break;
+      case EQ_EXPR:
+	extra = 1 * stepint;
+	break;
+      default:
+	break;
+      }
+  else
+    switch (gimple_cond_code (exit_cond))
+      {
+      case LE_EXPR:
+      case GE_EXPR:
+	extra = 1 * stepint;
+	break;
+      default:
+	break;
+      }
 
   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
 					  outerinductionvars,
--- gcc/testsuite/gcc.dg/pr46970-1.c.jj	2010-12-17 16:08:01.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr46970-1.c	2010-12-17 16:07:46.000000000 +0100
@@ -0,0 +1,27 @@ 
+/* PR tree-optimization/46970 */
+/* { dg-do run } */
+/* { dg-options "-Os -ftree-loop-linear" } */
+
+int
+foo (int n, int *a)
+{
+  int i, j;
+
+  for (i = 0; i < n; i++)
+    for (j = 0; j < n; j++)
+      a[j] = i + n;
+
+  for (j = 0; j < n; j++)
+    if (a[j] != i + n - 1)
+      __builtin_abort ();
+
+  return 0;
+}
+
+int
+main ()
+{
+  int a[16];
+  foo (16, a);
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr46970-2.c.jj	2010-12-17 18:01:05.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr46970-2.c	2010-12-17 16:43:46.000000000 +0100
@@ -0,0 +1,28 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-linear" } */
+/* { dg-require-effective-target size32plus } */
+
+double u[1782225];
+
+__attribute__((noinline, noclone)) int
+foo (int N, int *res)
+{
+  unsigned int i, j;
+  double sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum = sum + u[i + 1335 * j];
+  *res = sum + N;
+}
+
+int
+main (void)
+{
+  int i;
+  for (i = 0; i < 1782225; i++)
+    u[i] = 1.0;
+  foo (64, &i);
+  if (i != 4160)
+    __builtin_abort ();
+  return 0;
+}