diff mbox

Fix PR 51389

Message ID 4F1FF417.1080005@ispras.ru
State New
Headers show

Commit Message

Andrey Belevantsev Jan. 25, 2012, 12:22 p.m. UTC
Hello,

In this PR data dependence analysis goes wild by trying to process >20k 
datarefs, so the patch limits the number of datarefs per loop we handle to 
1000 via a param.  On the way find_data_references_in_loop is made static 
and predcom/parloops are fixed for compute_data_dependences_for_loop 
returning false.

Bootstrapped and tested on x86_64-linux, no testcase as it is really a 
memory hog.  Strictly speaking, this could be a regression only from the 
time when -O3 didn't have predcom/vectorization turned on by default, so -- 
ok for trunk or 4.8?  Branches?

Andrey

2012-01-25  Andrey Belevantsev  <abel@ispras.ru>

	PR middle-end/51389

	* Makefile.in (tree-data-ref.o): Depend on $(PARAMS_H).
	* tree-data-ref.c (find_data_references_in_loop): Make static.
	Bail out for too many datarefs in a loop.
	* tree-data-ref.h (find_data_references_in_loop): Remove declaration.
	* tree-predcom.c (tree_predictive_commoning_loop): Bail out when
	compute_data_dependences_for_loop returns false.
	* tree-parloops.c (loop_parallel_p): Likewise.
	* params.def (PARAM_DATADEPS_MAX_DATAREFS_IN_LOOP): New param.

Comments

Richard Biener Jan. 25, 2012, 12:38 p.m. UTC | #1
2012/1/25 Andrey Belevantsev <abel@ispras.ru>:
> Hello,
>
> In this PR data dependence analysis goes wild by trying to process >20k
> datarefs, so the patch limits the number of datarefs per loop we handle to
> 1000 via a param.  On the way find_data_references_in_loop is made static
> and predcom/parloops are fixed for compute_data_dependences_for_loop
> returning false.
>
> Bootstrapped and tested on x86_64-linux, no testcase as it is really a
> memory hog.  Strictly speaking, this could be a regression only from the
> time when -O3 didn't have predcom/vectorization turned on by default, so --
> ok for trunk or 4.8?  Branches?

You limit the number of data references we find - but that isn't really
the scalability issue (it's linear in the number of stmts).  It's really
compute_all_dependences that hits the quadraticness, thus _that_
function should fail instead (I realize it doesn't return any success/failure
state yet, but the number of callers is limited).

Btw, new params need documentation in doc/invoke.texi.

The tree-predcom.c and tree-parloops.c changes are ok for trunk anyway.

Thanks,
Richard.

> Andrey
>
> 2012-01-25  Andrey Belevantsev  <abel@ispras.ru>
>
>        PR middle-end/51389
>
>        * Makefile.in (tree-data-ref.o): Depend on $(PARAMS_H).
>        * tree-data-ref.c (find_data_references_in_loop): Make static.
>        Bail out for too many datarefs in a loop.
>        * tree-data-ref.h (find_data_references_in_loop): Remove declaration.
>        * tree-predcom.c (tree_predictive_commoning_loop): Bail out when
>        compute_data_dependences_for_loop returns false.
>        * tree-parloops.c (loop_parallel_p): Likewise.
>        * params.def (PARAM_DATADEPS_MAX_DATAREFS_IN_LOOP): New param.
>
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index f450d3e..43295aa 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -2598,7 +2598,7 @@ tree-scalar-evolution.o : tree-scalar-evolution.c
> $(CONFIG_H) $(SYSTEM_H) \
>    $(TREE_PASS_H) $(PARAMS_H) gt-tree-scalar-evolution.h
>  tree-data-ref.o : tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>    gimple-pretty-print.h $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \
> -   $(TREE_PASS_H) langhooks.h tree-affine.h
> +   $(TREE_PASS_H) langhooks.h tree-affine.h $(PARAMS_H)
>  sese.o : sese.c sese.h $(CONFIG_H) $(SYSTEM_H) coretypes.h
> tree-pretty-print.h \
>    $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) tree-pass.h value-prof.h
>  graphite.o : graphite.c $(CONFIG_H) $(SYSTEM_H) coretypes.h
> $(DIAGNOSTIC_CORE_H) \
> diff --git a/gcc/params.def b/gcc/params.def
> index 239b684..e50fa26 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -820,6 +820,12 @@ DEFPARAM (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION,
>          "maximum number of basic blocks per function to be analyzed by
> Graphite",
>          100, 0, 0)
>
> +/* Avoid data dependence analysis on very large loops.  */
> +DEFPARAM (PARAM_DATADEPS_MAX_DATAREFS_IN_LOOP,
> +         "datadeps-max-datarefs-in-loop",
> +         "Maximum number of datarefs in loop for loop data dependence
> analysis",
> +         1000, 0, 0)
> +
>  /* Avoid doing loop invariant motion on very large loops.  */
>
>  DEFPARAM (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP,
> diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
> index a0d86ec..5a7659a 100644
> --- a/gcc/tree-data-ref.c
> +++ b/gcc/tree-data-ref.c
> @@ -85,6 +85,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-pass.h"
>  #include "langhooks.h"
>  #include "tree-affine.h"
> +#include "params.h"
>
>  static struct datadep_stats
>  {
> @@ -4338,7 +4339,7 @@ find_data_references_in_bb (struct loop *loop,
> basic_block bb,
>    TODO: This function should be made smarter so that it can handle address
>    arithmetic as if they were array accesses, etc.  */
>
> -tree
> +static tree
>  find_data_references_in_loop (struct loop *loop,
>                              VEC (data_reference_p, heap) **datarefs)
>  {
> @@ -4351,7 +4352,9 @@ find_data_references_in_loop (struct loop *loop,
>     {
>       bb = bbs[i];
>
> -      if (find_data_references_in_bb (loop, bb, datarefs) ==
> chrec_dont_know)
> +      if (find_data_references_in_bb (loop, bb, datarefs) ==
> chrec_dont_know
> +         || ((int) VEC_length (data_reference_p, *datarefs)
> +             > PARAM_VALUE (PARAM_DATADEPS_MAX_DATAREFS_IN_LOOP)))
>         {
>           free (bbs);
>           return chrec_dont_know;
> diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
> index 0f12962..66258ab 100644
> --- a/gcc/tree-data-ref.h
> +++ b/gcc/tree-data-ref.h
> @@ -394,8 +394,6 @@ extern bool compute_data_dependences_for_loop (struct
> loop *, bool,
>  extern bool compute_data_dependences_for_bb (basic_block, bool,
>                                              VEC (data_reference_p, heap)
> **,
>                                              VEC (ddr_p, heap) **);
> -extern tree find_data_references_in_loop (struct loop *,
> -                                          VEC (data_reference_p, heap) **);
>  extern void print_direction_vector (FILE *, lambda_vector, int);
>  extern void print_dir_vectors (FILE *, VEC (lambda_vector, heap) *, int);
>  extern void print_dist_vectors (FILE *, VEC (lambda_vector, heap) *, int);
> diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
> index 339ddcc..c5b7655 100644
> --- a/gcc/tree-parloops.c
> +++ b/gcc/tree-parloops.c
> @@ -387,8 +387,14 @@ loop_parallel_p (struct loop *loop, struct obstack *
> parloop_obstack)
>   datarefs = VEC_alloc (data_reference_p, heap, 10);
>   dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
>   loop_nest = VEC_alloc (loop_p, heap, 3);
> -  compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
> -                                    &dependence_relations);
> +  if (! compute_data_dependences_for_loop (loop, true, &loop_nest,
> &datarefs,
> +                                          &dependence_relations))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
> +      ret = false;
> +      goto end;
> +    }
>   if (dump_file && (dump_flags & TDF_DETAILS))
>     dump_data_dependence_relations (dump_file, dependence_relations);
>
> @@ -405,6 +411,7 @@ loop_parallel_p (struct loop *loop, struct obstack *
> parloop_obstack)
>     fprintf (dump_file,
>             "  FAILED: data dependencies exist across iterations\n");
>
> + end:
>   VEC_free (loop_p, heap, loop_nest);
>   free_dependence_relations (dependence_relations);
>   free_data_refs (datarefs);
> diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
> index 751bdeb..6419878 100644
> --- a/gcc/tree-predcom.c
> +++ b/gcc/tree-predcom.c
> @@ -2476,8 +2476,17 @@ tree_predictive_commoning_loop (struct loop *loop)
>   datarefs = VEC_alloc (data_reference_p, heap, 10);
>   dependences = VEC_alloc (ddr_p, heap, 10);
>   loop_nest = VEC_alloc (loop_p, heap, 3);
> -  compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
> -                                    &dependences);
> +  if (! compute_data_dependences_for_loop (loop, true, &loop_nest,
> &datarefs,
> +                                          &dependences))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file, "Cannot analyze data dependencies\n");
> +      VEC_free (loop_p, heap, loop_nest);
> +      free_data_refs (datarefs);
> +      free_dependence_relations (dependences);
> +      return false;
> +    }
> +
>   if (dump_file && (dump_flags & TDF_DETAILS))
>     dump_data_dependence_relations (dump_file, dependences);
>
>
Andrey Belevantsev Jan. 25, 2012, 12:47 p.m. UTC | #2
On 25.01.2012 16:38, Richard Guenther wrote:
> 2012/1/25 Andrey Belevantsev<abel@ispras.ru>:
>> Hello,
>>
>> In this PR data dependence analysis goes wild by trying to process>20k
>> datarefs, so the patch limits the number of datarefs per loop we handle to
>> 1000 via a param.  On the way find_data_references_in_loop is made static
>> and predcom/parloops are fixed for compute_data_dependences_for_loop
>> returning false.
>>
>> Bootstrapped and tested on x86_64-linux, no testcase as it is really a
>> memory hog.  Strictly speaking, this could be a regression only from the
>> time when -O3 didn't have predcom/vectorization turned on by default, so --
>> ok for trunk or 4.8?  Branches?
>
> You limit the number of data references we find - but that isn't really
> the scalability issue (it's linear in the number of stmts).  It's really
> compute_all_dependences that hits the quadraticness, thus _that_
> function should fail instead (I realize it doesn't return any success/failure
> state yet, but the number of callers is limited).
Sure, I've just had the impression that the datarefs vector is no use 
without the dependencies themselves.  The possibility of making 
find_data_references_in_loop static also kind of hints in this direction. 
Anyways, I don't have any problem with fixing compute_all_dependences instead.

> Btw, new params need documentation in doc/invoke.texi.
Bah, I convinced myself params.def was the only documentation :-)

> The tree-predcom.c and tree-parloops.c changes are ok for trunk anyway.
I will commit them separately.

Andrey
Richard Biener Jan. 25, 2012, 2:21 p.m. UTC | #3
2012/1/25 Andrey Belevantsev <abel@ispras.ru>:
> On 25.01.2012 16:38, Richard Guenther wrote:
>>
>> 2012/1/25 Andrey Belevantsev<abel@ispras.ru>:
>>>
>>> Hello,
>>>
>>> In this PR data dependence analysis goes wild by trying to process>20k
>>> datarefs, so the patch limits the number of datarefs per loop we handle
>>> to
>>> 1000 via a param.  On the way find_data_references_in_loop is made static
>>> and predcom/parloops are fixed for compute_data_dependences_for_loop
>>> returning false.
>>>
>>> Bootstrapped and tested on x86_64-linux, no testcase as it is really a
>>> memory hog.  Strictly speaking, this could be a regression only from the
>>> time when -O3 didn't have predcom/vectorization turned on by default, so
>>> --
>>> ok for trunk or 4.8?  Branches?
>>
>>
>> You limit the number of data references we find - but that isn't really
>> the scalability issue (it's linear in the number of stmts).  It's really
>> compute_all_dependences that hits the quadraticness, thus _that_
>> function should fail instead (I realize it doesn't return any
>> success/failure
>> state yet, but the number of callers is limited).
>
> Sure, I've just had the impression that the datarefs vector is no use
> without the dependencies themselves.  The possibility of making
> find_data_references_in_loop static also kind of hints in this direction.
> Anyways, I don't have any problem with fixing compute_all_dependences
> instead.

I'd prefer that.

>> Btw, new params need documentation in doc/invoke.texi.
>
> Bah, I convinced myself params.def was the only documentation :-)
>
>
>> The tree-predcom.c and tree-parloops.c changes are ok for trunk anyway.
>
> I will commit them separately.

Thanks,
Richard.

> Andrey
diff mbox

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index f450d3e..43295aa 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2598,7 +2598,7 @@  tree-scalar-evolution.o : tree-scalar-evolution.c 
$(CONFIG_H) $(SYSTEM_H) \
     $(TREE_PASS_H) $(PARAMS_H) gt-tree-scalar-evolution.h
  tree-data-ref.o : tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
     gimple-pretty-print.h $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \
-   $(TREE_PASS_H) langhooks.h tree-affine.h
+   $(TREE_PASS_H) langhooks.h tree-affine.h $(PARAMS_H)
  sese.o : sese.c sese.h $(CONFIG_H) $(SYSTEM_H) coretypes.h 
tree-pretty-print.h \
     $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) tree-pass.h value-prof.h
  graphite.o : graphite.c $(CONFIG_H) $(SYSTEM_H) coretypes.h 
$(DIAGNOSTIC_CORE_H) \
diff --git a/gcc/params.def b/gcc/params.def
index 239b684..e50fa26 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -820,6 +820,12 @@  DEFPARAM (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION,
  	  "maximum number of basic blocks per function to be analyzed by Graphite",
  	  100, 0, 0)

+/* Avoid data dependence analysis on very large loops.  */
+DEFPARAM (PARAM_DATADEPS_MAX_DATAREFS_IN_LOOP,
+	  "datadeps-max-datarefs-in-loop",
+	  "Maximum number of datarefs in loop for loop data dependence analysis",
+	  1000, 0, 0)
+
  /* Avoid doing loop invariant motion on very large loops.  */

  DEFPARAM (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP,
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index a0d86ec..5a7659a 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -85,6 +85,7 @@  along with GCC; see the file COPYING3.  If not see
  #include "tree-pass.h"
  #include "langhooks.h"
  #include "tree-affine.h"
+#include "params.h"

  static struct datadep_stats
  {
@@ -4338,7 +4339,7 @@  find_data_references_in_bb (struct loop *loop, 
basic_block bb,
     TODO: This function should be made smarter so that it can handle address
     arithmetic as if they were array accesses, etc.  */

-tree
+static tree
  find_data_references_in_loop (struct loop *loop,
  			      VEC (data_reference_p, heap) **datarefs)
  {
@@ -4351,7 +4352,9 @@  find_data_references_in_loop (struct loop *loop,
      {
        bb = bbs[i];

-      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
+      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know
+	  || ((int) VEC_length (data_reference_p, *datarefs)
+	      > PARAM_VALUE (PARAM_DATADEPS_MAX_DATAREFS_IN_LOOP)))
          {
            free (bbs);
            return chrec_dont_know;
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index 0f12962..66258ab 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -394,8 +394,6 @@  extern bool compute_data_dependences_for_loop (struct 
loop *, bool,
  extern bool compute_data_dependences_for_bb (basic_block, bool,
                                               VEC (data_reference_p, heap) **,
                                               VEC (ddr_p, heap) **);
-extern tree find_data_references_in_loop (struct loop *,
-                                          VEC (data_reference_p, heap) **);
  extern void print_direction_vector (FILE *, lambda_vector, int);
  extern void print_dir_vectors (FILE *, VEC (lambda_vector, heap) *, int);
  extern void print_dist_vectors (FILE *, VEC (lambda_vector, heap) *, int);
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 339ddcc..c5b7655 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -387,8 +387,14 @@  loop_parallel_p (struct loop *loop, struct obstack * 
parloop_obstack)
    datarefs = VEC_alloc (data_reference_p, heap, 10);
    dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
    loop_nest = VEC_alloc (loop_p, heap, 3);
-  compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
-				     &dependence_relations);
+  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
+					   &dependence_relations))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
+      ret = false;
+      goto end;
+    }
    if (dump_file && (dump_flags & TDF_DETAILS))
      dump_data_dependence_relations (dump_file, dependence_relations);

@@ -405,6 +411,7 @@  loop_parallel_p (struct loop *loop, struct obstack * 
parloop_obstack)
      fprintf (dump_file,
  	     "  FAILED: data dependencies exist across iterations\n");

+ end:
    VEC_free (loop_p, heap, loop_nest);
    free_dependence_relations (dependence_relations);
    free_data_refs (datarefs);
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 751bdeb..6419878 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -2476,8 +2476,17 @@  tree_predictive_commoning_loop (struct loop *loop)
    datarefs = VEC_alloc (data_reference_p, heap, 10);
    dependences = VEC_alloc (ddr_p, heap, 10);
    loop_nest = VEC_alloc (loop_p, heap, 3);
-  compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
-				     &dependences);
+  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
+					   &dependences))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Cannot analyze data dependencies\n");
+      VEC_free (loop_p, heap, loop_nest);
+      free_data_refs (datarefs);
+      free_dependence_relations (dependences);
+      return false;
+    }
+
    if (dump_file && (dump_flags & TDF_DETAILS))
      dump_data_dependence_relations (dump_file, dependences);