diff mbox series

Allow loop crossing paths in back threader copier.

Message ID 20211130154731.231707-1-aldyh@redhat.com
State New
Headers show
Series Allow loop crossing paths in back threader copier. | expand

Commit Message

Aldy Hernandez Nov. 30, 2021, 3:47 p.m. UTC
We are currently restricting loop crossing paths in the generic copier
used by the back threader, but we should be able to handle them after
loop_done has completed.

This fixes the PR at -O2, though the problem remains at -O1 because we
have no threaders smart enough to elide the undefined read.  DOM3 could
be a candidate when it is converted to either a hybrid threader or
replaced with the backward threader (when ranger can handle floats).

Tested on x86-64 Linux.

OK for trunk?

	PR tree-optimization/80548

gcc/ChangeLog:

	* attribs.c (sorted_attr_string): Add assert for -Wstringop-overread.
	* tree-ssa-threadupdate.c
	(back_jt_path_registry::duplicate_thread_path): Allow paths that
	cross loops after loop_done.
	(back_jt_path_registry::update_cfg): Diagnose dropped threads
	after duplicate_thread_path.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr80548.c: New test.
---
 gcc/attribs.c                  |  1 +
 gcc/testsuite/gcc.dg/pr80548.c | 23 +++++++++++++++++++++++
 gcc/tree-ssa-threadupdate.c    | 19 +++++++++++--------
 3 files changed, 35 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr80548.c

Comments

Richard Biener Dec. 1, 2021, 1:36 p.m. UTC | #1
On Tue, Nov 30, 2021 at 4:48 PM Aldy Hernandez via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> We are currently restricting loop crossing paths in the generic copier
> used by the back threader, but we should be able to handle them after
> loop_done has completed.

But this isn't a cost thing but a correctness (as in, can we update
loops properly)
thing.  We are passing 'loop' down to copy_bbs and very much rely on
seeing either BBs of that loop or copying complete subloops of it.

You will likely see excess loop fixup caused by this or ICEs in case the
caller of this function will not always set LOOPS_NEED_FIXUP
(not that copy_bbs handling of loops is perfect).

> This fixes the PR at -O2, though the problem remains at -O1 because we
> have no threaders smart enough to elide the undefined read.  DOM3 could
> be a candidate when it is converted to either a hybrid threader or
> replaced with the backward threader (when ranger can handle floats).
>
> Tested on x86-64 Linux.
>
> OK for trunk?
>
>         PR tree-optimization/80548
>
> gcc/ChangeLog:
>
>         * attribs.c (sorted_attr_string): Add assert for -Wstringop-overread.
>         * tree-ssa-threadupdate.c
>         (back_jt_path_registry::duplicate_thread_path): Allow paths that
>         cross loops after loop_done.
>         (back_jt_path_registry::update_cfg): Diagnose dropped threads
>         after duplicate_thread_path.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/pr80548.c: New test.
> ---
>  gcc/attribs.c                  |  1 +
>  gcc/testsuite/gcc.dg/pr80548.c | 23 +++++++++++++++++++++++
>  gcc/tree-ssa-threadupdate.c    | 19 +++++++++++--------
>  3 files changed, 35 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr80548.c
>
> diff --git a/gcc/attribs.c b/gcc/attribs.c
> index c252f5af07b..9a079b8405a 100644
> --- a/gcc/attribs.c
> +++ b/gcc/attribs.c
> @@ -1035,6 +1035,7 @@ sorted_attr_string (tree arglist)
>        attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
>        str_len_sum += len + 1;
>      }
> +  gcc_assert (arglist);
>
>    /* Replace "=,-" with "_".  */
>    for (i = 0; i < strlen (attr_str); i++)
> diff --git a/gcc/testsuite/gcc.dg/pr80548.c b/gcc/testsuite/gcc.dg/pr80548.c
> new file mode 100644
> index 00000000000..2327111143e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr80548.c
> @@ -0,0 +1,23 @@
> +// { dg-do compile }
> +// { dg-options "-O2 -Wuninitialized" }
> +
> +int g (void);
> +void h (int, int);
> +
> +void f (int b)
> +{
> +  int x, y;
> +
> +  if (b)
> +    {
> +      x = g ();
> +      y = g ();
> +    }
> +
> +  while (g ())
> +    if (b)
> +      {
> +        h (x, y); // { dg-bogus "uninit" }
> +        y = g ();
> +      }
> +}
> diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
> index 8aac733ac25..b194c11e23d 100644
> --- a/gcc/tree-ssa-threadupdate.c
> +++ b/gcc/tree-ssa-threadupdate.c
> @@ -2410,13 +2410,14 @@ back_jt_path_registry::duplicate_thread_path (edge entry,
>       missuses of the functions.  I.e. if you ask to copy something weird,
>       it will work, but the state of structures probably will not be
>       correct.  */
> -  for (i = 0; i < n_region; i++)
> -    {
> -      /* We do not handle subloops, i.e. all the blocks must belong to the
> -        same loop.  */
> -      if (region[i]->loop_father != loop)
> -       return false;
> -    }
> +  if (!(cfun->curr_properties & PROP_loop_opts_done))
> +    for (i = 0; i < n_region; i++)
> +      {
> +       /* We do not handle subloops, i.e. all the blocks must belong to the
> +          same loop.  */
> +       if (region[i]->loop_father != loop)
> +         return false;
> +      }
>
>    initialize_original_copy_tables ();
>
> @@ -2651,9 +2652,11 @@ back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
>           visited_starting_edges.add (entry);
>           retval = true;
>           m_num_threaded_edges++;
> +         path->release ();
>         }
> +      else
> +       cancel_thread (path, "Failure in duplicate_thread_path");
>
> -      path->release ();
>        m_paths.unordered_remove (0);
>        free (region);
>      }
> --
> 2.31.1
>
Aldy Hernandez Dec. 1, 2021, 4 p.m. UTC | #2
On Wed, Dec 1, 2021 at 2:36 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Nov 30, 2021 at 4:48 PM Aldy Hernandez via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > We are currently restricting loop crossing paths in the generic copier
> > used by the back threader, but we should be able to handle them after
> > loop_done has completed.
>
> But this isn't a cost thing but a correctness (as in, can we update
> loops properly)
> thing.  We are passing 'loop' down to copy_bbs and very much rely on
> seeing either BBs of that loop or copying complete subloops of it.
>
> You will likely see excess loop fixup caused by this or ICEs in case the
> caller of this function will not always set LOOPS_NEED_FIXUP
> (not that copy_bbs handling of loops is perfect).

Ughh, fair enough.

In which case at least I've commented in the PR what the problem is
here.  Perhaps we could benefit from a BB copier that could handle
loops, which is way above my circle of competence ;-).

Aldy
Richard Biener Dec. 2, 2021, 11:37 a.m. UTC | #3
On Wed, Dec 1, 2021 at 5:00 PM Aldy Hernandez <aldyh@redhat.com> wrote:
>
> On Wed, Dec 1, 2021 at 2:36 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Nov 30, 2021 at 4:48 PM Aldy Hernandez via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > We are currently restricting loop crossing paths in the generic copier
> > > used by the back threader, but we should be able to handle them after
> > > loop_done has completed.
> >
> > But this isn't a cost thing but a correctness (as in, can we update
> > loops properly)
> > thing.  We are passing 'loop' down to copy_bbs and very much rely on
> > seeing either BBs of that loop or copying complete subloops of it.
> >
> > You will likely see excess loop fixup caused by this or ICEs in case the
> > caller of this function will not always set LOOPS_NEED_FIXUP
> > (not that copy_bbs handling of loops is perfect).
>
> Ughh, fair enough.
>
> In which case at least I've commented in the PR what the problem is
> here.  Perhaps we could benefit from a BB copier that could handle
> loops, which is way above my circle of competence ;-).

Well, more high level APIs exercising the copier are able to handle loops
but it really depends on the high-level transform what you need to do ;)

So for "loop-crossing" threadings it depends on the actual details what
the "loop transform" is that you are doing.  If you just rely on
LOOPS_NEED_FIXUP then for example a #pragma GCC unroll 0
might be lost and that is - counter to PROP_after_loop_opts - also
effective to prevent the RTL unroller from unrolling a loop.

What's probably OK is for example changing the code to only
prohibit copying loop headers:

+     /* We do not handle subloops, i.e. all the blocks must belong to the
+         same loop.  */
+       if (region[i]->loop_father != loop
             && region[i]->loop_father->header != region[i])
+         return false;

that should allow threadings covering an exit edge for example.  It's
reasonable (with some extra setup here!) to also handle the case
of entering and exiting a loop (the "subloop" the comment mentions),
but that requires us to do the loop structure copying/setup before
copying the BBs (maybe look at gimple_duplicate_sese_tail).

Note the above adjustment to handle exits does not necessarily result in correct
->loop_father assignment but LOOPS_NEED_FIXUP should be able to
deal with it without using loop annotations.  These kind of threads should also
be OK when loop opts are not finished yet.

Richard.


> Aldy
>
diff mbox series

Patch

diff --git a/gcc/attribs.c b/gcc/attribs.c
index c252f5af07b..9a079b8405a 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -1035,6 +1035,7 @@  sorted_attr_string (tree arglist)
       attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
       str_len_sum += len + 1;
     }
+  gcc_assert (arglist);
 
   /* Replace "=,-" with "_".  */
   for (i = 0; i < strlen (attr_str); i++)
diff --git a/gcc/testsuite/gcc.dg/pr80548.c b/gcc/testsuite/gcc.dg/pr80548.c
new file mode 100644
index 00000000000..2327111143e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr80548.c
@@ -0,0 +1,23 @@ 
+// { dg-do compile }
+// { dg-options "-O2 -Wuninitialized" }
+
+int g (void);
+void h (int, int);
+
+void f (int b)
+{
+  int x, y;
+
+  if (b)
+    {
+      x = g ();
+      y = g ();
+    }
+
+  while (g ())
+    if (b)
+      {
+        h (x, y); // { dg-bogus "uninit" }
+        y = g ();
+      }
+}
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index 8aac733ac25..b194c11e23d 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -2410,13 +2410,14 @@  back_jt_path_registry::duplicate_thread_path (edge entry,
      missuses of the functions.  I.e. if you ask to copy something weird,
      it will work, but the state of structures probably will not be
      correct.  */
-  for (i = 0; i < n_region; i++)
-    {
-      /* We do not handle subloops, i.e. all the blocks must belong to the
-	 same loop.  */
-      if (region[i]->loop_father != loop)
-	return false;
-    }
+  if (!(cfun->curr_properties & PROP_loop_opts_done))
+    for (i = 0; i < n_region; i++)
+      {
+	/* We do not handle subloops, i.e. all the blocks must belong to the
+	   same loop.  */
+	if (region[i]->loop_father != loop)
+	  return false;
+      }
 
   initialize_original_copy_tables ();
 
@@ -2651,9 +2652,11 @@  back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
 	  visited_starting_edges.add (entry);
 	  retval = true;
 	  m_num_threaded_edges++;
+	  path->release ();
 	}
+      else
+	cancel_thread (path, "Failure in duplicate_thread_path");
 
-      path->release ();
       m_paths.unordered_remove (0);
       free (region);
     }