diff mbox series

Add TODO_update_ssa for SLP BB vectorization (PR tree-optimization/91885).

Message ID dc3275a4-1fe3-a72c-b22b-292c4c1665e6@suse.cz
State New
Headers show
Series Add TODO_update_ssa for SLP BB vectorization (PR tree-optimization/91885). | expand

Commit Message

Martin Liška Sept. 25, 2019, 10:06 a.m. UTC
Hi.

Similarly to SLP pass, we should probably set TODO_update_ssa
when a SLP BB vectorization happens from the normal vect pass.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-09-25  Martin Liska  <mliska@suse.cz>

	PR tree-optimization/91885
	* tree-vectorizer.c (try_vectorize_loop_1):
	Add TODO_update_ssa similarly to what slp
	pass does.

gcc/testsuite/ChangeLog:

2019-09-25  Martin Liska  <mliska@suse.cz>

	PR tree-optimization/91885
	* gcc.dg/pr91885.c: New test.
---
 gcc/testsuite/gcc.dg/pr91885.c | 47 ++++++++++++++++++++++++++++++++++
 gcc/tree-vectorizer.c          |  2 +-
 2 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr91885.c

Comments

Richard Biener Sept. 26, 2019, 7:32 a.m. UTC | #1
On Wed, Sep 25, 2019 at 12:06 PM Martin Liška <mliska@suse.cz> wrote:
>
> Hi.
>
> Similarly to SLP pass, we should probably set TODO_update_ssa
> when a SLP BB vectorization happens from the normal vect pass.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

Hmm, this was supposed to be handled by

  if (num_vectorized_loops > 0)
    {
      /* If we vectorized any loop only virtual SSA form needs to be updated.
         ???  Also while we try hard to update loop-closed SSA form we fail
         to properly do this in some corner-cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      return TODO_cleanup_cfg;
    }

but there isn't an equivalent of num_vectorized_bbs here.  Given the above
effectively short-cuts your patch (all paths only ever set ret =
TODO_cleanup_cfg
right now...) it will work without pessimizing things by running
update-ssa twice.

Can you check whether TODO_update_ssa_only_virtuals is enough as a fix?

Otherwise OK.

Thanks,
Richard.

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> 2019-09-25  Martin Liska  <mliska@suse.cz>
>
>         PR tree-optimization/91885
>         * tree-vectorizer.c (try_vectorize_loop_1):
>         Add TODO_update_ssa similarly to what slp
>         pass does.
>
> gcc/testsuite/ChangeLog:
>
> 2019-09-25  Martin Liska  <mliska@suse.cz>
>
>         PR tree-optimization/91885
>         * gcc.dg/pr91885.c: New test.
> ---
>  gcc/testsuite/gcc.dg/pr91885.c | 47 ++++++++++++++++++++++++++++++++++
>  gcc/tree-vectorizer.c          |  2 +-
>  2 files changed, 48 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr91885.c
>
>
Martin Liška Sept. 26, 2019, 7:38 a.m. UTC | #2
On 9/26/19 9:32 AM, Richard Biener wrote:
> On Wed, Sep 25, 2019 at 12:06 PM Martin Liška <mliska@suse.cz> wrote:
>>
>> Hi.
>>
>> Similarly to SLP pass, we should probably set TODO_update_ssa
>> when a SLP BB vectorization happens from the normal vect pass.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
> 
> Hmm, this was supposed to be handled by
> 
>   if (num_vectorized_loops > 0)
>     {
>       /* If we vectorized any loop only virtual SSA form needs to be updated.
>          ???  Also while we try hard to update loop-closed SSA form we fail
>          to properly do this in some corner-cases (see PR56286).  */
>       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
>       return TODO_cleanup_cfg;
>     }
> 
> but there isn't an equivalent of num_vectorized_bbs here.  Given the above
> effectively short-cuts your patch (all paths only ever set ret =
> TODO_cleanup_cfg
> right now...) it will work without pessimizing things by running
> update-ssa twice.
> 
> Can you check whether TODO_update_ssa_only_virtuals is enough as a fix?

Yes, it's enough.

> 
> Otherwise OK.

I'm going to install the patch then.

Martin

> 
> Thanks,
> Richard.
> 
>> Thanks,
>> Martin
>>
>> gcc/ChangeLog:
>>
>> 2019-09-25  Martin Liska  <mliska@suse.cz>
>>
>>         PR tree-optimization/91885
>>         * tree-vectorizer.c (try_vectorize_loop_1):
>>         Add TODO_update_ssa similarly to what slp
>>         pass does.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2019-09-25  Martin Liska  <mliska@suse.cz>
>>
>>         PR tree-optimization/91885
>>         * gcc.dg/pr91885.c: New test.
>> ---
>>  gcc/testsuite/gcc.dg/pr91885.c | 47 ++++++++++++++++++++++++++++++++++
>>  gcc/tree-vectorizer.c          |  2 +-
>>  2 files changed, 48 insertions(+), 1 deletion(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/pr91885.c
>>
>>
Jakub Jelinek Sept. 27, 2019, 10:29 a.m. UTC | #3
On Thu, Sep 26, 2019 at 09:38:44AM +0200, Martin Liška wrote:
> >> 2019-09-25  Martin Liska  <mliska@suse.cz>
> >>
> >>         PR tree-optimization/91885
> >>         * tree-vectorizer.c (try_vectorize_loop_1):
> >>         Add TODO_update_ssa similarly to what slp
> >>         pass does.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> 2019-09-25  Martin Liska  <mliska@suse.cz>
> >>
> >>         PR tree-optimization/91885
> >>         * gcc.dg/pr91885.c: New test.

The testcase wasn't ilp32 clean, fixed thusly, tested on x86_64-linux
-m32/-m64, committed as obvious:

2019-09-27  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/91885
	* gcc.dg/pr91885.c (__int64_t): Change from long to long long.
	(__uint64_t): Change from unsigned long to unsigned long long.

--- gcc/testsuite/gcc.dg/pr91885.c.jj	2019-09-26 21:34:25.008866730 +0200
+++ gcc/testsuite/gcc.dg/pr91885.c	2019-09-27 12:26:12.613900421 +0200
@@ -2,8 +2,8 @@
 /* { dg-options "-O3 -fprofile-generate" } */
 /* { dg-require-profiling "-fprofile-generate" } */
 
-typedef signed long int __int64_t;
-typedef unsigned long int __uint64_t;
+typedef signed long long int __int64_t;
+typedef unsigned long long int __uint64_t;
 typedef __int64_t int64_t;
 typedef __uint64_t uint64_t;
 inline void


	Jakub
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/pr91885.c b/gcc/testsuite/gcc.dg/pr91885.c
new file mode 100644
index 00000000000..934e8d3e6c3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr91885.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fprofile-generate" } */
+/* { dg-require-profiling "-fprofile-generate" } */
+
+typedef signed long int __int64_t;
+typedef unsigned long int __uint64_t;
+typedef __int64_t int64_t;
+typedef __uint64_t uint64_t;
+inline void
+BLI_endian_switch_int64 (int64_t *val)
+{
+  uint64_t tval = *val;
+  *val = ((tval >> 56)) | ((tval << 40) & 0x00ff000000000000ll)
+	 | ((tval << 24) & 0x0000ff0000000000ll)
+	 | ((tval << 8) & 0x000000ff00000000ll)
+	 | ((tval >> 8) & 0x00000000ff000000ll)
+	 | ((tval >> 24) & 0x0000000000ff0000ll)
+	 | ((tval >> 40) & 0x000000000000ff00ll) | ((tval << 56));
+}
+typedef struct anim_index_entry
+{
+  unsigned long long seek_pos_dts;
+  unsigned long long pts;
+} anim_index_entry;
+extern struct anim_index_entry *
+MEM_callocN (int);
+struct anim_index
+{
+  int num_entries;
+  struct anim_index_entry *entries;
+};
+struct anim_index *
+IMB_indexer_open (const char *name)
+{
+  char header[13];
+  struct anim_index *idx;
+  int i;
+  idx->entries = MEM_callocN (8);
+  if (((1 == 0) != (header[8] == 'V')))
+    {
+      for (i = 0; i < idx->num_entries; i++)
+	{
+	  BLI_endian_switch_int64 ((int64_t *) &idx->entries[i].seek_pos_dts);
+	  BLI_endian_switch_int64 ((int64_t *) &idx->entries[i].pts);
+	}
+    }
+}
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index c3004f6f3a2..b3da0a16dc0 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -943,7 +943,7 @@  try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
 	      fold_loop_internal_call (loop_vectorized_call,
 				       boolean_true_node);
 	      loop_vectorized_call = NULL;
-	      ret |= TODO_cleanup_cfg;
+	      ret |= TODO_cleanup_cfg | TODO_update_ssa;
 	    }
 	}
       /* If outer loop vectorization fails for LOOP_VECTORIZED guarded