diff mbox

Add counter inits to zero_iter_bb in expand_omp_for_init_counts

Message ID 560D2B09.4020501@mentor.com
State New
Headers show

Commit Message

Tom de Vries Oct. 1, 2015, 12:46 p.m. UTC
Hi,

this patch adds initialization in zero_iter_bb of counters introduced in 
expand_omp_for_init_counts.

This removes the need to set TREE_NO_WARNING on those counters.

Built on x86_64 and reg-tested with gomp.exp and target-libgomp c.exp.

OK for trunk, if bootstrap and reg-test on x86_64 succeeds?

Thanks,
- Tom

Comments

Jakub Jelinek Oct. 1, 2015, 12:49 p.m. UTC | #1
On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
> this patch adds initialization in zero_iter_bb of counters introduced in
> expand_omp_for_init_counts.
> 
> This removes the need to set TREE_NO_WARNING on those counters.

Why do you think it is a good idea?  I'd be afraid it slows things down
unnecessarily.  Furthermore, I'd prefer not to change this area of code before
gomp-4_1-branch is merged, as it will be a nightmare for the merge
otherwise.

	Jakub
Tom de Vries Oct. 1, 2015, 1:37 p.m. UTC | #2
On 01/10/15 14:49, Jakub Jelinek wrote:
> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
>> this patch adds initialization in zero_iter_bb of counters introduced in
>> expand_omp_for_init_counts.
>>
>> This removes the need to set TREE_NO_WARNING on those counters.
>
> Why do you think it is a good idea?

In replace_ssa_name, I've recently added the assert:
...
           gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name));
...

On the gomp-4_0-branch, this assert triggers for a collapsed acc loop, 
which uses expand_omp_for_generic for omp-expansion.  The assert 
triggers because (some of) the counters added by 
expand_omp_for_init_counts are not initialized on all paths.

On trunk, for the test-case in the patch, this assert doesn't trigger 
because the omp function is split off before ssa.

> I'd be afraid it slows things down unnecessarily.

I think zero_iter_bb is a block that is expected not to be executed 
frequently.

I've attached an sdiff of x86_64 assembly for the test-case (before 
left, after right). AFAICT, this patch has the effect that it speeds up 
the frequent path with one instruction.

>  Furthermore, I'd prefer not to change this area of code before
> gomp-4_1-branch is merged, as it will be a nightmare for the merge
> otherwise.

Committing to gomp-4_0-branch for now would work for me.

Thanks,
- Tom
.file	"collapse.c"			.file	"collapse.c"
	.text					.text
	.p2align 4,,15				.p2align 4,,15
	.type	foo._omp_fn.0, @funct		.type	foo._omp_fn.0, @funct
foo._omp_fn.0:				foo._omp_fn.0:
.LFB12:					.LFB12:
	.cfi_startproc				.cfi_startproc
	pushq	%rbp				pushq	%rbp
	.cfi_def_cfa_offset 16			.cfi_def_cfa_offset 16
	.cfi_offset 6, -16			.cfi_offset 6, -16
	pushq	%rbx				pushq	%rbx
	.cfi_def_cfa_offset 24			.cfi_def_cfa_offset 24
	.cfi_offset 3, -24			.cfi_offset 3, -24
	xorl	%esi, %esi	      <
	subq	$24, %rsp			subq	$24, %rsp
	.cfi_def_cfa_offset 48			.cfi_def_cfa_offset 48
	movl	(%rdi), %eax			movl	(%rdi), %eax
	movl	4(%rdi), %ebp			movl	4(%rdi), %ebp
	testl	%eax, %eax			testl	%eax, %eax
	jle	.L8		      |		jle	.L9
	testl	%ebp, %ebp			testl	%ebp, %ebp
	jle	.L8		      |		jle	.L9
	movslq	%ebp, %rbx			movslq	%ebp, %rbx
	movslq	%eax, %rsi			movslq	%eax, %rsi
	imulq	%rbx, %rsi			imulq	%rbx, %rsi
.L8:					.L8:
	leaq	8(%rsp), %r8			leaq	8(%rsp), %r8
	xorl	%edi, %edi			xorl	%edi, %edi
	movq	%rsp, %rcx			movq	%rsp, %rcx
	movl	$1, %edx			movl	$1, %edx
	call	GOMP_loop_runtime_sta		call	GOMP_loop_runtime_sta
	testb	%al, %al			testb	%al, %al
	jne	.L10				jne	.L10
.L6:					.L6:
	call	GOMP_loop_end_nowait		call	GOMP_loop_end_nowait
	addq	$24, %rsp			addq	$24, %rsp
	.cfi_remember_state			.cfi_remember_state
	.cfi_def_cfa_offset 24			.cfi_def_cfa_offset 24
	popq	%rbx				popq	%rbx
	.cfi_def_cfa_offset 16			.cfi_def_cfa_offset 16
	popq	%rbp				popq	%rbp
	.cfi_def_cfa_offset 8			.cfi_def_cfa_offset 8
	ret					ret
	.p2align 4,,10				.p2align 4,,10
	.p2align 3				.p2align 3
.L15:					.L15:
	.cfi_restore_state			.cfi_restore_state
	leaq	8(%rsp), %rsi			leaq	8(%rsp), %rsi
	movq	%rsp, %rdi			movq	%rsp, %rdi
	call	GOMP_loop_runtime_nex		call	GOMP_loop_runtime_nex
	testb	%al, %al			testb	%al, %al
	je	.L6				je	.L6
.L10:					.L10:
	movq	(%rsp), %rsi			movq	(%rsp), %rsi
	movq	8(%rsp), %r9			movq	8(%rsp), %r9
	movq	%rsi, %rax			movq	%rsi, %rax
	cqto					cqto
	idivq	%rbx				idivq	%rbx
	movslq	%eax, %r8			movslq	%eax, %r8
	.p2align 4,,10				.p2align 4,,10
	.p2align 3				.p2align 3
.L4:					.L4:
	leaq	(%r8,%r8,4), %rdi		leaq	(%r8,%r8,4), %rdi
	movslq	%edx, %rcx			movslq	%edx, %rcx
	addq	$1, %rsi			addq	$1, %rsi
	cmpq	%rsi, %r9			cmpq	%rsi, %r9
	leaq	(%rdi,%rdi,4), %rdi		leaq	(%rdi,%rdi,4), %rdi
	leaq	(%rcx,%rdi,4), %rcx		leaq	(%rcx,%rdi,4), %rcx
	movl	$1, a(,%rcx,4)			movl	$1, a(,%rcx,4)
	jle	.L15				jle	.L15
	addl	$1, %edx			addl	$1, %edx
	cmpl	%edx, %ebp			cmpl	%edx, %ebp
	jg	.L4				jg	.L4
	addl	$1, %eax			addl	$1, %eax
	xorl	%edx, %edx			xorl	%edx, %edx
	movslq	%eax, %r8			movslq	%eax, %r8
	jmp	.L4				jmp	.L4
				      >	.L9:
				      >		xorl	%ebx, %ebx
				      >		xorl	%esi, %esi
				      >		jmp	.L8
	.cfi_endproc				.cfi_endproc
.LFE12:					.LFE12:
	.size	foo._omp_fn.0, .-foo.		.size	foo._omp_fn.0, .-foo.
	.p2align 4,,15				.p2align 4,,15
	.globl	foo				.globl	foo
	.type	foo, @function			.type	foo, @function
foo:					foo:
.LFB10:					.LFB10:
	.cfi_startproc				.cfi_startproc
	subq	$24, %rsp			subq	$24, %rsp
	.cfi_def_cfa_offset 32			.cfi_def_cfa_offset 32
	xorl	%ecx, %ecx			xorl	%ecx, %ecx
	xorl	%edx, %edx			xorl	%edx, %edx
	movl	%edi, (%rsp)			movl	%edi, (%rsp)
	movl	%esi, 4(%rsp)			movl	%esi, 4(%rsp)
	movl	$foo._omp_fn.0, %edi		movl	$foo._omp_fn.0, %edi
	movq	%rsp, %rsi			movq	%rsp, %rsi
	call	GOMP_parallel			call	GOMP_parallel
	addq	$24, %rsp			addq	$24, %rsp
	.cfi_def_cfa_offset 8			.cfi_def_cfa_offset 8
	ret					ret
	.cfi_endproc				.cfi_endproc
.LFE10:					.LFE10:
	.size	foo, .-foo			.size	foo, .-foo
Tom de Vries Oct. 8, 2015, 8:31 a.m. UTC | #3
[ was: Re: [patch] Add counter inits to zero_iter_bb in 
expand_omp_for_init_counts ]

On 01/10/15 15:37, Tom de Vries wrote:
> On 01/10/15 14:49, Jakub Jelinek wrote:
>> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
>>> this patch adds initialization in zero_iter_bb of counters introduced in
>>> expand_omp_for_init_counts.
>>>
>>> This removes the need to set TREE_NO_WARNING on those counters.
>>
>> Why do you think it is a good idea?
>
> In replace_ssa_name, I've recently added the assert:
> ...
>            gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (name));
> ...
>
> On the gomp-4_0-branch, this assert triggers for a collapsed acc loop,
> which uses expand_omp_for_generic for omp-expansion.  The assert
> triggers because (some of) the counters added by
> expand_omp_for_init_counts are not initialized on all paths.
>
> On trunk, for the test-case in the patch, this assert doesn't trigger
> because the omp function is split off before ssa.
>
>> I'd be afraid it slows things down unnecessarily.
>
> I think zero_iter_bb is a block that is expected not to be executed
> frequently.
>
> I've attached an sdiff of x86_64 assembly for the test-case (before
> left, after right). AFAICT, this patch has the effect that it speeds up
> the frequent path with one instruction.
>
>>  Furthermore, I'd prefer not to change this area of code before
>> gomp-4_1-branch is merged, as it will be a nightmare for the merge
>> otherwise.
>
> Committing to gomp-4_0-branch for now would work for me.
>

Committed to gomp-4_0-branch.

Thanks,
- Tom
Thomas Schwinge Oct. 23, 2015, 2:27 p.m. UTC | #4
Hi Jakub and Tom!

On Thu, 1 Oct 2015 15:37:26 +0200, Tom de Vries <Tom_deVries@mentor.com> wrote:
> On 01/10/15 14:49, Jakub Jelinek wrote:
> > On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
> >> this patch adds initialization in zero_iter_bb of counters introduced in
> >> expand_omp_for_init_counts.
> >>
> >> This removes the need to set TREE_NO_WARNING on those counters.
> >
> > Why do you think it is a good idea?
> 
> [...]

> >  Furthermore, I'd prefer not to change this area of code before
> > gomp-4_1-branch is merged, as it will be a nightmare for the merge
> > otherwise.
> 
> Committing to gomp-4_0-branch for now would work for me.

Well, the "nightmare" to merge this thus fell onto me...  In particular,
as part of my gomp-4_0-branch r229255,
<http://news.gmane.org/find-root.php?message_id=%3C877fmd7lig.fsf%40schwinge.name%3E>,
I "butchered" your (Tom's) code (gomp-4_0-branch r228595) to work in
context of Jakub's changes -- would you please check that out (current
gomp-4_0-branch)?  Even though there are no testsuite regressions, given
my lack of detailed understanding of this code, I'm not too sure about my
changes.

Here's, briefly, what I did: in gcc/omp-low.c:expand_omp_for_init_counts
extend created_zero_iter_bb handling for fd->ordered; at the end of that
function, individually decide (lame...) whether to use zero_iter1_bb or
zero_iter2_bb; in gcc/omp-low.c:expand_omp_for_generic remove
TREE_NO_WARNING code for the new zero_iter2_bb case, too.


Grüße,
 Thomas
Tom de Vries Oct. 28, 2015, 9:40 a.m. UTC | #5
On 23/10/15 16:27, Thomas Schwinge wrote:
> Hi Jakub and Tom!
>
> On Thu, 1 Oct 2015 15:37:26 +0200, Tom de Vries <Tom_deVries@mentor.com> wrote:
>> On 01/10/15 14:49, Jakub Jelinek wrote:
>>> On Thu, Oct 01, 2015 at 02:46:01PM +0200, Tom de Vries wrote:
>>>> this patch adds initialization in zero_iter_bb of counters introduced in
>>>> expand_omp_for_init_counts.
>>>>
>>>> This removes the need to set TREE_NO_WARNING on those counters.
>>>
>>> Why do you think it is a good idea?
>>
>> [...]
>
>>>   Furthermore, I'd prefer not to change this area of code before
>>> gomp-4_1-branch is merged, as it will be a nightmare for the merge
>>> otherwise.
>>
>> Committing to gomp-4_0-branch for now would work for me.
>
> Well, the "nightmare" to merge this thus fell onto me...  In particular,
> as part of my gomp-4_0-branch r229255,
> <http://news.gmane.org/find-root.php?message_id=%3C877fmd7lig.fsf%40schwinge.name%3E>,
> I "butchered" your (Tom's) code (gomp-4_0-branch r228595) to work in
> context of Jakub's changes -- would you please check that out (current
> gomp-4_0-branch)?  Even though there are no testsuite regressions, given
> my lack of detailed understanding of this code, I'm not too sure about my
> changes.
>
> Here's, briefly, what I did: in gcc/omp-low.c:expand_omp_for_init_counts
> extend created_zero_iter_bb handling for fd->ordered; at the end of that
> function, individually decide (lame...) whether to use zero_iter1_bb or
> zero_iter2_bb; in gcc/omp-low.c:expand_omp_for_generic remove
> TREE_NO_WARNING code for the new zero_iter2_bb case, too.

Hi Thomas,

the merge looks good to me.

Thanks,
- Tom
diff mbox

Patch

Add counter inits to zero_iter_bb in expand_omp_for_init_counts

2015-10-01  Tom de Vries  <tom@codesourcery.com>

	* omp-low.c (expand_omp_for_init_counts): Add inits for counters in
	zero_iter_bb.
	(expand_omp_for_generic): Remove TREE_NO_WARNING settings on counters.

	* gcc.dg/gomp/collapse-2.c: New test.
---
 gcc/omp-low.c                          | 26 +++++++++++++++++++-------
 gcc/testsuite/gcc.dg/gomp/collapse-2.c | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/gomp/collapse-2.c

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 8bcad08..8181757 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5732,6 +5732,7 @@  expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
       return;
     }
 
+  bool created_zero_iter_bb = false;
   for (i = 0; i < fd->collapse; i++)
     {
       tree itype = TREE_TYPE (fd->loops[i].v);
@@ -5774,6 +5775,7 @@  expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
 				       entry_bb);
+	      created_zero_iter_bb = true;
 	    }
 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
 	  ne->probability = REG_BR_PROB_BASE / 2000 - 1;
@@ -5826,6 +5828,23 @@  expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
 	}
     }
+
+  if (created_zero_iter_bb)
+    {
+      gimple_stmt_iterator gsi = gsi_after_labels (zero_iter_bb);
+      /* Atm counts[0] doesn't seem to be used beyond create_zero_iter_bb,
+	 but for robustness-sake we include that one as well.  */
+      for (i = 0; i < fd->collapse; i++)
+	{
+	  tree var = counts[i];
+	  if (!SSA_VAR_P (var))
+	    continue;
+
+	  tree zero = build_zero_cst (type);
+	  gassign *assign_stmt = gimple_build_assign (var, zero);
+	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+	}
+    }
 }
 
 
@@ -6116,7 +6135,6 @@  expand_omp_for_generic (struct omp_region *region,
   bool broken_loop = region->cont == NULL;
   edge e, ne;
   tree *counts = NULL;
-  int i;
 
   gcc_assert (!broken_loop || !in_combined_parallel);
   gcc_assert (fd->iter_type == long_integer_type_node
@@ -6185,12 +6203,6 @@  expand_omp_for_generic (struct omp_region *region,
 
       if (zero_iter_bb)
 	{
-	  /* Some counts[i] vars might be uninitialized if
-	     some loop has zero iterations.  But the body shouldn't
-	     be executed in that case, so just avoid uninit warnings.  */
-	  for (i = first_zero_iter; i < fd->collapse; i++)
-	    if (SSA_VAR_P (counts[i]))
-	      TREE_NO_WARNING (counts[i]) = 1;
 	  gsi_prev (&gsi);
 	  e = split_block (entry_bb, gsi_stmt (gsi));
 	  entry_bb = e->dest;
diff --git a/gcc/testsuite/gcc.dg/gomp/collapse-2.c b/gcc/testsuite/gcc.dg/gomp/collapse-2.c
new file mode 100644
index 0000000..5319f89
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/collapse-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp -fdump-tree-ssa" } */
+
+#define N 100
+
+int a[N][N];
+
+void
+foo (int m, int n)
+{
+  int i, j;
+#pragma omp parallel
+#pragma omp for collapse(2) schedule (runtime)
+  for (i = 0; i < m; i++)
+    for (j = 0; j < n; j++)
+      a[i][j] = 1;
+}
+
+/* { dg-final { scan-tree-dump-not "(?n)PHI.*count.*\\(D\\)" "ssa" } } */
-- 
1.9.1