Patchwork Fix COMPLEX_EXPR expansion (PR middle-end/56015)

login
register
mail settings
Submitter Jakub Jelinek
Date Jan. 17, 2013, 6:22 p.m.
Message ID <20130117182236.GV7269@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/213343/
State New
Headers show

Comments

Jakub Jelinek - Jan. 17, 2013, 6:22 p.m.
Hi!

If target (the real part thereof) overlap op1 (i.e. the imag part of COMPLEX_EXPR
to be stored), we end up with first overwriting the real part and then
using wrong value in op1.
Fixed by testing for overlap, and either, if possible, doing the writes
in different order if overlap is detected (first write imag part, then real
part), or, if not possible, forcing op1 into a register.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-01-17  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/56015
	* expr.c (expand_expr_real_2) <case COMPLEX_EXPR>: Handle
	the case where writing real complex part of target modifies
	op1.

	* gfortran.dg/pr56015.f90: New test.


	Jakub
Richard Guenther - Jan. 18, 2013, 11:11 a.m.
On Thu, Jan 17, 2013 at 7:22 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> If target (the real part thereof) overlap op1 (i.e. the imag part of COMPLEX_EXPR
> to be stored), we end up with first overwriting the real part and then
> using wrong value in op1.
> Fixed by testing for overlap, and either, if possible, doing the writes
> in different order if overlap is detected (first write imag part, then real
> part), or, if not possible, forcing op1 into a register.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

(I wonder if it's worth doing the work to eventually swap instead of simply
forcing it to a reg always for overlaps)

I suppose a similar testcase using vector extracts / inserts would also
work now? (and maybe fail before this patch?)

Thanks,
Richard.

> 2013-01-17  Jakub Jelinek  <jakub@redhat.com>
>
>         PR middle-end/56015
>         * expr.c (expand_expr_real_2) <case COMPLEX_EXPR>: Handle
>         the case where writing real complex part of target modifies
>         op1.
>
>         * gfortran.dg/pr56015.f90: New test.
>
> --- gcc/expr.c.jj       2013-01-11 09:02:48.000000000 +0100
> +++ gcc/expr.c  2013-01-17 11:21:34.326854116 +0100
> @@ -8860,6 +8860,54 @@ expand_expr_real_2 (sepops ops, rtx targ
>
>        if (!target)
>         target = gen_reg_rtx (TYPE_MODE (type));
> +      else
> +       /* If target overlaps with op1, then either we need to force
> +          op1 into a pseudo (if target also overlaps with op0),
> +          or write the complex parts in reverse order.  */
> +       switch (GET_CODE (target))
> +         {
> +         case CONCAT:
> +           if (reg_overlap_mentioned_p (XEXP (target, 0), op1))
> +             {
> +               if (reg_overlap_mentioned_p (XEXP (target, 1), op0))
> +                 {
> +                 complex_expr_force_op1:
> +                   temp = gen_reg_rtx (GET_MODE_INNER (GET_MODE (target)));
> +                   emit_move_insn (temp, op1);
> +                   op1 = temp;
> +                   break;
> +                 }
> +             complex_expr_swap_order:
> +               /* Move the imaginary (op1) and real (op0) parts to their
> +                  location.  */
> +               write_complex_part (target, op1, true);
> +               write_complex_part (target, op0, false);
> +
> +               return target;
> +             }
> +           break;
> +         case MEM:
> +           temp = adjust_address_nv (target,
> +                                     GET_MODE_INNER (GET_MODE (target)), 0);
> +           if (reg_overlap_mentioned_p (temp, op1))
> +             {
> +               enum machine_mode imode = GET_MODE_INNER (GET_MODE (target));
> +               temp = adjust_address_nv (target, imode,
> +                                         GET_MODE_SIZE (imode));
> +               if (reg_overlap_mentioned_p (temp, op0))
> +                 goto complex_expr_force_op1;
> +               goto complex_expr_swap_order;
> +             }
> +           break;
> +         default:
> +           if (reg_overlap_mentioned_p (target, op1))
> +             {
> +               if (reg_overlap_mentioned_p (target, op0))
> +                 goto complex_expr_force_op1;
> +               goto complex_expr_swap_order;
> +             }
> +           break;
> +         }
>
>        /* Move the real (op0) and imaginary (op1) parts to their location.  */
>        write_complex_part (target, op0, false);
> --- gcc/testsuite/gfortran.dg/pr56015.f90.jj    2013-01-17 11:16:27.793639775 +0100
> +++ gcc/testsuite/gfortran.dg/pr56015.f90       2013-01-17 11:15:59.000000000 +0100
> @@ -0,0 +1,16 @@
> +! PR middle-end/56015
> +! { dg-do run }
> +! { dg-options "-Ofast -fno-inline" }
> +
> +program pr56015
> +  implicit none
> +  complex*16 p(10)
> +  p(:) = (0.1d0, 0.2d0)
> +  p(:) = (0.0d0, 1.0d0) * p(:)
> +  call foo (p)
> +contains
> +  subroutine foo (p)
> +    complex*16 p(10)
> +    if (any (p .ne. (-0.2d0, 0.1d0))) call abort
> +  end subroutine
> +end program pr56015
>
>         Jakub
Jakub Jelinek - Jan. 18, 2013, 5:09 p.m.
On Fri, Jan 18, 2013 at 12:11:33PM +0100, Richard Biener wrote:
> On Thu, Jan 17, 2013 at 7:22 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> (I wonder if it's worth doing the work to eventually swap instead of simply
> forcing it to a reg always for overlaps)

It is just a couple of lines, and we might get better code (I think the
case where both overlaps is really even rarer).

> I suppose a similar testcase using vector extracts / inserts would also
> work now? (and maybe fail before this patch?)

Vectors are different, for vectors we always create the resulting vector
and then store the whole vector, don't set it partially.
At least for
typedef long V __attribute__((vector_size (16)));

V v;

void
foo (void)
{
  v = (V) { v[1], v[0] };
}

Dunno if you have some other testcase in mind.

	Jakub

Patch

--- gcc/expr.c.jj	2013-01-11 09:02:48.000000000 +0100
+++ gcc/expr.c	2013-01-17 11:21:34.326854116 +0100
@@ -8860,6 +8860,54 @@  expand_expr_real_2 (sepops ops, rtx targ
 
       if (!target)
 	target = gen_reg_rtx (TYPE_MODE (type));
+      else
+	/* If target overlaps with op1, then either we need to force
+	   op1 into a pseudo (if target also overlaps with op0),
+	   or write the complex parts in reverse order.  */
+	switch (GET_CODE (target))
+	  {
+	  case CONCAT:
+	    if (reg_overlap_mentioned_p (XEXP (target, 0), op1))
+	      {
+		if (reg_overlap_mentioned_p (XEXP (target, 1), op0))
+		  {
+		  complex_expr_force_op1:
+		    temp = gen_reg_rtx (GET_MODE_INNER (GET_MODE (target)));
+		    emit_move_insn (temp, op1);
+		    op1 = temp;
+		    break;
+		  }
+	      complex_expr_swap_order:
+		/* Move the imaginary (op1) and real (op0) parts to their
+		   location.  */
+		write_complex_part (target, op1, true);
+		write_complex_part (target, op0, false);
+
+		return target;
+	      }
+	    break;
+	  case MEM:
+	    temp = adjust_address_nv (target,
+				      GET_MODE_INNER (GET_MODE (target)), 0);
+	    if (reg_overlap_mentioned_p (temp, op1))
+	      {
+		enum machine_mode imode = GET_MODE_INNER (GET_MODE (target));
+		temp = adjust_address_nv (target, imode,
+					  GET_MODE_SIZE (imode));
+		if (reg_overlap_mentioned_p (temp, op0))
+		  goto complex_expr_force_op1;
+		goto complex_expr_swap_order;
+	      }
+	    break;
+	  default:
+	    if (reg_overlap_mentioned_p (target, op1))
+	      {
+		if (reg_overlap_mentioned_p (target, op0))
+		  goto complex_expr_force_op1;
+		goto complex_expr_swap_order;
+	      }
+	    break;
+	  }
 
       /* Move the real (op0) and imaginary (op1) parts to their location.  */
       write_complex_part (target, op0, false);
--- gcc/testsuite/gfortran.dg/pr56015.f90.jj	2013-01-17 11:16:27.793639775 +0100
+++ gcc/testsuite/gfortran.dg/pr56015.f90	2013-01-17 11:15:59.000000000 +0100
@@ -0,0 +1,16 @@ 
+! PR middle-end/56015
+! { dg-do run }
+! { dg-options "-Ofast -fno-inline" }
+
+program pr56015
+  implicit none
+  complex*16 p(10)
+  p(:) = (0.1d0, 0.2d0)
+  p(:) = (0.0d0, 1.0d0) * p(:)
+  call foo (p)
+contains
+  subroutine foo (p)
+    complex*16 p(10)
+    if (any (p .ne. (-0.2d0, 0.1d0))) call abort
+  end subroutine
+end program pr56015