diff mbox

Fix COMPONENT_REF expansion (PR rtl-optimization/77919)

Message ID 20161028084604.GT3541@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Oct. 28, 2016, 8:46 a.m. UTC
On Fri, Oct 28, 2016 at 01:32:22AM -0600, Jeff Law wrote:
> >I think so.  I'll leave the rest to people more familiar with RTL
> >expansion -- generally I thought the callers of expand() have to deal
> >with expansions that return a different mode?
> You generally have to deal with expansions that return the object in a new
> pseudo instead of the one you asked for -- so the caller has to test for
> that and emit a copy when it happens.
> 
> I don't offhand recall cases where we have to deal with getting a result in
> a different mode than was asked.  But given the history of the expanders, I
> wouldn't be surprised if there's oddball cases where that can happen.

I've already committed the original patch based on Eric's review, but
managed to come up with another testcase that still ICEs (one with two
different complex modes).  Is the following ok for trunk if it passes
bootstrap/regtest?

2016-10-28  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/77919
	* expr.c (expand_expr_real_1) <normal_inner_ref>: Only avoid forcing
	into memory if both modes are complex and their inner modes have the
	same precision.  If the two modes are different complex modes, convert
	each part separately and generate a new CONCAT.

	* g++.dg/torture/pr77919-2.C: New test.



	Jakub

Comments

Richard Biener Oct. 28, 2016, 8:52 a.m. UTC | #1
On Fri, 28 Oct 2016, Jakub Jelinek wrote:

> On Fri, Oct 28, 2016 at 01:32:22AM -0600, Jeff Law wrote:
> > >I think so.  I'll leave the rest to people more familiar with RTL
> > >expansion -- generally I thought the callers of expand() have to deal
> > >with expansions that return a different mode?
> > You generally have to deal with expansions that return the object in a new
> > pseudo instead of the one you asked for -- so the caller has to test for
> > that and emit a copy when it happens.
> > 
> > I don't offhand recall cases where we have to deal with getting a result in
> > a different mode than was asked.  But given the history of the expanders, I
> > wouldn't be surprised if there's oddball cases where that can happen.
> 
> I've already committed the original patch based on Eric's review, but
> managed to come up with another testcase that still ICEs (one with two
> different complex modes).  Is the following ok for trunk if it passes
> bootstrap/regtest?

As we're dealing with memory isn't GET_MODE_SIZE the correct thing to
use?

> 2016-10-28  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR rtl-optimization/77919
> 	* expr.c (expand_expr_real_1) <normal_inner_ref>: Only avoid forcing
> 	into memory if both modes are complex and their inner modes have the
> 	same precision.  If the two modes are different complex modes, convert
> 	each part separately and generate a new CONCAT.
> 
> 	* g++.dg/torture/pr77919-2.C: New test.
> 
> --- gcc/expr.c.jj	2016-10-28 10:35:14.753234774 +0200
> +++ gcc/expr.c	2016-10-28 10:35:28.760057716 +0200
> @@ -10422,10 +10422,35 @@ expand_expr_real_1 (tree exp, rtx target
>  	  {
>  	    if (bitpos == 0
>  		&& bitsize == GET_MODE_BITSIZE (GET_MODE (op0))
> -		&& COMPLEX_MODE_P (mode1))
> +		&& COMPLEX_MODE_P (mode1)
> +		&& COMPLEX_MODE_P (GET_MODE (op0))
> +		&& (GET_MODE_PRECISION (GET_MODE_INNER (mode1))
> +		    == GET_MODE_PRECISION (GET_MODE_INNER (GET_MODE (op0)))))
>  	      {
>  		if (reversep)
>  		  op0 = flip_storage_order (GET_MODE (op0), op0);
> +		if (mode1 != GET_MODE (op0))
> +		  {
> +		    rtx parts[2];
> +		    for (int i = 0; i < 2; i++)
> +		      {
> +			rtx op = read_complex_part (op0, i != 0);
> +			if (GET_CODE (op) == SUBREG)
> +			  op = force_reg (GET_MODE (op), op);
> +			rtx temp = gen_lowpart_common (GET_MODE_INNER (mode1),
> +						       op);
> +			if (temp)
> +			  op = temp;
> +			else
> +			  {
> +			    if (!REG_P (op) && !MEM_P (op))
> +			      op = force_reg (GET_MODE (op), op);
> +			    op = gen_lowpart (GET_MODE_INNER (mode1), op);
> +			  }
> +			parts[i] = op;
> +		      }
> +		    op0 = gen_rtx_CONCAT (mode1, parts[0], parts[1]);
> +		  }
>  		return op0;
>  	      }
>  	    if (bitpos == 0
> --- gcc/testsuite/g++.dg/torture/pr77919-2.C.jj	2016-10-28 10:35:49.294798140 +0200
> +++ gcc/testsuite/g++.dg/torture/pr77919-2.C	2016-10-28 10:29:38.000000000 +0200
> @@ -0,0 +1,10 @@
> +// PR rtl-optimization/77919
> +// { dg-do compile }
> +
> +typedef _Complex long long B;
> +struct A { A (double) {} _Complex double i; };
> +typedef struct { B b; } C;
> +struct D { D (const B &x) : b (x) {} B b; };
> +static inline B foo (const double *x) { C *a; a = (C *) x; return a->b; }
> +static inline D baz (const A &x) { return foo ((double *) &x); }
> +D b = baz (0);
> 
> 
> 	Jakub
> 
>
Jakub Jelinek Oct. 28, 2016, 8:59 a.m. UTC | #2
On Fri, Oct 28, 2016 at 10:52:34AM +0200, Richard Biener wrote:
> > I've already committed the original patch based on Eric's review, but
> > managed to come up with another testcase that still ICEs (one with two
> > different complex modes).  Is the following ok for trunk if it passes
> > bootstrap/regtest?
> 
> As we're dealing with memory isn't GET_MODE_SIZE the correct thing to
> use?

GET_MODE_PRECISION is what the case VIEW_CONVERT_EXPR case tests:
      /* If the input and output modes are both the same, we are done.  */
      if (mode == GET_MODE (op0))
        ;
      /* If neither mode is BLKmode, and both modes are the same size
         then we can use gen_lowpart.  */
      else if (mode != BLKmode && GET_MODE (op0) != BLKmode
               && (GET_MODE_PRECISION (mode)
                   == GET_MODE_PRECISION (GET_MODE (op0)))
               && !COMPLEX_MODE_P (GET_MODE (op0)))  
        {
          if (GET_CODE (op0) == SUBREG)
            op0 = force_reg (GET_MODE (op0), op0);
          temp = gen_lowpart_common (mode, op0);  
          if (temp)
            op0 = temp;
          else
            {
              if (!REG_P (op0) && !MEM_P (op0))
                op0 = force_reg (GET_MODE (op0), op0);
              op0 = gen_lowpart (mode, op0);
            }
        }
The CONCAT operands can be a MEM (just likely won't be both MEM or adjacent
MEM).

BTW, the VCE part could also handle 2 different complex modes.

	Jakub
Jakub Jelinek Oct. 29, 2016, 4:12 p.m. UTC | #3
On Fri, Oct 28, 2016 at 10:59:35AM +0200, Jakub Jelinek wrote:
> On Fri, Oct 28, 2016 at 10:52:34AM +0200, Richard Biener wrote:
> > > I've already committed the original patch based on Eric's review, but
> > > managed to come up with another testcase that still ICEs (one with two
> > > different complex modes).  Is the following ok for trunk if it passes
> > > bootstrap/regtest?
> > 
> > As we're dealing with memory isn't GET_MODE_SIZE the correct thing to
> > use?
> 
> GET_MODE_PRECISION is what the case VIEW_CONVERT_EXPR case tests:

BTW, testing GET_MODE_SIZE or GET_MODE_BITSIZE doesn't make sense there,
            if (bitpos == 0
                && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))
should already ensure that.  The GET_MODE_PRECISION check will force into
memory say x86 XCmode to {TC,CTI}mode or vice versa conversions, which are
better done through memory anyway.  If it isn't needed, then the question
is why VCE uses it.

Anyway, I've successfully bootstrapped/regtested the patch as is on
x86_64-linux and i686-linux.

	Jakub
Richard Biener Oct. 29, 2016, 7:15 p.m. UTC | #4
On October 29, 2016 6:12:50 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>On Fri, Oct 28, 2016 at 10:59:35AM +0200, Jakub Jelinek wrote:
>> On Fri, Oct 28, 2016 at 10:52:34AM +0200, Richard Biener wrote:
>> > > I've already committed the original patch based on Eric's review,
>but
>> > > managed to come up with another testcase that still ICEs (one
>with two
>> > > different complex modes).  Is the following ok for trunk if it
>passes
>> > > bootstrap/regtest?
>> > 
>> > As we're dealing with memory isn't GET_MODE_SIZE the correct thing
>to
>> > use?
>> 
>> GET_MODE_PRECISION is what the case VIEW_CONVERT_EXPR case tests:
>
>BTW, testing GET_MODE_SIZE or GET_MODE_BITSIZE doesn't make sense
>there,
>            if (bitpos == 0
>                && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))
>should already ensure that.  The GET_MODE_PRECISION check will force
>into
>memory say x86 XCmode to {TC,CTI}mode or vice versa conversions, which
>are
>better done through memory anyway.  If it isn't needed, then the
>question
>is why VCE uses it.
>
>Anyway, I've successfully bootstrapped/regtested the patch as is on
>x86_64-linux and i686-linux.

OK.

Richard.

>	Jakub
diff mbox

Patch

--- gcc/expr.c.jj	2016-10-28 10:35:14.753234774 +0200
+++ gcc/expr.c	2016-10-28 10:35:28.760057716 +0200
@@ -10422,10 +10422,35 @@  expand_expr_real_1 (tree exp, rtx target
 	  {
 	    if (bitpos == 0
 		&& bitsize == GET_MODE_BITSIZE (GET_MODE (op0))
-		&& COMPLEX_MODE_P (mode1))
+		&& COMPLEX_MODE_P (mode1)
+		&& COMPLEX_MODE_P (GET_MODE (op0))
+		&& (GET_MODE_PRECISION (GET_MODE_INNER (mode1))
+		    == GET_MODE_PRECISION (GET_MODE_INNER (GET_MODE (op0)))))
 	      {
 		if (reversep)
 		  op0 = flip_storage_order (GET_MODE (op0), op0);
+		if (mode1 != GET_MODE (op0))
+		  {
+		    rtx parts[2];
+		    for (int i = 0; i < 2; i++)
+		      {
+			rtx op = read_complex_part (op0, i != 0);
+			if (GET_CODE (op) == SUBREG)
+			  op = force_reg (GET_MODE (op), op);
+			rtx temp = gen_lowpart_common (GET_MODE_INNER (mode1),
+						       op);
+			if (temp)
+			  op = temp;
+			else
+			  {
+			    if (!REG_P (op) && !MEM_P (op))
+			      op = force_reg (GET_MODE (op), op);
+			    op = gen_lowpart (GET_MODE_INNER (mode1), op);
+			  }
+			parts[i] = op;
+		      }
+		    op0 = gen_rtx_CONCAT (mode1, parts[0], parts[1]);
+		  }
 		return op0;
 	      }
 	    if (bitpos == 0
--- gcc/testsuite/g++.dg/torture/pr77919-2.C.jj	2016-10-28 10:35:49.294798140 +0200
+++ gcc/testsuite/g++.dg/torture/pr77919-2.C	2016-10-28 10:29:38.000000000 +0200
@@ -0,0 +1,10 @@ 
+// PR rtl-optimization/77919
+// { dg-do compile }
+
+typedef _Complex long long B;
+struct A { A (double) {} _Complex double i; };
+typedef struct { B b; } C;
+struct D { D (const B &x) : b (x) {} B b; };
+static inline B foo (const double *x) { C *a; a = (C *) x; return a->b; }
+static inline D baz (const A &x) { return foo ((double *) &x); }
+D b = baz (0);