diff mbox series

[v2,01/31] PR target/58901: reload: Handle SUBREG of MEM with a mode-dependent address

Message ID alpine.LFD.2.21.2011232039080.656242@eddie.linux-mips.org
State Superseded
Headers show
Series [v2,01/31] PR target/58901: reload: Handle SUBREG of MEM with a mode-dependent address | expand

Commit Message

Maciej W. Rozycki Nov. 24, 2020, 6:19 a.m. UTC
From: Matt Thomas <matt@3am-software.com>

Fix an ICE with the handling of RTL expressions like:

(subreg:QI (mem/c:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 0 %r0 [orig:67 i ] [67])
                    (const_int 4 [0x4]))
                (reg/v/f:SI 7 %r7 [orig:59 doacross ] [59]))
            (const_int 40 [0x28])) [1 MEM[(unsigned int *)doacross_63 + 40B + i_106 * 4]+0 S4 A32]) 0)

that causes the compilation of libgomp to fail:

during RTL pass: reload
.../libgomp/ordered.c: In function 'GOMP_doacross_wait':
.../libgomp/ordered.c:507:1: internal compiler error: in change_address_1, at emit-rtl.c:2275
  507 | }
      | ^
0x10a3462b change_address_1
	.../gcc/emit-rtl.c:2275
0x10a353a7 adjust_address_1(rtx_def*, machine_mode, poly_int<1u, long>, int, int, int, poly_int<1u, long>)
	.../gcc/emit-rtl.c:2409
0x10ae2993 alter_subreg(rtx_def**, bool)
	.../gcc/final.c:3368
0x10ae25cf cleanup_subreg_operands(rtx_insn*)
	.../gcc/final.c:3322
0x110922a3 reload(rtx_insn*, int)
	.../gcc/reload1.c:1232
0x10de2bf7 do_reload
	.../gcc/ira.c:5812
0x10de3377 execute
	.../gcc/ira.c:5986

in a `vax-netbsdelf' build, where an attempt is made to change the mode
of the contained memory reference to the mode of the containing SUBREG.
Such RTL expressions are produced by the VAX shift and rotate patterns
(`ashift', `ashiftrt', `rotate', `rotatert') where the count operand
always has the QI mode regardless of the mode, either SI or DI, of the
datum shifted or rotated.

Such a mode change cannot work where the memory reference uses the
indexed addressing mode, where a multiplier is implied that in the VAX
ISA depends on the width of the memory access requested and therefore
changing the machine mode would change the address calculation as well.

Avoid the attempt then by forcing the reload of any SUBREGs containing
a mode-dependent memory reference, also fixing these regressions:

FAIL: gcc.c-torture/compile/pr46883.c   -Os  (internal compiler error)
FAIL: gcc.c-torture/compile/pr46883.c   -Os  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -g  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -g  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
FAIL: gcc.dg/20050629-1.c (internal compiler error)
FAIL: gcc.dg/20050629-1.c (test for excess errors)
FAIL: c-c++-common/torture/pr53505.c   -Os  (internal compiler error)
FAIL: c-c++-common/torture/pr53505.c   -Os  (test for excess errors)
FAIL: gfortran.dg/coarray_failed_images_1.f08   -Os  (internal compiler error)
FAIL: gfortran.dg/coarray_stopped_images_1.f08   -Os  (internal compiler error)

With test case #0 included it causes a reload with:

(insn 15 14 16 4 (set (reg:SI 31)
        (ashift:SI (const_int 1 [0x1])
            (subreg:QI (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ]) 0))) "pr58901-0.c":15:12 94 {ashlsi3}
     (expr_list:REG_DEAD (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
        (nil)))

as follows:

Reloads for insn # 15
Reload 0: reload_in (SI) = (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
	ALL_REGS, RELOAD_FOR_INPUT (opnum = 2)
	reload_in_reg: (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
	reload_reg_rtx: (reg:SI 5 %r5)

resulting in:

(insn 37 14 15 4 (set (reg:SI 5 %r5)
        (mem/c:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 1 %r1 [orig:25 i ] [25])
                        (const_int 4 [0x4]))
                    (reg/v/f:SI 4 %r4 [orig:29 s ] [29]))
                (const_int 4 [0x4])) [1 MEM[(int *)s_8(D) + 4B + _5 * 4]+0 S4 A32])) "pr58901-0.c":15:12 12 {movsi_2}
     (nil))
(insn 15 37 16 4 (set (reg:SI 2 %r2 [31])
        (ashift:SI (const_int 1 [0x1])
            (reg:QI 5 %r5))) "pr58901-0.c":15:12 94 {ashlsi3}
     (nil))

and assembly like:

.L3:
	movl 4(%r4)[%r1],%r5
	ashl %r5,$1,%r2
	xorl2 %r2,%r0
	incl %r1
	cmpl %r1,%r3
	jneq .L3

produced for the loop, providing optimization has been enabled.  

Likewise with test case #1 the reload of:

(insn 17 16 18 4 (set (reg:SI 34)
        (and:SI (subreg:SI (reg/v:DI 27 [ t ]) 4)
            (const_int 1 [0x1]))) "pr58901-1.c":18:20 77 {*andsi_const_int}
     (expr_list:REG_DEAD (reg/v:DI 27 [ t ])
        (nil)))

is as follows:

Reloads for insn # 17
Reload 0: reload_in (DI) = (reg/v:DI 27 [ t ])
	reload_out (SI) = (reg:SI 2 %r2 [34])
	ALL_REGS, RELOAD_OTHER (opnum = 0)
	reload_in_reg: (reg/v:DI 27 [ t ])
	reload_out_reg: (reg:SI 2 %r2 [34])
	reload_reg_rtx: (reg:DI 4 %r4)

resulting in:

(insn 40 16 17 4 (set (reg:DI 4 %r4)
        (mem/c:DI (plus:SI (mult:SI (reg/v:SI 1 %r1 [orig:26 i ] [26])
                    (const_int 8 [0x8]))
                (reg/v/f:SI 3 %r3 [orig:30 s ] [30])) [1 MEM[(const struct s *)s_13(D) + _7 * 8]+0 S8 A32])) "pr58901-1.c":18:20 11 {movdi}
     (nil))
(insn 17 40 41 4 (set (reg:SI 4 %r4)
        (and:SI (reg:SI 5 %r5 [+4 ])
            (const_int 1 [0x1]))) "pr58901-1.c":18:20 77 {*andsi_const_int}
     (nil))

and assembly like:

.L3:
	movq (%r3)[%r1],%r4
	bicl3 $-2,%r5,%r4
	addl2 %r4,%r0
	jaoblss %r0,%r1,.L3

First posted at: <https://gcc.gnu.org/ml/gcc/2014-06/msg00060.html>.

2020-11-24  Matt Thomas  <matt@3am-software.com>
	    Maciej W. Rozycki  <macro@linux-mips.org>

	gcc/
	PR target/58901
	* reload.c (push_reload): Also reload the inner expression of a 
	SUBREG for pseudos associated with a mode-dependent memory 
	reference.
	(find_reloads): Force a reload likewise.

2020-11-24  Maciej W. Rozycki  <macro@linux-mips.org>

	gcc/testsuite/
	PR target/58901
	* gcc.c-torture/compile/pr58901-0.c: New test.
	* gcc.c-torture/compile/pr58901-1.c: New test.
---
Hi Eric,

On Fri, 20 Nov 2020, Maciej W. Rozycki wrote:

> > The handling of this family of reloads is supposed to be done by the block of 
> > code just above though, i.e. at line 1023.  Can't we add the test based on 
> > mode_dependent_address_p to this block, e.g. after:
> > 
> > 	  || (REG_P (SUBREG_REG (in))
> > 	      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
> > 	      && !REG_CAN_CHANGE_MODE_P (REGNO (SUBREG_REG (in)),
> > 					 GET_MODE (SUBREG_REG 
> > (in)), inmode))))
> > 
> > instead?
> 
>  Thank you for your input, I'll have a look.  Coming from Matt this is the 
> only change of the series I have just merged without looking into it too 
> much, so as not to spend too much time with side issues (there were too 
> many already).

 You were right about the correct placement of the check, but it was a 
part of the solution only.

 I looked into why it did not work on its own and (mind that I have not 
worked with this part of GCC before) I have realised something was not 
right about it.  The thing was the reload was only requested as optional 
(and later discarded) under the premise of being an optimisation only 
(i.e. called from reload.c:4125 in `find_reloads'), while indeed it is 
required whether optimising or not, even though the VAX backend does not 
appear to produce mode-dependent addresses in the latter case, or at least 
I haven't seen it doing so.

 So I have investigated what needs to be done for the reload to become 
unconditional and come up with this update.  Note that we need to handle a 
mode change whether it is for the low part of the inner reference or not.  
The latter case is what turned out to actually trigger with pr53505.c, 
which this update actually produces better code for, removing a useless 
move (and frame slot creation for a static register):

@@ -5,7 +5,7 @@
 .globl foo
 	.type	foo, @function
 foo:
-	.word 0x40
+	.word 0
 	subl2 $4,%sp
 	movl 4(%ap),%r2
 	clrl %r0
@@ -18,7 +18,6 @@
 	calls $0,abort
 .L3:
 	movq (%r2)[%r0],%r4
-	movl %r5,%r6
 	rotl $16,%r5,%r3
 	bicl2 $-2,%r3
 	addl2 %r3,%r1

which was of course the result of an unnecessary reload of the outer 
expression the original change did.

 I have reduced the libgomp build failure and pr53505.c to test cases #0 
and #1 included respectively (where #1 actually triggers across all 
optimisation levels rather than just `-Os', though with 05/31 this goes 
back to `-Os' only); while the failure is VAX-specific code involved is 
not, so I think there'll be no harm with including them in the generic 
part of our testsuite for good measure.

 I think it is interesting to note that with #0 for some reason for the 
failure to trigger the struct has to be passed by reference rather than 
value and for #1 there has to be the intermediate struct assignment in the 
loop; otherwise the indexed addressing mode won't trigger and the base 
register will instead be incremented by four.

 Also I wonder why with #0 the middle end tries to be too smart and makes 
the loop's continuation condition `ne' rather `gt' as with #1, causing the 
resulting RTL not to match the pattern for `jaoblss' and consequently more 
elaborate code to be produced, even though in both cases the control 
statement uses the `<' relational operator.  Anyway, that's unrelated.

>  It'll take me a couple of days to push the new version through regression 
> testing, and I'll post it once that is complete along with any other 
> updates someone may request.

 I actually missed the high-part case with my first attempt and thankfully 
pr53505.c has caught it.  It means however that I've had to rerun testing, 
and given I have literally only just started it now I expect results Thu 
morning only.

 NB I find the reindentation resulting in `push_reload' awful, just as I 
do either version of the massive logical expression involved.  Perhaps we 
could factor these out into `static inline' functions sometime, and then 
have them split into individual returns within?

  Maciej
---
 gcc/reload.c                                    |  105 +++++++++++++++---------
 gcc/testsuite/gcc.c-torture/compile/pr58901-0.c |   17 +++
 gcc/testsuite/gcc.c-torture/compile/pr58901-1.c |   21 ++++
 3 files changed, 107 insertions(+), 36 deletions(-)

Comments

Eric Botcazou Nov. 24, 2020, 11:03 a.m. UTC | #1
> First posted at: <https://gcc.gnu.org/ml/gcc/2014-06/msg00060.html>.
> 
> 2020-11-24  Matt Thomas  <matt@3am-software.com>
> 	    Maciej W. Rozycki  <macro@linux-mips.org>
> 
> 	gcc/
> 	PR target/58901
> 	* reload.c (push_reload): Also reload the inner expression of a
> 	SUBREG for pseudos associated with a mode-dependent memory
> 	reference.
> 	(find_reloads): Force a reload likewise.

Thanks for pursuing this.  Although the duplication is unfortunate, I think 
that this version is more in line with the existing processing so I would go 
for it.  You probably need a formal ack from Ulrich(?) or Jeff though.
Maciej W. Rozycki Nov. 26, 2020, 5:22 p.m. UTC | #2
On Tue, 24 Nov 2020, Eric Botcazou wrote:

> > First posted at: <https://gcc.gnu.org/ml/gcc/2014-06/msg00060.html>.
> > 
> > 2020-11-24  Matt Thomas  <matt@3am-software.com>
> > 	    Maciej W. Rozycki  <macro@linux-mips.org>
> > 
> > 	gcc/
> > 	PR target/58901
> > 	* reload.c (push_reload): Also reload the inner expression of a
> > 	SUBREG for pseudos associated with a mode-dependent memory
> > 	reference.
> > 	(find_reloads): Force a reload likewise.
> 
> Thanks for pursuing this.  Although the duplication is unfortunate, I think 
> that this version is more in line with the existing processing so I would go 
> for it.  You probably need a formal ack from Ulrich(?) or Jeff though.

 This has completed verification now with good results.

 OK to apply then?  That would let the whole series through once it's gone 
through final verification (now ongoing; ETC: Mon, Nov 30th).

  Maciej
Maciej W. Rozycki Nov. 27, 2020, 3:51 a.m. UTC | #3
On Thu, 26 Nov 2020, Maciej W. Rozycki wrote:

> > > 	PR target/58901
> > > 	* reload.c (push_reload): Also reload the inner expression of a
> > > 	SUBREG for pseudos associated with a mode-dependent memory
> > > 	reference.
> > > 	(find_reloads): Force a reload likewise.
> > 
> > Thanks for pursuing this.  Although the duplication is unfortunate, I think 
> > that this version is more in line with the existing processing so I would go 
> > for it.  You probably need a formal ack from Ulrich(?) or Jeff though.
> 
>  This has completed verification now with good results.
> 
>  OK to apply then?  That would let the whole series through once it's gone 
> through final verification (now ongoing; ETC: Mon, Nov 30th).

 FAOD verification included bootstraps with native `powerpc64le-linux-gnu' 
and `x86_64-linux-gnu' systems, a cross-build of a `vax-netbsdelf' target 
compiler with a `powerpc64le-linux-gnu' build/host system, and then full 
regression-testing of all the components built across the three setups.

  Maciej
Ulrich Weigand Nov. 27, 2020, 10:52 a.m. UTC | #4
On Tue, Nov 24, 2020 at 06:19:41AM +0000, Maciej W. Rozycki wrote: 

>  NB I find the reindentation resulting in `push_reload' awful, just as I 
> do either version of the massive logical expression involved.  Perhaps we 
> could factor these out into `static inline' functions sometime, and then 
> have them split into individual returns within?

I'm generally hesitant to do a lot of changes to the reload code base
at this stage.  The strategy rather is to move all remaining targets
over to LRA and then simply delete reload :-)

Given that you're modernizing the vax target, I'm wondering if you
shouldn't rather go all the way and move it over to LRA as well,
then you wouldn't be affected by any remaining reload deficiencies.
(The major hurdle so far was that LRA doesn't support CC0, but it
looks like you're removing that anyway ...)

> +      && (strict_low
> +	  || (subreg_lowpart_p (in)
> +	      && (CONSTANT_P (SUBREG_REG (in))
> +		  || GET_CODE (SUBREG_REG (in)) == PLUS
> +		  || strict_low

If we do this, then that "|| strict_low" clause is now redundant. 

> +	      && REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER
> +	      && reg_equiv_mem (REGNO (SUBREG_REG (in)))
> +	      && (mode_dependent_address_p
> +		  (XEXP (reg_equiv_mem (REGNO (SUBREG_REG (in))), 0),
> +		   MEM_ADDR_SPACE (reg_equiv_mem (REGNO (SUBREG_REG (in)))))))))

I guess this is not incorrect, but I'm wondering if it would be
*complete* -- there are other cases where reload replaces a pseudo
with a MEM even where reg_equiv_mem is null.

That said, if it fixes the test suite errors you're seeing, it would
probably be OK to go with just this minimal change -- unless we can
just move to LRA as mentioned above.

Bye,
Ulrich
Maciej W. Rozycki Nov. 27, 2020, 7:22 p.m. UTC | #5
On Fri, 27 Nov 2020, Ulrich Weigand wrote:

> >  NB I find the reindentation resulting in `push_reload' awful, just as I 
> > do either version of the massive logical expression involved.  Perhaps we 
> > could factor these out into `static inline' functions sometime, and then 
> > have them split into individual returns within?
> 
> I'm generally hesitant to do a lot of changes to the reload code base
> at this stage.  The strategy rather is to move all remaining targets
> over to LRA and then simply delete reload :-)
> 
> Given that you're modernizing the vax target, I'm wondering if you
> shouldn't rather go all the way and move it over to LRA as well,
> then you wouldn't be affected by any remaining reload deficiencies.
> (The major hurdle so far was that LRA doesn't support CC0, but it
> looks like you're removing that anyway ...)

 I considered moving to LRA, but decided to make one step at a time, 
especially given the number of issues the VAX port has been suffering 
from.  For example there are cases evident from regression test failures 
where new pseudos are created past-reload.  That would require tracking 
down, and I think switching to LRA would best be made with cleaner test 
results so as not to introduce another variable into the picture.

 So I would consider it GCC 12 material, so that we have an actual release 
with the VAX port converted to MODE_CC, but still using reload.  I think 
it could make some backports easier too if NetBSD people wanted to do it.

> > +      && (strict_low
> > +	  || (subreg_lowpart_p (in)
> > +	      && (CONSTANT_P (SUBREG_REG (in))
> > +		  || GET_CODE (SUBREG_REG (in)) == PLUS
> > +		  || strict_low
> 
> If we do this, then that "|| strict_low" clause is now redundant. 

 Oh!  I noticed the redundancy (which was in the way of the extra 
condition I was about to add) and rewrote the expression so as to remove 
it, and looks like I then left the line in place.  Maybe I didn't save the 
final change in the editor or something.  Sigh!  Thanks for spotting it.

> > +	      && REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER
> > +	      && reg_equiv_mem (REGNO (SUBREG_REG (in)))
> > +	      && (mode_dependent_address_p
> > +		  (XEXP (reg_equiv_mem (REGNO (SUBREG_REG (in))), 0),
> > +		   MEM_ADDR_SPACE (reg_equiv_mem (REGNO (SUBREG_REG (in)))))))))
> 
> I guess this is not incorrect, but I'm wondering if it would be
> *complete* -- there are other cases where reload replaces a pseudo
> with a MEM even where reg_equiv_mem is null.

 I wasn't aware of that.  What would be the usual scenario?

> That said, if it fixes the test suite errors you're seeing, it would
> probably be OK to go with just this minimal change -- unless we can
> just move to LRA as mentioned above.

 I've looked through the test results and indeed these suspicious ICEs 
remain:

.../gcc/testsuite/gcc.dg/pr83623.c:13:1: internal compiler error: in change_address_1, at emit-rtl.c:2275
.../gcc/testsuite/gcc.dg/torture/vshuf-main.inc:27:1: internal compiler error: in change_address_1, at emit-rtl.c:2275

corresponding to:

FAIL: gcc.dg/pr83623.c (internal compiler error)
FAIL: gcc.dg/pr83623.c (test for excess errors)
FAIL: gcc.dg/torture/vshuf-v16qi.c   -O2  (internal compiler error)
FAIL: gcc.dg/torture/vshuf-v16qi.c   -O2  (test for excess errors)

I'll investigate.

  Maciej
Maciej W. Rozycki Nov. 27, 2020, 8:47 p.m. UTC | #6
On Fri, 27 Nov 2020, Maciej W. Rozycki wrote:

> > That said, if it fixes the test suite errors you're seeing, it would
> > probably be OK to go with just this minimal change -- unless we can
> > just move to LRA as mentioned above.
> 
>  I've looked through the test results and indeed these suspicious ICEs 
> remain:
> 
> .../gcc/testsuite/gcc.dg/pr83623.c:13:1: internal compiler error: in change_address_1, at emit-rtl.c:2275
> .../gcc/testsuite/gcc.dg/torture/vshuf-main.inc:27:1: internal compiler error: in change_address_1, at emit-rtl.c:2275

 I've double-checked these and this:

> corresponding to:
> 
> FAIL: gcc.dg/pr83623.c (internal compiler error)
> FAIL: gcc.dg/pr83623.c (test for excess errors)

comes from this insn:

(insn 17 14 145 (set (reg:SI 1 %r1)
        (zero_extract:SI (mem/c:SI (symbol_ref:SI ("x") <var_decl 0x7ffff7f80120 x>) [1 x+0 S4 A128])
            (const_int 16 [0x10])
            (const_int 16 [0x10]))) ".../gcc/testsuite/gcc.dg/pr83623.c":12:9 101 {*vax.md:805}
     (nil))

and this:

> FAIL: gcc.dg/torture/vshuf-v16qi.c   -O2  (internal compiler error)
> FAIL: gcc.dg/torture/vshuf-v16qi.c   -O2  (test for excess errors)

likewise:

(insn 83 82 84 (set (reg:SI 5 %r5 [84])
        (zero_extract:SI (mem/c:SI (symbol_ref:SI ("b") <var_decl 0x7ffff7f801b0 b>) [0 b+0 S4 A128])
            (const_int 8 [0x8])
            (const_int 16 [0x10]))) ".../gcc/testsuite/gcc.dg/torture/vshuf-main.inc":28:1 101 {*vax.md:805}
     (nil))

So these are not related (and addressed with 22/31 BTW).

 I'll make the "|| strict_low" update then and push this change along with 
the rest once all the verification has completed, presumably this coming 
Monday.

 Thanks for your review!

  Maciej
Jeff Law Nov. 29, 2020, 5:31 p.m. UTC | #7
On 11/27/20 12:22 PM, Maciej W. Rozycki wrote:
> On Fri, 27 Nov 2020, Ulrich Weigand wrote:
>
>>>  NB I find the reindentation resulting in `push_reload' awful, just as I 
>>> do either version of the massive logical expression involved.  Perhaps we 
>>> could factor these out into `static inline' functions sometime, and then 
>>> have them split into individual returns within?
>> I'm generally hesitant to do a lot of changes to the reload code base
>> at this stage.  The strategy rather is to move all remaining targets
>> over to LRA and then simply delete reload :-)
>>
>> Given that you're modernizing the vax target, I'm wondering if you
>> shouldn't rather go all the way and move it over to LRA as well,
>> then you wouldn't be affected by any remaining reload deficiencies.
>> (The major hurdle so far was that LRA doesn't support CC0, but it
>> looks like you're removing that anyway ...)
>  I considered moving to LRA, but decided to make one step at a time, 
> especially given the number of issues the VAX port has been suffering 
> from.  For example there are cases evident from regression test failures 
> where new pseudos are created past-reload.  That would require tracking 
> down, and I think switching to LRA would best be made with cleaner test 
> results so as not to introduce another variable into the picture.
>
>  So I would consider it GCC 12 material, so that we have an actual release 
> with the VAX port converted to MODE_CC, but still using reload.  I think 
> it could make some backports easier too if NetBSD people wanted to do it.
That was my plan on H8 as well (switch to LRA next cycle).  I am aware
of one reload issue that the move away from cc0 triggers, but I'm
ignoring it for now.
Jeff
diff mbox series

Patch

Index: gcc/gcc/reload.c
===================================================================
--- gcc.orig/gcc/reload.c
+++ gcc/gcc/reload.c
@@ -1043,53 +1043,73 @@  push_reload (rtx in, rtx out, rtx *inloc
      Also reload the inner expression if it does not require a secondary
      reload but the SUBREG does.
 
-     Finally, reload the inner expression if it is a register that is in
+     Also reload the inner expression if it is a register that is in
      the class whose registers cannot be referenced in a different size
      and M1 is not the same size as M2.  If subreg_lowpart_p is false, we
      cannot reload just the inside since we might end up with the wrong
      register class.  But if it is inside a STRICT_LOW_PART, we have
-     no choice, so we hope we do get the right register class there.  */
+     no choice, so we hope we do get the right register class there.
+
+     Finally, reload the inner expression if it is a pseudo that will
+     become a MEM and the MEM has a mode-dependent address, as in that
+     case we obviously cannot change the mode of the MEM to that of the
+     containing SUBREG as that would change the interpretation of the
+     address.  */
 
   scalar_int_mode inner_mode;
   if (in != 0 && GET_CODE (in) == SUBREG
-      && (subreg_lowpart_p (in) || strict_low)
       && targetm.can_change_mode_class (GET_MODE (SUBREG_REG (in)),
 					inmode, rclass)
       && contains_allocatable_reg_of_mode[rclass][GET_MODE (SUBREG_REG (in))]
-      && (CONSTANT_P (SUBREG_REG (in))
-	  || GET_CODE (SUBREG_REG (in)) == PLUS
-	  || strict_low
-	  || (((REG_P (SUBREG_REG (in))
-		&& REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER)
-	       || MEM_P (SUBREG_REG (in)))
-	      && (paradoxical_subreg_p (inmode, GET_MODE (SUBREG_REG (in)))
-		  || (known_le (GET_MODE_SIZE (inmode), UNITS_PER_WORD)
-		      && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (in)),
-						 &inner_mode)
-		      && GET_MODE_SIZE (inner_mode) <= UNITS_PER_WORD
-		      && paradoxical_subreg_p (inmode, inner_mode)
-		      && LOAD_EXTEND_OP (inner_mode) != UNKNOWN)
-		  || (WORD_REGISTER_OPERATIONS
-		      && partial_subreg_p (inmode, GET_MODE (SUBREG_REG (in)))
-		      && (known_equal_after_align_down
-			  (GET_MODE_SIZE (inmode) - 1,
-			   GET_MODE_SIZE (GET_MODE (SUBREG_REG (in))) - 1,
-			   UNITS_PER_WORD)))))
-	  || (REG_P (SUBREG_REG (in))
-	      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
-	      /* The case where out is nonzero
-		 is handled differently in the following statement.  */
-	      && (out == 0 || subreg_lowpart_p (in))
-	      && (complex_word_subreg_p (inmode, SUBREG_REG (in))
-		  || !targetm.hard_regno_mode_ok (subreg_regno (in), inmode)))
-	  || (secondary_reload_class (1, rclass, inmode, in) != NO_REGS
-	      && (secondary_reload_class (1, rclass, GET_MODE (SUBREG_REG (in)),
-					  SUBREG_REG (in))
-		  == NO_REGS))
+      && (strict_low
+	  || (subreg_lowpart_p (in)
+	      && (CONSTANT_P (SUBREG_REG (in))
+		  || GET_CODE (SUBREG_REG (in)) == PLUS
+		  || strict_low
+		  || (((REG_P (SUBREG_REG (in))
+			&& REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER)
+		       || MEM_P (SUBREG_REG (in)))
+		      && (paradoxical_subreg_p (inmode,
+						GET_MODE (SUBREG_REG (in)))
+			  || (known_le (GET_MODE_SIZE (inmode), UNITS_PER_WORD)
+			      && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG
+								   (in)),
+							 &inner_mode)
+			      && GET_MODE_SIZE (inner_mode) <= UNITS_PER_WORD
+			      && paradoxical_subreg_p (inmode, inner_mode)
+			      && LOAD_EXTEND_OP (inner_mode) != UNKNOWN)
+			  || (WORD_REGISTER_OPERATIONS
+			      && partial_subreg_p (inmode,
+						   GET_MODE (SUBREG_REG (in)))
+			      && (known_equal_after_align_down
+				  (GET_MODE_SIZE (inmode) - 1,
+				   GET_MODE_SIZE (GET_MODE (SUBREG_REG
+							    (in))) - 1,
+				   UNITS_PER_WORD)))))
+		  || (REG_P (SUBREG_REG (in))
+		      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
+		      /* The case where out is nonzero
+			 is handled differently in the following statement.  */
+		      && (out == 0 || subreg_lowpart_p (in))
+		      && (complex_word_subreg_p (inmode, SUBREG_REG (in))
+			  || !targetm.hard_regno_mode_ok (subreg_regno (in),
+							  inmode)))
+		  || (secondary_reload_class (1, rclass, inmode, in) != NO_REGS
+		      && (secondary_reload_class (1, rclass,
+						  GET_MODE (SUBREG_REG (in)),
+						  SUBREG_REG (in))
+			  == NO_REGS))
+		  || (REG_P (SUBREG_REG (in))
+		      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
+		      && !REG_CAN_CHANGE_MODE_P (REGNO (SUBREG_REG (in)),
+						 GET_MODE (SUBREG_REG (in)),
+						 inmode))))
 	  || (REG_P (SUBREG_REG (in))
-	      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
-	      && !REG_CAN_CHANGE_MODE_P (REGNO (SUBREG_REG (in)),
-					 GET_MODE (SUBREG_REG (in)), inmode))))
+	      && REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER
+	      && reg_equiv_mem (REGNO (SUBREG_REG (in)))
+	      && (mode_dependent_address_p
+		  (XEXP (reg_equiv_mem (REGNO (SUBREG_REG (in))), 0),
+		   MEM_ADDR_SPACE (reg_equiv_mem (REGNO (SUBREG_REG (in)))))))))
     {
 #ifdef LIMIT_RELOAD_CLASS
       in_subreg_loc = inloc;
@@ -3157,6 +3177,19 @@  find_reloads (rtx_insn *insn, int replac
 				  && paradoxical_subreg_p (operand_mode[i],
 							   inner_mode)
 				  && LOAD_EXTEND_OP (inner_mode) != UNKNOWN)))
+		      /* We must force a reload of a SUBREG's inner expression
+			 if it is a pseudo that will become a MEM and the MEM
+			 has a mode-dependent address, as in that case we
+			 obviously cannot change the mode of the MEM to that
+			 of the containing SUBREG as that would change the
+			 interpretation of the address.  */
+		      || (REG_P (operand)
+			  && REGNO (operand) >= FIRST_PSEUDO_REGISTER
+			  && reg_equiv_mem (REGNO (operand))
+			  && (mode_dependent_address_p
+			      (XEXP (reg_equiv_mem (REGNO (operand)), 0),
+			       (MEM_ADDR_SPACE
+				(reg_equiv_mem (REGNO (operand)))))))
 		      )
 		    force_reload = 1;
 		}
Index: gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-0.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-0.c
@@ -0,0 +1,17 @@ 
+typedef signed int __attribute__ ((mode (SI))) int_t;
+
+struct s
+{
+  int_t n;
+  int_t c[];
+};
+
+int_t
+ashlsi (int_t x, const struct s *s)
+{
+  int_t i;
+
+  for (i = 0; i < s->n; i++)
+    x ^= 1 << s->c[i];
+  return x;
+}
Index: gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-1.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-1.c
@@ -0,0 +1,21 @@ 
+typedef int __attribute__ ((mode (SI))) int_t;
+
+struct s
+{
+  int_t n;
+  int_t m : 1;
+  int_t l : 31;
+};
+
+int_t
+movdi (int_t x, const struct s *s)
+{
+  int_t i;
+
+  for (i = 0; i < x; i++)
+    {
+      const struct s t = s[i];
+      x += t.m ? 1 : 0;
+    }
+  return x;
+}