diff mbox series

[v3,01/31] PR target/58901: reload: Handle SUBREG of MEM with a mode-dependent address

Message ID alpine.LFD.2.21.2011272049060.656242@eddie.linux-mips.org
State Accepted
Headers show
Series [v3,01/31] PR target/58901: reload: Handle SUBREG of MEM with a mode-dependent address | expand

Commit Message

Maciej W. Rozycki Nov. 27, 2020, 8:50 p.m. UTC
From: Matt Thomas <matt@3am-software.com>

Fix an ICE with the handling of RTL expressions like:

(subreg:QI (mem/c:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 0 %r0 [orig:67 i ] [67])
                    (const_int 4 [0x4]))
                (reg/v/f:SI 7 %r7 [orig:59 doacross ] [59]))
            (const_int 40 [0x28])) [1 MEM[(unsigned int *)doacross_63 + 40B + i_106 * 4]+0 S4 A32]) 0)

that causes the compilation of libgomp to fail:

during RTL pass: reload
.../libgomp/ordered.c: In function 'GOMP_doacross_wait':
.../libgomp/ordered.c:507:1: internal compiler error: in change_address_1, at emit-rtl.c:2275
  507 | }
      | ^
0x10a3462b change_address_1
	.../gcc/emit-rtl.c:2275
0x10a353a7 adjust_address_1(rtx_def*, machine_mode, poly_int<1u, long>, int, int, int, poly_int<1u, long>)
	.../gcc/emit-rtl.c:2409
0x10ae2993 alter_subreg(rtx_def**, bool)
	.../gcc/final.c:3368
0x10ae25cf cleanup_subreg_operands(rtx_insn*)
	.../gcc/final.c:3322
0x110922a3 reload(rtx_insn*, int)
	.../gcc/reload1.c:1232
0x10de2bf7 do_reload
	.../gcc/ira.c:5812
0x10de3377 execute
	.../gcc/ira.c:5986

in a `vax-netbsdelf' build, where an attempt is made to change the mode
of the contained memory reference to the mode of the containing SUBREG.
Such RTL expressions are produced by the VAX shift and rotate patterns
(`ashift', `ashiftrt', `rotate', `rotatert') where the count operand
always has the QI mode regardless of the mode, either SI or DI, of the
datum shifted or rotated.

Such a mode change cannot work where the memory reference uses the
indexed addressing mode, where a multiplier is implied that in the VAX
ISA depends on the width of the memory access requested and therefore
changing the machine mode would change the address calculation as well.

Avoid the attempt then by forcing the reload of any SUBREGs containing
a mode-dependent memory reference, also fixing these regressions:

FAIL: gcc.c-torture/compile/pr46883.c   -Os  (internal compiler error)
FAIL: gcc.c-torture/compile/pr46883.c   -Os  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -g  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -g  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (test for excess errors)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler error)
FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
FAIL: gcc.dg/20050629-1.c (internal compiler error)
FAIL: gcc.dg/20050629-1.c (test for excess errors)
FAIL: c-c++-common/torture/pr53505.c   -Os  (internal compiler error)
FAIL: c-c++-common/torture/pr53505.c   -Os  (test for excess errors)
FAIL: gfortran.dg/coarray_failed_images_1.f08   -Os  (internal compiler error)
FAIL: gfortran.dg/coarray_stopped_images_1.f08   -Os  (internal compiler error)

With test case #0 included, this change causes a reload of:

(insn 15 14 16 4 (set (reg:SI 31)
        (ashift:SI (const_int 1 [0x1])
            (subreg:QI (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ]) 0))) "pr58901-0.c":15:12 94 {ashlsi3}
     (expr_list:REG_DEAD (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
        (nil)))

as follows:

Reloads for insn # 15
Reload 0: reload_in (SI) = (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
	ALL_REGS, RELOAD_FOR_INPUT (opnum = 2)
	reload_in_reg: (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
	reload_reg_rtx: (reg:SI 5 %r5)

resulting in:

(insn 37 14 15 4 (set (reg:SI 5 %r5)
        (mem/c:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 1 %r1 [orig:25 i ] [25])
                        (const_int 4 [0x4]))
                    (reg/v/f:SI 4 %r4 [orig:29 s ] [29]))
                (const_int 4 [0x4])) [1 MEM[(int *)s_8(D) + 4B + _5 * 4]+0 S4 A32])) "pr58901-0.c":15:12 12 {movsi_2}
     (nil))
(insn 15 37 16 4 (set (reg:SI 2 %r2 [31])
        (ashift:SI (const_int 1 [0x1])
            (reg:QI 5 %r5))) "pr58901-0.c":15:12 94 {ashlsi3}
     (nil))

and assembly like:

.L3:
	movl 4(%r4)[%r1],%r5
	ashl %r5,$1,%r2
	xorl2 %r2,%r0
	incl %r1
	cmpl %r1,%r3
	jneq .L3

produced for the loop, providing optimization has been enabled.  

Likewise with test case #1 the reload of:

(insn 17 16 18 4 (set (reg:SI 34)
        (and:SI (subreg:SI (reg/v:DI 27 [ t ]) 4)
            (const_int 1 [0x1]))) "pr58901-1.c":18:20 77 {*andsi_const_int}
     (expr_list:REG_DEAD (reg/v:DI 27 [ t ])
        (nil)))

is as follows:

Reloads for insn # 17
Reload 0: reload_in (DI) = (reg/v:DI 27 [ t ])
	reload_out (SI) = (reg:SI 2 %r2 [34])
	ALL_REGS, RELOAD_OTHER (opnum = 0)
	reload_in_reg: (reg/v:DI 27 [ t ])
	reload_out_reg: (reg:SI 2 %r2 [34])
	reload_reg_rtx: (reg:DI 4 %r4)

resulting in:

(insn 40 16 17 4 (set (reg:DI 4 %r4)
        (mem/c:DI (plus:SI (mult:SI (reg/v:SI 1 %r1 [orig:26 i ] [26])
                    (const_int 8 [0x8]))
                (reg/v/f:SI 3 %r3 [orig:30 s ] [30])) [1 MEM[(const struct s *)s_13(D) + _7 * 8]+0 S8 A32])) "pr58901-1.c":18:20 11 {movdi}
     (nil))
(insn 17 40 41 4 (set (reg:SI 4 %r4)
        (and:SI (reg:SI 5 %r5 [+4 ])
            (const_int 1 [0x1]))) "pr58901-1.c":18:20 77 {*andsi_const_int}
     (nil))

and assembly like:

.L3:
	movq (%r3)[%r1],%r4
	bicl3 $-2,%r5,%r4
	addl2 %r4,%r0
	jaoblss %r0,%r1,.L3

First posted at: <https://gcc.gnu.org/ml/gcc/2014-06/msg00060.html>.

2020-11-27  Matt Thomas  <matt@3am-software.com>
	    Maciej W. Rozycki  <macro@linux-mips.org>

	gcc/
	PR target/58901
	* reload.c (push_reload): Also reload the inner expression of a 
	SUBREG for pseudos associated with a mode-dependent memory 
	reference.
	(find_reloads): Force a reload likewise.

2020-11-27  Maciej W. Rozycki  <macro@linux-mips.org>

	gcc/testsuite/
	PR target/58901
	* gcc.c-torture/compile/pr58901-0.c: New test.
	* gcc.c-torture/compile/pr58901-1.c: New test.
---
 gcc/reload.c                                    |  104 +++++++++++++++---------
 gcc/testsuite/gcc.c-torture/compile/pr58901-0.c |   17 +++
 gcc/testsuite/gcc.c-torture/compile/pr58901-1.c |   21 ++++
 3 files changed, 106 insertions(+), 36 deletions(-)

Comments

Jeff Law Nov. 30, 2020, 6:51 p.m. UTC | #1
On 11/27/20 1:50 PM, Maciej W. Rozycki wrote:
> From: Matt Thomas <matt@3am-software.com>
>
> Fix an ICE with the handling of RTL expressions like:
>
> (subreg:QI (mem/c:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 0 %r0 [orig:67 i ] [67])
>                     (const_int 4 [0x4]))
>                 (reg/v/f:SI 7 %r7 [orig:59 doacross ] [59]))
>             (const_int 40 [0x28])) [1 MEM[(unsigned int *)doacross_63 + 40B + i_106 * 4]+0 S4 A32]) 0)
>
> that causes the compilation of libgomp to fail:
>
> during RTL pass: reload
> .../libgomp/ordered.c: In function 'GOMP_doacross_wait':
> .../libgomp/ordered.c:507:1: internal compiler error: in change_address_1, at emit-rtl.c:2275
>   507 | }
>       | ^
> 0x10a3462b change_address_1
> 	.../gcc/emit-rtl.c:2275
> 0x10a353a7 adjust_address_1(rtx_def*, machine_mode, poly_int<1u, long>, int, int, int, poly_int<1u, long>)
> 	.../gcc/emit-rtl.c:2409
> 0x10ae2993 alter_subreg(rtx_def**, bool)
> 	.../gcc/final.c:3368
> 0x10ae25cf cleanup_subreg_operands(rtx_insn*)
> 	.../gcc/final.c:3322
> 0x110922a3 reload(rtx_insn*, int)
> 	.../gcc/reload1.c:1232
> 0x10de2bf7 do_reload
> 	.../gcc/ira.c:5812
> 0x10de3377 execute
> 	.../gcc/ira.c:5986
>
> in a `vax-netbsdelf' build, where an attempt is made to change the mode
> of the contained memory reference to the mode of the containing SUBREG.
> Such RTL expressions are produced by the VAX shift and rotate patterns
> (`ashift', `ashiftrt', `rotate', `rotatert') where the count operand
> always has the QI mode regardless of the mode, either SI or DI, of the
> datum shifted or rotated.
>
> Such a mode change cannot work where the memory reference uses the
> indexed addressing mode, where a multiplier is implied that in the VAX
> ISA depends on the width of the memory access requested and therefore
> changing the machine mode would change the address calculation as well.
>
> Avoid the attempt then by forcing the reload of any SUBREGs containing
> a mode-dependent memory reference, also fixing these regressions:
>
> FAIL: gcc.c-torture/compile/pr46883.c   -Os  (internal compiler error)
> FAIL: gcc.c-torture/compile/pr46883.c   -Os  (test for excess errors)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O2  (internal compiler error)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O2  (test for excess errors)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -g  (internal compiler error)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O3 -g  (test for excess errors)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (internal compiler error)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (test for excess errors)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler error)
> FAIL: gcc.c-torture/execute/20120808-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
> FAIL: gcc.dg/20050629-1.c (internal compiler error)
> FAIL: gcc.dg/20050629-1.c (test for excess errors)
> FAIL: c-c++-common/torture/pr53505.c   -Os  (internal compiler error)
> FAIL: c-c++-common/torture/pr53505.c   -Os  (test for excess errors)
> FAIL: gfortran.dg/coarray_failed_images_1.f08   -Os  (internal compiler error)
> FAIL: gfortran.dg/coarray_stopped_images_1.f08   -Os  (internal compiler error)
>
> With test case #0 included it causes a reload with:
>
> (insn 15 14 16 4 (set (reg:SI 31)
>         (ashift:SI (const_int 1 [0x1])
>             (subreg:QI (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ]) 0))) "pr58901-0.c":15:12 94 {ashlsi3}
>      (expr_list:REG_DEAD (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
>         (nil)))
>
> as follows:
>
> Reloads for insn # 15
> Reload 0: reload_in (SI) = (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
> 	ALL_REGS, RELOAD_FOR_INPUT (opnum = 2)
> 	reload_in_reg: (reg:SI 30 [ MEM[(int *)s_8(D) + 4B + _5 * 4] ])
> 	reload_reg_rtx: (reg:SI 5 %r5)
>
> resulting in:
>
> (insn 37 14 15 4 (set (reg:SI 5 %r5)
>         (mem/c:SI (plus:SI (plus:SI (mult:SI (reg/v:SI 1 %r1 [orig:25 i ] [25])
>                         (const_int 4 [0x4]))
>                     (reg/v/f:SI 4 %r4 [orig:29 s ] [29]))
>                 (const_int 4 [0x4])) [1 MEM[(int *)s_8(D) + 4B + _5 * 4]+0 S4 A32])) "pr58901-0.c":15:12 12 {movsi_2}
>      (nil))
> (insn 15 37 16 4 (set (reg:SI 2 %r2 [31])
>         (ashift:SI (const_int 1 [0x1])
>             (reg:QI 5 %r5))) "pr58901-0.c":15:12 94 {ashlsi3}
>      (nil))
>
> and assembly like:
>
> .L3:
> 	movl 4(%r4)[%r1],%r5
> 	ashl %r5,$1,%r2
> 	xorl2 %r2,%r0
> 	incl %r1
> 	cmpl %r1,%r3
> 	jneq .L3
>
> produced for the loop, providing optimization has been enabled.  
>
> Likewise with test case #1 the reload of:
>
> (insn 17 16 18 4 (set (reg:SI 34)
>         (and:SI (subreg:SI (reg/v:DI 27 [ t ]) 4)
>             (const_int 1 [0x1]))) "pr58901-1.c":18:20 77 {*andsi_const_int}
>      (expr_list:REG_DEAD (reg/v:DI 27 [ t ])
>         (nil)))
>
> is as follows:
>
> Reloads for insn # 17
> Reload 0: reload_in (DI) = (reg/v:DI 27 [ t ])
> 	reload_out (SI) = (reg:SI 2 %r2 [34])
> 	ALL_REGS, RELOAD_OTHER (opnum = 0)
> 	reload_in_reg: (reg/v:DI 27 [ t ])
> 	reload_out_reg: (reg:SI 2 %r2 [34])
> 	reload_reg_rtx: (reg:DI 4 %r4)
>
> resulting in:
>
> (insn 40 16 17 4 (set (reg:DI 4 %r4)
>         (mem/c:DI (plus:SI (mult:SI (reg/v:SI 1 %r1 [orig:26 i ] [26])
>                     (const_int 8 [0x8]))
>                 (reg/v/f:SI 3 %r3 [orig:30 s ] [30])) [1 MEM[(const struct s *)s_13(D) + _7 * 8]+0 S8 A32])) "pr58901-1.c":18:20 11 {movdi}
>      (nil))
> (insn 17 40 41 4 (set (reg:SI 4 %r4)
>         (and:SI (reg:SI 5 %r5 [+4 ])
>             (const_int 1 [0x1]))) "pr58901-1.c":18:20 77 {*andsi_const_int}
>      (nil))
>
> and assembly like:
>
> .L3:
> 	movq (%r3)[%r1],%r4
> 	bicl3 $-2,%r5,%r4
> 	addl2 %r4,%r0
> 	jaoblss %r0,%r1,.L3
>
> First posted at: <https://gcc.gnu.org/ml/gcc/2014-06/msg00060.html>.
>
> 2020-11-27  Matt Thomas  <matt@3am-software.com>
> 	    Maciej W. Rozycki  <macro@linux-mips.org>
>
> 	gcc/
> 	PR target/58901
> 	* reload.c (push_reload): Also reload the inner expression of a 
> 	SUBREG for pseudos associated with a mode-dependent memory 
> 	reference.
> 	(find_reloads): Force a reload likewise.
>
> 2020-11-27  Maciej W. Rozycki  <macro@linux-mips.org>
>
> 	gcc/testsuite/
> 	PR target/58901
> 	* gcc.c-torture/compile/pr58901-0.c: New test.
> 	* gcc.c-torture/compile/pr58901-1.c: New test.
So one could make the argument that such (subreg (mem)) expressions
should never have been generated in the first place.  I'm guessing they
ultimately stem from using QImode for the shift count.

The mode for the shift count has always been a bit controversial.  
We've got some ports that use QImode similar to the vax and others that
use the target's most natural mode, even if it's wider than what the
target really needs.  I can't offhand recall if there was a general
consensus on the best way for a target to handle this.

I'm certain there are other problems in the reloading of mode-dependent
addresses.  See PR 66087 on the m68k.  This patch doesn't address that
issue (I was hoping it would, but such is life).  I'm not too inclined
to have folks chasing these issues, though, as reload (IMHO) should be on
the chopping block for gcc-12.

My overall thought WRT this patch is similar to Ulrich's.  Let's go with
it even though we know it's not necessarily 100% complete.  We can have
(subreg (mem)) coming into reload, as in, say, PR 66087.  There may also
be other ways in which reload replaces a pseudo with a MEM that aren't
necessarily caught here.

Jeff
diff mbox series

Patch

Index: gcc/gcc/reload.c
===================================================================
--- gcc.orig/gcc/reload.c
+++ gcc/gcc/reload.c
@@ -1043,53 +1043,72 @@  push_reload (rtx in, rtx out, rtx *inloc
      Also reload the inner expression if it does not require a secondary
      reload but the SUBREG does.
 
-     Finally, reload the inner expression if it is a register that is in
+     Also reload the inner expression if it is a register that is in
      the class whose registers cannot be referenced in a different size
      and M1 is not the same size as M2.  If subreg_lowpart_p is false, we
      cannot reload just the inside since we might end up with the wrong
      register class.  But if it is inside a STRICT_LOW_PART, we have
-     no choice, so we hope we do get the right register class there.  */
+     no choice, so we hope we do get the right register class there.
+
+     Finally, reload the inner expression if it is a pseudo that will
+     become a MEM and the MEM has a mode-dependent address, as in that
+     case we obviously cannot change the mode of the MEM to that of the
+     containing SUBREG as that would change the interpretation of the
+     address.  */
 
   scalar_int_mode inner_mode;
   if (in != 0 && GET_CODE (in) == SUBREG
-      && (subreg_lowpart_p (in) || strict_low)
       && targetm.can_change_mode_class (GET_MODE (SUBREG_REG (in)),
 					inmode, rclass)
       && contains_allocatable_reg_of_mode[rclass][GET_MODE (SUBREG_REG (in))]
-      && (CONSTANT_P (SUBREG_REG (in))
-	  || GET_CODE (SUBREG_REG (in)) == PLUS
-	  || strict_low
-	  || (((REG_P (SUBREG_REG (in))
-		&& REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER)
-	       || MEM_P (SUBREG_REG (in)))
-	      && (paradoxical_subreg_p (inmode, GET_MODE (SUBREG_REG (in)))
-		  || (known_le (GET_MODE_SIZE (inmode), UNITS_PER_WORD)
-		      && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (in)),
-						 &inner_mode)
-		      && GET_MODE_SIZE (inner_mode) <= UNITS_PER_WORD
-		      && paradoxical_subreg_p (inmode, inner_mode)
-		      && LOAD_EXTEND_OP (inner_mode) != UNKNOWN)
-		  || (WORD_REGISTER_OPERATIONS
-		      && partial_subreg_p (inmode, GET_MODE (SUBREG_REG (in)))
-		      && (known_equal_after_align_down
-			  (GET_MODE_SIZE (inmode) - 1,
-			   GET_MODE_SIZE (GET_MODE (SUBREG_REG (in))) - 1,
-			   UNITS_PER_WORD)))))
-	  || (REG_P (SUBREG_REG (in))
-	      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
-	      /* The case where out is nonzero
-		 is handled differently in the following statement.  */
-	      && (out == 0 || subreg_lowpart_p (in))
-	      && (complex_word_subreg_p (inmode, SUBREG_REG (in))
-		  || !targetm.hard_regno_mode_ok (subreg_regno (in), inmode)))
-	  || (secondary_reload_class (1, rclass, inmode, in) != NO_REGS
-	      && (secondary_reload_class (1, rclass, GET_MODE (SUBREG_REG (in)),
-					  SUBREG_REG (in))
-		  == NO_REGS))
+      && (strict_low
+	  || (subreg_lowpart_p (in)
+	      && (CONSTANT_P (SUBREG_REG (in))
+		  || GET_CODE (SUBREG_REG (in)) == PLUS
+		  || (((REG_P (SUBREG_REG (in))
+			&& REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER)
+		       || MEM_P (SUBREG_REG (in)))
+		      && (paradoxical_subreg_p (inmode,
+						GET_MODE (SUBREG_REG (in)))
+			  || (known_le (GET_MODE_SIZE (inmode), UNITS_PER_WORD)
+			      && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG
+								   (in)),
+							 &inner_mode)
+			      && GET_MODE_SIZE (inner_mode) <= UNITS_PER_WORD
+			      && paradoxical_subreg_p (inmode, inner_mode)
+			      && LOAD_EXTEND_OP (inner_mode) != UNKNOWN)
+			  || (WORD_REGISTER_OPERATIONS
+			      && partial_subreg_p (inmode,
+						   GET_MODE (SUBREG_REG (in)))
+			      && (known_equal_after_align_down
+				  (GET_MODE_SIZE (inmode) - 1,
+				   GET_MODE_SIZE (GET_MODE (SUBREG_REG
+							    (in))) - 1,
+				   UNITS_PER_WORD)))))
+		  || (REG_P (SUBREG_REG (in))
+		      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
+		      /* The case where out is nonzero
+			 is handled differently in the following statement.  */
+		      && (out == 0 || subreg_lowpart_p (in))
+		      && (complex_word_subreg_p (inmode, SUBREG_REG (in))
+			  || !targetm.hard_regno_mode_ok (subreg_regno (in),
+							  inmode)))
+		  || (secondary_reload_class (1, rclass, inmode, in) != NO_REGS
+		      && (secondary_reload_class (1, rclass,
+						  GET_MODE (SUBREG_REG (in)),
+						  SUBREG_REG (in))
+			  == NO_REGS))
+		  || (REG_P (SUBREG_REG (in))
+		      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
+		      && !REG_CAN_CHANGE_MODE_P (REGNO (SUBREG_REG (in)),
+						 GET_MODE (SUBREG_REG (in)),
+						 inmode))))
 	  || (REG_P (SUBREG_REG (in))
-	      && REGNO (SUBREG_REG (in)) < FIRST_PSEUDO_REGISTER
-	      && !REG_CAN_CHANGE_MODE_P (REGNO (SUBREG_REG (in)),
-					 GET_MODE (SUBREG_REG (in)), inmode))))
+	      && REGNO (SUBREG_REG (in)) >= FIRST_PSEUDO_REGISTER
+	      && reg_equiv_mem (REGNO (SUBREG_REG (in)))
+	      && (mode_dependent_address_p
+		  (XEXP (reg_equiv_mem (REGNO (SUBREG_REG (in))), 0),
+		   MEM_ADDR_SPACE (reg_equiv_mem (REGNO (SUBREG_REG (in)))))))))
     {
 #ifdef LIMIT_RELOAD_CLASS
       in_subreg_loc = inloc;
@@ -3157,6 +3176,19 @@  find_reloads (rtx_insn *insn, int replac
 				  && paradoxical_subreg_p (operand_mode[i],
 							   inner_mode)
 				  && LOAD_EXTEND_OP (inner_mode) != UNKNOWN)))
+		      /* We must force a reload of a SUBREG's inner expression
+			 if it is a pseudo that will become a MEM and the MEM
+			 has a mode-dependent address, as in that case we
+			 obviously cannot change the mode of the MEM to that
+			 of the containing SUBREG as that would change the
+			 interpretation of the address.  */
+		      || (REG_P (operand)
+			  && REGNO (operand) >= FIRST_PSEUDO_REGISTER
+			  && reg_equiv_mem (REGNO (operand))
+			  && (mode_dependent_address_p
+			      (XEXP (reg_equiv_mem (REGNO (operand)), 0),
+			       (MEM_ADDR_SPACE
+				(reg_equiv_mem (REGNO (operand)))))))
 		      )
 		    force_reload = 1;
 		}
Index: gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-0.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-0.c
@@ -0,0 +1,17 @@ 
+typedef signed int __attribute__ ((mode (SI))) int_t;
+
+struct s
+{
+  int_t n;
+  int_t c[];
+};
+
+int_t
+ashlsi (int_t x, const struct s *s)
+{
+  int_t i;
+
+  for (i = 0; i < s->n; i++)
+    x ^= 1 << s->c[i];
+  return x;
+}
Index: gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-1.c
===================================================================
--- /dev/null
+++ gcc/gcc/testsuite/gcc.c-torture/compile/pr58901-1.c
@@ -0,0 +1,21 @@ 
+typedef int __attribute__ ((mode (SI))) int_t;
+
+struct s
+{
+  int_t n;
+  int_t m : 1;
+  int_t l : 31;
+};
+
+int_t
+movdi (int_t x, const struct s *s)
+{
+  int_t i;
+
+  for (i = 0; i < x; i++)
+    {
+      const struct s t = s[i];
+      x += t.m ? 1 : 0;
+    }
+  return x;
+}