Message ID | 20140115215503.GG892@tucnak.redhat.com |
---|---|
State | New |
Headers | show |
On 15 January 2014 21:55, Jakub Jelinek <jakub@redhat.com> wrote: ... > The patch removes the likely_spilled_retval stuff Joern wrote a few years > ago because I believe this change should obsolete that. But, have tried to > reproduce the ICE using sh-elf target and haven't managed to do so, 4.9 is > quite a different compiler from 2005-ish gcc. You wouldn't see an ICE, but wrong-code. That being said, it sounds like your case 2) should handle this.
On Thu, Jan 16, 2014 at 03:41:57AM +0000, Joern Rennecke wrote: > On 15 January 2014 21:55, Jakub Jelinek <jakub@redhat.com> wrote: > ... > > The patch removes the likely_spilled_retval stuff Joern wrote a few years > > ago because I believe this change should obsolete that. But, have tried to > > reproduce the ICE using sh-elf target and haven't managed to do so, 4.9 is > > quite a different compiler from 2005-ish gcc. > > You wouldn't see an ICE, but wrong-code. I've mistyped it above, yeah, I was looking with your functions reverted and without the rest of my patch if combiner would even attempt to combine something into the second insn (loading fr1, after the fr0 set) and that didn't happen on the trunk. > That being said, it sounds like your case 2) should handle this. Anyway, for all the changed *.o files I've done: for i in `cat /tmp/M1`; do readelf -Ws $i | awk '($4=="FUNC" && $7!="UND"){print $8" "$3}' | sort > /tmp/1; readelf -Ws ../obj877/$i | awk '($4=="FUNC" && $7!="UND"){print $8" "$3}' | sort > /tmp/2; j=`cat /tmp/1 | wc -l`; k=`diff -up /tmp/1 /tmp/2 | grep -v '^+++' | grep '^+' | wc -l`; echo $i $k/$j; done to see how many functions in those files actually changed because of this patch and the numbers are below. The numbers are smaller though, because it only looks at function sizes, there could be code generation change even in function that has the same size (e.g. if later alignment makes the change not visible in the size). For x86_64, the sums are that 5384 functions out of the 43856 symbols (in these ~ 8% of all *.o files) changed function size. That still looks like a big change. 
I wonder if instead of not creating the LOG_LINKS we could just somehow taint them, add a flag to struct insn_link that we need to be careful about combining across that link, then in combine_instructions or the flags together from all the used links and if try_combine is called with this crosses_likely_spilled_setter_p flag, if recog accepts the result look at all constraints on all operands of the new insn and if for any constraint anywhere REG_CLASS_FROM_CONSTRAINT is likely spilled, punt. Thoughts on if this could be reliable? i686: gcc/builtins.o 0/201 gcc/gimple-fold.o 1/71 gcc/tree-sra.o 1/124 gcc/tree-ssa-structalias.o 0/164 gcc/tree-vect-stmts.o 0/128 gcc/tree-vrp.o 0/172 gcc/cp/error.o 0/61 i686-pc-linux-gnu/libcilkrts/.libs/cilk-abi-cilk-for.o 0/16 x86_64: gcc/builtins.o 0/195 gcc/cfgexpand.o 0/112 gcc/cfghooks.o 0/56 gcc/cgraphunit.o 1/30 gcc/df-core.o 1/82 gcc/final.o 2/89 gcc/fold-const.o 1/144 gcc/ggc-common.o 2/45 gcc/gimple-fold.o 0/70 gcc/gimple-pretty-print.o 1/74 gcc/godump.o 3/18 gcc/haifa-sched.o 3/162 gcc/i386.o 1/500 gcc/ipa-inline.o 0/46 gcc/opts-common.o 1/18 gcc/opts.o 1/17 gcc/passes.o 7/120 gcc/reg-stack.o 0/44 gcc/sched-rgn.o 1/86 gcc/sel-sched-ir.o 1/238 gcc/toplev.o 0/27 gcc/tree-eh.o 0/138 gcc/tree-inline.o 1/111 gcc/tree-sra.o 1/121 gcc/tree-ssa-live.o 0/45 gcc/tree-ssa-phiprop.o 0/24 gcc/tree-ssa-sccvn.o 0/119 gcc/tree-ssa-structalias.o 0/165 gcc/tree-vect-stmts.o 1/129 gcc/tree-vrp.o 1/176 gcc/varasm.o 2/196 gcc/ada/ali-util.o 1/111 gcc/ada/errout.o 1/86 gcc/ada/lib-xref.o 1/93 gcc/ada/urealp.o 2/88 gcc/ada/utils.o 0/141 gcc/build/read-rtl.o 2/23 gcc/c/c-decl.o 1/160 gcc/c/c-objc-common.o 1/8 gcc/c-family/c-opts.o 0/20 gcc/c-family/c-pch.o 1/8 gcc/cp/cp-objcp-common.o 1/15 gcc/cp/optimize.o 0/6 gcc/fortran/expr.o 1/72 gcc/fortran/intrinsic.o 0/44 gcc/fortran/scanner.o 1/47 gcc/go/expressions.o 81/765 gcc/go/gogo.o 14/351 gcc/go/gogo-tree.o 5/64 gcc/go/go.o 1/6 gcc/go/import-archive.o 1/24 gcc/go/import.o 1/47 gcc/go/parse.o 4/148 
gcc/go/runtime.o 3/11 gcc/go/statements.o 21/402 gcc/go/types.o 31/566 gcc/go/unsafe.o 0/5 x86_64-unknown-linux-gnu/boehm-gc/.libs/finalize.o 2/23 x86_64-unknown-linux-gnu/boehm-gc/.libs/pthread_support.o 3/36 x86_64-unknown-linux-gnu/boehm-gc/.libs/ptr_chck.o 1/9 x86_64-unknown-linux-gnu/libcilkrts/.libs/cilk-abi-cilk-for.o 2/15 x86_64-unknown-linux-gnu/libcilkrts/.libs/cilk_fiber-unix.o 0/15 x86_64-unknown-linux-gnu/libgfortran/.libs/environ.o 2/18 x86_64-unknown-linux-gnu/libgfortran/.libs/file_pos.o 1/4 x86_64-unknown-linux-gnu/libgfortran/.libs/inquire.o 1/2 x86_64-unknown-linux-gnu/libgfortran/.libs/intrinsics.o 2/34 x86_64-unknown-linux-gnu/libgfortran/.libs/list_read.o 1/22 x86_64-unknown-linux-gnu/libgfortran/.libs/open.o 2/4 x86_64-unknown-linux-gnu/libgfortran/.libs/transfer.o 3/49 x86_64-unknown-linux-gnu/libgfortran/.libs/unit.o 1/19 x86_64-unknown-linux-gnu/libgfortran/.libs/unix.o 1/60 x86_64-unknown-linux-gnu/libgo/.libs/bufio.o 1/70 x86_64-unknown-linux-gnu/libgo/.libs/bytes.o 2/87 x86_64-unknown-linux-gnu/libgo/.libs/expvar.o 0/49 x86_64-unknown-linux-gnu/libgo/.libs/flag.o 4/94 x86_64-unknown-linux-gnu/libgo/.libs/fmt.o 8/159 x86_64-unknown-linux-gnu/libgo/.libs/image.o 2/132 x86_64-unknown-linux-gnu/libgo/.libs/io.o 2/44 x86_64-unknown-linux-gnu/libgo/.libs/malloc.o 0/19 x86_64-unknown-linux-gnu/libgo/.libs/net.o 32/533 x86_64-unknown-linux-gnu/libgo/.libs/os.o 4/135 x86_64-unknown-linux-gnu/libgo/.libs/proc.o 1/92 x86_64-unknown-linux-gnu/libgo/.libs/reflect-go.o 22/458 x86_64-unknown-linux-gnu/libgo/.libs/regexp.o 2/98 x86_64-unknown-linux-gnu/libgo/.libs/runtime-go.o 0/32 x86_64-unknown-linux-gnu/libgo/.libs/sort.o 3/42 x86_64-unknown-linux-gnu/libgo/.libs/strconv.o 1/74 x86_64-unknown-linux-gnu/libgo/.libs/strings.o 3/89 x86_64-unknown-linux-gnu/libgo/.libs/sync.o 1/23 x86_64-unknown-linux-gnu/libgo/.libs/testing.o 0/104 x86_64-unknown-linux-gnu/libgomp/.libs/task.o 0/12 x86_64-unknown-linux-gnu/libitm/.libs/beginend.o 1/21 
x86_64-unknown-linux-gnu/libitm/.libs/method-serial.o 1/240 x86_64-unknown-linux-gnu/libitm/.libs/retry.o 3/7 x86_64-unknown-linux-gnu/libitm/.libs/useraction.o 1/5 x86_64-unknown-linux-gnu/libjava/.libs/defineclass.o 0/46 x86_64-unknown-linux-gnu/libjava/.libs/gnu-CORBA.o 492/1766 x86_64-unknown-linux-gnu/libjava/.libs/gnu-java-awt-dnd-peer-gtk.o 7/32 x86_64-unknown-linux-gnu/libjava/.libs/gnu-java-awt-peer-gtk.o 242/1138 x86_64-unknown-linux-gnu/libjava/.libs/gnu-java-awt-peer-swing.o 92/325 x86_64-unknown-linux-gnu/libjava/.libs/gnu-java-beans.o 92/474 x86_64-unknown-linux-gnu/libjava/.libs/gnu-java-lang-management.o 25/131 x86_64-unknown-linux-gnu/libjava/.libs/gnu-javax-management.o 20/64 x86_64-unknown-linux-gnu/libjava/.libs/gnu-javax-rmi.o 53/124 x86_64-unknown-linux-gnu/libjava/.libs/gnu-javax-sound-midi.o 18/207 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-aelfred2.o 40/229 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-dom.o 158/1328 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-libxmlj.o 60/468 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-pipeline.o 52/298 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-stream.o 127/559 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-transform.o 86/396 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-util.o 39/203 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-validation.o 67/504 x86_64-unknown-linux-gnu/libjava/.libs/gnu-xml-xpath.o 64/374 x86_64-unknown-linux-gnu/libjava/.libs/java-lang-management.o 12/68 x86_64-unknown-linux-gnu/libjava/.libs/javax-imageio.o 142/742 x86_64-unknown-linux-gnu/libjava/.libs/javax-rmi.o 4/49 x86_64-unknown-linux-gnu/libjava/.libs/javax-xml.o 32/350 x86_64-unknown-linux-gnu/libjava/.libs/jni.o 1/303 x86_64-unknown-linux-gnu/libjava/.libs/jvmti.o 8/68 x86_64-unknown-linux-gnu/libjava/.libs/link.o 8/39 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-CORBA_2_3.o 2/24 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-CORBA.o 122/898 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-CosNaming.o 
34/323 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-DynamicAny.o 25/631 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-IOP.o 14/149 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-Messaging.o 2/7 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-PortableInterceptor.o 21/155 x86_64-unknown-linux-gnu/libjava/.libs/org-omg-PortableServer.o 31/239 x86_64-unknown-linux-gnu/libjava/.libs/org-relaxng.o 5/35 x86_64-unknown-linux-gnu/libjava/.libs/org-w3c.o 2/25 x86_64-unknown-linux-gnu/libjava/.libs/org-xml.o 37/296 x86_64-unknown-linux-gnu/libjava/.libs/posix-threads.o 2/21 x86_64-unknown-linux-gnu/libjava/.libs/prims.o 3/75 x86_64-unknown-linux-gnu/libjava/.libs/stacktrace.o 1/20 x86_64-unknown-linux-gnu/libjava/.libs/verify.o 1/36 x86_64-unknown-linux-gnu/32/libcilkrts/.libs/cilk-abi-cilk-for.o 1/16 x86_64-unknown-linux-gnu/libffi/src/.libs/closures.o 1/11 x86_64-unknown-linux-gnu/libgo/archive/.libs/tar.o 1/41 x86_64-unknown-linux-gnu/libgo/archive/.libs/zip.o 5/77 x86_64-unknown-linux-gnu/libgo/compress/.libs/flate.o 3/90 x86_64-unknown-linux-gnu/libgo/compress/.libs/gzip.o 5/16 x86_64-unknown-linux-gnu/libgo/compress/.libs/lzw.o 1/22 x86_64-unknown-linux-gnu/libgo/compress/.libs/zlib.o 4/15 x86_64-unknown-linux-gnu/libgo/container/.libs/heap.o 4/7 x86_64-unknown-linux-gnu/libgo/crypto/.libs/cipher.o 7/42 x86_64-unknown-linux-gnu/libgo/crypto/.libs/ecdsa.o 3/22 x86_64-unknown-linux-gnu/libgo/crypto/.libs/elliptic.o 0/67 x86_64-unknown-linux-gnu/libgo/crypto/.libs/hmac.o 3/8 x86_64-unknown-linux-gnu/libgo/crypto/.libs/rsa.o 4/23 x86_64-unknown-linux-gnu/libgo/crypto/.libs/tls.o 20/175 x86_64-unknown-linux-gnu/libgo/crypto/.libs/x509.o 1/63 x86_64-unknown-linux-gnu/libgo/database/.libs/sql.o 13/135 x86_64-unknown-linux-gnu/libgo/debug/.libs/dwarf.o 8/163 x86_64-unknown-linux-gnu/libgo/debug/.libs/elf.o 0/104 x86_64-unknown-linux-gnu/libgo/debug/.libs/macho.o 0/41 x86_64-unknown-linux-gnu/libgo/debug/.libs/pe.o 0/25 x86_64-unknown-linux-gnu/libgo/encoding/.libs/asn1.o 3/60 
x86_64-unknown-linux-gnu/libgo/encoding/.libs/binary.o 2/51 x86_64-unknown-linux-gnu/libgo/encoding/.libs/gob.o 23/223 x86_64-unknown-linux-gnu/libgo/encoding/.libs/hex.o 1/15 x86_64-unknown-linux-gnu/libgo/encoding/.libs/json.o 10/178 x86_64-unknown-linux-gnu/libgo/encoding/.libs/pem.o 1/10 x86_64-unknown-linux-gnu/libgo/encoding/.libs/xml.o 10/112 x86_64-unknown-linux-gnu/libgo/exp/.libs/proxy.o 1/21 x86_64-unknown-linux-gnu/libgo/go/.libs/ast.o 33/301 x86_64-unknown-linux-gnu/libgo/go/.libs/build.o 3/40 x86_64-unknown-linux-gnu/libgo/go/.libs/doc.o 0/76 x86_64-unknown-linux-gnu/libgo/go/.libs/format.o 1/4 x86_64-unknown-linux-gnu/libgo/go/.libs/parser.o 4/172 x86_64-unknown-linux-gnu/libgo/go/.libs/printer.o 6/69 x86_64-unknown-linux-gnu/libgo/go/.libs/scanner.o 1/37 x86_64-unknown-linux-gnu/libgo/go/.libs/token.o 1/43 x86_64-unknown-linux-gnu/libgo/html/.libs/template.o 5/125 x86_64-unknown-linux-gnu/libgo/image/.libs/color.o 10/25 x86_64-unknown-linux-gnu/libgo/image/.libs/draw.o 2/12 x86_64-unknown-linux-gnu/libgo/image/.libs/gif.o 3/28 x86_64-unknown-linux-gnu/libgo/image/.libs/jpeg.o 2/42 x86_64-unknown-linux-gnu/libgo/image/.libs/png.o 4/28 x86_64-unknown-linux-gnu/libgo/log/.libs/syslog.o 2/25 x86_64-unknown-linux-gnu/libgo/math/.libs/big.o 2/138 x86_64-unknown-linux-gnu/libgo/math/.libs/cmplx.o 0/29 x86_64-unknown-linux-gnu/libgo/math/.libs/rand.o 2/43 x86_64-unknown-linux-gnu/libgo/net/.libs/http.o 25/377 x86_64-unknown-linux-gnu/libgo/net/.libs/rpc.o 13/92 x86_64-unknown-linux-gnu/libgo/net/.libs/smtp.o 2/34 x86_64-unknown-linux-gnu/libgo/net/.libs/textproto.o 1/72 x86_64-unknown-linux-gnu/libgo/net/.libs/url.o 1/37 x86_64-unknown-linux-gnu/libgo/old/.libs/regexp.o 1/99 x86_64-unknown-linux-gnu/libgo/old/.libs/template.o 4/53 x86_64-unknown-linux-gnu/libgo/os/.libs/exec.o 4/75 x86_64-unknown-linux-gnu/libgo/path/.libs/filepath.o 2/30 x86_64-unknown-linux-gnu/libgo/runtime/.libs/pprof.o 1/49 x86_64-unknown-linux-gnu/libgo/testing/.libs/quick.o 2/15 
x86_64-unknown-linux-gnu/libgo/text/.libs/scanner.o 0/28 x86_64-unknown-linux-gnu/libgo/text/.libs/template.o 7/147 x86_64-unknown-linux-gnu/libjava/gnu/.libs/awt.o 1/4 x86_64-unknown-linux-gnu/libjava/gnu/.libs/classpath.o 3/34 x86_64-unknown-linux-gnu/libjava/java/.libs/applet.o 5/34 x86_64-unknown-linux-gnu/libjava/java/.libs/awt.o 392/2163 x86_64-unknown-linux-gnu/libjava/java/.libs/beans.o 26/259 x86_64-unknown-linux-gnu/libjava/java/.libs/io.o 100/893 x86_64-unknown-linux-gnu/libjava/java/.libs/lang.o 37/1127 x86_64-unknown-linux-gnu/libjava/java/.libs/math.o 27/195 x86_64-unknown-linux-gnu/libjava/java/.libs/net.o 111/557 x86_64-unknown-linux-gnu/libjava/java/.libs/nio.o 38/517 x86_64-unknown-linux-gnu/libjava/java/.libs/process-Posix.o 1/20 x86_64-unknown-linux-gnu/libjava/java/.libs/rmi.o 2/46 x86_64-unknown-linux-gnu/libjava/java/.libs/security.o 77/433 x86_64-unknown-linux-gnu/libjava/java/.libs/sql.o 7/83 x86_64-unknown-linux-gnu/libjava/java/.libs/text.o 96/485 x86_64-unknown-linux-gnu/libjava/java/.libs/util.o 236/2270 x86_64-unknown-linux-gnu/libjava/javax/.libs/accessibility.o 7/61 x86_64-unknown-linux-gnu/libjava/javax/.libs/activation.o 19/155 x86_64-unknown-linux-gnu/libjava/javax/.libs/crypto.o 27/179 x86_64-unknown-linux-gnu/libjava/javax/.libs/management.o 72/394 x86_64-unknown-linux-gnu/libjava/javax/.libs/naming.o 46/229 x86_64-unknown-linux-gnu/libjava/javax/.libs/print.o 10/53 x86_64-unknown-linux-gnu/libjava/javax/.libs/swing.o 1159/3899 x86_64-unknown-linux-gnu/libjava/libltdl/.libs/ltdl.o 9/56 x86_64-unknown-linux-gnu/libjava/sun/.libs/misc.o 1/7 x86_64-unknown-linux-gnu/libsanitizer/asan/.libs/asan_interceptors.o 0/1142 x86_64-unknown-linux-gnu/libsanitizer/sanitizer_common/.libs/sanitizer_thread_registry.o 0/35 x86_64-unknown-linux-gnu/libsanitizer/tsan/.libs/tsan_interceptors.o 0/1352 x86_64-unknown-linux-gnu/libstdc++-v3/src/.libs/compatibility.o 30/140 Jakub
On Wed, 15 Jan 2014, Jakub Jelinek wrote: > Hi! > > As discussed in the PR, when combine combines something across a setter > of a likely spilled non-fixed hard register, we may get RA ICE, as the > combined insn might need for reload a hard reg that is live at the point > where combiner has combined the insn. > > The following patch attempts to fix that by not creating LOG_LINKS in > certain cases. Err - your description makes it sound like a register allocator / reload / LRA issue but you are papering over it in combine? Vlad claims that reload or LRA cannot fix up here but surely splitting the live range of the live-crossing hardreg twice and spilling it must be possible, no? > There are two cases which the patch handles separately: > 1) CALL_INSN and preceding load_register_parameters sequence > (from the call insn up to the earliest insn in the sequence that > sets some likely spilled non-fixed hard register that's PR59477, right? > 2) other setters of likely spilled non-fixed hard register > (loading of return value of a function into the return register(s), > explicit register vars, ...) > > 1) is handled specially, by disallowing LOG_LINKS from before the first > insn into the sequence, because in between that first insn and > the CALL_INSN a likely spilled hard register is live. LOG_LINKS from > before the sequence to after the CALL_INSN or from within the sequence > to after the call or from within the sequence to within the sequence > are allowed > 2) other setters are handled just like a basic block boundary for LOG_LINKS, > no LOG_LINKS are allowed to cross that boundary It seems this is very conservative - by the time combine runs you only have pseudos in the insn that will eventually create the conflict. 
In fact, (define_insn "*ashl<mode>3_1" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm") (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)" suggests that IRA can happily choose sth else than cx here even though Vlad claims "As r90 in insn 29 needs to be in cx and cx is already alive, neither reload nor LRA can do anything." Other passes may have exactly the same issue - if live-ranges cannot be split during IRA then two shift insns with overlapping shift amount life-ranges will cause exactly the same problem, no? Jakub says I don't think this is a RA bug, the problem is I think that combine changes: (insn 20 19 21 2 (set (reg:DI 2 cx) (const_int 0 [0])) pr59477.C:20 62 {*movdi_internal_rex64} (nil)) -(insn 21 20 22 2 (set (reg:SI 37 r8) - (subreg:SI (reg:DI 64) 0)) pr59477.C:20 64 {*movsi_internal} - (expr_list:REG_DEAD (reg:DI 64) - (nil))) +(insn 21 20 22 2 (parallel [ + (set (reg:DI 37 r8) + (ashift:DI (reg:DI 65) + (subreg:QI (reg:SI 63 [ v ]) 0))) + (clobber (reg:CC 17 flags)) + ]) pr59477.C:20 503 {*ashldi3_1} + (expr_list:REG_UNUSED (reg:CC 17 flags) + (expr_list:REG_DEAD (reg:SI 63 [ v ]) + (expr_list:REG_DEAD (reg:DI 65) + (nil))))) why is there a non-argument setup insn in between argument setup and the call? 
Having argument register constraints handled like ;; bar (_20, _20, _20, _20); (insn 21 20 22 (set (reg:SI 2 cx) (reg:SI 97 [ D.1780 ])) t.c:10 -1 (nil)) (insn 22 21 23 (set (reg:SI 1 dx) (reg:SI 97 [ D.1780 ])) t.c:10 -1 (nil)) (insn 23 22 24 (set (reg:SI 4 si) (reg:SI 97 [ D.1780 ])) t.c:10 -1 (nil)) (insn 24 23 25 (set (reg:SI 5 di) (reg:SI 97 [ D.1780 ])) t.c:10 -1 (nil)) (call_insn 25 24 0 (call (mem:QI (symbol_ref:DI ("bar") [flags 0x41] <function_decl 0x7fc7b0b52600 bar>) [0 bar S1 A8]) looks less than ideal - we get a live-range split which is probably good for RA, but ultimately there shouldn't be separate insns involving hard regs before reload - if anything gets between those insns this limits RA / reload freedom. We should be able to tell the RA that the call needs its inputs in certain registers - just as we do with asm()s. So, looking at what combine does: Trying 18 -> 28: Successfully matched this instruction: (set (reg:SI 37 r8) (subreg:SI (reg:DI 89 [ D.2308 ]) 0)) that's (insn 18 17 20 2 (parallel [ (set (reg:DI 95) (ior:DI (reg:DI 93) (reg:DI 91 [ D.2309 ]))) (clobber (reg:CC 17 flags)) ]) t.c:4 421 {*iordi_1} (expr_list:REG_DEAD (reg:DI 93) (expr_list:REG_DEAD (reg:DI 91 [ D.2309 ]) (expr_list:REG_UNUSED (reg:CC 17 flags) (nil))))) (insn 28 27 29 2 (set (reg:SI 37 r8) (subreg:SI (reg:DI 95) 0)) t.c:20 90 {*movsi_internal} (expr_list:REG_DEAD (reg:DI 95) (nil))) Trying 11 -> 28: Successfully matched this instruction: (set (reg:DI 37 r8) (ashift:DI (reg:DI 90) (subreg:QI (reg:SI 88 [ v ]) 0))) and we're toast. I think the problem is still either a missed feature in LRA/reload (split live-ranges), a problem in how we represent calls & argument setup (magic hard-reg uses), or a backend problem (should spill itself if necessary, via a post-reload splitter or always spill and undo via a peephole2). Of course papering over in combine might be the best at this stage. So the above was just observations from the less experienced people in this area. 
Richard. > Bootstrapped/regtested on x86_64-linux and i686-linux. > > For i686-linux stage3 + target libraries, just 8 *.o files are affected by > the patch, for x86_64-linux approx. 8% of *.o files are affected > by the patch, though haven't checked in detail how much it affects each > file, so far only looked at x86_64-linux builtins.o, where the patch > pessimized the code in a single spot: > - movq (%rax,%rdx), %rdi > + addq %rax, %rdx > + movq (%rdx), %rdi > call _ZL12validate_argPK9tree_node9tree_code > which comes from insn 112 not being combined with 114, because both si > and di are on x86_64 likely spilled non-fixed hard registers and thus we > disallowed LOG_LINKS from before insn 113 into the 113..114 range, because > of the fear that the combined pattern might during reload need the hard > register that is live across it: > (insn 112 111 113 17 (parallel [ > (set (reg/f:DI 134) > (plus:DI (reg:DI 132) > (reg/v:DI 113 [ off ]))) > (clobber (reg:CC 17 flags)) > ]) ../../gcc/gimple.h:2066 266 {*adddi_1} > (expr_list:REG_DEAD (reg:DI 132) > (expr_list:REG_DEAD (reg/v:DI 113 [ off ]) > (expr_list:REG_UNUSED (reg:CC 17 flags) > (nil))))) > (insn 113 112 114 17 (set (reg:SI 4 si) > (reg:SI 94 [ D.97472 ])) ../../gcc/builtins.c:11417 90 {*movsi_internal} > (expr_list:REG_DEAD (reg:SI 94 [ D.97472 ]) > (nil))) > (insn 114 113 115 17 (set (reg:DI 5 di) > (mem/f:DI (reg/f:DI 134) [8 *_43+0 S8 A64])) ../../gcc/builtins.c:11417 89 {*movdi_internal} > (expr_list:REG_DEAD (reg/f:DI 134) > (nil))) > (call_insn/c/i 115 114 116 17 (set (reg:QI 0 ax) > (call (mem:QI (symbol_ref:DI ("_ZL12validate_argPK9tree_node9tree_code") [flags 0x3] <function_decl 0x7f4045caf200 validate_arg>) [0 validate_arg S1 A8]) > (const_int 0 [0]))) ../../gcc/builtins.c:11417 680 {*call_value} > (expr_list:REG_DEAD (reg:DI 5 di) > (expr_list:REG_DEAD (reg:SI 4 si) > (expr_list:REG_EH_REGION (const_int 0 [0]) > (nil)))) > (expr_list:REG_FRAME_RELATED_EXPR (use (reg:DI 5 di)) > 
(expr_list:REG_BR_PRED (use (reg:SI 4 si)) > (nil)))) > > Thoughts on this? I can surely investigate more tomorrow, look at more > random object files that differ and see how many changes it creates. > Unfortunately, because the patch works at LOG_LINKS creation time, it is > hard to gather statistics during compilation, because while lots of > LOG_LINKS will be affected, only much smaller actual combinations will not > be successfully performed because of the missing LOG_LINKS. > Typically the call argument load sequence contains just (set (reg:.. hard reg) (reg:.. pseudo)) > insns and we wouldn't combine into that anyway. > > The patch removes the likely_spilled_retval stuff Joern wrote a few years > ago because I believe this change should obsolete that. But, have tried to > reproduce the ICE using sh-elf target and haven't managed to do so, 4.9 is > quite a different compiler from 2005-ish gcc. > > 2014-01-15 Jakub Jelinek <jakub@redhat.com> > > PR rtl-optimization/59477 > * combine.c (create_log_links): For sequences of insns loading > hard register parameters for a call starting with load of some > non-fixed likely spilled hard register don't record LOG_LINKS > from before the first load to any insn in between the first > load and the call insn. For setters of non-fixed likely > spilled hard registers other than in calls and the above sequences > disallow recording of any LOG_LINKS crossing such insns. > (struct likely_spilled_retval_info): Remove. > (likely_spilled_retval_1, likely_spilled_retval): Remove. > (try_combine): Don't call likely_spilled_retval. > > * g++.dg/torture/pr59477.C: New test. 
> > --- gcc/combine.c.jj 2014-01-15 08:11:22.636430002 +0100 > +++ gcc/combine.c 2014-01-15 15:45:01.588819713 +0100 > @@ -985,20 +985,54 @@ create_log_links (void) > basic_block bb; > rtx *next_use, insn; > df_ref *def_vec, *use_vec; > + int *undo_list; > > next_use = XCNEWVEC (rtx, max_reg_num ()); > + undo_list = XCNEWVEC (int, max_reg_num ()); > > /* Pass through each block from the end, recording the uses of each > register and establishing log links when def is encountered. > Note that we do not clear next_use array in order to save time, > so we have to test whether the use is in the same basic block as def. > > + Avoid creating log links across instructions that set hard registers > + that are likely to be spilled, otherwise reload might die if some > + instruction that might need some particular hard register is combined > + into the area where that hard register is already live. Usually > + likely spilled non-fixed hard registers should occur in the IL > + for function call parameter passing and call return value, return > + value of the whole function and then for explicit register variables. > + > + For calls expansion in calls.c usually emits instructions setting the > + hard registers immediately before the call, with simple rhs. For this > + case the code below attempts to identify the block of insns between the > + first insn loading parameter into a hard register where the hard register > + is likely spilled and last insn loading parameter into (any) hard > + register (call_block_start and call_block_end). We allow adding log links > + to instructions in that range only from instructions in that range, when > + in the backward walk we reach the call_block_start insn, we remove all > + next_use elements for insns in that block, using undo_list array > + and undo_list_head. That way it is e.g. possible to have log links > + from before a call argument setup sequence to insns after the call. > + In the call_block_start .. 
call_block_end range we don't set > + likely_spilled_setter_luid though. > + > + For all other setters of likely spilled non-fixed hard registers, we just > + disallow any log links across those insns, by setting > + likely_spilled_setter_luid and not creating any log links pointing to > + insns with higher luid than that. > + > There are a few cases below when we do not consider the definition or > usage -- these are taken from original flow.c did. Don't ask me why it is > done this way; I don't know and if it works, I don't want to know. */ > > FOR_EACH_BB_FN (bb, cfun) > { > + int likely_spilled_setter_luid = -1; > + int call_block_start = -1; > + int call_block_end = -1; > + int undo_list_head = 0; > + > FOR_BB_INSNS_REVERSE (bb, insn) > { > if (!NONDEBUG_INSN_P (insn)) > @@ -1007,12 +1041,86 @@ create_log_links (void) > /* Log links are created only once. */ > gcc_assert (!LOG_LINKS (insn)); > > + int luid = DF_INSN_LUID (insn); > + > + if (CALL_P (insn)) > + { > + rtx last_reg = NULL_RTX; > + gcc_assert (call_block_start == -1 && call_block_end == -1); > + for (rtx link = CALL_INSN_FUNCTION_USAGE (insn); > + link != NULL_RTX; > + link = XEXP (link, 1)) > + if (GET_CODE (XEXP (link, 0)) == USE > + && REG_P (XEXP (XEXP (link, 0), 0)) > + && HARD_REGISTER_P (XEXP (XEXP (link, 0), 0))) > + { > + rtx reg = XEXP (XEXP (link, 0), 0); > + int nregs = hard_regno_nregs[REGNO (reg)][GET_MODE (reg)]; > + while (nregs-- > 0) > + if (! TEST_HARD_REG_BIT (fixed_reg_set, > + REGNO (reg) + nregs) > + && targetm.class_likely_spilled_p > + (REGNO_REG_CLASS (REGNO (reg) + nregs))) > + break; > + if (nregs >= 0) > + last_reg = reg; > + } > + if (last_reg) > + { > + for (rtx prev = PREV_INSN (insn); > + prev; prev = PREV_INSN (prev)) > + { > + if (!NONDEBUG_INSN_P (prev)) > + continue; > + if (BLOCK_FOR_INSN (prev) != bb > + || !NONJUMP_INSN_P (prev)) > + break; > + rtx set = single_set (prev); > + if (! 
set) > + break; > + rtx dest = SET_DEST (set); > + if (GET_CODE (dest) == SUBREG) > + dest = SUBREG_REG (dest); > + if (!REG_P (dest) || !HARD_REGISTER_P (dest)) > + break; > + if (reg_overlap_mentioned_p (dest, last_reg)) > + { > + if (call_block_end != -1) > + call_block_start = DF_INSN_LUID (prev); > + break; > + } > + else if (call_block_end == -1) > + call_block_end = DF_INSN_LUID (prev); > + } > + if (call_block_start == -1) > + call_block_end = -1; > + } > + } > + > for (def_vec = DF_INSN_DEFS (insn); *def_vec; def_vec++) > { > df_ref def = *def_vec; > int regno = DF_REF_REGNO (def); > rtx use_insn; > > + if (HARD_REGISTER_NUM_P (regno) > + && !CALL_P (insn) > + && !(luid >= call_block_start && luid <= call_block_end) > + && !(DF_REF_FLAGS (def) > + & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER))) > + { > + enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (def)); > + int nregs = hard_regno_nregs[regno][mode]; > + while (nregs-- > 0) > + if (! TEST_HARD_REG_BIT (fixed_reg_set, regno + nregs) > + && targetm.class_likely_spilled_p > + (REGNO_REG_CLASS (regno + nregs))) > + { > + likely_spilled_setter_luid = luid; > + break; > + } > + } > + > if (!next_use[regno]) > continue; > > @@ -1034,7 +1142,10 @@ create_log_links (void) > continue; > > use_insn = next_use[regno]; > - if (BLOCK_FOR_INSN (use_insn) == bb) > + if (BLOCK_FOR_INSN (use_insn) == bb > + && (likely_spilled_setter_luid == -1 > + || (DF_INSN_LUID (use_insn) > + <= likely_spilled_setter_luid))) > { > /* flow.c claimed: > > @@ -1060,6 +1171,33 @@ create_log_links (void) > next_use[regno] = NULL_RTX; > } > > + if (luid == call_block_start) > + { > + /* At the start of the call argument block, remove > + all next_use array values pointing to instructions > + within the block. The undo_list array normally contains > + all zeros, non-zero values form a chain of what needs > + to be cleared in next_use array. 
Value of -1 in the array > + means tail of the chain, other non-zero values are next > + regno that need to be cleared plus 1 (as zero is a valid > + register number). */ > + if (undo_list_head) > + { > + int r = undo_list_head; > + do > + { > + int regno = r - 1; > + r = undo_list[regno]; > + undo_list[regno] = 0; > + next_use[regno] = NULL_RTX; > + } > + while (r != -1); > + undo_list_head = 0; > + } > + call_block_start = -1; > + call_block_end = -1; > + } > + > for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++) > { > df_ref use = *use_vec; > @@ -1070,11 +1208,23 @@ create_log_links (void) > if (DF_REF_FLAGS (use) & DF_REF_CALL_STACK_USAGE) > continue; > > + if (luid >= call_block_start > + && luid <= call_block_end > + && undo_list[regno] == 0) > + { > + if (undo_list_head == 0) > + undo_list[regno] = -1; > + else > + undo_list[regno] = undo_list_head; > + undo_list_head = regno + 1; > + } > + > next_use[regno] = insn; > } > } > } > > + free (undo_list); > free (next_use); > } > > @@ -2222,87 +2372,6 @@ cant_combine_insn_p (rtx insn) > return 0; > } > > -struct likely_spilled_retval_info > -{ > - unsigned regno, nregs; > - unsigned mask; > -}; > - > -/* Called via note_stores by likely_spilled_retval_p. Remove from info->mask > - hard registers that are known to be written to / clobbered in full. 
*/ > -static void > -likely_spilled_retval_1 (rtx x, const_rtx set, void *data) > -{ > - struct likely_spilled_retval_info *const info = > - (struct likely_spilled_retval_info *) data; > - unsigned regno, nregs; > - unsigned new_mask; > - > - if (!REG_P (XEXP (set, 0))) > - return; > - regno = REGNO (x); > - if (regno >= info->regno + info->nregs) > - return; > - nregs = hard_regno_nregs[regno][GET_MODE (x)]; > - if (regno + nregs <= info->regno) > - return; > - new_mask = (2U << (nregs - 1)) - 1; > - if (regno < info->regno) > - new_mask >>= info->regno - regno; > - else > - new_mask <<= regno - info->regno; > - info->mask &= ~new_mask; > -} > - > -/* Return nonzero iff part of the return value is live during INSN, and > - it is likely spilled. This can happen when more than one insn is needed > - to copy the return value, e.g. when we consider to combine into the > - second copy insn for a complex value. */ > - > -static int > -likely_spilled_retval_p (rtx insn) > -{ > - rtx use = BB_END (this_basic_block); > - rtx reg, p; > - unsigned regno, nregs; > - /* We assume here that no machine mode needs more than > - 32 hard registers when the value overlaps with a register > - for which TARGET_FUNCTION_VALUE_REGNO_P is true. */ > - unsigned mask; > - struct likely_spilled_retval_info info; > - > - if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != USE || insn == use) > - return 0; > - reg = XEXP (PATTERN (use), 0); > - if (!REG_P (reg) || !targetm.calls.function_value_regno_p (REGNO (reg))) > - return 0; > - regno = REGNO (reg); > - nregs = hard_regno_nregs[regno][GET_MODE (reg)]; > - if (nregs == 1) > - return 0; > - mask = (2U << (nregs - 1)) - 1; > - > - /* Disregard parts of the return value that are set later. 
*/ > - info.regno = regno; > - info.nregs = nregs; > - info.mask = mask; > - for (p = PREV_INSN (use); info.mask && p != insn; p = PREV_INSN (p)) > - if (INSN_P (p)) > - note_stores (PATTERN (p), likely_spilled_retval_1, &info); > - mask = info.mask; > - > - /* Check if any of the (probably) live return value registers is > - likely spilled. */ > - nregs --; > - do > - { > - if ((mask & 1 << nregs) > - && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno + nregs))) > - return 1; > - } while (nregs--); > - return 0; > -} > - > /* Adjust INSN after we made a change to its destination. > > Changing the destination can invalidate notes that say something about > @@ -2512,8 +2581,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx > if (cant_combine_insn_p (i3) > || cant_combine_insn_p (i2) > || (i1 && cant_combine_insn_p (i1)) > - || (i0 && cant_combine_insn_p (i0)) > - || likely_spilled_retval_p (i3)) > + || (i0 && cant_combine_insn_p (i0))) > return 0; > > combine_attempts++; > --- gcc/testsuite/g++.dg/torture/pr59477.C.jj 2014-01-15 09:38:18.348864654 +0100 > +++ gcc/testsuite/g++.dg/torture/pr59477.C 2014-01-15 09:38:18.348864654 +0100 > @@ -0,0 +1,25 @@ > +// PR rtl-optimization/59477 > +// { dg-do compile } > + > +struct A > +{ > + unsigned *a; > + unsigned long b; > + A (unsigned long x) : a (), b (x) {} > +}; > + > +struct B > +{ > + B (int); > + B (const B &) {} > +}; > + > +B bar (B, B, A); > +int v; > + > +void > +foo () > +{ > + B c = 0; > + bar (c, c, A (1ULL << v)); > +} > > Jakub > >
On 16 January 2014 08:26, Jakub Jelinek <jakub@redhat.com> wrote: > I wonder if instead of not creating the LOG_LINKS we could just somehow taint > them, add a flag to struct insn_link that we need to be careful about > combining across that link, then in combine_instructions OR the flags together > from all the used links and if try_combine is called with this > crosses_likely_spilled_setter_p > flag, if recog accepts the result look at all constraints on all operands of the > new insn and if for any constraint anywhere REG_CLASS_FROM_CONSTRAINT is likely > spilled, punt. Thoughts on whether this could be reliable? You would miss spills from secondary/tertiary reloads. OTOH, as Richard already said, what you do check is at the same time paranoidly conservative. Since the failure mode here is an ICE, I think it would be appropriate to use a target hook to look at the live likely-spilled register and the combined insn, and tell if the combination is safe - and set the default not to worry.
> I think the problem is still either a missed feature in LRA/reload > (split live-ranges), a problem in how we represent calls & argument > setup (magic hard-reg uses), or a backend problem (should spill > itself if necessary, via a post-reload splitter or always spill > and undo via a peephole2). > > Of course papering over in combine might be the best at this > stage. So the above was just observations from the less experienced > people in this area. Yes, this is ultimately a RA issue, but an ancient and hard one as far as I understand so papering over it makes sense I think. That being said, while Joern's machinery was IMO acceptable because relatively simple and localized, Jakub's looks more complicated and far-reaching (that's why I suggested to try to extend the existing machinery if possible) so I think that we ought to try something simpler first.
On 1/20/2014, 4:12 AM, Eric Botcazou wrote: >> I think the problem is still either a missed feature in LRA/reload >> (split live-ranges), a problem in how we represent calls & argument >> setup (magic hard-reg uses), or a backend problem (should spill >> itself if necessary, via a post-reload splitter or always spill >> and undo via a peephole2). >> >> Of course papering over in combine might be the best at this >> stage. So the above was just observations from the less experienced >> people in this area. > > Yes, this is ultimately a RA issue, but an ancient and hard one as far as I > understand so papering over it makes sense I think. That being said, while > Joern's machinery was IMO acceptable because relatively simple and localized, > Jakub's looks more complicated and far-reaching (that's why I suggested to try > to extend the existing machinery if possible) so I think that we ought to try > something simpler first. > I've just started to work on fixing this in LRA. But I am not going to work on fixing this in reload. It would just be wasting an enormous amount of time.
> I've just started to work on fixing this in LRA. But I am not going to > work on fixing this in reload. It would be just wasting enormous amount > of time. Yes, fixing it in LRA only would be sufficient for PR rtl-optimization/59477, since it's an x86-specific issue. Other architectures aren't as sensitive as x86 in this area, and the existing counter-measures in combine.c are usually sufficient for them.
--- gcc/combine.c.jj 2014-01-15 08:11:22.636430002 +0100 +++ gcc/combine.c 2014-01-15 15:45:01.588819713 +0100 @@ -985,20 +985,54 @@ create_log_links (void) basic_block bb; rtx *next_use, insn; df_ref *def_vec, *use_vec; + int *undo_list; next_use = XCNEWVEC (rtx, max_reg_num ()); + undo_list = XCNEWVEC (int, max_reg_num ()); /* Pass through each block from the end, recording the uses of each register and establishing log links when def is encountered. Note that we do not clear next_use array in order to save time, so we have to test whether the use is in the same basic block as def. + Avoid creating log links across instructions that set hard registers + that are likely to be spilled, otherwise reload might die if some + instruction that might need some particular hard register is combined + into the area where that hard register is already live. Usually + likely spilled non-fixed hard registers should occur in the IL + for function call parameter passing and call return value, return + value of the whole function and then for explicit register variables. + + For calls expansion in calls.c usually emits instructions setting the + hard registers immediately before the call, with simple rhs. For this + case the code below attempts to identify the block of insns between the + first insn loading parameter into a hard register where the hard register + is likely spilled and last insn loading parameter into (any) hard + register (call_block_start and call_block_end). We allow adding log links + to instructions in that range only from instructions in that range, when + in the backward walk we reach the call_block_start insn, we remove all + next_use elements for insns in that block, using undo_list array + and undo_list_head. That way it is e.g. possible to have log links + from before a call argument setup sequence to insns after the call. + In the call_block_start .. call_block_end range we don't set + likely_spilled_setter_luid though. 
+ + For all other setters of likely spilled non-fixed hard registers, we just + disallow any log links across those insns, by setting + likely_spilled_setter_luid and not creating any log links pointing to + insns with higher luid than that. + There are a few cases below when we do not consider the definition or usage -- these are taken from original flow.c did. Don't ask me why it is done this way; I don't know and if it works, I don't want to know. */ FOR_EACH_BB_FN (bb, cfun) { + int likely_spilled_setter_luid = -1; + int call_block_start = -1; + int call_block_end = -1; + int undo_list_head = 0; + FOR_BB_INSNS_REVERSE (bb, insn) { if (!NONDEBUG_INSN_P (insn)) @@ -1007,12 +1041,86 @@ create_log_links (void) /* Log links are created only once. */ gcc_assert (!LOG_LINKS (insn)); + int luid = DF_INSN_LUID (insn); + + if (CALL_P (insn)) + { + rtx last_reg = NULL_RTX; + gcc_assert (call_block_start == -1 && call_block_end == -1); + for (rtx link = CALL_INSN_FUNCTION_USAGE (insn); + link != NULL_RTX; + link = XEXP (link, 1)) + if (GET_CODE (XEXP (link, 0)) == USE + && REG_P (XEXP (XEXP (link, 0), 0)) + && HARD_REGISTER_P (XEXP (XEXP (link, 0), 0))) + { + rtx reg = XEXP (XEXP (link, 0), 0); + int nregs = hard_regno_nregs[REGNO (reg)][GET_MODE (reg)]; + while (nregs-- > 0) + if (! TEST_HARD_REG_BIT (fixed_reg_set, + REGNO (reg) + nregs) + && targetm.class_likely_spilled_p + (REGNO_REG_CLASS (REGNO (reg) + nregs))) + break; + if (nregs >= 0) + last_reg = reg; + } + if (last_reg) + { + for (rtx prev = PREV_INSN (insn); + prev; prev = PREV_INSN (prev)) + { + if (!NONDEBUG_INSN_P (prev)) + continue; + if (BLOCK_FOR_INSN (prev) != bb + || !NONJUMP_INSN_P (prev)) + break; + rtx set = single_set (prev); + if (! 
set) + break; + rtx dest = SET_DEST (set); + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + if (!REG_P (dest) || !HARD_REGISTER_P (dest)) + break; + if (reg_overlap_mentioned_p (dest, last_reg)) + { + if (call_block_end != -1) + call_block_start = DF_INSN_LUID (prev); + break; + } + else if (call_block_end == -1) + call_block_end = DF_INSN_LUID (prev); + } + if (call_block_start == -1) + call_block_end = -1; + } + } + for (def_vec = DF_INSN_DEFS (insn); *def_vec; def_vec++) { df_ref def = *def_vec; int regno = DF_REF_REGNO (def); rtx use_insn; + if (HARD_REGISTER_NUM_P (regno) + && !CALL_P (insn) + && !(luid >= call_block_start && luid <= call_block_end) + && !(DF_REF_FLAGS (def) + & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER))) + { + enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (def)); + int nregs = hard_regno_nregs[regno][mode]; + while (nregs-- > 0) + if (! TEST_HARD_REG_BIT (fixed_reg_set, regno + nregs) + && targetm.class_likely_spilled_p + (REGNO_REG_CLASS (regno + nregs))) + { + likely_spilled_setter_luid = luid; + break; + } + } + if (!next_use[regno]) continue; @@ -1034,7 +1142,10 @@ create_log_links (void) continue; use_insn = next_use[regno]; - if (BLOCK_FOR_INSN (use_insn) == bb) + if (BLOCK_FOR_INSN (use_insn) == bb + && (likely_spilled_setter_luid == -1 + || (DF_INSN_LUID (use_insn) + <= likely_spilled_setter_luid))) { /* flow.c claimed: @@ -1060,6 +1171,33 @@ create_log_links (void) next_use[regno] = NULL_RTX; } + if (luid == call_block_start) + { + /* At the start of the call argument block, remove + all next_use array values pointing to instructions + within the block. The undo_list array normally contains + all zeros, non-zero values form a chain of what needs + to be cleared in next_use array. Value of -1 in the array + means tail of the chain, other non-zero values are next + regno that need to be cleared plus 1 (as zero is a valid + register number). 
*/ + if (undo_list_head) + { + int r = undo_list_head; + do + { + int regno = r - 1; + r = undo_list[regno]; + undo_list[regno] = 0; + next_use[regno] = NULL_RTX; + } + while (r != -1); + undo_list_head = 0; + } + call_block_start = -1; + call_block_end = -1; + } + for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++) { df_ref use = *use_vec; @@ -1070,11 +1208,23 @@ create_log_links (void) if (DF_REF_FLAGS (use) & DF_REF_CALL_STACK_USAGE) continue; + if (luid >= call_block_start + && luid <= call_block_end + && undo_list[regno] == 0) + { + if (undo_list_head == 0) + undo_list[regno] = -1; + else + undo_list[regno] = undo_list_head; + undo_list_head = regno + 1; + } + next_use[regno] = insn; } } } + free (undo_list); free (next_use); } @@ -2222,87 +2372,6 @@ cant_combine_insn_p (rtx insn) return 0; } -struct likely_spilled_retval_info -{ - unsigned regno, nregs; - unsigned mask; -}; - -/* Called via note_stores by likely_spilled_retval_p. Remove from info->mask - hard registers that are known to be written to / clobbered in full. */ -static void -likely_spilled_retval_1 (rtx x, const_rtx set, void *data) -{ - struct likely_spilled_retval_info *const info = - (struct likely_spilled_retval_info *) data; - unsigned regno, nregs; - unsigned new_mask; - - if (!REG_P (XEXP (set, 0))) - return; - regno = REGNO (x); - if (regno >= info->regno + info->nregs) - return; - nregs = hard_regno_nregs[regno][GET_MODE (x)]; - if (regno + nregs <= info->regno) - return; - new_mask = (2U << (nregs - 1)) - 1; - if (regno < info->regno) - new_mask >>= info->regno - regno; - else - new_mask <<= regno - info->regno; - info->mask &= ~new_mask; -} - -/* Return nonzero iff part of the return value is live during INSN, and - it is likely spilled. This can happen when more than one insn is needed - to copy the return value, e.g. when we consider to combine into the - second copy insn for a complex value. 
*/ - -static int -likely_spilled_retval_p (rtx insn) -{ - rtx use = BB_END (this_basic_block); - rtx reg, p; - unsigned regno, nregs; - /* We assume here that no machine mode needs more than - 32 hard registers when the value overlaps with a register - for which TARGET_FUNCTION_VALUE_REGNO_P is true. */ - unsigned mask; - struct likely_spilled_retval_info info; - - if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != USE || insn == use) - return 0; - reg = XEXP (PATTERN (use), 0); - if (!REG_P (reg) || !targetm.calls.function_value_regno_p (REGNO (reg))) - return 0; - regno = REGNO (reg); - nregs = hard_regno_nregs[regno][GET_MODE (reg)]; - if (nregs == 1) - return 0; - mask = (2U << (nregs - 1)) - 1; - - /* Disregard parts of the return value that are set later. */ - info.regno = regno; - info.nregs = nregs; - info.mask = mask; - for (p = PREV_INSN (use); info.mask && p != insn; p = PREV_INSN (p)) - if (INSN_P (p)) - note_stores (PATTERN (p), likely_spilled_retval_1, &info); - mask = info.mask; - - /* Check if any of the (probably) live return value registers is - likely spilled. */ - nregs --; - do - { - if ((mask & 1 << nregs) - && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno + nregs))) - return 1; - } while (nregs--); - return 0; -} - /* Adjust INSN after we made a change to its destination. 
Changing the destination can invalidate notes that say something about @@ -2512,8 +2581,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx if (cant_combine_insn_p (i3) || cant_combine_insn_p (i2) || (i1 && cant_combine_insn_p (i1)) - || (i0 && cant_combine_insn_p (i0)) - || likely_spilled_retval_p (i3)) + || (i0 && cant_combine_insn_p (i0))) return 0; combine_attempts++; --- gcc/testsuite/g++.dg/torture/pr59477.C.jj 2014-01-15 09:38:18.348864654 +0100 +++ gcc/testsuite/g++.dg/torture/pr59477.C 2014-01-15 09:38:18.348864654 +0100 @@ -0,0 +1,25 @@ +// PR rtl-optimization/59477 +// { dg-do compile } + +struct A +{ + unsigned *a; + unsigned long b; + A (unsigned long x) : a (), b (x) {} +}; + +struct B +{ + B (int); + B (const B &) {} +}; + +B bar (B, B, A); +int v; + +void +foo () +{ + B c = 0; + bar (c, c, A (1ULL << v)); +}