===================================================================
@@ -996,7 +996,8 @@ (define_expand "doloop_end"
(use (match_operand 1 "" "")) ; iterations; zero if unknown
(use (match_operand 2 "" "")) ; max iterations
(use (match_operand 3 "" "")) ; loop level
- (use (match_operand 4 "" ""))] ; label
+ (use (match_operand 4 "" "")) ; label
+ (use (match_operand 5 "" ""))] ; flag: 1 if loop entered at top, else 0
"TARGET_32BIT"
"
{
===================================================================
@@ -1933,6 +1933,7 @@ (define_insn "*tablejump_internal"
; operand 2 is the maximum number of loop iterations
; operand 3 is the number of levels of enclosed loops
; operand 4 is the label to jump to at the top of the loop
+; operand 5 indicates if the loop is entered at the top
(define_expand "doloop_end"
[(parallel [(set (pc) (if_then_else
(ne (match_operand:SI 0 "" "")
@@ -1943,7 +1944,7 @@ (define_expand "doloop_end"
(plus:SI (match_dup 0)
(const_int -1)))
(unspec [(const_int 0)] UNSPEC_LSETUP_END)
- (clobber (match_scratch:SI 5 ""))])]
+ (clobber (match_operand 5 ""))])] ; match_scratch
""
{
/* The loop optimizer doesn't check the predicates... */
@@ -1956,6 +1957,7 @@ (define_expand "doloop_end"
&& (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
FAIL;
bfin_hardware_loop ();
+ operands[5] = gen_rtx_SCRATCH (SImode);
})
(define_insn "loop_end"
===================================================================
@@ -1425,6 +1425,7 @@ (define_insn_and_split "eh_return"
; operand 2 is the maximum number of loop iterations
; operand 3 is the number of levels of enclosed loops
; operand 4 is the label to jump to at the top of the loop
+; operand 5 indicates if the loop is entered at the top
(define_expand "doloop_end"
[(parallel [(set (pc) (if_then_else
(ne (match_operand:SI 0 "" "")
@@ -1434,12 +1435,13 @@ (define_expand "doloop_end"
(set (match_dup 0)
(plus:SI (match_dup 0)
(const_int -1)))
- (clobber (match_scratch:SI 5 ""))])]
+ (clobber (match_operand 5 ""))])] ; match_scratch
"TARGET_INSNS_64PLUS && optimize"
{
/* The loop optimizer doesn't check the predicates... */
if (GET_MODE (operands[0]) != SImode)
FAIL;
+ operands[5] = gen_rtx_SCRATCH (SImode);
})
(define_insn "mvilc"
===================================================================
@@ -3960,7 +3960,8 @@ (define_expand "doloop_end"
(use (match_operand 1 "" "")) ; iterations; zero if unknown
(use (match_operand 2 "" "")) ; max iterations
(use (match_operand 3 "" "")) ; loop level
- (use (match_operand 4 "" ""))] ; label
+ (use (match_operand 4 "" "")) ; label
+ (use (match_operand 5 "" ""))] ; flag: 1 if loop entered at top, else 0
""
{
/* Only use cloop on innermost loops. */
===================================================================
@@ -2079,7 +2079,8 @@ (define_expand "doloop_begin"
[(use (match_operand 0 "register_operand" ""))
(use (match_operand:QI 1 "const_int_operand" ""))
(use (match_operand:QI 2 "const_int_operand" ""))
- (use (match_operand:QI 3 "const_int_operand" ""))]
+ (use (match_operand:QI 3 "const_int_operand" ""))
+ (use (match_operand 4 "" ""))]
"!profile_arc_flag && TARGET_OPT_REPEAT"
"if (INTVAL (operands[3]) > 1)
FAIL;
@@ -2115,7 +2116,8 @@ (define_expand "doloop_end"
(use (match_operand:QI 1 "const_int_operand" ""))
(use (match_operand:QI 2 "const_int_operand" ""))
(use (match_operand:QI 3 "const_int_operand" ""))
- (use (label_ref (match_operand 4 "" "")))]
+ (use (label_ref (match_operand 4 "" "")))
+ (use (match_operand 5 "" ""))]
"!profile_arc_flag && TARGET_OPT_REPEAT"
"if (INTVAL (operands[3]) > 1)
FAIL;
===================================================================
@@ -13158,7 +13158,8 @@ (define_expand "doloop_end"
(use (match_operand 1 "" "")) ; iterations; zero if unknown
(use (match_operand 2 "" "")) ; max iterations
(use (match_operand 3 "" "")) ; loop level
- (use (match_operand 4 "" ""))] ; label
+ (use (match_operand 4 "" "")) ; label
+ (use (match_operand 5 "" ""))] ; flag: 1 if loop entered at top, else 0
""
"
{
===================================================================
@@ -8093,7 +8093,8 @@ (define_expand "doloop_end"
(use (match_operand 1 "" "")) ; iterations; zero if unknown
(use (match_operand 2 "" "")) ; max iterations
(use (match_operand 3 "" "")) ; loop level
- (use (match_operand 4 "" ""))] ; label
+ (use (match_operand 4 "" "")) ; label
+ (use (match_operand 5 "" ""))] ; flag: 1 if loop entered at top, else 0
""
{
if (GET_MODE (operands[0]) == SImode && !TARGET_CPU_ZARCH)
===================================================================
@@ -8223,11 +8223,14 @@ (define_expand "doloop_end"
(pc)))
(set (match_dup 0)
(plus:SI (match_dup 0) (const_int -1)))
- (clobber (reg:SI T_REG))])]
+ (clobber (reg:SI T_REG))])
+ (match_operand 5 "" "")]
"TARGET_SH2"
{
if (GET_MODE (operands[0]) != SImode)
FAIL;
+ emit_insn (gen_doloop_end_split (operands[0], operands[4], operands[0]));
+ DONE;
})
(define_insn_and_split "doloop_end_split"
===================================================================
@@ -4490,7 +4490,8 @@ (define_insn "dsync"
(use (match_operand 1 "" "")) ; iterations; zero if unknown
(use (match_operand 2 "" "")) ; max iterations
(use (match_operand 3 "" "")) ; loop level
- (use (match_operand 4 "" ""))] ; label
+ (use (match_operand 4 "" "")) ; label
+ (match_operand 5 "" "")]
""
"
{
===================================================================
@@ -2316,7 +2316,8 @@ (define_expand "doloop_end"
(use (match_operand 1 "" "")) ;; iterations; zero if unknown
(use (match_operand 2 "" "")) ;; max iterations
(use (match_operand 3 "" "")) ;; loop level
- (use (match_operand 4 "" ""))] ;; label
+ (use (match_operand 4 "" "")) ;; label
+ (use (match_operand 5 "" ""))] ;; flag: 1 if loop entered at top, else 0
""
{
if (optimize > 0 && flag_modulo_sched)
===================================================================
@@ -1322,7 +1322,8 @@ (define_expand "doloop_end"
(use (match_operand 1 "" "")) ;; iterations; zero if unknown
(use (match_operand 2 "" "")) ;; max iterations
(use (match_operand 3 "" "")) ;; loop level
- (use (match_operand 4 "" ""))] ;; label
+ (use (match_operand 4 "" "")) ;; label
+ (use (match_operand 5 "" ""))] ;; flag: 1 if loop entered at top, else 0
""
{
if (optimize > 0)
===================================================================
@@ -5501,7 +5501,9 @@ minus the smallest one (both inclusive).
determined until run-time; operand 2 is the actual or estimated maximum
number of iterations as a @code{const_int}; operand 3 is the number of
enclosed loops as a @code{const_int} (an innermost loop has a value of
-1); operand 4 is the label to jump to if the register is nonzero.
+1); operand 4 is the label to jump to if the register is nonzero;
+operand 5 is const1_rtx if the loop is entered at its top, const0_rtx
+otherwise.
@xref{Looping Patterns}.
This optional instruction pattern should be defined for machines with
===================================================================
@@ -551,7 +551,8 @@ doloop_modify (struct loop *loop, struct
init = gen_doloop_begin (counter_reg,
desc->const_iter ? desc->niter_expr : const0_rtx,
GEN_INT (desc->niter_max),
- GEN_INT (level));
+ GEN_INT (level),
+ doloop_seq);
if (init)
{
start_sequence ();
@@ -608,6 +609,7 @@ doloop_optimize (struct loop *loop)
struct niter_desc *desc;
unsigned word_mode_size;
unsigned HOST_WIDE_INT word_mode_max;
+ int entered_at_top;
if (dump_file)
fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
@@ -666,8 +668,10 @@ doloop_optimize (struct loop *loop)
not like. */
start_label = block_label (desc->in_edge->dest);
doloop_reg = gen_reg_rtx (mode);
+ entered_at_top = loop_preheader_edge (loop)->dest == desc->in_edge->dest;
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
- GEN_INT (level), start_label);
+ GEN_INT (level), start_label,
+ GEN_INT (entered_at_top));
word_mode_size = GET_MODE_PRECISION (word_mode);
word_mode_max
@@ -697,7 +701,8 @@ doloop_optimize (struct loop *loop)
}
PUT_MODE (doloop_reg, word_mode);
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
- GEN_INT (level), start_label);
+ GEN_INT (level), start_label,
+ GEN_INT (entered_at_top));
}
if (! doloop_seq)
{
ARCompact is one of the architectures that have zero-overhead loops that are initiated with an instruction at the loop top. There is a way to set up loops before jumping into their middle, by poking values into control registers, but that method is more costly and thus only pays off with a higher minimum iteration count. Thus, the iteration count passed to doloop_end isn't all that helpful without an indication of whether the loop is entered at its top. Also, loops that are well-formed and entered at the top at rtl expansion time can get mangled by the rtl optimizers, and/or their doloop_begin pattern can get moved away so that it no longer matches the loop. In order to give the port a chance at machine_dependent_reorg / instruction output time to verify whether matching patterns are present, it first has to be able to record which doloop_begin and doloop_end statements belong together. The patch attached below as doloop-patch-2-2 addresses these two issues by adding an operand to doloop_end to indicate whether the loop is entered at its top, and one to doloop_begin which is the doloop_end instruction. 
I have tested this patch with a variant of contrib/config-list.mk that I trimmed to test configurations existing ports with doloop_end patterns, i.e.: LIST = \ arm-linux-androideabi arm-uclinux_eabi arm-eabi \ arm-symbianelf \ bfin-elf bfin-uclinux bfin-linux-uclibc bfin-rtems bfin-openbsd \ c6x-elf c6x-uclinux \ ia64-elf \ ia64-freebsd6 ia64-linux ia64-hpux \ mep-elf \ powerpc-darwin8 \ powerpc-darwin7 powerpc64-darwin powerpc-freebsd6 powerpc-netbsd \ powerpc-eabispe powerpc-eabisimaltivec powerpc-eabisim ppc-elf \ powerpc-eabialtivec powerpc-xilinx-eabi powerpc-eabi \ powerpc-rtems4.11OPT-enable-threads=yes powerpc-linux_spe \ powerpc-linux_paired powerpc64-linux_altivec \ powerpc-wrs-vxworks powerpc-wrs-vxworksae powerpc-lynxos powerpcle-elf \ powerpcle-eabisim powerpcle-eabi rs6000-ibm-aix4.3 rs6000-ibm-aix5.1.0 \ rs6000-ibm-aix5.2.0 rs6000-ibm-aix5.3.0 rs6000-ibm-aix6.0 \ s390-linux-gnu s390x-linux-gnu s390x-ibm-tpf sh-elf \ shle-linux sh-netbsdelf sh-superh-elf sh5el-netbsd sh64-netbsd sh64-linux \ sh64-elfOPT-with-newlib sh-rtems sh-wrs-vxworks \ spu-elf tilegx-linux-gnu tilepro-linux-gnu \ FWIW, I left out arm-wrs-vxworks, arm-netbsdelf and ia64-hp-vms because these configurations are currently broken and there are sufficient working configurations to cover arm / ia64. As baseline for testing I used revision 191658, with a patch set to get c6x / mep / rs6000 tilegx / tilepro to build, to be found in the second attachment loop-patch-2-1 . This allows the above mentioned list of configurations to build, except for powerpc*-darwin*. To be clear, I'm asking here for approval of doloop-patch-2-2, not the collection of patches to get a working baseline. If/how c6x / mep / rs6000 / tilegx / tilepro ports should be fixed would be subject to separate discussions. 2012-09-26 J"orn Rennecke <joern.rennecke@arc.com> * loop-doloop.c (doloop_modify): Pass doloop_end pattern to gen_doloop_begin. 
* loop-doloop.c (doloop_optimize): Pass flag to indicate if loop is entered at top to gen_doloop_end. * config/arm/thumb2.md (doloop_end): Accept extra operand. * config/bfin/bfin.md (doloop_end): Likewise. * config/c6x/c6x.md (doloop_end): Likewise. * config/ia64/ia64.md (doloop_end): Likewise. * config/mep/mep.md (doloop_begin, doloop_end): Likewise. * config/rs6000/rs6000.md (doloop_end): Likewise. * config/s390/s390.md (doloop_end): Likewise. * config/sh/sh.md (doloop_end): Likewise. * config/spu/spu.md (doloop_end): Likewise. * config/tilegx/tilegx.md (doloop_end): Likewise. * config/tilepro/tilepro.md (doloop_end): Likewise. * doc/md.texi (doloop_end): Document new operand. 2012-09-25 Joern Rennecke <joern.rennecke@embecosm.com> PR target/54662 * config/mep/t-mep (mep-pragma.o): Use ALL_COMPILERFLAGS instead of ALL_CFLAGS. PR target/52550 * config/tilegx/tilegx.c (tilegx_expand_prologue): Remove unused variable cfa_ofset. * config/tilegx/tilepro.c (tilepro_expand_prologue): Likewise. PR target/52500 * config/c6x/c6x.h (DBX_REGISTER_NUMBER): Cast to unsigned int. PR target/52495 * config/rs6000/rs6000.h [CROSS_DIRECTORY_STRUCTURE] (RS6000_WEAK): Define as 1. 
Index: gcc/gcc/config/c6x/c6x.h =================================================================== --- gcc/gcc/config/c6x/c6x.h (revision 191658) +++ gcc/gcc/config/c6x/c6x.h (working copy) @@ -519,7 +519,7 @@ #define REGISTER_NAMES \ "B24", "B25", "B26", "B27", "B28", "B29", "B30", "B31", \ "FP", "ARGP", "ILC" } -#define DBX_REGISTER_NUMBER(N) (dbx_register_map[(N)]) +#define DBX_REGISTER_NUMBER(N) ((unsigned int) dbx_register_map[(N)]) extern int const dbx_register_map[FIRST_PSEUDO_REGISTER]; Index: gcc/gcc/config/mep/t-mep =================================================================== --- gcc/gcc/config/mep/t-mep (revision 191658) +++ gcc/gcc/config/mep/t-mep (working copy) @@ -30,7 +30,7 @@ mep-pragma.o: $(srcdir)/config/mep/mep-p coretypes.h $(TM_H) $(TREE_H) $(RTL_H) $(C_PRAGMA_H) \ $(CPPLIB_H) hard-reg-set.h output.h $(srcdir)/config/mep/mep-protos.h \ function.h insn-config.h reload.h $(TARGET_H) - $(COMPILER) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< MULTILIB_OPTIONS = mel mall-opts mfar MULTILIB_DIRNAMES = el allopt far Index: gcc/gcc/config/rs6000/rs6000.h =================================================================== --- gcc/gcc/config/rs6000/rs6000.h (revision 191658) +++ gcc/gcc/config/rs6000/rs6000.h (working copy) @@ -1901,6 +1901,8 @@ #define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FI #ifdef HAVE_GAS_WEAK #define RS6000_WEAK 1 +#elif defined (CROSS_DIRECTORY_STRUCTURE) +#define RS6000_WEAK 1 #else #define RS6000_WEAK 0 #endif Index: gcc/gcc/config/tilegx/tilegx.c =================================================================== --- gcc/gcc/config/tilegx/tilegx.c (revision 191658) +++ gcc/gcc/config/tilegx/tilegx.c (working copy) @@ -3953,8 +3953,6 @@ tilegx_expand_prologue (void) address. */ rtx chain_addr = gen_rtx_REG (Pmode, next_scratch_regno--); rtx size_rtx = GEN_INT (-(total_size - UNITS_PER_WORD)); - int cfa_offset = - frame_pointer_needed ? 
UNITS_PER_WORD - total_size : UNITS_PER_WORD; if (add_operand (size_rtx, Pmode)) { Index: gcc/gcc/config/tilepro/tilepro.c =================================================================== --- gcc/gcc/config/tilepro/tilepro.c (revision 191658) +++ gcc/gcc/config/tilepro/tilepro.c (working copy) @@ -3574,8 +3574,6 @@ tilepro_expand_prologue (void) address. */ rtx chain_addr = gen_rtx_REG (Pmode, next_scratch_regno--); rtx size_rtx = gen_int_si (-(total_size - UNITS_PER_WORD)); - int cfa_offset = - frame_pointer_needed ? UNITS_PER_WORD - total_size : UNITS_PER_WORD; if (add_operand (size_rtx, Pmode)) {