===================================================================
@@ -94,6 +94,30 @@
false));
})
+;; TRUE for a PARALLEL whose structure matches a CDX pop.n insn;
+;; the detailed structural check is done by pop_operation_p.
+(define_special_predicate "pop_operation"
+ (match_code "parallel")
+{
+ return pop_operation_p (op);
+})
+
+;; TRUE for a PARALLEL whose structure matches a CDX ldwm
+;; (load multiple) insn.
+(define_special_predicate "ldwm_operation"
+ (match_code "parallel")
+{
+ return ldstwm_operation_p (op, /*load_p=*/true);
+})
+
+;; TRUE for a PARALLEL whose structure matches a CDX stwm
+;; (store multiple) insn.
+(define_special_predicate "stwm_operation"
+ (match_code "parallel")
+{
+ return ldstwm_operation_p (op, /*load_p=*/false);
+})
+
+;; TRUE for a hard general-purpose register, used by the generated
+;; ldwm/stwm patterns which reference specific hard registers.
+(define_predicate "nios2_hard_register_operand"
+ (match_code "reg")
+{
+ return GP_REG_P (REGNO (op));
+})
+
(define_predicate "stack_memory_operand"
(match_code "mem")
{
===================================================================
@@ -52,6 +52,10 @@ extern bool nios2_unspec_reloc_p (rtx);
extern int nios2_label_align (rtx);
extern bool nios2_cdx_narrow_form_p (rtx_insn *);
+extern bool pop_operation_p (rtx);
+extern bool ldstwm_operation_p (rtx, bool);
+extern bool gen_ldstwm_peep (bool, int, rtx, rtx *);
+
extern void nios2_adjust_reg_alloc_order (void);
#ifdef TREE_CODE
===================================================================
@@ -71,6 +71,8 @@ static void nios2_load_pic_register (voi
static void nios2_register_custom_code (unsigned int, enum nios2_ccs_code, int);
static const char *nios2_unspec_reloc_name (int);
static void nios2_register_builtin_fndecl (unsigned, tree);
+static rtx nios2_ldst_parallel (bool, bool, bool, rtx, int,
+ unsigned HOST_WIDE_INT, bool);
/* Threshold for data being put into the small data/bss area, instead
of the normal data area (references to the small data/bss area take
@@ -456,6 +458,25 @@ restore_reg (int regno, unsigned offset)
RTX_FRAME_RELATED_P (insn) = 1;
}
+/* This routine tests for the base register update SET in load/store
+ multiple RTL insns, used in pop_operation_p and ldstwm_operation_p.
+ SET matches only if it has the form
+ (set (reg B) (plus (reg B) (const_int N))).
+ On a match, store the base register rtx in *BASE_REG and the
+ CONST_INT adjustment in *OFFSET and return true; otherwise return
+ false leaving *BASE_REG and *OFFSET unmodified. */
+static bool
+base_reg_adjustment_p (rtx set, rtx *base_reg, rtx *offset)
+{
+ if (GET_CODE (set) == SET
+ && REG_P (SET_DEST (set))
+ && GET_CODE (SET_SRC (set)) == PLUS
+ && REG_P (XEXP (SET_SRC (set), 0))
+ && rtx_equal_p (SET_DEST (set), XEXP (SET_SRC (set), 0))
+ && CONST_INT_P (XEXP (SET_SRC (set), 1)))
+ {
+ *base_reg = XEXP (SET_SRC (set), 0);
+ *offset = XEXP (SET_SRC (set), 1);
+ return true;
+ }
+ return false;
+}
+
/* Temp regno used inside prologue/epilogue. */
#define TEMP_REG_NUM 8
@@ -4030,6 +4051,432 @@ nios2_cdx_narrow_form_p (rtx_insn *insn)
return false;
}
+/* Main function to implement the pop_operation predicate that
+ checks pop.n insn pattern integrity. The CDX pop.n patterns mostly
+ hardcode the restored registers, so the main checking is for the
+ SP offsets. */
+bool
+pop_operation_p (rtx op)
+{
+ int i;
+ HOST_WIDE_INT last_offset = -1, len = XVECLEN (op, 0);
+ rtx base_reg, offset;
+
+ /* Element 0 must be (return) and element 1 the word-aligned
+ SP adjustment. */
+ if (len < 3 /* At least has a return, SP-update, and RA restore. */
+ || GET_CODE (XVECEXP (op, 0, 0)) != RETURN
+ || !base_reg_adjustment_p (XVECEXP (op, 0, 1), &base_reg, &offset)
+ || !rtx_equal_p (base_reg, stack_pointer_rtx)
+ || !CONST_INT_P (offset)
+ || (INTVAL (offset) & 3) != 0)
+ return false;
+
+ /* Walk the restore SETs from last to first; their SP offsets must
+ ascend by 4 as I decreases, starting no higher than 60. */
+ for (i = len - 1; i > 1; i--)
+ {
+ rtx set = XVECEXP (op, 0, i);
+ rtx curr_base_reg, curr_offset;
+
+ if (GET_CODE (set) != SET || !MEM_P (SET_SRC (set))
+ || !split_mem_address (XEXP (SET_SRC (set), 0),
+ &curr_base_reg, &curr_offset)
+ || !rtx_equal_p (base_reg, curr_base_reg)
+ || !CONST_INT_P (curr_offset))
+ return false;
+ if (i == len - 1)
+ {
+ last_offset = INTVAL (curr_offset);
+ if ((last_offset & 3) != 0 || last_offset > 60)
+ return false;
+ }
+ else
+ {
+ last_offset += 4;
+ if (INTVAL (curr_offset) != last_offset)
+ return false;
+ }
+ }
+ /* The SP adjustment must equal one word past the highest restore
+ offset (the RA restore, adjacent to the update). */
+ if (last_offset < 0 || last_offset + 4 != INTVAL (offset))
+ return false;
+
+ return true;
+}
+
+
+/* Masks of registers that are valid for CDX ldwm/stwm instructions.
+ The instruction can encode subsets drawn from either R2-R13 or
+ R14-R23 + FP + RA. */
+#define CDX_LDSTWM_VALID_REGS_0 0x00003ffc
+#define CDX_LDSTWM_VALID_REGS_1 0x90ffc000
+
+/* Return true if REGNO may join a ldwm/stwm register group.
+ *REGSET is zero on the first call; it is then locked to whichever
+ of the two encodable subsets contains REGNO, and subsequent calls
+ only accept registers from that same subset. */
+static bool
+nios2_ldstwm_regset_p (unsigned int regno, unsigned int *regset)
+{
+ if (*regset == 0)
+ {
+ if (CDX_LDSTWM_VALID_REGS_0 & (1 << regno))
+ *regset = CDX_LDSTWM_VALID_REGS_0;
+ else if (CDX_LDSTWM_VALID_REGS_1 & (1 << regno))
+ *regset = CDX_LDSTWM_VALID_REGS_1;
+ else
+ return false;
+ return true;
+ }
+ else
+ return (*regset & (1 << regno)) != 0;
+}
+
+/* Main function to implement ldwm_operation/stwm_operation
+ predicates that check ldwm/stwm insn pattern integrity.
+ LOAD_P selects between the load (ldwm) and store (stwm) forms. */
+bool
+ldstwm_operation_p (rtx op, bool load_p)
+{
+ int start, i, end = XVECLEN (op, 0) - 1, last_regno = -1;
+ unsigned int regset = 0;
+ rtx base_reg, offset;
+ rtx first_elt = XVECEXP (op, 0, 0);
+ bool inc_p = true;
+ bool wb_p = base_reg_adjustment_p (first_elt, &base_reg, &offset);
+ if (GET_CODE (XVECEXP (op, 0, end)) == RETURN)
+ end--;
+ start = wb_p ? 1 : 0;
+ for (i = start; i <= end; i++)
+ {
+ int regno;
+ rtx reg, mem, elt = XVECEXP (op, 0, i);
+ /* Return early if not a SET at all. */
+ if (GET_CODE (elt) != SET)
+ return false;
+ reg = load_p ? SET_DEST (elt) : SET_SRC (elt);
+ mem = load_p ? SET_SRC (elt) : SET_DEST (elt);
+ if (!REG_P (reg) || !MEM_P (mem))
+ return false;
+ regno = REGNO (reg);
+ if (!nios2_ldstwm_regset_p (regno, &regset))
+ return false;
+ /* If no writeback to determine direction, use offset of first MEM. */
+ if (wb_p)
+ inc_p = INTVAL (offset) > 0;
+ else if (i == start)
+ {
+ rtx first_base, first_offset;
+ if (!split_mem_address (XEXP (mem, 0),
+ &first_base, &first_offset))
+ return false;
+ base_reg = first_base;
+ inc_p = INTVAL (first_offset) >= 0;
+ }
+ /* Ensure that the base register is not loaded into. */
+ if (load_p && regno == (int) REGNO (base_reg))
+ return false;
+ /* Check for register order inc/dec integrity. */
+ if (last_regno >= 0)
+ {
+ if (inc_p && last_regno >= regno)
+ return false;
+ if (!inc_p && last_regno <= regno)
+ return false;
+ }
+ last_regno = regno;
+ }
+ return true;
+}
+
+/* Helper for nios2_ldst_parallel, for generating a parallel vector
+ SET element.  Builds (set (reg:SI REGNO) MEM) for a load, or the
+ reverse for a store, where MEM is BASE_MEM displaced by OFFSET
+ bytes without re-validating the new address. */
+static rtx
+gen_ldst (bool load_p, int regno, rtx base_mem, int offset)
+{
+ rtx reg = gen_rtx_REG (SImode, regno);
+ rtx mem = adjust_address_nv (base_mem, SImode, offset);
+ return gen_rtx_SET (load_p ? reg : mem,
+ load_p ? mem : reg);
+}
+
+/* A general routine for creating the body RTL pattern of
+ ldwm/stwm/push.n/pop.n insns.
+ LOAD_P: true/false for load/store direction.
+ REG_INC_P: whether registers are incrementing/decrementing in the
+ *RTL vector* (not necessarily the order defined in the ISA specification).
+ OFFSET_INC_P: Same as REG_INC_P, but for the memory offset order.
+ BASE_MEM: starting MEM.
+ BASE_UPDATE: amount to update base register; zero means no writeback.
+ REGMASK: register mask to load/store.
+ RET_P: true if to tag a (return) element at the end.
+
+ Note that this routine does not do any checking. It's the job of the
+ caller to do the right thing, and the insn patterns to do the
+ safe-guarding. */
+static rtx
+nios2_ldst_parallel (bool load_p, bool reg_inc_p, bool offset_inc_p,
+ rtx base_mem, int base_update,
+ unsigned HOST_WIDE_INT regmask, bool ret_p)
+{
+ rtvec p;
+ int regno, b = 0, i = 0, n = 0, len = popcount_hwi (regmask);
+ /* Reserve leading vector slots: (return) always occupies element 0,
+ and the writeback SET goes right after it (or first, without it). */
+ if (ret_p) len++, i++, b++;
+ if (base_update != 0) len++, i++;
+ p = rtvec_alloc (len);
+ for (regno = (reg_inc_p ? 0 : 31);
+ regno != (reg_inc_p ? 32 : -1);
+ regno += (reg_inc_p ? 1 : -1))
+ if ((regmask & (1 << regno)) != 0)
+ {
+ int offset = (offset_inc_p ? 4 : -4) * n++;
+ RTVEC_ELT (p, i++) = gen_ldst (load_p, regno, base_mem, offset);
+ }
+ if (ret_p)
+ RTVEC_ELT (p, 0) = ret_rtx;
+ if (base_update != 0)
+ {
+ rtx reg, offset;
+ if (!split_mem_address (XEXP (base_mem, 0), &reg, &offset))
+ gcc_unreachable ();
+ RTVEC_ELT (p, b) =
+ gen_rtx_SET (reg, plus_constant (Pmode, reg, base_update));
+ }
+ return gen_rtx_PARALLEL (VOIDmode, p);
+}
+
+/* CDX ldwm/stwm peephole optimization pattern related routines. */
+
+/* Data structure and sorting function for ldwm/stwm peephole optimizers. */
+struct ldstwm_operand
+{
+ int offset; /* Offset from base register. */
+ rtx reg; /* Register to store at this offset. */
+ rtx mem; /* Original mem. */
+ bool bad; /* True if this load/store can't be combined. */
+ bool rewrite; /* True if we should rewrite using scratch. */
+};
+
+/* qsort comparator: orders entries by increasing memory offset,
+ with all entries marked bad sorted to the end of the array. */
+static int
+compare_ldstwm_operands (const void *arg1, const void *arg2)
+{
+ const struct ldstwm_operand *op1 = (const struct ldstwm_operand *) arg1;
+ const struct ldstwm_operand *op2 = (const struct ldstwm_operand *) arg2;
+ if (op1->bad)
+ return op2->bad ? 0 : 1;
+ else if (op2->bad)
+ return -1;
+ else
+ return op1->offset - op2->offset;
+}
+
+/* Helper function: return true if a load/store using REGNO with address
+ BASEREG and offset OFFSET meets the constraints for a 2-byte CDX ldw.n,
+ stw.n, ldwsp.n, or stwsp.n instruction. */
+static bool
+can_use_cdx_ldstw (int regno, int basereg, int offset)
+{
+ /* ldw.n/stw.n: both registers CDX-encodable, word-aligned offset
+ below 0x40. */
+ if (CDX_REG_P (regno) && CDX_REG_P (basereg)
+ && (offset & 0x3) == 0 && 0 <= offset && offset < 0x40)
+ return true;
+ /* ldwsp.n/stwsp.n: SP-based, word-aligned offset below 0x80. */
+ else if (basereg == SP_REGNO
+ && offset >= 0 && offset < 0x80 && (offset & 0x3) == 0)
+ return true;
+ return false;
+}
+
+/* This function is called from peephole2 optimizers to try to merge
+ a series of individual loads and stores into a ldwm or stwm. It
+ can also rewrite addresses inside the individual loads and stores
+ using a common base register using a scratch register and smaller
+ offsets if that allows them to use CDX ldw.n or stw.n instructions
+ instead of 4-byte loads or stores.
+ N is the number of insns we are trying to merge. SCRATCH is non-null
+ if there is a scratch register available. The OPERANDS array contains
+ alternating REG (even) and MEM (odd) operands.
+ Returns true (and emits the replacement insns) only when the rewrite
+ saves code size; returns false leaving the insn stream untouched. */
+bool
+gen_ldstwm_peep (bool load_p, int n, rtx scratch, rtx *operands)
+{
+ /* CDX ldwm/stwm instructions allow a maximum of 12 registers to be
+ specified. */
+#define MAX_LDSTWM_OPS 12
+ struct ldstwm_operand sort[MAX_LDSTWM_OPS];
+ int basereg = -1;
+ int baseoffset;
+ int i, m, lastoffset, lastreg;
+ unsigned int regmask = 0, usemask = 0, regset;
+ bool needscratch;
+ int newbasereg;
+ int nbytes;
+
+ if (!TARGET_HAS_CDX)
+ return false;
+ if (n < 2 || n > MAX_LDSTWM_OPS)
+ return false;
+
+ /* Check all the operands for validity and initialize the sort array.
+ The places where we return false here are all situations that aren't
+ expected to ever happen -- invalid patterns, invalid registers, etc. */
+ for (i = 0; i < n; i++)
+ {
+ rtx base, offset;
+ rtx reg = operands[i];
+ rtx mem = operands[i + n];
+ int r, o, regno;
+ bool bad = false;
+
+ if (!REG_P (reg) || !MEM_P (mem))
+ return false;
+
+ regno = REGNO (reg);
+ if (regno > 31)
+ return false;
+ if (load_p && (regmask & (1 << regno)) != 0)
+ return false;
+ regmask |= 1 << regno;
+
+ if (!split_mem_address (XEXP (mem, 0), &base, &offset))
+ return false;
+ r = REGNO (base);
+ o = INTVAL (offset);
+
+ if (basereg == -1)
+ basereg = r;
+ else if (r != basereg)
+ bad = true;
+ usemask |= 1 << r;
+
+ sort[i].bad = bad;
+ sort[i].rewrite = false;
+ sort[i].offset = o;
+ sort[i].reg = reg;
+ sort[i].mem = mem;
+ }
+
+ /* If we are doing a series of register loads, we can't safely reorder
+ them if any of the regs used in addr expressions are also being set. */
+ if (load_p && (regmask & usemask))
+ return false;
+
+ /* Sort the array by increasing mem offset order, then check that
+ offsets are valid and register order matches mem order. At the
+ end of this loop, m is the number of loads/stores we will try to
+ combine; the rest are leftovers. */
+ qsort (sort, n, sizeof (struct ldstwm_operand), compare_ldstwm_operands);
+
+ baseoffset = sort[0].offset;
+ needscratch = baseoffset != 0;
+ if (needscratch && !scratch)
+ return false;
+
+ lastreg = regmask = regset = 0;
+ lastoffset = baseoffset;
+ for (m = 0; m < n && !sort[m].bad; m++)
+ {
+ int thisreg = REGNO (sort[m].reg);
+ if (sort[m].offset != lastoffset
+ || (m > 0 && lastreg >= thisreg)
+ || !nios2_ldstwm_regset_p (thisreg, &regset))
+ break;
+ lastoffset += 4;
+ lastreg = thisreg;
+ regmask |= (1 << thisreg);
+ }
+
+ /* For loads, make sure we are not overwriting the scratch reg.
+ The peephole2 pattern isn't supposed to match unless the register is
+ unused all the way through, so this isn't supposed to happen anyway. */
+ if (load_p
+ && needscratch
+ && ((1 << REGNO (scratch)) & regmask) != 0)
+ return false;
+ newbasereg = needscratch ? (int) REGNO (scratch) : basereg;
+
+ /* We may be able to combine only the first m of the n total loads/stores
+ into a single instruction. If m < 2, there's no point in emitting
+ a ldwm/stwm at all, but we might be able to do further optimizations
+ if we have a scratch. We will count the instruction lengths of the
+ old and new patterns and store the savings in nbytes. */
+ if (m < 2)
+ {
+ if (!needscratch)
+ return false;
+ m = 0;
+ nbytes = 0;
+ }
+ else
+ nbytes = -4; /* Size of ldwm/stwm. */
+ if (needscratch)
+ {
+ int bo = baseoffset > 0 ? baseoffset : -baseoffset;
+ if (CDX_REG_P (newbasereg)
+ && CDX_REG_P (basereg)
+ && bo <= 128 && bo > 0 && (bo & (bo - 1)) == 0)
+ nbytes -= 2; /* Size of addi.n/subi.n. */
+ else
+ nbytes -= 4; /* Size of non-CDX addi. */
+ }
+
+ /* Count the size of the input load/store instructions being replaced. */
+ for (i = 0; i < m; i++)
+ if (can_use_cdx_ldstw (REGNO (sort[i].reg), basereg, sort[i].offset))
+ nbytes += 2;
+ else
+ nbytes += 4;
+
+ /* We may also be able to save a bit if we can rewrite non-CDX
+ load/stores that can't be combined into the ldwm/stwm into CDX
+ load/stores using the scratch reg. For example, this might happen
+ if baseoffset is large, by bringing in the offsets in the load/store
+ instructions within the range that fits in the CDX instruction. */
+ if (needscratch && CDX_REG_P (newbasereg))
+ for (i = m; i < n && !sort[i].bad; i++)
+ if (!can_use_cdx_ldstw (REGNO (sort[i].reg), basereg, sort[i].offset)
+ && can_use_cdx_ldstw (REGNO (sort[i].reg), newbasereg,
+ sort[i].offset - baseoffset))
+ {
+ sort[i].rewrite = true;
+ nbytes += 2;
+ }
+
+ /* Are we good to go? */
+ if (nbytes <= 0)
+ return false;
+
+ /* Emit the scratch load. */
+ if (needscratch)
+ emit_insn (gen_rtx_SET (scratch, XEXP (sort[0].mem, 0)));
+
+ /* Emit the ldwm/stwm insn. */
+ if (m > 0)
+ {
+ rtvec p = rtvec_alloc (m);
+ for (i = 0; i < m; i++)
+ {
+ int offset = sort[i].offset;
+ rtx mem, reg = sort[i].reg;
+ rtx base_reg = gen_rtx_REG (Pmode, newbasereg);
+ if (needscratch)
+ offset -= baseoffset;
+ mem = gen_rtx_MEM (SImode, plus_constant (Pmode, base_reg, offset));
+ if (load_p)
+ RTVEC_ELT (p, i) = gen_rtx_SET (reg, mem);
+ else
+ RTVEC_ELT (p, i) = gen_rtx_SET (mem, reg);
+ }
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ }
+
+ /* Emit any leftover load/stores as individual instructions, doing
+ the previously-noted rewrites to use the scratch reg. */
+ for (i = m; i < n; i++)
+ {
+ rtx reg = sort[i].reg;
+ rtx mem = sort[i].mem;
+ if (sort[i].rewrite)
+ {
+ int offset = sort[i].offset - baseoffset;
+ mem = gen_rtx_MEM (SImode, plus_constant (Pmode, scratch, offset));
+ }
+ if (load_p)
+ emit_move_insn (reg, mem);
+ else
+ emit_move_insn (mem, reg);
+ }
+ return true;
+}
+
/* Implement TARGET_MACHINE_DEPENDENT_REORG:
We use this hook when emitting CDX code to enforce the 4-byte
alignment requirement for labels that are used as the targets of
===================================================================
@@ -0,0 +1,277 @@
+(* Auto-generate Nios II R2 CDX ldwm/stwm/push.n/pop.n patterns
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Contributed by Mentor Graphics.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+ This is a Standard ML program. There are multiple Standard ML
+ implementations widely available. We recommend the MLton optimizing
+ SML compiler, due to its ease of creating a standalone executable.
+
+ http://www.mlton.org/
+
+ Or from your favourite OS's friendly packaging system. Tested with
+ MLton Release 20130715, though other versions will probably work too.
+
+ Run with:
+ mlton -output a.out /path/to/gcc/config/nios2/nios2-ldstwm.sml
+ ./a.out >/path/to/gcc/config/nios2/ldstwm.md
+*)
+
+(* Enumerations selecting which pattern family is being generated. *)
+datatype ld_st = ld | st;
+datatype push_pop = push | pop;
+datatype inc_dec = inc | dec;
+
+(* Helpers: apply F to each element of LS; emit STR only when COND
+   holds; render an int using "-" instead of SML's "~" for negatives
+   so the output is valid RTL/C syntax. *)
+fun for ls f = map f ls;
+fun conds cond str = if cond then str else "";
+fun ints n = if n>=0 then (Int.toString n) else ("-" ^ (Int.toString (~n)));
+
+(* Generate one push.n/pop.n define_insn.  PPTYPE selects push or pop;
+   N is how many r16+ registers are saved/restored beyond RA (and FP
+   when FP is true).  pop.n patterns use the pop_operation predicate
+   and carry a (return); push.n patterns validate the SP adjustment
+   in the insn condition instead. *)
+fun pushpop_pattern pptype n fp =
+ let
+ val sp_reg = "(reg:SI SP_REGNO)";
+ val ra_reg = "(reg:SI RA_REGNO)";
+ val fp_reg = "(reg:SI FP_REGNO)";
+
+ (* NOTE(review): both branches of this conditional emit the same
+    separator here; presumably they were meant to differ (e.g. in
+    whitespace/newline layout) -- verify against upstream. *)
+ fun sets lhs rhs = "(set " ^ lhs ^
+ (if pptype=push then " "
+ else " ") ^ rhs ^ ")";
+ val sp_adj =
+ "(set " ^ sp_reg ^ "\n " ^
+ "(plus:SI " ^ sp_reg ^
+ " (match_operand 1 \"const_int_operand\" \"\")))";
+
+ fun reg i regi = "(reg:SI " ^ (ints regi) ^ ")";
+ (* push.n hardcodes negative SP offsets; pop.n matches the restores
+    via stack_memory_operand operands. *)
+ fun mem i opndi =
+ if pptype=push then
+ "(mem:SI (plus:SI (reg:SI SP_REGNO) (const_int " ^ (ints (~4*i)) ^ ")))"
+ else
+ "(match_operand:SI " ^
+ (ints opndi) ^ " \"stack_memory_operand\" \"\")";
+
+ val start = 1 + (if fp then 2 else 1);
+ val lim = n + (if fp then 2 else 1);
+ fun set_elt i regi opndi =
+ if pptype=push then (sets (mem i opndi) (reg i regi))
+ else (sets (reg i regi) (mem i opndi));
+ fun get_elt_list (i, regi, opndi) =
+ if i > lim then []
+ else (set_elt i regi opndi) :: get_elt_list (i+1, regi-1, opndi+1);
+
+ val set_elements = get_elt_list (start, 16+n-1, start+1);
+
+ val ra_set = if pptype=push then sets (mem 1 2) ra_reg
+ else sets ra_reg (mem 1 2);
+ val fp_set = (conds fp (if pptype=push then sets (mem 2 3) fp_reg
+ else sets fp_reg (mem 2 3)));
+ val ret = (conds (pptype=pop) "(return)");
+ val element_list =
+ List.filter (fn x => x<>"")
+ ([ret, sp_adj, ra_set, fp_set] @ set_elements);
+
+ fun reg_index i = 16 + n - i;
+ fun pop_opnds 0 spl = (conds fp ("fp" ^ spl)) ^ "ra"
+ | pop_opnds n spl = "r" ^ (ints (reg_index n)) ^ spl ^ (pop_opnds (n-1) spl);
+ fun push_opnds 0 spl = "ra" ^ (conds fp (spl ^ "fp"))
+ | push_opnds n spl = (push_opnds (n-1) spl) ^ spl ^ "r" ^ (ints (reg_index n));
+
+ val spadj_opnd = if pptype=push then 2 else (start+n);
+ val spadj = ints spadj_opnd;
+ val regsave_num = n + (if fp then 2 else 1);
+
+ val ppname = if pptype=push then "push" else "pop";
+ val name = if pptype=push then "push" ^ "_" ^ (push_opnds n "_")
+ else "pop" ^ "_" ^ (pop_opnds n "_");
+ in
+ "(define_insn \"*cdx_" ^ name ^ "\"\n" ^
+ " [(match_parallel 0 \"" ^
+ (conds (pptype=pop) "pop_operation") ^ "\"\n" ^
+ " [" ^ (String.concatWith ("\n ") element_list) ^ "])]\n" ^
+ " \"TARGET_HAS_CDX && XVECLEN (operands[0], 0) == " ^
+ (ints (length element_list)) ^
+ (conds (pptype=push)
+ ("\n && (-INTVAL (operands[1]) & 3) == 0\n" ^
+ " && (-INTVAL (operands[1]) - " ^
+ (ints (4*regsave_num)) ^ ") <= 60")) ^
+ "\"\n" ^
+ (if pptype=pop then
+ "{\n" ^
+ " rtx x = XEXP (operands[" ^ spadj ^ "], 0);\n" ^
+ " operands[" ^ spadj ^ "] = REG_P (x) ? const0_rtx : XEXP (x, 1);\n" ^
+ " return \"pop.n\\\\t{" ^ (pop_opnds n ", ") ^ "}, %" ^ spadj ^ "\";\n" ^
+ "}\n"
+ else
+ "{\n" ^
+ " operands[" ^ spadj ^ "] = " ^
+ "GEN_INT (-INTVAL (operands[1]) - " ^ (ints (4*regsave_num)) ^ ");\n" ^
+ " return \"push.n\\\\t{" ^ (push_opnds n ", ") ^ "}, %" ^ spadj ^ "\";\n" ^
+ "}\n") ^
+ " [(set_attr \"type\" \"" ^ ppname ^ "\")])\n\n"
+ end;
+
+(* Generate one ldwm/stwm define_insn.  LDST selects load/store, N is
+   the number of registers, ID the address direction (inc/dec), WB
+   whether the base register is written back, and PC whether a
+   (return) is attached.  stwm with PC is not a valid combination and
+   yields the empty string. *)
+fun ldstwm_pattern ldst n id wb pc =
+ let
+ val ldstwm = (if ldst=ld then "ldwm" else "stwm");
+ val name = "*cdx_" ^ ldstwm ^ (Int.toString n) ^
+ (if id=inc then "_inc" else "_dec") ^
+ (conds wb "_wb") ^ (conds pc "_ret");
+ (* The first reference to the base register emits the full
+    match_operand; later references emit match_dup. *)
+ val base_reg_referenced_p = ref false;
+ val base_regno = ints (n+1);
+ fun plus_addr base offset =
+ "(plus:SI " ^ base ^ " (const_int " ^ (ints offset) ^ "))";
+ fun base_reg () =
+ if !base_reg_referenced_p then
+ "(match_dup " ^ base_regno ^ ")"
+ else (base_reg_referenced_p := true;
+ "(match_operand:SI " ^ base_regno ^
+ " \"register_operand\" \"" ^ (conds wb "+&") ^ "r\")");
+ (* NOTE(review): (conds (ldst=ld) "") is a no-op; it looks like the
+    "=" write constraint for load destinations was intended (or got
+    dropped in transit) -- verify against upstream. *)
+ fun reg i = "(match_operand:SI " ^ (ints i) ^
+ " \"nios2_hard_register_operand\" \"" ^
+ (conds (ldst=ld) "") ^ "\")";
+
+ fun addr 1 = if id=inc then base_reg ()
+ else plus_addr (base_reg ()) (~4)
+ | addr i = let val offset = if id=inc then (i-1)*4 else (~i*4)
+ in plus_addr (base_reg ()) offset end;
+
+ fun mem i = "(mem:SI " ^ (addr i) ^ ")";
+ fun lhs i = if ldst=ld then reg i else mem i;
+ fun rhs i = if ldst=st then reg i else mem i;
+ fun sets lhs rhs = "(set " ^ lhs ^ "\n " ^ rhs ^ ")";
+ fun set_elements i =
+ if i > n then []
+ else (sets (lhs i) (rhs i)) :: (set_elements (i+1));
+
+ fun opnds 1 = "%1"
+ | opnds n = opnds(n-1) ^ ", %" ^ (Int.toString n);
+
+ val asm_template = ldstwm ^ "\\\\t{" ^ (opnds n) ^ "}" ^
+ (if id=inc
+ then ", (%" ^ base_regno ^ ")++"
+ else ", --(%" ^ base_regno ^ ")") ^
+ (conds wb ", writeback") ^
+ (conds pc ", ret");
+ val wbtmp =
+ if wb then
+ (sets (base_reg ())
+ (plus_addr (base_reg ())
+ ((if id=inc then n else ~n)*4)))
+ else "";
+ val pctmp = conds pc "(return)";
+ val set_list = List.filter (fn x => x<>"")
+ ([pctmp, wbtmp] @ (set_elements 1));
+ in
+ if ldst=st andalso pc then ""
+ else
+ "(define_insn \"" ^ name ^ "\"\n" ^
+ " [(match_parallel 0 \"" ^ ldstwm ^ "_operation\"\n" ^
+ " [" ^ (String.concatWith ("\n ") set_list) ^ "])]\n" ^
+ " \"TARGET_HAS_CDX && XVECLEN (operands[0], 0) == " ^
+ (ints (length set_list)) ^ "\"\n" ^
+ " \"" ^ asm_template ^ "\"\n" ^
+ " [(set_attr \"type\" \"" ^ ldstwm ^ "\")])\n\n"
+ end;
+
+(* Generate a define_peephole2 that matches N consecutive single-word
+   loads (LDST=ld) or stores (LDST=st), optionally with a scratch
+   register when SCRATCH_P, and hands them to gen_ldstwm_peep to be
+   merged into one ldwm/stwm. *)
+fun peephole_pattern ldst n scratch_p =
+ let
+ fun sets lhs rhs = "(set " ^ lhs ^ "\n " ^ rhs ^ ")";
+ fun single_set i indent =
+ let val reg = "(match_operand:SI " ^ (ints i) ^
+ " \"register_operand\" \"\")";
+ val mem = "(match_operand:SI " ^ (ints (i+n)) ^
+ " \"memory_operand\" \"\")";
+ in
+ if ldst=ld then sets reg mem
+ else sets mem reg
+ end;
+
+ fun single_sets i =
+ if i=n then []
+ else (single_set i " ") :: (single_sets (i+1));
+
+ val scratch = ints (2*n);
+ (* The scratch operand sits at index 2*n, after the N regs and N mems;
+    match_dup at the end keeps it live through the whole sequence. *)
+ val peephole_elements =
+ let val tmp = single_sets 0 in
+ if scratch_p
+ then (["(match_scratch:SI " ^ scratch ^ " \"r\")"] @
+ tmp @
+ ["(match_dup " ^ scratch ^ ")"])
+ else tmp
+ end;
+ in
+ "(define_peephole2\n" ^
+ " [" ^ (String.concatWith ("\n ") peephole_elements) ^ "]\n" ^
+ " \"TARGET_HAS_CDX\"\n" ^
+ " [(const_int 0)]\n" ^
+ "{\n" ^
+ " if (gen_ldstwm_peep (" ^
+ (if ldst=st then "false" else "true") ^ ", " ^ (ints n) ^ ", " ^
+ (if scratch_p then ("operands[" ^ scratch ^ "]") else "NULL_RTX") ^
+ ", operands))\n" ^
+ " DONE;\n" ^
+ " else\n" ^
+ " FAIL;\n" ^
+ "})\n\n"
+ end;
+
+
+(* Emit the file header of the generated ldstwm.md.  Fixed typo in the
+   generated comment: "push.h" -> "push.n". *)
+print
+("/* Nios II R2 CDX ldwm/stwm/push.n/pop.n instruction patterns.\n" ^
+ " This file was automatically generated using nios2-ldstwm.sml.\n" ^
+ " Please do not edit manually.\n" ^
+ "\n" ^
+ " Copyright (C) 2014-2015 Free Software Foundation, Inc.\n" ^
+ " Contributed by Mentor Graphics.\n" ^
+ "\n" ^
+ " This file is part of GCC.\n" ^
+ "\n" ^
+ " GCC is free software; you can redistribute it and/or modify it\n" ^
+ " under the terms of the GNU General Public License as published\n" ^
+ " by the Free Software Foundation; either version 3, or (at your\n" ^
+ " option) any later version.\n" ^
+ "\n" ^
+ " GCC is distributed in the hope that it will be useful, but WITHOUT\n" ^
+ " ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n" ^
+ " or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n" ^
+ " License for more details.\n" ^
+ "\n" ^
+ " You should have received a copy of the GNU General Public License and\n" ^
+ " a copy of the GCC Runtime Library Exception along with this program;\n" ^
+ " see the files COPYING3 and COPYING.RUNTIME respectively. If not, see\n" ^
+ " <http://www.gnu.org/licenses/>. */\n\n");
+
+(* seq a b: inclusive integer sequence from A to B, counting up or
+   down as needed; [B] when A = B. *)
+fun seq a b = if a=b then [b]
+ else a :: (seq (if a<b then a+1 else a-1) b);
+
+(* push/pop patterns *)
+for (seq 0 8) (fn n =>
+ for [push, pop] (fn p =>
+ for [true, false] (fn fp =>
+ print (pushpop_pattern p n fp))));
+
+(* ldwm/stwm patterns *)
+for [ld, st] (fn l =>
+ for (seq 1 12) (fn n =>
+ for [inc, dec] (fn id =>
+ for [true, false] (fn wb =>
+ for [true, false] (fn pc =>
+ print (ldstwm_pattern l n id wb pc))))));
+
+(* peephole patterns; emitted from 12 down to 2 so that the longer,
+   more specific peepholes are tried first *)
+for [ld, st] (fn l =>
+ for (seq 12 2) (fn n =>
+ print (peephole_pattern l n true)));
+
===================================================================
@@ -1169,3 +1169,6 @@
emit_move_insn (operands[0], gen_rtx_REG (Pmode, TP_REGNO));
DONE;
})
+;; Include the ldwm/stwm/push.n/pop.n patterns and peepholes.
+(include "ldstwm.md")
+
===================================================================
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 -fomit-frame-pointer -funroll-all-loops -finline-functions -march=r2 -mcdx -w" } */
+
+/* Based on gcc.c-torture/compile/920501-23.c.
+ This test used to result in assembler errors with R2 CDX because of
+ a bug in regrename; it wasn't re-validating insns after renaming, so
+ ldwm/stwm instructions with incorrect registers were being emitted. */
+
+/* Deliberately old-style K&R definitions with implicit int, kept
+ verbatim from the original torture test; warnings suppressed
+ with -w in dg-options.  Do not modernize. */
+typedef unsigned char qi;
+typedef unsigned short hi;
+typedef unsigned long si;
+typedef unsigned long long di;
+subi(a){return 100-a;}
+add(a,b){return a+b;}
+mul(a){return 85*a;}
+memshift(p)unsigned*p;{unsigned x;for(;;){x=*p++>>16;if(x)return x;}}
+ldw(xp)si*xp;{return xp[4];}
+ldws_m(xp)si*xp;{si x;do{x=xp[3];xp+=3;}while(x);}
+postinc_si(p)si*p;{si x;for(;;){x=*p++;if(x)return x;}}
+preinc_si(p)si*p;{si x;for(;;){x=*++p;if(x)return x;}}
+postinc_di(p)di*p;{di x;for(;;){x=*p++;if(x)return x;}}
+preinc_di(p)di*p;{di x;for(;;){x=*++p;if(x)return x;}}
+inc_overlap(p,a)di*p;{do{p=*(di**)p;p=(di*)((int)p+4);}while(*p);}
+di move_di(p,p2)di*p,*p2;{di x=p;p2=((di*)x)[1];return p2[1];}
===================================================================
@@ -0,0 +1,66 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 -fomit-frame-pointer -funroll-loops -march=r2 -mcdx -w" } */
+
+/* Based on gcc.c-torture/execute/20021120-1.c.
+ This test used to result in assembler errors with R2 CDX because of
+ a bug in regrename; it wasn't re-validating insns after renaming, so
+ ldwm/stwm instructions with incorrect registers were being emitted. */
+
+/* Macros to emit "L Nxx R" for each octal number xx between 000 and 037. */
+#define OP1(L, N, R, I, J) L N##I##J R
+#define OP2(L, N, R, I) \
+ OP1(L, N, R, 0, I), OP1(L, N, R, 1, I), \
+ OP1(L, N, R, 2, I), OP1(L, N, R, 3, I)
+#define OP(L, N, R) \
+ OP2(L, N, R, 0), OP2(L, N, R, 1), OP2(L, N, R, 2), OP2(L, N, R, 3), \
+ OP2(L, N, R, 4), OP2(L, N, R, 5), OP2(L, N, R, 6), OP2(L, N, R, 7)
+
+/* Declare 32 unique variables with prefix N. */
+#define DECLARE(N) OP (, N,)
+
+/* Copy 32 variables with prefix N from the array at ADDR.
+ Leave ADDR pointing to the end of the array. */
+#define COPYIN(N, ADDR) OP (, N, = *(ADDR++))
+
+/* Likewise, but copy the other way. */
+#define COPYOUT(N, ADDR) OP (*(ADDR++) =, N,)
+
+/* Add the contents of the array at ADDR to 32 variables with prefix N.
+ Leave ADDR pointing to the end of the array. */
+#define ADD(N, ADDR) OP (, N, += *(ADDR++))
+
+/* Volatile arrays force real loads/stores, creating the register
+ pressure that produces ldwm/stwm candidates. */
+volatile double gd[32];
+volatile float gf[32];
+
+/* Keep 64 values (32 double + 32 float) live at once so register
+ allocation and regrename generate load/store-multiple candidates. */
+void foo (int n)
+{
+ double DECLARE(d);
+ float DECLARE(f);
+ volatile double *pd;
+ volatile float *pf;
+ int i;
+
+ pd = gd; COPYIN (d, pd);
+ for (i = 0; i < n; i++)
+ {
+ pf = gf; COPYIN (f, pf);
+ pd = gd; ADD (d, pd);
+ pd = gd; ADD (d, pd);
+ pd = gd; ADD (d, pd);
+ pf = gf; COPYOUT (f, pf);
+ }
+ pd = gd; COPYOUT (d, pd);
+}
+
+/* NOTE(review): abort/exit are used without declarations, as in the
+ original torture test; -w in dg-options silences the warnings. */
+int main ()
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ gd[i] = i, gf[i] = i;
+ foo (1);
+ /* foo (1) performs one iteration of three ADDs: gd[i] = i * 4,
+ while gf is copied out unchanged. */
+ for (i = 0; i < 32; i++)
+ if (gd[i] != i * 4 || gf[i] != i)
+ abort ();
+ exit (0);
+}