Message ID | 20181010080016.12317-5-claziss@gmail.com |
---|---|
State | New |
Headers | show |
Series | ARC updates | expand |
* Claudiu Zissulescu <claziss@gmail.com> [2018-10-10 11:00:14 +0300]: > Simple peephole rules which combines multiple ld/st instructions into > 64-bit load/store instructions. It only works for architectures which > are having double load/store option on. > > gcc/ > Claudiu Zissulescu <claziss@synopsys.com> > > * config/arc/arc-protos.h (gen_operands_ldd_std): Add. > * config/arc/arc.c (operands_ok_ldd_std): New function. > (mem_ok_for_ldd_std): Likewise. > (gen_operands_ldd_std): Likewise. > * config/arc/arc.md: Add peephole2 rules for std/ldd. > --- > gcc/config/arc/arc-protos.h | 1 + > gcc/config/arc/arc.c | 163 ++++++++++++++++++++++++++++++++++++ > gcc/config/arc/arc.md | 67 +++++++++++++++ > 3 files changed, 231 insertions(+) > > diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h > index 24bea6e1efb..55f8ed4c643 100644 > --- a/gcc/config/arc/arc-protos.h > +++ b/gcc/config/arc/arc-protos.h > @@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int); > extern unsigned int arc_compute_function_type (struct function *); > extern bool arc_is_uncached_mem_p (rtx); > extern bool arc_lra_p (void); > +extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute); > #endif /* RTX_CODE */ > > extern unsigned int arc_compute_frame_size (int); > diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c > index 18dd0de6af7..9bc69e9fbc9 100644 > --- a/gcc/config/arc/arc.c > +++ b/gcc/config/arc/arc.c > @@ -10803,6 +10803,169 @@ arc_cannot_substitute_mem_equiv_p (rtx) > return true; > } > > +/* Checks whether the operands are valid for use in an LDD/STD > + instruction. Assumes that RT, RT2, and RN are REG. This is > + guaranteed by the patterns. Assumes that the address in the base > + register RN is word aligned. Pattern guarantees that both memory > + accesses use the same base register, the offsets are constants > + within the range, and the gap between the offsets is 4. If preload > + complete then check that registers are legal. WBACK indicates > + whether address is updated. */ You've got tabs instead of whitespace inside both this comment block, and others within this patch. It should be period and two whitespace at the end of each sentence. > + > +static bool > +operands_ok_ldd_std (rtx rt, rtx rt2, rtx rn ATTRIBUTE_UNUSED, > + HOST_WIDE_INT offset) Why have the RN parameter at all? I took a quick look through patches 5/6 and don't see any additional changes to this function, we should probably just drop this at this point. > +{ > + unsigned int t, t2; > + > + if (!reload_completed) > + return true; > + > + if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & -4, Couldn't we use (~0x3) instead of -4? Maybe I'm just feeling slow today, but the bit pattern for negative numbers don't just pop into my head like those for positive numbers. > + (offset & (GET_MODE_SIZE (DImode) - 1) & 3 > + ? 0 : -(-GET_MODE_SIZE (DImode) | -4) >> 1)))) > + return false; > + > + t = REGNO (rt); > + t2 = REGNO (rt2); > + > + if ((t2 == 63) Can we use PROGRAM_COUNTER_REGNO here? > + || (t % 2 != 0) /* First destination register is not even. */ > + || (t2 != t + 1)) > + return false; > + > + return true; > +} > + > +/* Helper for gen_operands_ldd_std. Returns true iff the memory > + operand MEM's address contains an immediate offset from the base > + register and has no side effects, in which case it sets BASE and > + OFFSET accordingly. */ > + > +static bool > +mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset) > +{ > + rtx addr; > + > + gcc_assert (base != NULL && offset != NULL); > + > + /* TODO: Handle more general memory operand patterns, such as > + PRE_DEC and PRE_INC. */ > + > + if (side_effects_p (mem)) > + return false; > + > + /* Can't deal with subregs. */ > + if (GET_CODE (mem) == SUBREG) > + return false; > + > + gcc_assert (MEM_P (mem)); > + > + *offset = const0_rtx; > + > + addr = XEXP (mem, 0); > + > + /* If addr isn't valid for DImode, then we can't handle it. */ > + if (!arc_legitimate_address_p (DImode, addr, > + reload_in_progress || reload_completed)) > + return false; > + > + if (REG_P (addr)) > + { > + *base = addr; > + return true; > + } > + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS) > + { > + *base = XEXP (addr, 0); > + *offset = XEXP (addr, 1); > + return (REG_P (*base) && CONST_INT_P (*offset)); > + } > + > + return false; > +} > + > +/* Called from peephole2 to replace two word-size accesses with a > + single LDD/STD instruction. Returns true iff we can generate a new > + instruction sequence. That is, both accesses use the same base > + register and the gap between constant offsets is 4. OPERANDS are > + the operands found by the peephole matcher; OPERANDS[0,1] are > + register operands, and OPERANDS[2,3] are the corresponding memory > + operands. LOAD indicates whether the access is load or store. */ > + > +bool > +gen_operands_ldd_std (rtx *operands, bool load, bool commute) > +{ > + int i, gap; > + HOST_WIDE_INT offsets[2], offset; > + int nops = 2; > + rtx cur_base, cur_offset, tmp; > + rtx base = NULL_RTX; > + > + /* Check that the memory references are immediate offsets from the > + same base register. Extract the base register, the destination > + registers, and the corresponding memory offsets. */ > + for (i = 0; i < nops; i++) > + { > + if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset)) > + return false; > + > + if (i == 0) > + base = cur_base; > + else if (REGNO (base) != REGNO (cur_base)) > + return false; > + > + offsets[i] = INTVAL (cur_offset); > + if (GET_CODE (operands[i]) == SUBREG) > + { > + tmp = SUBREG_REG (operands[i]); > + gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp)); > + operands[i] = tmp; > + } > + } > + > + /* Make sure there is no dependency between the individual loads. */ > + if (load && REGNO (operands[0]) == REGNO (base)) > + return false; /* RAW */ > + > + if (load && REGNO (operands[0]) == REGNO (operands[1])) > + return false; /* WAW */ > + > + /* Make sure the instructions are ordered with lower memory access first. */ > + if (offsets[0] > offsets[1]) > + { > + gap = offsets[0] - offsets[1]; > + offset = offsets[1]; > + > + /* Swap the instructions such that lower memory is accessed first. */ > + std::swap (operands[0], operands[1]); > + std::swap (operands[2], operands[3]); > + } > + else > + { > + gap = offsets[1] - offsets[0]; > + offset = offsets[0]; > + } > + > + /* Make sure accesses are to consecutive memory locations. */ > + if (gap != 4) > + return false; > + > + /* Make sure we generate legal instructions. */ > + if (operands_ok_ldd_std (operands[0], operands[1], base, offset)) > + return true; > + > + if (load && commute) > + { > + /* Try reordering registers. */ > + std::swap (operands [0], operands[1]); > + if (operands_ok_ldd_std (operands[0], operands[1], base, offset)) > + return true; > + } > + > + return false; > +} > + > #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P > #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p > > diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md > index 1ed230fa5f0..b968022e64a 100644 > --- a/gcc/config/arc/arc.md > +++ b/gcc/config/arc/arc.md > @@ -6363,6 +6363,73 @@ archs4x, archs4xd, archs4xd_slow" > [(set (reg:CC CC_REG) (compare:CC (match_dup 3) > (ashift:SI (match_dup 1) (match_dup 2))))]) > > +(define_peephole2 ; std > +[(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "register_operand" ""))] > + "TARGET_LL64" > + [(const_int 0)] > +{ > + if (!gen_operands_ldd_std (operands, false, false)) > + FAIL; > + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); > + operands[2] = adjust_address (operands[2], DImode, 0); > + emit_insn (gen_rtx_SET (operands[2], operands[0])); > + DONE; > + }) > + > +(define_peephole2 ; ldd > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))] > + "TARGET_LL64" > + [(const_int 0)] > +{ > + if (!gen_operands_ldd_std (operands, true, false)) > + FAIL; > + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); > + operands[2] = adjust_address (operands[2], DImode, 0); > + emit_insn (gen_rtx_SET (operands[0], operands[2])); > + DONE; > +}) > + > +;; We require consecutive registers for LDD instruction. Check if we > +;; can reorder them and use an LDD. > + > +(define_peephole2 ; swap the destination registers of two loads > + ; before a commutative operation. > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 3 "memory_operand" "")) > + (set (match_operand:SI 4 "register_operand" "") > + (match_operator:SI 5 "commutative_operator" > + [(match_operand 6 "register_operand" "") > + (match_operand 7 "register_operand" "") ]))] > + "TARGET_LL64 > + && (((rtx_equal_p(operands[0], operands[6])) > + && (rtx_equal_p(operands[1], operands[7]))) > + || ((rtx_equal_p(operands[0], operands[7])) > + && (rtx_equal_p(operands[1], operands[6])))) > + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) > + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" > + [(set (match_dup 0) (match_dup 2)) > + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] > + { > + if (!gen_operands_ldd_std (operands, true, true)) > + { > + FAIL; > + } > + else > + { > + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); > + operands[2] = adjust_address (operands[2], DImode, 0); > + } > + } > +) > + > ;; include the arc-FPX instructions > (include "fpx.md") > > -- > 2.17.1 >
On 22 October 2018 19:49:35 CEST, Andrew Burgess <andrew.burgess@embecosm.com> wrote: >* Claudiu Zissulescu <claziss@gmail.com> [2018-10-10 11:00:14 +0300]: >> --- a/gcc/config/arc/arc.c >> +++ b/gcc/config/arc/arc.c >> @@ -10803,6 +10803,169 @@ arc_cannot_substitute_mem_equiv_p (rtx) >> return true; >> } >> >> +/* Checks whether the operands are valid for use in an LDD/STD >> + instruction. Assumes that RT, RT2, and RN are REG. This is >> + guaranteed by the patterns. Assumes that the address in the base >> + register RN is word aligned. Pattern guarantees that both memory >> + accesses use the same base register, the offsets are constants >> + within the range, and the gap between the offsets is 4. If >preload >> + complete then check that registers are legal. WBACK indicates >> + whether address is updated. */ > >You've got tabs instead of whitespace inside both this comment block, >and others within this patch. It should be period and two whitespace >at the end of each sentence. See contrib/check_GNU_style.py Also: s/If preload/If reload/ thanks,
Thank you for your review. Please find attached a new respin patch with
your feedback in.
Please let me know if it is ok,
Claudiu
From 4ff7d8419783eceeffbaf27df017d0a93c3af942 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@gmail.com>
Date: Thu, 9 Aug 2018 14:29:05 +0300
Subject: [PATCH] [ARC] Add peephole rules to combine store/loads into double
store/loads
Simple peephole rules which combines multiple ld/st instructions into
64-bit load/store instructions. It only works for architectures which
are having double load/store option on.
gcc/
Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-protos.h (gen_operands_ldd_std): Add.
* config/arc/arc.c (operands_ok_ldd_std): New function.
(mem_ok_for_ldd_std): Likewise.
(gen_operands_ldd_std): Likewise.
* config/arc/arc.md: Add peephole2 rules for std/ldd.
---
gcc/config/arc/arc-protos.h | 1 +
gcc/config/arc/arc.c | 161 ++++++++++++++++++++++++++++++++++++
gcc/config/arc/arc.md | 69 ++++++++++++++++
3 files changed, 231 insertions(+)
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 24bea6e1efb..55f8ed4c643 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int);
extern unsigned int arc_compute_function_type (struct function *);
extern bool arc_is_uncached_mem_p (rtx);
extern bool arc_lra_p (void);
+extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute);
#endif /* RTX_CODE */
extern unsigned int arc_compute_frame_size (int);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 18dd0de6af7..daf785dbdb8 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -10803,6 +10803,167 @@ arc_cannot_substitute_mem_equiv_p (rtx)
return true;
}
+/* Checks whether the operands are valid for use in an LDD/STD
+ instruction. Assumes that RT, and RT2 are REG. This is guaranteed
+ by the patterns. Assumes that the address in the base register RN
+ is word aligned. Pattern guarantees that both memory accesses use
+ the same base register, the offsets are constants within the range,
+ and the gap between the offsets is 4. If reload complete then
+ check that registers are legal. */
+
+static bool
+operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset)
+{
+ unsigned int t, t2;
+
+ if (!reload_completed)
+ return true;
+
+ if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03),
+ (offset & (GET_MODE_SIZE (DImode) - 1) & 3
+ ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1))))
+ return false;
+
+ t = REGNO (rt);
+ t2 = REGNO (rt2);
+
+ if ((t2 == PROGRAM_COUNTER_REGNO)
+ || (t % 2 != 0) /* First destination register is not even. */
+ || (t2 != t + 1))
+ return false;
+
+ return true;
+}
+
+/* Helper for gen_operands_ldd_std. Returns true iff the memory
+ operand MEM's address contains an immediate offset from the base
+ register and has no side effects, in which case it sets BASE and
+ OFFSET accordingly. */
+
+static bool
+mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset)
+{
+ rtx addr;
+
+ gcc_assert (base != NULL && offset != NULL);
+
+ /* TODO: Handle more general memory operand patterns, such as
+ PRE_DEC and PRE_INC. */
+
+ if (side_effects_p (mem))
+ return false;
+
+ /* Can't deal with subregs. */
+ if (GET_CODE (mem) == SUBREG)
+ return false;
+
+ gcc_assert (MEM_P (mem));
+
+ *offset = const0_rtx;
+
+ addr = XEXP (mem, 0);
+
+ /* If addr isn't valid for DImode, then we can't handle it. */
+ if (!arc_legitimate_address_p (DImode, addr,
+ reload_in_progress || reload_completed))
+ return false;
+
+ if (REG_P (addr))
+ {
+ *base = addr;
+ return true;
+ }
+ else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+ {
+ *base = XEXP (addr, 0);
+ *offset = XEXP (addr, 1);
+ return (REG_P (*base) && CONST_INT_P (*offset));
+ }
+
+ return false;
+}
+
+/* Called from peephole2 to replace two word-size accesses with a
+ single LDD/STD instruction. Returns true iff we can generate a new
+ instruction sequence. That is, both accesses use the same base
+ register and the gap between constant offsets is 4. OPERANDS are
+ the operands found by the peephole matcher; OPERANDS[0,1] are
+ register operands, and OPERANDS[2,3] are the corresponding memory
+ operands. LOAD indicates whether the access is load or store. */
+
+bool
+gen_operands_ldd_std (rtx *operands, bool load, bool commute)
+{
+ int i, gap;
+ HOST_WIDE_INT offsets[2], offset;
+ int nops = 2;
+ rtx cur_base, cur_offset, tmp;
+ rtx base = NULL_RTX;
+
+ /* Check that the memory references are immediate offsets from the
+ same base register. Extract the base register, the destination
+ registers, and the corresponding memory offsets. */
+ for (i = 0; i < nops; i++)
+ {
+ if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset))
+ return false;
+
+ if (i == 0)
+ base = cur_base;
+ else if (REGNO (base) != REGNO (cur_base))
+ return false;
+
+ offsets[i] = INTVAL (cur_offset);
+ if (GET_CODE (operands[i]) == SUBREG)
+ {
+ tmp = SUBREG_REG (operands[i]);
+ gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
+ operands[i] = tmp;
+ }
+ }
+
+ /* Make sure there is no dependency between the individual loads. */
+ if (load && REGNO (operands[0]) == REGNO (base))
+ return false; /* RAW. */
+
+ if (load && REGNO (operands[0]) == REGNO (operands[1]))
+ return false; /* WAW. */
+
+ /* Make sure the instructions are ordered with lower memory access first. */
+ if (offsets[0] > offsets[1])
+ {
+ gap = offsets[0] - offsets[1];
+ offset = offsets[1];
+
+ /* Swap the instructions such that lower memory is accessed first. */
+ std::swap (operands[0], operands[1]);
+ std::swap (operands[2], operands[3]);
+ }
+ else
+ {
+ gap = offsets[1] - offsets[0];
+ offset = offsets[0];
+ }
+
+ /* Make sure accesses are to consecutive memory locations. */
+ if (gap != 4)
+ return false;
+
+ /* Make sure we generate legal instructions. */
+ if (operands_ok_ldd_std (operands[0], operands[1], offset))
+ return true;
+
+ if (load && commute)
+ {
+ /* Try reordering registers. */
+ std::swap (operands[0], operands[1]);
+ if (operands_ok_ldd_std (operands[0], operands[1], offset))
+ return true;
+ }
+
+ return false;
+}
+
#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 1ed230fa5f0..526fd17a0cf 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -6363,6 +6363,75 @@ archs4x, archs4xd, archs4xd_slow"
[(set (reg:CC CC_REG) (compare:CC (match_dup 3)
(ashift:SI (match_dup 1) (match_dup 2))))])
+(define_peephole2 ; std
+ [(set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 0 "register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 1 "register_operand" ""))]
+ "TARGET_LL64"
+ [(const_int 0)]
+{
+ if (!gen_operands_ldd_std (operands, false, false))
+ FAIL;
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[2] = adjust_address (operands[2], DImode, 0);
+ emit_insn (gen_rtx_SET (operands[2], operands[0]));
+ DONE;
+})
+
+(define_peephole2 ; ldd
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))]
+ "TARGET_LL64"
+ [(const_int 0)]
+{
+ if (!gen_operands_ldd_std (operands, true, false))
+ FAIL;
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[2] = adjust_address (operands[2], DImode, 0);
+ emit_insn (gen_rtx_SET (operands[0], operands[2]));
+ DONE;
+})
+
+;; We require consecutive registers for LDD instruction. Check if we
+;; can reorder them and use an LDD.
+
+(define_peephole2 ; swap the destination registers of two loads
+ ; before a commutative operation.
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (set (match_operand:SI 4 "register_operand" "")
+ (match_operator:SI 5 "commutative_operator"
+ [(match_operand 6 "register_operand" "")
+ (match_operand 7 "register_operand" "") ]))]
+ "TARGET_LL64
+ && (((rtx_equal_p (operands[0], operands[6]))
+ && (rtx_equal_p (operands[1], operands[7])))
+ || ((rtx_equal_p (operands[0], operands[7]))
+ && (rtx_equal_p (operands[1], operands[6]))))
+ && (peep2_reg_dead_p (3, operands[0])
+ || rtx_equal_p (operands[0], operands[4]))
+ && (peep2_reg_dead_p (3, operands[1])
+ || rtx_equal_p (operands[1], operands[4]))"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
+ {
+ if (!gen_operands_ldd_std (operands, true, true))
+ {
+ FAIL;
+ }
+ else
+ {
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[2] = adjust_address (operands[2], DImode, 0);
+ }
+ }
+)
+
;; include the arc-FPX instructions
(include "fpx.md")
PING. On Wed, 2018-10-31 at 10:33 +0200, claziss@gmail.com wrote: > Thank you for your review. Please find attached a new respin patch > with > your feedback in. > > Please let me know if it is ok, > Claudiu
* claziss@gmail.com <claziss@gmail.com> [2018-10-31 10:33:33 +0200]: > Thank you for your review. Please find attached a new respin patch with > your feedback in. > > Please let me know if it is ok, > Claudiu > From 4ff7d8419783eceeffbaf27df017d0a93c3af942 Mon Sep 17 00:00:00 2001 > From: Claudiu Zissulescu <claziss@gmail.com> > Date: Thu, 9 Aug 2018 14:29:05 +0300 > Subject: [PATCH] [ARC] Add peephole rules to combine store/loads into double > store/loads > > Simple peephole rules which combines multiple ld/st instructions into > 64-bit load/store instructions. It only works for architectures which > are having double load/store option on. > > gcc/ > Claudiu Zissulescu <claziss@synopsys.com> > > * config/arc/arc-protos.h (gen_operands_ldd_std): Add. > * config/arc/arc.c (operands_ok_ldd_std): New function. > (mem_ok_for_ldd_std): Likewise. > (gen_operands_ldd_std): Likewise. > * config/arc/arc.md: Add peephole2 rules for std/ldd. Looks good. Thanks, Andrew > --- > gcc/config/arc/arc-protos.h | 1 + > gcc/config/arc/arc.c | 161 ++++++++++++++++++++++++++++++++++++ > gcc/config/arc/arc.md | 69 ++++++++++++++++ > 3 files changed, 231 insertions(+) > > diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h > index 24bea6e1efb..55f8ed4c643 100644 > --- a/gcc/config/arc/arc-protos.h > +++ b/gcc/config/arc/arc-protos.h > @@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int); > extern unsigned int arc_compute_function_type (struct function *); > extern bool arc_is_uncached_mem_p (rtx); > extern bool arc_lra_p (void); > +extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute); > #endif /* RTX_CODE */ > > extern unsigned int arc_compute_frame_size (int); > diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c > index 18dd0de6af7..daf785dbdb8 100644 > --- a/gcc/config/arc/arc.c > +++ b/gcc/config/arc/arc.c > @@ -10803,6 +10803,167 @@ arc_cannot_substitute_mem_equiv_p (rtx) > return true; > } > > +/* Checks whether the operands are valid for use in an LDD/STD > + instruction. Assumes that RT, and RT2 are REG. This is guaranteed > + by the patterns. Assumes that the address in the base register RN > + is word aligned. Pattern guarantees that both memory accesses use > + the same base register, the offsets are constants within the range, > + and the gap between the offsets is 4. If reload complete then > + check that registers are legal. */ > + > +static bool > +operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset) > +{ > + unsigned int t, t2; > + > + if (!reload_completed) > + return true; > + > + if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03), > + (offset & (GET_MODE_SIZE (DImode) - 1) & 3 > + ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1)))) > + return false; > + > + t = REGNO (rt); > + t2 = REGNO (rt2); > + > + if ((t2 == PROGRAM_COUNTER_REGNO) > + || (t % 2 != 0) /* First destination register is not even. */ > + || (t2 != t + 1)) > + return false; > + > + return true; > +} > + > +/* Helper for gen_operands_ldd_std. Returns true iff the memory > + operand MEM's address contains an immediate offset from the base > + register and has no side effects, in which case it sets BASE and > + OFFSET accordingly. */ > + > +static bool > +mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset) > +{ > + rtx addr; > + > + gcc_assert (base != NULL && offset != NULL); > + > + /* TODO: Handle more general memory operand patterns, such as > + PRE_DEC and PRE_INC. */ > + > + if (side_effects_p (mem)) > + return false; > + > + /* Can't deal with subregs. */ > + if (GET_CODE (mem) == SUBREG) > + return false; > + > + gcc_assert (MEM_P (mem)); > + > + *offset = const0_rtx; > + > + addr = XEXP (mem, 0); > + > + /* If addr isn't valid for DImode, then we can't handle it. */ > + if (!arc_legitimate_address_p (DImode, addr, > + reload_in_progress || reload_completed)) > + return false; > + > + if (REG_P (addr)) > + { > + *base = addr; > + return true; > + } > + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS) > + { > + *base = XEXP (addr, 0); > + *offset = XEXP (addr, 1); > + return (REG_P (*base) && CONST_INT_P (*offset)); > + } > + > + return false; > +} > + > +/* Called from peephole2 to replace two word-size accesses with a > + single LDD/STD instruction. Returns true iff we can generate a new > + instruction sequence. That is, both accesses use the same base > + register and the gap between constant offsets is 4. OPERANDS are > + the operands found by the peephole matcher; OPERANDS[0,1] are > + register operands, and OPERANDS[2,3] are the corresponding memory > + operands. LOAD indicates whether the access is load or store. */ > + > +bool > +gen_operands_ldd_std (rtx *operands, bool load, bool commute) > +{ > + int i, gap; > + HOST_WIDE_INT offsets[2], offset; > + int nops = 2; > + rtx cur_base, cur_offset, tmp; > + rtx base = NULL_RTX; > + > + /* Check that the memory references are immediate offsets from the > + same base register. Extract the base register, the destination > + registers, and the corresponding memory offsets. */ > + for (i = 0; i < nops; i++) > + { > + if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset)) > + return false; > + > + if (i == 0) > + base = cur_base; > + else if (REGNO (base) != REGNO (cur_base)) > + return false; > + > + offsets[i] = INTVAL (cur_offset); > + if (GET_CODE (operands[i]) == SUBREG) > + { > + tmp = SUBREG_REG (operands[i]); > + gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp)); > + operands[i] = tmp; > + } > + } > + > + /* Make sure there is no dependency between the individual loads. */ > + if (load && REGNO (operands[0]) == REGNO (base)) > + return false; /* RAW. */ > + > + if (load && REGNO (operands[0]) == REGNO (operands[1])) > + return false; /* WAW. */ > + > + /* Make sure the instructions are ordered with lower memory access first. */ > + if (offsets[0] > offsets[1]) > + { > + gap = offsets[0] - offsets[1]; > + offset = offsets[1]; > + > + /* Swap the instructions such that lower memory is accessed first. */ > + std::swap (operands[0], operands[1]); > + std::swap (operands[2], operands[3]); > + } > + else > + { > + gap = offsets[1] - offsets[0]; > + offset = offsets[0]; > + } > + > + /* Make sure accesses are to consecutive memory locations. */ > + if (gap != 4) > + return false; > + > + /* Make sure we generate legal instructions. */ > + if (operands_ok_ldd_std (operands[0], operands[1], offset)) > + return true; > + > + if (load && commute) > + { > + /* Try reordering registers. */ > + std::swap (operands[0], operands[1]); > + if (operands_ok_ldd_std (operands[0], operands[1], offset)) > + return true; > + } > + > + return false; > +} > + > #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P > #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p > > diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md > index 1ed230fa5f0..526fd17a0cf 100644 > --- a/gcc/config/arc/arc.md > +++ b/gcc/config/arc/arc.md > @@ -6363,6 +6363,75 @@ archs4x, archs4xd, archs4xd_slow" > [(set (reg:CC CC_REG) (compare:CC (match_dup 3) > (ashift:SI (match_dup 1) (match_dup 2))))]) > > +(define_peephole2 ; std > + [(set (match_operand:SI 2 "memory_operand" "") > + (match_operand:SI 0 "register_operand" "")) > + (set (match_operand:SI 3 "memory_operand" "") > + (match_operand:SI 1 "register_operand" ""))] > + "TARGET_LL64" > + [(const_int 0)] > +{ > + if (!gen_operands_ldd_std (operands, false, false)) > + FAIL; > + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); > + operands[2] = adjust_address (operands[2], DImode, 0); > + emit_insn (gen_rtx_SET (operands[2], operands[0])); > + DONE; > +}) > + > +(define_peephole2 ; ldd > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 3 "memory_operand" ""))] > + "TARGET_LL64" > + [(const_int 0)] > +{ > + if (!gen_operands_ldd_std (operands, true, false)) > + FAIL; > + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); > + operands[2] = adjust_address (operands[2], DImode, 0); > + emit_insn (gen_rtx_SET (operands[0], operands[2])); > + DONE; > +}) > + > +;; We require consecutive registers for LDD instruction. Check if we > +;; can reorder them and use an LDD. > + > +(define_peephole2 ; swap the destination registers of two loads > + ; before a commutative operation. > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 2 "memory_operand" "")) > + (set (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 3 "memory_operand" "")) > + (set (match_operand:SI 4 "register_operand" "") > + (match_operator:SI 5 "commutative_operator" > + [(match_operand 6 "register_operand" "") > + (match_operand 7 "register_operand" "") ]))] > + "TARGET_LL64 > + && (((rtx_equal_p (operands[0], operands[6])) > + && (rtx_equal_p (operands[1], operands[7]))) > + || ((rtx_equal_p (operands[0], operands[7])) > + && (rtx_equal_p (operands[1], operands[6])))) > + && (peep2_reg_dead_p (3, operands[0]) > + || rtx_equal_p (operands[0], operands[4])) > + && (peep2_reg_dead_p (3, operands[1]) > + || rtx_equal_p (operands[1], operands[4]))" > + [(set (match_dup 0) (match_dup 2)) > + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] > + { > + if (!gen_operands_ldd_std (operands, true, true)) > + { > + FAIL; > + } > + else > + { > + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); > + operands[2] = adjust_address (operands[2], DImode, 0); > + } > + } > +) > + > ;; include the arc-FPX instructions > (include "fpx.md") > > -- > 2.17.1 >
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index 24bea6e1efb..55f8ed4c643 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -46,6 +46,7 @@ extern int arc_return_address_register (unsigned int); extern unsigned int arc_compute_function_type (struct function *); extern bool arc_is_uncached_mem_p (rtx); extern bool arc_lra_p (void); +extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute); #endif /* RTX_CODE */ extern unsigned int arc_compute_frame_size (int); diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 18dd0de6af7..9bc69e9fbc9 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -10803,6 +10803,169 @@ arc_cannot_substitute_mem_equiv_p (rtx) return true; } +/* Checks whether the operands are valid for use in an LDD/STD + instruction. Assumes that RT, RT2, and RN are REG. This is + guaranteed by the patterns. Assumes that the address in the base + register RN is word aligned. Pattern guarantees that both memory + accesses use the same base register, the offsets are constants + within the range, and the gap between the offsets is 4. If preload + complete then check that registers are legal. WBACK indicates + whether address is updated. */ + +static bool +operands_ok_ldd_std (rtx rt, rtx rt2, rtx rn ATTRIBUTE_UNUSED, + HOST_WIDE_INT offset) +{ + unsigned int t, t2; + + if (!reload_completed) + return true; + + if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & -4, + (offset & (GET_MODE_SIZE (DImode) - 1) & 3 + ? 0 : -(-GET_MODE_SIZE (DImode) | -4) >> 1)))) + return false; + + t = REGNO (rt); + t2 = REGNO (rt2); + + if ((t2 == 63) + || (t % 2 != 0) /* First destination register is not even. */ + || (t2 != t + 1)) + return false; + + return true; +} + +/* Helper for gen_operands_ldd_std. Returns true iff the memory + operand MEM's address contains an immediate offset from the base + register and has no side effects, in which case it sets BASE and + OFFSET accordingly. */ + +static bool +mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset) +{ + rtx addr; + + gcc_assert (base != NULL && offset != NULL); + + /* TODO: Handle more general memory operand patterns, such as + PRE_DEC and PRE_INC. */ + + if (side_effects_p (mem)) + return false; + + /* Can't deal with subregs. */ + if (GET_CODE (mem) == SUBREG) + return false; + + gcc_assert (MEM_P (mem)); + + *offset = const0_rtx; + + addr = XEXP (mem, 0); + + /* If addr isn't valid for DImode, then we can't handle it. */ + if (!arc_legitimate_address_p (DImode, addr, + reload_in_progress || reload_completed)) + return false; + + if (REG_P (addr)) + { + *base = addr; + return true; + } + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); + return (REG_P (*base) && CONST_INT_P (*offset)); + } + + return false; +} + +/* Called from peephole2 to replace two word-size accesses with a + single LDD/STD instruction. Returns true iff we can generate a new + instruction sequence. That is, both accesses use the same base + register and the gap between constant offsets is 4. OPERANDS are + the operands found by the peephole matcher; OPERANDS[0,1] are + register operands, and OPERANDS[2,3] are the corresponding memory + operands. LOAD indicates whether the access is load or store. */ + +bool +gen_operands_ldd_std (rtx *operands, bool load, bool commute) +{ + int i, gap; + HOST_WIDE_INT offsets[2], offset; + int nops = 2; + rtx cur_base, cur_offset, tmp; + rtx base = NULL_RTX; + + /* Check that the memory references are immediate offsets from the + same base register. Extract the base register, the destination + registers, and the corresponding memory offsets. */ + for (i = 0; i < nops; i++) + { + if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset)) + return false; + + if (i == 0) + base = cur_base; + else if (REGNO (base) != REGNO (cur_base)) + return false; + + offsets[i] = INTVAL (cur_offset); + if (GET_CODE (operands[i]) == SUBREG) + { + tmp = SUBREG_REG (operands[i]); + gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp)); + operands[i] = tmp; + } + } + + /* Make sure there is no dependency between the individual loads. */ + if (load && REGNO (operands[0]) == REGNO (base)) + return false; /* RAW */ + + if (load && REGNO (operands[0]) == REGNO (operands[1])) + return false; /* WAW */ + + /* Make sure the instructions are ordered with lower memory access first. */ + if (offsets[0] > offsets[1]) + { + gap = offsets[0] - offsets[1]; + offset = offsets[1]; + + /* Swap the instructions such that lower memory is accessed first. */ + std::swap (operands[0], operands[1]); + std::swap (operands[2], operands[3]); + } + else + { + gap = offsets[1] - offsets[0]; + offset = offsets[0]; + } + + /* Make sure accesses are to consecutive memory locations. */ + if (gap != 4) + return false; + + /* Make sure we generate legal instructions. */ + if (operands_ok_ldd_std (operands[0], operands[1], base, offset)) + return true; + + if (load && commute) + { + /* Try reordering registers. */ + std::swap (operands [0], operands[1]); + if (operands_ok_ldd_std (operands[0], operands[1], base, offset)) + return true; + } + + return false; +} + #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 1ed230fa5f0..b968022e64a 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -6363,6 +6363,73 @@ archs4x, archs4xd, archs4xd_slow" [(set (reg:CC CC_REG) (compare:CC (match_dup 3) (ashift:SI (match_dup 1) (match_dup 2))))]) +(define_peephole2 ; std +[(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "register_operand" ""))] + "TARGET_LL64" + [(const_int 0)] +{ + if (!gen_operands_ldd_std (operands, false, false)) + FAIL; + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + emit_insn (gen_rtx_SET (operands[2], operands[0])); + DONE; + }) + +(define_peephole2 ; ldd + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_LL64" + [(const_int 0)] +{ + if (!gen_operands_ldd_std (operands, true, false)) + FAIL; + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + emit_insn (gen_rtx_SET (operands[0], operands[2])); + DONE; +}) + +;; We require consecutive registers for LDD instruction. Check if we +;; can reorder them and use an LDD. + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation. + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "register_operand" "") + (match_operator:SI 5 "commutative_operator" + [(match_operand 6 "register_operand" "") + (match_operand 7 "register_operand" "") ]))] + "TARGET_LL64 + && (((rtx_equal_p(operands[0], operands[6])) + && (rtx_equal_p(operands[1], operands[7]))) + || ((rtx_equal_p(operands[0], operands[7])) + && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] + { + if (!gen_operands_ldd_std (operands, true, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + ;; include the arc-FPX instructions (include "fpx.md")