2010-12-08 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm-protos.h (const_ok_for_op): Add prototype.
* config/arm/arm.c (const_ok_for_op): Add support for movw/addw/subw.
(count_insns_for_constant, find_best_start): Delete functions.
(optimal_immediate_sequence): New function.
(optimal_immediate_sequence_1): New function.
(arm_gen_constant): Remove old movw support.
Move constant splitting code to optimal_immediate_sequence.
Rewrite constant negation/invertion code.
* config/arm/arm.md (*arm_addsi3): Add addw/subw support.
(*arm_subsi3_insn): Add subw support.
(*arm_movsi_insn): Change 'j' constraint to 'ja'.
* config/arm/constraints.md (j): Rename ...
(ja): ... the this.
(jb, jB): New constraints.
* config/arm/thumb2.md (*thumb2_movsi_insn): Change 'j' constraint
to 'ja'.
* config/arm/vfp.md (*arm_movsi_vfp): Likewise.
gcc/testsuite/
* gcc.target/arm/thumb2-replicated-constant1.c: New file.
* gcc.target/arm/thumb2-replicated-constant2.c: New file.
* gcc.target/arm/thumb2-replicated-constant3.c: New file.
* gcc.target/arm/thumb2-replicated-constant4.c: New file.
---
src/gcc-mainline/gcc/config/arm/arm-protos.h | 1
src/gcc-mainline/gcc/config/arm/arm.c | 456 ++++++++++++--------
src/gcc-mainline/gcc/config/arm/arm.md | 35 +-
src/gcc-mainline/gcc/config/arm/constraints.md | 18 +
src/gcc-mainline/gcc/config/arm/thumb2.md | 4
src/gcc-mainline/gcc/config/arm/vfp.md | 8
.../gcc.target/arm/thumb2-replicated-constant1.c | 27 +
.../gcc.target/arm/thumb2-replicated-constant2.c | 75 +++
.../gcc.target/arm/thumb2-replicated-constant3.c | 28 +
.../gcc.target/arm/thumb2-replicated-constant4.c | 22 +
10 files changed, 476 insertions(+), 198 deletions(-)
create mode 100644 src/gcc-mainline/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c
create mode 100644 src/gcc-mainline/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c
create mode 100644 src/gcc-mainline/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c
create mode 100644 src/gcc-mainline/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c
@@ -46,6 +46,7 @@ extern bool arm_vector_mode_supported_p (enum machine_mode);
extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
extern int const_ok_for_arm (HOST_WIDE_INT);
+extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
@@ -81,7 +81,6 @@ inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
-static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
@@ -129,7 +128,12 @@ static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
-static int count_insns_for_constant (HOST_WIDE_INT, int);
+static int optimal_immediate_sequence (enum rtx_code code,
+ unsigned HOST_WIDE_INT val,
+ int return_sequence[]);
+static int optimal_immediate_sequence_1 (enum rtx_code code,
+ unsigned HOST_WIDE_INT val,
+ int return_sequence[], int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
@@ -2426,7 +2430,7 @@ const_ok_for_arm (HOST_WIDE_INT i)
}
/* Return true if I is a valid constant for the operation CODE. */
-static int
+int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
if (const_ok_for_arm (i))
@@ -2434,7 +2438,21 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
switch (code)
{
+ case SET:
+ /* See if we can use movw. */
+ if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
+ return 1;
+ else
+ return 0;
+
case PLUS:
+ /* See if we can use addw or subw. */
+ if (TARGET_THUMB2
+ && ((i & 0xfffff000) == 0
+ || ((-i) & 0xfffff000) == 0))
+ return 1;
+ /* else fall through. */
+
case COMPARE:
case EQ:
case NE:
@@ -2550,68 +2568,42 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1);
}
-/* Return the number of instructions required to synthesize the given
- constant, if we start emitting them from bit-position I. */
+/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
+ ARM/THUMB2 immediates, and add up to VAL.
+ RETURN_SEQUENCE must be an int[4].
+ Thr function return value gives the number of insns required. */
static int
-count_insns_for_constant (HOST_WIDE_INT remainder, int i)
-{
- HOST_WIDE_INT temp1;
- int step_size = TARGET_ARM ? 2 : 1;
- int num_insns = 0;
-
- gcc_assert (TARGET_ARM || i == 0);
-
- do
- {
- int end;
-
- if (i <= 0)
- i += 32;
- if (remainder & (((1 << step_size) - 1) << (i - step_size)))
- {
- end = i - 8;
- if (end < 0)
- end += 32;
- temp1 = remainder & ((0x0ff << end)
- | ((i < end) ? (0xff >> (32 - end)) : 0));
- remainder &= ~temp1;
- num_insns++;
- i -= 8 - step_size;
- }
- i -= step_size;
- } while (remainder);
- return num_insns;
-}
-
-static int
-find_best_start (unsigned HOST_WIDE_INT remainder)
+optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
+ int return_sequence[])
{
int best_consecutive_zeros = 0;
int i;
int best_start = 0;
+ int insns1, insns2;
+ int tmp_sequence[4];
/* If we aren't targetting ARM, the best place to start is always at
- the bottom. */
- if (! TARGET_ARM)
- return 0;
-
- for (i = 0; i < 32; i += 2)
+ the bottom, otherwise look more closely. */
+ if (TARGET_ARM)
{
- int consecutive_zeros = 0;
-
- if (!(remainder & (3 << i)))
+ for (i = 0; i < 32; i += 2)
{
- while ((i < 32) && !(remainder & (3 << i)))
- {
- consecutive_zeros += 2;
- i += 2;
- }
- if (consecutive_zeros > best_consecutive_zeros)
+ int consecutive_zeros = 0;
+
+ if (!(val & (3 << i)))
{
- best_consecutive_zeros = consecutive_zeros;
- best_start = i - consecutive_zeros;
+ while ((i < 32) && !(val & (3 << i)))
+ {
+ consecutive_zeros += 2;
+ i += 2;
+ }
+ if (consecutive_zeros > best_consecutive_zeros)
+ {
+ best_consecutive_zeros = consecutive_zeros;
+ best_start = i - consecutive_zeros;
+ }
+ i -= 2;
}
- i -= 2;
}
}
@@ -2638,13 +2630,161 @@ find_best_start (unsigned HOST_WIDE_INT remainder)
the constant starting from `best_start', and also starting from
zero (i.e. with bit 31 first to be output). If `best_start' doesn't
yield a shorter sequence, we may as well use zero. */
+ insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
if (best_start != 0
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
- && (count_insns_for_constant (remainder, 0) <=
- count_insns_for_constant (remainder, best_start)))
- best_start = 0;
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
+ {
+ insns2 = optimal_immediate_sequence_1 (code, val, tmp_sequence, 0);
+ if (insns2 <= insns1)
+ {
+ memcpy (return_sequence, tmp_sequence, sizeof(tmp_sequence));
+ insns1 = insns2;
+ }
+ }
+
+ return insns1;
+}
+
+/* As for optimal_immediate_sequence, but starting at bit-position I. */
+static int
+optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
+ int return_sequence[], int i)
+{
+ int remainder = val & 0xffffffff;
+ int insns = 0;
+
+ /* Try and find a way of doing the job in either two or three
+ instructions.
+
+ In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
+ location. We start at position I. This may be the MSB, or
+ optimial_immediate_sequence may have positioned it at the largest block
+ of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
+ wrapping around to the top of the word when we drop off the bottom.
+ In the worst case this code should produce no more than four insns.
+
+ In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
+ constants, shifted to any arbitrary location. We should always start
+ at the MSB. */
+ do
+ {
+ int end;
+ int b1, b2, b3, b4;
+ unsigned HOST_WIDE_INT result;
+ int loc;
+
+ gcc_assert (insns < 4);
+
+ if (i <= 0)
+ i += 32;
+
+ /* First, find the next normal 12/8-bit shifted/rotated immediate. */
+ if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
+ {
+ loc = i;
+ if (i <= 12 && TARGET_THUMB2 && code == PLUS)
+ /* We can use addw/subw for the last 12 bits. */
+ result = remainder;
+ else
+ {
+ /* Use an 8-bit shifted/rotated immediate. */
+ end = i - 8;
+ if (end < 0)
+ end += 32;
+ result = remainder & ((0x0ff << end)
+ | ((i < end) ? (0xff >> (32 - end))
+ : 0));
+ i -= 8;
+ }
+ }
+ else
+ {
+ /* Arm allows rotates by a multiple of two. Thumb-2 allows
+ arbitrary shifts. */
+ i -= TARGET_ARM ? 2 : 1;
+ continue;
+ }
+
+ /* Next, see if we can do a better job with a thumb2 replicated
+ constant.
+
+ We do it this way around to catch the cases like 0x01F001E0 where
+ two 8-bit immediates would work, but a replicated constant would
+ make it worse.
+
+ TODO: 16-bit constants that don't clear all the bits, but still win.
+ TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
+ if (TARGET_THUMB2)
+ {
+ b1 = (remainder & 0xff000000) >> 24;
+ b2 = (remainder & 0x00ff0000) >> 16;
+ b3 = (remainder & 0x0000ff00) >> 8;
+ b4 = remainder & 0xff;
- return best_start;
+ if (loc > 24)
+ {
+ /* The 8-bit immediate already found clears b1 (and maybe b2),
+ but must leave b3 and b4 alone. */
+
+ /* First try to find a 32-bit replicated constant that clears
+ almost everything. We can assume that we can't do it in one,
+ or else we wouldn't be here. */
+ unsigned int tmp = b1 & b2 & b3 & b4;
+ unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
+ + (tmp << 24);
+ unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
+ + (tmp == b3) + (tmp == b4);
+ if (tmp
+ && (matching_bytes >= 3
+ || (matching_bytes == 2
+ && const_ok_for_op (remainder & ~tmp2, code))))
+ {
+ /* At least 3 of the bytes match, and the fourth has at
+ least as many bits set, or two of the bytes match
+ and it will only require one more insn to finish. */
+ result = tmp2;
+ i = tmp != b1 ? 32
+ : tmp != b2 ? 24
+ : tmp != b3 ? 16
+ : 8;
+ }
+
+ /* Second, try to find a 16-bit replicated constant that can
+ leave three of the bytes clear. If b2 or b4 is already
+ zero, then we can. If the 8-bit from above would not
+ clear b2 anyway, then we still win. */
+ else if (b1 == b3 && (!b2 || !b4
+ || (remainder & 0x00ff0000 & ~result)))
+ {
+ result = remainder & 0xff00ff00;
+ i = 24;
+ }
+ }
+ else if (loc > 16)
+ {
+ /* The 8-bit immediate already found clears b2 (and maybe b3)
+ and we don't get here unless b1 is alredy clear, but it will
+ leave b4 unchanged. */
+
+ /* If we can clear b2 and b4 at once, then we win, since the
+ 8-bits couldn't possibly reach that far. */
+ if (b2 == b4)
+ {
+ result = remainder & 0x00ff00ff;
+ i = 16;
+ }
+ }
+ }
+
+ return_sequence[insns++] = result;
+ remainder &= ~result;
+
+ if (code == SET || code == MINUS)
+ code = PLUS;
+ }
+ while (remainder);
+
+ return insns;
}
/* Emit an instruction with the indicated PATTERN. If COND is
@@ -2661,7 +2801,6 @@ emit_constant_insn (rtx cond, rtx pattern)
/* As above, but extra parameter GENERATE which, if clear, suppresses
RTL generation. */
-/* ??? This needs more work for thumb2. */
static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
@@ -2673,15 +2812,14 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
int final_invert = 0;
int can_negate_initial = 0;
int i;
- int num_bits_set = 0;
int set_sign_bit_copies = 0;
int clear_sign_bit_copies = 0;
int clear_zero_bit_copies = 0;
int set_zero_bit_copies = 0;
- int insns = 0;
+ int insns = 0, neg_insns, inv_insns;
unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
- int step_size = TARGET_ARM ? 2 : 1;
+ int immediates[4], neg_immediates[4], inv_immediates[4];
/* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
@@ -2793,9 +2931,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
}
/* If we can do it in one insn get out quickly. */
- if (const_ok_for_arm (val)
- || (can_negate_initial && const_ok_for_arm (-val))
- || (can_invert && const_ok_for_arm (~val)))
+ if (const_ok_for_op (val, code))
{
if (generate)
emit_constant_insn (cond,
@@ -2848,15 +2984,6 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
switch (code)
{
case SET:
- /* See if we can use movw. */
- if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
- {
- if (generate)
- emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
- GEN_INT (val)));
- return 1;
- }
-
/* See if we can do this by sign_extending a constant that is known
to be negative. This is a good, way of doing it, since the shift
may well merge into a subsequent insn. */
@@ -3207,121 +3334,100 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
break;
}
- for (i = 0; i < 32; i++)
- if (remainder & (1 << i))
- num_bits_set++;
+ /* Calculate what the instruction sequences would be if we generated it
+ normally, negated, or inverted. */
+ if (code == AND)
+ /* AND cannot be split into multiple insns, so invert and use BIC. */
+ insns = 99;
+ else
+ insns = optimal_immediate_sequence (code, remainder, immediates);
- if ((code == AND)
- || (code != IOR && can_invert && num_bits_set > 16))
- remainder ^= 0xffffffff;
- else if (code == PLUS && num_bits_set > 16)
- remainder = (-remainder) & 0xffffffff;
+ if (can_negate)
+ neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
+ neg_immediates);
+ else
+ neg_insns = 99;
+
+ if (can_invert)
+ inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
+ inv_immediates);
+ else
+ inv_insns = 99;
- /* For XOR, if more than half the bits are set and there's a sequence
- of more than 8 consecutive ones in the pattern then we can XOR by the
- inverted constant and then invert the final result; this may save an
- instruction and might also lead to the final mvn being merged with
- some other operation. */
- else if (code == XOR && num_bits_set > 16
- && (count_insns_for_constant (remainder ^ 0xffffffff,
- find_best_start
- (remainder ^ 0xffffffff))
- < count_insns_for_constant (remainder,
- find_best_start (remainder))))
+ /* Is the negated immediate sequence more efficient? */
+ if (neg_insns < insns && neg_insns <= inv_insns)
{
- remainder ^= 0xffffffff;
- final_invert = 1;
+ insns = neg_insns;
+ memcpy (immediates, neg_immediates, sizeof (immediates));
}
else
+ can_negate = 0;
+
+ /* Is the inverted immediate sequence more efficient?
+ We must allow for an extra NOT instruction for XOR operations, although
+ there is some chance that the final 'mvn' will get optimized later. */
+ if (inv_insns < insns && (code != XOR || (inv_insns + 1) < insns))
{
- can_invert = 0;
- can_negate = 0;
- }
+ insns = inv_insns;
+ memcpy (immediates, inv_immediates, sizeof (immediates));
- /* Now try and find a way of doing the job in either two or three
- instructions.
- We start by looking for the largest block of zeros that are aligned on
- a 2-bit boundary, we then fill up the temps, wrapping around to the
- top of the word when we drop off the bottom.
- In the worst case this code should produce no more than four insns.
- Thumb-2 constants are shifted, not rotated, so the MSB is always the
- best place to start. */
+ if (code == XOR)
+ final_invert = 1;
+ }
+ else
+ can_invert = 0;
- /* ??? Use thumb2 replicated constants when the high and low halfwords are
- the same. */
- {
- /* Now start emitting the insns. */
- i = find_best_start (remainder);
- do
- {
- int end;
+ /* Now output the chosen sequence as instructions. */
+ if (generate)
+ {
+ for (i = 0; i < insns; i++)
+ {
+ rtx new_src, temp1_rtx;
- if (i <= 0)
- i += 32;
- if (remainder & (3 << (i - 2)))
- {
- end = i - 8;
- if (end < 0)
- end += 32;
- temp1 = remainder & ((0x0ff << end)
- | ((i < end) ? (0xff >> (32 - end)) : 0));
- remainder &= ~temp1;
-
- if (generate)
- {
- rtx new_src, temp1_rtx;
+ temp1 = immediates[i];
- if (code == SET || code == MINUS)
- {
- new_src = (subtargets ? gen_reg_rtx (mode) : target);
- if (can_invert && code != MINUS)
- temp1 = ~temp1;
- }
- else
- {
- if ((final_invert || remainder) && subtargets)
- new_src = gen_reg_rtx (mode);
- else
- new_src = target;
- if (can_invert)
- temp1 = ~temp1;
- else if (can_negate)
- temp1 = -temp1;
- }
+ if (code == SET || code == MINUS)
+ {
+ new_src = (subtargets ? gen_reg_rtx (mode) : target);
+ if (can_invert && code != MINUS)
+ temp1 = ~temp1;
+ }
+ else
+ {
+ if ((final_invert || i < (insns - 1)) && subtargets)
+ new_src = gen_reg_rtx (mode);
+ else
+ new_src = target;
+ if (can_invert)
+ temp1 = ~temp1;
+ else if (can_negate)
+ temp1 = -temp1;
+ }
- temp1 = trunc_int_for_mode (temp1, mode);
- temp1_rtx = GEN_INT (temp1);
+ temp1 = trunc_int_for_mode (temp1, mode);
+ temp1_rtx = GEN_INT (temp1);
- if (code == SET)
- ;
- else if (code == MINUS)
- temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
- else
- temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
+ if (code == SET)
+ ;
+ else if (code == MINUS)
+ temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
+ else
+ temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
- emit_constant_insn (cond,
- gen_rtx_SET (VOIDmode, new_src,
- temp1_rtx));
- source = new_src;
- }
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, new_src,
+ temp1_rtx));
+ source = new_src;
- if (code == SET)
- {
- can_invert = 0;
- code = PLUS;
- }
- else if (code == MINUS)
+ if (code == SET)
+ {
+ can_invert = 0;
code = PLUS;
-
- insns++;
- i -= 8 - step_size;
- }
- /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
- shifts. */
- i -= step_size;
- }
- while (remainder);
- }
+ }
+ else if (code == MINUS)
+ code = PLUS;
+ }
+ }
if (final_invert)
{
@@ -696,21 +696,24 @@
;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will
;; put the duplicated register first, and not try the commutative version.
(define_insn_and_split "*arm_addsi3"
- [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r")
- (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk")
- (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
+ (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
+ (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,jb,jb,L, L,jB,jB,?n")))]
"TARGET_32BIT"
"@
add%?\\t%0, %1, %2
add%?\\t%0, %1, %2
add%?\\t%0, %2, %1
+ addw%?\\t%0, %1, %2
+ addw%?\\t%0, %1, %2
sub%?\\t%0, %1, #%n2
sub%?\\t%0, %1, #%n2
+ subw%?\\t%0, %1, #%n2
+ subw%?\\t%0, %1, #%n2
#"
"TARGET_32BIT
&& GET_CODE (operands[2]) == CONST_INT
- && !(const_ok_for_arm (INTVAL (operands[2]))
- || const_ok_for_arm (-INTVAL (operands[2])))
+ && !const_ok_for_op (INTVAL (operands[2]), PLUS)
&& (reload_completed || !arm_eliminable_register (operands[1]))"
[(clobber (const_int 0))]
"
@@ -719,7 +722,7 @@
operands[1], 0);
DONE;
"
- [(set_attr "length" "4,4,4,4,4,16")
+ [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
(set_attr "predicable" "yes")]
)
@@ -1173,27 +1176,31 @@
; ??? Check Thumb-2 split length
(define_insn_and_split "*arm_subsi3_insn"
- [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r,r")
- (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n,r")
- (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r,?n")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r, k, r,r")
+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k, rk,k, ?n,r")
+ (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, jb,jb,r,?n")))]
"TARGET_32BIT"
"@
rsb%?\\t%0, %2, %1
sub%?\\t%0, %1, %2
sub%?\\t%0, %1, %2
+ subw%?\\t%0, %1, %2
+ subw%?\\t%0, %1, %2
#
#"
"&& ((GET_CODE (operands[1]) == CONST_INT
- && !const_ok_for_arm (INTVAL (operands[1])))
+ && !(const_ok_for_arm (INTVAL (operands[1]))
+ || satisfies_constraint_jb (operands[2])))
|| (GET_CODE (operands[2]) == CONST_INT
- && !const_ok_for_arm (INTVAL (operands[2]))))"
+ && !(const_ok_for_arm (INTVAL (operands[2]))
+ || satisfies_constraint_jb (operands[2]))))"
[(clobber (const_int 0))]
"
arm_split_constant (MINUS, SImode, curr_insn,
INTVAL (operands[1]), operands[0], operands[2], 0);
DONE;
"
- [(set_attr "length" "4,4,4,16,16")
+ [(set_attr "length" "4,4,4,4,4,16,16")
(set_attr "predicable" "yes")]
)
@@ -5128,8 +5135,8 @@
)
(define_insn "*arm_movsi_insn"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m")
- (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, rk,m")
+ (match_operand:SI 1 "general_operand" "rk, I,K,ja,mi,rk"))]
"TARGET_ARM && ! TARGET_IWMMXT
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
&& ( register_operand (operands[0], SImode)
@@ -25,13 +25,13 @@
;; In ARM state, 'l' is an alias for 'r'
;; The following normal constraints have been used:
-;; in ARM/Thumb-2 state: G, H, I, j, J, K, L, M
+;; in ARM/Thumb-2 state: G, H, I, J, K, L, M
;; in Thumb-1 state: I, J, K, L, M, N, O
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, ja, jb, jB
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -67,13 +67,25 @@
(define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS"
"In Thumb state the core registers @code{r8}-@code{r15}.")
-(define_constraint "j"
+(define_constraint "ja"
"A constant suitable for a MOVW instruction. (ARM/Thumb-2)"
(and (match_test "TARGET_32BIT && arm_arch_thumb2")
(ior (match_code "high")
(and (match_code "const_int")
(match_test "(ival & 0xffff0000) == 0")))))
+(define_constraint "jb"
+ "A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)"
+ (and (match_test "TARGET_THUMB2")
+ (and (match_code "const_int")
+ (match_test "(ival & 0xfffff000) == 0"))))
+
+(define_constraint "jB"
+ "A constant that satisfies the jb constrant if negated."
+ (and (match_test "TARGET_THUMB2")
+ (and (match_code "const_int")
+ (match_test "((-ival) & 0xfffff000) == 0"))))
+
(define_register_constraint "k" "STACK_REG"
"@internal The stack register.")
@@ -165,8 +165,8 @@
;; regs. The high register alternatives are not taken into account when
;; choosing register preferences in order to reflect their expense.
(define_insn "*thumb2_movsi_insn"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m")
- (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l ,*hk,m,*m")
+ (match_operand:SI 1 "general_operand" "rk ,I,K,ja,mi,*mi,l,*hk"))]
"TARGET_THUMB2 && ! TARGET_IWMMXT
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
&& ( register_operand (operands[0], SImode)
@@ -50,8 +50,8 @@
;; ??? For now do not allow loading constants into vfp regs. This causes
;; problems because small constants get converted into adds.
(define_insn "*arm_movsi_vfp"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv")
- (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, rk,m ,*t,r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rk, I,K,ja,mi,rk,r,*t,*t,*Uvi,*t"))]
"TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT
&& ( s_register_operand (operands[0], SImode)
|| s_register_operand (operands[1], SImode))"
@@ -90,8 +90,8 @@
;; See thumb2.md:thumb2_movsi_insn for an explanation of the split
;; high/low register alternatives for loads and stores here.
(define_insn "*thumb2_movsi_vfp"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
- (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rk, I,K,ja,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
"TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
&& ( s_register_operand (operands[0], SImode)
|| s_register_operand (operands[1], SImode))"
new file mode 100644
@@ -0,0 +1,27 @@
+/* Ensure simple replicated constant immediates work. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a + 0xfefefefe;
+}
+
+/* { dg-final { scan-assembler "add.*#-16843010" } } */
+
+int
+foo2 (int a)
+{
+ return a - 0xab00ab00;
+}
+
+/* { dg-final { scan-assembler "sub.*#-1426019584" } } */
+
+int
+foo3 (int a)
+{
+ return a & 0x00cd00cd;
+}
+
+/* { dg-final { scan-assembler "and.*#13435085" } } */
new file mode 100644
@@ -0,0 +1,75 @@
+/* Ensure split constants can use replicated patterns. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a + 0xfe00fe01;
+}
+
+/* { dg-final { scan-assembler "add.*#-33489408" } } */
+/* { dg-final { scan-assembler "add.*#1" } } */
+
+int
+foo2 (int a)
+{
+ return a + 0xdd01dd00;
+}
+
+/* { dg-final { scan-assembler "add.*#-587145984" } } */
+/* { dg-final { scan-assembler "add.*#65536" } } */
+
+int
+foo3 (int a)
+{
+ return a + 0x00443344;
+}
+
+/* { dg-final { scan-assembler "add.*#4456516" } } */
+/* { dg-final { scan-assembler "add.*#13056" } } */
+
+int
+foo4 (int a)
+{
+ return a + 0x77330033;
+}
+
+/* { dg-final { scan-assembler "add.*#1996488704" } } */
+/* { dg-final { scan-assembler "add.*#3342387" } } */
+
+int
+foo5 (int a)
+{
+ return a + 0x11221122;
+}
+
+/* { dg-final { scan-assembler "add.*#285217024" } } */
+/* { dg-final { scan-assembler "add.*#2228258" } } */
+
+int
+foo6 (int a)
+{
+ return a + 0x66666677;
+}
+
+/* { dg-final { scan-assembler "add.*#1717986918" } } */
+/* { dg-final { scan-assembler "add.*#17" } } */
+
+int
+foo7 (int a)
+{
+ return a + 0x99888888;
+}
+
+/* { dg-final { scan-assembler "add.*#-2004318072" } } */
+/* { dg-final { scan-assembler "add.*#285212672" } } */
+
+int
+foo8 (int a)
+{
+ return a + 0xdddddfff;
+}
+
+/* { dg-final { scan-assembler "add.*#-572662307" } } */
+/* { dg-final { scan-assembler "addw.*#546" } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* Ensure negated/inverted replicated constant immediates work. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ return a | 0xffffff00;
+}
+
+/* { dg-final { scan-assembler "orn.*#255" } } */
+
+int
+foo2 (int a)
+{
+ return a & 0xffeeffee;
+}
+
+/* { dg-final { scan-assembler "bic.*#1114129" } } */
+
+int
+foo3 (int a)
+{
+ return a & 0xaaaaaa00;
+}
+
+/* { dg-final { scan-assembler "and.*#-1431655766" } } */
+/* { dg-final { scan-assembler "bic.*#170" } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* Ensure replicated constants don't make things worse. */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+ /* It might be tempting to use 0x01000100, but it wouldn't help. */
+ return a + 0x01f001e0;
+}
+
+/* { dg-final { scan-assembler "add.*#32505856" } } */
+/* { dg-final { scan-assembler "add.*#480" } } */
+
+int
+foo2 (int a)
+{
+ return a + 0x0f100e10;
+}
+
+/* { dg-final { scan-assembler "add.*#252706816" } } */
+/* { dg-final { scan-assembler "add.*#3600" } } */