Comments
Patch
2012-02-23 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.md (zero_extend<mode>di2): Add extra alternatives
for NEON registers.
(extend<mode>di2): Likewise.
Prevent extend splitters doing NEON alternatives.
* config/arm/iterators.md (qhs_extenddi_cstr, qhs_zextenddi_cstr):
Adjust constraints to add new alternatives.
* config/arm/neon.md: Add splitters for zero- and sign-extend.
gcc/testsuite/
* gcc.target/arm/neon-extend-1.c: New file.
* gcc.target/arm/neon-extend-2.c: New file.
---
gcc/config/arm/arm.md | 26 +++++++++++++++-----------
gcc/config/arm/iterators.md | 4 ++--
gcc/config/arm/neon.md | 22 ++++++++++++++++++++++
gcc/testsuite/gcc.target/arm/neon-extend-1.c | 13 +++++++++++++
gcc/testsuite/gcc.target/arm/neon-extend-2.c | 13 +++++++++++++
5 files changed, 65 insertions(+), 13 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-1.c
create mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-2.c
@@ -4479,33 +4479,35 @@
;; Zero and sign extension instructions.
(define_insn "zero_extend<mode>di2"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
+ [(set (match_operand:DI 0 "s_register_operand" "=w, r")
(zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
"<qhs_zextenddi_cstr>")))]
"TARGET_32BIT <qhs_zextenddi_cond>"
"#"
- [(set_attr "length" "8")
- (set_attr "ce_count" "2")
- (set_attr "predicable" "yes")]
+ [(set_attr "length" "8,8")
+ (set_attr "ce_count" "2,2")
+ (set_attr "predicable" "yes,yes")]
)
(define_insn "extend<mode>di2"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
+ [(set (match_operand:DI 0 "s_register_operand" "=w,r")
(sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
"<qhs_extenddi_cstr>")))]
"TARGET_32BIT <qhs_sextenddi_cond>"
"#"
- [(set_attr "length" "8")
- (set_attr "ce_count" "2")
- (set_attr "shift" "1")
- (set_attr "predicable" "yes")]
+ [(set_attr "length" "8,8")
+ (set_attr "ce_count" "2,2")
+ (set_attr "shift" "1,1")
+ (set_attr "predicable" "yes,yes")]
)
;; Splits for all extensions to DImode
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && (!TARGET_NEON
+ || (reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))))"
[(set (match_dup 0) (match_dup 1))]
{
rtx lo_part = gen_lowpart (SImode, operands[0]);
@@ -4531,7 +4533,9 @@
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && (!TARGET_NEON
+ || (reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))))"
[(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
{
rtx lo_part = gen_lowpart (SImode, operands[0]);
@@ -409,8 +409,8 @@
(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
(HI "nonimmediate_operand")
(QI "arm_reg_or_extendqisi_mem_op")])
-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
-(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r,r") (HI "r,rm") (QI "r,rUq")])
+(define_mode_attr qhs_zextenddi_cstr [(SI "r,r") (HI "r,rm") (QI "r,rm")])
;; Mode attributes used for fixed-point support.
(define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16")
@@ -5879,3 +5879,25 @@
(const_string "neon_fp_vadd_qqq_vabs_qq"))
(const_string "neon_int_5")))]
)
+
+;; Copy from core-to-neon regs, then extend, not vice-versa
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
+ {
+ operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
+ {
+ operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+ })
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+void
+f (unsigned int a)
+{
+ unsigned long long b = a;
+ asm volatile ("@ extended to %0" : : "w" (b));
+}
+
+/* { dg-final { scan-assembler "vdup.32" } } */
+/* { dg-final { scan-assembler "vshr.u64" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+void
+f (int a)
+{
+ long long b = a;
+ asm volatile ("@ extended to %0" : : "w" (b));
+}
+
+/* { dg-final { scan-assembler "vdup.32" } } */
+/* { dg-final { scan-assembler "vshr.s64" } } */
Hi All,

This patch changes how the compiler handles SImode-to-DImode extends whose result also moves from core registers to VFP/NEON registers.

Currently, the compiler does the extend in core registers first, and then does the move. This adds to register pressure, which I would imagine to be a bad thing. If the value is not in a properly aligned register (the first parameter to a function never is) then it also has to move the value around.

With my patch, it first moves the SImode value into the NEON register, and then extends it, which uses no extra registers.

Zero extend, before and after (assuming the value is passed in r0):

    mov    r2, r0         |  vdup.32   d16, r0
    movs   r3, #0         |  vshr.u64  d16, d16, #32
    fmdrr  d16, r2, r3    |

Sign extend:

    mov    r2, r0         |  vdup.32   d16, r0
    asrs   r3, r0, #31    |  vshr.s64  d16, d16, #32
    fmdrr  d16, r2, r3    |

OK for 4.8?

Andrew

P.S. I have experimented with doing zero-extends something like

    vmov.i64 d7, #0
    fmsr     s14, r0

But, somehow the immediate load doesn't seem to work, and it limits the target register to VFP_LO_REGS. It's also not possible to load into only s15, so I'm not sure there's any advantage.