@@ -247,6 +247,8 @@ struct tune_params
performance. The first element covers Thumb state and the second one
is for ARM state. */
bool logical_op_non_short_circuit[2];
+ /* Prefer Neon for 64-bit bitops. */
+ bool prefer_neon_for_64bits;
};
extern const struct tune_params *current_tune;
@@ -816,6 +816,10 @@ int arm_arch_thumb2;
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
+/* Nonzero if we should use Neon to handle 64-bits operations rather
+ than core registers. */
+int prefer_neon_for_64bits = 0;
+
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
we must report the mode of the memory reference from
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
@@ -895,6 +899,7 @@ const struct tune_params arm_slowmul_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
const struct tune_params arm_fastmul_tune =
@@ -908,6 +913,7 @@ const struct tune_params arm_fastmul_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
/* StrongARM has early execution of branches, so a sequence that is worth
@@ -924,6 +930,7 @@ const struct tune_params arm_strongarm_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
const struct tune_params arm_xscale_tune =
@@ -937,6 +944,7 @@ const struct tune_params arm_xscale_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
const struct tune_params arm_9e_tune =
@@ -950,6 +958,7 @@ const struct tune_params arm_9e_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
const struct tune_params arm_v6t2_tune =
@@ -963,6 +972,7 @@ const struct tune_params arm_v6t2_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
@@ -977,6 +987,7 @@ const struct tune_params arm_cortex_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
const struct tune_params arm_cortex_a15_tune =
@@ -990,6 +1001,7 @@ const struct tune_params arm_cortex_a15_tune =
arm_default_branch_cost,
true, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1006,6 +1018,7 @@ const struct tune_params arm_cortex_a5_tune =
arm_cortex_a5_branch_cost,
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
const struct tune_params arm_cortex_a9_tune =
@@ -1019,6 +1032,7 @@ const struct tune_params arm_cortex_a9_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@@ -1034,6 +1048,7 @@ const struct tune_params arm_v6m_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{false, false}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
const struct tune_params arm_fa726te_tune =
@@ -1047,6 +1062,7 @@ const struct tune_params arm_fa726te_tune =
arm_default_branch_cost,
false, /* Prefer LDRD/STRD. */
{true, true}, /* Prefer non short circuit. */
+ false /* Prefer Neon for 64-bits bitops. */
};
@@ -2077,6 +2093,12 @@ arm_option_override (void)
global_options.x_param_values,
global_options_set.x_param_values);
+ /* Use Neon to perform 64-bits operations rather than core
+ registers. */
+ prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
+ if (use_neon_for_64bits == 1)
+ prefer_neon_for_64bits = true;
+
/* Use the alternative scheduling-pressure algorithm by default. */
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
global_options.x_param_values,
@@ -356,6 +356,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
|| (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
+/* Should NEON be used for 64-bits bitops. */
+#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
+
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
then TARGET_AAPCS_BASED must be true -- but the converse does not
hold. TARGET_BPABI implies the use of the BPABI runtime library,
@@ -541,6 +544,10 @@ extern int arm_arch_arm_hwdiv;
/* Nonzero if chip supports integer division instruction in Thumb mode. */
extern int arm_arch_thumb_hwdiv;
+/* Nonzero if we should use Neon to handle 64-bits operations rather
+ than core registers. */
+extern int prefer_neon_for_64bits;
+
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif
@@ -202,7 +202,7 @@
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
; arm_arch6. This attribute is used to compute attribute "enabled",
; use type "any" to enable an alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,nota8,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
(const_string "any"))
(define_attr "arch_enabled" "no,yes"
@@ -241,18 +241,18 @@
(eq_attr "tune" "cortexa8"))
(const_string "yes")
- (and (eq_attr "arch" "neon_onlya8")
- (eq_attr "tune" "cortexa8")
- (match_test "TARGET_NEON"))
+ (and (eq_attr "arch" "avoid_neon_for_64bits")
+ (match_test "TARGET_NEON")
+ (not (match_test "TARGET_PREFER_NEON_64BITS")))
(const_string "yes")
(and (eq_attr "arch" "nota8")
(not (eq_attr "tune" "cortexa8")))
(const_string "yes")
- (and (eq_attr "arch" "neon_nota8")
- (not (eq_attr "tune" "cortexa8"))
- (match_test "TARGET_NEON"))
+ (and (eq_attr "arch" "neon_for_64bits")
+ (match_test "TARGET_NEON")
+ (match_test "TARGET_PREFER_NEON_64BITS"))
(const_string "yes")
(and (eq_attr "arch" "iwmmxt2")
@@ -4370,7 +4370,7 @@
[(set_attr "length" "*,8,8,*")
(set_attr "predicable" "no,yes,yes,no")
(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
- (set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
)
(define_expand "one_cmplsi2"
@@ -251,3 +251,7 @@ that may trigger Cortex-M3 errata.
munaligned-access
Target Report Var(unaligned_access) Init(2)
Enable unaligned word and halfword accesses to packed data.
+
+mneon-for-64bits
+Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
+Use Neon to perform 64-bits operations rather than core registers.
@@ -617,7 +617,7 @@
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
(set_attr "conds" "*,clob,clob,*,clob,clob,clob")
(set_attr "length" "*,8,8,*,8,8,8")
- (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
(define_insn "*sub<mode>3_neon"
@@ -654,7 +654,7 @@
[(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
(set_attr "conds" "*,clob,clob,clob,*")
(set_attr "length" "*,8,8,8,*")
- (set_attr "arch" "nota8,*,*,*,onlya8")]
+ (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
(define_insn "*mul<mode>3_neon"
@@ -816,7 +816,7 @@
}
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
(set_attr "length" "*,*,8,8,*,*")
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
)
;; The concrete forms of the Neon immediate-logic instructions are vbic and
@@ -861,7 +861,7 @@
}
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
(set_attr "length" "*,*,8,8,*,*")
- (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
)
(define_insn "orn<mode>3_neon"
@@ -957,7 +957,7 @@
veor\t%P0, %P1, %P2"
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
(set_attr "length" "*,8,8,*")
- (set_attr "arch" "nota8,*,*,onlya8")]
+ (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
)
(define_insn "one_cmpl<mode>2"
@@ -1279,7 +1279,7 @@
}
DONE;
}"
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,*,*")]
)
@@ -1380,7 +1380,7 @@
DONE;
}"
- [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
(set_attr "opt" "*,*,speed,speed,*,*")]
)