From patchwork Fri Jan 25 18:22:42 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [ARM,2/5] Update cortex-a7 vfp/neon pipeline description From: Greta Yorsh X-Patchwork-Id: 215822 Message-Id: <000e01cdfb28$f79f0570$e6dd1050$@yorsh@arm.com> To: "GCC Patches" Cc: "Richard Earnshaw" , "Ramana Radhakrishnan" , , , "Greta Yorsh" Date: Fri, 25 Jan 2013 18:22:42 -0000 This patch updates the description of vmul, vdiv, vsqrt, vmla, vmls, vfma, vfms operations for vfp and neon. It uses ffmas and ffmad type attributes introduced by the previous patch. gcc/ 2013-01-03 Greta Yorsh * config/arm/cortex-a7.md (cortex_a7_neon_mul, cortex_a7_neon_mla): New reservations. (cortex_a7_fpfmad): New reservation. (cortex_a7_fpmacs): Use ffmas and update required units. (cortex_a7_fpmuld): Update required units and latency. (cortex_a7_fpmacd): Likewise. (cortex_a7_fdivs, cortex_a7_fdivd): Likewise. (cortex_a7_neon): Likewise. (bypass): Update participating units. diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md index 74d4ca0..ce70576 100644 --- a/gcc/config/arm/cortex-a7.md +++ b/gcc/config/arm/cortex-a7.md @@ -202,6 +202,9 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Floating-point arithmetic. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Neon integer, neon floating point, and single-precision floating +;; point instructions of the same type have the same timing +;; characteristics, but neon instructions cannot dual-issue. (define_insn_reservation "cortex_a7_fpalu" 4 (and (eq_attr "tune" "cortexa7") @@ -229,18 +232,37 @@ (eq_attr "neon_type" "none"))) "cortex_a7_ex1+cortex_a7_fpmul_pipe") -;; For single-precision multiply-accumulate, the add (accumulate) is issued -;; whilst the multiply is in F4. The multiply result can then be forwarded -;; from F5 to F1. 
The issue unit is only used once (when we first start -;; processing the instruction), but the usage of the FP add pipeline could -;; block other instructions attempting to use it simultaneously. We try to -;; avoid that using cortex_a7_fpadd_pipe. +(define_insn_reservation "cortex_a7_neon_mul" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "neon_type" + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd")) + "(cortex_a7_both+cortex_a7_fpmul_pipe)*2") (define_insn_reservation "cortex_a7_fpmacs" 8 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "fmacs") + (and (eq_attr "type" "fmacs,ffmas") (eq_attr "neon_type" "none"))) - "cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe") + "cortex_a7_ex1+cortex_a7_fpmul_pipe") + +(define_insn_reservation "cortex_a7_neon_mla" 8 + (and (eq_attr "tune" "cortexa7") + (eq_attr "neon_type" + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_qqq_scalar")) + "cortex_a7_both+cortex_a7_fpmul_pipe") ;; Non-multiply instructions can issue between two cycles of a ;; double-precision multiply. 
@@ -249,15 +271,19 @@ (and (eq_attr "tune" "cortexa7") (and (eq_attr "type" "fmuld") (eq_attr "neon_type" "none"))) - "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\ - cortex_a7_ex1+cortex_a7_fpmul_pipe") + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") (define_insn_reservation "cortex_a7_fpmacd" 11 (and (eq_attr "tune" "cortexa7") (and (eq_attr "type" "fmacd") (eq_attr "neon_type" "none"))) - "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\ - cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe") + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") + +(define_insn_reservation "cortex_a7_fpfmad" 8 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "ffmad") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Floating-point divide/square root instructions. @@ -267,13 +293,13 @@ (and (eq_attr "tune" "cortexa7") (and (eq_attr "type" "fdivs") (eq_attr "neon_type" "none"))) - "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14") + "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 13") -(define_insn_reservation "cortex_a7_fdivd" 29 +(define_insn_reservation "cortex_a7_fdivd" 31 (and (eq_attr "tune" "cortexa7") (and (eq_attr "type" "fdivd") (eq_attr "neon_type" "none"))) - "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28") + "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 28") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; VFP to/from core transfers. @@ -338,16 +364,36 @@ ;; i.e. a latency of two. 
(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd" - "cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\ - cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\ - cortex_a7_f2r") + "cortex_a7_fpalu,\ + cortex_a7_fpmuls,cortex_a7_fpmacs,\ + cortex_a7_fpmuld,cortex_a7_fpmacd, cortex_a7_fpfmad,\ + cortex_a7_fdivs, cortex_a7_fdivd,\ + cortex_a7_f2r") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; NEON load/store. +;; NEON ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Simple modeling for all neon instructions not covered earlier. (define_insn_reservation "cortex_a7_neon" 4 (and (eq_attr "tune" "cortexa7") - (eq_attr "neon_type" "!none")) + (eq_attr "neon_type" + "!none,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_qqq_scalar")) "cortex_a7_both*2")