diff mbox

[AArch64] Vector cost model.

Message ID 51C9C9C4.6080001@arm.com
State New
Headers show

Commit Message

Tejas Belagod June 25, 2013, 4:48 p.m. UTC
Hi,

The attached patch implements a generic vector cost model for aarch64.

Regression tested on aarch64-none-elf.

OK?

Thanks,
Tejas Belagod.
ARM.

2013-06-25  Tejas Belagod  <tejas.belagod@arm.com>

gcc/
	* config/aarch64/aarch64-protos.h (cpu_vector_cost): New.
	(tune_params): New member 'const vec_costs'.
	* config/aarch64/aarch64.c (generic_vector_cost): New.
	(generic_tunings): New member 'generic_vector_cost'.
	(aarch64_builtin_vectorization_cost): New.
	(aarch64_add_stmt_cost): New.
	(TARGET_VECTORIZE_ADD_STMT_COST): New.
	(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.

Comments

Marcus Shawcroft July 2, 2013, 9:15 a.m. UTC | #1
On 25 June 2013 17:48, Tejas Belagod <tbelagod@arm.com> wrote:

> 2013-06-25  Tejas Belagod  <tejas.belagod@arm.com>
>
> gcc/
>         * config/aarch64/aarch64-protos.h (cpu_vector_cost): New.
>         (tune_params): New member 'const vec_costs'.
>         * config/aarch64/aarch64.c (generic_vector_cost): New.
>         (generic_tunings): New member 'generic_vector_cost'.
>         (aarch64_builtin_vectorization_cost): New.
>         (aarch64_add_stmt_cost): New.
>         (TARGET_VECTORIZE_ADD_STMT_COST): New.
>         (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.

OK
/Marcus
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 001842e..28d8bae 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -126,11 +126,34 @@  struct cpu_regmove_cost
   const int FP2FP;
 };
 
+/* Cost for vector insn classes.  */
+struct cpu_vector_cost
+{
+  const int scalar_stmt_cost;		 /* Cost of any scalar operation,
+					    excluding load and store.  */
+  const int scalar_load_cost;		 /* Cost of scalar load.  */
+  const int scalar_store_cost;		 /* Cost of scalar store.  */
+  const int vec_stmt_cost;		 /* Cost of any vector operation,
+					    excluding load, store,
+					    vector-to-scalar and
+					    scalar-to-vector operation.  */
+  const int vec_to_scalar_cost;		 /* Cost of vec-to-scalar operation.  */
+  const int scalar_to_vec_cost;		 /* Cost of scalar-to-vector
+					    operation.  */
+  const int vec_align_load_cost;	 /* Cost of aligned vector load.  */
+  const int vec_unalign_load_cost;	 /* Cost of unaligned vector load.  */
+  const int vec_unalign_store_cost;	 /* Cost of unaligned vector store.  */
+  const int vec_store_cost;		 /* Cost of vector store.  */
+  const int cond_taken_branch_cost;	 /* Cost of taken branch.  */
+  const int cond_not_taken_branch_cost;  /* Cost of not taken branch.  */
+};
+
 struct tune_params
 {
   const struct cpu_rtx_cost_table *const insn_extra_cost;
   const struct cpu_addrcost_table *const addr_cost;
   const struct cpu_regmove_cost *const regmove_cost;
+  const struct cpu_vector_cost *const vec_costs;
   const int memmov_cost;
 };
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d32563d..05eae32 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -45,6 +45,8 @@ 
 #include "gimple.h"
 #include "optabs.h"
 #include "dwarf2.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
 
 /* Classifies an address.
 
@@ -178,6 +180,26 @@  static const struct cpu_regmove_cost generic_regmove_cost =
   NAMED_PARAM (FP2FP, 4)
 };
 
+/* Generic costs for vector insn classes.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost generic_vector_cost =
+{
+  NAMED_PARAM (scalar_stmt_cost, 1),
+  NAMED_PARAM (scalar_load_cost, 1),
+  NAMED_PARAM (scalar_store_cost, 1),
+  NAMED_PARAM (vec_stmt_cost, 1),
+  NAMED_PARAM (vec_to_scalar_cost, 1),
+  NAMED_PARAM (scalar_to_vec_cost, 1),
+  NAMED_PARAM (vec_align_load_cost, 1),
+  NAMED_PARAM (vec_unalign_load_cost, 1),
+  NAMED_PARAM (vec_unalign_store_cost, 1),
+  NAMED_PARAM (vec_store_cost, 1),
+  NAMED_PARAM (cond_taken_branch_cost, 3),
+  NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
@@ -186,6 +208,7 @@  static const struct tune_params generic_tunings =
   &generic_rtx_cost_table,
   &generic_addrcost_table,
   &generic_regmove_cost,
+  &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4)
 };
 
@@ -4641,6 +4664,101 @@  aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
   return aarch64_tune_params->memmov_cost;
 }
 
+/* Vectorizer cost model target hooks.  */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost.  */
+static int
+aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+				    tree vectype,
+				    int misalign ATTRIBUTE_UNUSED)
+{
+  unsigned elements;
+
+  switch (type_of_cost)
+    {
+      case scalar_stmt:
+	return aarch64_tune_params->vec_costs->scalar_stmt_cost;
+
+      case scalar_load:
+	return aarch64_tune_params->vec_costs->scalar_load_cost;
+
+      case scalar_store:
+	return aarch64_tune_params->vec_costs->scalar_store_cost;
+
+      case vector_stmt:
+	return aarch64_tune_params->vec_costs->vec_stmt_cost;
+
+      case vector_load:
+	return aarch64_tune_params->vec_costs->vec_align_load_cost;
+
+      case vector_store:
+	return aarch64_tune_params->vec_costs->vec_store_cost;
+
+      case vec_to_scalar:
+	return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
+
+      case scalar_to_vec:
+	return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
+
+      case unaligned_load:
+	return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
+
+      case unaligned_store:
+	return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
+
+      case cond_branch_taken:
+	return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
+
+      case cond_branch_not_taken:
+	return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
+
+      case vec_perm:
+      case vec_promote_demote:
+	return aarch64_tune_params->vec_costs->vec_stmt_cost;
+
+      case vec_construct:
+        elements = TYPE_VECTOR_SUBPARTS (vectype);
+	return elements / 2 + 1;
+
+      default:
+	gcc_unreachable ();
+    }
+}
+
+/* Implement targetm.vectorize.add_stmt_cost.  */
+static unsigned
+aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
+		       struct _stmt_vec_info *stmt_info, int misalign,
+		       enum vect_cost_model_location where)
+{
+  unsigned *cost = (unsigned *) data;
+  unsigned retval = 0;
+
+  if (flag_vect_cost_model)
+    {
+      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+      int stmt_cost =
+	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);
+
+      /* Statements in an inner loop relative to the loop being
+	 vectorized are weighted more heavily.  The value here is
+	 a function (linear for now) of the loop nest level.  */
+      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+	{
+	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+	  struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
+	  unsigned nest_level = loop_depth (loop);
+
+	  count *= nest_level;
+	}
+
+      retval = (unsigned) (count * stmt_cost);
+      cost[where] += retval;
+    }
+
+  return retval;
+}
+
 static void initialize_aarch64_code_model (void);
 
 /* Parse the architecture extension string.  */
@@ -8021,6 +8139,13 @@  aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 #undef TARGET_ARRAY_MODE_SUPPORTED_P
 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
 
+#undef TARGET_VECTORIZE_ADD_STMT_COST
+#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+  aarch64_builtin_vectorization_cost
+
 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode