diff mbox series

[2/3,Aarch64] Implement Aarch64 SIMD ABI

Message ID 1537456506.24844.14.camel@cavium.com
State New
Headers show
Series [1/3,Aarch64] Implement Aarch64 SIMD ABI | expand

Commit Message

Steve Ellcey Sept. 20, 2018, 3:15 p.m. UTC
This is the second of three Aarch64 patch for SIMD ABI support.  It
defines the TARGET_SIMD_CLONE_* macros so that GCC will recognize
and vectorize loops containing SIMD functions.  It requires that
patch one of the Aarch64 SIMD ABI get checked in first.

This patch has not been fully regression tested yet but is fairly
safe and I am posting it to see if there are any comments on it.

Steve Ellcey
sellcey@cavium.com


2018-09-20  Steve Ellcey  <sellcey@cavium.com>

	* config/aarch64/aarch64.c (cgraph.h): New include.
	(aarch64_simd_clone_compute_vecsize_and_simdlen): New function.
	(aarch64_simd_clone_adjust): Ditto.
	(aarch64_simd_clone_usable): Ditto.
	(TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN): New macro.
	(TARGET_SIMD_CLONE_ADJUST): Ditto.
	(TARGET_SIMD_CLONE_USABLE): Ditto.
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8cc738c..a86f32d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -40,6 +40,7 @@ 
 #include "regs.h"
 #include "emit-rtl.h"
 #include "recog.h"
+#include "cgraph.h"
 #include "diagnostic.h"
 #include "insn-attr.h"
 #include "alias.h"
@@ -17472,6 +17473,131 @@  aarch64_speculation_safe_value (machine_mode mode,
   return result;
 }
 
+/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
+   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
+   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
+   or number of vecsize_mangle variants that should be emitted.  */
+
+static int
+aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+					struct cgraph_simd_clone *clonei,
+					tree base_type,
+					int num ATTRIBUTE_UNUSED)
+{
+  int ret = 0;
+
+  if (clonei->simdlen
+      && (clonei->simdlen < 2
+	  || clonei->simdlen > 1024
+	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+    {
+      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+		  "unsupported simdlen %d", clonei->simdlen);
+      return 0;
+    }
+
+  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+  if (TREE_CODE (ret_type) != VOID_TYPE)
+    switch (TYPE_MODE (ret_type))
+      {
+      case E_QImode:
+      case E_HImode:
+      case E_SImode:
+      case E_DImode:
+      case E_SFmode:
+      case E_DFmode:
+      /* case E_SCmode: */
+      /* case E_DCmode: */
+	break;
+      default:
+	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+		    "unsupported return type %qT for simd\n", ret_type);
+	return 0;
+      }
+
+  tree t;
+  for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
+    /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
+    switch (TYPE_MODE (TREE_TYPE (t)))
+      {
+      case E_QImode:
+      case E_HImode:
+      case E_SImode:
+      case E_DImode:
+      case E_SFmode:
+      case E_DFmode:
+      /* case E_SCmode: */
+      /* case E_DCmode: */
+	break;
+      default:
+	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+		    "unsupported argument type %qT for simd\n", TREE_TYPE (t));
+	return 0;
+      }
+
+  if (TARGET_SIMD)
+    {
+    clonei->vecsize_mangle = 'n';
+    clonei->mask_mode = VOIDmode;
+    clonei->vecsize_int = 128;
+    clonei->vecsize_float = 128;
+
+    if (clonei->simdlen == 0)
+      {
+      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+	clonei->simdlen = clonei->vecsize_int;
+      else
+	clonei->simdlen = clonei->vecsize_float;
+      clonei->simdlen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
+      }
+    else if (clonei->simdlen > 16)
+      {
+      /* If it is possible for given SIMDLEN to pass CTYPE value in
+	 registers (v0-v7) accept that SIMDLEN, otherwise warn and don't
+	 emit corresponding clone.  */
+      int cnt = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)) * clonei->simdlen;
+      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+	cnt /= clonei->vecsize_int;
+      else
+	cnt /= clonei->vecsize_float;
+      if (cnt > 8)
+	{
+	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+		    "unsupported simdlen %d", clonei->simdlen);
+	return 0;
+	}
+      }
+      ret = 1;
+    }
+  return ret;
+}
+
+/* Add target attribute to SIMD clone NODE if needed.  */
+
+static void
+aarch64_simd_clone_adjust (struct cgraph_node *node ATTRIBUTE_UNUSED)
+{
+}
+
+/* If SIMD clone NODE can't be used in a vectorized loop
+   in current function, return -1, otherwise return a badness of using it
+   (0 if it is most desirable from vecsize_mangle point of view, 1
+   slightly less desirable, etc.).  */
+
+static int
+aarch64_simd_clone_usable (struct cgraph_node *node)
+{
+  switch (node->simdclone->vecsize_mangle)
+    {
+    case 'n':
+      if (!TARGET_SIMD)
+	return -1;
+      return 0;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -17947,6 +18073,16 @@  aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_SPECULATION_SAFE_VALUE
 #define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
 
+#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
+#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
+  aarch64_simd_clone_compute_vecsize_and_simdlen
+
+#undef TARGET_SIMD_CLONE_ADJUST
+#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust
+
+#undef TARGET_SIMD_CLONE_USABLE
+#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests