diff mbox

Vectorize conversions directly

Message ID 4D00D204.3080300@ispras.ru
State New
Headers show

Commit Message

Dmitry Plotnikov Dec. 9, 2010, 12:56 p.m. UTC
Thank you for comments!  New patch attached.

On 11/27/2010 12:10 AM, Richard Henderson wrote:
>> +(define_insn "floatv2siv2sf2"
>> +  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
>> +       (fix:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))]
> Wrong rtl code here; s/fix/float/.
Fixed.
>> +(define_insn "floatunsv2siv2sf2"
>> +  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
>> +       (unspec:V2SF [(match_operand:V2SI 1 "s_register_operand" "w")]
>> +                    UNSPEC_FLOATU))]
> Why are you not using the unsigned_float rtl code?
>
Unspecs replaced with unsigned_float and unsigned_fix.

Comments

Richard Henderson Dec. 10, 2010, 3:45 p.m. UTC | #1
On 12/09/2010 04:56 AM, Dmitry Plotnikov wrote:
> 2010-12-09  Dmitry Plotnikov  <dplotnikov@ispras.ru>
> 
> gcc/
> 	* tree-cfg.c (verify_gimple_assign_unary): Allow vector conversions.
> 	* tree-vect-stmts.c (supportable_convert_operation): New function.
> 	  (vectorizable_conversion): Call it.  Change condition and behavior 
> 	  for NONE modifier case.
> 	* tree-vectorizer.h (supportable_convert_operation): New prototype.
> 	* tree.h (VECTOR_INTEGER_TYPE_P): New macro.
> 
> gcc/config/arm/
> 	* neon.md (floatv2siv2sf2): New.
> 	  (floatunsv2siv2sf2): New.
> 	  (fix_truncv2sfv2si2): New.
> 	  (fix_truncunsv2sfv2si2): New.
> 	  (floatv4siv4sf2): New.
> 	  (floatunsv4siv4sf2): New.
> 	  (fix_truncv4sfv4si2): New.
> 	  (fix_truncunsv4sfv4si2): New.
> 	
> gcc/testsuite/
> 	* gcc.target/arm/vect-vcvt.c: New test.
> 	* gcc.target/arm/vect-vcvtq.c: New test.

Patch looks generally ok; I'll let another Richard approve the ARM bits.

> +  /* First check if we can done conversion directly.  */

 ... if we can do the conversion ...

> +++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -fdump-tree-vect-details" } */

Missing dg-final bits for this test.

Neither test requires stdarg.h; remove it.


r~
Richard Henderson Oct. 20, 2011, 4:39 p.m. UTC | #2
On 10/20/2011 09:24 AM, Dmitry Plotnikov wrote:
> gcc/
>     * tree-cfg.c (verify_gimple_assign_unary): Allow vector conversions.
>     * tree-vect-stmts.c (supportable_convert_operation): New function.
>       (vectorizable_conversion): Call it.  Change condition and behavior
>       for NONE modifier case.
>     * tree-vectorizer.h (supportable_convert_operation): New prototype.
>     * tree.h (VECTOR_INTEGER_TYPE_P): New macro.
> 
> gcc/config/arm/
>     * neon.md (floatv2siv2sf2): New.
>       (floatunsv2siv2sf2): New.
>       (fix_truncv2sfv2si2): New.
>       (fix_truncunsv2sfv2si2): New.
>       (floatv4siv4sf2): New.
>       (floatunsv4siv4sf2): New.
>       (fix_truncv4sfv4si2): New.
>       (fix_truncunsv4sfv4si2): New.
>    
> gcc/testsuite/
>     * gcc.target/arm/vect-vcvt.c: New test.
>     * gcc.target/arm/vect-vcvtq.c: New test.
> 
> gcc/testsuite/lib/
>     * target-supports.exp (check_effective_target_vect_intfloat_cvt): True
>       for ARM NEON.
>       (check_effective_target_vect_uintfloat_cvt): Likewise.
>       (check_effective_target_vect_intfloat_cvt): Likewise.
>       (check_effective_target_vect_floatuint_cvt): Likewise.
>       (check_effective_target_vect_floatint_cvt): Likewise.
>       (check_effective_target_vect_extract_even_odd): Likewise.

Please move supportable_convert_operation to optabs.c; eventually
we ought to use can_fix_p/can_float_p.

> +  if (code == FIX_TRUNC_EXPR)
> +    optab1 = (TYPE_UNSIGNED (vectype_out)) ? ufixtrunc_optab : sfixtrunc_optab;
> +  else if (code == FLOAT_EXPR)
> +    optab1 = (TYPE_UNSIGNED (vectype_in)) ? ufloat_optab : sfloat_optab;
> +  
> +  m1 = TYPE_MODE (vectype_in);

Looks like a missing 

	else
	  gcc_unreachable()

there, since there's no check for optab1 != NULL later.

Otherwise the generic parts of the patch look good.
Please get separate approval for the arm portions of the patch.

After the generic parts of the patch goes in I will endevour to adjust the i386
and rs6000 backends to similarly populate the optabs, so that we can remove the
builtin path here.


r~
Ramana Radhakrishnan Oct. 21, 2011, 11:38 a.m. UTC | #3
> Otherwise the generic parts of the patch look good.
> Please get separate approval for the arm portions of the patch.

Is it just me or has no one else seen this patch on the archives at
gcc-patches@. ?


Ramana
diff mbox

Patch

2010-12-09  Dmitry Plotnikov  <dplotnikov@ispras.ru>

gcc/
	* tree-cfg.c (verify_gimple_assign_unary): Allow vector conversions.
	* tree-vect-stmts.c (supportable_convert_operation): New function.
	  (vectorizable_conversion): Call it.  Change condition and behavior 
	  for NONE modifier case.
	* tree-vectorizer.h (supportable_convert_operation): New prototype.
	* tree.h (VECTOR_INTEGER_TYPE_P): New macro.

gcc/config/arm/
	* neon.md (floatv2siv2sf2): New.
	  (floatunsv2siv2sf2): New.
	  (fix_truncv2sfv2si2): New.
	  (fix_truncunsv2sfv2si2): New.
	  (floatv4siv4sf2): New.
	  (floatunsv4siv4sf2): New.
	  (fix_truncv4sfv4si2): New.
	  (fix_truncunsv4sfv4si2): New.
	
gcc/testsuite/
	* gcc.target/arm/vect-vcvt.c: New test.
	* gcc.target/arm/vect-vcvtq.c: New test.

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 06bbc52..d484060 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3053,6 +3053,62 @@ 
   [(set_attr "neon_type" "neon_bp_simple")]
 )
 
+(define_insn "floatv2siv2sf2"
+  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+       (float:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.s32\t%P0, %P1"
+)
+
+(define_insn "floatunsv2siv2sf2"
+  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+       (unsigned_float:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))] 
+  "TARGET_NEON"
+  "vcvt.f32.u32\t%P0, %P1"
+)
+
+(define_insn "fix_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+        (fix:V2SI (match_operand:V2SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.s32.f32\t%P0, %P1"
+)
+
+(define_insn "fixuns_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+        (unsigned_fix:V2SI (match_operand:V2SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.u32.f32\t%P0, %P1"
+)
+
+(define_insn "floatv4siv4sf2"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+       (float:V4SF (match_operand:V4SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.s32\t%q0, %q1"
+)
+
+(define_insn "floatunsv4siv4sf2"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+       (unsigned_float:V4SF (match_operand:V4SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.u32\t%q0, %q1"
+)
+
+(define_insn "fix_truncv4sfv4si2"
+  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
+        (fix:V4SI (match_operand:V4SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.s32.f32\t%q0, %q1"
+)
+
+(define_insn "fixuns_truncv4sfv4si2"
+  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
+        (unsigned_fix:V4SI (match_operand:V4SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.u32.f32\t%q0, %q1"
+)
+
 (define_insn "neon_vcvt<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
 	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index e3ab9d9..6b1fb4f 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3277,7 +3277,9 @@  verify_gimple_assign_unary (gimple stmt)
 
     case FLOAT_EXPR:
       {
-	if (!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+	if ((!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+	    && (!VECTOR_INTEGER_TYPE_P (rhs1_type)
+	        || !VECTOR_FLOAT_TYPE_P(lhs_type)))
 	  {
 	    error ("invalid types in conversion to floating point");
 	    debug_generic_expr (lhs_type);
@@ -3290,7 +3292,9 @@  verify_gimple_assign_unary (gimple stmt)
 
     case FIX_TRUNC_EXPR:
       {
-	if (!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+        if ((!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+            && (!VECTOR_INTEGER_TYPE_P (lhs_type)
+                || !VECTOR_FLOAT_TYPE_P(rhs1_type)))
 	  {
 	    error ("invalid types in conversion to integer");
 	    debug_generic_expr (lhs_type);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index e5bfcbe..bc05c55 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1638,6 +1638,59 @@  vect_gen_widened_results_half (enum tree_code code,
   return new_stmt;
 }
 
+/* Function supportable_convert_operation
+
+   Check whether an operation represented by the code CODE is a
+   convert operation that is supported by the target platform in
+   vector form (i.e., when operating on arguments of type VECTYPE_IN
+   producing a result of type VECTYPE_OUT).
+   
+   Convert operations we currently support directly are FIX_TRUNC and FLOAT.
+   This function checks if these operations are supported
+   by the target platform either directly (via vector tree-codes), or via
+   target builtins.
+   
+   Output:
+   - CODE1 is code of vector operation to be used when
+   vectorizing the operation, if available.
+   - DECL is decl of target builtin functions to be used
+   when vectorizing the operation, if available.  In this case,
+   CODE1 is CALL_EXPR.  */
+
+bool
+supportable_convert_operation (enum tree_code code,
+                                   tree vectype_out, tree vectype_in,
+                                   tree *decl, enum tree_code *code1)
+{
+  enum machine_mode m1,m2;
+  convert_optab optab1 = NULL;
+
+  /* First check if we can done conversion directly.  */
+  if (code == FIX_TRUNC_EXPR)
+    optab1 = (TYPE_UNSIGNED (vectype_out)) ? ufixtrunc_optab : sfixtrunc_optab;
+  else if (code == FLOAT_EXPR)
+    optab1 = (TYPE_UNSIGNED (vectype_in)) ? ufloat_optab : sfloat_optab;
+  
+  m1 = TYPE_MODE (vectype_in);
+  m2 = TYPE_MODE (vectype_out);
+
+  if (convert_optab_handler (optab1, m2, m1) != CODE_FOR_nothing)
+    {
+      *code1 = code;
+      return true;
+    }
+  
+  /* Now check for builtin.  */
+  if (targetm.vectorize.builtin_conversion
+      && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+    {
+      *code1 = CALL_EXPR;
+      *decl = targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in);
+      return true;
+    }
+  return false;
+}
+
 
 /* Check if STMT performs a conversion operation, that can be vectorized.
    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
@@ -1667,7 +1720,6 @@  vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   tree vectype_out, vectype_in;
   int ncopies, j;
   tree rhs_type;
-  tree builtin_decl;
   enum { NARROW, NONE, WIDEN } modifier;
   int i;
   VEC(tree,heap) *vec_oprnds0 = NULL;
@@ -1756,7 +1808,7 @@  vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 
   /* Supportable by target?  */
   if ((modifier == NONE
-       && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+       && !supportable_convert_operation (code, vectype_out, vectype_in, &decl1, &code1))
       || (modifier == WIDEN
 	  && !supportable_widening_operation (code, stmt,
 					      vectype_out, vectype_in,
@@ -1806,19 +1858,28 @@  vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 	  else
 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
 
-	  builtin_decl =
-	    targetm.vectorize.builtin_conversion (code,
-						  vectype_out, vectype_in);
 	  FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
-	    {
-	      /* Arguments are ready. create the new vector stmt.  */
-	      new_stmt = gimple_build_call (builtin_decl, 1, vop0);
-	      new_temp = make_ssa_name (vec_dest, new_stmt);
-	      gimple_call_set_lhs (new_stmt, new_temp);
-	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-	      if (slp_node)
-		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
-	    }
+          {
+            /* Arguments are ready, create the new vector stmt.  */
+            if (code1 == CALL_EXPR)
+              {
+                new_stmt = gimple_build_call (decl1, 1, vop0);
+                new_temp = make_ssa_name (vec_dest, new_stmt);
+                gimple_call_set_lhs (new_stmt, new_temp);
+              }
+            else
+              {
+                gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
+                new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0,
+                                                        NULL);
+                new_temp = make_ssa_name (vec_dest, new_stmt);
+                gimple_assign_set_lhs (new_stmt, new_temp);
+              }
+
+            vect_finish_stmt_generation (stmt, new_stmt, gsi);
+            if (slp_node)
+              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+          }
 
 	  if (j == 0)
 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b2cc2d1..8d61608 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -799,6 +799,9 @@  extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
                                  bool *, slp_tree, slp_instance);
 extern void vect_remove_stores (gimple);
 extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
+extern bool supportable_convert_operation (enum tree_code, tree, tree,
+                                          tree *, enum tree_code *);
+
 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
                                     tree, int);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
diff --git a/gcc/tree.h b/gcc/tree.h
index 8ba2044..34d3bbc 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1047,6 +1047,13 @@  extern void omp_clause_range_check_failed (const_tree, const char *, int,
   (TREE_CODE (TYPE) == COMPLEX_TYPE	\
    && TREE_CODE (TREE_TYPE (TYPE)) == REAL_TYPE)
 
+/* Nonzero if TYPE represents a vector integer type.  */
+                
+#define VECTOR_INTEGER_TYPE_P(TYPE)                   \
+             (TREE_CODE (TYPE) == VECTOR_TYPE      \
+                 && TREE_CODE (TREE_TYPE (TYPE)) == INTEGER_TYPE)
+
+
 /* Nonzero if TYPE represents a vector floating-point type.  */
 
 #define VECTOR_FLOAT_TYPE_P(TYPE)	\
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
new file mode 100644
index 0000000..f33206c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -fdump-tree-vect-details" } */
+
+#include <stdarg.h>
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+  int i;
+
+  /* int -> float */
+  for (i = 0; i < N; i++)
+    fa[i] = (float) ib[i];
+
+  /* float -> int */
+  for (i = 0; i < N; i++)
+    ia[i] = (int) fa[i];
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
new file mode 100644
index 0000000..3412cf2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -fdump-tree-vect-details -mvectorize-with-neon-quad" } */
+
+#include <stdarg.h>
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+  int i;
+
+  /* int -> float */
+  for (i = 0; i < N; i++)
+    fa[i] = (float) ib[i];
+
+  /* float -> int */
+  for (i = 0; i < N; i++)
+    ia[i] = (int) fa[i];
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */