diff mbox series

[i386] Fold __builtin_ia32_shufpd to VEC_PERM_EXPR

Message ID alpine.DEB.2.02.1905191824000.13168@grove.saclay.inria.fr
State New
Headers show
Series [i386] Fold __builtin_ia32_shufpd to VEC_PERM_EXPR | expand

Commit Message

Marc Glisse May 19, 2019, 4:39 p.m. UTC
Hello,

Dropping the builtin as early as possible seems like it can only help us
optimize the code. In the PR, Jakub said he preferred this approach to
using __builtin_shuffle in the header. There is already some coverage in
the testsuite (as I noticed when I tried to restrict the argument to
[0, 3]...).

If this one is ok, I may add a few more (say shufps to begin with) later.

Bootstrapped and regression-tested on x86_64-pc-linux-gnu.

2019-05-20  Marc Glisse  <marc.glisse@inria.fr>

 	PR rtl-optimization/43147
 	* config/i386/i386.c (ix86_gimple_fold_builtin): Handle
 	IX86_BUILTIN_SHUFPD.
diff mbox series

Patch

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 271376)
+++ gcc/config/i386/i386.c	(working copy)
@@ -17290,21 +17290,21 @@  ix86_fold_builtin (tree fndecl, int n_ar
 
 bool
 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi);
   tree fndecl = gimple_call_fndecl (stmt);
   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   int n_args = gimple_call_num_args (stmt);
   enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
   tree decl = NULL_TREE;
-  tree arg0, arg1;
+  tree arg0, arg1, arg2;
   enum rtx_code rcode;
   unsigned HOST_WIDE_INT count;
   bool is_vshift;
 
   switch (fn_code)
     {
     case IX86_BUILTIN_TZCNT32:
       decl = builtin_decl_implicit (BUILT_IN_CTZ);
       goto fold_tzcnt_lzcnt;
 
@@ -17594,20 +17594,46 @@  ix86_gimple_fold_builtin (gimple_stmt_it
 	     arithmetic right shift the result is zero.  */
 	  location_t loc = gimple_location (stmt);
 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
 					   build_zero_cst (TREE_TYPE (arg0)));
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	  return true;
 	}
       break;
 
+    case IX86_BUILTIN_SHUFPD:
+      arg2 = gimple_call_arg (stmt, 2);
+      if (TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  location_t loc = gimple_location (stmt);
+	  unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
+	  arg0 = gimple_call_arg (stmt, 0);
+	  arg1 = gimple_call_arg (stmt, 1);
+	  tree itype = long_long_integer_type_node;
+	  tree vtype = build_vector_type (itype, 2); /* V2DI */
+	  tree_vector_builder elts (vtype, 2, 1);
+	  /* Ignore bits other than the lowest 2.  */
+	  elts.quick_push (build_int_cst (itype, imask & 1));
+	  imask >>= 1;
+	  elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
+	  tree omask = elts.build ();
+	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
+					   VEC_PERM_EXPR,
+					   arg0, arg1, omask);
+	  gimple_set_location (g, loc);
+	  gsi_replace (gsi, g, false);
+	  return true;
+	}
+      // Do not error yet, the constant could be propagated later?
+      break;
+
     default:
       break;
     }
 
   return false;
 }
 
 /* Handler for an SVML-style interface to
    a library with vectorized intrinsics.  */