diff mbox

Fix up floatunsv{4,8}siv{4,8}sf2

Message ID 20111104104350.GI1052@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 4, 2011, 10:43 a.m. UTC
On Thu, Nov 03, 2011 at 08:01:21PM +0100, Uros Bizjak wrote:
> On Thu, Nov 3, 2011 at 6:54 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> > So, what do you prefer and do you want the expander to be moved into
> > i386.c or kept like this?  Do we perhaps want to keep the slightly
> > faster, but imprecise, version for -ffast-math (I'd prefer not to)?
> 
> IMO, let's go with the fastest solution (X313c), but please move the
> expander to i386.c.

Here is what I've committed after bootstrapping/regtesting it on
x86_64-linux and i686-linux again.

2011-11-04  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_expand_vector_convert_uns_vsivsf): New
	function.
	* config/i386/i386-protos.h (ix86_expand_vector_convert_uns_vsivsf):
	New prototype.
	* config/i386/sse.md (floatuns<sseintvecmodelower><mode>2): Use it.
	For floatunsv8siv8sf2 require TARGET_AVX2.

	* gcc.dg/torture/vec-cvt-1.c: Enable commented out inttoflttestui
	test.



	Jakub
diff mbox

Patch

--- gcc/config/i386/i386.c.jj	2011-11-04 07:55:07.000000000 +0100
+++ gcc/config/i386/i386.c	2011-11-04 08:10:24.000000000 +0100
@@ -17016,6 +17016,43 @@  ix86_expand_convert_uns_sisf_sse (rtx ta
     emit_move_insn (target, fp_hi);
 }
 
+/* floatunsv{4,8}siv{4,8}sf2 expander.  Convert a vector of unsigned ints VAL
+   to floats TARGET as cvt (VAL >> 16) * 0x1p16 + cvt (VAL & 0xffff).  */
+
+void
+ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
+{
+  rtx tmp[8];
+  REAL_VALUE_TYPE TWO16r;
+  enum machine_mode intmode = GET_MODE (val);
+  enum machine_mode fltmode = GET_MODE (target);
+  rtx (*cvt) (rtx, rtx);	/* Int->float conversion generator.  */
+
+  if (intmode == V4SImode)
+    cvt = gen_floatv4siv4sf2;
+  else				/* Any other mode is treated as V8SImode.  */
+    cvt = gen_floatv8siv8sf2;
+  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
+  tmp[0] = force_reg (intmode, tmp[0]);	/* 0xffff mask.  */
+  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
+				OPTAB_DIRECT);	/* Low half.  */
+  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
+				NULL_RTX, 1, OPTAB_DIRECT); /* High half.  */
+  tmp[3] = gen_reg_rtx (fltmode);
+  emit_insn (cvt (tmp[3], tmp[1]));	/* Low half as float.  */
+  tmp[4] = gen_reg_rtx (fltmode);
+  emit_insn (cvt (tmp[4], tmp[2]));	/* High half as float.  */
+  real_ldexp (&TWO16r, &dconst1, 16);	/* TWO16r = 0x1p16.  */
+  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
+  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
+  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
+				OPTAB_DIRECT);	/* tmp[6] = high * 0x1p16.  */
+  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
+				OPTAB_DIRECT);	/* tmp[7] = low + tmp[6].  */
+  if (tmp[7] != target)		/* Result landed elsewhere; copy it.  */
+    emit_move_insn (target, tmp[7]);
+}
+
 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
    pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
    This is done by doing just signed conversion if < 0x1p31, and otherwise by
--- gcc/config/i386/i386-protos.h.jj	2011-11-04 07:55:07.000000000 +0100
+++ gcc/config/i386/i386-protos.h	2011-11-04 08:10:34.000000000 +0100
@@ -109,6 +109,7 @@  extern void ix86_expand_convert_uns_sixf
 extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
 extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
 extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
+extern void ix86_expand_vector_convert_uns_vsivsf (rtx, rtx);
 extern rtx ix86_expand_adjust_ufix_to_sfix_si (rtx, rtx *);
 extern enum ix86_fpcmp_strategy ix86_fp_comparison_strategy (enum rtx_code);
 extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
--- gcc/config/i386/sse.md.jj	2011-11-04 07:55:07.000000000 +0100
+++ gcc/config/i386/sse.md	2011-11-04 08:10:46.000000000 +0100
@@ -2242,30 +2242,12 @@  (define_insn "float<sseintvecmodelower><
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "floatuns<sseintvecmodelower><mode>2"
-  [(set (match_dup 5)
-	(float:VF1
-	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "")))
-   (set (match_dup 6)
-	(lt:VF1 (match_dup 5) (match_dup 3)))
-   (set (match_dup 7)
-	(and:VF1 (match_dup 6) (match_dup 4)))
-   (set (match_operand:VF1 0 "register_operand" "")
-	(plus:VF1 (match_dup 5) (match_dup 7)))]
-  "TARGET_SSE2"
+  [(match_operand:VF1 0 "register_operand" "")
+   (match_operand:<sseintvecmode> 1 "register_operand" "")]
+  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
 {
-  REAL_VALUE_TYPE TWO32r;
-  rtx x;
-  int i;
-
-  real_ldexp (&TWO32r, &dconst1, 32);
-  x = const_double_from_real_value (TWO32r, SFmode);
-
-  operands[3] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
-  operands[4] = force_reg (<MODE>mode,
-			   ix86_build_const_vector (<MODE>mode, 1, x));
-
-  for (i = 5; i < 8; i++)
-    operands[i] = gen_reg_rtx (<MODE>mode);
+  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
+  DONE;
 })
 
 (define_insn "avx_cvtps2dq256"
--- gcc/testsuite/gcc.dg/torture/vec-cvt-1.c.jj	2011-11-04 07:55:07.000000000 +0100
+++ gcc/testsuite/gcc.dg/torture/vec-cvt-1.c	2011-11-04 07:56:30.000000000 +0100
@@ -205,7 +205,7 @@  main ()
   inttoflttestsl ();
   inttoflttestuc ();
   inttoflttestus ();
-//  inttoflttestui ();
+  inttoflttestui ();
   inttoflttestul ();
   return 0;
 }