diff mbox

Fix fixuns_trunc<mode><sseintvecmodelower>2 and vec_pack_ufix_trunc_<mode>

Message ID 20111103211742.GD1052@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 3, 2011, 9:17 p.m. UTC
Hi!

This patch fixes the other bug I've mentioned.  Subtracting 0x1p32
doesn't work correctly in all cases, so this patch changes it to do what we
do for scalar -O2 -m32 -msse2 -mfpmath=sse double -> uint and float -> uint
conversions: subtract just 0x1p31 instead of 0x1p32, do the comparison
using a signalling instead of a non-signalling insn (after all, trying to
convert a qNaN to unsigned int is undefined behavior), which means we can
emit it even with just -msse2, and at the end xor 0x80000000 into the
integer if the float/double was greater than or equal to 0x1p31.

Ok for trunk?

2011-11-03  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_expand_adjust_ufix_to_sfix_si): Add
	XORP argument.  Subtract 0x1p31 instead of 0x1p32.  Use normal
	signalling comparison instead of non-signalling.  Store into
	*XORP pseudo holding 0x80000000 integers if 0x1p31 has been
	subtracted and 0 otherwise.
	* config/i386/i386-protos.h (ix86_expand_adjust_ufix_to_sfix_si):
	Adjust prototype.
	* config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2): Enable
	already for TARGET_SSE2.  Xor in vector initialized by
	ix86_expand_adjust_ufix_to_sfix_si at the end.
	(vec_pack_ufix_trunc_<mode>): Likewise.

	* gcc.dg/torture/vec-cvt-1.c: Enable flttointtestui test.


	Jakub

Comments

Richard Henderson Nov. 3, 2011, 10:08 p.m. UTC | #1
On 11/03/2011 02:17 PM, Jakub Jelinek wrote:
> 	* config/i386/i386.c (ix86_expand_adjust_ufix_to_sfix_si): Add
> 	XORP argument.  Subtract 0x1p31 instead of 0x1p32.  Use normal
> 	signalling comparison instead of non-signalling.  Store into
> 	*XORP pseudo holding 0x80000000 integers if 0x1p31 has been
> 	subtracted and 0 otherwise.
> 	* config/i386/i386-protos.h (ix86_expand_adjust_ufix_to_sfix_si):
> 	Adjust prototype.
> 	* config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2): Enable
> 	already for TARGET_SSE2.  Xor in vector initialized by
> 	ix86_expand_adjust_ufix_to_sfix_si at the end.
> 	(vec_pack_ufix_trunc_<mode>): Likewise.
> 
> 	* gcc.dg/torture/vec-cvt-1.c: Enable flttointtestui test.

Ok.


r~
diff mbox

Patch

--- gcc/config/i386/i386.c.jj	2011-11-03 17:11:27.000000000 +0100
+++ gcc/config/i386/i386.c	2011-11-03 21:59:21.000000000 +0100
@@ -17018,16 +17018,17 @@  ix86_expand_convert_uns_sisf_sse (rtx ta
 
 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
    pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
-   This is done by subtracting 0x1p32 from VAL if VAL is greater or equal
-   (non-signalling) than 0x1p31.  */
+   This is done by doing just signed conversion if < 0x1p31, and otherwise by
+   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */
 
 rtx
-ix86_expand_adjust_ufix_to_sfix_si (rtx val)
+ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
 {
-  REAL_VALUE_TYPE MTWO32r, TWO31r;
-  rtx two31r, mtwo32r, tmp[3];
+  REAL_VALUE_TYPE TWO31r;
+  rtx two31r, tmp[4];
   enum machine_mode mode = GET_MODE (val);
   enum machine_mode scalarmode = GET_MODE_INNER (mode);
+  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
   rtx (*cmp) (rtx, rtx, rtx, rtx);
   int i;
 
@@ -17037,22 +17038,33 @@  ix86_expand_adjust_ufix_to_sfix_si (rtx 
   two31r = const_double_from_real_value (TWO31r, scalarmode);
   two31r = ix86_build_const_vector (mode, 1, two31r);
   two31r = force_reg (mode, two31r);
-  real_ldexp (&MTWO32r, &dconstm1, 32);
-  mtwo32r = const_double_from_real_value (MTWO32r, scalarmode);
-  mtwo32r = ix86_build_const_vector (mode, 1, mtwo32r);
-  mtwo32r = force_reg (mode, mtwo32r);
   switch (mode)
     {
-    case V8SFmode: cmp = gen_avx_cmpv8sf3; break;
-    case V4SFmode: cmp = gen_avx_cmpv4sf3; break;
-    case V4DFmode: cmp = gen_avx_cmpv4df3; break;
-    case V2DFmode: cmp = gen_avx_cmpv2df3; break;
+    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
+    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
+    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
+    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
     default: gcc_unreachable ();
     }
-  emit_insn (cmp (tmp[0], val, two31r, GEN_INT (29)));
-  tmp[1] = expand_simple_binop (mode, AND, tmp[0], mtwo32r, tmp[1],
+  tmp[3] = gen_rtx_LE (mode, two31r, val);
+  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
+  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
 				0, OPTAB_DIRECT);
-  return expand_simple_binop (mode, PLUS, val, tmp[1], tmp[2],
+  if (intmode == V4SImode || TARGET_AVX2)
+    *xorp = expand_simple_binop (intmode, ASHIFT,
+				 gen_lowpart (intmode, tmp[0]),
+				 GEN_INT (31), NULL_RTX, 0,
+				 OPTAB_DIRECT);
+  else
+    {
+      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
+      two31 = ix86_build_const_vector (intmode, 1, two31);
+      *xorp = expand_simple_binop (intmode, AND,
+				   gen_lowpart (intmode, tmp[0]),
+				   two31, NULL_RTX, 0,
+				   OPTAB_DIRECT);
+    }
+  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
 			      0, OPTAB_DIRECT);
 }
 
--- gcc/config/i386/i386-protos.h.jj	2011-11-03 16:11:20.000000000 +0100
+++ gcc/config/i386/i386-protos.h	2011-11-03 20:48:48.000000000 +0100
@@ -109,7 +109,7 @@  extern void ix86_expand_convert_uns_sixf
 extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
 extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
 extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
-extern rtx ix86_expand_adjust_ufix_to_sfix_si (rtx);
+extern rtx ix86_expand_adjust_ufix_to_sfix_si (rtx, rtx *);
 extern enum ix86_fpcmp_strategy ix86_fp_comparison_strategy (enum rtx_code);
 extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
 					    rtx[]);
--- gcc/config/i386/sse.md.jj	2011-11-03 17:25:22.000000000 +0100
+++ gcc/config/i386/sse.md	2011-11-03 22:09:54.000000000 +0100
@@ -2330,10 +2330,13 @@  (define_insn "fix_truncv4sfv4si2"
 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
   [(match_operand:<sseintvecmode> 0 "register_operand" "")
    (match_operand:VF1 1 "register_operand" "")]
-  "TARGET_AVX"
+  "TARGET_SSE2"
 {
-  rtx tmp = ix86_expand_adjust_ufix_to_sfix_si (operands[1]);
-  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+  rtx tmp[3];
+  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+  tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
+  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
+  emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
   DONE;
 })
 
@@ -3120,12 +3123,29 @@  (define_expand "vec_pack_ufix_trunc_<mod
   [(match_operand:<ssepackfltmode> 0 "register_operand" "")
    (match_operand:VF2 1 "register_operand" "")
    (match_operand:VF2 2 "register_operand" "")]
-  "TARGET_AVX"
+  "TARGET_SSE2"
 {
-  rtx tmp[2];
-  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1]);
-  tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2]);
-  emit_insn (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp[0], tmp[1]));
+  rtx tmp[7];
+  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+  tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
+  tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
+  emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
+  if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
+    {
+      tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
+      ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
+    }
+  else
+    {
+      tmp[5] = gen_reg_rtx (V8SFmode);
+      ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
+					gen_lowpart (V8SFmode, tmp[3]), 0);
+      tmp[5] = gen_lowpart (V8SImode, tmp[5]);
+    }
+  tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
+				operands[0], 0, OPTAB_DIRECT);
+  if (tmp[6] != operands[0])
+    emit_move_insn (operands[0], tmp[6]);
   DONE;
 })
 
--- gcc/testsuite/gcc.dg/torture/vec-cvt-1.c.jj	2011-11-03 17:43:11.000000000 +0100
+++ gcc/testsuite/gcc.dg/torture/vec-cvt-1.c	2011-11-03 21:39:54.000000000 +0100
@@ -197,7 +197,7 @@  main ()
   flttointtestsl ();
   flttointtestuc ();
   flttointtestus ();
-//  flttointtestui ();
+  flttointtestui ();
   flttointtestul ();
   inttoflttestsc ();
   inttoflttestss ();