diff mbox

[i386] : Fix PR70799, STV pass does not convert DImode shifts

Message ID CAFULd4bzgSVmHtY61rFpaYdZPhcGFY9TffZZ06Pz=W0Kz0p4XQ@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak Nov. 8, 2016, 7:02 p.m. UTC
Hello!

The attached patch converts non-variable (constant-count) DImode shifts
to SSE shifts on 32-bit targets.

Please note that the patch doesn't convert variable shifts. We can't
just take the QImode count register of the integer shift and use it in
SImode to implement SSE shifts. The bits outside QImode can be non-zero,
and the narrowest mode in which a value can be copied from an integer
register to an SSE register is SImode. Since SSE shifts truncate the
result for count values outside the allowed range, using the count
register in a wider mode (SImode) could truncate the shifted value to
zero.

The problem above can be solved by zero-extending the count value from
QImode to SImode first, but since we are saving only *one* shift
operation (out of two), I think this additional operation won't make
the conversion profitable anymore.

In addition, the patch converts only those non-variable shifts that
would otherwise require two shift operations. Shifts by more than 31
bits are not converted, since they would originally result in one
SImode register being zero.

The patch noticeably improves the assembly generated for crypto code in
libgo and for random generators in libgfortran, resulting in longer
STV sequences on 32-bit targets.

2016-11-08  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/i386.c (dimode_scalar_to_vector_candidate_p):
    Handle ASHIFT and LSHIFTRT.
    (dimode_scalar_chain::compute_convert_gain): Ditto.
    (dimode_scalar_chain::convert_insn): Ditto.

testsuite/ChangeLog:

2016-11-08  Uros Bizjak  <ubizjak@gmail.com>

    * gcc.target/i386/pr70799-2.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
diff mbox

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 241929)
+++ config/i386/i386.c	(working copy)
@@ -2805,11 +2805,24 @@  dimode_scalar_to_vector_candidate_p (rtx_insn *ins
 
   switch (GET_CODE (src))
     {
+    case ASHIFT:
+    case LSHIFTRT:
+      /* Consider only non-variable shifts narrower
+	 than general register width.  */
+      if (!(CONST_INT_P (XEXP (src, 1))
+	    && IN_RANGE (INTVAL (XEXP (src, 1)), 0, 31)))
+	return false;
+      break;
+
     case PLUS:
     case MINUS:
     case IOR:
     case XOR:
     case AND:
+      if (!REG_P (XEXP (src, 1))
+	  && !MEM_P (XEXP (src, 1))
+	  && !CONST_INT_P (XEXP (src, 1)))
+	return false;
       break;
 
     case REG:
@@ -2832,11 +2845,6 @@  dimode_scalar_to_vector_candidate_p (rtx_insn *ins
 	  || !REG_P (XEXP (XEXP (src, 0), 0))))
       return false;
 
-  if (!REG_P (XEXP (src, 1))
-      && !MEM_P (XEXP (src, 1))
-      && !CONST_INT_P (XEXP (src, 1)))
-      return false;
-
   if ((GET_MODE (XEXP (src, 0)) != DImode
        && !CONST_INT_P (XEXP (src, 0)))
       || (GET_MODE (XEXP (src, 1)) != DImode
@@ -3387,6 +3395,13 @@  dimode_scalar_chain::compute_convert_gain ()
 	gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
       else if (MEM_P (src) && REG_P (dst))
 	gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
+      else if (GET_CODE (src) == ASHIFT
+	       || GET_CODE (src) == LSHIFTRT)
+	{
+	  gain += ix86_cost->add;
+    	  if (CONST_INT_P (XEXP (src, 0)))
+	    gain -= vector_const_cost (XEXP (src, 0));
+	}
       else if (GET_CODE (src) == PLUS
 	       || GET_CODE (src) == MINUS
 	       || GET_CODE (src) == IOR
@@ -3738,6 +3753,12 @@  dimode_scalar_chain::convert_insn (rtx_insn *insn)
 
   switch (GET_CODE (src))
     {
+    case ASHIFT:
+    case LSHIFTRT:
+      convert_op (&XEXP (src, 0), insn);
+      PUT_MODE (src, V2DImode);
+      break;
+
     case PLUS:
     case MINUS:
     case IOR:
Index: testsuite/gcc.target/i386/pr70799-2.c
===================================================================
--- testsuite/gcc.target/i386/pr70799-2.c	(nonexistent)
+++ testsuite/gcc.target/i386/pr70799-2.c	(working copy)
@@ -0,0 +1,17 @@ 
+/* PR target/pr70799 */
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2 -march=slm -mno-stackrealign" } */
+/* { dg-final { scan-assembler "psllq" } } */
+/* { dg-final { scan-assembler "psrlq" } } */
+
+unsigned long long a, b;
+
+void test1 (void)
+{
+  a = b << 21;
+}
+
+void test2 (void)
+{
+  a = b >> 21;
+}