Patchwork Prefer to use v?{and,or,xor}p[sd] for float vectors (PR target/54716)

login
register
mail settings
Submitter Jakub Jelinek
Date Sept. 27, 2012, 3:24 p.m.
Message ID <20120927152451.GY1787@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/187401/
State New
Headers show

Comments

Jakub Jelinek - Sept. 27, 2012, 3:24 p.m.
Hi!

As discussed in the PR, the only way to request a vector float/double
logical operation in C/C++ code without intrinsics is to cast to integer
vectors temporarily, but we then generate v?p{and,or,xor} instead of
v?{and,or,xor}p[sd].

The following patch changes that when either both operands of a vector
integer and/or/xor are SUBREGs of the same vector float/double mode, or
one operand is such a SUBREG and the other is a CONST_VECTOR.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2012-09-27  Jakub Jelinek  <jakub@redhat.com>

	PR target/54716
	* config/i386/predicates.md (nonimmediate_or_const_vector_operand):
	New predicate.
	* config/i386/i386.c (ix86_expand_vector_logical_operator): New
	function.
	* config/i386/i386-protos.h (ix86_expand_vector_logical_operator): New
	prototype.
	* config/i386/sse.md (<code><mode>3 VI logic): Use it.

	* gcc.target/i386/xorps-sse2.c: Remove xfails.


	Jakub
Richard Henderson - Sept. 27, 2012, 4:43 p.m.
On 09/27/2012 08:24 AM, Jakub Jelinek wrote:
> Hi!
> 
> As discussed in the PR, the only way how to request a vector float/double
> logical operation in C/C++ code without intrinsics is by casting to integer
> vectors temporarily, but we then generate v?p{and,or,xor} instead of *p[sd].
> 
> The following patch changes that if either both of the operands of
> vector integer and/or/xor are SUBREGs of the same vector float/double mode,
> or one is SUBREG and another one is CONST_VECTOR.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2012-09-27  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR target/54716
> 	* config/i386/predicates.md (nonimmediate_or_const_vector_operand):
> 	New predicate.
> 	* config/i386/i386.c (ix86_expand_vector_logical_operator): New
> 	function.
> 	* config/i386/i386-protos.h (ix86_expand_vector_logical_operator): New
> 	prototype.
> 	* config/i386/sse.md (<code><mode>3 VI logic): Use it.
> 
> 	* gcc.target/i386/xorps-sse2.c: Remove xfails.

Ok.


r~

Patch

--- gcc/config/i386/predicates.md.jj	2012-09-13 07:54:44.000000000 +0200
+++ gcc/config/i386/predicates.md	2012-09-27 09:56:54.994873237 +0200
@@ -777,6 +777,12 @@  (define_predicate "vector_move_operand"
   (ior (match_operand 0 "nonimmediate_operand")
        (match_operand 0 "const0_operand")))
 
+;; Return true when OP is either a nonimmediate operand or any
+;; CONST_VECTOR.
+(define_predicate "nonimmediate_or_const_vector_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+       (match_code "const_vector")))
+
 ;; Return true when OP is nonimmediate or standard SSE constant.
 (define_predicate "nonimmediate_or_sse_const_operand"
   (match_operand 0 "general_operand")
--- gcc/config/i386/i386.c.jj	2012-09-20 09:22:11.000000000 +0200
+++ gcc/config/i386/i386.c	2012-09-27 10:02:47.725786590 +0200
@@ -16490,6 +16490,82 @@  ix86_expand_binary_operator (enum rtx_co
     emit_move_insn (operands[0], dst);
 }
 
+/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE on OPERANDS,
+   using a float vector insn when the inputs are float vector SUBREGs.  */
+
+void
+ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
+				     rtx operands[])
+{
+  rtx op1 = NULL_RTX, op2 = NULL_RTX;
+  if (GET_CODE (operands[1]) == SUBREG)
+    {
+      op1 = operands[1];
+      op2 = operands[2];
+    }
+  else if (GET_CODE (operands[2]) == SUBREG)
+    {
+      op1 = operands[2];
+      op2 = operands[1];
+    }
+  /* Optimize (__m128i) d | (__m128i) e and similar code, where d and e
+     are float vectors, into a float vector logical insn; a CONST_VECTOR
+     is also accepted as the other operand.  In C/C++ without intrinsics
+     the only way to express a logical operation on float vectors is to
+     cast them temporarily to integer vectors.  */
+  if (op1
+      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+      && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
+      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
+      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
+      && SUBREG_BYTE (op1) == 0
+      && (GET_CODE (op2) == CONST_VECTOR
+	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
+	      && SUBREG_BYTE (op2) == 0))
+      && can_create_pseudo_p ())
+    {
+      rtx dst;
+      switch (GET_MODE (SUBREG_REG (op1)))
+	{
+	case V4SFmode:
+	case V8SFmode:
+	case V2DFmode:
+	case V4DFmode:
+	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
+	  if (GET_CODE (op2) == CONST_VECTOR)
+	    {
+	      op2 = gen_lowpart (GET_MODE (dst), op2);
+	      op2 = force_reg (GET_MODE (dst), op2);
+	    }
+	  else
+	    {
+	      op1 = operands[1];
+	      op2 = SUBREG_REG (operands[2]);
+	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
+		op2 = force_reg (GET_MODE (dst), op2);
+	    }
+	  op1 = SUBREG_REG (op1);
+	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
+	    op1 = force_reg (GET_MODE (dst), op1);
+	  emit_insn (gen_rtx_SET (VOIDmode, dst,
+				  gen_rtx_fmt_ee (code, GET_MODE (dst),
+						  op1, op2)));
+	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
+	  return;
+	default:
+	  break;  /* Unhandled inner mode: use the MODE expansion below.  */
+	}
+    }
+  if (!nonimmediate_operand (operands[1], mode))
+    operands[1] = force_reg (mode, operands[1]);
+  if (!nonimmediate_operand (operands[2], mode))
+    operands[2] = force_reg (mode, operands[2]);
+  ix86_fixup_binary_operands_no_copy (code, mode, operands);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+			  gen_rtx_fmt_ee (code, mode, operands[1],
+					  operands[2])));
+}
+
 /* Return TRUE or FALSE depending on whether the binary operator meets the
    appropriate constraints.  */
 
--- gcc/config/i386/sse.md.jj	2012-09-14 14:36:44.000000000 +0200
+++ gcc/config/i386/sse.md	2012-09-27 09:52:47.182318053 +0200
@@ -6264,10 +6264,13 @@  (define_insn "*andnot<mode>3"
 (define_expand "<code><mode>3"
   [(set (match_operand:VI 0 "register_operand")
 	(any_logic:VI
-	  (match_operand:VI 1 "nonimmediate_operand")
-	  (match_operand:VI 2 "nonimmediate_operand")))]
+	  (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
+	  (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+{
+  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
+  DONE;
+})
 
 (define_insn "*<code><mode>3"
   [(set (match_operand:VI 0 "register_operand" "=x,x")
--- gcc/config/i386/i386-protos.h.jj	2012-08-17 09:11:13.000000000 +0200
+++ gcc/config/i386/i386-protos.h	2012-09-27 09:53:48.532960733 +0200
@@ -91,6 +91,8 @@  extern void ix86_fixup_binary_operands_n
 						enum machine_mode, rtx[]);
 extern void ix86_expand_binary_operator (enum rtx_code,
 					 enum machine_mode, rtx[]);
+extern void ix86_expand_vector_logical_operator (enum rtx_code,
+						 enum machine_mode, rtx[]);
 extern bool ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
 extern bool ix86_avoid_lea_for_add (rtx, rtx[]);
 extern bool ix86_use_lea_for_mov (rtx, rtx[]);
--- gcc/testsuite/gcc.target/i386/xorps-sse2.c.jj	2010-07-26 11:40:10.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/xorps-sse2.c	2012-09-26 16:48:15.839983806 +0200
@@ -1,8 +1,8 @@ 
 /* Test that we generate xorps when the result is used in FP math.  */
 /* { dg-do compile } */
 /* { dg-options "-O -msse2 -mno-sse3" } */
-/* { dg-final { scan-assembler "xorps\[ \t\]" { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-not "pxor" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler "xorps\[ \t\]" } } */
+/* { dg-final { scan-assembler-not "pxor" } } */
 
 #define vector __attribute__ ((vector_size (16)))