diff mbox series

IBM Z: Change vector copysign to use bitwise operations

Message ID 20201008093848.305481-1-iii@linux.ibm.com
State New
Headers show
Series IBM Z: Change vector copysign to use bitwise operations | expand

Commit Message

Ilya Leoshkevich Oct. 8, 2020, 9:38 a.m. UTC
Bootstrapped and regtested on s390x-redhat-linux.  OK for master?

The vector copysign pattern incorrectly assumes that vector
if_then_else operates on bits, not on elements.  This can theoretically
mislead the optimizers.  Fix by changing it to use bitwise operations,
like commit 2930bb321794 ("PR94613: Fix vec_sel builtin for IBM Z") did
for vec_sel builtin.

gcc/ChangeLog:

2020-10-07  Ilya Leoshkevich  <iii@linux.ibm.com>

	* config/s390/s390-protos.h (s390_build_signbit_mask): New
	function.
	* config/s390/s390.c (s390_tointvec): New function.
	(s390_contiguous_bitmask_vector_p): Bitcast the argument to
	an integral mode.
	(s390_expand_vec_init): Do not call
	s390_contiguous_bitmask_vector_p with a scalar argument.
	(s390_build_signbit_mask): New function.
	* config/s390/vector.md (copysign<mode>3): Use bitwise
	operations.
---
 gcc/config/s390/s390-protos.h |  1 +
 gcc/config/s390/s390.c        | 92 ++++++++++++++++++++++++++++++++---
 gcc/config/s390/vector.md     | 31 ++++--------
 3 files changed, 95 insertions(+), 29 deletions(-)

Comments

Andreas Krebbel Oct. 9, 2020, 8:56 a.m. UTC | #1
On 08.10.20 11:38, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  OK for master?
> 
> The vector copysign pattern incorrectly assumes that vector
> if_then_else operates on bits, not on elements.  This can theoretically
> mislead the optimizers.  Fix by changing it to use bitwise operations,
> like commit 2930bb321794 ("PR94613: Fix vec_sel builtin for IBM Z") did
> for vec_sel builtin.
> 
> gcc/ChangeLog:
> 
> 2020-10-07  Ilya Leoshkevich  <iii@linux.ibm.com>
> 
> 	* config/s390/s390-protos.h (s390_build_signbit_mask): New
> 	function.
> 	* config/s390/s390.c (s390_tointvec): New function.
> 	(s390_contiguous_bitmask_vector_p): Bitcast the argument to
> 	an integral mode.
> 	(s390_expand_vec_init): Do not call
> 	s390_contiguous_bitmask_vector_p with a scalar argument.
> 	(s390_build_signbit_mask): New function.
> 	* config/s390/vector.md (copysign<mode>3): Use bitwise
> 	operations.

Couldn't s390_tointvec be implemented/replaced with related_int_vector_mode?

Ok, Thanks!

Andreas

> ---
>  gcc/config/s390/s390-protos.h |  1 +
>  gcc/config/s390/s390.c        | 92 ++++++++++++++++++++++++++++++++---
>  gcc/config/s390/vector.md     | 31 ++++--------
>  3 files changed, 95 insertions(+), 29 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index 6f1bc07db17..029f7289fac 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool);
>  extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
>  extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
>  extern void s390_expand_vec_init (rtx, rtx);
> +extern rtx s390_build_signbit_mask (machine_mode);
>  extern rtx s390_return_addr_rtx (int, rtx);
>  extern rtx s390_back_chain_rtx (void);
>  extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 93894307d62..554c1adf40a 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -2450,6 +2450,54 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
>    return b;
>  }
>  
> +/* Return the associated integral mode of VEC_MODE.  Must be in sync with
> +   tointvec mode_attr.  */
> +static machine_mode
> +s390_tointvec (machine_mode vec_mode)
> +{
> +  switch (vec_mode)
> +    {
> +    case V1QImode:
> +      return V1QImode;
> +    case V2QImode:
> +      return V2QImode;
> +    case V4QImode:
> +      return V4QImode;
> +    case V8QImode:
> +      return V8QImode;
> +    case V16QImode:
> +      return V16QImode;
> +    case V1HImode:
> +      return V1HImode;
> +    case V2HImode:
> +      return V2HImode;
> +    case V4HImode:
> +      return V4HImode;
> +    case V8HImode:
> +      return V8HImode;
> +    case V1SImode:
> +    case V1SFmode:
> +      return V1SImode;
> +    case V2SImode:
> +    case V2SFmode:
> +      return V2SImode;
> +    case V4SImode:
> +    case V4SFmode:
> +      return V4SImode;
> +    case V1DImode:
> +    case V1DFmode:
> +      return V1DImode;
> +    case V2DImode:
> +    case V2DFmode:
> +      return V2DImode;
> +    case V1TImode:
> +    case V1TFmode:
> +      return V1TImode;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
>  /* Return true if OP contains the same contiguous bitfield in *all*
>     its elements.  START and END can be used to obtain the start and
>     end position of the bitfield.
> @@ -2467,6 +2515,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
>    rtx elt;
>    bool b;
>  
> +  /* Handle floats by bitcasting them to ints.  */
> +  op = gen_lowpart (s390_tointvec (GET_MODE (op)), op);
> +
>    gcc_assert (!!start == !!end);
>    if (!const_vec_duplicate_p (op, &elt)
>        || !CONST_INT_P (elt))
> @@ -6863,15 +6914,16 @@ s390_expand_vec_init (rtx target, rtx vals)
>      }
>  
>    /* Use vector gen mask or vector gen byte mask if possible.  */
> -  if (all_same && all_const_int
> -      && (XVECEXP (vals, 0, 0) == const0_rtx
> -	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
> -					       NULL, NULL)
> -	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
> +  if (all_same && all_const_int)
>      {
> -      emit_insn (gen_rtx_SET (target,
> -			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
> -      return;
> +      rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +      if (XVECEXP (vals, 0, 0) == const0_rtx
> +	  || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
> +	  || s390_bytemask_vector_p (vec, NULL))
> +	{
> +	  emit_insn (gen_rtx_SET (target, vec));
> +	  return;
> +	}
>      }
>  
>    /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
> @@ -6949,6 +7001,30 @@ s390_expand_vec_init (rtx target, rtx vals)
>      }
>  }
>  
> +/* Emit a vector constant that contains 1s in each element's sign bit position
> +   and 0s in other positions.  MODE is the desired constant's mode.  */
> +extern rtx
> +s390_build_signbit_mask (machine_mode mode)
> +{
> +  /* Generate the integral element mask value.  */
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
> +  wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
> +
> +  /* Emit the element mask rtx.  Use gen_lowpart in order to cast the integral
> +     value to the desired mode.  */
> +  machine_mode inner_int_mode = GET_MODE_INNER (s390_tointvec (mode));
> +  rtx mask = immed_wide_int_const (mask_val, inner_int_mode);
> +  mask = gen_lowpart (inner_mode, mask);
> +
> +  /* Emit the vector mask rtx by mode the element mask rtx.  */
> +  int nunits = GET_MODE_NUNITS (mode);
> +  rtvec v = rtvec_alloc (nunits);
> +  for (int i = 0; i < nunits; i++)
> +    RTVEC_ELT (v, i) = mask;
> +  return gen_rtx_CONST_VECTOR (mode, v);
> +}
> +
>  /* Structure to hold the initial parameters for a compare_and_swap operation
>     in HImode and QImode.  */
>  
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 2573b7d980a..9d76118f525 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -126,7 +126,8 @@ (define_mode_attr w [(V1QI "")  (V2QI "")  (V4QI "")  (V8QI "") (V16QI "")
>  		     (V1DI "")  (V2DI "")])
>  
>  ; Resulting mode of a vector comparison.  For floating point modes an
> -; integer vector mode with the same element size is picked.
> +; integer vector mode with the same element size is picked.  Must be in sync
> +; with s390_tointvec function.
>  (define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI "V8QI") (V16QI "V16QI")
>  			    (V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI "V8HI")
>  			    (V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
> @@ -1425,28 +1426,16 @@ (define_insn_and_split "*sminv2df3_vx"
>  
>  ; Vector copysign, implement using vector select
>  (define_expand "copysign<mode>3"
> -  [(set (match_operand:VFT 0 "register_operand" "")
> -	(if_then_else:VFT
> -	 (eq (match_dup 3)
> -	     (match_dup 4))
> -	 (match_operand:VFT 1 "register_operand"  "")
> -	 (match_operand:VFT 2 "register_operand"  "")))]
> +  [(set (match_operand:VFT            0 "register_operand" "")
> +	(ior:VFT
> +	 (and:VFT (match_operand:VFT  2 "register_operand" "")
> +		  (match_dup 3))
> +	 (and:VFT (not:VFT (match_dup 3))
> +		  (match_operand:VFT  1 "register_operand" ""))))]
>    "TARGET_VX"
>  {
> -  int sz = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
> -  int prec = GET_MODE_PRECISION (GET_MODE_INNER (<tointvec>mode));
> -  wide_int mask_val = wi::shwi (1l << (sz - 1), prec);
> -
> -  rtx mask = gen_reg_rtx (<tointvec>mode);
> -
> -  int nunits = GET_MODE_NUNITS (<tointvec>mode);
> -  rtvec v = rtvec_alloc (nunits);
> -  for (int i = 0; i < nunits; i++)
> -    RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ());
> -
> -  mask = gen_rtx_CONST_VECTOR (<tointvec>mode, v);
> -  operands[3] = force_reg (<tointvec>mode, mask);
> -  operands[4] = CONST0_RTX (<tointvec>mode);
> +  rtx mask = s390_build_signbit_mask (<MODE>mode);
> +  operands[3] = force_reg (<MODE>mode, mask);
>  })
>  
>  ;;
>
diff mbox series

Patch

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 6f1bc07db17..029f7289fac 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -121,6 +121,7 @@  extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool);
 extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
 extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
 extern void s390_expand_vec_init (rtx, rtx);
+extern rtx s390_build_signbit_mask (machine_mode);
 extern rtx s390_return_addr_rtx (int, rtx);
 extern rtx s390_back_chain_rtx (void);
 extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 93894307d62..554c1adf40a 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2450,6 +2450,54 @@  s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
   return b;
 }
 
+/* Return the associated integral mode of VEC_MODE.  Must be in sync with
+   tointvec mode_attr.  */
+static machine_mode
+s390_tointvec (machine_mode vec_mode)
+{
+  switch (vec_mode)
+    {
+    case V1QImode:
+      return V1QImode;
+    case V2QImode:
+      return V2QImode;
+    case V4QImode:
+      return V4QImode;
+    case V8QImode:
+      return V8QImode;
+    case V16QImode:
+      return V16QImode;
+    case V1HImode:
+      return V1HImode;
+    case V2HImode:
+      return V2HImode;
+    case V4HImode:
+      return V4HImode;
+    case V8HImode:
+      return V8HImode;
+    case V1SImode:
+    case V1SFmode:
+      return V1SImode;
+    case V2SImode:
+    case V2SFmode:
+      return V2SImode;
+    case V4SImode:
+    case V4SFmode:
+      return V4SImode;
+    case V1DImode:
+    case V1DFmode:
+      return V1DImode;
+    case V2DImode:
+    case V2DFmode:
+      return V2DImode;
+    case V1TImode:
+    case V1TFmode:
+      return V1TImode;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Return true if OP contains the same contiguous bitfield in *all*
    its elements.  START and END can be used to obtain the start and
    end position of the bitfield.
@@ -2467,6 +2515,9 @@  s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
   rtx elt;
   bool b;
 
+  /* Handle floats by bitcasting them to ints.  */
+  op = gen_lowpart (s390_tointvec (GET_MODE (op)), op);
+
   gcc_assert (!!start == !!end);
   if (!const_vec_duplicate_p (op, &elt)
       || !CONST_INT_P (elt))
@@ -6863,15 +6914,16 @@  s390_expand_vec_init (rtx target, rtx vals)
     }
 
   /* Use vector gen mask or vector gen byte mask if possible.  */
-  if (all_same && all_const_int
-      && (XVECEXP (vals, 0, 0) == const0_rtx
-	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
-					       NULL, NULL)
-	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
+  if (all_same && all_const_int)
     {
-      emit_insn (gen_rtx_SET (target,
-			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
-      return;
+      rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+      if (XVECEXP (vals, 0, 0) == const0_rtx
+	  || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
+	  || s390_bytemask_vector_p (vec, NULL))
+	{
+	  emit_insn (gen_rtx_SET (target, vec));
+	  return;
+	}
     }
 
   /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
@@ -6949,6 +7001,30 @@  s390_expand_vec_init (rtx target, rtx vals)
     }
 }
 
+/* Emit a vector constant that contains 1s in each element's sign bit position
+   and 0s in other positions.  MODE is the desired constant's mode.  */
+extern rtx
+s390_build_signbit_mask (machine_mode mode)
+{
+  /* Generate the integral element mask value.  */
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+  int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
+  wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
+
+  /* Emit the element mask rtx.  Use gen_lowpart in order to cast the integral
+     value to the desired mode.  */
+  machine_mode inner_int_mode = GET_MODE_INNER (s390_tointvec (mode));
+  rtx mask = immed_wide_int_const (mask_val, inner_int_mode);
+  mask = gen_lowpart (inner_mode, mask);
+
+  /* Emit the vector mask rtx by mode the element mask rtx.  */
+  int nunits = GET_MODE_NUNITS (mode);
+  rtvec v = rtvec_alloc (nunits);
+  for (int i = 0; i < nunits; i++)
+    RTVEC_ELT (v, i) = mask;
+  return gen_rtx_CONST_VECTOR (mode, v);
+}
+
 /* Structure to hold the initial parameters for a compare_and_swap operation
    in HImode and QImode.  */
 
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 2573b7d980a..9d76118f525 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -126,7 +126,8 @@  (define_mode_attr w [(V1QI "")  (V2QI "")  (V4QI "")  (V8QI "") (V16QI "")
 		     (V1DI "")  (V2DI "")])
 
 ; Resulting mode of a vector comparison.  For floating point modes an
-; integer vector mode with the same element size is picked.
+; integer vector mode with the same element size is picked.  Must be in sync
+; with s390_tointvec function.
 (define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI "V8QI") (V16QI "V16QI")
 			    (V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI "V8HI")
 			    (V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
@@ -1425,28 +1426,16 @@  (define_insn_and_split "*sminv2df3_vx"
 
 ; Vector copysign, implement using vector select
 (define_expand "copysign<mode>3"
-  [(set (match_operand:VFT 0 "register_operand" "")
-	(if_then_else:VFT
-	 (eq (match_dup 3)
-	     (match_dup 4))
-	 (match_operand:VFT 1 "register_operand"  "")
-	 (match_operand:VFT 2 "register_operand"  "")))]
+  [(set (match_operand:VFT            0 "register_operand" "")
+	(ior:VFT
+	 (and:VFT (match_operand:VFT  2 "register_operand" "")
+		  (match_dup 3))
+	 (and:VFT (not:VFT (match_dup 3))
+		  (match_operand:VFT  1 "register_operand" ""))))]
   "TARGET_VX"
 {
-  int sz = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
-  int prec = GET_MODE_PRECISION (GET_MODE_INNER (<tointvec>mode));
-  wide_int mask_val = wi::shwi (1l << (sz - 1), prec);
-
-  rtx mask = gen_reg_rtx (<tointvec>mode);
-
-  int nunits = GET_MODE_NUNITS (<tointvec>mode);
-  rtvec v = rtvec_alloc (nunits);
-  for (int i = 0; i < nunits; i++)
-    RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ());
-
-  mask = gen_rtx_CONST_VECTOR (<tointvec>mode, v);
-  operands[3] = force_reg (<tointvec>mode, mask);
-  operands[4] = CONST0_RTX (<tointvec>mode);
+  rtx mask = s390_build_signbit_mask (<MODE>mode);
+  operands[3] = force_reg (<MODE>mode, mask);
 })
 
 ;;