diff mbox series

rs6000: Remove useless insns fed into lvx/stvx [PR97019]

Message ID ac352a45-8b78-bd10-a224-f97e3fa5db3a@linux.ibm.com
State New
Headers show
Series rs6000: Remove useless insns fed into lvx/stvx [PR97019] | expand

Commit Message

Kewen.Lin Sept. 14, 2020, 9:53 a.m. UTC
Hi,

This patch is to extend the existing function find_alignment_op to
check all defintions of base_reg are AND operations with mask -16B
to force the alignment.  If all are satifised, it passes all AND
operations and instructions in one vector to recombine_lvx_pattern
and recombine_stvx_pattern, they will remove all useless ANDs further.

Bootstrapped/regtested on powerpc64le-linux-gnu P8.

Is it OK for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

	* config/rs6000/rs6000-p8swap.c (insn_rtx_pair_t): New type.
	(find_alignment_op): Adjust to support multiple defintions which
	are all AND operations with the mask -16B.
	(recombine_lvx_pattern): Adjust to handle multiple AND operations
	from find_alignment_op.
	(recombine_stvx_pattern): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr97019.c: New test.

Comments

Segher Boessenkool Sept. 14, 2020, 5:19 p.m. UTC | #1
Hi!

On Mon, Sep 14, 2020 at 05:53:13PM +0800, Kewen.Lin wrote:
> This patch is to extend the existing function find_alignment_op to
> check all defintions of base_reg are AND operations with mask -16B
> to force the alignment.  If all are satifised, it passes all AND
> operations and instructions in one vector to recombine_lvx_pattern
> and recombine_stvx_pattern, they will remove all useless ANDs further.

Great :-)

> 	* config/rs6000/rs6000-p8swap.c (insn_rtx_pair_t): New type.

Please don't do that.  The "first" and "second" are completely
meaningless.  Also,  keeping it separate arrays can very well result in
better machine code, and certainly makes easier to read source code.

> +static bool
> +find_alignment_op (rtx_insn *insn, rtx base_reg,
> +		   vec<insn_rtx_pair_t> *and_insn_vec)

Don't name vecs "_vec" (just keep it "and_insn" here, or sometimes
and_insns is clearer).

> -  rtx and_operation = 0;
> +  rtx and_operation = NULL_RTX;

Don't change code randomly (to something arguably worse, even).

> -      if (and_operation != 0)
> -	break;
> +	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
> +	  and_operation = alignment_mask (and_insn);
> +
> +	  /* Stop if we find any one which doesn't align.  */
> +	  if (and_operation == NULL_RTX)
> +	    return false;

No.   if (!and_operation)  would be fine though :-)

> -  return and_operation;
> +  return and_operation != NULL_RTX;

It was fine already.  You can write  !!and_operation  if you want to
explicitly convert it to bool (but that is not helpful usually).

> +  bool all_and_p = find_alignment_op (insn, base_reg, &and_insn_vec);

This is not a predicate.  Do not name it _p please.

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
> @@ -0,0 +1,79 @@
> +/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */

Why only on LE?  (If there is a reason, the testcase should say; if
there is not, well, it shouldn't say it does then :-) )

Please resend with those things fixed.


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index 3d5dc7d8aae..2de2edeab67 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -183,6 +183,8 @@  class swap_web_entry : public web_entry_base
   unsigned int will_delete : 1;
 };
 
+typedef std::pair<rtx_insn *, rtx> insn_rtx_pair_t;
+
 enum special_handling_values {
   SH_NONE = 0,
   SH_CONST_VECTOR,
@@ -2095,15 +2097,19 @@  alignment_mask (rtx_insn *insn)
   return alignment_with_canonical_addr (SET_SRC (body));
 }
 
-/* Given INSN that's a load or store based at BASE_REG, look for a
-   feeding computation that aligns its address on a 16-byte boundary.
-   Return the rtx and its containing AND_INSN.  */
-static rtx
-find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
+/* Given INSN that's a load or store based at BASE_REG, check if
+   all of its feeding computations align its address on a 16-byte
+   boundary.  If so, return true and put all the computations
+   information with the form of pair {and_operation rtx, and_insn}
+   into AND_INSN_VEC.  Otherwise, return false.  */
+
+static bool
+find_alignment_op (rtx_insn *insn, rtx base_reg,
+		   vec<insn_rtx_pair_t> *and_insn_vec)
 {
   df_ref base_use;
   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
-  rtx and_operation = 0;
+  rtx and_operation = NULL_RTX;
 
   FOR_EACH_INSN_INFO_USE (base_use, insn_info)
     {
@@ -2111,22 +2117,30 @@  find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
 	continue;
 
       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
-      if (!base_def_link || base_def_link->next)
-	break;
+      if (!base_def_link)
+	return false;
 
-      /* With stack-protector code enabled, and possibly in other
-	 circumstances, there may not be an associated insn for 
-	 the def.  */
-      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
-	break;
+      while (base_def_link)
+	{
+	  /* With stack-protector code enabled, and possibly in other
+	     circumstances, there may not be an associated insn for
+	     the def.  */
+	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
+	    return false;
 
-      *and_insn = DF_REF_INSN (base_def_link->ref);
-      and_operation = alignment_mask (*and_insn);
-      if (and_operation != 0)
-	break;
+	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
+	  and_operation = alignment_mask (and_insn);
+
+	  /* Stop if we find any one which doesn't align.  */
+	  if (and_operation == NULL_RTX)
+	    return false;
+
+	  and_insn_vec->safe_push (std::make_pair (and_insn, and_operation));
+	  base_def_link = base_def_link->next;
+	}
     }
 
-  return and_operation;
+  return and_operation != NULL_RTX;
 }
 
 struct del_info { bool replace; rtx_insn *replace_insn; };
@@ -2143,10 +2157,10 @@  recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = XEXP (SET_SRC (body), 0);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<insn_rtx_pair_t> and_insn_vec;
+  bool all_and_p = find_alignment_op (insn, base_reg, &and_insn_vec);
 
-  if (and_operation != 0)
+  if (all_and_p)
     {
       df_ref def;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2168,25 +2182,34 @@  recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new LVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
-
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  rtx new_reg = NULL_RTX;
+	  rtx and_op = NULL_RTX;
+	  for (unsigned i = 0; i < and_insn_vec.length (); ++i)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new LVX instruction.  */
+	      insn_rtx_pair_t and_pair = and_insn_vec[i];
+	      rtx_insn *and_insn = and_pair.first;
+	      and_op = and_pair.second;
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		new_reg = gen_reg_rtx (GET_MODE (and_base));
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
+
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg,
+				       XEXP (and_op, 1));
 	  SET_SRC (body) = mem;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
-		  
+
 	  if (dump_file)
 	    fprintf (dump_file, "lvx opportunity found at %d\n",
 		     INSN_UID (insn));
@@ -2205,10 +2228,10 @@  recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = SET_DEST (body);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<insn_rtx_pair_t> and_insn_vec;
+  bool all_and_p = find_alignment_op (insn, base_reg, &and_insn_vec);
 
-  if (and_operation != 0)
+  if (all_and_p)
     {
       rtx src_reg = XEXP (SET_SRC (body), 0);
       df_ref src_use;
@@ -2234,25 +2257,34 @@  recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new STVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
-
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  rtx new_reg = NULL_RTX;
+	  rtx and_op = NULL_RTX;
+	  for (unsigned i = 0; i < and_insn_vec.length (); ++i)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new STVX instruction.  */
+	      insn_rtx_pair_t and_pair = and_insn_vec[i];
+	      rtx_insn *and_insn = and_pair.first;
+	      and_op = and_pair.second;
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		new_reg = gen_reg_rtx (GET_MODE (and_base));
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
+
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg,
+				       XEXP (and_op, 1));
 	  SET_SRC (body) = src_reg;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
-		  
+
 	  if (dump_file)
 	    fprintf (dump_file, "stvx opportunity found at %d\n",
 		     INSN_UID (insn));
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97019.c b/gcc/testsuite/gcc.target/powerpc/pr97019.c
new file mode 100644
index 00000000000..d6bb08ad260
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
@@ -0,0 +1,79 @@ 
+/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* Test there are no useless instructions "rldicr x,y,0,59"
+   to align the addresses for lvx/stvx.  */
+
+extern int a, b, c;
+extern vector unsigned long long ev5, ev6, ev7, ev8;
+extern int dummy (vector unsigned long long);
+
+int test_vec_ld(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4, v9;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      v1 = __builtin_vec_ld(16, (unsigned long long *)e);
+      v2 = __builtin_vec_ld(32, (unsigned long long *)e);
+      v3 = __builtin_vec_ld(48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    v5 = __builtin_vec_ld(16, (unsigned long long *)e);
+    v6 = __builtin_vec_ld(32, (unsigned long long *)e);
+    v7 = __builtin_vec_ld(48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+int test_vec_st(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      __builtin_vec_st(v1, 16, (unsigned long long *)e);
+      __builtin_vec_st(v2, 32, (unsigned long long *)e);
+      __builtin_vec_st(v3, 48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    __builtin_vec_st(v5, 16, (unsigned long long *)e);
+    __builtin_vec_st(v6, 32, (unsigned long long *)e);
+    __builtin_vec_st(v7, 48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+/* { dg-final { scan-assembler-not "rldicr\[ \t\]+\[0-9\]+,\[0-9\]+,0,59" } } */