diff mbox

Improve bswap on nop non-base_addr reshuffles (PR tree-optimization/81396)

Message ID 20170713205155.GU2123@tucnak
State New
Headers show

Commit Message

Jakub Jelinek July 13, 2017, 8:51 p.m. UTC
Hi!

As mentioned in the PR, the following testcase started using recently
BIT_FIELD_REFs instead of MEM_REFs and thus the bswap pass, while it
properly determines the very long sequence of stmts is a nop transformation,
throws that away and doesn't optimize it, and no other optimizations
are able to optimize it away.

The patch attempts to not do anything if there is a simple identity
copy, but if the nop reshuffle needs more than one operation, it will
try to replace the final SSA_NAME BIT_IOR_EXPR assignment with assignment
from the src value (typically SSA_NAME).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-07-13  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/81396
	* tree-ssa-math-opts.c (struct symbolic_number): Add n_ops field.
	(init_symbolic_number): Initialize it to 1.
	(perform_symbolic_merge): Add n_ops from both operands into the new
	n_ops.
	(find_bswap_or_nop): Don't consider n->n == cmpnop computations
	without base_addr as useless if they need more than one operation.
	(bswap_replace): Handle !bswap case for NULL base_addr.

	* gcc.dg/tree-ssa/pr81396.c: New test.


	Jakub

Comments

Thomas Preudhomme July 14, 2017, 8:45 a.m. UTC | #1
Hi Jakub,

On 13/07/17 21:51, Jakub Jelinek wrote:
> Hi!
> 
> As mentioned in the PR, the following testcase started using recently
> BIT_FIELD_REFs instead of MEM_REFs and thus the bswap pass, while it
> properly determines the very long sequence of stmts is a nop transformation,
> throws that away and doesn't optimize it, and no other optimizations
> are able to optimize it away.
> 
> The patch attempts to not do anything if there is a simple identity
> copy, but if the nop reshuffle needs more than one operation, it will
> try to replace the final SSA_NAME BIT_IOR_EXPR assignment with assignment
> from the src value (typically SSA_NAME).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2017-07-13  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/81396
> 	* tree-ssa-math-opts.c (struct symbolic_number): Add n_ops field.
> 	(init_symbolic_number): Initialize it to 1.
> 	(perform_symbolic_merge): Add n_ops from both operands into the new
> 	n_ops.
> 	(find_bswap_or_nop): Don't consider n->n == cmpnop computations
> 	without base_addr as useless if they need more than one operation.
> 	(bswap_replace): Handle !bswap case for NULL base_addr.
> 
> 	* gcc.dg/tree-ssa/pr81396.c: New test.
> 
> --- gcc/tree-ssa-math-opts.c.jj	2017-07-06 20:31:43.000000000 +0200
> +++ gcc/tree-ssa-math-opts.c	2017-07-13 19:27:02.985354778 +0200
> @@ -1968,6 +1968,7 @@ struct symbolic_number {
>     tree alias_set;
>     tree vuse;
>     unsigned HOST_WIDE_INT range;
> +  int n_ops;
>   };
>   
>   #define BITS_PER_MARKER 8
> @@ -2083,6 +2084,7 @@ init_symbolic_number (struct symbolic_nu
>       return false;
>     n->range = size;
>     n->n = CMPNOP;
> +  n->n_ops = 1;
>   
>     if (size < 64 / BITS_PER_MARKER)
>       n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
> @@ -2293,6 +2295,7 @@ perform_symbolic_merge (gimple *source_s
>   	return NULL;
>       }
>     n->n = n1->n | n2->n;
> +  n->n_ops = n1->n_ops + n2->n_ops;
>   
>     return source_stmt;
>   }
> @@ -2588,7 +2591,7 @@ find_bswap_or_nop (gimple *stmt, struct
>       return NULL;
>   
>     /* Useless bit manipulation performed by code.  */
> -  if (!n->base_addr && n->n == cmpnop)
> +  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
>       return NULL;
>   
>     n->range *= BITS_PER_UNIT;
> @@ -2747,6 +2750,36 @@ bswap_replace (gimple *cur_stmt, gimple
>   	}
>         src = val_tmp;
>       }
> +  else if (!bswap)
> +    {

Would it make sense to add an assert right here checking that this is a cmpnop 
operation?

Looks good to me otherwise.

Best regards,

Thomas

> +      gimple *g;
> +      if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
> +	{
> +	  if (!is_gimple_val (src))
> +	    return false;
> +	  g = gimple_build_assign (tgt, NOP_EXPR, src);
> +	}
> +      else
> +	g = gimple_build_assign (tgt, src);
> +      if (n->range == 16)
> +	nop_stats.found_16bit++;
> +      else if (n->range == 32)
> +	nop_stats.found_32bit++;
> +      else
> +	{
> +	  gcc_assert (n->range == 64);
> +	  nop_stats.found_64bit++;
> +	}
> +      if (dump_file)
> +	{
> +	  fprintf (dump_file,
> +		   "%d bit reshuffle in target endianness found at: ",
> +		   (int) n->range);
> +	  print_gimple_stmt (dump_file, cur_stmt, 0);
> +	}
> +      gsi_replace (&gsi, g, true);
> +      return true;
> +    }
>     else if (TREE_CODE (src) == BIT_FIELD_REF)
>       src = TREE_OPERAND (src, 0);
>   
> --- gcc/testsuite/gcc.dg/tree-ssa/pr81396.c.jj	2017-07-13 19:22:10.191954620 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr81396.c	2017-07-13 19:24:16.638399984 +0200
> @@ -0,0 +1,25 @@
> +/* PR tree-optimization/81396 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +typedef unsigned long long uint64_t;
> +
> +uint64_t
> +foo (uint64_t word)
> +{
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_LONG__ == 8
> +  const unsigned char *const ptr = (const unsigned char *) &word;
> +  return ((uint64_t) ptr[0]
> +	  | ((uint64_t) ptr[1] << 8)
> +	  | ((uint64_t) ptr[2] << (8 * 2))
> +	  | ((uint64_t) ptr[3] << (8 * 3))
> +	  | ((uint64_t) ptr[4] << (8 * 4))
> +	  | ((uint64_t) ptr[5] << (8 * 5))
> +	  | ((uint64_t) ptr[6] << (8 * 6))
> +	  | ((uint64_t) ptr[7] << (8 * 7)));
> +#else
> +  return word;
> +#endif
> +}
> +
> +/* { dg-final { scan-tree-dump "return word_\[0-9]*\\(D\\);" "optimized" } } */
> 
> 	Jakub
>
Jakub Jelinek July 14, 2017, 8:56 a.m. UTC | #2
On Fri, Jul 14, 2017 at 09:45:39AM +0100, Thomas Preudhomme wrote:
> > +  else if (!bswap)
> > +    {
> 
> Would it make sense to add an assert right here checking that this is a
> cmpnop operation?

The earlier bswap_replace code to handle n->base_addr && !bswap doesn't
have anything like that either.

And the assert would be certainly non-trivial, like:
#if ENABLE_ASSERT_CHECKING
  uint64_t cmpnop = CMPNOP;
  /* Note, n->range has been multiplied by BITS_PER_UNIT in between, so it
     needs to be divided by it again.  */
  if (n->range / BITS_PER_UNIT < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range / BITS_PER_UNIT * BITS_PER_MARKER)) - 1;
      cmpnop &= mask;
    }
  gcc_assert (n->n == cmpnop);
#endif

I think trusting find_bswap_or_nop did the job right is enough.

	Jakub
Thomas Preudhomme July 14, 2017, 9:11 a.m. UTC | #3
On 14/07/17 09:56, Jakub Jelinek wrote:
> On Fri, Jul 14, 2017 at 09:45:39AM +0100, Thomas Preudhomme wrote:
>>> +  else if (!bswap)
>>> +    {
>>
>> Would it make sense to add an assert right here checking that this is a
>> cmpnop operation?
> 
> The earlier bswap_replace code to handle n->base_addr && !bswap doesn't
> have anything like that either.
> 
> And the assert would be certainly non-trivial, like:
> #if ENABLE_ASSERT_CHECKING
>    uint64_t cmpnop = CMPNOP;
>    /* Note, n->range has been multiplied by BITS_PER_UNIT in between, so it
>       needs to be divided by it again.  */
>    if (n->range / BITS_PER_UNIT < (int) sizeof (int64_t))
>      {
>        mask = ((uint64_t) 1 << (n->range / BITS_PER_UNIT * BITS_PER_MARKER)) - 1;
>        cmpnop &= mask;
>      }
>    gcc_assert (n->n == cmpnop);
> #endif
> 
> I think trusting find_bswap_or_nop did the job right is enough.

Oops — I meant to check that if !base_addr then !bswap, but that's already guaranteed
by the structure of the if. All good then.

Best regards,

Thomas
Richard Biener July 17, 2017, 7:36 a.m. UTC | #4
On Thu, 13 Jul 2017, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR, the following testcase started using recently
> BIT_FIELD_REFs instead of MEM_REFs and thus the bswap pass, while it
> properly determines the very long sequence of stmts is a nop transformation,
> throws that away and doesn't optimize it, and no other optimizations
> are able to optimize it away.
> 
> The patch attempts to not do anything if there is a simple identity
> copy, but if the nop reshuffle needs more than one operation, it will
> try to replace the final SSA_NAME BIT_IOR_EXPR assignment with assignment
> from the src value (typically SSA_NAME).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2017-07-13  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/81396
> 	* tree-ssa-math-opts.c (struct symbolic_number): Add n_ops field.
> 	(init_symbolic_number): Initialize it to 1.
> 	(perform_symbolic_merge): Add n_ops from both operands into the new
> 	n_ops.
> 	(find_bswap_or_nop): Don't consider n->n == cmpnop computations
> 	without base_addr as useless if they need more than one operation.
> 	(bswap_replace): Handle !bswap case for NULL base_addr.
> 
> 	* gcc.dg/tree-ssa/pr81396.c: New test.
> 
> --- gcc/tree-ssa-math-opts.c.jj	2017-07-06 20:31:43.000000000 +0200
> +++ gcc/tree-ssa-math-opts.c	2017-07-13 19:27:02.985354778 +0200
> @@ -1968,6 +1968,7 @@ struct symbolic_number {
>    tree alias_set;
>    tree vuse;
>    unsigned HOST_WIDE_INT range;
> +  int n_ops;
>  };

Ok if you document n_ops a bit in the comment above the structure.

Thanks,
Richard.

>  #define BITS_PER_MARKER 8
> @@ -2083,6 +2084,7 @@ init_symbolic_number (struct symbolic_nu
>      return false;
>    n->range = size;
>    n->n = CMPNOP;
> +  n->n_ops = 1;
>  
>    if (size < 64 / BITS_PER_MARKER)
>      n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
> @@ -2293,6 +2295,7 @@ perform_symbolic_merge (gimple *source_s
>  	return NULL;
>      }
>    n->n = n1->n | n2->n;
> +  n->n_ops = n1->n_ops + n2->n_ops;
>  
>    return source_stmt;
>  }
> @@ -2588,7 +2591,7 @@ find_bswap_or_nop (gimple *stmt, struct
>      return NULL;
>  
>    /* Useless bit manipulation performed by code.  */
> -  if (!n->base_addr && n->n == cmpnop)
> +  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
>      return NULL;
>  
>    n->range *= BITS_PER_UNIT;
> @@ -2747,6 +2750,36 @@ bswap_replace (gimple *cur_stmt, gimple
>  	}
>        src = val_tmp;
>      }
> +  else if (!bswap)
> +    {
> +      gimple *g;
> +      if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
> +	{
> +	  if (!is_gimple_val (src))
> +	    return false;
> +	  g = gimple_build_assign (tgt, NOP_EXPR, src);
> +	}
> +      else
> +	g = gimple_build_assign (tgt, src);
> +      if (n->range == 16)
> +	nop_stats.found_16bit++;
> +      else if (n->range == 32)
> +	nop_stats.found_32bit++;
> +      else
> +	{
> +	  gcc_assert (n->range == 64);
> +	  nop_stats.found_64bit++;
> +	}
> +      if (dump_file)
> +	{
> +	  fprintf (dump_file,
> +		   "%d bit reshuffle in target endianness found at: ",
> +		   (int) n->range);
> +	  print_gimple_stmt (dump_file, cur_stmt, 0);
> +	}
> +      gsi_replace (&gsi, g, true);
> +      return true;
> +    }
>    else if (TREE_CODE (src) == BIT_FIELD_REF)
>      src = TREE_OPERAND (src, 0);
>  
> --- gcc/testsuite/gcc.dg/tree-ssa/pr81396.c.jj	2017-07-13 19:22:10.191954620 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr81396.c	2017-07-13 19:24:16.638399984 +0200
> @@ -0,0 +1,25 @@
> +/* PR tree-optimization/81396 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +typedef unsigned long long uint64_t;
> +
> +uint64_t
> +foo (uint64_t word)
> +{
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_LONG__ == 8
> +  const unsigned char *const ptr = (const unsigned char *) &word;
> +  return ((uint64_t) ptr[0]
> +	  | ((uint64_t) ptr[1] << 8)
> +	  | ((uint64_t) ptr[2] << (8 * 2))
> +	  | ((uint64_t) ptr[3] << (8 * 3))
> +	  | ((uint64_t) ptr[4] << (8 * 4))
> +	  | ((uint64_t) ptr[5] << (8 * 5))
> +	  | ((uint64_t) ptr[6] << (8 * 6))
> +	  | ((uint64_t) ptr[7] << (8 * 7)));
> +#else
> +  return word;
> +#endif
> +}
> +
> +/* { dg-final { scan-tree-dump "return word_\[0-9]*\\(D\\);" "optimized" } } */
> 
> 	Jakub
> 
>
diff mbox

Patch

--- gcc/tree-ssa-math-opts.c.jj	2017-07-06 20:31:43.000000000 +0200
+++ gcc/tree-ssa-math-opts.c	2017-07-13 19:27:02.985354778 +0200
@@ -1968,6 +1968,7 @@  struct symbolic_number {
   tree alias_set;
   tree vuse;
   unsigned HOST_WIDE_INT range;
+  int n_ops;
 };
 
 #define BITS_PER_MARKER 8
@@ -2083,6 +2084,7 @@  init_symbolic_number (struct symbolic_nu
     return false;
   n->range = size;
   n->n = CMPNOP;
+  n->n_ops = 1;
 
   if (size < 64 / BITS_PER_MARKER)
     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
@@ -2293,6 +2295,7 @@  perform_symbolic_merge (gimple *source_s
 	return NULL;
     }
   n->n = n1->n | n2->n;
+  n->n_ops = n1->n_ops + n2->n_ops;
 
   return source_stmt;
 }
@@ -2588,7 +2591,7 @@  find_bswap_or_nop (gimple *stmt, struct
     return NULL;
 
   /* Useless bit manipulation performed by code.  */
-  if (!n->base_addr && n->n == cmpnop)
+  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
     return NULL;
 
   n->range *= BITS_PER_UNIT;
@@ -2747,6 +2750,36 @@  bswap_replace (gimple *cur_stmt, gimple
 	}
       src = val_tmp;
     }
+  else if (!bswap)
+    {
+      gimple *g;
+      if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
+	{
+	  if (!is_gimple_val (src))
+	    return false;
+	  g = gimple_build_assign (tgt, NOP_EXPR, src);
+	}
+      else
+	g = gimple_build_assign (tgt, src);
+      if (n->range == 16)
+	nop_stats.found_16bit++;
+      else if (n->range == 32)
+	nop_stats.found_32bit++;
+      else
+	{
+	  gcc_assert (n->range == 64);
+	  nop_stats.found_64bit++;
+	}
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "%d bit reshuffle in target endianness found at: ",
+		   (int) n->range);
+	  print_gimple_stmt (dump_file, cur_stmt, 0);
+	}
+      gsi_replace (&gsi, g, true);
+      return true;
+    }
   else if (TREE_CODE (src) == BIT_FIELD_REF)
     src = TREE_OPERAND (src, 0);
 
--- gcc/testsuite/gcc.dg/tree-ssa/pr81396.c.jj	2017-07-13 19:22:10.191954620 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr81396.c	2017-07-13 19:24:16.638399984 +0200
@@ -0,0 +1,25 @@ 
+/* PR tree-optimization/81396 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef unsigned long long uint64_t;
+
+uint64_t
+foo (uint64_t word)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_LONG__ == 8
+  const unsigned char *const ptr = (const unsigned char *) &word;
+  return ((uint64_t) ptr[0]
+	  | ((uint64_t) ptr[1] << 8)
+	  | ((uint64_t) ptr[2] << (8 * 2))
+	  | ((uint64_t) ptr[3] << (8 * 3))
+	  | ((uint64_t) ptr[4] << (8 * 4))
+	  | ((uint64_t) ptr[5] << (8 * 5))
+	  | ((uint64_t) ptr[6] << (8 * 6))
+	  | ((uint64_t) ptr[7] << (8 * 7)));
+#else
+  return word;
+#endif
+}
+
+/* { dg-final { scan-tree-dump "return word_\[0-9]*\\(D\\);" "optimized" } } */