diff mbox series

Remove redundant zero extend

Message ID MN2PR04MB55500F363342CEBC75AAD7E3EAFC0@MN2PR04MB5550.namprd04.prod.outlook.com
State New
Headers show
Series Remove redundant zero extend | expand

Commit Message

Li, Pan2 via Gcc-patches March 11, 2020, 1:04 p.m. UTC
This patch is a code density oriented and attempt to remove redundant sign/zero extension from assignment statement.
The approach taken is to use VRP data while expanding the assignment to RTL to determine whether a sign/zero extension is necessary.
Thought the motivation of the patch is code density but it also good for speed.

for example:
extern unsigned int func ();

  unsigned char
  foo (unsigned int arg)
  {
    if (arg == 2)
      return 0;

    return (func() == arg || arg == 7);
  }

the result of the comparison in the return will yield a False or True, which will be converted to integer and then casting to unsigned char.
this casting from integer to unsigned char is redundant because the value is either 0 or 1.

this patch is targeting the RISCV-32bit only.
This patch has been tested on a real embedded project and saved about 0.2% of code size.

P.S. I have an FSF license under Western Digital incorporation.
P.S. I've attached the patch as a file in case my email client corrupted the patch itself

Regards,
Nidal Faour

Comments

Li, Pan2 via Gcc-patches March 11, 2020, 1:11 p.m. UTC | #1
On Wed, 2020-03-11 at 13:04 +0000, Nidal Faour via Gcc-patches wrote:

[...]

> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index f91af78a302..c5a701e08af 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-02-19  Nidal Faour  <nidal.faour@wdc.com>
> +       * gcc.target/riscv/masking-to-byte-1.c: New test
> +       * gcc.target/riscv/masking-to-byte-2.c: New test
> +
> 2020-03-09  Marek Polacek  <polacek@redhat.com>
>                 PR c++/92031 - bogus taking address of rvalue error.

If I'm reading it right, the patch is missing these new testcases.

Dave
Li, Pan2 via Gcc-patches March 11, 2020, 1:34 p.m. UTC | #2
Attaching the correct patch. 

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 9864e4344d2..c3739b2f331 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+#include <algorithm>
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -73,6 +74,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-address.h"
 #include "output.h"
 #include "builtins.h"
+#include "tree-vrp.h"
+
 
 /* Some systems use __main in a way incompatible with its use in gcc, in these
    cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
@@ -3640,6 +3643,107 @@ expand_clobber (tree lhs)
     }
 }
 
+/* Print the ranges of the operand and save it to the dump file
+   used for debug purposes only.  */
+
+void
+print_range (tree operand, wide_int min, wide_int max)
+{
+  pretty_printer buffer;
+  pp_needs_newline (&buffer) = false;
+  buffer.buffer->stream = dump_file;
+  fprintf (dump_file, "Range for lhs: [");
+  pp_wide_int (&buffer, min, TYPE_SIGN (TREE_TYPE (operand)));
+  pp_printf (&buffer, ", ");
+  pp_wide_int (&buffer, max, TYPE_SIGN (TREE_TYPE (operand)));
+  pp_printf (&buffer, "]");
+  pp_flush (&buffer);
+  fprintf (dump_file, "\n");
+}
+
+
+
+/* Check that REG and SUBREG modes match between src and dest,
+   and that there is a cast to be removed.  */
+
+bool
+cast_to_remove_p (rtx src, rtx dest, machine_mode from
+				, machine_mode to)
+{
+  if (GET_CODE (src) != SUBREG
+      || GET_CODE (dest) != SUBREG
+      || GET_CODE (SUBREG_REG (src)) != REG
+      || GET_CODE (SUBREG_REG (dest)) != REG
+      || GET_MODE (src) != GET_MODE (dest)
+      || GET_MODE (SUBREG_REG (src)) != GET_MODE (SUBREG_REG (dest))
+      || GET_MODE (src) != to
+      || GET_MODE (SUBREG_REG (src)) != from)
+    return false;
+  return true;
+}
+
+bool
+remove_cast_p (rtx temp, rtx target, gassign *assign_stmt)
+{
+  enum gimple_code code;
+  value_range_kind lhs_range, rhs_range;
+  unsigned int ops_num;
+  wide_int lhs_min, lhs_max, rhs_min, rhs_max;
+  if (gimple_assign_cast_p (assign_stmt))
+  {
+    code = gimple_code (assign_stmt);
+    ops_num = gimple_num_ops (assign_stmt);
+    if (code == GIMPLE_ASSIGN && ops_num < 3)
+    {
+      tree lhs = gimple_assign_lhs (assign_stmt);
+      if (POINTER_TYPE_P (TREE_TYPE (lhs)))
+       return false;
+      lhs_range = get_range_info (lhs, &lhs_min, &lhs_max);
+      if (dump_file && lhs_range == VR_RANGE)
+       print_range (lhs, lhs_min, lhs_max);
+
+      tree rhs = gimple_assign_rhs1 (assign_stmt);
+      if (POINTER_TYPE_P (TREE_TYPE (rhs)))
+       return false;
+      rhs_range = get_range_info (rhs, &rhs_min, &rhs_max);
+      if (rhs_range == VR_RANGE)
+      {
+       unsigned int rhs_precision
+	 = std::max (wi::min_precision (rhs_max, TYPE_SIGN (TREE_TYPE (rhs))),
+		wi::min_precision (rhs_min, TYPE_SIGN (TREE_TYPE (rhs))));
+       unsigned int lhs_precision = TYPE_PRECISION (TREE_TYPE (lhs));
+       if (lhs_precision > rhs_precision)
+       {
+
+	 if (dump_file)
+	 {
+	   print_range (rhs, rhs_min, rhs_max);
+	   if (lhs_precision > rhs_precision)
+		fprintf (dump_file,
+		    "EXPAND-CAST: This casting can be optimized\n");
+	 }
+
+	 return cast_to_remove_p (temp,
+			 target,
+			 TYPE_MODE (TREE_TYPE (rhs)),
+			 TYPE_MODE (TREE_TYPE (lhs)));
+       }
+       else
+	 if (dump_file)
+	   fprintf (dump_file, "EXPAND-CAST: lhs_precision < rhs_precision\n");
+      }
+      else
+	if (dump_file)
+	  fprintf (dump_file, "EXPAND-CAST: Range is not VR_RANGE\n");
+    }
+    else
+      if (dump_file)
+       fprintf (dump_file, "EXPAND-CAST: Got more than 2 ops \n");
+  }
+  return false;
+}
+
+
 /* A subroutine of expand_gimple_stmt, expanding one gimple statement
    STMT that doesn't require special handling for outgoing edges.  That
    is no tailcalls and no GIMPLE_COND.  */
@@ -3801,6 +3905,11 @@ expand_gimple_stmt_1 (gimple *stmt)
 		    temp = convert_modes (GET_MODE (SUBREG_REG (target)),
 					  GET_MODE (target), temp, unsignedp);
 		  }
+		if (targetm.remove_redundant_cast)
+		  if (remove_cast_p (temp,
+					target,
+					assign_stmt))
+		    temp = SUBREG_REG (temp);
 
 		convert_move (SUBREG_REG (target), temp, unsignedp);
 	      }
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 94b5ac01762..504b1abf85e 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5226,6 +5226,9 @@ riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg
 
+#undef TARGET_REMOVE_REDUNDANT_CAST
+#define TARGET_REMOVE_REDUNDANT_CAST true
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 19985adac3e..3a0d4813d96 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12194,3 +12194,9 @@ This target hook can be used to generate a target-specific code
 @deftypefn {Target Hook} void TARGET_RUN_TARGET_SELFTESTS (void)
 If selftests are enabled, run any selftests for this target.
 @end deftypefn
+
+@deftypevr {Target Hook} bool TARGET_REMOVE_REDUNDANT_CAST
+If this flag is true, @code{remove_cast_p} will be called
+and remove unneeded cast
+The default is false.
+@end deftypevr
\ No newline at end of file
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 1a16150bfc5..984e18e1697 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8178,3 +8178,5 @@ maintainer is familiar with.
 @hook TARGET_SPECULATION_SAFE_VALUE
 
 @hook TARGET_RUN_TARGET_SELFTESTS
+
+@hook TARGET_REMOVE_REDUNDANT_CAST
\ No newline at end of file
diff --git a/gcc/target.def b/gcc/target.def
index b5e82ff826e..014e5a22d4d 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -6770,6 +6770,15 @@ following it should not be run.  Usually true only for virtual assembler\n\
 targets.",
 bool, false)
 
+/* True if remove_redundant_cast should be called to remove unneeded
+   casting.  */
+DEFHOOKPOD
+(remove_redundant_cast,
+ "If this flag is true, @code{remove_cast_p} will be called\n\
+and remove unneeded cast\n\
+The default is false.",
+ bool, false)
+
 /* Leave the boolean fields at the end.  */
 
 /* Functions related to mode switching.  */
@@ -6835,4 +6844,3 @@ DEFHOOK
 
 /* Close the 'struct gcc_target' definition.  */
 HOOK_VECTOR_END (C90_EMPTY_HACK)
-
diff --git a/gcc/testsuite/gcc.target/riscv/masking-to-byte-1.c b/gcc/testsuite/gcc.target/riscv/masking-to-byte-1.c
new file mode 100644
index 00000000000..4001deaef59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/masking-to-byte-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { riscv32*-*-* } } } */
+/* { dg-options "-Os" } */
+
+extern unsigned int func ();
+
+unsigned char
+foo (unsigned int arg)
+{
+  if (arg == 2)
+    return 0;
+
+  return (func() == arg || arg == 7);
+}
+
+/* { dg-final { scan-assembler-not "andi\\s+\\w\\d,\\w\\d,0xff" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/masking-to-byte-2.c b/gcc/testsuite/gcc.target/riscv/masking-to-byte-2.c
new file mode 100644
index 00000000000..96e2f3a6f71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/masking-to-byte-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+
+/* This testcase is to show that the patch does not remove all the sign/zero extension.  */
+extern void bar(unsigned int x, unsigned int y, unsigned char index);
+
+void foo(void)
+{
+  unsigned int idx;
+
+  for(idx = 0; idx < 100; idx++)
+  {
+    bar(3, 0, (unsigned char)idx);
+  }
+}
+
+/* { dg-final { scan-assembler "andi\\s+\\w\\d,\\w\\d,0xff" } } */
\ No newline at end of file

-------------------------------------------
Change log:

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 085ef66a207..ffe7e061985 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2020-02-19  Nidal Faour <nidal.faour@wdc.com>
+
+	* doc/tm.texi.in (TARGET_REMOVE_REDUNDANT_CAST): New hook.
+	* doc/tm.texi: Regenerate.
+	*cfgexpand.c (algorith.h): New include.
+	(tree-vrp.h): Likewise
+	(print_range): New function
+	(check_assign_can_be_free): Likewise
+	(adjust_assign_to_remove_cast): Likewise
+	(expand_gimple_stmt_1): Call adjust_assign_to_remove_cast
+	*config/riscv/riscv.c (TARGET_REMOVE_REDUNDANT_CAST): Define
+	*target.def (remove_redundant_cast): New target hook
+
 2020-03-09  Jason Merrill  <jason@redhat.com>
 
 	* gdbinit.in (pgs): Fix typo in documentation.

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f91af78a302..c5a701e08af 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2020-02-19  Nidal Faour  <nidal.faour@wdc.com>
+       * gcc.target/riscv/masking-to-byte-1.c: New test
+       * gcc.target/riscv/masking-to-byte-2.c: New test
+
 2020-03-09  Marek Polacek  <polacek@redhat.com>
 
 	PR c++/92031 - bogus taking address of rvalue error.


Regards,
Nidal Faour

>-----Original Message-----
>From: David Malcolm <dmalcolm@redhat.com>
>Sent: Wednesday, 11 March 2020 15:12
>To: Nidal Faour <Nidal.Faour@wdc.com>; gcc-patches@gcc.gnu.org
>Cc: Ofer Shinaar <Ofer.Shinaar@wdc.com>; Craig Blackmore
><craig.blackmore@embecosm.com>
>Subject: Re: Remove redundant zero extend
>
>CAUTION: This email originated from outside of Western Digital. Do not click
>on links or open attachments unless you recognize the sender and know that
>the content is safe.
>
>
>On Wed, 2020-03-11 at 13:04 +0000, Nidal Faour via Gcc-patches wrote:
>
>[...]
>
>> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index
>> f91af78a302..c5a701e08af 100644
>> --- a/gcc/testsuite/ChangeLog
>> +++ b/gcc/testsuite/ChangeLog
>> @@ -1,3 +1,7 @@
>> +2020-02-19  Nidal Faour  <nidal.faour@wdc.com>
>> +       * gcc.target/riscv/masking-to-byte-1.c: New test
>> +       * gcc.target/riscv/masking-to-byte-2.c: New test
>> +
>> 2020-03-09  Marek Polacek  <polacek@redhat.com>
>>                 PR c++/92031 - bogus taking address of rvalue error.
>
>If I'm reading it right, the patch is missing these new testcases.
>
>Dave
Li, Pan2 via Gcc-patches March 12, 2020, 3:05 a.m. UTC | #3
On Wed, 2020-03-11 at 13:04 +0000, Nidal Faour via Gcc-patches wrote:
> This patch is a code density oriented and attempt to remove redundant sign/zero
> extension from assignment statement.
> The approach taken is to use VRP data while expanding the assignment to RTL to
> determine whether a sign/zero extension is necessary.
> Thought the motivation of the patch is code density but it also good for speed.
> 
> for example:
> extern unsigned int func ();
> 
>   unsigned char
>   foo (unsigned int arg)
>   {
>     if (arg == 2)
>       return 0;
> 
>     return (func() == arg || arg == 7);
>   }
> 
> the result of the comparison in the return will yield a False or True, which
> will be converted to integer and then casting to unsigned char.
> this casting from integer to unsigned char is redundant because the value is
> either 0 or 1.
> 
> this patch is targeting the RISCV-32bit only.
> This patch has been tested on a real embedded project and saved about 0.2% of
> code size.
> 
> P.S. I have an FSF license under Western Digital incorporation.
> P.S. I've attached the patch as a file in case my email client corrupted the
> patch itself
Just an FYI.  We're at stage4 in our development cycle, as a result most
developers are focused on regression bugfixing until the gcc-10 release is made. 
I've put your patch into the queue of things to evaluate for gcc-11.

Thanks,
jeff
>
Li, Pan2 via Gcc-patches March 12, 2020, 9:38 a.m. UTC | #4
On Thu, Mar 12, 2020 at 4:06 AM Jeff Law via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Wed, 2020-03-11 at 13:04 +0000, Nidal Faour via Gcc-patches wrote:
> > This patch is a code density oriented and attempt to remove redundant sign/zero
> > extension from assignment statement.
> > The approach taken is to use VRP data while expanding the assignment to RTL to
> > determine whether a sign/zero extension is necessary.
> > Thought the motivation of the patch is code density but it also good for speed.
> >
> > for example:
> > extern unsigned int func ();
> >
> >   unsigned char
> >   foo (unsigned int arg)
> >   {
> >     if (arg == 2)
> >       return 0;
> >
> >     return (func() == arg || arg == 7);
> >   }
> >
> > the result of the comparison in the return will yield a False or True, which
> > will be converted to integer and then casting to unsigned char.
> > this casting from integer to unsigned char is redundant because the value is
> > either 0 or 1.
> >
> > this patch is targeting the RISCV-32bit only.
> > This patch has been tested on a real embedded project and saved about 0.2% of
> > code size.
> >
> > P.S. I have an FSF license under Western Digital incorporation.
> > P.S. I've attached the patch as a file in case my email client corrupted the
> > patch itself
> Just an FYI.  We're at stage4 in our development cycle, as a result most
> developers are focused on regression bugfixing until the gcc-10 release is made.
> I've put your patch into the queue of things to evaluate for gcc-11.

Note there was also more general development in the past with regarding to
zext/sext removal and using VRP data for that.  Google for 'type promotion
pass' in the ml archives.  Quickly scanning your patch makes it look quite
ad-hoc to me.

Richard.

> Thanks,
> jeff
> >
>
Jim Wilson March 12, 2020, 9:43 p.m. UTC | #5
On Thu, Mar 12, 2020 at 2:38 AM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Mar 12, 2020 at 4:06 AM Jeff Law via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Wed, 2020-03-11 at 13:04 +0000, Nidal Faour via Gcc-patches wrote:
> > > This patch is a code density oriented and attempt to remove redundant sign/zero
> > > extension from assignment statement.
> > > The approach taken is to use VRP data while expanding the assignment to RTL to
> > > determine whether a sign/zero extension is necessary.
> > > Thought the motivation of the patch is code density but it also good for speed.
> > >
> > > for example:
> > > extern unsigned int func ();
> > >
> > >   unsigned char
> > >   foo (unsigned int arg)
> > >   {
> > >     if (arg == 2)
> > >       return 0;
> > >
> > >     return (func() == arg || arg == 7);
> > >   }
> > >

When we reach combine, we have
  if func result == arg
     r73 = 1
  else
     r73 = arg-7 == 0
  r75 = zero_extend subreg:QI r73

On this platform a scc operation always produces 0 or 1, so the
zero_extend is redundant.  We would need something pushing the zero
extend back into the if_then_else and then noticing that it is
redundant on both branches, or something propagating the value ranges
forward.

We almost have the latter bit in combine.  Right before we start
combining instructions, when we set nonzero_sign_valid to 1, we have

(gdb) print reg_stat[73]
$1 = (reg_stat_type &) @0x28ad908: {last_death = 0x7ffff5de0500,
  last_set = 0x7ffff5de02c0, last_set_value = 0x7ffff5ddc490,
  last_set_table_tick = 6, last_set_label = 5, last_set_nonzero_bits = 1,
  last_set_sign_bit_copies = 31 '\037', last_set_mode = E_SImode,
  last_set_invalid = 0 '\000', sign_bit_copies = 31 '\037',
  nonzero_bits = 1, truncation_label = 0, truncated_to_mode = E_VOIDmode}
(gdb)

so we know that r73 has 31 valid sign bits and only one bit is
nonzero.  This info is still valid when we reach the zero extend insn.
Unfortunately, we don't use this info to do anything useful.  The
zero_extend is the only insn in its basic block (not counting the
unconditional branch that ends the block), so it doesn't get combined
with anything, and hence doesn't get simplified.  If it did get
combined with something, then expand_compound_operation would
eliminate the zero_extend and subreg.  That makes me wonder if there
is any value in trying to handle single-instruction combinations, just
to get the simplifications we can get from the nonzero_bits info, but
it isn't obvious how we would detect when a single insn combine is
better than the original insn.  Maybe rtx cost info can be used for
that.

I looked at combine because I'm familiar with that pass, but the ree
pass might be the right place to handle this.  I don't know if it has
any support for handling if statements.  If not, maybe it could be
extended to handle cases like this.

Jim
Segher Boessenkool March 13, 2020, 12:54 a.m. UTC | #6
Hi!

On Thu, Mar 12, 2020 at 02:43:06PM -0700, Jim Wilson wrote:
> I looked at combine because I'm familiar with that pass, but the ree
> pass might be the right place to handle this.

IMO, part of this should perhaps be done on Gimple already.  But the part
that should be done on RTL should be done *early*, it should start before
the loop optimisations, and it should be *finished* before combine.

>  I don't know if it has
> any support for handling if statements.  If not, maybe it could be
> extended to handle cases like this.

No, combine cannot do anything that crosses basic blocks.


Segher
Li, Pan2 via Gcc-patches March 13, 2020, 9:29 p.m. UTC | #7
On Thu, 2020-03-12 at 14:43 -0700, Jim Wilson wrote:
> On Thu, Mar 12, 2020 at 2:38 AM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> > On Thu, Mar 12, 2020 at 4:06 AM Jeff Law via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > > On Wed, 2020-03-11 at 13:04 +0000, Nidal Faour via Gcc-patches wrote:
> > > > This patch is a code density oriented and attempt to remove redundant
> > > > sign/zero
> > > > extension from assignment statement.
> > > > The approach taken is to use VRP data while expanding the assignment to
> > > > RTL to
> > > > determine whether a sign/zero extension is necessary.
> > > > Thought the motivation of the patch is code density but it also good for
> > > > speed.
> > > > 
> > > > for example:
> > > > extern unsigned int func ();
> > > > 
> > > >   unsigned char
> > > >   foo (unsigned int arg)
> > > >   {
> > > >     if (arg == 2)
> > > >       return 0;
> > > > 
> > > >     return (func() == arg || arg == 7);
> > > >   }
> > > > 
> 
> When we reach combine, we have
>   if func result == arg
>      r73 = 1
>   else
>      r73 = arg-7 == 0
>   r75 = zero_extend subreg:QI r73
> 
> On this platform a scc operation always produces 0 or 1, so the
> zero_extend is redundant.  We would need something pushing the zero
> extend back into the if_then_else and then noticing that it is
> redundant on both branches, or something propagating the value ranges
> forward.
> 
> We almost have the latter bit in combine.  Right before we start
> combining instructions, when we set nonzero_sign_valid to 1, we have
> 
> (gdb) print reg_stat[73]
> $1 = (reg_stat_type &) @0x28ad908: {last_death = 0x7ffff5de0500,
>   last_set = 0x7ffff5de02c0, last_set_value = 0x7ffff5ddc490,
>   last_set_table_tick = 6, last_set_label = 5, last_set_nonzero_bits = 1,
>   last_set_sign_bit_copies = 31 '\037', last_set_mode = E_SImode,
>   last_set_invalid = 0 '\000', sign_bit_copies = 31 '\037',
>   nonzero_bits = 1, truncation_label = 0, truncated_to_mode = E_VOIDmode}
> (gdb)
> 
> so we know that r73 has 31 valid sign bits and only one bit is
> nonzero.  This info is still valid when we reach the zero extend insn.
> Unfortunately, we don't use this info to do anything useful.  The
> zero_extend is the only insn in its basic block (not counting the
> unconditional branch that ends the block), so it doesn't get combined
> with anything, and hence doesn't get simplified.  If it did get
> combined with something, then expand_compound_operation would
> eliminate the zero_extend and subreg.  That makes me wonder if there
> is any value in trying to handle single-instruction combinations, just
> to get the simplifications we can get from the nonzero_bits info, but
> it isn't obvious how we would detect when a single insn combine is
> better than the original insn.  Maybe rtx cost info can be used for
> that.
> 
> I looked at combine because I'm familiar with that pass, but the ree
> pass might be the right place to handle this.  I don't know if it has
> any support for handling if statements.  If not, maybe it could be
> extended to handle cases like this.
I've speculated that handling (any_extend (subreg)) would be helpful in ree.c. It
doesn't seem like it'd be terribly hard.  The structure of ree.c is painful to
follow sometimes, but it's doable.

jeff
Jeff Law Nov. 16, 2020, 10:47 p.m. UTC | #8
On 3/11/20 7:34 AM, Nidal Faour via Gcc-patches wrote:
> This patch is a code density oriented and attempt to remove redundant sign/zero extension from assignment statement.
> The approach taken is to use VRP data while expanding the assignment to RTL to determine whether a sign/zero extension is necessary.
> Thought the motivation of the patch is code density but it also good for speed.
>
> for example:
> extern unsigned int func ();
>
>   unsigned char
>   foo (unsigned int arg)
>   {
>     if (arg == 2)
>       return 0;
>
>     return (func() == arg || arg == 7);
>   }
>
> the result of the comparison in the return will yield a False or True, which will be converted to integer and then casting to unsigned char.
> this casting from integer to unsigned char is redundant because the value is either 0 or 1.
>
> this patch is targeting the RISCV-32bit only.
> This patch has been tested on a real embedded project and saved about 0.2% of code size.
>
> P.S. I have an FSF license under Western Digital incorporation.
> P.S. I've attached the patch as a file in case my email client corrupted the patch itself
So at a high level, this really shouldn't be a target dependent
transformation AFAICT.  So I think all the hook related stuff should be
removed.  Furthermore, REE does a fair amount of this kind of stuff.  I
realize it doesn't handle the testcase you've included, but if you're
looking to improve GCC's ability to identify and remove redundant
extensions, you might want to get familiar with REE.  It's most glaring
deficiencies are that it doesn't know about SUBREGS which are a form of
extension and that it does not handle cases where a copy is needed and
there are multiple definitions that reach the extension we're trying to
eliminate.

Anyway, the goal of this patch appears to be to remove extensions that
result from expanding a NOP_EXPR (and the other conversion tree codes,
though NOP_EXPR will be the most common case we'll see).





> Attaching the correct patch. 
>
> diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
> index 9864e4344d2..c3739b2f331 100644
> --- a/gcc/cfgexpand.c
> +++ b/gcc/cfgexpand.c
> @@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public License
>  along with GCC; see the file COPYING3.  If not see
>  <http://www.gnu.org/licenses/>.  */
>  
> +#include <algorithm>

Why did you need <algorithm>?  That seems odd at best.


> @@ -3640,6 +3643,107 @@ expand_clobber (tree lhs)
>      }
>  }
>  
> +/* Print the ranges of the operand and save it to the dump file
> +   used for debug purposes only.  */
> +
> +void
> +print_range (tree operand, wide_int min, wide_int max)
> +{
> +  pretty_printer buffer;
> +  pp_needs_newline (&buffer) = false;
> +  buffer.buffer->stream = dump_file;
> +  fprintf (dump_file, "Range for lhs: [");
> +  pp_wide_int (&buffer, min, TYPE_SIGN (TREE_TYPE (operand)));
> +  pp_printf (&buffer, ", ");
> +  pp_wide_int (&buffer, max, TYPE_SIGN (TREE_TYPE (operand)));
> +  pp_printf (&buffer, "]");
> +  pp_flush (&buffer);
> +  fprintf (dump_file, "\n");
> +}

I'm surprised we don't already have a range dumper you could use.  Also
note that you're querying global ranges.  You might consider using the
new ranger APIs which allow you to get context sensitive range
information which in turn may allow you to remove more useless extensions.


> +
> +
> +
> +/* Check that REG and SUBREG modes match between src and dest,
> +   and that there is a cast to be removed.  */
> +
> +bool
> +cast_to_remove_p (rtx src, rtx dest, machine_mode from
> +				, machine_mode to)
> +{
> +  if (GET_CODE (src) != SUBREG
> +      || GET_CODE (dest) != SUBREG
> +      || GET_CODE (SUBREG_REG (src)) != REG
> +      || GET_CODE (SUBREG_REG (dest)) != REG
> +      || GET_MODE (src) != GET_MODE (dest)
> +      || GET_MODE (SUBREG_REG (src)) != GET_MODE (SUBREG_REG (dest))
> +      || GET_MODE (src) != to
> +      || GET_MODE (SUBREG_REG (src)) != from)
> +    return false;
> +  return true;

This seems highly restrictive.  I don't see why the DEST would need to
be a SUBREG expression.  ISTM you'd want to remove something like this,
though your patch won't:


(set (reg:SI) (zero_extend:SI (subreg:QI (reg:SI))))


You may also need to check SUBREG_BYTE here.  I'm pretty sure you need
to be more careful for big endian targets as well.



> +}
> +
> +bool
> +remove_cast_p (rtx temp, rtx target, gassign *assign_stmt)
> +{
> +  enum gimple_code code;
> +  value_range_kind lhs_range, rhs_range;
> +  unsigned int ops_num;
> +  wide_int lhs_min, lhs_max, rhs_min, rhs_max;
> +  if (gimple_assign_cast_p (assign_stmt))
> +  {
> +    code = gimple_code (assign_stmt);
> +    ops_num = gimple_num_ops (assign_stmt);
> +    if (code == GIMPLE_ASSIGN && ops_num < 3)

Is there any case where we get more than 2 ops here?  If it's a cast,
then I would think not.



> +    {
> +      tree lhs = gimple_assign_lhs (assign_stmt);
> +      if (POINTER_TYPE_P (TREE_TYPE (lhs)))
> +       return false;
> +      lhs_range = get_range_info (lhs, &lhs_min, &lhs_max);
> +      if (dump_file && lhs_range == VR_RANGE)
> +       print_range (lhs, lhs_min, lhs_max);

I'm not sure why you need the range of the LHS.  What you really care
about is the size of the destination object, not the ranges it may have.



> +
> +      tree rhs = gimple_assign_rhs1 (assign_stmt);
> +      if (POINTER_TYPE_P (TREE_TYPE (rhs)))
> +       return false;
> +      rhs_range = get_range_info (rhs, &rhs_min, &rhs_max);
> +      if (rhs_range == VR_RANGE)
> +      {
> +       unsigned int rhs_precision
> +	 = std::max (wi::min_precision (rhs_max, TYPE_SIGN (TREE_TYPE (rhs))),
> +		wi::min_precision (rhs_min, TYPE_SIGN (TREE_TYPE (rhs))));
> +       unsigned int lhs_precision = TYPE_PRECISION (TREE_TYPE (lhs));
> +       if (lhs_precision > rhs_precision)
> +       {
> +
> +	 if (dump_file)
> +	 {
> +	   print_range (rhs, rhs_min, rhs_max);
> +	   if (lhs_precision > rhs_precision)
> +		fprintf (dump_file,
> +		    "EXPAND-CAST: This casting can be optimized\n");
> +	 }
> +
> +	 return cast_to_remove_p (temp,
> +			 target,
> +			 TYPE_MODE (TREE_TYPE (rhs)),
> +			 TYPE_MODE (TREE_TYPE (lhs)));
> +       }

It appears to me that you always remove the extension when the LHS
precision is more than the RHS precision.   That should work for zero
extension, but I'm pretty sure its wrong for sign extension, but I don't
see where you filter out any sign extending casts.


For the sign extension case, you want to check the range of the source
object to see if its sign bit of the narrowing subreg's mode is on in
the source range. 


You might also look at Kugan Vivekanandara's patches from 2015.  I think
they stalled out as they weren't ready for that release and I don't
recall Kugan re-engaging on them once we got past that year's
release.    But there may be useful nuggets in those patches.

Jeff
diff mbox series

Patch

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 9864e4344d2..c3739b2f331 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -17,6 +17,7 @@  You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
+#include <algorithm>
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -73,6 +74,8 @@  along with GCC; see the file COPYING3.  If not see
#include "tree-ssa-address.h"
#include "output.h"
#include "builtins.h"
+#include "tree-vrp.h"
+
 /* Some systems use __main in a way incompatible with its use in gcc, in these
    cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
@@ -3640,6 +3643,107 @@  expand_clobber (tree lhs)
     }
}
+/* Print the ranges of the operand and save it to the dump file
+   used for debug purposes only.  */
+
+void
+print_range (tree operand, wide_int min, wide_int max)
+{
+  pretty_printer buffer;
+  pp_needs_newline (&buffer) = false;
+  buffer.buffer->stream = dump_file;
+  fprintf (dump_file, "Range for lhs: [");
+  pp_wide_int (&buffer, min, TYPE_SIGN (TREE_TYPE (operand)));
+  pp_printf (&buffer, ", ");
+  pp_wide_int (&buffer, max, TYPE_SIGN (TREE_TYPE (operand)));
+  pp_printf (&buffer, "]");
+  pp_flush (&buffer);
+  fprintf (dump_file, "\n");
+}
+
+
+
+/* Check that REG and SUBREG modes match between src and dest,
+   and that there is a cast to be removed.  */
+
+bool
+cast_to_remove_p (rtx src, rtx dest, machine_mode from
+                                                             , machine_mode to)
+{
+  if (GET_CODE (src) != SUBREG
+      || GET_CODE (dest) != SUBREG
+      || GET_CODE (SUBREG_REG (src)) != REG
+      || GET_CODE (SUBREG_REG (dest)) != REG
+      || GET_MODE (src) != GET_MODE (dest)
+      || GET_MODE (SUBREG_REG (src)) != GET_MODE (SUBREG_REG (dest))
+      || GET_MODE (src) != to
+      || GET_MODE (SUBREG_REG (src)) != from)
+    return false;
+  return true;
+}
+
+bool
+remove_cast_p (rtx temp, rtx target, gassign *assign_stmt)
+{
+  enum gimple_code code;
+  value_range_kind lhs_range, rhs_range;
+  unsigned int ops_num;
+  wide_int lhs_min, lhs_max, rhs_min, rhs_max;
+  if (gimple_assign_cast_p (assign_stmt))
+  {
+    code = gimple_code (assign_stmt);
+    ops_num = gimple_num_ops (assign_stmt);
+    if (code == GIMPLE_ASSIGN && ops_num < 3)
+    {
+      tree lhs = gimple_assign_lhs (assign_stmt);
+      if (POINTER_TYPE_P (TREE_TYPE (lhs)))
+       return false;
+      lhs_range = get_range_info (lhs, &lhs_min, &lhs_max);
+      if (dump_file && lhs_range == VR_RANGE)
+       print_range (lhs, lhs_min, lhs_max);
+
+      tree rhs = gimple_assign_rhs1 (assign_stmt);
+      if (POINTER_TYPE_P (TREE_TYPE (rhs)))
+       return false;
+      rhs_range = get_range_info (rhs, &rhs_min, &rhs_max);
+      if (rhs_range == VR_RANGE)
+      {
+       unsigned int rhs_precision
+             = std::max (wi::min_precision (rhs_max, TYPE_SIGN (TREE_TYPE (rhs))),
+                             wi::min_precision (rhs_min, TYPE_SIGN (TREE_TYPE (rhs))));
+       unsigned int lhs_precision = TYPE_PRECISION (TREE_TYPE (lhs));
+       if (lhs_precision > rhs_precision)
+       {
+
+             if (dump_file)
+             {
+                print_range (rhs, rhs_min, rhs_max);
+                if (lhs_precision > rhs_precision)
+                             fprintf (dump_file,
+                                 "EXPAND-CAST: This casting can be optimized\n");
+             }
+
+             return cast_to_remove_p (temp,
+                                             target,
+                                             TYPE_MODE (TREE_TYPE (rhs)),
+                                             TYPE_MODE (TREE_TYPE (lhs)));
+       }
+       else
+             if (dump_file)
+                fprintf (dump_file, "EXPAND-CAST: lhs_precision < rhs_precision\n");
+      }
+      else
+             if (dump_file)
+               fprintf (dump_file, "EXPAND-CAST: Range is not VR_RANGE\n");
+    }
+    else
+      if (dump_file)
+       fprintf (dump_file, "EXPAND-CAST: Got more than 2 ops \n");
+  }
+  return false;
+}
+
+
/* A subroutine of expand_gimple_stmt, expanding one gimple statement
    STMT that doesn't require special handling for outgoing edges.  That
    is no tailcalls and no GIMPLE_COND.  */
@@ -3801,6 +3905,11 @@  expand_gimple_stmt_1 (gimple *stmt)
                                   temp = convert_modes (GET_MODE (SUBREG_REG (target)),
                                                                                 GET_MODE (target), temp, unsignedp);
                                 }
+                             if (targetm.remove_redundant_cast)
+                               if (remove_cast_p (temp,
+                                                                             target,
+                                                                             assign_stmt))
+                                 temp = SUBREG_REG (temp);
                                convert_move (SUBREG_REG (target), temp, unsignedp);
                     }
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 94b5ac01762..504b1abf85e 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5226,6 +5226,9 @@  riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg
+#undef TARGET_REMOVE_REDUNDANT_CAST
+#define TARGET_REMOVE_REDUNDANT_CAST true
+
struct gcc_target targetm = TARGET_INITIALIZER;
 #include "gt-riscv.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 19985adac3e..3a0d4813d96 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12194,3 +12194,9 @@  This target hook can be used to generate a target-specific code
@deftypefn {Target Hook} void TARGET_RUN_TARGET_SELFTESTS (void)
If selftests are enabled, run any selftests for this target.
@end deftypefn
+
+@deftypevr {Target Hook} bool TARGET_REMOVE_REDUNDANT_CAST
+If this flag is true, @code{remove_cast_p} will be called
+and remove unneeded cast
+The default is false.
+@end deftypevr
\ No newline at end of file
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 1a16150bfc5..984e18e1697 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8178,3 +8178,5 @@  maintainer is familiar with.
@hook TARGET_SPECULATION_SAFE_VALUE
 @hook TARGET_RUN_TARGET_SELFTESTS
+
+@hook TARGET_REMOVE_REDUNDANT_CAST
\ No newline at end of file
diff --git a/gcc/target.def b/gcc/target.def
index b5e82ff826e..014e5a22d4d 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -6770,6 +6770,15 @@  following it should not be run.  Usually true only for virtual assembler\n\
targets.",
bool, false)
+/* True if remove_redundant_cast should be called to remove unneeded
+   casting.  */
+DEFHOOKPOD
+(remove_redundant_cast,
+ "If this flag is true, @code{remove_cast_p} will be called\n\
+and remove unneeded cast\n\
+The default is false.",
+ bool, false)
+
/* Leave the boolean fields at the end.  */
 /* Functions related to mode switching.  */
@@ -6835,4 +6844,3 @@  DEFHOOK
 /* Close the 'struct gcc_target' definition.  */
HOOK_VECTOR_END (C90_EMPTY_HACK)
-

-----------------------------------------------------------------------
Change log:

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 085ef66a207..ffe7e061985 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@ 
+2020-02-19  Nidal Faour <nidal.faour@wdc.com>
+
+             * doc/tm.texi.in (TARGET_REMOVE_REDUNDANT_CAST): New hook.
+             * doc/tm.texi: Regenerate.
+             *cfgexpand.c (algorith.h): New include.
+             (tree-vrp.h): Likewise
+             (print_range): New function
+             (check_assign_can_be_free): Likewise
+             (adjust_assign_to_remove_cast): Likewise
+             (expand_gimple_stmt_1): Call adjust_assign_to_remove_cast
+             *config/riscv/riscv.c (TARGET_REMOVE_REDUNDANT_CAST): Define
+             *target.def (remove_redundant_cast): New target hook
+
2020-03-09  Jason Merrill  <jason@redhat.com>
                * gdbinit.in (pgs): Fix typo in documentation.

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f91af78a302..c5a701e08af 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@ 
+2020-02-19  Nidal Faour  <nidal.faour@wdc.com>
+       * gcc.target/riscv/masking-to-byte-1.c: New test
+       * gcc.target/riscv/masking-to-byte-2.c: New test
+
2020-03-09  Marek Polacek  <polacek@redhat.com>
                PR c++/92031 - bogus taking address of rvalue error.