diff mbox series

rs6000: suboptimal code for returning bool value on target ppc

Message ID 86cf8475-4353-52ca-869c-75f40bd7d06f@linux.ibm.com
State New
Headers show
Series rs6000: suboptimal code for returning bool value on target ppc | expand

Commit Message

Ajit Agarwal March 16, 2023, 5:20 a.m. UTC
Hello All:


This patch eliminates an unnecessary zero-extension instruction from the assembly generated for Power.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

	rs6000: suboptimal code for returning bool value on target ppc.

	New pass to eliminate unnecessary zero extension. This pass
	is registered after cse rtl pass.

	2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	* config/rs6000/rs6000-passes.def: Registered zero elimination
	pass.
	* config/rs6000/rs6000-zext-elim.cc: Add new pass.
	* config.gcc: Add new executable.
	* config/rs6000/rs6000-protos.h: Add new prototype for zero
	elimination pass.
	* config/rs6000/rs6000.cc: Add new prototype for zero
	elimination pass.
	* config/rs6000/t-rs6000: Add new rule.
	* expr.cc: Modified gcc assert.
	* explow.cc: Modified gcc assert.
	* optabs.cc: Modified gcc assert.
---
 gcc/config.gcc                        |   4 +-
 gcc/config/rs6000/rs6000-passes.def   |   2 +
 gcc/config/rs6000/rs6000-protos.h     |   1 +
 gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000.cc           |   2 +
 gcc/config/rs6000/t-rs6000            |   5 +
 gcc/explow.cc                         |   3 +-
 gcc/expr.cc                           |   4 +-
 gcc/optabs.cc                         |   3 +-
 9 files changed, 379 insertions(+), 6 deletions(-)
 create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc

Comments

Richard Biener March 16, 2023, 7:40 a.m. UTC | #1
On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hello All:
>
>
> This patch eliminates unnecessary zero extension instruction from power generated assembly.
> Bootstrapped and regtested on powerpc64-linux-gnu.

What makes this so special that we cannot deal with it from generic code?
In particular, we do have the REE pass; why is target-specific
knowledge necessary
to eliminate the extension?

> Thanks & Regards
> Ajit
>
>         rs6000: suboptimal code for returning bool value on target ppc.
>
>         New pass to eliminate unnecessary zero extension. This pass
>         is registered after cse rtl pass.
>
>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>
> gcc/ChangeLog:
>
>         * config/rs6000/rs6000-passes.def: Registered zero elimination
>         pass.
>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>         * config.gcc: Add new executable.
>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
>         elimination pass.
>         * config/rs6000/rs6000.cc: Add new prototype for zero
>         elimination pass.
>         * config/rs6000/t-rs6000: Add new rule.
>         * expr.cc: Modified gcc assert.
>         * explow.cc: Modified gcc assert.
>         * optabs.cc: Modified gcc assert.
> ---
>  gcc/config.gcc                        |   4 +-
>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>  gcc/config/rs6000/rs6000-protos.h     |   1 +
>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
>  gcc/config/rs6000/rs6000.cc           |   2 +
>  gcc/config/rs6000/t-rs6000            |   5 +
>  gcc/explow.cc                         |   3 +-
>  gcc/expr.cc                           |   4 +-
>  gcc/optabs.cc                         |   3 +-
>  9 files changed, 379 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index da3a6d3ba1f..e8ac9d882f0 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -503,7 +503,7 @@ or1k*-*-*)
>         ;;
>  powerpc*-*-*)
>         cpu_type=rs6000
> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> @@ -538,7 +538,7 @@ riscv*)
>         ;;
>  rs6000*-*-*)
>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
> index ca899d5f7af..d7500feddf1 100644
> --- a/gcc/config/rs6000/rs6000-passes.def
> +++ b/gcc/config/rs6000/rs6000-passes.def
> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>       The power8 does not have instructions that automaticaly do the byte swaps
>       for loads and stores.  */
>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> +
>
>    /* Pass to do the PCREL_OPT optimization that combines the load of an
>       external symbol's address along with a single load or store using that
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 1a4fc1df668..f6cf2d673d4 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>  class rtl_opt_pass;
>
>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
> new file mode 100644
> index 00000000000..777c7a5a387
> --- /dev/null
> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> @@ -0,0 +1,361 @@
> +/* Subroutine to eliminate redundant zero extend for power architecture.
> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published
> +   by the Free Software Foundation; either version 3, or (at your
> +   option) any later version.
> +
> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> +   License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with GCC; see the file COPYING3.  If not see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* This pass remove unnecessary zero extension instruction from
> +  power generated assembly. This pass is register after cse
> +  pass.
> +  Identifies the following sequence of instruction after cse
> +  rtl pass.
> +
> +  set compare (subreg)
> +  set if_then_else
> +  set SImode -> QImode
> +  set zero_extend to DImode from QImode
> +  set return value 0 in one path of cfg.
> +  set return value 1 in other path of cfg.
> +
> +  In cfgexpand pass QImode is generated with
> +  bool register value and this pass uses QI
> +  as 64 bit registers.
> +
> +  This pass replace copy operation from QImode to DImode
> +  and return appropriate return values.*/
> +
> +#define IN_TARGET_CODE 1
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "rtl.h"
> +#include "tree.h"
> +#include "memmodel.h"
> +#include "df.h"
> +#include "tm_p.h"
> +#include "ira.h"
> +#include "print-tree.h"
> +#include "varasm.h"
> +#include "explow.h"
> +#include "expr.h"
> +#include "output.h"
> +#include "tree-pass.h"
> +
> +/* This is based on the union-find logic in web.cc.  web_entry_base is
> +   defined in df.h.  */
> +class zext_web_entry : public web_entry_base
> +{
> + public:
> +  /* Pointer to the insn.  */
> +  rtx_insn *insn;
> +  unsigned int is_relevant : 1;
> +  /* Set if insn is a load.  */
> +  unsigned int is_load : 1;
> +  /* Set if insn is a store.  */
> +  unsigned int is_store : 1;
> +  unsigned int is_zext :1 ;
> +  unsigned int is_move :1;
> +  unsigned int is_delete_move :1;
> +  /* Set if this insn should be deleted.  */
> +  unsigned int will_delete : 1;
> +  unsigned int will_delete_chances : 1;
> +};
> +
> +/* Checks if instruction is zero extension
> + * with QIMode to DImode.*/
> +static unsigned int
> +insn_is_zext_p(rtx insn)
> +{
> +  rtx body = PATTERN (insn);
> +
> +  if (GET_CODE (body) == SET
> +      && GET_MODE(SET_DEST (body)) == DImode
> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
> +  {
> +    rtx set = XEXP (SET_SRC (body), 0);
> +
> +    if (REG_P (set))
> +    {
> +      if (GET_MODE (set) == QImode) return 1;
> +    }
> +    else
> +      return 0;
> +  }
> +  return 0;
> +}
> +
> +/* Checks if instruction is SET operation with QImode.*/
> +static unsigned int
> +insn_is_store_p (rtx insn)
> +{
> +  rtx body = PATTERN (insn);
> +  if (GET_CODE (body) == SET
> +      && SUBREG_P(SET_SRC (body))
> +      && !CONST_INT_P(SET_SRC (body))
> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
> +      && GET_MODE(SET_SRC (body)) == QImode)
> +    return 1;
> +
> +  return 0;
> +}
> +
> +/* Find out zero extension removal candidate with use-def web.*/
> +static void
> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
> +                                    rtx insn, df_ref def)
> +{
> +  struct df_link *link = DF_REF_CHAIN (def);
> +
> +  rtx move_insn = NULL_RTX;
> +  rtx compare_insn = NULL_RTX;
> +
> +  while (link)
> +  {
> +    if (!DF_REF_INSN_INFO (link->ref))
> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
> +
> +    if (DF_REF_INSN_INFO (link->ref))
> +      {
> +       rtx use_insn = DF_REF_INSN (link->ref);
> +
> +       if (GET_CODE (PATTERN (use_insn)) == SET
> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
> +         {
> +           if (GET_CODE (PATTERN (insn)) == SET
> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
> +             {
> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
> +
> +               if (SUBREG_P (body))
> +                 {
> +                   compare_insn = use_insn;
> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
> +
> +                   if (compare_insn
> +                       && ((REGNO (XEXP (compare_body, 0)))
> +                               == REGNO (SET_DEST (PATTERN (insn)))))
> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> +                 }
> +              }
> +           }
> +
> +       if (insn_is_store_p(use_insn)
> +           && GET_CODE (PATTERN (insn)) == SET
> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
> +         {
> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
> +             {
> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> +             }
> +         }
> +
> +       if (insn_is_zext_p (insn))
> +         {
> +           if (GET_CODE (PATTERN (use_insn)) == SET
> +               && REG_P (SET_SRC (PATTERN (use_insn))))
> +             {
> +               if (move_insn
> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
> +                 {
> +                   insn_entry[INSN_UID (insn)].is_move = 1;
> +                   break;
> +                 }
> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
> +                   {
> +                     move_insn = use_insn;
> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
> +                   }
> +             }
> +         }
> +
> +       if (insn_is_zext_p (use_insn))
> +         {
> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
> +
> +           if (insn_is_store_p (insn)
> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
> +           {
> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
> +             insn_entry[INSN_UID (insn)].will_delete = 1;
> +             insn_entry[INSN_UID( insn)].is_store = 1;
> +           }
> +
> +          if (NONDEBUG_INSN_P (use_insn))
> +            unionfind_union (insn_entry + INSN_UID (insn),
> +                             insn_entry + INSN_UID (use_insn));
> +       }
> +      }
> +
> +    link = link->next;
> +  }
> +}
> +
> +/* Replace QImode extensions with copy operations.*/
> +static void
> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
> +{
> +  rtx_insn *insn = insn_entry[i].insn;
> +  rtx body = PATTERN (insn);
> +  rtx src_reg;
> +  src_reg = XEXP (SET_SRC (body), 0);
> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
> +
> +  if (GET_MODE(SET_DEST(body)) != DImode)
> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
> +
> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
> +  df_insn_rescan (new_insn);
> +
> +  df_insn_delete (insn);
> +  remove_insn (insn);
> +  insn->set_deleted ();
> +}
> +
> +/* Main entry point for this pass.  */
> +unsigned int
> +rs6000_analyze_zext (function *fun)
> +{
> +  zext_web_entry *insn_entry;
> +  basic_block bb;
> +  rtx_insn *insn, *curr_insn = 0;
> +
> +  /* Dataflow analysis for use-def chains.  */
> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> +  df_analyze ();
> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> +
> +  /* Rebuild ud- and du-chains.  */
> +  df_remove_problem (df_chain);
> +  df_process_deferred_rescans ();
> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> +  df_analyze ();
> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> +
> +  /* Allocate structure to represent webs of insns.  */
> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
> +
> +  /* Walk the insns to gather basic data.  */
> +  FOR_ALL_BB_FN (bb, fun)
> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> +    {
> +      unsigned int uid = INSN_UID (insn);
> +      if (NONDEBUG_INSN_P (insn))
> +       {
> +         insn_entry[uid].insn = insn;
> +
> +         if (GET_CODE (insn) == insn_is_store_p (insn))
> +           {
> +             insn_entry[uid].is_store = 1;
> +             insn_entry[uid].is_relevant = 1;
> +           }
> +
> +         /* Walk the uses and defs to identify the optimization
> +            candidates.*/
> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> +         df_ref mention;
> +
> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
> +           {
> +             insn_entry[uid].is_relevant = 1;
> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
> +           }
> +
> +         if (insn_entry[uid].is_relevant)
> +           {
> +             /* Determine if this is a store.  */
> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> +           }
> +       }
> +     }
> +
> +   unsigned e = get_max_uid (), i;
> +
> +   int store_index = -1;
> +
> +   /* Replace with copy operation.*/
> +   for (i = 0; i < e; ++i)
> +     {
> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
> +        store_index  = i;
> +
> +       if ((store_index != -1)
> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
> +         {
> +           replace_marked_insns (insn_entry, store_index);
> +           replace_marked_insns (insn_entry, i);
> +         }
> +     }
> +    /* Clean up.  */
> +    free (insn_entry);
> +
> +    return 0;
> +}
> +
> +const pass_data pass_data_analyze_zext =
> +{
> +  RTL_PASS, /* type */
> +  "zext", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE, /* tv_id */
> +  0, /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  TODO_df_finish, /* todo_flags_finish */
> +};
> +
> +class pass_analyze_zext : public rtl_opt_pass
> +{
> +public:
> +  pass_analyze_zext(gcc::context *ctxt)
> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  virtual bool gate (function *)
> +    {
> +      return (optimize > 0 );
> +    }
> +
> +  virtual unsigned int execute (function *fun)
> +    {
> +      return rs6000_analyze_zext (fun);
> +    }
> +
> +  opt_pass *clone ()
> +    {
> +      return new pass_analyze_zext (m_ctxt);
> +    }
> +
> +}; // class pass_analyze_zext
> +
> +rtl_opt_pass *
> +make_pass_analyze_zext (gcc::context *ctxt)
> +{
> +  return new pass_analyze_zext (ctxt);
> +}
> +
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 8e0b0d022db..6541334bf2d 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
>                                           bool);
>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>
> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
> +
>  /* Hash table stuff for keeping track of TOC entries.  */
>
>  struct GTY((for_user)) toc_hash_struct
> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
> index f183b42ce1d..c1f61591d2f 100644
> --- a/gcc/config/rs6000/t-rs6000
> +++ b/gcc/config/rs6000/t-rs6000
> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
>         $(COMPILE) $<
>         $(POSTCOMPILE)
>
> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
> +       $(COMPILE) $<
> +       $(POSTCOMPILE)
> +
> +
>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
>         $(COMPILE) $<
>         $(POSTCOMPILE)
> diff --git a/gcc/explow.cc b/gcc/explow.cc
> index 32e9498ee07..316aa975e40 100644
> --- a/gcc/explow.cc
> +++ b/gcc/explow.cc
> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
>    if (! general_operand (x, VOIDmode))
>      x = force_operand (x, temp);
>
> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
> +              || GET_MODE (x) == VOIDmode);
>    if (x != temp)
>      emit_move_insn (temp, x);
>    return temp;
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 15be1c8db99..6162ef92b88 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
>    rtx y_cst = NULL_RTX;
>    rtx_insn *last_insn;
>    rtx set;
> -
>    gcc_assert (mode != BLKmode
> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
> +             && (mode == DImode || GET_MODE (y) == mode
> +             || GET_MODE (y) == VOIDmode));
>
>    /* If we have a copy that looks like one of the following patterns:
>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 4c641cab192..9d22fadc7ef 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
>      input:
>        gcc_assert (mode != VOIDmode);
>        gcc_assert (GET_MODE (op->value) == VOIDmode
> -                 || GET_MODE (op->value) == mode);
> +                 || GET_MODE (op->value) == mode
> +                 || mode == DImode);
>        if (maybe_legitimize_operand_same_code (icode, opno, op))
>         return true;
>
> --
> 2.31.1
>
Ajit Agarwal March 16, 2023, 8:11 a.m. UTC | #2
Hello Richard:

On 16/03/23 1:10 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> Hello All:
>>
>>
>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> What makes this so special that we cannot deal with it from generic code?
> In particular we do have the REE pass, why is target specific
> knowledge neccessary
> to eliminate the extension?
>

When returning bool values that result from a comparison with integers, the RTL passes generate the following sequence:
 
set compare (subreg)
set if_then_else
Convert SImode -> QImode
set zero_extend to DImode from QImode
set return value 0 in one path of cfg.
set return value 1 in other path of cfg.

This pass replaces the above conversion and zero extension from QImode to DImode with copy operations, so that the QImode value is kept in a 64-bit register on the powerpc target.

Thanks & Regards
Ajit
>> +  In cfgexpand pass QImode is generated with
>> +  bool register value and this pass uses QI
>> +  as 64 bit registers.
>> +

>>         rs6000: suboptimal code for returning bool value on target ppc.
>>
>>         New pass to eliminate unnecessary zero extension. This pass
>>         is registered after cse rtl pass.
>>
>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>
>> gcc/ChangeLog:
>>
>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
>>         pass.
>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>>         * config.gcc: Add new executable.
>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
>>         elimination pass.
>>         * config/rs6000/rs6000.cc: Add new prototype for zero
>>         elimination pass.
>>         * config/rs6000/t-rs6000: Add new rule.
>>         * expr.cc: Modified gcc assert.
>>         * explow.cc: Modified gcc assert.
>>         * optabs.cc: Modified gcc assert.
>> ---
>>  gcc/config.gcc                        |   4 +-
>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
>>  gcc/config/rs6000/rs6000.cc           |   2 +
>>  gcc/config/rs6000/t-rs6000            |   5 +
>>  gcc/explow.cc                         |   3 +-
>>  gcc/expr.cc                           |   4 +-
>>  gcc/optabs.cc                         |   3 +-
>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>
>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>> index da3a6d3ba1f..e8ac9d882f0 100644
>> --- a/gcc/config.gcc
>> +++ b/gcc/config.gcc
>> @@ -503,7 +503,7 @@ or1k*-*-*)
>>         ;;
>>  powerpc*-*-*)
>>         cpu_type=rs6000
>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>> @@ -538,7 +538,7 @@ riscv*)
>>         ;;
>>  rs6000*-*-*)
>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
>> index ca899d5f7af..d7500feddf1 100644
>> --- a/gcc/config/rs6000/rs6000-passes.def
>> +++ b/gcc/config/rs6000/rs6000-passes.def
>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>>       The power8 does not have instructions that automaticaly do the byte swaps
>>       for loads and stores.  */
>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
>> +
>>
>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
>>       external symbol's address along with a single load or store using that
>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>> index 1a4fc1df668..f6cf2d673d4 100644
>> --- a/gcc/config/rs6000/rs6000-protos.h
>> +++ b/gcc/config/rs6000/rs6000-protos.h
>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>>  class rtl_opt_pass;
>>
>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
>> new file mode 100644
>> index 00000000000..777c7a5a387
>> --- /dev/null
>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
>> @@ -0,0 +1,361 @@
>> +/* Subroutine to eliminate redundant zero extend for power architecture.
>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
>> +
>> +   This file is part of GCC.
>> +
>> +   GCC is free software; you can redistribute it and/or modify it
>> +   under the terms of the GNU General Public License as published
>> +   by the Free Software Foundation; either version 3, or (at your
>> +   option) any later version.
>> +
>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>> +   License for more details.
>> +
>> +   You should have received a copy of the GNU General Public License
>> +   along with GCC; see the file COPYING3.  If not see
>> +   <http://www.gnu.org/licenses/>.  */
>> +
>> +/* This pass remove unnecessary zero extension instruction from
>> +  power generated assembly. This pass is register after cse
>> +  pass.
>> +  Identifies the following sequence of instruction after cse
>> +  rtl pass.
>> +
>> +  set compare (subreg)
>> +  set if_then_else
>> +  set SImode -> QImode
>> +  set zero_extend to DImode from QImode
>> +  set return value 0 in one path of cfg.
>> +  set return value 1 in other path of cfg.
>> +
>> +  In cfgexpand pass QImode is generated with
>> +  bool register value and this pass uses QI
>> +  as 64 bit registers.
>> +
>> +  This pass replace copy operation from QImode to DImode
>> +  and return appropriate return values.*/
>> +
>> +#define IN_TARGET_CODE 1
>> +
>> +#include "config.h"
>> +#include "system.h"
>> +#include "coretypes.h"
>> +#include "backend.h"
>> +#include "rtl.h"
>> +#include "tree.h"
>> +#include "memmodel.h"
>> +#include "df.h"
>> +#include "tm_p.h"
>> +#include "ira.h"
>> +#include "print-tree.h"
>> +#include "varasm.h"
>> +#include "explow.h"
>> +#include "expr.h"
>> +#include "output.h"
>> +#include "tree-pass.h"
>> +
>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
>> +   defined in df.h.  */
>> +class zext_web_entry : public web_entry_base
>> +{
>> + public:
>> +  /* Pointer to the insn.  */
>> +  rtx_insn *insn;
>> +  unsigned int is_relevant : 1;
>> +  /* Set if insn is a load.  */
>> +  unsigned int is_load : 1;
>> +  /* Set if insn is a store.  */
>> +  unsigned int is_store : 1;
>> +  unsigned int is_zext :1 ;
>> +  unsigned int is_move :1;
>> +  unsigned int is_delete_move :1;
>> +  /* Set if this insn should be deleted.  */
>> +  unsigned int will_delete : 1;
>> +  unsigned int will_delete_chances : 1;
>> +};
>> +
>> +/* Checks if instruction is zero extension
>> + * with QIMode to DImode.*/
>> +static unsigned int
>> +insn_is_zext_p(rtx insn)
>> +{
>> +  rtx body = PATTERN (insn);
>> +
>> +  if (GET_CODE (body) == SET
>> +      && GET_MODE(SET_DEST (body)) == DImode
>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
>> +  {
>> +    rtx set = XEXP (SET_SRC (body), 0);
>> +
>> +    if (REG_P (set))
>> +    {
>> +      if (GET_MODE (set) == QImode) return 1;
>> +    }
>> +    else
>> +      return 0;
>> +  }
>> +  return 0;
>> +}
>> +
>> +/* Checks if instruction is SET operation with QImode.*/
>> +static unsigned int
>> +insn_is_store_p (rtx insn)
>> +{
>> +  rtx body = PATTERN (insn);
>> +  if (GET_CODE (body) == SET
>> +      && SUBREG_P(SET_SRC (body))
>> +      && !CONST_INT_P(SET_SRC (body))
>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
>> +      && GET_MODE(SET_SRC (body)) == QImode)
>> +    return 1;
>> +
>> +  return 0;
>> +}
>> +
>> +/* Find out zero extension removal candidate with use-def web.*/
>> +static void
>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
>> +                                    rtx insn, df_ref def)
>> +{
>> +  struct df_link *link = DF_REF_CHAIN (def);
>> +
>> +  rtx move_insn = NULL_RTX;
>> +  rtx compare_insn = NULL_RTX;
>> +
>> +  while (link)
>> +  {
>> +    if (!DF_REF_INSN_INFO (link->ref))
>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
>> +
>> +    if (DF_REF_INSN_INFO (link->ref))
>> +      {
>> +       rtx use_insn = DF_REF_INSN (link->ref);
>> +
>> +       if (GET_CODE (PATTERN (use_insn)) == SET
>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
>> +         {
>> +           if (GET_CODE (PATTERN (insn)) == SET
>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
>> +             {
>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
>> +
>> +               if (SUBREG_P (body))
>> +                 {
>> +                   compare_insn = use_insn;
>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
>> +
>> +                   if (compare_insn
>> +                       && ((REGNO (XEXP (compare_body, 0)))
>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>> +                 }
>> +              }
>> +           }
>> +
>> +       if (insn_is_store_p(use_insn)
>> +           && GET_CODE (PATTERN (insn)) == SET
>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
>> +         {
>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
>> +             {
>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>> +             }
>> +         }
>> +
>> +       if (insn_is_zext_p (insn))
>> +         {
>> +           if (GET_CODE (PATTERN (use_insn)) == SET
>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
>> +             {
>> +               if (move_insn
>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
>> +                 {
>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
>> +                   break;
>> +                 }
>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
>> +                   {
>> +                     move_insn = use_insn;
>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
>> +                   }
>> +             }
>> +         }
>> +
>> +       if (insn_is_zext_p (use_insn))
>> +         {
>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
>> +
>> +           if (insn_is_store_p (insn)
>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
>> +           {
>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
>> +             insn_entry[INSN_UID( insn)].is_store = 1;
>> +           }
>> +
>> +          if (NONDEBUG_INSN_P (use_insn))
>> +            unionfind_union (insn_entry + INSN_UID (insn),
>> +                             insn_entry + INSN_UID (use_insn));
>> +       }
>> +      }
>> +
>> +    link = link->next;
>> +  }
>> +}
>> +
>> +/* Replace QImode extensions with copy operations.*/
>> +static void
>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
>> +{
>> +  rtx_insn *insn = insn_entry[i].insn;
>> +  rtx body = PATTERN (insn);
>> +  rtx src_reg;
>> +  src_reg = XEXP (SET_SRC (body), 0);
>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
>> +
>> +  if (GET_MODE(SET_DEST(body)) != DImode)
>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
>> +
>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
>> +  df_insn_rescan (new_insn);
>> +
>> +  df_insn_delete (insn);
>> +  remove_insn (insn);
>> +  insn->set_deleted ();
>> +}
>> +
>> +/* Main entry point for this pass.  */
>> +unsigned int
>> +rs6000_analyze_zext (function *fun)
>> +{
>> +  zext_web_entry *insn_entry;
>> +  basic_block bb;
>> +  rtx_insn *insn, *curr_insn = 0;
>> +
>> +  /* Dataflow analysis for use-def chains.  */
>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>> +  df_analyze ();
>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>> +
>> +  /* Rebuild ud- and du-chains.  */
>> +  df_remove_problem (df_chain);
>> +  df_process_deferred_rescans ();
>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>> +  df_analyze ();
>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>> +
>> +  /* Allocate structure to represent webs of insns.  */
>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
>> +
>> +  /* Walk the insns to gather basic data.  */
>> +  FOR_ALL_BB_FN (bb, fun)
>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
>> +    {
>> +      unsigned int uid = INSN_UID (insn);
>> +      if (NONDEBUG_INSN_P (insn))
>> +       {
>> +         insn_entry[uid].insn = insn;
>> +
>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
>> +           {
>> +             insn_entry[uid].is_store = 1;
>> +             insn_entry[uid].is_relevant = 1;
>> +           }
>> +
>> +         /* Walk the uses and defs to identify the optimization
>> +            candidates.*/
>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
>> +         df_ref mention;
>> +
>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
>> +           {
>> +             insn_entry[uid].is_relevant = 1;
>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
>> +           }
>> +
>> +         if (insn_entry[uid].is_relevant)
>> +           {
>> +             /* Determine if this is a store.  */
>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>> +           }
>> +       }
>> +     }
>> +
>> +   unsigned e = get_max_uid (), i;
>> +
>> +   int store_index = -1;
>> +
>> +   /* Replace with copy operation.*/
>> +   for (i = 0; i < e; ++i)
>> +     {
>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
>> +        store_index  = i;
>> +
>> +       if ((store_index != -1)
>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
>> +         {
>> +           replace_marked_insns (insn_entry, store_index);
>> +           replace_marked_insns (insn_entry, i);
>> +         }
>> +     }
>> +    /* Clean up.  */
>> +    free (insn_entry);
>> +
>> +    return 0;
>> +}
>> +
>> +const pass_data pass_data_analyze_zext =
>> +{
>> +  RTL_PASS, /* type */
>> +  "zext", /* name */
>> +  OPTGROUP_NONE, /* optinfo_flags */
>> +  TV_NONE, /* tv_id */
>> +  0, /* properties_required */
>> +  0, /* properties_provided */
>> +  0, /* properties_destroyed */
>> +  0, /* todo_flags_start */
>> +  TODO_df_finish, /* todo_flags_finish */
>> +};
>> +
>> +class pass_analyze_zext : public rtl_opt_pass
>> +{
>> +public:
>> +  pass_analyze_zext(gcc::context *ctxt)
>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
>> +  {}
>> +
>> +  /* opt_pass methods: */
>> +  virtual bool gate (function *)
>> +    {
>> +      return (optimize > 0 );
>> +    }
>> +
>> +  virtual unsigned int execute (function *fun)
>> +    {
>> +      return rs6000_analyze_zext (fun);
>> +    }
>> +
>> +  opt_pass *clone ()
>> +    {
>> +      return new pass_analyze_zext (m_ctxt);
>> +    }
>> +
>> +}; // class pass_analyze_zext
>> +
>> +rtl_opt_pass *
>> +make_pass_analyze_zext (gcc::context *ctxt)
>> +{
>> +  return new pass_analyze_zext (ctxt);
>> +}
>> +
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 8e0b0d022db..6541334bf2d 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
>>                                           bool);
>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>>
>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
>> +
>>  /* Hash table stuff for keeping track of TOC entries.  */
>>
>>  struct GTY((for_user)) toc_hash_struct
>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
>> index f183b42ce1d..c1f61591d2f 100644
>> --- a/gcc/config/rs6000/t-rs6000
>> +++ b/gcc/config/rs6000/t-rs6000
>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
>>         $(COMPILE) $<
>>         $(POSTCOMPILE)
>>
>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
>> +       $(COMPILE) $<
>> +       $(POSTCOMPILE)
>> +
>> +
>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
>>         $(COMPILE) $<
>>         $(POSTCOMPILE)
>> diff --git a/gcc/explow.cc b/gcc/explow.cc
>> index 32e9498ee07..316aa975e40 100644
>> --- a/gcc/explow.cc
>> +++ b/gcc/explow.cc
>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
>>    if (! general_operand (x, VOIDmode))
>>      x = force_operand (x, temp);
>>
>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
>> +              || GET_MODE (x) == VOIDmode);
>>    if (x != temp)
>>      emit_move_insn (temp, x);
>>    return temp;
>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>> index 15be1c8db99..6162ef92b88 100644
>> --- a/gcc/expr.cc
>> +++ b/gcc/expr.cc
>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
>>    rtx y_cst = NULL_RTX;
>>    rtx_insn *last_insn;
>>    rtx set;
>> -
>>    gcc_assert (mode != BLKmode
>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
>> +             && (mode == DImode || GET_MODE (y) == mode
>> +             || GET_MODE (y) == VOIDmode));
>>
>>    /* If we have a copy that looks like one of the following patterns:
>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
>> index 4c641cab192..9d22fadc7ef 100644
>> --- a/gcc/optabs.cc
>> +++ b/gcc/optabs.cc
>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
>>      input:
>>        gcc_assert (mode != VOIDmode);
>>        gcc_assert (GET_MODE (op->value) == VOIDmode
>> -                 || GET_MODE (op->value) == mode);
>> +                 || GET_MODE (op->value) == mode
>> +                 || mode == DImode);
>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
>>         return true;
>>
>> --
>> 2.31.1
>>
Richard Biener March 16, 2023, 8:14 a.m. UTC | #3
On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
> Hello Richard:
>
> On 16/03/23 1:10 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> >>
> >> Hello All:
> >>
> >>
> >> This patch eliminates unnecessary zero extension instruction from power generated assembly.
> >> Bootstrapped and regtested on powerpc64-linux-gnu.
> >
> > What makes this so special that we cannot deal with it from generic code?
> > In particular we do have the REE pass, why is target specific
> > knowledge neccessary
> > to eliminate the extension?
> >
>
> For returning bool values and comparision with integers generates the following by all the rtl passes.
>
> set compare (subreg)
> set if_then_else
> Convert SImode -> QImode
> set zero_extend to SImode from QImode
> set return value 0 in one path of cfg.
> set return value 1 in other path of cfg.
>
> This pass replaces the above zero extension and conversion from QImode to DImode with copy operation to keep QImode in 64 bit registers in powerpc target.

Sorry, I can't parse that - as there's no testcase with the patch I
cannot even try to see what the actual RTL
looks like (without the pass).

Richard.

> Thanks & Regards
> Ajit
> >> +  In cfgexpand pass QImode is generated with
> >> +  bool register value and this pass uses QI
> >> +  as 64 bit registers.
> >> +
>
> >>         rs6000: suboptimal code for returning bool value on target ppc.
> >>
> >>         New pass to eliminate unnecessary zero extension. This pass
> >>         is registered after cse rtl pass.
> >>
> >>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> >>
> >> gcc/ChangeLog:
> >>
> >>         * config/rs6000/rs6000-passes.def: Registered zero elimination
> >>         pass.
> >>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
> >>         * config.gcc: Add new executable.
> >>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
> >>         elimination pass.
> >>         * config/rs6000/rs6000.cc: Add new prototype for zero
> >>         elimination pass.
> >>         * config/rs6000/t-rs6000: Add new rule.
> >>         * expr.cc: Modified gcc assert.
> >>         * explow.cc: Modified gcc assert.
> >>         * optabs.cc: Modified gcc assert.
> >> ---
> >>  gcc/config.gcc                        |   4 +-
> >>  gcc/config/rs6000/rs6000-passes.def   |   2 +
> >>  gcc/config/rs6000/rs6000-protos.h     |   1 +
> >>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
> >>  gcc/config/rs6000/rs6000.cc           |   2 +
> >>  gcc/config/rs6000/t-rs6000            |   5 +
> >>  gcc/explow.cc                         |   3 +-
> >>  gcc/expr.cc                           |   4 +-
> >>  gcc/optabs.cc                         |   3 +-
> >>  9 files changed, 379 insertions(+), 6 deletions(-)
> >>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> >>
> >> diff --git a/gcc/config.gcc b/gcc/config.gcc
> >> index da3a6d3ba1f..e8ac9d882f0 100644
> >> --- a/gcc/config.gcc
> >> +++ b/gcc/config.gcc
> >> @@ -503,7 +503,7 @@ or1k*-*-*)
> >>         ;;
> >>  powerpc*-*-*)
> >>         cpu_type=rs6000
> >> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
> >>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> >> @@ -538,7 +538,7 @@ riscv*)
> >>         ;;
> >>  rs6000*-*-*)
> >>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> >> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
> >>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> >> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
> >> index ca899d5f7af..d7500feddf1 100644
> >> --- a/gcc/config/rs6000/rs6000-passes.def
> >> +++ b/gcc/config/rs6000/rs6000-passes.def
> >> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
> >>       The power8 does not have instructions that automaticaly do the byte swaps
> >>       for loads and stores.  */
> >>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> >> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> >> +
> >>
> >>    /* Pass to do the PCREL_OPT optimization that combines the load of an
> >>       external symbol's address along with a single load or store using that
> >> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> >> index 1a4fc1df668..f6cf2d673d4 100644
> >> --- a/gcc/config/rs6000/rs6000-protos.h
> >> +++ b/gcc/config/rs6000/rs6000-protos.h
> >> @@ -340,6 +340,7 @@ namespace gcc { class context; }
> >>  class rtl_opt_pass;
> >>
> >>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> >> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
> >>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
> >>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
> >>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> >> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
> >> new file mode 100644
> >> index 00000000000..777c7a5a387
> >> --- /dev/null
> >> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> >> @@ -0,0 +1,361 @@
> >> +/* Subroutine to eliminate redundant zero extend for power architecture.
> >> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> >> +
> >> +   This file is part of GCC.
> >> +
> >> +   GCC is free software; you can redistribute it and/or modify it
> >> +   under the terms of the GNU General Public License as published
> >> +   by the Free Software Foundation; either version 3, or (at your
> >> +   option) any later version.
> >> +
> >> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> >> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> >> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> >> +   License for more details.
> >> +
> >> +   You should have received a copy of the GNU General Public License
> >> +   along with GCC; see the file COPYING3.  If not see
> >> +   <http://www.gnu.org/licenses/>.  */
> >> +
> >> +/* This pass remove unnecessary zero extension instruction from
> >> +  power generated assembly. This pass is register after cse
> >> +  pass.
> >> +  Identifies the following sequence of instruction after cse
> >> +  rtl pass.
> >> +
> >> +  set compare (subreg)
> >> +  set if_then_else
> >> +  set SImode -> QImode
> >> +  set zero_extend to DImode from QImode
> >> +  set return value 0 in one path of cfg.
> >> +  set return value 1 in other path of cfg.
> >> +
> >> +  In cfgexpand pass QImode is generated with
> >> +  bool register value and this pass uses QI
> >> +  as 64 bit registers.
> >> +
> >> +  This pass replace copy operation from QImode to DImode
> >> +  and return appropriate return values.*/
> >> +
> >> +#define IN_TARGET_CODE 1
> >> +
> >> +#include "config.h"
> >> +#include "system.h"
> >> +#include "coretypes.h"
> >> +#include "backend.h"
> >> +#include "rtl.h"
> >> +#include "tree.h"
> >> +#include "memmodel.h"
> >> +#include "df.h"
> >> +#include "tm_p.h"
> >> +#include "ira.h"
> >> +#include "print-tree.h"
> >> +#include "varasm.h"
> >> +#include "explow.h"
> >> +#include "expr.h"
> >> +#include "output.h"
> >> +#include "tree-pass.h"
> >> +
> >> +/* This is based on the union-find logic in web.cc.  web_entry_base is
> >> +   defined in df.h.  */
> >> +class zext_web_entry : public web_entry_base
> >> +{
> >> + public:
> >> +  /* Pointer to the insn.  */
> >> +  rtx_insn *insn;
> >> +  unsigned int is_relevant : 1;
> >> +  /* Set if insn is a load.  */
> >> +  unsigned int is_load : 1;
> >> +  /* Set if insn is a store.  */
> >> +  unsigned int is_store : 1;
> >> +  unsigned int is_zext :1 ;
> >> +  unsigned int is_move :1;
> >> +  unsigned int is_delete_move :1;
> >> +  /* Set if this insn should be deleted.  */
> >> +  unsigned int will_delete : 1;
> >> +  unsigned int will_delete_chances : 1;
> >> +};
> >> +
> >> +/* Checks if instruction is zero extension
> >> + * with QIMode to DImode.*/
> >> +static unsigned int
> >> +insn_is_zext_p(rtx insn)
> >> +{
> >> +  rtx body = PATTERN (insn);
> >> +
> >> +  if (GET_CODE (body) == SET
> >> +      && GET_MODE(SET_DEST (body)) == DImode
> >> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
> >> +  {
> >> +    rtx set = XEXP (SET_SRC (body), 0);
> >> +
> >> +    if (REG_P (set))
> >> +    {
> >> +      if (GET_MODE (set) == QImode) return 1;
> >> +    }
> >> +    else
> >> +      return 0;
> >> +  }
> >> +  return 0;
> >> +}
> >> +
> >> +/* Checks if instruction is SET operation with QImode.*/
> >> +static unsigned int
> >> +insn_is_store_p (rtx insn)
> >> +{
> >> +  rtx body = PATTERN (insn);
> >> +  if (GET_CODE (body) == SET
> >> +      && SUBREG_P(SET_SRC (body))
> >> +      && !CONST_INT_P(SET_SRC (body))
> >> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
> >> +      && GET_MODE(SET_SRC (body)) == QImode)
> >> +    return 1;
> >> +
> >> +  return 0;
> >> +}
> >> +
> >> +/* Find out zero extension removal candidate with use-def web.*/
> >> +static void
> >> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
> >> +                                    rtx insn, df_ref def)
> >> +{
> >> +  struct df_link *link = DF_REF_CHAIN (def);
> >> +
> >> +  rtx move_insn = NULL_RTX;
> >> +  rtx compare_insn = NULL_RTX;
> >> +
> >> +  while (link)
> >> +  {
> >> +    if (!DF_REF_INSN_INFO (link->ref))
> >> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
> >> +
> >> +    if (DF_REF_INSN_INFO (link->ref))
> >> +      {
> >> +       rtx use_insn = DF_REF_INSN (link->ref);
> >> +
> >> +       if (GET_CODE (PATTERN (use_insn)) == SET
> >> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
> >> +         {
> >> +           if (GET_CODE (PATTERN (insn)) == SET
> >> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
> >> +             {
> >> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
> >> +
> >> +               if (SUBREG_P (body))
> >> +                 {
> >> +                   compare_insn = use_insn;
> >> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
> >> +
> >> +                   if (compare_insn
> >> +                       && ((REGNO (XEXP (compare_body, 0)))
> >> +                               == REGNO (SET_DEST (PATTERN (insn)))))
> >> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >> +                 }
> >> +              }
> >> +           }
> >> +
> >> +       if (insn_is_store_p(use_insn)
> >> +           && GET_CODE (PATTERN (insn)) == SET
> >> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
> >> +         {
> >> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
> >> +             {
> >> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
> >> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >> +             }
> >> +         }
> >> +
> >> +       if (insn_is_zext_p (insn))
> >> +         {
> >> +           if (GET_CODE (PATTERN (use_insn)) == SET
> >> +               && REG_P (SET_SRC (PATTERN (use_insn))))
> >> +             {
> >> +               if (move_insn
> >> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
> >> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
> >> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
> >> +                 {
> >> +                   insn_entry[INSN_UID (insn)].is_move = 1;
> >> +                   break;
> >> +                 }
> >> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
> >> +                   {
> >> +                     move_insn = use_insn;
> >> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
> >> +                   }
> >> +             }
> >> +         }
> >> +
> >> +       if (insn_is_zext_p (use_insn))
> >> +         {
> >> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
> >> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
> >> +
> >> +           if (insn_is_store_p (insn)
> >> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
> >> +           {
> >> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
> >> +             insn_entry[INSN_UID (insn)].will_delete = 1;
> >> +             insn_entry[INSN_UID( insn)].is_store = 1;
> >> +           }
> >> +
> >> +          if (NONDEBUG_INSN_P (use_insn))
> >> +            unionfind_union (insn_entry + INSN_UID (insn),
> >> +                             insn_entry + INSN_UID (use_insn));
> >> +       }
> >> +      }
> >> +
> >> +    link = link->next;
> >> +  }
> >> +}
> >> +
> >> +/* Replace QImode extensions with copy operations.*/
> >> +static void
> >> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
> >> +{
> >> +  rtx_insn *insn = insn_entry[i].insn;
> >> +  rtx body = PATTERN (insn);
> >> +  rtx src_reg;
> >> +  src_reg = XEXP (SET_SRC (body), 0);
> >> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
> >> +
> >> +  if (GET_MODE(SET_DEST(body)) != DImode)
> >> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
> >> +
> >> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
> >> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
> >> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
> >> +  df_insn_rescan (new_insn);
> >> +
> >> +  df_insn_delete (insn);
> >> +  remove_insn (insn);
> >> +  insn->set_deleted ();
> >> +}
> >> +
> >> +/* Main entry point for this pass.  */
> >> +unsigned int
> >> +rs6000_analyze_zext (function *fun)
> >> +{
> >> +  zext_web_entry *insn_entry;
> >> +  basic_block bb;
> >> +  rtx_insn *insn, *curr_insn = 0;
> >> +
> >> +  /* Dataflow analysis for use-def chains.  */
> >> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >> +  df_analyze ();
> >> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >> +
> >> +  /* Rebuild ud- and du-chains.  */
> >> +  df_remove_problem (df_chain);
> >> +  df_process_deferred_rescans ();
> >> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >> +  df_analyze ();
> >> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >> +
> >> +  /* Allocate structure to represent webs of insns.  */
> >> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
> >> +
> >> +  /* Walk the insns to gather basic data.  */
> >> +  FOR_ALL_BB_FN (bb, fun)
> >> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> >> +    {
> >> +      unsigned int uid = INSN_UID (insn);
> >> +      if (NONDEBUG_INSN_P (insn))
> >> +       {
> >> +         insn_entry[uid].insn = insn;
> >> +
> >> +         if (GET_CODE (insn) == insn_is_store_p (insn))
> >> +           {
> >> +             insn_entry[uid].is_store = 1;
> >> +             insn_entry[uid].is_relevant = 1;
> >> +           }
> >> +
> >> +         /* Walk the uses and defs to identify the optimization
> >> +            candidates.*/
> >> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> >> +         df_ref mention;
> >> +
> >> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
> >> +           {
> >> +             insn_entry[uid].is_relevant = 1;
> >> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
> >> +           }
> >> +
> >> +         if (insn_entry[uid].is_relevant)
> >> +           {
> >> +             /* Determine if this is a store.  */
> >> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >> +           }
> >> +       }
> >> +     }
> >> +
> >> +   unsigned e = get_max_uid (), i;
> >> +
> >> +   int store_index = -1;
> >> +
> >> +   /* Replace with copy operation.*/
> >> +   for (i = 0; i < e; ++i)
> >> +     {
> >> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
> >> +        store_index  = i;
> >> +
> >> +       if ((store_index != -1)
> >> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
> >> +         {
> >> +           replace_marked_insns (insn_entry, store_index);
> >> +           replace_marked_insns (insn_entry, i);
> >> +         }
> >> +     }
> >> +    /* Clean up.  */
> >> +    free (insn_entry);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +const pass_data pass_data_analyze_zext =
> >> +{
> >> +  RTL_PASS, /* type */
> >> +  "zext", /* name */
> >> +  OPTGROUP_NONE, /* optinfo_flags */
> >> +  TV_NONE, /* tv_id */
> >> +  0, /* properties_required */
> >> +  0, /* properties_provided */
> >> +  0, /* properties_destroyed */
> >> +  0, /* todo_flags_start */
> >> +  TODO_df_finish, /* todo_flags_finish */
> >> +};
> >> +
> >> +class pass_analyze_zext : public rtl_opt_pass
> >> +{
> >> +public:
> >> +  pass_analyze_zext(gcc::context *ctxt)
> >> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
> >> +  {}
> >> +
> >> +  /* opt_pass methods: */
> >> +  virtual bool gate (function *)
> >> +    {
> >> +      return (optimize > 0 );
> >> +    }
> >> +
> >> +  virtual unsigned int execute (function *fun)
> >> +    {
> >> +      return rs6000_analyze_zext (fun);
> >> +    }
> >> +
> >> +  opt_pass *clone ()
> >> +    {
> >> +      return new pass_analyze_zext (m_ctxt);
> >> +    }
> >> +
> >> +}; // class pass_analyze_zext
> >> +
> >> +rtl_opt_pass *
> >> +make_pass_analyze_zext (gcc::context *ctxt)
> >> +{
> >> +  return new pass_analyze_zext (ctxt);
> >> +}
> >> +
> >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> >> index 8e0b0d022db..6541334bf2d 100644
> >> --- a/gcc/config/rs6000/rs6000.cc
> >> +++ b/gcc/config/rs6000/rs6000.cc
> >> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
> >>                                           bool);
> >>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
> >>
> >> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
> >> +
> >>  /* Hash table stuff for keeping track of TOC entries.  */
> >>
> >>  struct GTY((for_user)) toc_hash_struct
> >> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
> >> index f183b42ce1d..c1f61591d2f 100644
> >> --- a/gcc/config/rs6000/t-rs6000
> >> +++ b/gcc/config/rs6000/t-rs6000
> >> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
> >>         $(COMPILE) $<
> >>         $(POSTCOMPILE)
> >>
> >> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
> >> +       $(COMPILE) $<
> >> +       $(POSTCOMPILE)
> >> +
> >> +
> >>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
> >>         $(COMPILE) $<
> >>         $(POSTCOMPILE)
> >> diff --git a/gcc/explow.cc b/gcc/explow.cc
> >> index 32e9498ee07..316aa975e40 100644
> >> --- a/gcc/explow.cc
> >> +++ b/gcc/explow.cc
> >> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
> >>    if (! general_operand (x, VOIDmode))
> >>      x = force_operand (x, temp);
> >>
> >> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
> >> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
> >> +              || GET_MODE (x) == VOIDmode);
> >>    if (x != temp)
> >>      emit_move_insn (temp, x);
> >>    return temp;
> >> diff --git a/gcc/expr.cc b/gcc/expr.cc
> >> index 15be1c8db99..6162ef92b88 100644
> >> --- a/gcc/expr.cc
> >> +++ b/gcc/expr.cc
> >> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
> >>    rtx y_cst = NULL_RTX;
> >>    rtx_insn *last_insn;
> >>    rtx set;
> >> -
> >>    gcc_assert (mode != BLKmode
> >> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
> >> +             && (mode == DImode || GET_MODE (y) == mode
> >> +             || GET_MODE (y) == VOIDmode));
> >>
> >>    /* If we have a copy that looks like one of the following patterns:
> >>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
> >> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> >> index 4c641cab192..9d22fadc7ef 100644
> >> --- a/gcc/optabs.cc
> >> +++ b/gcc/optabs.cc
> >> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
> >>      input:
> >>        gcc_assert (mode != VOIDmode);
> >>        gcc_assert (GET_MODE (op->value) == VOIDmode
> >> -                 || GET_MODE (op->value) == mode);
> >> +                 || GET_MODE (op->value) == mode
> >> +                 || mode == DImode);
> >>        if (maybe_legitimize_operand_same_code (icode, opno, op))
> >>         return true;
> >>
> >> --
> >> 2.31.1
> >>
Ajit Agarwal March 16, 2023, 8:19 a.m. UTC | #4
On 16/03/23 1:44 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>> Hello Richard:
>>
>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>> <gcc-patches@gcc.gnu.org> wrote:
>>>>
>>>> Hello All:
>>>>
>>>>
>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>
>>> What makes this so special that we cannot deal with it from generic code?
>>> In particular we do have the REE pass, why is target specific
>>> knowledge neccessary
>>> to eliminate the extension?
>>>
>>
>> For returning bool values and comparision with integers generates the following by all the rtl passes.
>>
>> set compare (subreg)
>> set if_then_else
>> Convert SImode -> QImode
>> set zero_extend to SImode from QImode
>> set return value 0 in one path of cfg.
>> set return value 1 in other path of cfg.
>>
>> This pass replaces the above zero extension and conversion from QImode to DImode with copy operation to keep QImode in 64 bit registers in powerpc target.
> 
> Sorry, I can't parse that - as there's no testcase with the patch I
> cannot even try to see what the actual RTL
> looks like (without the pass).
> 

Here is the corresponding PR in Bugzilla:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784

I can add the testcase attached to that PR to the patch.

Thanks & Regards
Ajit 
> Richard.
> 
>> Thanks & Regards
>> Ajit
>>>> +  In cfgexpand pass QImode is generated with
>>>> +  bool register value and this pass uses QI
>>>> +  as 64 bit registers.
>>>> +
>>
>>>>         rs6000: suboptimal code for returning bool value on target ppc.
>>>>
>>>>         New pass to eliminate unnecessary zero extension. This pass
>>>>         is registered after cse rtl pass.
>>>>
>>>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
>>>>         pass.
>>>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>>>>         * config.gcc: Add new executable.
>>>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
>>>>         elimination pass.
>>>>         * config/rs6000/rs6000.cc: Add new prototype for zero
>>>>         elimination pass.
>>>>         * config/rs6000/t-rs6000: Add new rule.
>>>>         * expr.cc: Modified gcc assert.
>>>>         * explow.cc: Modified gcc assert.
>>>>         * optabs.cc: Modified gcc assert.
>>>> ---
>>>>  gcc/config.gcc                        |   4 +-
>>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
>>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
>>>>  gcc/config/rs6000/rs6000.cc           |   2 +
>>>>  gcc/config/rs6000/t-rs6000            |   5 +
>>>>  gcc/explow.cc                         |   3 +-
>>>>  gcc/expr.cc                           |   4 +-
>>>>  gcc/optabs.cc                         |   3 +-
>>>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>>>
>>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>>>> index da3a6d3ba1f..e8ac9d882f0 100644
>>>> --- a/gcc/config.gcc
>>>> +++ b/gcc/config.gcc
>>>> @@ -503,7 +503,7 @@ or1k*-*-*)
>>>>         ;;
>>>>  powerpc*-*-*)
>>>>         cpu_type=rs6000
>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>>>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>>>> @@ -538,7 +538,7 @@ riscv*)
>>>>         ;;
>>>>  rs6000*-*-*)
>>>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>>>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
>>>> index ca899d5f7af..d7500feddf1 100644
>>>> --- a/gcc/config/rs6000/rs6000-passes.def
>>>> +++ b/gcc/config/rs6000/rs6000-passes.def
>>>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>>>>       The power8 does not have instructions that automaticaly do the byte swaps
>>>>       for loads and stores.  */
>>>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
>>>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
>>>> +
>>>>
>>>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
>>>>       external symbol's address along with a single load or store using that
>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>>> index 1a4fc1df668..f6cf2d673d4 100644
>>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>>>>  class rtl_opt_pass;
>>>>
>>>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
>>>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>>>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>>>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>>>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
>>>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>> new file mode 100644
>>>> index 00000000000..777c7a5a387
>>>> --- /dev/null
>>>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>> @@ -0,0 +1,361 @@
>>>> +/* Subroutine to eliminate redundant zero extend for power architecture.
>>>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
>>>> +
>>>> +   This file is part of GCC.
>>>> +
>>>> +   GCC is free software; you can redistribute it and/or modify it
>>>> +   under the terms of the GNU General Public License as published
>>>> +   by the Free Software Foundation; either version 3, or (at your
>>>> +   option) any later version.
>>>> +
>>>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
>>>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>>>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>> +   License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU General Public License
>>>> +   along with GCC; see the file COPYING3.  If not see
>>>> +   <http://www.gnu.org/licenses/>.  */
>>>> +
>>>> +/* This pass remove unnecessary zero extension instruction from
>>>> +  power generated assembly. This pass is register after cse
>>>> +  pass.
>>>> +  Identifies the following sequence of instruction after cse
>>>> +  rtl pass.
>>>> +
>>>> +  set compare (subreg)
>>>> +  set if_then_else
>>>> +  set SImode -> QImode
>>>> +  set zero_extend to DImode from QImode
>>>> +  set return value 0 in one path of cfg.
>>>> +  set return value 1 in other path of cfg.
>>>> +
>>>> +  In cfgexpand pass QImode is generated with
>>>> +  bool register value and this pass uses QI
>>>> +  as 64 bit registers.
>>>> +
>>>> +  This pass replace copy operation from QImode to DImode
>>>> +  and return appropriate return values.*/
>>>> +
>>>> +#define IN_TARGET_CODE 1
>>>> +
>>>> +#include "config.h"
>>>> +#include "system.h"
>>>> +#include "coretypes.h"
>>>> +#include "backend.h"
>>>> +#include "rtl.h"
>>>> +#include "tree.h"
>>>> +#include "memmodel.h"
>>>> +#include "df.h"
>>>> +#include "tm_p.h"
>>>> +#include "ira.h"
>>>> +#include "print-tree.h"
>>>> +#include "varasm.h"
>>>> +#include "explow.h"
>>>> +#include "expr.h"
>>>> +#include "output.h"
>>>> +#include "tree-pass.h"
>>>> +
>>>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
>>>> +   defined in df.h.  */
>>>> +class zext_web_entry : public web_entry_base
>>>> +{
>>>> + public:
>>>> +  /* Pointer to the insn.  */
>>>> +  rtx_insn *insn;
>>>> +  unsigned int is_relevant : 1;
>>>> +  /* Set if insn is a load.  */
>>>> +  unsigned int is_load : 1;
>>>> +  /* Set if insn is a store.  */
>>>> +  unsigned int is_store : 1;
>>>> +  unsigned int is_zext :1 ;
>>>> +  unsigned int is_move :1;
>>>> +  unsigned int is_delete_move :1;
>>>> +  /* Set if this insn should be deleted.  */
>>>> +  unsigned int will_delete : 1;
>>>> +  unsigned int will_delete_chances : 1;
>>>> +};
>>>> +
>>>> +/* Checks if instruction is zero extension
>>>> + * with QIMode to DImode.*/
>>>> +static unsigned int
>>>> +insn_is_zext_p(rtx insn)
>>>> +{
>>>> +  rtx body = PATTERN (insn);
>>>> +
>>>> +  if (GET_CODE (body) == SET
>>>> +      && GET_MODE(SET_DEST (body)) == DImode
>>>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
>>>> +  {
>>>> +    rtx set = XEXP (SET_SRC (body), 0);
>>>> +
>>>> +    if (REG_P (set))
>>>> +    {
>>>> +      if (GET_MODE (set) == QImode) return 1;
>>>> +    }
>>>> +    else
>>>> +      return 0;
>>>> +  }
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +/* Checks if instruction is SET operation with QImode.*/
>>>> +static unsigned int
>>>> +insn_is_store_p (rtx insn)
>>>> +{
>>>> +  rtx body = PATTERN (insn);
>>>> +  if (GET_CODE (body) == SET
>>>> +      && SUBREG_P(SET_SRC (body))
>>>> +      && !CONST_INT_P(SET_SRC (body))
>>>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
>>>> +      && GET_MODE(SET_SRC (body)) == QImode)
>>>> +    return 1;
>>>> +
>>>> +  return 0;
>>>> +}
>>>> +
>>>> +/* Find out zero extension removal candidate with use-def web.*/
>>>> +static void
>>>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
>>>> +                                    rtx insn, df_ref def)
>>>> +{
>>>> +  struct df_link *link = DF_REF_CHAIN (def);
>>>> +
>>>> +  rtx move_insn = NULL_RTX;
>>>> +  rtx compare_insn = NULL_RTX;
>>>> +
>>>> +  while (link)
>>>> +  {
>>>> +    if (!DF_REF_INSN_INFO (link->ref))
>>>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
>>>> +
>>>> +    if (DF_REF_INSN_INFO (link->ref))
>>>> +      {
>>>> +       rtx use_insn = DF_REF_INSN (link->ref);
>>>> +
>>>> +       if (GET_CODE (PATTERN (use_insn)) == SET
>>>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
>>>> +         {
>>>> +           if (GET_CODE (PATTERN (insn)) == SET
>>>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
>>>> +             {
>>>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
>>>> +
>>>> +               if (SUBREG_P (body))
>>>> +                 {
>>>> +                   compare_insn = use_insn;
>>>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
>>>> +
>>>> +                   if (compare_insn
>>>> +                       && ((REGNO (XEXP (compare_body, 0)))
>>>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
>>>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>> +                 }
>>>> +              }
>>>> +           }
>>>> +
>>>> +       if (insn_is_store_p(use_insn)
>>>> +           && GET_CODE (PATTERN (insn)) == SET
>>>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
>>>> +         {
>>>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
>>>> +             {
>>>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
>>>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>> +             }
>>>> +         }
>>>> +
>>>> +       if (insn_is_zext_p (insn))
>>>> +         {
>>>> +           if (GET_CODE (PATTERN (use_insn)) == SET
>>>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
>>>> +             {
>>>> +               if (move_insn
>>>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
>>>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
>>>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
>>>> +                 {
>>>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
>>>> +                   break;
>>>> +                 }
>>>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
>>>> +                   {
>>>> +                     move_insn = use_insn;
>>>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
>>>> +                   }
>>>> +             }
>>>> +         }
>>>> +
>>>> +       if (insn_is_zext_p (use_insn))
>>>> +         {
>>>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
>>>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
>>>> +
>>>> +           if (insn_is_store_p (insn)
>>>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
>>>> +           {
>>>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
>>>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
>>>> +             insn_entry[INSN_UID( insn)].is_store = 1;
>>>> +           }
>>>> +
>>>> +          if (NONDEBUG_INSN_P (use_insn))
>>>> +            unionfind_union (insn_entry + INSN_UID (insn),
>>>> +                             insn_entry + INSN_UID (use_insn));
>>>> +       }
>>>> +      }
>>>> +
>>>> +    link = link->next;
>>>> +  }
>>>> +}
>>>> +
>>>> +/* Replace QImode extensions with copy operations.*/
>>>> +static void
>>>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
>>>> +{
>>>> +  rtx_insn *insn = insn_entry[i].insn;
>>>> +  rtx body = PATTERN (insn);
>>>> +  rtx src_reg;
>>>> +  src_reg = XEXP (SET_SRC (body), 0);
>>>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
>>>> +
>>>> +  if (GET_MODE(SET_DEST(body)) != DImode)
>>>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
>>>> +
>>>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
>>>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
>>>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
>>>> +  df_insn_rescan (new_insn);
>>>> +
>>>> +  df_insn_delete (insn);
>>>> +  remove_insn (insn);
>>>> +  insn->set_deleted ();
>>>> +}
>>>> +
>>>> +/* Main entry point for this pass.  */
>>>> +unsigned int
>>>> +rs6000_analyze_zext (function *fun)
>>>> +{
>>>> +  zext_web_entry *insn_entry;
>>>> +  basic_block bb;
>>>> +  rtx_insn *insn, *curr_insn = 0;
>>>> +
>>>> +  /* Dataflow analysis for use-def chains.  */
>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>> +  df_analyze ();
>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>> +
>>>> +  /* Rebuild ud- and du-chains.  */
>>>> +  df_remove_problem (df_chain);
>>>> +  df_process_deferred_rescans ();
>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>> +  df_analyze ();
>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>> +
>>>> +  /* Allocate structure to represent webs of insns.  */
>>>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
>>>> +
>>>> +  /* Walk the insns to gather basic data.  */
>>>> +  FOR_ALL_BB_FN (bb, fun)
>>>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
>>>> +    {
>>>> +      unsigned int uid = INSN_UID (insn);
>>>> +      if (NONDEBUG_INSN_P (insn))
>>>> +       {
>>>> +         insn_entry[uid].insn = insn;
>>>> +
>>>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
>>>> +           {
>>>> +             insn_entry[uid].is_store = 1;
>>>> +             insn_entry[uid].is_relevant = 1;
>>>> +           }
>>>> +
>>>> +         /* Walk the uses and defs to identify the optimization
>>>> +            candidates.*/
>>>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
>>>> +         df_ref mention;
>>>> +
>>>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
>>>> +           {
>>>> +             insn_entry[uid].is_relevant = 1;
>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
>>>> +           }
>>>> +
>>>> +         if (insn_entry[uid].is_relevant)
>>>> +           {
>>>> +             /* Determine if this is a store.  */
>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>> +           }
>>>> +       }
>>>> +     }
>>>> +
>>>> +   unsigned e = get_max_uid (), i;
>>>> +
>>>> +   int store_index = -1;
>>>> +
>>>> +   /* Replace with copy operation.*/
>>>> +   for (i = 0; i < e; ++i)
>>>> +     {
>>>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
>>>> +        store_index  = i;
>>>> +
>>>> +       if ((store_index != -1)
>>>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
>>>> +         {
>>>> +           replace_marked_insns (insn_entry, store_index);
>>>> +           replace_marked_insns (insn_entry, i);
>>>> +         }
>>>> +     }
>>>> +    /* Clean up.  */
>>>> +    free (insn_entry);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +const pass_data pass_data_analyze_zext =
>>>> +{
>>>> +  RTL_PASS, /* type */
>>>> +  "zext", /* name */
>>>> +  OPTGROUP_NONE, /* optinfo_flags */
>>>> +  TV_NONE, /* tv_id */
>>>> +  0, /* properties_required */
>>>> +  0, /* properties_provided */
>>>> +  0, /* properties_destroyed */
>>>> +  0, /* todo_flags_start */
>>>> +  TODO_df_finish, /* todo_flags_finish */
>>>> +};
>>>> +
>>>> +class pass_analyze_zext : public rtl_opt_pass
>>>> +{
>>>> +public:
>>>> +  pass_analyze_zext(gcc::context *ctxt)
>>>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
>>>> +  {}
>>>> +
>>>> +  /* opt_pass methods: */
>>>> +  virtual bool gate (function *)
>>>> +    {
>>>> +      return (optimize > 0 );
>>>> +    }
>>>> +
>>>> +  virtual unsigned int execute (function *fun)
>>>> +    {
>>>> +      return rs6000_analyze_zext (fun);
>>>> +    }
>>>> +
>>>> +  opt_pass *clone ()
>>>> +    {
>>>> +      return new pass_analyze_zext (m_ctxt);
>>>> +    }
>>>> +
>>>> +}; // class pass_analyze_zext
>>>> +
>>>> +rtl_opt_pass *
>>>> +make_pass_analyze_zext (gcc::context *ctxt)
>>>> +{
>>>> +  return new pass_analyze_zext (ctxt);
>>>> +}
>>>> +
>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>>> index 8e0b0d022db..6541334bf2d 100644
>>>> --- a/gcc/config/rs6000/rs6000.cc
>>>> +++ b/gcc/config/rs6000/rs6000.cc
>>>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
>>>>                                           bool);
>>>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>>>>
>>>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
>>>> +
>>>>  /* Hash table stuff for keeping track of TOC entries.  */
>>>>
>>>>  struct GTY((for_user)) toc_hash_struct
>>>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
>>>> index f183b42ce1d..c1f61591d2f 100644
>>>> --- a/gcc/config/rs6000/t-rs6000
>>>> +++ b/gcc/config/rs6000/t-rs6000
>>>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
>>>>         $(COMPILE) $<
>>>>         $(POSTCOMPILE)
>>>>
>>>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
>>>> +       $(COMPILE) $<
>>>> +       $(POSTCOMPILE)
>>>> +
>>>> +
>>>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
>>>>         $(COMPILE) $<
>>>>         $(POSTCOMPILE)
>>>> diff --git a/gcc/explow.cc b/gcc/explow.cc
>>>> index 32e9498ee07..316aa975e40 100644
>>>> --- a/gcc/explow.cc
>>>> +++ b/gcc/explow.cc
>>>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
>>>>    if (! general_operand (x, VOIDmode))
>>>>      x = force_operand (x, temp);
>>>>
>>>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
>>>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
>>>> +              || GET_MODE (x) == VOIDmode);
>>>>    if (x != temp)
>>>>      emit_move_insn (temp, x);
>>>>    return temp;
>>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>>>> index 15be1c8db99..6162ef92b88 100644
>>>> --- a/gcc/expr.cc
>>>> +++ b/gcc/expr.cc
>>>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
>>>>    rtx y_cst = NULL_RTX;
>>>>    rtx_insn *last_insn;
>>>>    rtx set;
>>>> -
>>>>    gcc_assert (mode != BLKmode
>>>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
>>>> +             && (mode == DImode || GET_MODE (y) == mode
>>>> +             || GET_MODE (y) == VOIDmode));
>>>>
>>>>    /* If we have a copy that looks like one of the following patterns:
>>>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
>>>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
>>>> index 4c641cab192..9d22fadc7ef 100644
>>>> --- a/gcc/optabs.cc
>>>> +++ b/gcc/optabs.cc
>>>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
>>>>      input:
>>>>        gcc_assert (mode != VOIDmode);
>>>>        gcc_assert (GET_MODE (op->value) == VOIDmode
>>>> -                 || GET_MODE (op->value) == mode);
>>>> +                 || GET_MODE (op->value) == mode
>>>> +                 || mode == DImode);
>>>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
>>>>         return true;
>>>>
>>>> --
>>>> 2.31.1
>>>>
Richard Biener March 16, 2023, 9:52 a.m. UTC | #5
On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
>
>
> On 16/03/23 1:44 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> >>
> >> Hello Richard:
> >>
> >> On 16/03/23 1:10 pm, Richard Biener wrote:
> >>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> >>> <gcc-patches@gcc.gnu.org> wrote:
> >>>>
> >>>> Hello All:
> >>>>
> >>>>
> >>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
> >>>> Bootstrapped and regtested on powerpc64-linux-gnu.
> >>>
> >>> What makes this so special that we cannot deal with it from generic code?
> >>> In particular we do have the REE pass, why is target specific
> >>> knowledge necessary
> >>> to eliminate the extension?
> >>>
> >>
> >> For functions that return a bool value computed from a comparison with integers, the RTL passes generate the following sequence.
> >>
> >> set compare (subreg)
> >> set if_then_else
> >> Convert SImode -> QImode
> >> set zero_extend to SImode from QImode
> >> set return value 0 in one path of cfg.
> >> set return value 1 in other path of cfg.
> >>
> >> This pass replaces the above zero extension and the conversion from QImode to DImode with a copy operation, so that the QImode value is kept in a 64-bit register on the powerpc target.
> >
> > Sorry, I can't parse that - as there's no testcase with the patch I
> > cannot even try to see what the actual RTL
> > looks like (without the pass).
> >
>
> Here is the PR in Bugzilla.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>
> I can add the attached testcase with this PR in the patch.

I don't see any zero-extends there.

> Thanks & Regards
> Ajit
> > Richard.
> >
> >> Thanks & Regards
> >> Ajit
> >>>> +  In cfgexpand pass QImode is generated with
> >>>> +  bool register value and this pass uses QI
> >>>> +  as 64 bit registers.
> >>>> +
> >>
> >>>>         rs6000: suboptimal code for returning bool value on target ppc.
> >>>>
> >>>>         New pass to eliminate unnecessary zero extension. This pass
> >>>>         is registered after cse rtl pass.
> >>>>
> >>>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> >>>>
> >>>> gcc/ChangeLog:
> >>>>
> >>>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
> >>>>         pass.
> >>>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
> >>>>         * config.gcc: Add new executable.
> >>>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
> >>>>         elimination pass.
> >>>>         * config/rs6000/rs6000.cc: Add new prototype for zero
> >>>>         elimination pass.
> >>>>         * config/rs6000/t-rs6000: Add new rule.
> >>>>         * expr.cc: Modified gcc assert.
> >>>>         * explow.cc: Modified gcc assert.
> >>>>         * optabs.cc: Modified gcc assert.
> >>>> ---
> >>>>  gcc/config.gcc                        |   4 +-
> >>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
> >>>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
> >>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
> >>>>  gcc/config/rs6000/rs6000.cc           |   2 +
> >>>>  gcc/config/rs6000/t-rs6000            |   5 +
> >>>>  gcc/explow.cc                         |   3 +-
> >>>>  gcc/expr.cc                           |   4 +-
> >>>>  gcc/optabs.cc                         |   3 +-
> >>>>  9 files changed, 379 insertions(+), 6 deletions(-)
> >>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> >>>>
> >>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
> >>>> index da3a6d3ba1f..e8ac9d882f0 100644
> >>>> --- a/gcc/config.gcc
> >>>> +++ b/gcc/config.gcc
> >>>> @@ -503,7 +503,7 @@ or1k*-*-*)
> >>>>         ;;
> >>>>  powerpc*-*-*)
> >>>>         cpu_type=rs6000
> >>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
> >>>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> >>>> @@ -538,7 +538,7 @@ riscv*)
> >>>>         ;;
> >>>>  rs6000*-*-*)
> >>>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> >>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
> >>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> >>>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
> >>>> index ca899d5f7af..d7500feddf1 100644
> >>>> --- a/gcc/config/rs6000/rs6000-passes.def
> >>>> +++ b/gcc/config/rs6000/rs6000-passes.def
> >>>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
> >>>>       The power8 does not have instructions that automaticaly do the byte swaps
> >>>>       for loads and stores.  */
> >>>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> >>>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> >>>> +
> >>>>
> >>>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
> >>>>       external symbol's address along with a single load or store using that
> >>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> >>>> index 1a4fc1df668..f6cf2d673d4 100644
> >>>> --- a/gcc/config/rs6000/rs6000-protos.h
> >>>> +++ b/gcc/config/rs6000/rs6000-protos.h
> >>>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
> >>>>  class rtl_opt_pass;
> >>>>
> >>>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> >>>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
> >>>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
> >>>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
> >>>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> >>>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
> >>>> new file mode 100644
> >>>> index 00000000000..777c7a5a387
> >>>> --- /dev/null
> >>>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> >>>> @@ -0,0 +1,361 @@
> >>>> +/* Subroutine to eliminate redundant zero extend for power architecture.
> >>>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> >>>> +
> >>>> +   This file is part of GCC.
> >>>> +
> >>>> +   GCC is free software; you can redistribute it and/or modify it
> >>>> +   under the terms of the GNU General Public License as published
> >>>> +   by the Free Software Foundation; either version 3, or (at your
> >>>> +   option) any later version.
> >>>> +
> >>>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> >>>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> >>>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> >>>> +   License for more details.
> >>>> +
> >>>> +   You should have received a copy of the GNU General Public License
> >>>> +   along with GCC; see the file COPYING3.  If not see
> >>>> +   <http://www.gnu.org/licenses/>.  */
> >>>> +
> >>>> +/* This pass remove unnecessary zero extension instruction from
> >>>> +  power generated assembly. This pass is register after cse
> >>>> +  pass.
> >>>> +  Identifies the following sequence of instruction after cse
> >>>> +  rtl pass.
> >>>> +
> >>>> +  set compare (subreg)
> >>>> +  set if_then_else
> >>>> +  set SImode -> QImode
> >>>> +  set zero_extend to DImode from QImode
> >>>> +  set return value 0 in one path of cfg.
> >>>> +  set return value 1 in other path of cfg.
> >>>> +
> >>>> +  In cfgexpand pass QImode is generated with
> >>>> +  bool register value and this pass uses QI
> >>>> +  as 64 bit registers.
> >>>> +
> >>>> +  This pass replace copy operation from QImode to DImode
> >>>> +  and return appropriate return values.*/
> >>>> +
> >>>> +#define IN_TARGET_CODE 1
> >>>> +
> >>>> +#include "config.h"
> >>>> +#include "system.h"
> >>>> +#include "coretypes.h"
> >>>> +#include "backend.h"
> >>>> +#include "rtl.h"
> >>>> +#include "tree.h"
> >>>> +#include "memmodel.h"
> >>>> +#include "df.h"
> >>>> +#include "tm_p.h"
> >>>> +#include "ira.h"
> >>>> +#include "print-tree.h"
> >>>> +#include "varasm.h"
> >>>> +#include "explow.h"
> >>>> +#include "expr.h"
> >>>> +#include "output.h"
> >>>> +#include "tree-pass.h"
> >>>> +
> >>>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
> >>>> +   defined in df.h.  */
> >>>> +class zext_web_entry : public web_entry_base
> >>>> +{
> >>>> + public:
> >>>> +  /* Pointer to the insn.  */
> >>>> +  rtx_insn *insn;
> >>>> +  unsigned int is_relevant : 1;
> >>>> +  /* Set if insn is a load.  */
> >>>> +  unsigned int is_load : 1;
> >>>> +  /* Set if insn is a store.  */
> >>>> +  unsigned int is_store : 1;
> >>>> +  unsigned int is_zext :1 ;
> >>>> +  unsigned int is_move :1;
> >>>> +  unsigned int is_delete_move :1;
> >>>> +  /* Set if this insn should be deleted.  */
> >>>> +  unsigned int will_delete : 1;
> >>>> +  unsigned int will_delete_chances : 1;
> >>>> +};
> >>>> +
> >>>> +/* Checks if instruction is zero extension
> >>>> + * with QIMode to DImode.*/
> >>>> +static unsigned int
> >>>> +insn_is_zext_p(rtx insn)
> >>>> +{
> >>>> +  rtx body = PATTERN (insn);
> >>>> +
> >>>> +  if (GET_CODE (body) == SET
> >>>> +      && GET_MODE(SET_DEST (body)) == DImode
> >>>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
> >>>> +  {
> >>>> +    rtx set = XEXP (SET_SRC (body), 0);
> >>>> +
> >>>> +    if (REG_P (set))
> >>>> +    {
> >>>> +      if (GET_MODE (set) == QImode) return 1;
> >>>> +    }
> >>>> +    else
> >>>> +      return 0;
> >>>> +  }
> >>>> +  return 0;
> >>>> +}
> >>>> +
> >>>> +/* Checks if instruction is SET operation with QImode.*/
> >>>> +static unsigned int
> >>>> +insn_is_store_p (rtx insn)
> >>>> +{
> >>>> +  rtx body = PATTERN (insn);
> >>>> +  if (GET_CODE (body) == SET
> >>>> +      && SUBREG_P(SET_SRC (body))
> >>>> +      && !CONST_INT_P(SET_SRC (body))
> >>>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
> >>>> +      && GET_MODE(SET_SRC (body)) == QImode)
> >>>> +    return 1;
> >>>> +
> >>>> +  return 0;
> >>>> +}
> >>>> +
> >>>> +/* Find out zero extension removal candidate with use-def web.*/
> >>>> +static void
> >>>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
> >>>> +                                    rtx insn, df_ref def)
> >>>> +{
> >>>> +  struct df_link *link = DF_REF_CHAIN (def);
> >>>> +
> >>>> +  rtx move_insn = NULL_RTX;
> >>>> +  rtx compare_insn = NULL_RTX;
> >>>> +
> >>>> +  while (link)
> >>>> +  {
> >>>> +    if (!DF_REF_INSN_INFO (link->ref))
> >>>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
> >>>> +
> >>>> +    if (DF_REF_INSN_INFO (link->ref))
> >>>> +      {
> >>>> +       rtx use_insn = DF_REF_INSN (link->ref);
> >>>> +
> >>>> +       if (GET_CODE (PATTERN (use_insn)) == SET
> >>>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
> >>>> +         {
> >>>> +           if (GET_CODE (PATTERN (insn)) == SET
> >>>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
> >>>> +             {
> >>>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
> >>>> +
> >>>> +               if (SUBREG_P (body))
> >>>> +                 {
> >>>> +                   compare_insn = use_insn;
> >>>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
> >>>> +
> >>>> +                   if (compare_insn
> >>>> +                       && ((REGNO (XEXP (compare_body, 0)))
> >>>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
> >>>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >>>> +                 }
> >>>> +              }
> >>>> +           }
> >>>> +
> >>>> +       if (insn_is_store_p(use_insn)
> >>>> +           && GET_CODE (PATTERN (insn)) == SET
> >>>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
> >>>> +         {
> >>>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
> >>>> +             {
> >>>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
> >>>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >>>> +             }
> >>>> +         }
> >>>> +
> >>>> +       if (insn_is_zext_p (insn))
> >>>> +         {
> >>>> +           if (GET_CODE (PATTERN (use_insn)) == SET
> >>>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
> >>>> +             {
> >>>> +               if (move_insn
> >>>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
> >>>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
> >>>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
> >>>> +                 {
> >>>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
> >>>> +                   break;
> >>>> +                 }
> >>>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
> >>>> +                   {
> >>>> +                     move_insn = use_insn;
> >>>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
> >>>> +                   }
> >>>> +             }
> >>>> +         }
> >>>> +
> >>>> +       if (insn_is_zext_p (use_insn))
> >>>> +         {
> >>>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
> >>>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
> >>>> +
> >>>> +           if (insn_is_store_p (insn)
> >>>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
> >>>> +           {
> >>>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
> >>>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
> >>>> +             insn_entry[INSN_UID( insn)].is_store = 1;
> >>>> +           }
> >>>> +
> >>>> +          if (NONDEBUG_INSN_P (use_insn))
> >>>> +            unionfind_union (insn_entry + INSN_UID (insn),
> >>>> +                             insn_entry + INSN_UID (use_insn));
> >>>> +       }
> >>>> +      }
> >>>> +
> >>>> +    link = link->next;
> >>>> +  }
> >>>> +}
> >>>> +
> >>>> +/* Replace QImode extensions with copy operations.*/
> >>>> +static void
> >>>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
> >>>> +{
> >>>> +  rtx_insn *insn = insn_entry[i].insn;
> >>>> +  rtx body = PATTERN (insn);
> >>>> +  rtx src_reg;
> >>>> +  src_reg = XEXP (SET_SRC (body), 0);
> >>>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
> >>>> +
> >>>> +  if (GET_MODE(SET_DEST(body)) != DImode)
> >>>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
> >>>> +
> >>>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
> >>>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
> >>>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
> >>>> +  df_insn_rescan (new_insn);
> >>>> +
> >>>> +  df_insn_delete (insn);
> >>>> +  remove_insn (insn);
> >>>> +  insn->set_deleted ();
> >>>> +}
> >>>> +
> >>>> +/* Main entry point for this pass.  */
> >>>> +unsigned int
> >>>> +rs6000_analyze_zext (function *fun)
> >>>> +{
> >>>> +  zext_web_entry *insn_entry;
> >>>> +  basic_block bb;
> >>>> +  rtx_insn *insn, *curr_insn = 0;
> >>>> +
> >>>> +  /* Dataflow analysis for use-def chains.  */
> >>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >>>> +  df_analyze ();
> >>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >>>> +
> >>>> +  /* Rebuild ud- and du-chains.  */
> >>>> +  df_remove_problem (df_chain);
> >>>> +  df_process_deferred_rescans ();
> >>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >>>> +  df_analyze ();
> >>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >>>> +
> >>>> +  /* Allocate structure to represent webs of insns.  */
> >>>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
> >>>> +
> >>>> +  /* Walk the insns to gather basic data.  */
> >>>> +  FOR_ALL_BB_FN (bb, fun)
> >>>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> >>>> +    {
> >>>> +      unsigned int uid = INSN_UID (insn);
> >>>> +      if (NONDEBUG_INSN_P (insn))
> >>>> +       {
> >>>> +         insn_entry[uid].insn = insn;
> >>>> +
> >>>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
> >>>> +           {
> >>>> +             insn_entry[uid].is_store = 1;
> >>>> +             insn_entry[uid].is_relevant = 1;
> >>>> +           }
> >>>> +
> >>>> +         /* Walk the uses and defs to identify the optimization
> >>>> +            candidates.*/
> >>>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> >>>> +         df_ref mention;
> >>>> +
> >>>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
> >>>> +           {
> >>>> +             insn_entry[uid].is_relevant = 1;
> >>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >>>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
> >>>> +           }
> >>>> +
> >>>> +         if (insn_entry[uid].is_relevant)
> >>>> +           {
> >>>> +             /* Determine if this is a store.  */
> >>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >>>> +           }
> >>>> +       }
> >>>> +     }
> >>>> +
> >>>> +   unsigned e = get_max_uid (), i;
> >>>> +
> >>>> +   int store_index = -1;
> >>>> +
> >>>> +   /* Replace with copy operation.*/
> >>>> +   for (i = 0; i < e; ++i)
> >>>> +     {
> >>>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
> >>>> +        store_index  = i;
> >>>> +
> >>>> +       if ((store_index != -1)
> >>>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
> >>>> +         {
> >>>> +           replace_marked_insns (insn_entry, store_index);
> >>>> +           replace_marked_insns (insn_entry, i);
> >>>> +         }
> >>>> +     }
> >>>> +    /* Clean up.  */
> >>>> +    free (insn_entry);
> >>>> +
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>> +const pass_data pass_data_analyze_zext =
> >>>> +{
> >>>> +  RTL_PASS, /* type */
> >>>> +  "zext", /* name */
> >>>> +  OPTGROUP_NONE, /* optinfo_flags */
> >>>> +  TV_NONE, /* tv_id */
> >>>> +  0, /* properties_required */
> >>>> +  0, /* properties_provided */
> >>>> +  0, /* properties_destroyed */
> >>>> +  0, /* todo_flags_start */
> >>>> +  TODO_df_finish, /* todo_flags_finish */
> >>>> +};
> >>>> +
> >>>> +class pass_analyze_zext : public rtl_opt_pass
> >>>> +{
> >>>> +public:
> >>>> +  pass_analyze_zext(gcc::context *ctxt)
> >>>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
> >>>> +  {}
> >>>> +
> >>>> +  /* opt_pass methods: */
> >>>> +  virtual bool gate (function *)
> >>>> +    {
> >>>> +      return (optimize > 0 );
> >>>> +    }
> >>>> +
> >>>> +  virtual unsigned int execute (function *fun)
> >>>> +    {
> >>>> +      return rs6000_analyze_zext (fun);
> >>>> +    }
> >>>> +
> >>>> +  opt_pass *clone ()
> >>>> +    {
> >>>> +      return new pass_analyze_zext (m_ctxt);
> >>>> +    }
> >>>> +
> >>>> +}; // class pass_analyze_zext
> >>>> +
> >>>> +rtl_opt_pass *
> >>>> +make_pass_analyze_zext (gcc::context *ctxt)
> >>>> +{
> >>>> +  return new pass_analyze_zext (ctxt);
> >>>> +}
> >>>> +
> >>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> >>>> index 8e0b0d022db..6541334bf2d 100644
> >>>> --- a/gcc/config/rs6000/rs6000.cc
> >>>> +++ b/gcc/config/rs6000/rs6000.cc
> >>>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
> >>>>                                           bool);
> >>>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
> >>>>
> >>>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
> >>>> +
> >>>>  /* Hash table stuff for keeping track of TOC entries.  */
> >>>>
> >>>>  struct GTY((for_user)) toc_hash_struct
> >>>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
> >>>> index f183b42ce1d..c1f61591d2f 100644
> >>>> --- a/gcc/config/rs6000/t-rs6000
> >>>> +++ b/gcc/config/rs6000/t-rs6000
> >>>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
> >>>>         $(COMPILE) $<
> >>>>         $(POSTCOMPILE)
> >>>>
> >>>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
> >>>> +       $(COMPILE) $<
> >>>> +       $(POSTCOMPILE)
> >>>> +
> >>>> +
> >>>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
> >>>>         $(COMPILE) $<
> >>>>         $(POSTCOMPILE)
> >>>> diff --git a/gcc/explow.cc b/gcc/explow.cc
> >>>> index 32e9498ee07..316aa975e40 100644
> >>>> --- a/gcc/explow.cc
> >>>> +++ b/gcc/explow.cc
> >>>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
> >>>>    if (! general_operand (x, VOIDmode))
> >>>>      x = force_operand (x, temp);
> >>>>
> >>>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
> >>>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
> >>>> +              || GET_MODE (x) == VOIDmode);
> >>>>    if (x != temp)
> >>>>      emit_move_insn (temp, x);
> >>>>    return temp;
> >>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
> >>>> index 15be1c8db99..6162ef92b88 100644
> >>>> --- a/gcc/expr.cc
> >>>> +++ b/gcc/expr.cc
> >>>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
> >>>>    rtx y_cst = NULL_RTX;
> >>>>    rtx_insn *last_insn;
> >>>>    rtx set;
> >>>> -
> >>>>    gcc_assert (mode != BLKmode
> >>>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
> >>>> +             && (mode == DImode || GET_MODE (y) == mode
> >>>> +             || GET_MODE (y) == VOIDmode));
> >>>>
> >>>>    /* If we have a copy that looks like one of the following patterns:
> >>>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
> >>>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> >>>> index 4c641cab192..9d22fadc7ef 100644
> >>>> --- a/gcc/optabs.cc
> >>>> +++ b/gcc/optabs.cc
> >>>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
> >>>>      input:
> >>>>        gcc_assert (mode != VOIDmode);
> >>>>        gcc_assert (GET_MODE (op->value) == VOIDmode
> >>>> -                 || GET_MODE (op->value) == mode);
> >>>> +                 || GET_MODE (op->value) == mode
> >>>> +                 || mode == DImode);
> >>>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
> >>>>         return true;
> >>>>
> >>>> --
> >>>> 2.31.1
> >>>>
Ajit Agarwal March 16, 2023, 10:11 a.m. UTC | #6
Hello Richard:

On 16/03/23 3:22 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>>
>>
>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>
>>>> Hello Richard:
>>>>
>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>> <gcc-patches@gcc.gnu.org> wrote:
>>>>>>
>>>>>> Hello All:
>>>>>>
>>>>>>
>>>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>
>>>>> What makes this so special that we cannot deal with it from generic code?
>>>>> In particular we do have the REE pass, why is target specific
>>>>> knowledge necessary
>>>>> to eliminate the extension?
>>>>>
>>>>
>>>> For returning bool values and comparison with integers, the RTL passes generate the following sequence.
>>>>
>>>> set compare (subreg)
>>>> set if_then_else
>>>> Convert SImode -> QImode
>>>> set zero_extend to DImode from QImode
>>>> set return value 0 in one path of cfg.
>>>> set return value 1 in other path of cfg.
>>>>
>>>> This pass replaces the above conversion to QImode and the zero extension from QImode to DImode with copy operations, keeping the QImode value in a 64-bit register on the powerpc target.
>>>
>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>> cannot even try to see what the actual RTL
>>> looks like (without the pass).
>>>
>>
>> Here is the PR with bugzilla.
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>
>> I can add the attached testcase with this PR in the patch.
> 
> I don't see any zero-extends there.
>

Here is the testcase.


bool foo (int a, int b)
{ 
          if (a > 2)
                      return false;
           if (b < 10)
                       return true;
             return false;
}

Compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.

Here is the RTL after the cse pass.
(note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(insn 15 12 16 3 (set (reg:CC 123)
        (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
            (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
     (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
        (nil)))
(insn 16 15 17 3 (set (reg:SI 124)
        (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
     (nil))
(insn 17 16 18 3 (set (reg:SI 122)
        (if_then_else:SI (gt (reg:CC 123)
                (const_int 0 [0]))
            (const_int 0 [0])
            (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
     (expr_list:REG_DEAD (reg:SI 124)
        (expr_list:REG_DEAD (reg:CC 123)
            (nil))))
(insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
        (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
     (expr_list:REG_DEAD (reg:SI 122)
        (nil)))
      ; pc falls through to BB 5
(code_label 32 18 31 4 3 (nil) [1 uses])
(note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
        (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
     (nil))
(code_label 19 5 20 5 2 (nil) [0 uses])
(note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
        (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
     (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
        (nil)))
(insn 22 21 26 5 (set (reg:DI 118 [ <retval> ])
        (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
        (nil)))
(insn 26 22 27 5 (set (reg/i:DI 3 3)
        (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 118 [ <retval> ])
        (nil)))
(insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
     (nil))


Thanks & Regards
Ajit
 
>> Thanks & Regards
>> Ajit
>>> Richard.
>>>
>>>> Thanks & Regards
>>>> Ajit
>>>>>> +  In cfgexpand pass QImode is generated with
>>>>>> +  bool register value and this pass uses QI
>>>>>> +  as 64 bit registers.
>>>>>> +
>>>>
>>>>>>         rs6000: suboptimal code for returning bool value on target ppc.
>>>>>>
>>>>>>         New pass to eliminate unnecessary zero extension. This pass
>>>>>>         is registered after cse rtl pass.
>>>>>>
>>>>>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>>>>>
>>>>>> gcc/ChangeLog:
>>>>>>
>>>>>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
>>>>>>         pass.
>>>>>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>>>>>>         * config.gcc: Add new executable.
>>>>>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
>>>>>>         elimination pass.
>>>>>>         * config/rs6000/rs6000.cc: Add new prototype for zero
>>>>>>         elimination pass.
>>>>>>         * config/rs6000/t-rs6000: Add new rule.
>>>>>>         * expr.cc: Modified gcc assert.
>>>>>>         * explow.cc: Modified gcc assert.
>>>>>>         * optabs.cc: Modified gcc assert.
>>>>>> ---
>>>>>>  gcc/config.gcc                        |   4 +-
>>>>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>>>>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
>>>>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
>>>>>>  gcc/config/rs6000/rs6000.cc           |   2 +
>>>>>>  gcc/config/rs6000/t-rs6000            |   5 +
>>>>>>  gcc/explow.cc                         |   3 +-
>>>>>>  gcc/expr.cc                           |   4 +-
>>>>>>  gcc/optabs.cc                         |   3 +-
>>>>>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>>>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>>
>>>>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>>>>>> index da3a6d3ba1f..e8ac9d882f0 100644
>>>>>> --- a/gcc/config.gcc
>>>>>> +++ b/gcc/config.gcc
>>>>>> @@ -503,7 +503,7 @@ or1k*-*-*)
>>>>>>         ;;
>>>>>>  powerpc*-*-*)
>>>>>>         cpu_type=rs6000
>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>>>>>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>>>>>> @@ -538,7 +538,7 @@ riscv*)
>>>>>>         ;;
>>>>>>  rs6000*-*-*)
>>>>>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>>>>>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
>>>>>> index ca899d5f7af..d7500feddf1 100644
>>>>>> --- a/gcc/config/rs6000/rs6000-passes.def
>>>>>> +++ b/gcc/config/rs6000/rs6000-passes.def
>>>>>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>>>>>>       The power8 does not have instructions that automaticaly do the byte swaps
>>>>>>       for loads and stores.  */
>>>>>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
>>>>>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
>>>>>> +
>>>>>>
>>>>>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
>>>>>>       external symbol's address along with a single load or store using that
>>>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>>>>> index 1a4fc1df668..f6cf2d673d4 100644
>>>>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>>>>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>>>>>>  class rtl_opt_pass;
>>>>>>
>>>>>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
>>>>>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>>>>>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>>>>>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>>>>>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
>>>>>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>> new file mode 100644
>>>>>> index 00000000000..777c7a5a387
>>>>>> --- /dev/null
>>>>>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>> @@ -0,0 +1,361 @@
>>>>>> +/* Subroutine to eliminate redundant zero extend for power architecture.
>>>>>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
>>>>>> +
>>>>>> +   This file is part of GCC.
>>>>>> +
>>>>>> +   GCC is free software; you can redistribute it and/or modify it
>>>>>> +   under the terms of the GNU General Public License as published
>>>>>> +   by the Free Software Foundation; either version 3, or (at your
>>>>>> +   option) any later version.
>>>>>> +
>>>>>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
>>>>>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>>>>>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>>>> +   License for more details.
>>>>>> +
>>>>>> +   You should have received a copy of the GNU General Public License
>>>>>> +   along with GCC; see the file COPYING3.  If not see
>>>>>> +   <http://www.gnu.org/licenses/>.  */
>>>>>> +
>>>>>> +/* This pass removes unnecessary zero extension instructions from
>>>>>> +  the assembly generated for Power.  This pass is registered after
>>>>>> +  the cse pass.
>>>>>> +  It identifies the following sequence of instructions after the
>>>>>> +  cse RTL pass.
>>>>>> +
>>>>>> +  set compare (subreg)
>>>>>> +  set if_then_else
>>>>>> +  set SImode -> QImode
>>>>>> +  set zero_extend to DImode from QImode
>>>>>> +  set return value 0 in one path of cfg.
>>>>>> +  set return value 1 in other path of cfg.
>>>>>> +
>>>>>> +  In cfgexpand pass QImode is generated with
>>>>>> +  bool register value and this pass uses QI
>>>>>> +  as 64 bit registers.
>>>>>> +
>>>>>> +  This pass replaces the zero extension from QImode to DImode
>>>>>> +  with a copy operation and returns the appropriate return values.  */
>>>>>> +
>>>>>> +#define IN_TARGET_CODE 1
>>>>>> +
>>>>>> +#include "config.h"
>>>>>> +#include "system.h"
>>>>>> +#include "coretypes.h"
>>>>>> +#include "backend.h"
>>>>>> +#include "rtl.h"
>>>>>> +#include "tree.h"
>>>>>> +#include "memmodel.h"
>>>>>> +#include "df.h"
>>>>>> +#include "tm_p.h"
>>>>>> +#include "ira.h"
>>>>>> +#include "print-tree.h"
>>>>>> +#include "varasm.h"
>>>>>> +#include "explow.h"
>>>>>> +#include "expr.h"
>>>>>> +#include "output.h"
>>>>>> +#include "tree-pass.h"
>>>>>> +
>>>>>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
>>>>>> +   defined in df.h.  */
>>>>>> +class zext_web_entry : public web_entry_base
>>>>>> +{
>>>>>> + public:
>>>>>> +  /* Pointer to the insn.  */
>>>>>> +  rtx_insn *insn;
>>>>>> +  unsigned int is_relevant : 1;
>>>>>> +  /* Set if insn is a load.  */
>>>>>> +  unsigned int is_load : 1;
>>>>>> +  /* Set if insn is a store.  */
>>>>>> +  unsigned int is_store : 1;
>>>>>> +  unsigned int is_zext :1 ;
>>>>>> +  unsigned int is_move :1;
>>>>>> +  unsigned int is_delete_move :1;
>>>>>> +  /* Set if this insn should be deleted.  */
>>>>>> +  unsigned int will_delete : 1;
>>>>>> +  unsigned int will_delete_chances : 1;
>>>>>> +};
>>>>>> +
>>>>>> +/* Checks if the instruction is a zero extension
>>>>>> + * from QImode to DImode.  */
>>>>>> +static unsigned int
>>>>>> +insn_is_zext_p(rtx insn)
>>>>>> +{
>>>>>> +  rtx body = PATTERN (insn);
>>>>>> +
>>>>>> +  if (GET_CODE (body) == SET
>>>>>> +      && GET_MODE(SET_DEST (body)) == DImode
>>>>>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
>>>>>> +  {
>>>>>> +    rtx set = XEXP (SET_SRC (body), 0);
>>>>>> +
>>>>>> +    if (REG_P (set))
>>>>>> +    {
>>>>>> +      if (GET_MODE (set) == QImode) return 1;
>>>>>> +    }
>>>>>> +    else
>>>>>> +      return 0;
>>>>>> +  }
>>>>>> +  return 0;
>>>>>> +}
>>>>>> +
>>>>>> +/* Checks if instruction is SET operation with QImode.*/
>>>>>> +static unsigned int
>>>>>> +insn_is_store_p (rtx insn)
>>>>>> +{
>>>>>> +  rtx body = PATTERN (insn);
>>>>>> +  if (GET_CODE (body) == SET
>>>>>> +      && SUBREG_P(SET_SRC (body))
>>>>>> +      && !CONST_INT_P(SET_SRC (body))
>>>>>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
>>>>>> +      && GET_MODE(SET_SRC (body)) == QImode)
>>>>>> +    return 1;
>>>>>> +
>>>>>> +  return 0;
>>>>>> +}
>>>>>> +
>>>>>> +/* Find out zero extension removal candidate with use-def web.*/
>>>>>> +static void
>>>>>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
>>>>>> +                                    rtx insn, df_ref def)
>>>>>> +{
>>>>>> +  struct df_link *link = DF_REF_CHAIN (def);
>>>>>> +
>>>>>> +  rtx move_insn = NULL_RTX;
>>>>>> +  rtx compare_insn = NULL_RTX;
>>>>>> +
>>>>>> +  while (link)
>>>>>> +  {
>>>>>> +    if (!DF_REF_INSN_INFO (link->ref))
>>>>>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
>>>>>> +
>>>>>> +    if (DF_REF_INSN_INFO (link->ref))
>>>>>> +      {
>>>>>> +       rtx use_insn = DF_REF_INSN (link->ref);
>>>>>> +
>>>>>> +       if (GET_CODE (PATTERN (use_insn)) == SET
>>>>>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
>>>>>> +         {
>>>>>> +           if (GET_CODE (PATTERN (insn)) == SET
>>>>>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
>>>>>> +             {
>>>>>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
>>>>>> +
>>>>>> +               if (SUBREG_P (body))
>>>>>> +                 {
>>>>>> +                   compare_insn = use_insn;
>>>>>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
>>>>>> +
>>>>>> +                   if (compare_insn
>>>>>> +                       && ((REGNO (XEXP (compare_body, 0)))
>>>>>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
>>>>>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>>>> +                 }
>>>>>> +              }
>>>>>> +           }
>>>>>> +
>>>>>> +       if (insn_is_store_p(use_insn)
>>>>>> +           && GET_CODE (PATTERN (insn)) == SET
>>>>>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
>>>>>> +         {
>>>>>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
>>>>>> +             {
>>>>>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
>>>>>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>>>> +             }
>>>>>> +         }
>>>>>> +
>>>>>> +       if (insn_is_zext_p (insn))
>>>>>> +         {
>>>>>> +           if (GET_CODE (PATTERN (use_insn)) == SET
>>>>>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
>>>>>> +             {
>>>>>> +               if (move_insn
>>>>>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
>>>>>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
>>>>>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
>>>>>> +                 {
>>>>>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
>>>>>> +                   break;
>>>>>> +                 }
>>>>>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
>>>>>> +                   {
>>>>>> +                     move_insn = use_insn;
>>>>>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
>>>>>> +                   }
>>>>>> +             }
>>>>>> +         }
>>>>>> +
>>>>>> +       if (insn_is_zext_p (use_insn))
>>>>>> +         {
>>>>>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
>>>>>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
>>>>>> +
>>>>>> +           if (insn_is_store_p (insn)
>>>>>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
>>>>>> +           {
>>>>>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
>>>>>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
>>>>>> +             insn_entry[INSN_UID( insn)].is_store = 1;
>>>>>> +           }
>>>>>> +
>>>>>> +          if (NONDEBUG_INSN_P (use_insn))
>>>>>> +            unionfind_union (insn_entry + INSN_UID (insn),
>>>>>> +                             insn_entry + INSN_UID (use_insn));
>>>>>> +       }
>>>>>> +      }
>>>>>> +
>>>>>> +    link = link->next;
>>>>>> +  }
>>>>>> +}
>>>>>> +
>>>>>> +/* Replace QImode extensions with copy operations.*/
>>>>>> +static void
>>>>>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
>>>>>> +{
>>>>>> +  rtx_insn *insn = insn_entry[i].insn;
>>>>>> +  rtx body = PATTERN (insn);
>>>>>> +  rtx src_reg;
>>>>>> +  src_reg = XEXP (SET_SRC (body), 0);
>>>>>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
>>>>>> +
>>>>>> +  if (GET_MODE(SET_DEST(body)) != DImode)
>>>>>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
>>>>>> +
>>>>>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
>>>>>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
>>>>>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
>>>>>> +  df_insn_rescan (new_insn);
>>>>>> +
>>>>>> +  df_insn_delete (insn);
>>>>>> +  remove_insn (insn);
>>>>>> +  insn->set_deleted ();
>>>>>> +}
>>>>>> +
>>>>>> +/* Main entry point for this pass.  */
>>>>>> +unsigned int
>>>>>> +rs6000_analyze_zext (function *fun)
>>>>>> +{
>>>>>> +  zext_web_entry *insn_entry;
>>>>>> +  basic_block bb;
>>>>>> +  rtx_insn *insn, *curr_insn = 0;
>>>>>> +
>>>>>> +  /* Dataflow analysis for use-def chains.  */
>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>>>> +  df_analyze ();
>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>>>> +
>>>>>> +  /* Rebuild ud- and du-chains.  */
>>>>>> +  df_remove_problem (df_chain);
>>>>>> +  df_process_deferred_rescans ();
>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>>>> +  df_analyze ();
>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>>>> +
>>>>>> +  /* Allocate structure to represent webs of insns.  */
>>>>>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
>>>>>> +
>>>>>> +  /* Walk the insns to gather basic data.  */
>>>>>> +  FOR_ALL_BB_FN (bb, fun)
>>>>>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
>>>>>> +    {
>>>>>> +      unsigned int uid = INSN_UID (insn);
>>>>>> +      if (NONDEBUG_INSN_P (insn))
>>>>>> +       {
>>>>>> +         insn_entry[uid].insn = insn;
>>>>>> +
>>>>>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
>>>>>> +           {
>>>>>> +             insn_entry[uid].is_store = 1;
>>>>>> +             insn_entry[uid].is_relevant = 1;
>>>>>> +           }
>>>>>> +
>>>>>> +         /* Walk the uses and defs to identify the optimization
>>>>>> +            candidates.*/
>>>>>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
>>>>>> +         df_ref mention;
>>>>>> +
>>>>>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
>>>>>> +           {
>>>>>> +             insn_entry[uid].is_relevant = 1;
>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>>>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
>>>>>> +           }
>>>>>> +
>>>>>> +         if (insn_entry[uid].is_relevant)
>>>>>> +           {
>>>>>> +             /* Determine if this is a store.  */
>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>>>> +           }
>>>>>> +       }
>>>>>> +     }
>>>>>> +
>>>>>> +   unsigned e = get_max_uid (), i;
>>>>>> +
>>>>>> +   int store_index = -1;
>>>>>> +
>>>>>> +   /* Replace with copy operation.*/
>>>>>> +   for (i = 0; i < e; ++i)
>>>>>> +     {
>>>>>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
>>>>>> +        store_index  = i;
>>>>>> +
>>>>>> +       if ((store_index != -1)
>>>>>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
>>>>>> +         {
>>>>>> +           replace_marked_insns (insn_entry, store_index);
>>>>>> +           replace_marked_insns (insn_entry, i);
>>>>>> +         }
>>>>>> +     }
>>>>>> +    /* Clean up.  */
>>>>>> +    free (insn_entry);
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +const pass_data pass_data_analyze_zext =
>>>>>> +{
>>>>>> +  RTL_PASS, /* type */
>>>>>> +  "zext", /* name */
>>>>>> +  OPTGROUP_NONE, /* optinfo_flags */
>>>>>> +  TV_NONE, /* tv_id */
>>>>>> +  0, /* properties_required */
>>>>>> +  0, /* properties_provided */
>>>>>> +  0, /* properties_destroyed */
>>>>>> +  0, /* todo_flags_start */
>>>>>> +  TODO_df_finish, /* todo_flags_finish */
>>>>>> +};
>>>>>> +
>>>>>> +class pass_analyze_zext : public rtl_opt_pass
>>>>>> +{
>>>>>> +public:
>>>>>> +  pass_analyze_zext(gcc::context *ctxt)
>>>>>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
>>>>>> +  {}
>>>>>> +
>>>>>> +  /* opt_pass methods: */
>>>>>> +  virtual bool gate (function *)
>>>>>> +    {
>>>>>> +      return (optimize > 0 );
>>>>>> +    }
>>>>>> +
>>>>>> +  virtual unsigned int execute (function *fun)
>>>>>> +    {
>>>>>> +      return rs6000_analyze_zext (fun);
>>>>>> +    }
>>>>>> +
>>>>>> +  opt_pass *clone ()
>>>>>> +    {
>>>>>> +      return new pass_analyze_zext (m_ctxt);
>>>>>> +    }
>>>>>> +
>>>>>> +}; // class pass_analyze_zext
>>>>>> +
>>>>>> +rtl_opt_pass *
>>>>>> +make_pass_analyze_zext (gcc::context *ctxt)
>>>>>> +{
>>>>>> +  return new pass_analyze_zext (ctxt);
>>>>>> +}
>>>>>> +
>>>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>>>>> index 8e0b0d022db..6541334bf2d 100644
>>>>>> --- a/gcc/config/rs6000/rs6000.cc
>>>>>> +++ b/gcc/config/rs6000/rs6000.cc
>>>>>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
>>>>>>                                           bool);
>>>>>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>>>>>>
>>>>>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
>>>>>> +
>>>>>>  /* Hash table stuff for keeping track of TOC entries.  */
>>>>>>
>>>>>>  struct GTY((for_user)) toc_hash_struct
>>>>>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
>>>>>> index f183b42ce1d..c1f61591d2f 100644
>>>>>> --- a/gcc/config/rs6000/t-rs6000
>>>>>> +++ b/gcc/config/rs6000/t-rs6000
>>>>>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
>>>>>>         $(COMPILE) $<
>>>>>>         $(POSTCOMPILE)
>>>>>>
>>>>>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
>>>>>> +       $(COMPILE) $<
>>>>>> +       $(POSTCOMPILE)
>>>>>> +
>>>>>> +
>>>>>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
>>>>>>         $(COMPILE) $<
>>>>>>         $(POSTCOMPILE)
>>>>>> diff --git a/gcc/explow.cc b/gcc/explow.cc
>>>>>> index 32e9498ee07..316aa975e40 100644
>>>>>> --- a/gcc/explow.cc
>>>>>> +++ b/gcc/explow.cc
>>>>>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
>>>>>>    if (! general_operand (x, VOIDmode))
>>>>>>      x = force_operand (x, temp);
>>>>>>
>>>>>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
>>>>>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
>>>>>> +              || GET_MODE (x) == VOIDmode);
>>>>>>    if (x != temp)
>>>>>>      emit_move_insn (temp, x);
>>>>>>    return temp;
>>>>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>>>>>> index 15be1c8db99..6162ef92b88 100644
>>>>>> --- a/gcc/expr.cc
>>>>>> +++ b/gcc/expr.cc
>>>>>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
>>>>>>    rtx y_cst = NULL_RTX;
>>>>>>    rtx_insn *last_insn;
>>>>>>    rtx set;
>>>>>> -
>>>>>>    gcc_assert (mode != BLKmode
>>>>>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
>>>>>> +             && (mode == DImode || GET_MODE (y) == mode
>>>>>> +             || GET_MODE (y) == VOIDmode));
>>>>>>
>>>>>>    /* If we have a copy that looks like one of the following patterns:
>>>>>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
>>>>>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
>>>>>> index 4c641cab192..9d22fadc7ef 100644
>>>>>> --- a/gcc/optabs.cc
>>>>>> +++ b/gcc/optabs.cc
>>>>>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
>>>>>>      input:
>>>>>>        gcc_assert (mode != VOIDmode);
>>>>>>        gcc_assert (GET_MODE (op->value) == VOIDmode
>>>>>> -                 || GET_MODE (op->value) == mode);
>>>>>> +                 || GET_MODE (op->value) == mode
>>>>>> +                 || mode == DImode);
>>>>>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
>>>>>>         return true;
>>>>>>
>>>>>> --
>>>>>> 2.31.1
>>>>>>
Richard Biener March 16, 2023, 10:30 a.m. UTC | #7
On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
>
> Hello Richard:
>
> On 16/03/23 3:22 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> >>
> >>
> >>
> >> On 16/03/23 1:44 pm, Richard Biener wrote:
> >>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> >>>>
> >>>> Hello Richard:
> >>>>
> >>>> On 16/03/23 1:10 pm, Richard Biener wrote:
> >>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> >>>>> <gcc-patches@gcc.gnu.org> wrote:
> >>>>>>
> >>>>>> Hello All:
> >>>>>>
> >>>>>>
> >>>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
> >>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
> >>>>>
> >>>>> What makes this so special that we cannot deal with it from generic code?
> >>>>> In particular we do have the REE pass, why is target specific
> >>>>> knowledge necessary
> >>>>> to eliminate the extension?
> >>>>>
> >>>>
> >>>> When returning bool values computed from comparisons with integers, the RTL passes generate the following sequence.
> >>>>
> >>>> set compare (subreg)
> >>>> set if_then_else
> >>>> Convert SImode -> QImode
> >>>> set zero_extend to DImode from QImode
> >>>> set return value 0 in one path of cfg.
> >>>> set return value 1 in other path of cfg.
> >>>>
> >>>> This pass replaces the above zero extension and the conversion from QImode to DImode with a copy operation, keeping the QImode value in a 64-bit register on the powerpc target.
> >>>
> >>> Sorry, I can't parse that - as there's no testcase with the patch I
> >>> cannot even try to see what the actual RTL
> >>> looks like (without the pass).
> >>>
> >>
> >> Here is the PR with bugzilla.
> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
> >>
> >> I can add the attached testcase with this PR in the patch.
> >
> > I don't see any zero-extends there.
> >
>
> Here is the testcase.
>
>
> bool foo (int a, int b)
> {
>           if (a > 2)
>                       return false;
>            if (b < 10)
>                        return true;
>              return false;
> }
>
> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>
> Here is the rtl after cse.
> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> (insn 15 12 16 3 (set (reg:CC 123)
>         (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>             (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>      (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>         (nil)))
> (insn 16 15 17 3 (set (reg:SI 124)
>         (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>      (nil))
> (insn 17 16 18 3 (set (reg:SI 122)
>         (if_then_else:SI (gt (reg:CC 123)
>                 (const_int 0 [0]))
>             (const_int 0 [0])
>             (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>      (expr_list:REG_DEAD (reg:SI 124)
>         (expr_list:REG_DEAD (reg:CC 123)
>             (nil))))
> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>         (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>      (expr_list:REG_DEAD (reg:SI 122)
>         (nil)))
>       ; pc falls through to BB 5
> (code_label 32 18 31 4 3 (nil) [1 uses])
> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>         (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>      (nil))
> (code_label 19 5 20 5 2 (nil) [0 uses])
> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>         (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
>      (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>         (nil)))
> (insn 22 21 26 5 (set (reg:DI 118 [ <retval> ])
>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>      (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
>         (nil)))
> (insn 26 22 27 5 (set (reg/i:DI 3 3)
>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>      (expr_list:REG_DEAD (reg:DI 118 [ <retval> ])
>         (nil)))
> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
>      (nil))

But after combine there's just

(note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(insn 38 6 2 2 (set (reg:DI 126)
        (reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
        (nil)))
(note 2 38 39 2 NOTE_INSN_DELETED)
(insn 39 2 3 2 (set (reg:DI 127)
        (reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
        (nil)))
(insn 3 39 4 2 (set (reg/v:DI 119 [ b ])
        (reg:DI 127)) "t.c":3:1 634 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 127)
        (nil)))
(note 4 3 10 2 NOTE_INSN_FUNCTION_BEG)
(insn 10 4 11 2 (set (reg:CC 120)
        (compare:CC (subreg/s/u:SI (reg:DI 126) 0)
            (const_int 2 [0x2]))) "t.c":4:6 755 {*cmpsi_signed}
     (expr_list:REG_DEAD (reg:DI 126)
        (nil)))
(jump_insn 11 10 12 2 (set (pc)
        (if_then_else (gt (reg:CC 120)
                (const_int 0 [0]))
            (label_ref:DI 32)
            (pc))) "t.c":4:6 838 {*cbranch}
     (expr_list:REG_DEAD (reg:CC 120)
        (int_list:REG_BR_PROB 365072228 (nil)))
 -> 32)
(note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(note 15 12 16 3 NOTE_INSN_DELETED)
(note 16 15 17 3 NOTE_INSN_DELETED)
(note 17 16 19 3 NOTE_INSN_DELETED)
(insn 19 17 32 3 (parallel [
            (set (reg:DI 117 [ <retval> ])
                (le:DI (subreg/s/u:SI (reg/v:DI 119 [ b ]) 0)
                    (const_int 9 [0x9])))
            (clobber (scratch:DI))
            (clobber (scratch:DI))
            (clobber (scratch:CC))
        ]) "t.c":6:6 783 {ledisi2_isel}
     (expr_list:REG_DEAD (reg/v:DI 119 [ b ])
        (nil)))
      ; pc falls through to BB 5
(code_label 32 19 31 4 3 (nil) [1 uses])
(note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 5 31 20 4 (set (reg:DI 117 [ <retval> ])
        (const_int 0 [0])) "t.c":5:12 634 {*movdi_internal64}
     (nil))
(code_label 20 5 21 5 2 (nil) [0 uses])
(note 21 20 26 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(insn 26 21 27 5 (set (reg/i:DI 3 3)
        (reg:DI 117 [ <retval> ])) "t.c":9:1 634 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 117 [ <retval> ])
        (nil)))
(insn 27 26 0 5 (use (reg/i:DI 3 3)) "t.c":9:1 -1
     (nil))

and we get

foo:
.LFB0:
        .cfi_startproc
        cmpwi 0,3,2
        bgt 0,.L3
        cmpwi 0,4,9
        li 3,1
        isel 3,0,3,1
        blr
        .p2align 4,,15
.L3:
        li 3,0
        blr

where I don't see what we can do better (ok, not knowing ppc very much)

>
> Thanks & Regards
> Ajit
>
> >> Thanks & Regards
> >> Ajit
> >>> Richard.
> >>>
> >>>> Thanks & Regards
> >>>> Ajit
> >>>>>> +  In cfgexpand pass QImode is generated with
> >>>>>> +  bool register value and this pass uses QI
> >>>>>> +  as 64 bit registers.
> >>>>>> +
> >>>>
> >>>>>>         rs6000: suboptimal code for returning bool value on target ppc.
> >>>>>>
> >>>>>>         New pass to eliminate unnecessary zero extension. This pass
> >>>>>>         is registered after cse rtl pass.
> >>>>>>
> >>>>>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> >>>>>>
> >>>>>> gcc/ChangeLog:
> >>>>>>
> >>>>>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
> >>>>>>         pass.
> >>>>>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
> >>>>>>         * config.gcc: Add new executable.
> >>>>>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
> >>>>>>         elimination pass.
> >>>>>>         * config/rs6000/rs6000.cc: Add new prototype for zero
> >>>>>>         elimination pass.
> >>>>>>         * config/rs6000/t-rs6000: Add new rule.
> >>>>>>         * expr.cc: Modified gcc assert.
> >>>>>>         * explow.cc: Modified gcc assert.
> >>>>>>         * optabs.cc: Modified gcc assert.
> >>>>>> ---
> >>>>>>  gcc/config.gcc                        |   4 +-
> >>>>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
> >>>>>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
> >>>>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
> >>>>>>  gcc/config/rs6000/rs6000.cc           |   2 +
> >>>>>>  gcc/config/rs6000/t-rs6000            |   5 +
> >>>>>>  gcc/explow.cc                         |   3 +-
> >>>>>>  gcc/expr.cc                           |   4 +-
> >>>>>>  gcc/optabs.cc                         |   3 +-
> >>>>>>  9 files changed, 379 insertions(+), 6 deletions(-)
> >>>>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> >>>>>>
> >>>>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
> >>>>>> index da3a6d3ba1f..e8ac9d882f0 100644
> >>>>>> --- a/gcc/config.gcc
> >>>>>> +++ b/gcc/config.gcc
> >>>>>> @@ -503,7 +503,7 @@ or1k*-*-*)
> >>>>>>         ;;
> >>>>>>  powerpc*-*-*)
> >>>>>>         cpu_type=rs6000
> >>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>>>>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
> >>>>>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> >>>>>> @@ -538,7 +538,7 @@ riscv*)
> >>>>>>         ;;
> >>>>>>  rs6000*-*-*)
> >>>>>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> >>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
> >>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> >>>>>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
> >>>>>> index ca899d5f7af..d7500feddf1 100644
> >>>>>> --- a/gcc/config/rs6000/rs6000-passes.def
> >>>>>> +++ b/gcc/config/rs6000/rs6000-passes.def
> >>>>>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
> >>>>>>       The power8 does not have instructions that automaticaly do the byte swaps
> >>>>>>       for loads and stores.  */
> >>>>>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> >>>>>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> >>>>>> +
> >>>>>>
> >>>>>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
> >>>>>>       external symbol's address along with a single load or store using that
> >>>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> >>>>>> index 1a4fc1df668..f6cf2d673d4 100644
> >>>>>> --- a/gcc/config/rs6000/rs6000-protos.h
> >>>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
> >>>>>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
> >>>>>>  class rtl_opt_pass;
> >>>>>>
> >>>>>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> >>>>>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
> >>>>>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
> >>>>>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
> >>>>>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> >>>>>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
> >>>>>> new file mode 100644
> >>>>>> index 00000000000..777c7a5a387
> >>>>>> --- /dev/null
> >>>>>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> >>>>>> @@ -0,0 +1,361 @@
> >>>>>> +/* Subroutine to eliminate redundant zero extend for power architecture.
> >>>>>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> >>>>>> +
> >>>>>> +   This file is part of GCC.
> >>>>>> +
> >>>>>> +   GCC is free software; you can redistribute it and/or modify it
> >>>>>> +   under the terms of the GNU General Public License as published
> >>>>>> +   by the Free Software Foundation; either version 3, or (at your
> >>>>>> +   option) any later version.
> >>>>>> +
> >>>>>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> >>>>>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> >>>>>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> >>>>>> +   License for more details.
> >>>>>> +
> >>>>>> +   You should have received a copy of the GNU General Public License
> >>>>>> +   along with GCC; see the file COPYING3.  If not see
> >>>>>> +   <http://www.gnu.org/licenses/>.  */
> >>>>>> +
> >>>>>> +/* This pass removes unnecessary zero extension instructions from
> >>>>>> +  power generated assembly. This pass is registered after the cse
> >>>>>> +  pass.
> >>>>>> +  It identifies the following sequence of instructions after the cse
> >>>>>> +  rtl pass.
> >>>>>> +
> >>>>>> +  set compare (subreg)
> >>>>>> +  set if_then_else
> >>>>>> +  set SImode -> QImode
> >>>>>> +  set zero_extend to DImode from QImode
> >>>>>> +  set return value 0 in one path of cfg.
> >>>>>> +  set return value 1 in other path of cfg.
> >>>>>> +
> >>>>>> +  In cfgexpand pass QImode is generated with
> >>>>>> +  bool register value and this pass uses QI
> >>>>>> +  as 64 bit registers.
> >>>>>> +
> >>>>>> +  This pass replaces the zero extension from QImode to DImode
> >>>>>> +  with a copy operation and returns the appropriate return values.  */
> >>>>>> +
> >>>>>> +#define IN_TARGET_CODE 1
> >>>>>> +
> >>>>>> +#include "config.h"
> >>>>>> +#include "system.h"
> >>>>>> +#include "coretypes.h"
> >>>>>> +#include "backend.h"
> >>>>>> +#include "rtl.h"
> >>>>>> +#include "tree.h"
> >>>>>> +#include "memmodel.h"
> >>>>>> +#include "df.h"
> >>>>>> +#include "tm_p.h"
> >>>>>> +#include "ira.h"
> >>>>>> +#include "print-tree.h"
> >>>>>> +#include "varasm.h"
> >>>>>> +#include "explow.h"
> >>>>>> +#include "expr.h"
> >>>>>> +#include "output.h"
> >>>>>> +#include "tree-pass.h"
> >>>>>> +
> >>>>>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
> >>>>>> +   defined in df.h.  */
> >>>>>> +class zext_web_entry : public web_entry_base
> >>>>>> +{
> >>>>>> + public:
> >>>>>> +  /* Pointer to the insn.  */
> >>>>>> +  rtx_insn *insn;
> >>>>>> +  unsigned int is_relevant : 1;
> >>>>>> +  /* Set if insn is a load.  */
> >>>>>> +  unsigned int is_load : 1;
> >>>>>> +  /* Set if insn is a store.  */
> >>>>>> +  unsigned int is_store : 1;
> >>>>>> +  unsigned int is_zext :1 ;
> >>>>>> +  unsigned int is_move :1;
> >>>>>> +  unsigned int is_delete_move :1;
> >>>>>> +  /* Set if this insn should be deleted.  */
> >>>>>> +  unsigned int will_delete : 1;
> >>>>>> +  unsigned int will_delete_chances : 1;
> >>>>>> +};
> >>>>>> +
> >>>>>> +/* Checks if the instruction is a zero extension
> >>>>>> + * from QImode to DImode.  */
> >>>>>> +static unsigned int
> >>>>>> +insn_is_zext_p(rtx insn)
> >>>>>> +{
> >>>>>> +  rtx body = PATTERN (insn);
> >>>>>> +
> >>>>>> +  if (GET_CODE (body) == SET
> >>>>>> +      && GET_MODE(SET_DEST (body)) == DImode
> >>>>>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
> >>>>>> +  {
> >>>>>> +    rtx set = XEXP (SET_SRC (body), 0);
> >>>>>> +
> >>>>>> +    if (REG_P (set))
> >>>>>> +    {
> >>>>>> +      if (GET_MODE (set) == QImode) return 1;
> >>>>>> +    }
> >>>>>> +    else
> >>>>>> +      return 0;
> >>>>>> +  }
> >>>>>> +  return 0;
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Checks if the instruction is a SET whose source is a QImode subreg of an SImode value.  */
> >>>>>> +static unsigned int
> >>>>>> +insn_is_store_p (rtx insn)
> >>>>>> +{
> >>>>>> +  rtx body = PATTERN (insn);
> >>>>>> +  if (GET_CODE (body) == SET
> >>>>>> +      && SUBREG_P(SET_SRC (body))
> >>>>>> +      && !CONST_INT_P(SET_SRC (body))
> >>>>>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
> >>>>>> +      && GET_MODE(SET_SRC (body)) == QImode)
> >>>>>> +    return 1;
> >>>>>> +
> >>>>>> +  return 0;
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Find out zero extension removal candidate with use-def web.*/
> >>>>>> +static void
> >>>>>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
> >>>>>> +                                    rtx insn, df_ref def)
> >>>>>> +{
> >>>>>> +  struct df_link *link = DF_REF_CHAIN (def);
> >>>>>> +
> >>>>>> +  rtx move_insn = NULL_RTX;
> >>>>>> +  rtx compare_insn = NULL_RTX;
> >>>>>> +
> >>>>>> +  while (link)
> >>>>>> +  {
> >>>>>> +    if (!DF_REF_INSN_INFO (link->ref))
> >>>>>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
> >>>>>> +
> >>>>>> +    if (DF_REF_INSN_INFO (link->ref))
> >>>>>> +      {
> >>>>>> +       rtx use_insn = DF_REF_INSN (link->ref);
> >>>>>> +
> >>>>>> +       if (GET_CODE (PATTERN (use_insn)) == SET
> >>>>>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
> >>>>>> +         {
> >>>>>> +           if (GET_CODE (PATTERN (insn)) == SET
> >>>>>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
> >>>>>> +             {
> >>>>>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
> >>>>>> +
> >>>>>> +               if (SUBREG_P (body))
> >>>>>> +                 {
> >>>>>> +                   compare_insn = use_insn;
> >>>>>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
> >>>>>> +
> >>>>>> +                   if (compare_insn
> >>>>>> +                       && ((REGNO (XEXP (compare_body, 0)))
> >>>>>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
> >>>>>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >>>>>> +                 }
> >>>>>> +              }
> >>>>>> +           }
> >>>>>> +
> >>>>>> +       if (insn_is_store_p(use_insn)
> >>>>>> +           && GET_CODE (PATTERN (insn)) == SET
> >>>>>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
> >>>>>> +         {
> >>>>>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
> >>>>>> +             {
> >>>>>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
> >>>>>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >>>>>> +             }
> >>>>>> +         }
> >>>>>> +
> >>>>>> +       if (insn_is_zext_p (insn))
> >>>>>> +         {
> >>>>>> +           if (GET_CODE (PATTERN (use_insn)) == SET
> >>>>>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
> >>>>>> +             {
> >>>>>> +               if (move_insn
> >>>>>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
> >>>>>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
> >>>>>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
> >>>>>> +                 {
> >>>>>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
> >>>>>> +                   break;
> >>>>>> +                 }
> >>>>>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
> >>>>>> +                   {
> >>>>>> +                     move_insn = use_insn;
> >>>>>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
> >>>>>> +                   }
> >>>>>> +             }
> >>>>>> +         }
> >>>>>> +
> >>>>>> +       if (insn_is_zext_p (use_insn))
> >>>>>> +         {
> >>>>>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
> >>>>>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
> >>>>>> +
> >>>>>> +           if (insn_is_store_p (insn)
> >>>>>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
> >>>>>> +           {
> >>>>>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
> >>>>>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
> >>>>>> +             insn_entry[INSN_UID( insn)].is_store = 1;
> >>>>>> +           }
> >>>>>> +
> >>>>>> +          if (NONDEBUG_INSN_P (use_insn))
> >>>>>> +            unionfind_union (insn_entry + INSN_UID (insn),
> >>>>>> +                             insn_entry + INSN_UID (use_insn));
> >>>>>> +       }
> >>>>>> +      }
> >>>>>> +
> >>>>>> +    link = link->next;
> >>>>>> +  }
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Replace QImode extensions with copy operations.*/
> >>>>>> +static void
> >>>>>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
> >>>>>> +{
> >>>>>> +  rtx_insn *insn = insn_entry[i].insn;
> >>>>>> +  rtx body = PATTERN (insn);
> >>>>>> +  rtx src_reg;
> >>>>>> +  src_reg = XEXP (SET_SRC (body), 0);
> >>>>>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
> >>>>>> +
> >>>>>> +  if (GET_MODE(SET_DEST(body)) != DImode)
> >>>>>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
> >>>>>> +
> >>>>>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
> >>>>>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
> >>>>>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
> >>>>>> +  df_insn_rescan (new_insn);
> >>>>>> +
> >>>>>> +  df_insn_delete (insn);
> >>>>>> +  remove_insn (insn);
> >>>>>> +  insn->set_deleted ();
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Main entry point for this pass.  */
> >>>>>> +unsigned int
> >>>>>> +rs6000_analyze_zext (function *fun)
> >>>>>> +{
> >>>>>> +  zext_web_entry *insn_entry;
> >>>>>> +  basic_block bb;
> >>>>>> +  rtx_insn *insn, *curr_insn = 0;
> >>>>>> +
> >>>>>> +  /* Dataflow analysis for use-def chains.  */
> >>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >>>>>> +  df_analyze ();
> >>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >>>>>> +
> >>>>>> +  /* Rebuild ud- and du-chains.  */
> >>>>>> +  df_remove_problem (df_chain);
> >>>>>> +  df_process_deferred_rescans ();
> >>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >>>>>> +  df_analyze ();
> >>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >>>>>> +
> >>>>>> +  /* Allocate structure to represent webs of insns.  */
> >>>>>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
> >>>>>> +
> >>>>>> +  /* Walk the insns to gather basic data.  */
> >>>>>> +  FOR_ALL_BB_FN (bb, fun)
> >>>>>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> >>>>>> +    {
> >>>>>> +      unsigned int uid = INSN_UID (insn);
> >>>>>> +      if (NONDEBUG_INSN_P (insn))
> >>>>>> +       {
> >>>>>> +         insn_entry[uid].insn = insn;
> >>>>>> +
> >>>>>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
> >>>>>> +           {
> >>>>>> +             insn_entry[uid].is_store = 1;
> >>>>>> +             insn_entry[uid].is_relevant = 1;
> >>>>>> +           }
> >>>>>> +
> >>>>>> +         /* Walk the uses and defs to identify the optimization
> >>>>>> +            candidates.*/
> >>>>>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> >>>>>> +         df_ref mention;
> >>>>>> +
> >>>>>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
> >>>>>> +           {
> >>>>>> +             insn_entry[uid].is_relevant = 1;
> >>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >>>>>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
> >>>>>> +           }
> >>>>>> +
> >>>>>> +         if (insn_entry[uid].is_relevant)
> >>>>>> +           {
> >>>>>> +             /* Determine if this is a store.  */
> >>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >>>>>> +           }
> >>>>>> +       }
> >>>>>> +     }
> >>>>>> +
> >>>>>> +   unsigned e = get_max_uid (), i;
> >>>>>> +
> >>>>>> +   int store_index = -1;
> >>>>>> +
> >>>>>> +   /* Replace with copy operation.*/
> >>>>>> +   for (i = 0; i < e; ++i)
> >>>>>> +     {
> >>>>>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
> >>>>>> +        store_index  = i;
> >>>>>> +
> >>>>>> +       if ((store_index != -1)
> >>>>>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
> >>>>>> +         {
> >>>>>> +           replace_marked_insns (insn_entry, store_index);
> >>>>>> +           replace_marked_insns (insn_entry, i);
> >>>>>> +         }
> >>>>>> +     }
> >>>>>> +    /* Clean up.  */
> >>>>>> +    free (insn_entry);
> >>>>>> +
> >>>>>> +    return 0;
> >>>>>> +}
> >>>>>> +
> >>>>>> +const pass_data pass_data_analyze_zext =
> >>>>>> +{
> >>>>>> +  RTL_PASS, /* type */
> >>>>>> +  "zext", /* name */
> >>>>>> +  OPTGROUP_NONE, /* optinfo_flags */
> >>>>>> +  TV_NONE, /* tv_id */
> >>>>>> +  0, /* properties_required */
> >>>>>> +  0, /* properties_provided */
> >>>>>> +  0, /* properties_destroyed */
> >>>>>> +  0, /* todo_flags_start */
> >>>>>> +  TODO_df_finish, /* todo_flags_finish */
> >>>>>> +};
> >>>>>> +
> >>>>>> +class pass_analyze_zext : public rtl_opt_pass
> >>>>>> +{
> >>>>>> +public:
> >>>>>> +  pass_analyze_zext(gcc::context *ctxt)
> >>>>>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
> >>>>>> +  {}
> >>>>>> +
> >>>>>> +  /* opt_pass methods: */
> >>>>>> +  virtual bool gate (function *)
> >>>>>> +    {
> >>>>>> +      return (optimize > 0 );
> >>>>>> +    }
> >>>>>> +
> >>>>>> +  virtual unsigned int execute (function *fun)
> >>>>>> +    {
> >>>>>> +      return rs6000_analyze_zext (fun);
> >>>>>> +    }
> >>>>>> +
> >>>>>> +  opt_pass *clone ()
> >>>>>> +    {
> >>>>>> +      return new pass_analyze_zext (m_ctxt);
> >>>>>> +    }
> >>>>>> +
> >>>>>> +}; // class pass_analyze_zext
> >>>>>> +
> >>>>>> +rtl_opt_pass *
> >>>>>> +make_pass_analyze_zext (gcc::context *ctxt)
> >>>>>> +{
> >>>>>> +  return new pass_analyze_zext (ctxt);
> >>>>>> +}
> >>>>>> +
> >>>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> >>>>>> index 8e0b0d022db..6541334bf2d 100644
> >>>>>> --- a/gcc/config/rs6000/rs6000.cc
> >>>>>> +++ b/gcc/config/rs6000/rs6000.cc
> >>>>>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
> >>>>>>                                           bool);
> >>>>>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
> >>>>>>
> >>>>>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
> >>>>>> +
> >>>>>>  /* Hash table stuff for keeping track of TOC entries.  */
> >>>>>>
> >>>>>>  struct GTY((for_user)) toc_hash_struct
> >>>>>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
> >>>>>> index f183b42ce1d..c1f61591d2f 100644
> >>>>>> --- a/gcc/config/rs6000/t-rs6000
> >>>>>> +++ b/gcc/config/rs6000/t-rs6000
> >>>>>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
> >>>>>>         $(COMPILE) $<
> >>>>>>         $(POSTCOMPILE)
> >>>>>>
> >>>>>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
> >>>>>> +       $(COMPILE) $<
> >>>>>> +       $(POSTCOMPILE)
> >>>>>> +
> >>>>>> +
> >>>>>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
> >>>>>>         $(COMPILE) $<
> >>>>>>         $(POSTCOMPILE)
> >>>>>> diff --git a/gcc/explow.cc b/gcc/explow.cc
> >>>>>> index 32e9498ee07..316aa975e40 100644
> >>>>>> --- a/gcc/explow.cc
> >>>>>> +++ b/gcc/explow.cc
> >>>>>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
> >>>>>>    if (! general_operand (x, VOIDmode))
> >>>>>>      x = force_operand (x, temp);
> >>>>>>
> >>>>>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
> >>>>>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
> >>>>>> +              || GET_MODE (x) == VOIDmode);
> >>>>>>    if (x != temp)
> >>>>>>      emit_move_insn (temp, x);
> >>>>>>    return temp;
> >>>>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
> >>>>>> index 15be1c8db99..6162ef92b88 100644
> >>>>>> --- a/gcc/expr.cc
> >>>>>> +++ b/gcc/expr.cc
> >>>>>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
> >>>>>>    rtx y_cst = NULL_RTX;
> >>>>>>    rtx_insn *last_insn;
> >>>>>>    rtx set;
> >>>>>> -
> >>>>>>    gcc_assert (mode != BLKmode
> >>>>>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
> >>>>>> +             && (mode == DImode || GET_MODE (y) == mode
> >>>>>> +             || GET_MODE (y) == VOIDmode));
> >>>>>>
> >>>>>>    /* If we have a copy that looks like one of the following patterns:
> >>>>>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
> >>>>>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> >>>>>> index 4c641cab192..9d22fadc7ef 100644
> >>>>>> --- a/gcc/optabs.cc
> >>>>>> +++ b/gcc/optabs.cc
> >>>>>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
> >>>>>>      input:
> >>>>>>        gcc_assert (mode != VOIDmode);
> >>>>>>        gcc_assert (GET_MODE (op->value) == VOIDmode
> >>>>>> -                 || GET_MODE (op->value) == mode);
> >>>>>> +                 || GET_MODE (op->value) == mode
> >>>>>> +                 || mode == DImode);
> >>>>>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
> >>>>>>         return true;
> >>>>>>
> >>>>>> --
> >>>>>> 2.31.1
> >>>>>>
Ajit Agarwal March 16, 2023, 10:43 a.m. UTC | #8
On 16/03/23 4:00 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>>
>> Hello Richard:
>>
>> On 16/03/23 3:22 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>
>>>>
>>>>
>>>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>>>
>>>>>> Hello Richard:
>>>>>>
>>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>>> <gcc-patches@gcc.gnu.org> wrote:
>>>>>>>>
>>>>>>>> Hello All:
>>>>>>>>
>>>>>>>>
>>>>>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
>>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>>>
>>>>>>> What makes this so special that we cannot deal with it from generic code?
>>>>>>> In particular we do have the REE pass, why is target specific
>>>>>>> knowledge necessary
>>>>>>> to eliminate the extension?
>>>>>>>
>>>>>>
>>>>>> When returning bool values and comparing with integers, the following sequence is generated by all the RTL passes.
>>>>>>
>>>>>> set compare (subreg)
>>>>>> set if_then_else
>>>>>> Convert SImode -> QImode
>>>>>> set zero_extend to DImode from QImode
>>>>>> set return value 0 in one path of cfg.
>>>>>> set return value 1 in other path of cfg.
>>>>>>
>>>>>> This pass replaces the above zero extension and the conversion from QImode to DImode with a copy operation, keeping the QImode value in a 64-bit register on the powerpc target.
>>>>>
>>>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>>>> cannot even try to see what the actual RTL
>>>>> looks like (without the pass).
>>>>>
>>>>
>>>> Here is the PR with bugzilla.
>>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>>>
>>>> I can add the attached testcase with this PR in the patch.
>>>
>>> I don't see any zero-extends there.
>>>
>>
>> Here is the testcase.
>>
>>
>> bool foo (int a, int b)
>> {
>>           if (a > 2)
>>                       return false;
>>            if (b < 10)
>>                        return true;
>>              return false;
>> }
>>
>> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>>
>> Here is the rtl after cse.
>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>> (insn 15 12 16 3 (set (reg:CC 123)
>>         (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>>             (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>>      (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>>         (nil)))
>> (insn 16 15 17 3 (set (reg:SI 124)
>>         (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>>      (nil))
>> (insn 17 16 18 3 (set (reg:SI 122)
>>         (if_then_else:SI (gt (reg:CC 123)
>>                 (const_int 0 [0]))
>>             (const_int 0 [0])
>>             (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>>      (expr_list:REG_DEAD (reg:SI 124)
>>         (expr_list:REG_DEAD (reg:CC 123)
>>             (nil))))
>> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>>         (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>>      (expr_list:REG_DEAD (reg:SI 122)
>>         (nil)))
>>       ; pc falls through to BB 5
>> (code_label 32 18 31 4 3 (nil) [1 uses])
>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>>         (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>>      (nil))
>> (code_label 19 5 20 5 2 (nil) [0 uses])
>> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>>         (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
>>      (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>>         (nil)))
>> (insn 22 21 26 5 (set (reg:DI 118 [ <retval> ])
>>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>      (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
>>         (nil)))
>> (insn 26 22 27 5 (set (reg/i:DI 3 3)
>>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>      (expr_list:REG_DEAD (reg:DI 118 [ <retval> ])
>>         (nil)))
>> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
>>      (nil))
> 
> But after combine there's just
> 
> (note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> (insn 38 6 2 2 (set (reg:DI 126)
>         (reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
>      (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
>         (nil)))
> (note 2 38 39 2 NOTE_INSN_DELETED)
> (insn 39 2 3 2 (set (reg:DI 127)
>         (reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
>      (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
>         (nil)))
> (insn 3 39 4 2 (set (reg/v:DI 119 [ b ])
>         (reg:DI 127)) "t.c":3:1 634 {*movdi_internal64}
>      (expr_list:REG_DEAD (reg:DI 127)
>         (nil)))
> (note 4 3 10 2 NOTE_INSN_FUNCTION_BEG)
> (insn 10 4 11 2 (set (reg:CC 120)
>         (compare:CC (subreg/s/u:SI (reg:DI 126) 0)
>             (const_int 2 [0x2]))) "t.c":4:6 755 {*cmpsi_signed}
>      (expr_list:REG_DEAD (reg:DI 126)
>         (nil)))
> (jump_insn 11 10 12 2 (set (pc)
>         (if_then_else (gt (reg:CC 120)
>                 (const_int 0 [0]))
>             (label_ref:DI 32)
>             (pc))) "t.c":4:6 838 {*cbranch}
>      (expr_list:REG_DEAD (reg:CC 120)
>         (int_list:REG_BR_PROB 365072228 (nil)))
>  -> 32)
> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> (note 15 12 16 3 NOTE_INSN_DELETED)
> (note 16 15 17 3 NOTE_INSN_DELETED)
> (note 17 16 19 3 NOTE_INSN_DELETED)
> (insn 19 17 32 3 (parallel [
>             (set (reg:DI 117 [ <retval> ])
>                 (le:DI (subreg/s/u:SI (reg/v:DI 119 [ b ]) 0)
>                     (const_int 9 [0x9])))
>             (clobber (scratch:DI))
>             (clobber (scratch:DI))
>             (clobber (scratch:CC))
>         ]) "t.c":6:6 783 {ledisi2_isel}
>      (expr_list:REG_DEAD (reg/v:DI 119 [ b ])
>         (nil)))
>       ; pc falls through to BB 5
> (code_label 32 19 31 4 3 (nil) [1 uses])
> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> (insn 5 31 20 4 (set (reg:DI 117 [ <retval> ])
>         (const_int 0 [0])) "t.c":5:12 634 {*movdi_internal64}
>      (nil))
> (code_label 20 5 21 5 2 (nil) [0 uses])
> (note 21 20 26 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> (insn 26 21 27 5 (set (reg/i:DI 3 3)
>         (reg:DI 117 [ <retval> ])) "t.c":9:1 634 {*movdi_internal64}
>      (expr_list:REG_DEAD (reg:DI 117 [ <retval> ])
>         (nil)))
> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "t.c":9:1 -1
>      (nil))
> 
> and we get
> 
> foo:
> .LFB0:
>         .cfi_startproc
>         cmpwi 0,3,2
>         bgt 0,.L3
>         cmpwi 0,4,9
>         li 3,1
>         isel 3,0,3,1
>         blr
>         .p2align 4,,15
> .L3:
>         li 3,0
>         blr
> 
> where I don't see what we can do better (ok, not knowing ppc very much)
> 

After combine I get the following:

(insn 10 4 11 2 (set (reg:CC 121)
        (compare:CC (subreg/s/u:SI (reg:DI 127) 0)
            (const_int 2 [0x2]))) "ext.cc":3:4 796 {*cmpsi_signed}
     (expr_list:REG_DEAD (reg:DI 127)
        (nil)))
(jump_insn 11 10 12 2 (set (pc)
        (if_then_else (gt (reg:CC 121)
                (const_int 0 [0]))
            (label_ref:DI 32)
            (pc))) "ext.cc":3:4 879 {*cbranch}
     (expr_list:REG_DEAD (reg:CC 121)
        (int_list:REG_BR_PROB 365072228 (nil)))
 -> 32)
(note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(note 15 12 16 3 NOTE_INSN_DELETED)
(note 16 15 17 3 NOTE_INSN_DELETED)
(insn 17 16 18 3 (parallel [
            (set (reg:SI 122)
                (le:SI (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
                    (const_int 9 [0x9])))
            (clobber (scratch:SI))
            (clobber (scratch:SI))
            (clobber (scratch:CC))
        ]) "ext.cc":5:5 814 {lesisi2_isel}
     (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
        (nil)))
(insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
        (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
     (expr_list:REG_DEAD (reg:SI 122)
        (nil)))
      ; pc falls through to BB 5
(code_label 32 18 31 4 3 (nil) [1 uses])
(note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
        (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
     (nil))
(code_label 19 5 20 5 2 (nil) [0 uses])
(note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(note 21 20 26 5 NOTE_INSN_DELETED)
(insn 26 21 27 5 (set (reg/i:DI 3 %r3)
        (and:DI (subreg:DI (reg:QI 117 [ _1 ]) 0)
            (const_int 1 [0x1]))) "ext.cc":8:1 207 {anddi3_mask}
     (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
        (nil)))
(insn 27 26 0 5 (use (reg/i:DI 3 %r3)) "ext.cc":8:1 -1
     (nil))

and here is the assembly:


        .file   "ext.cc"
        .machine power9
        .abiversion 2
        .section        ".text"
        .align 2
        .p2align 4,,15
        .globl _Z3fooii
        .type   _Z3fooii, @function
_Z3fooii:
.LFB0:
        .cfi_startproc
        cmpwi %cr0,%r3,2
        bgt %cr0,.L3
        cmpwi %cr0,%r4,9
        li %r3,1
        isel %r3,0,%r3,1
        rldicl %r3,%r3,0,63
        blr
        .p2align 4,,15
.L3:
        li %r3,0
        rldicl %r3,%r3,0,63
        blr
        .long 0
        .byte 0,9,0,0,0,0,0,0
        .cfi_endproc
.LFE0:
        .size   _Z3fooii,.-_Z3fooii
        .ident  "GCC: (GNU) 13.0.1 20230310 (experimental)"
        .section        .note.GNU-stack,"",@progbits

Did you try with -O3?

Thanks & Regards
Ajit
>>
>> Thanks & Regards
>> Ajit
>>
>>>> Thanks & Regards
>>>> Ajit
>>>>> Richard.
>>>>>
>>>>>> Thanks & Regards
>>>>>> Ajit
>>>>>>>> +  In cfgexpand pass QImode is generated with
>>>>>>>> +  bool register value and this pass uses QI
>>>>>>>> +  as 64 bit registers.
>>>>>>>> +
>>>>>>
>>>>>>>>         rs6000: suboptimal code for returning bool value on target ppc.
>>>>>>>>
>>>>>>>>         New pass to eliminate unnecessary zero extension. This pass
>>>>>>>>         is registered after cse rtl pass.
>>>>>>>>
>>>>>>>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>>>>>>>
>>>>>>>> gcc/ChangeLog:
>>>>>>>>
>>>>>>>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
>>>>>>>>         pass.
>>>>>>>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>>>>>>>>         * config.gcc: Add new executable.
>>>>>>>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
>>>>>>>>         elimination pass.
>>>>>>>>         * config/rs6000/rs6000.cc: Add new prototype for zero
>>>>>>>>         elimination pass.
>>>>>>>>         * config/rs6000/t-rs6000: Add new rule.
>>>>>>>>         * expr.cc: Modified gcc assert.
>>>>>>>>         * explow.cc: Modified gcc assert.
>>>>>>>>         * optabs.cc: Modified gcc assert.
>>>>>>>> ---
>>>>>>>>  gcc/config.gcc                        |   4 +-
>>>>>>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>>>>>>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
>>>>>>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
>>>>>>>>  gcc/config/rs6000/rs6000.cc           |   2 +
>>>>>>>>  gcc/config/rs6000/t-rs6000            |   5 +
>>>>>>>>  gcc/explow.cc                         |   3 +-
>>>>>>>>  gcc/expr.cc                           |   4 +-
>>>>>>>>  gcc/optabs.cc                         |   3 +-
>>>>>>>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>>>>>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>>>>
>>>>>>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>>>>>>>> index da3a6d3ba1f..e8ac9d882f0 100644
>>>>>>>> --- a/gcc/config.gcc
>>>>>>>> +++ b/gcc/config.gcc
>>>>>>>> @@ -503,7 +503,7 @@ or1k*-*-*)
>>>>>>>>         ;;
>>>>>>>>  powerpc*-*-*)
>>>>>>>>         cpu_type=rs6000
>>>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>>>>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>>>>>>>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>>>>>>>> @@ -538,7 +538,7 @@ riscv*)
>>>>>>>>         ;;
>>>>>>>>  rs6000*-*-*)
>>>>>>>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
>>>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
>>>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>>>>>>>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
>>>>>>>> index ca899d5f7af..d7500feddf1 100644
>>>>>>>> --- a/gcc/config/rs6000/rs6000-passes.def
>>>>>>>> +++ b/gcc/config/rs6000/rs6000-passes.def
>>>>>>>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>>>>>>>>       The power8 does not have instructions that automaticaly do the byte swaps
>>>>>>>>       for loads and stores.  */
>>>>>>>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
>>>>>>>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
>>>>>>>> +
>>>>>>>>
>>>>>>>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
>>>>>>>>       external symbol's address along with a single load or store using that
>>>>>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>>>>>>> index 1a4fc1df668..f6cf2d673d4 100644
>>>>>>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>>>>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>>>>>>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>>>>>>>>  class rtl_opt_pass;
>>>>>>>>
>>>>>>>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
>>>>>>>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>>>>>>>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>>>>>>>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>>>>>>>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
>>>>>>>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>>>> new file mode 100644
>>>>>>>> index 00000000000..777c7a5a387
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>>>> @@ -0,0 +1,361 @@
>>>>>>>> +/* Subroutine to eliminate redundant zero extend for power architecture.
>>>>>>>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
>>>>>>>> +
>>>>>>>> +   This file is part of GCC.
>>>>>>>> +
>>>>>>>> +   GCC is free software; you can redistribute it and/or modify it
>>>>>>>> +   under the terms of the GNU General Public License as published
>>>>>>>> +   by the Free Software Foundation; either version 3, or (at your
>>>>>>>> +   option) any later version.
>>>>>>>> +
>>>>>>>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
>>>>>>>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>>>>>>>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>>>>>> +   License for more details.
>>>>>>>> +
>>>>>>>> +   You should have received a copy of the GNU General Public License
>>>>>>>> +   along with GCC; see the file COPYING3.  If not see
>>>>>>>> +   <http://www.gnu.org/licenses/>.  */
>>>>>>>> +
>>>>>>>> +/* This pass remove unnecessary zero extension instruction from
>>>>>>>> +  power generated assembly. This pass is register after cse
>>>>>>>> +  pass.
>>>>>>>> +  Identifies the following sequence of instruction after cse
>>>>>>>> +  rtl pass.
>>>>>>>> +
>>>>>>>> +  set compare (subreg)
>>>>>>>> +  set if_then_else
>>>>>>>> +  set SImode -> QImode
>>>>>>>> +  set zero_extend to DImode from QImode
>>>>>>>> +  set return value 0 in one path of cfg.
>>>>>>>> +  set return value 1 in other path of cfg.
>>>>>>>> +
>>>>>>>> +  In cfgexpand pass QImode is generated with
>>>>>>>> +  bool register value and this pass uses QI
>>>>>>>> +  as 64 bit registers.
>>>>>>>> +
>>>>>>>> +  This pass replace copy operation from QImode to DImode
>>>>>>>> +  and return appropriate return values.*/
>>>>>>>> +
>>>>>>>> +#define IN_TARGET_CODE 1
>>>>>>>> +
>>>>>>>> +#include "config.h"
>>>>>>>> +#include "system.h"
>>>>>>>> +#include "coretypes.h"
>>>>>>>> +#include "backend.h"
>>>>>>>> +#include "rtl.h"
>>>>>>>> +#include "tree.h"
>>>>>>>> +#include "memmodel.h"
>>>>>>>> +#include "df.h"
>>>>>>>> +#include "tm_p.h"
>>>>>>>> +#include "ira.h"
>>>>>>>> +#include "print-tree.h"
>>>>>>>> +#include "varasm.h"
>>>>>>>> +#include "explow.h"
>>>>>>>> +#include "expr.h"
>>>>>>>> +#include "output.h"
>>>>>>>> +#include "tree-pass.h"
>>>>>>>> +
>>>>>>>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
>>>>>>>> +   defined in df.h.  */
>>>>>>>> +class zext_web_entry : public web_entry_base
>>>>>>>> +{
>>>>>>>> + public:
>>>>>>>> +  /* Pointer to the insn.  */
>>>>>>>> +  rtx_insn *insn;
>>>>>>>> +  unsigned int is_relevant : 1;
>>>>>>>> +  /* Set if insn is a load.  */
>>>>>>>> +  unsigned int is_load : 1;
>>>>>>>> +  /* Set if insn is a store.  */
>>>>>>>> +  unsigned int is_store : 1;
>>>>>>>> +  unsigned int is_zext :1 ;
>>>>>>>> +  unsigned int is_move :1;
>>>>>>>> +  unsigned int is_delete_move :1;
>>>>>>>> +  /* Set if this insn should be deleted.  */
>>>>>>>> +  unsigned int will_delete : 1;
>>>>>>>> +  unsigned int will_delete_chances : 1;
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +/* Checks if instruction is zero extension
>>>>>>>> + * with QIMode to DImode.*/
>>>>>>>> +static unsigned int
>>>>>>>> +insn_is_zext_p(rtx insn)
>>>>>>>> +{
>>>>>>>> +  rtx body = PATTERN (insn);
>>>>>>>> +
>>>>>>>> +  if (GET_CODE (body) == SET
>>>>>>>> +      && GET_MODE(SET_DEST (body)) == DImode
>>>>>>>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
>>>>>>>> +  {
>>>>>>>> +    rtx set = XEXP (SET_SRC (body), 0);
>>>>>>>> +
>>>>>>>> +    if (REG_P (set))
>>>>>>>> +    {
>>>>>>>> +      if (GET_MODE (set) == QImode) return 1;
>>>>>>>> +    }
>>>>>>>> +    else
>>>>>>>> +      return 0;
>>>>>>>> +  }
>>>>>>>> +  return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Checks if instruction is SET operation with QImode.*/
>>>>>>>> +static unsigned int
>>>>>>>> +insn_is_store_p (rtx insn)
>>>>>>>> +{
>>>>>>>> +  rtx body = PATTERN (insn);
>>>>>>>> +  if (GET_CODE (body) == SET
>>>>>>>> +      && SUBREG_P(SET_SRC (body))
>>>>>>>> +      && !CONST_INT_P(SET_SRC (body))
>>>>>>>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
>>>>>>>> +      && GET_MODE(SET_SRC (body)) == QImode)
>>>>>>>> +    return 1;
>>>>>>>> +
>>>>>>>> +  return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Find out zero extension removal candidate with use-def web.*/
>>>>>>>> +static void
>>>>>>>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
>>>>>>>> +                                    rtx insn, df_ref def)
>>>>>>>> +{
>>>>>>>> +  struct df_link *link = DF_REF_CHAIN (def);
>>>>>>>> +
>>>>>>>> +  rtx move_insn = NULL_RTX;
>>>>>>>> +  rtx compare_insn = NULL_RTX;
>>>>>>>> +
>>>>>>>> +  while (link)
>>>>>>>> +  {
>>>>>>>> +    if (!DF_REF_INSN_INFO (link->ref))
>>>>>>>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
>>>>>>>> +
>>>>>>>> +    if (DF_REF_INSN_INFO (link->ref))
>>>>>>>> +      {
>>>>>>>> +       rtx use_insn = DF_REF_INSN (link->ref);
>>>>>>>> +
>>>>>>>> +       if (GET_CODE (PATTERN (use_insn)) == SET
>>>>>>>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
>>>>>>>> +         {
>>>>>>>> +           if (GET_CODE (PATTERN (insn)) == SET
>>>>>>>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
>>>>>>>> +             {
>>>>>>>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
>>>>>>>> +
>>>>>>>> +               if (SUBREG_P (body))
>>>>>>>> +                 {
>>>>>>>> +                   compare_insn = use_insn;
>>>>>>>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
>>>>>>>> +
>>>>>>>> +                   if (compare_insn
>>>>>>>> +                       && ((REGNO (XEXP (compare_body, 0)))
>>>>>>>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
>>>>>>>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>>>>>> +                 }
>>>>>>>> +              }
>>>>>>>> +           }
>>>>>>>> +
>>>>>>>> +       if (insn_is_store_p(use_insn)
>>>>>>>> +           && GET_CODE (PATTERN (insn)) == SET
>>>>>>>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
>>>>>>>> +         {
>>>>>>>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
>>>>>>>> +             {
>>>>>>>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
>>>>>>>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>>>>>> +             }
>>>>>>>> +         }
>>>>>>>> +
>>>>>>>> +       if (insn_is_zext_p (insn))
>>>>>>>> +         {
>>>>>>>> +           if (GET_CODE (PATTERN (use_insn)) == SET
>>>>>>>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
>>>>>>>> +             {
>>>>>>>> +               if (move_insn
>>>>>>>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
>>>>>>>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
>>>>>>>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
>>>>>>>> +                 {
>>>>>>>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
>>>>>>>> +                   break;
>>>>>>>> +                 }
>>>>>>>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
>>>>>>>> +                   {
>>>>>>>> +                     move_insn = use_insn;
>>>>>>>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
>>>>>>>> +                   }
>>>>>>>> +             }
>>>>>>>> +         }
>>>>>>>> +
>>>>>>>> +       if (insn_is_zext_p (use_insn))
>>>>>>>> +         {
>>>>>>>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
>>>>>>>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
>>>>>>>> +
>>>>>>>> +           if (insn_is_store_p (insn)
>>>>>>>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
>>>>>>>> +           {
>>>>>>>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
>>>>>>>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
>>>>>>>> +             insn_entry[INSN_UID( insn)].is_store = 1;
>>>>>>>> +           }
>>>>>>>> +
>>>>>>>> +          if (NONDEBUG_INSN_P (use_insn))
>>>>>>>> +            unionfind_union (insn_entry + INSN_UID (insn),
>>>>>>>> +                             insn_entry + INSN_UID (use_insn));
>>>>>>>> +       }
>>>>>>>> +      }
>>>>>>>> +
>>>>>>>> +    link = link->next;
>>>>>>>> +  }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Replace QImode extensions with copy operations.*/
>>>>>>>> +static void
>>>>>>>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
>>>>>>>> +{
>>>>>>>> +  rtx_insn *insn = insn_entry[i].insn;
>>>>>>>> +  rtx body = PATTERN (insn);
>>>>>>>> +  rtx src_reg;
>>>>>>>> +  src_reg = XEXP (SET_SRC (body), 0);
>>>>>>>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
>>>>>>>> +
>>>>>>>> +  if (GET_MODE(SET_DEST(body)) != DImode)
>>>>>>>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
>>>>>>>> +
>>>>>>>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
>>>>>>>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
>>>>>>>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
>>>>>>>> +  df_insn_rescan (new_insn);
>>>>>>>> +
>>>>>>>> +  df_insn_delete (insn);
>>>>>>>> +  remove_insn (insn);
>>>>>>>> +  insn->set_deleted ();
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Main entry point for this pass.  */
>>>>>>>> +unsigned int
>>>>>>>> +rs6000_analyze_zext (function *fun)
>>>>>>>> +{
>>>>>>>> +  zext_web_entry *insn_entry;
>>>>>>>> +  basic_block bb;
>>>>>>>> +  rtx_insn *insn, *curr_insn = 0;
>>>>>>>> +
>>>>>>>> +  /* Dataflow analysis for use-def chains.  */
>>>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>>>>>> +  df_analyze ();
>>>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>>>>>> +
>>>>>>>> +  /* Rebuild ud- and du-chains.  */
>>>>>>>> +  df_remove_problem (df_chain);
>>>>>>>> +  df_process_deferred_rescans ();
>>>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>>>>>> +  df_analyze ();
>>>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>>>>>> +
>>>>>>>> +  /* Allocate structure to represent webs of insns.  */
>>>>>>>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
>>>>>>>> +
>>>>>>>> +  /* Walk the insns to gather basic data.  */
>>>>>>>> +  FOR_ALL_BB_FN (bb, fun)
>>>>>>>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
>>>>>>>> +    {
>>>>>>>> +      unsigned int uid = INSN_UID (insn);
>>>>>>>> +      if (NONDEBUG_INSN_P (insn))
>>>>>>>> +       {
>>>>>>>> +         insn_entry[uid].insn = insn;
>>>>>>>> +
>>>>>>>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
>>>>>>>> +           {
>>>>>>>> +             insn_entry[uid].is_store = 1;
>>>>>>>> +             insn_entry[uid].is_relevant = 1;
>>>>>>>> +           }
>>>>>>>> +
>>>>>>>> +         /* Walk the uses and defs to identify the optimization
>>>>>>>> +            candidates.*/
>>>>>>>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
>>>>>>>> +         df_ref mention;
>>>>>>>> +
>>>>>>>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
>>>>>>>> +           {
>>>>>>>> +             insn_entry[uid].is_relevant = 1;
>>>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>>>>>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
>>>>>>>> +           }
>>>>>>>> +
>>>>>>>> +         if (insn_entry[uid].is_relevant)
>>>>>>>> +           {
>>>>>>>> +             /* Determine if this is a store.  */
>>>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>>>>>> +           }
>>>>>>>> +       }
>>>>>>>> +     }
>>>>>>>> +
>>>>>>>> +   unsigned e = get_max_uid (), i;
>>>>>>>> +
>>>>>>>> +   int store_index = -1;
>>>>>>>> +
>>>>>>>> +   /* Replace with copy operation.*/
>>>>>>>> +   for (i = 0; i < e; ++i)
>>>>>>>> +     {
>>>>>>>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
>>>>>>>> +        store_index  = i;
>>>>>>>> +
>>>>>>>> +       if ((store_index != -1)
>>>>>>>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
>>>>>>>> +         {
>>>>>>>> +           replace_marked_insns (insn_entry, store_index);
>>>>>>>> +           replace_marked_insns (insn_entry, i);
>>>>>>>> +         }
>>>>>>>> +     }
>>>>>>>> +    /* Clean up.  */
>>>>>>>> +    free (insn_entry);
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +const pass_data pass_data_analyze_zext =
>>>>>>>> +{
>>>>>>>> +  RTL_PASS, /* type */
>>>>>>>> +  "zext", /* name */
>>>>>>>> +  OPTGROUP_NONE, /* optinfo_flags */
>>>>>>>> +  TV_NONE, /* tv_id */
>>>>>>>> +  0, /* properties_required */
>>>>>>>> +  0, /* properties_provided */
>>>>>>>> +  0, /* properties_destroyed */
>>>>>>>> +  0, /* todo_flags_start */
>>>>>>>> +  TODO_df_finish, /* todo_flags_finish */
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +class pass_analyze_zext : public rtl_opt_pass
>>>>>>>> +{
>>>>>>>> +public:
>>>>>>>> +  pass_analyze_zext(gcc::context *ctxt)
>>>>>>>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
>>>>>>>> +  {}
>>>>>>>> +
>>>>>>>> +  /* opt_pass methods: */
>>>>>>>> +  virtual bool gate (function *)
>>>>>>>> +    {
>>>>>>>> +      return (optimize > 0 );
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +  virtual unsigned int execute (function *fun)
>>>>>>>> +    {
>>>>>>>> +      return rs6000_analyze_zext (fun);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +  opt_pass *clone ()
>>>>>>>> +    {
>>>>>>>> +      return new pass_analyze_zext (m_ctxt);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +}; // class pass_analyze_zext
>>>>>>>> +
>>>>>>>> +rtl_opt_pass *
>>>>>>>> +make_pass_analyze_zext (gcc::context *ctxt)
>>>>>>>> +{
>>>>>>>> +  return new pass_analyze_zext (ctxt);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>>>>>>> index 8e0b0d022db..6541334bf2d 100644
>>>>>>>> --- a/gcc/config/rs6000/rs6000.cc
>>>>>>>> +++ b/gcc/config/rs6000/rs6000.cc
>>>>>>>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
>>>>>>>>                                           bool);
>>>>>>>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>>>>>>>>
>>>>>>>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
>>>>>>>> +
>>>>>>>>  /* Hash table stuff for keeping track of TOC entries.  */
>>>>>>>>
>>>>>>>>  struct GTY((for_user)) toc_hash_struct
>>>>>>>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
>>>>>>>> index f183b42ce1d..c1f61591d2f 100644
>>>>>>>> --- a/gcc/config/rs6000/t-rs6000
>>>>>>>> +++ b/gcc/config/rs6000/t-rs6000
>>>>>>>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
>>>>>>>>         $(COMPILE) $<
>>>>>>>>         $(POSTCOMPILE)
>>>>>>>>
>>>>>>>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
>>>>>>>> +       $(COMPILE) $<
>>>>>>>> +       $(POSTCOMPILE)
>>>>>>>> +
>>>>>>>> +
>>>>>>>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
>>>>>>>>         $(COMPILE) $<
>>>>>>>>         $(POSTCOMPILE)
>>>>>>>> diff --git a/gcc/explow.cc b/gcc/explow.cc
>>>>>>>> index 32e9498ee07..316aa975e40 100644
>>>>>>>> --- a/gcc/explow.cc
>>>>>>>> +++ b/gcc/explow.cc
>>>>>>>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
>>>>>>>>    if (! general_operand (x, VOIDmode))
>>>>>>>>      x = force_operand (x, temp);
>>>>>>>>
>>>>>>>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
>>>>>>>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
>>>>>>>> +              || GET_MODE (x) == VOIDmode);
>>>>>>>>    if (x != temp)
>>>>>>>>      emit_move_insn (temp, x);
>>>>>>>>    return temp;
>>>>>>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>>>>>>>> index 15be1c8db99..6162ef92b88 100644
>>>>>>>> --- a/gcc/expr.cc
>>>>>>>> +++ b/gcc/expr.cc
>>>>>>>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
>>>>>>>>    rtx y_cst = NULL_RTX;
>>>>>>>>    rtx_insn *last_insn;
>>>>>>>>    rtx set;
>>>>>>>> -
>>>>>>>>    gcc_assert (mode != BLKmode
>>>>>>>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
>>>>>>>> +             && (mode == DImode || GET_MODE (y) == mode
>>>>>>>> +             || GET_MODE (y) == VOIDmode));
>>>>>>>>
>>>>>>>>    /* If we have a copy that looks like one of the following patterns:
>>>>>>>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
>>>>>>>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
>>>>>>>> index 4c641cab192..9d22fadc7ef 100644
>>>>>>>> --- a/gcc/optabs.cc
>>>>>>>> +++ b/gcc/optabs.cc
>>>>>>>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
>>>>>>>>      input:
>>>>>>>>        gcc_assert (mode != VOIDmode);
>>>>>>>>        gcc_assert (GET_MODE (op->value) == VOIDmode
>>>>>>>> -                 || GET_MODE (op->value) == mode);
>>>>>>>> +                 || GET_MODE (op->value) == mode
>>>>>>>> +                 || mode == DImode);
>>>>>>>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
>>>>>>>>         return true;
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.31.1
>>>>>>>>
Richard Biener March 16, 2023, 10:56 a.m. UTC | #9
On Thu, Mar 16, 2023 at 11:43 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
>
>
> On 16/03/23 4:00 pm, Richard Biener wrote:
> > On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> >>
> >>
> >> Hello Richard:
> >>
> >> On 16/03/23 3:22 pm, Richard Biener wrote:
> >>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 16/03/23 1:44 pm, Richard Biener wrote:
> >>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> >>>>>>
> >>>>>> Hello Richard:
> >>>>>>
> >>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
> >>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
> >>>>>>> <gcc-patches@gcc.gnu.org> wrote:
> >>>>>>>>
> >>>>>>>> Hello All:
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
> >>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
> >>>>>>>
> >>>>>>> What makes this so special that we cannot deal with it from generic code?
> >>>>>>> In particular we do have the REE pass, why is target specific
> >>>>>>> knowledge necessary
> >>>>>>> to eliminate the extension?
> >>>>>>>
> >>>>>>
> >>>>>> For functions that return a bool value computed from integer comparisons, the RTL passes generate the following sequence:
> >>>>>>
> >>>>>> set compare (subreg)
> >>>>>> set if_then_else
> >>>>>> Convert SImode -> QImode
> >>>>>> set zero_extend to DImode from QImode
> >>>>>> set return value 0 in one path of cfg.
> >>>>>> set return value 1 in other path of cfg.
> >>>>>>
> >>>>>> This pass replaces the above conversion to QImode and the zero extension from QImode to DImode with a copy operation, so that the QImode value is kept in a 64-bit register on the powerpc target.
> >>>>>
> >>>>> Sorry, I can't parse that - as there's no testcase with the patch I
> >>>>> cannot even try to see what the actual RTL
> >>>>> looks like (without the pass).
> >>>>>
> >>>>
> >>>> Here is the PR in Bugzilla:
> >>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
> >>>>
> >>>> I can add the testcase attached to that PR to the patch.
> >>>
> >>> I don't see any zero-extends there.
> >>>
> >>
> >> Here is the testcase.
> >>
> >>
> >> bool foo (int a, int b)
> >> {
> >>           if (a > 2)
> >>                       return false;
> >>            if (b < 10)
> >>                        return true;
> >>              return false;
> >> }
> >>
> >> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
> >>
> >> Here is the rtl after cse.
> >> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> >> (insn 15 12 16 3 (set (reg:CC 123)
> >>         (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
> >>             (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
> >>      (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
> >>         (nil)))
> >> (insn 16 15 17 3 (set (reg:SI 124)
> >>         (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
> >>      (nil))
> >> (insn 17 16 18 3 (set (reg:SI 122)
> >>         (if_then_else:SI (gt (reg:CC 123)
> >>                 (const_int 0 [0]))
> >>             (const_int 0 [0])
> >>             (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
> >>      (expr_list:REG_DEAD (reg:SI 124)
> >>         (expr_list:REG_DEAD (reg:CC 123)
> >>             (nil))))
> >> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
> >>         (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
> >>      (expr_list:REG_DEAD (reg:SI 122)
> >>         (nil)))
> >>       ; pc falls through to BB 5
> >> (code_label 32 18 31 4 3 (nil) [1 uses])
> >> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> >> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
> >>         (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
> >>      (nil))
> >> (code_label 19 5 20 5 2 (nil) [0 uses])
> >> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> >> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
> >>         (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
> >>      (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
> >>         (nil)))
> >> (insn 22 21 26 5 (set (reg:DI 118 [ <retval> ])
> >>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
> >>      (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
> >>         (nil)))
> >> (insn 26 22 27 5 (set (reg/i:DI 3 3)
> >>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
> >>      (expr_list:REG_DEAD (reg:DI 118 [ <retval> ])
> >>         (nil)))
> >> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
> >>      (nil))
> >
> > But after combine there's just
> >
> > (note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
> > (insn 38 6 2 2 (set (reg:DI 126)
> >         (reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
> >      (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
> >         (nil)))
> > (note 2 38 39 2 NOTE_INSN_DELETED)
> > (insn 39 2 3 2 (set (reg:DI 127)
> >         (reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
> >      (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
> >         (nil)))
> > (insn 3 39 4 2 (set (reg/v:DI 119 [ b ])
> >         (reg:DI 127)) "t.c":3:1 634 {*movdi_internal64}
> >      (expr_list:REG_DEAD (reg:DI 127)
> >         (nil)))
> > (note 4 3 10 2 NOTE_INSN_FUNCTION_BEG)
> > (insn 10 4 11 2 (set (reg:CC 120)
> >         (compare:CC (subreg/s/u:SI (reg:DI 126) 0)
> >             (const_int 2 [0x2]))) "t.c":4:6 755 {*cmpsi_signed}
> >      (expr_list:REG_DEAD (reg:DI 126)
> >         (nil)))
> > (jump_insn 11 10 12 2 (set (pc)
> >         (if_then_else (gt (reg:CC 120)
> >                 (const_int 0 [0]))
> >             (label_ref:DI 32)
> >             (pc))) "t.c":4:6 838 {*cbranch}
> >      (expr_list:REG_DEAD (reg:CC 120)
> >         (int_list:REG_BR_PROB 365072228 (nil)))
> >  -> 32)
> > (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> > (note 15 12 16 3 NOTE_INSN_DELETED)
> > (note 16 15 17 3 NOTE_INSN_DELETED)
> > (note 17 16 19 3 NOTE_INSN_DELETED)
> > (insn 19 17 32 3 (parallel [
> >             (set (reg:DI 117 [ <retval> ])
> >                 (le:DI (subreg/s/u:SI (reg/v:DI 119 [ b ]) 0)
> >                     (const_int 9 [0x9])))
> >             (clobber (scratch:DI))
> >             (clobber (scratch:DI))
> >             (clobber (scratch:CC))
> >         ]) "t.c":6:6 783 {ledisi2_isel}
> >      (expr_list:REG_DEAD (reg/v:DI 119 [ b ])
> >         (nil)))
> >       ; pc falls through to BB 5
> > (code_label 32 19 31 4 3 (nil) [1 uses])
> > (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> > (insn 5 31 20 4 (set (reg:DI 117 [ <retval> ])
> >         (const_int 0 [0])) "t.c":5:12 634 {*movdi_internal64}
> >      (nil))
> > (code_label 20 5 21 5 2 (nil) [0 uses])
> > (note 21 20 26 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> > (insn 26 21 27 5 (set (reg/i:DI 3 3)
> >         (reg:DI 117 [ <retval> ])) "t.c":9:1 634 {*movdi_internal64}
> >      (expr_list:REG_DEAD (reg:DI 117 [ <retval> ])
> >         (nil)))
> > (insn 27 26 0 5 (use (reg/i:DI 3 3)) "t.c":9:1 -1
> >      (nil))
> >
> > and we get
> >
> > foo:
> > .LFB0:
> >         .cfi_startproc
> >         cmpwi 0,3,2
> >         bgt 0,.L3
> >         cmpwi 0,4,9
> >         li 3,1
> >         isel 3,0,3,1
> >         blr
> >         .p2align 4,,15
> > .L3:
> >         li 3,0
> >         blr
> >
> > where I don't see what we can do better (ok, not knowing ppc very much)
> >
>
> After combine I get the following:
>
> (insn 10 4 11 2 (set (reg:CC 121)
>         (compare:CC (subreg/s/u:SI (reg:DI 127) 0)
>             (const_int 2 [0x2]))) "ext.cc":3:4 796 {*cmpsi_signed}
>      (expr_list:REG_DEAD (reg:DI 127)
>         (nil)))
> (jump_insn 11 10 12 2 (set (pc)
>         (if_then_else (gt (reg:CC 121)
>                 (const_int 0 [0]))
>             (label_ref:DI 32)
>             (pc))) "ext.cc":3:4 879 {*cbranch}
>      (expr_list:REG_DEAD (reg:CC 121)
>         (int_list:REG_BR_PROB 365072228 (nil)))
>  -> 32)
> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> (note 15 12 16 3 NOTE_INSN_DELETED)
> (note 16 15 17 3 NOTE_INSN_DELETED)
> (insn 17 16 18 3 (parallel [
>             (set (reg:SI 122)
>                 (le:SI (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>                     (const_int 9 [0x9])))
>             (clobber (scratch:SI))
>             (clobber (scratch:SI))
>             (clobber (scratch:CC))
>         ]) "ext.cc":5:5 814 {lesisi2_isel}
>      (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>         (nil)))
> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>         (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>      (expr_list:REG_DEAD (reg:SI 122)
>         (nil)))
>       ; pc falls through to BB 5
> (code_label 32 18 31 4 3 (nil) [1 uses])
> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>         (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>      (nil))
> (code_label 19 5 20 5 2 (nil) [0 uses])
> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> (note 21 20 26 5 NOTE_INSN_DELETED)
> (insn 26 21 27 5 (set (reg/i:DI 3 %r3)
>         (and:DI (subreg:DI (reg:QI 117 [ _1 ]) 0)
>             (const_int 1 [0x1]))) "ext.cc":8:1 207 {anddi3_mask}
>      (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>         (nil)))
> (insn 27 26 0 5 (use (reg/i:DI 3 %r3)) "ext.cc":8:1 -1
>      (nil))
>
> and here is the assembly:
>
>
>         .file   "ext.cc"
>         .machine power9
>         .abiversion 2
>         .section        ".text"
>         .align 2
>         .p2align 4,,15
>         .globl _Z3fooii
>         .type   _Z3fooii, @function
> _Z3fooii:
> .LFB0:
>         .cfi_startproc
>         cmpwi %cr0,%r3,2
>         bgt %cr0,.L3
>         cmpwi %cr0,%r4,9
>         li %r3,1
>         isel %r3,0,%r3,1
>         rldicl %r3,%r3,0,63
>         blr
>         .p2align 4,,15
> .L3:
>         li %r3,0
>         rldicl %r3,%r3,0,63
>         blr
>         .long 0
>         .byte 0,9,0,0,0,0,0,0
>         .cfi_endproc
> .LFE0:
>         .size   _Z3fooii,.-_Z3fooii
>         .ident  "GCC: (GNU) 13.0.1 20230310 (experimental)"
>         .section        .note.GNU-stack,"",@progbits
>
> Did you try with -O3?

Yes (but the tree I tried from is somewhat old, it seems - sorry for
that).  But does that mean it at least worked at some point?

Richard.

> Thanks & Regards
> Ajit
> >>
> >> Thanks & Regards
> >> Ajit
> >>
> >>>> Thanks & Regards
> >>>> Ajit
> >>>>> Richard.
> >>>>>
> >>>>>> Thanks & Regards
> >>>>>> Ajit
> >>>>>>>> +  In cfgexpand pass QImode is generated with
> >>>>>>>> +  bool register value and this pass uses QI
> >>>>>>>> +  as 64 bit registers.
> >>>>>>>> +
> >>>>>>
> >>>>>>>>         rs6000: suboptimal code for returning bool value on target ppc.
> >>>>>>>>
> >>>>>>>>         New pass to eliminate unnecessary zero extension. This pass
> >>>>>>>>         is registered after cse rtl pass.
> >>>>>>>>
> >>>>>>>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> >>>>>>>>
> >>>>>>>> gcc/ChangeLog:
> >>>>>>>>
> >>>>>>>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
> >>>>>>>>         pass.
> >>>>>>>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
> >>>>>>>>         * config.gcc: Add new executable.
> >>>>>>>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
> >>>>>>>>         elimination pass.
> >>>>>>>>         * config/rs6000/rs6000.cc: Add new prototype for zero
> >>>>>>>>         elimination pass.
> >>>>>>>>         * config/rs6000/t-rs6000: Add new rule.
> >>>>>>>>         * expr.cc: Modified gcc assert.
> >>>>>>>>         * explow.cc: Modified gcc assert.
> >>>>>>>>         * optabs.cc: Modified gcc assert.
> >>>>>>>> ---
> >>>>>>>>  gcc/config.gcc                        |   4 +-
> >>>>>>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
> >>>>>>>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
> >>>>>>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
> >>>>>>>>  gcc/config/rs6000/rs6000.cc           |   2 +
> >>>>>>>>  gcc/config/rs6000/t-rs6000            |   5 +
> >>>>>>>>  gcc/explow.cc                         |   3 +-
> >>>>>>>>  gcc/expr.cc                           |   4 +-
> >>>>>>>>  gcc/optabs.cc                         |   3 +-
> >>>>>>>>  9 files changed, 379 insertions(+), 6 deletions(-)
> >>>>>>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> >>>>>>>>
> >>>>>>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
> >>>>>>>> index da3a6d3ba1f..e8ac9d882f0 100644
> >>>>>>>> --- a/gcc/config.gcc
> >>>>>>>> +++ b/gcc/config.gcc
> >>>>>>>> @@ -503,7 +503,7 @@ or1k*-*-*)
> >>>>>>>>         ;;
> >>>>>>>>  powerpc*-*-*)
> >>>>>>>>         cpu_type=rs6000
> >>>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >>>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>>>>>>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
> >>>>>>>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> >>>>>>>> @@ -538,7 +538,7 @@ riscv*)
> >>>>>>>>         ;;
> >>>>>>>>  rs6000*-*-*)
> >>>>>>>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> >>>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> >>>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
> >>>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
> >>>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
> >>>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> >>>>>>>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
> >>>>>>>> index ca899d5f7af..d7500feddf1 100644
> >>>>>>>> --- a/gcc/config/rs6000/rs6000-passes.def
> >>>>>>>> +++ b/gcc/config/rs6000/rs6000-passes.def
> >>>>>>>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
> >>>>>>>>       The power8 does not have instructions that automaticaly do the byte swaps
> >>>>>>>>       for loads and stores.  */
> >>>>>>>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> >>>>>>>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> >>>>>>>> +
> >>>>>>>>
> >>>>>>>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
> >>>>>>>>       external symbol's address along with a single load or store using that
> >>>>>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> >>>>>>>> index 1a4fc1df668..f6cf2d673d4 100644
> >>>>>>>> --- a/gcc/config/rs6000/rs6000-protos.h
> >>>>>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
> >>>>>>>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
> >>>>>>>>  class rtl_opt_pass;
> >>>>>>>>
> >>>>>>>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> >>>>>>>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
> >>>>>>>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
> >>>>>>>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
> >>>>>>>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> >>>>>>>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
> >>>>>>>> new file mode 100644
> >>>>>>>> index 00000000000..777c7a5a387
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> >>>>>>>> @@ -0,0 +1,361 @@
> >>>>>>>> +/* Subroutine to eliminate redundant zero extend for power architecture.
> >>>>>>>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> >>>>>>>> +
> >>>>>>>> +   This file is part of GCC.
> >>>>>>>> +
> >>>>>>>> +   GCC is free software; you can redistribute it and/or modify it
> >>>>>>>> +   under the terms of the GNU General Public License as published
> >>>>>>>> +   by the Free Software Foundation; either version 3, or (at your
> >>>>>>>> +   option) any later version.
> >>>>>>>> +
> >>>>>>>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> >>>>>>>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> >>>>>>>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> >>>>>>>> +   License for more details.
> >>>>>>>> +
> >>>>>>>> +   You should have received a copy of the GNU General Public License
> >>>>>>>> +   along with GCC; see the file COPYING3.  If not see
> >>>>>>>> +   <http://www.gnu.org/licenses/>.  */
> >>>>>>>> +
> >>>>>>>> +/* This pass remove unnecessary zero extension instruction from
> >>>>>>>> +  power generated assembly. This pass is register after cse
> >>>>>>>> +  pass.
> >>>>>>>> +  Identifies the following sequence of instruction after cse
> >>>>>>>> +  rtl pass.
> >>>>>>>> +
> >>>>>>>> +  set compare (subreg)
> >>>>>>>> +  set if_then_else
> >>>>>>>> +  set SImode -> QImode
> >>>>>>>> +  set zero_extend to DImode from QImode
> >>>>>>>> +  set return value 0 in one path of cfg.
> >>>>>>>> +  set return value 1 in other path of cfg.
> >>>>>>>> +
> >>>>>>>> +  In cfgexpand pass QImode is generated with
> >>>>>>>> +  bool register value and this pass uses QI
> >>>>>>>> +  as 64 bit registers.
> >>>>>>>> +
> >>>>>>>> +  This pass replace copy operation from QImode to DImode
> >>>>>>>> +  and return appropriate return values.*/
> >>>>>>>> +
> >>>>>>>> +#define IN_TARGET_CODE 1
> >>>>>>>> +
> >>>>>>>> +#include "config.h"
> >>>>>>>> +#include "system.h"
> >>>>>>>> +#include "coretypes.h"
> >>>>>>>> +#include "backend.h"
> >>>>>>>> +#include "rtl.h"
> >>>>>>>> +#include "tree.h"
> >>>>>>>> +#include "memmodel.h"
> >>>>>>>> +#include "df.h"
> >>>>>>>> +#include "tm_p.h"
> >>>>>>>> +#include "ira.h"
> >>>>>>>> +#include "print-tree.h"
> >>>>>>>> +#include "varasm.h"
> >>>>>>>> +#include "explow.h"
> >>>>>>>> +#include "expr.h"
> >>>>>>>> +#include "output.h"
> >>>>>>>> +#include "tree-pass.h"
> >>>>>>>> +
> >>>>>>>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
> >>>>>>>> +   defined in df.h.  */
> >>>>>>>> +class zext_web_entry : public web_entry_base
> >>>>>>>> +{
> >>>>>>>> + public:
> >>>>>>>> +  /* Pointer to the insn.  */
> >>>>>>>> +  rtx_insn *insn;
> >>>>>>>> +  unsigned int is_relevant : 1;
> >>>>>>>> +  /* Set if insn is a load.  */
> >>>>>>>> +  unsigned int is_load : 1;
> >>>>>>>> +  /* Set if insn is a store.  */
> >>>>>>>> +  unsigned int is_store : 1;
> >>>>>>>> +  unsigned int is_zext :1 ;
> >>>>>>>> +  unsigned int is_move :1;
> >>>>>>>> +  unsigned int is_delete_move :1;
> >>>>>>>> +  /* Set if this insn should be deleted.  */
> >>>>>>>> +  unsigned int will_delete : 1;
> >>>>>>>> +  unsigned int will_delete_chances : 1;
> >>>>>>>> +};
> >>>>>>>> +
> >>>>>>>> +/* Checks if instruction is zero extension
> >>>>>>>> + * with QIMode to DImode.*/
> >>>>>>>> +static unsigned int
> >>>>>>>> +insn_is_zext_p(rtx insn)
> >>>>>>>> +{
> >>>>>>>> +  rtx body = PATTERN (insn);
> >>>>>>>> +
> >>>>>>>> +  if (GET_CODE (body) == SET
> >>>>>>>> +      && GET_MODE(SET_DEST (body)) == DImode
> >>>>>>>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
> >>>>>>>> +  {
> >>>>>>>> +    rtx set = XEXP (SET_SRC (body), 0);
> >>>>>>>> +
> >>>>>>>> +    if (REG_P (set))
> >>>>>>>> +    {
> >>>>>>>> +      if (GET_MODE (set) == QImode) return 1;
> >>>>>>>> +    }
> >>>>>>>> +    else
> >>>>>>>> +      return 0;
> >>>>>>>> +  }
> >>>>>>>> +  return 0;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +/* Checks if instruction is SET operation with QImode.*/
> >>>>>>>> +static unsigned int
> >>>>>>>> +insn_is_store_p (rtx insn)
> >>>>>>>> +{
> >>>>>>>> +  rtx body = PATTERN (insn);
> >>>>>>>> +  if (GET_CODE (body) == SET
> >>>>>>>> +      && SUBREG_P(SET_SRC (body))
> >>>>>>>> +      && !CONST_INT_P(SET_SRC (body))
> >>>>>>>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
> >>>>>>>> +      && GET_MODE(SET_SRC (body)) == QImode)
> >>>>>>>> +    return 1;
> >>>>>>>> +
> >>>>>>>> +  return 0;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +/* Find out zero extension removal candidate with use-def web.*/
> >>>>>>>> +static void
> >>>>>>>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
> >>>>>>>> +                                    rtx insn, df_ref def)
> >>>>>>>> +{
> >>>>>>>> +  struct df_link *link = DF_REF_CHAIN (def);
> >>>>>>>> +
> >>>>>>>> +  rtx move_insn = NULL_RTX;
> >>>>>>>> +  rtx compare_insn = NULL_RTX;
> >>>>>>>> +
> >>>>>>>> +  while (link)
> >>>>>>>> +  {
> >>>>>>>> +    if (!DF_REF_INSN_INFO (link->ref))
> >>>>>>>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
> >>>>>>>> +
> >>>>>>>> +    if (DF_REF_INSN_INFO (link->ref))
> >>>>>>>> +      {
> >>>>>>>> +       rtx use_insn = DF_REF_INSN (link->ref);
> >>>>>>>> +
> >>>>>>>> +       if (GET_CODE (PATTERN (use_insn)) == SET
> >>>>>>>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
> >>>>>>>> +         {
> >>>>>>>> +           if (GET_CODE (PATTERN (insn)) == SET
> >>>>>>>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
> >>>>>>>> +             {
> >>>>>>>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
> >>>>>>>> +
> >>>>>>>> +               if (SUBREG_P (body))
> >>>>>>>> +                 {
> >>>>>>>> +                   compare_insn = use_insn;
> >>>>>>>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
> >>>>>>>> +
> >>>>>>>> +                   if (compare_insn
> >>>>>>>> +                       && ((REGNO (XEXP (compare_body, 0)))
> >>>>>>>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
> >>>>>>>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >>>>>>>> +                 }
> >>>>>>>> +              }
> >>>>>>>> +           }
> >>>>>>>> +
> >>>>>>>> +       if (insn_is_store_p(use_insn)
> >>>>>>>> +           && GET_CODE (PATTERN (insn)) == SET
> >>>>>>>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
> >>>>>>>> +         {
> >>>>>>>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
> >>>>>>>> +             {
> >>>>>>>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
> >>>>>>>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> >>>>>>>> +             }
> >>>>>>>> +         }
> >>>>>>>> +
> >>>>>>>> +       if (insn_is_zext_p (insn))
> >>>>>>>> +         {
> >>>>>>>> +           if (GET_CODE (PATTERN (use_insn)) == SET
> >>>>>>>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
> >>>>>>>> +             {
> >>>>>>>> +               if (move_insn
> >>>>>>>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
> >>>>>>>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
> >>>>>>>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
> >>>>>>>> +                 {
> >>>>>>>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
> >>>>>>>> +                   break;
> >>>>>>>> +                 }
> >>>>>>>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
> >>>>>>>> +                   {
> >>>>>>>> +                     move_insn = use_insn;
> >>>>>>>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
> >>>>>>>> +                   }
> >>>>>>>> +             }
> >>>>>>>> +         }
> >>>>>>>> +
> >>>>>>>> +       if (insn_is_zext_p (use_insn))
> >>>>>>>> +         {
> >>>>>>>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
> >>>>>>>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
> >>>>>>>> +
> >>>>>>>> +           if (insn_is_store_p (insn)
> >>>>>>>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
> >>>>>>>> +           {
> >>>>>>>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
> >>>>>>>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
> >>>>>>>> +             insn_entry[INSN_UID( insn)].is_store = 1;
> >>>>>>>> +           }
> >>>>>>>> +
> >>>>>>>> +          if (NONDEBUG_INSN_P (use_insn))
> >>>>>>>> +            unionfind_union (insn_entry + INSN_UID (insn),
> >>>>>>>> +                             insn_entry + INSN_UID (use_insn));
> >>>>>>>> +       }
> >>>>>>>> +      }
> >>>>>>>> +
> >>>>>>>> +    link = link->next;
> >>>>>>>> +  }
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +/* Replace QImode extensions with copy operations.*/
> >>>>>>>> +static void
> >>>>>>>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
> >>>>>>>> +{
> >>>>>>>> +  rtx_insn *insn = insn_entry[i].insn;
> >>>>>>>> +  rtx body = PATTERN (insn);
> >>>>>>>> +  rtx src_reg;
> >>>>>>>> +  src_reg = XEXP (SET_SRC (body), 0);
> >>>>>>>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
> >>>>>>>> +
> >>>>>>>> +  if (GET_MODE(SET_DEST(body)) != DImode)
> >>>>>>>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
> >>>>>>>> +
> >>>>>>>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
> >>>>>>>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
> >>>>>>>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
> >>>>>>>> +  df_insn_rescan (new_insn);
> >>>>>>>> +
> >>>>>>>> +  df_insn_delete (insn);
> >>>>>>>> +  remove_insn (insn);
> >>>>>>>> +  insn->set_deleted ();
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +/* Main entry point for this pass.  */
> >>>>>>>> +unsigned int
> >>>>>>>> +rs6000_analyze_zext (function *fun)
> >>>>>>>> +{
> >>>>>>>> +  zext_web_entry *insn_entry;
> >>>>>>>> +  basic_block bb;
> >>>>>>>> +  rtx_insn *insn, *curr_insn = 0;
> >>>>>>>> +
> >>>>>>>> +  /* Dataflow analysis for use-def chains.  */
> >>>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >>>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >>>>>>>> +  df_analyze ();
> >>>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >>>>>>>> +
> >>>>>>>> +  /* Rebuild ud- and du-chains.  */
> >>>>>>>> +  df_remove_problem (df_chain);
> >>>>>>>> +  df_process_deferred_rescans ();
> >>>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> >>>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> >>>>>>>> +  df_analyze ();
> >>>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> >>>>>>>> +
> >>>>>>>> +  /* Allocate structure to represent webs of insns.  */
> >>>>>>>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
> >>>>>>>> +
> >>>>>>>> +  /* Walk the insns to gather basic data.  */
> >>>>>>>> +  FOR_ALL_BB_FN (bb, fun)
> >>>>>>>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> >>>>>>>> +    {
> >>>>>>>> +      unsigned int uid = INSN_UID (insn);
> >>>>>>>> +      if (NONDEBUG_INSN_P (insn))
> >>>>>>>> +       {
> >>>>>>>> +         insn_entry[uid].insn = insn;
> >>>>>>>> +
> >>>>>>>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
> >>>>>>>> +           {
> >>>>>>>> +             insn_entry[uid].is_store = 1;
> >>>>>>>> +             insn_entry[uid].is_relevant = 1;
> >>>>>>>> +           }
> >>>>>>>> +
> >>>>>>>> +         /* Walk the uses and defs to identify the optimization
> >>>>>>>> +            candidates.*/
> >>>>>>>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> >>>>>>>> +         df_ref mention;
> >>>>>>>> +
> >>>>>>>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
> >>>>>>>> +           {
> >>>>>>>> +             insn_entry[uid].is_relevant = 1;
> >>>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >>>>>>>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
> >>>>>>>> +           }
> >>>>>>>> +
> >>>>>>>> +         if (insn_entry[uid].is_relevant)
> >>>>>>>> +           {
> >>>>>>>> +             /* Determine if this is a store.  */
> >>>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
> >>>>>>>> +           }
> >>>>>>>> +       }
> >>>>>>>> +     }
> >>>>>>>> +
> >>>>>>>> +   unsigned e = get_max_uid (), i;
> >>>>>>>> +
> >>>>>>>> +   int store_index = -1;
> >>>>>>>> +
> >>>>>>>> +   /* Replace with copy operation.*/
> >>>>>>>> +   for (i = 0; i < e; ++i)
> >>>>>>>> +     {
> >>>>>>>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
> >>>>>>>> +        store_index  = i;
> >>>>>>>> +
> >>>>>>>> +       if ((store_index != -1)
> >>>>>>>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
> >>>>>>>> +         {
> >>>>>>>> +           replace_marked_insns (insn_entry, store_index);
> >>>>>>>> +           replace_marked_insns (insn_entry, i);
> >>>>>>>> +         }
> >>>>>>>> +     }
> >>>>>>>> +    /* Clean up.  */
> >>>>>>>> +    free (insn_entry);
> >>>>>>>> +
> >>>>>>>> +    return 0;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +const pass_data pass_data_analyze_zext =
> >>>>>>>> +{
> >>>>>>>> +  RTL_PASS, /* type */
> >>>>>>>> +  "zext", /* name */
> >>>>>>>> +  OPTGROUP_NONE, /* optinfo_flags */
> >>>>>>>> +  TV_NONE, /* tv_id */
> >>>>>>>> +  0, /* properties_required */
> >>>>>>>> +  0, /* properties_provided */
> >>>>>>>> +  0, /* properties_destroyed */
> >>>>>>>> +  0, /* todo_flags_start */
> >>>>>>>> +  TODO_df_finish, /* todo_flags_finish */
> >>>>>>>> +};
> >>>>>>>> +
> >>>>>>>> +class pass_analyze_zext : public rtl_opt_pass
> >>>>>>>> +{
> >>>>>>>> +public:
> >>>>>>>> +  pass_analyze_zext(gcc::context *ctxt)
> >>>>>>>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
> >>>>>>>> +  {}
> >>>>>>>> +
> >>>>>>>> +  /* opt_pass methods: */
> >>>>>>>> +  virtual bool gate (function *)
> >>>>>>>> +    {
> >>>>>>>> +      return (optimize > 0 );
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +  virtual unsigned int execute (function *fun)
> >>>>>>>> +    {
> >>>>>>>> +      return rs6000_analyze_zext (fun);
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +  opt_pass *clone ()
> >>>>>>>> +    {
> >>>>>>>> +      return new pass_analyze_zext (m_ctxt);
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +}; // class pass_analyze_zext
> >>>>>>>> +
> >>>>>>>> +rtl_opt_pass *
> >>>>>>>> +make_pass_analyze_zext (gcc::context *ctxt)
> >>>>>>>> +{
> >>>>>>>> +  return new pass_analyze_zext (ctxt);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> >>>>>>>> index 8e0b0d022db..6541334bf2d 100644
> >>>>>>>> --- a/gcc/config/rs6000/rs6000.cc
> >>>>>>>> +++ b/gcc/config/rs6000/rs6000.cc
> >>>>>>>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
> >>>>>>>>                                           bool);
> >>>>>>>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
> >>>>>>>>
> >>>>>>>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
> >>>>>>>> +
> >>>>>>>>  /* Hash table stuff for keeping track of TOC entries.  */
> >>>>>>>>
> >>>>>>>>  struct GTY((for_user)) toc_hash_struct
> >>>>>>>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
> >>>>>>>> index f183b42ce1d..c1f61591d2f 100644
> >>>>>>>> --- a/gcc/config/rs6000/t-rs6000
> >>>>>>>> +++ b/gcc/config/rs6000/t-rs6000
> >>>>>>>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
> >>>>>>>>         $(COMPILE) $<
> >>>>>>>>         $(POSTCOMPILE)
> >>>>>>>>
> >>>>>>>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
> >>>>>>>> +       $(COMPILE) $<
> >>>>>>>> +       $(POSTCOMPILE)
> >>>>>>>> +
> >>>>>>>> +
> >>>>>>>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
> >>>>>>>>         $(COMPILE) $<
> >>>>>>>>         $(POSTCOMPILE)
> >>>>>>>> diff --git a/gcc/explow.cc b/gcc/explow.cc
> >>>>>>>> index 32e9498ee07..316aa975e40 100644
> >>>>>>>> --- a/gcc/explow.cc
> >>>>>>>> +++ b/gcc/explow.cc
> >>>>>>>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
> >>>>>>>>    if (! general_operand (x, VOIDmode))
> >>>>>>>>      x = force_operand (x, temp);
> >>>>>>>>
> >>>>>>>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
> >>>>>>>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
> >>>>>>>> +              || GET_MODE (x) == VOIDmode);
> >>>>>>>>    if (x != temp)
> >>>>>>>>      emit_move_insn (temp, x);
> >>>>>>>>    return temp;
> >>>>>>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
> >>>>>>>> index 15be1c8db99..6162ef92b88 100644
> >>>>>>>> --- a/gcc/expr.cc
> >>>>>>>> +++ b/gcc/expr.cc
> >>>>>>>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
> >>>>>>>>    rtx y_cst = NULL_RTX;
> >>>>>>>>    rtx_insn *last_insn;
> >>>>>>>>    rtx set;
> >>>>>>>> -
> >>>>>>>>    gcc_assert (mode != BLKmode
> >>>>>>>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
> >>>>>>>> +             && (mode == DImode || GET_MODE (y) == mode
> >>>>>>>> +             || GET_MODE (y) == VOIDmode));
> >>>>>>>>
> >>>>>>>>    /* If we have a copy that looks like one of the following patterns:
> >>>>>>>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
> >>>>>>>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> >>>>>>>> index 4c641cab192..9d22fadc7ef 100644
> >>>>>>>> --- a/gcc/optabs.cc
> >>>>>>>> +++ b/gcc/optabs.cc
> >>>>>>>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
> >>>>>>>>      input:
> >>>>>>>>        gcc_assert (mode != VOIDmode);
> >>>>>>>>        gcc_assert (GET_MODE (op->value) == VOIDmode
> >>>>>>>> -                 || GET_MODE (op->value) == mode);
> >>>>>>>> +                 || GET_MODE (op->value) == mode
> >>>>>>>> +                 || mode == DImode);
> >>>>>>>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
> >>>>>>>>         return true;
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.31.1
> >>>>>>>>
Ajit Agarwal March 16, 2023, 11:43 a.m. UTC | #10
On 16/03/23 4:26 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 11:43 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>>
>>
>> On 16/03/23 4:00 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>
>>>>
>>>> Hello Richard:
>>>>
>>>> On 16/03/23 3:22 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>>>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>>>>>
>>>>>>>> Hello Richard:
>>>>>>>>
>>>>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>>>>> <gcc-patches@gcc.gnu.org> wrote:
>>>>>>>>>>
>>>>>>>>>> Hello All:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
>>>>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>>>>>
>>>>>>>>> What makes this so special that we cannot deal with it from generic code?
>>>>>>>>> In particular we do have the REE pass, why is target specific
>>>>>>>>> knowledge necessary
>>>>>>>>> to eliminate the extension?
>>>>>>>>>
>>>>>>>>
>>>>>>>> For code that returns bool values and compares against integers, the rtl passes generate the following sequence:
>>>>>>>>
>>>>>>>> set compare (subreg)
>>>>>>>> set if_then_else
>>>>>>>> Convert SImode -> QImode
>>>>>>>> set zero_extend to SImode from QImode
>>>>>>>> set return value 0 in one path of cfg.
>>>>>>>> set return value 1 in other path of cfg.
>>>>>>>>
>>>>>>>> This pass replaces the above zero extension and the conversion from QImode to DImode with a copy operation, keeping the QImode value in 64-bit registers on the powerpc target.
>>>>>>>
>>>>>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>>>>>> cannot even try to see what the actual RTL
>>>>>>> looks like (without the pass).
>>>>>>>
>>>>>>
>>>>>> Here is the PR with bugzilla.
>>>>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>>>>>
>>>>>> I can add the attached testcase with this PR in the patch.
>>>>>
>>>>> I don't see any zero-extends there.
>>>>>
>>>>
>>>> Here is the testcase.
>>>>
>>>>
>>>> bool foo (int a, int b)
>>>> {
>>>>           if (a > 2)
>>>>                       return false;
>>>>            if (b < 10)
>>>>                        return true;
>>>>              return false;
>>>> }
>>>>
>>>> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>>>>
>>>> Here is the rtl after cse.
>>>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>>>> (insn 15 12 16 3 (set (reg:CC 123)
>>>>         (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>>>>             (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>>>>      (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>>>>         (nil)))
>>>> (insn 16 15 17 3 (set (reg:SI 124)
>>>>         (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>>>>      (nil))
>>>> (insn 17 16 18 3 (set (reg:SI 122)
>>>>         (if_then_else:SI (gt (reg:CC 123)
>>>>                 (const_int 0 [0]))
>>>>             (const_int 0 [0])
>>>>             (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>>>>      (expr_list:REG_DEAD (reg:SI 124)
>>>>         (expr_list:REG_DEAD (reg:CC 123)
>>>>             (nil))))
>>>> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>>>>         (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>>>>      (expr_list:REG_DEAD (reg:SI 122)
>>>>         (nil)))
>>>>       ; pc falls through to BB 5
>>>> (code_label 32 18 31 4 3 (nil) [1 uses])
>>>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>>>> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>>>>         (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>>>>      (nil))
>>>> (code_label 19 5 20 5 2 (nil) [0 uses])
>>>> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>>>> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>>>>         (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
>>>>      (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>>>>         (nil)))
>>>> (insn 22 21 26 5 (set (reg:DI 118 [ <retval> ])
>>>>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>>>      (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
>>>>         (nil)))
>>>> (insn 26 22 27 5 (set (reg/i:DI 3 3)
>>>>         (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>>>      (expr_list:REG_DEAD (reg:DI 118 [ <retval> ])
>>>>         (nil)))
>>>> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
>>>>      (nil))
>>>
>>> But after combine there's just
>>>
>>> (note 6 0 38 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
>>> (insn 38 6 2 2 (set (reg:DI 126)
>>>         (reg:DI 3 3 [ a ])) "t.c":3:1 634 {*movdi_internal64}
>>>      (expr_list:REG_DEAD (reg:DI 3 3 [ a ])
>>>         (nil)))
>>> (note 2 38 39 2 NOTE_INSN_DELETED)
>>> (insn 39 2 3 2 (set (reg:DI 127)
>>>         (reg:DI 4 4 [ b ])) "t.c":3:1 634 {*movdi_internal64}
>>>      (expr_list:REG_DEAD (reg:DI 4 4 [ b ])
>>>         (nil)))
>>> (insn 3 39 4 2 (set (reg/v:DI 119 [ b ])
>>>         (reg:DI 127)) "t.c":3:1 634 {*movdi_internal64}
>>>      (expr_list:REG_DEAD (reg:DI 127)
>>>         (nil)))
>>> (note 4 3 10 2 NOTE_INSN_FUNCTION_BEG)
>>> (insn 10 4 11 2 (set (reg:CC 120)
>>>         (compare:CC (subreg/s/u:SI (reg:DI 126) 0)
>>>             (const_int 2 [0x2]))) "t.c":4:6 755 {*cmpsi_signed}
>>>      (expr_list:REG_DEAD (reg:DI 126)
>>>         (nil)))
>>> (jump_insn 11 10 12 2 (set (pc)
>>>         (if_then_else (gt (reg:CC 120)
>>>                 (const_int 0 [0]))
>>>             (label_ref:DI 32)
>>>             (pc))) "t.c":4:6 838 {*cbranch}
>>>      (expr_list:REG_DEAD (reg:CC 120)
>>>         (int_list:REG_BR_PROB 365072228 (nil)))
>>>  -> 32)
>>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>>> (note 15 12 16 3 NOTE_INSN_DELETED)
>>> (note 16 15 17 3 NOTE_INSN_DELETED)
>>> (note 17 16 19 3 NOTE_INSN_DELETED)
>>> (insn 19 17 32 3 (parallel [
>>>             (set (reg:DI 117 [ <retval> ])
>>>                 (le:DI (subreg/s/u:SI (reg/v:DI 119 [ b ]) 0)
>>>                     (const_int 9 [0x9])))
>>>             (clobber (scratch:DI))
>>>             (clobber (scratch:DI))
>>>             (clobber (scratch:CC))
>>>         ]) "t.c":6:6 783 {ledisi2_isel}
>>>      (expr_list:REG_DEAD (reg/v:DI 119 [ b ])
>>>         (nil)))
>>>       ; pc falls through to BB 5
>>> (code_label 32 19 31 4 3 (nil) [1 uses])
>>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>>> (insn 5 31 20 4 (set (reg:DI 117 [ <retval> ])
>>>         (const_int 0 [0])) "t.c":5:12 634 {*movdi_internal64}
>>>      (nil))
>>> (code_label 20 5 21 5 2 (nil) [0 uses])
>>> (note 21 20 26 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>>> (insn 26 21 27 5 (set (reg/i:DI 3 3)
>>>         (reg:DI 117 [ <retval> ])) "t.c":9:1 634 {*movdi_internal64}
>>>      (expr_list:REG_DEAD (reg:DI 117 [ <retval> ])
>>>         (nil)))
>>> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "t.c":9:1 -1
>>>      (nil))
>>>
>>> and we get
>>>
>>> foo:
>>> .LFB0:
>>>         .cfi_startproc
>>>         cmpwi 0,3,2
>>>         bgt 0,.L3
>>>         cmpwi 0,4,9
>>>         li 3,1
>>>         isel 3,0,3,1
>>>         blr
>>>         .p2align 4,,15
>>> .L3:
>>>         li 3,0
>>>         blr
>>>
>>> where I don't see what we can do better (ok, not knowing ppc very much)
>>>
>>
>> After combine I get the following:
>>
>> (insn 10 4 11 2 (set (reg:CC 121)
>>         (compare:CC (subreg/s/u:SI (reg:DI 127) 0)
>>             (const_int 2 [0x2]))) "ext.cc":3:4 796 {*cmpsi_signed}
>>      (expr_list:REG_DEAD (reg:DI 127)
>>         (nil)))
>> (jump_insn 11 10 12 2 (set (pc)
>>         (if_then_else (gt (reg:CC 121)
>>                 (const_int 0 [0]))
>>             (label_ref:DI 32)
>>             (pc))) "ext.cc":3:4 879 {*cbranch}
>>      (expr_list:REG_DEAD (reg:CC 121)
>>         (int_list:REG_BR_PROB 365072228 (nil)))
>>  -> 32)
>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>> (note 15 12 16 3 NOTE_INSN_DELETED)
>> (note 16 15 17 3 NOTE_INSN_DELETED)
>> (insn 17 16 18 3 (parallel [
>>             (set (reg:SI 122)
>>                 (le:SI (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>>                     (const_int 9 [0x9])))
>>             (clobber (scratch:SI))
>>             (clobber (scratch:SI))
>>             (clobber (scratch:CC))
>>         ]) "ext.cc":5:5 814 {lesisi2_isel}
>>      (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>>         (nil)))
>> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>>         (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>>      (expr_list:REG_DEAD (reg:SI 122)
>>         (nil)))
>>       ; pc falls through to BB 5
>> (code_label 32 18 31 4 3 (nil) [1 uses])
>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>>         (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>>      (nil))
>> (code_label 19 5 20 5 2 (nil) [0 uses])
>> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>> (note 21 20 26 5 NOTE_INSN_DELETED)
>> (insn 26 21 27 5 (set (reg/i:DI 3 %r3)
>>         (and:DI (subreg:DI (reg:QI 117 [ _1 ]) 0)
>>             (const_int 1 [0x1]))) "ext.cc":8:1 207 {anddi3_mask}
>>      (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>>         (nil)))
>> (insn 27 26 0 5 (use (reg/i:DI 3 %r3)) "ext.cc":8:1 -1
>>      (nil))
>>
>> and here is the assembly:
>>
>>
>>         .file   "ext.cc"
>>         .machine power9
>>         .abiversion 2
>>         .section        ".text"
>>         .align 2
>>         .p2align 4,,15
>>         .globl _Z3fooii
>>         .type   _Z3fooii, @function
>> _Z3fooii:
>> .LFB0:
>>         .cfi_startproc
>>         cmpwi %cr0,%r3,2
>>         bgt %cr0,.L3
>>         cmpwi %cr0,%r4,9
>>         li %r3,1
>>         isel %r3,0,%r3,1
>>         rldicl %r3,%r3,0,63
>>         blr
>>         .p2align 4,,15
>> .L3:
>>         li %r3,0
>>         rldicl %r3,%r3,0,63
>>         blr
>>         .long 0
>>         .byte 0,9,0,0,0,0,0,0
>>         .cfi_endproc
>> .LFE0:
>>         .size   _Z3fooii,.-_Z3fooii
>>         .ident  "GCC: (GNU) 13.0.1 20230310 (experimental)"
>>         .section        .note.GNU-stack,"",@progbits
>>
>> Did you try with -O3?
> 
> Yes (but the tree I tried from is somewhat old it seems - sorry for
> that).  But it
> means it at least worked at some point?

Yes. In older versions of gcc, the if_then_else, the conversion from SImode to DImode, and the zero_extend are all in the same basic block (cfg), and hence the dce pass removes them; this is not the case in newer versions of gcc.

Thanks & Regards
Ajit
> 
> Richard.
> 
>> Thanks & Regards
>> Ajit
>>>>
>>>> Thanks & Regards
>>>> Ajit
>>>>
>>>>>> Thanks & Regards
>>>>>> Ajit
>>>>>>> Richard.
>>>>>>>
>>>>>>>> Thanks & Regards
>>>>>>>> Ajit
>>>>>>>>>> +  In cfgexpand pass QImode is generated with
>>>>>>>>>> +  bool register value and this pass uses QI
>>>>>>>>>> +  as 64 bit registers.
>>>>>>>>>> +
>>>>>>>>
>>>>>>>>>>         rs6000: suboptimal code for returning bool value on target ppc.
>>>>>>>>>>
>>>>>>>>>>         New pass to eliminate unnecessary zero extension. This pass
>>>>>>>>>>         is registered after cse rtl pass.
>>>>>>>>>>
>>>>>>>>>>         2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>>>>>>>>>
>>>>>>>>>> gcc/ChangeLog:
>>>>>>>>>>
>>>>>>>>>>         * config/rs6000/rs6000-passes.def: Registered zero elimination
>>>>>>>>>>         pass.
>>>>>>>>>>         * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>>>>>>>>>>         * config.gcc: Add new executable.
>>>>>>>>>>         * config/rs6000/rs6000-protos.h: Add new prototype for zero
>>>>>>>>>>         elimination pass.
>>>>>>>>>>         * config/rs6000/rs6000.cc: Add new prototype for zero
>>>>>>>>>>         elimination pass.
>>>>>>>>>>         * config/rs6000/t-rs6000: Add new rule.
>>>>>>>>>>         * expr.cc: Modified gcc assert.
>>>>>>>>>>         * explow.cc: Modified gcc assert.
>>>>>>>>>>         * optabs.cc: Modified gcc assert.
>>>>>>>>>> ---
>>>>>>>>>>  gcc/config.gcc                        |   4 +-
>>>>>>>>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>>>>>>>>>  gcc/config/rs6000/rs6000-protos.h     |   1 +
>>>>>>>>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
>>>>>>>>>>  gcc/config/rs6000/rs6000.cc           |   2 +
>>>>>>>>>>  gcc/config/rs6000/t-rs6000            |   5 +
>>>>>>>>>>  gcc/explow.cc                         |   3 +-
>>>>>>>>>>  gcc/expr.cc                           |   4 +-
>>>>>>>>>>  gcc/optabs.cc                         |   3 +-
>>>>>>>>>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>>>>>>>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>>>>>>
>>>>>>>>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>>>>>>>>>> index da3a6d3ba1f..e8ac9d882f0 100644
>>>>>>>>>> --- a/gcc/config.gcc
>>>>>>>>>> +++ b/gcc/config.gcc
>>>>>>>>>> @@ -503,7 +503,7 @@ or1k*-*-*)
>>>>>>>>>>         ;;
>>>>>>>>>>  powerpc*-*-*)
>>>>>>>>>>         cpu_type=rs6000
>>>>>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>>>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>>>>>>>         extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>>>>>>>>>>         extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>>>>>>>>>> @@ -538,7 +538,7 @@ riscv*)
>>>>>>>>>>         ;;
>>>>>>>>>>  rs6000*-*-*)
>>>>>>>>>>         extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
>>>>>>>>>> -       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>>>>>>>> +       extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>>>>>>>>>>         extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>>>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
>>>>>>>>>>         target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>>>>>>>>>> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
>>>>>>>>>> index ca899d5f7af..d7500feddf1 100644
>>>>>>>>>> --- a/gcc/config/rs6000/rs6000-passes.def
>>>>>>>>>> +++ b/gcc/config/rs6000/rs6000-passes.def
>>>>>>>>>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>>>>>>>>>>       The power8 does not have instructions that automaticaly do the byte swaps
>>>>>>>>>>       for loads and stores.  */
>>>>>>>>>>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
>>>>>>>>>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
>>>>>>>>>> +
>>>>>>>>>>
>>>>>>>>>>    /* Pass to do the PCREL_OPT optimization that combines the load of an
>>>>>>>>>>       external symbol's address along with a single load or store using that
>>>>>>>>>> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
>>>>>>>>>> index 1a4fc1df668..f6cf2d673d4 100644
>>>>>>>>>> --- a/gcc/config/rs6000/rs6000-protos.h
>>>>>>>>>> +++ b/gcc/config/rs6000/rs6000-protos.h
>>>>>>>>>> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>>>>>>>>>>  class rtl_opt_pass;
>>>>>>>>>>
>>>>>>>>>>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
>>>>>>>>>> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>>>>>>>>>>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>>>>>>>>>>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>>>>>>>>>>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
>>>>>>>>>> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 00000000000..777c7a5a387
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
>>>>>>>>>> @@ -0,0 +1,361 @@
>>>>>>>>>> +/* Subroutine to eliminate redundant zero extend for power architecture.
>>>>>>>>>> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
>>>>>>>>>> +
>>>>>>>>>> +   This file is part of GCC.
>>>>>>>>>> +
>>>>>>>>>> +   GCC is free software; you can redistribute it and/or modify it
>>>>>>>>>> +   under the terms of the GNU General Public License as published
>>>>>>>>>> +   by the Free Software Foundation; either version 3, or (at your
>>>>>>>>>> +   option) any later version.
>>>>>>>>>> +
>>>>>>>>>> +   GCC is distributed in the hope that it will be useful, but WITHOUT
>>>>>>>>>> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
>>>>>>>>>> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>>>>>>>> +   License for more details.
>>>>>>>>>> +
>>>>>>>>>> +   You should have received a copy of the GNU General Public License
>>>>>>>>>> +   along with GCC; see the file COPYING3.  If not see
>>>>>>>>>> +   <http://www.gnu.org/licenses/>.  */
>>>>>>>>>> +
>>>>>>>>>> +/* This pass remove unnecessary zero extension instruction from
>>>>>>>>>> +  power generated assembly. This pass is register after cse
>>>>>>>>>> +  pass.
>>>>>>>>>> +  Identifies the following sequence of instruction after cse
>>>>>>>>>> +  rtl pass.
>>>>>>>>>> +
>>>>>>>>>> +  set compare (subreg)
>>>>>>>>>> +  set if_then_else
>>>>>>>>>> +  set SImode -> QImode
>>>>>>>>>> +  set zero_extend to DImode from QImode
>>>>>>>>>> +  set return value 0 in one path of cfg.
>>>>>>>>>> +  set return value 1 in other path of cfg.
>>>>>>>>>> +
>>>>>>>>>> +  In cfgexpand pass QImode is generated with
>>>>>>>>>> +  bool register value and this pass uses QI
>>>>>>>>>> +  as 64 bit registers.
>>>>>>>>>> +
>>>>>>>>>> +  This pass replace copy operation from QImode to DImode
>>>>>>>>>> +  and return appropriate return values.*/
>>>>>>>>>> +
>>>>>>>>>> +#define IN_TARGET_CODE 1
>>>>>>>>>> +
>>>>>>>>>> +#include "config.h"
>>>>>>>>>> +#include "system.h"
>>>>>>>>>> +#include "coretypes.h"
>>>>>>>>>> +#include "backend.h"
>>>>>>>>>> +#include "rtl.h"
>>>>>>>>>> +#include "tree.h"
>>>>>>>>>> +#include "memmodel.h"
>>>>>>>>>> +#include "df.h"
>>>>>>>>>> +#include "tm_p.h"
>>>>>>>>>> +#include "ira.h"
>>>>>>>>>> +#include "print-tree.h"
>>>>>>>>>> +#include "varasm.h"
>>>>>>>>>> +#include "explow.h"
>>>>>>>>>> +#include "expr.h"
>>>>>>>>>> +#include "output.h"
>>>>>>>>>> +#include "tree-pass.h"
>>>>>>>>>> +
>>>>>>>>>> +/* This is based on the union-find logic in web.cc.  web_entry_base is
>>>>>>>>>> +   defined in df.h.  */
>>>>>>>>>> +class zext_web_entry : public web_entry_base
>>>>>>>>>> +{
>>>>>>>>>> + public:
>>>>>>>>>> +  /* Pointer to the insn.  */
>>>>>>>>>> +  rtx_insn *insn;
>>>>>>>>>> +  unsigned int is_relevant : 1;
>>>>>>>>>> +  /* Set if insn is a load.  */
>>>>>>>>>> +  unsigned int is_load : 1;
>>>>>>>>>> +  /* Set if insn is a store.  */
>>>>>>>>>> +  unsigned int is_store : 1;
>>>>>>>>>> +  unsigned int is_zext :1 ;
>>>>>>>>>> +  unsigned int is_move :1;
>>>>>>>>>> +  unsigned int is_delete_move :1;
>>>>>>>>>> +  /* Set if this insn should be deleted.  */
>>>>>>>>>> +  unsigned int will_delete : 1;
>>>>>>>>>> +  unsigned int will_delete_chances : 1;
>>>>>>>>>> +};
>>>>>>>>>> +
>>>>>>>>>> +/* Checks if instruction is zero extension
>>>>>>>>>> + * with QIMode to DImode.*/
>>>>>>>>>> +static unsigned int
>>>>>>>>>> +insn_is_zext_p(rtx insn)
>>>>>>>>>> +{
>>>>>>>>>> +  rtx body = PATTERN (insn);
>>>>>>>>>> +
>>>>>>>>>> +  if (GET_CODE (body) == SET
>>>>>>>>>> +      && GET_MODE(SET_DEST (body)) == DImode
>>>>>>>>>> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
>>>>>>>>>> +  {
>>>>>>>>>> +    rtx set = XEXP (SET_SRC (body), 0);
>>>>>>>>>> +
>>>>>>>>>> +    if (REG_P (set))
>>>>>>>>>> +    {
>>>>>>>>>> +      if (GET_MODE (set) == QImode) return 1;
>>>>>>>>>> +    }
>>>>>>>>>> +    else
>>>>>>>>>> +      return 0;
>>>>>>>>>> +  }
>>>>>>>>>> +  return 0;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +/* Checks if instruction is SET operation with QImode.*/
>>>>>>>>>> +static unsigned int
>>>>>>>>>> +insn_is_store_p (rtx insn)
>>>>>>>>>> +{
>>>>>>>>>> +  rtx body = PATTERN (insn);
>>>>>>>>>> +  if (GET_CODE (body) == SET
>>>>>>>>>> +      && SUBREG_P(SET_SRC (body))
>>>>>>>>>> +      && !CONST_INT_P(SET_SRC (body))
>>>>>>>>>> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
>>>>>>>>>> +      && GET_MODE(SET_SRC (body)) == QImode)
>>>>>>>>>> +    return 1;
>>>>>>>>>> +
>>>>>>>>>> +  return 0;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +/* Find out zero extension removal candidate with use-def web.*/
>>>>>>>>>> +static void
>>>>>>>>>> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
>>>>>>>>>> +                                    rtx insn, df_ref def)
>>>>>>>>>> +{
>>>>>>>>>> +  struct df_link *link = DF_REF_CHAIN (def);
>>>>>>>>>> +
>>>>>>>>>> +  rtx move_insn = NULL_RTX;
>>>>>>>>>> +  rtx compare_insn = NULL_RTX;
>>>>>>>>>> +
>>>>>>>>>> +  while (link)
>>>>>>>>>> +  {
>>>>>>>>>> +    if (!DF_REF_INSN_INFO (link->ref))
>>>>>>>>>> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
>>>>>>>>>> +
>>>>>>>>>> +    if (DF_REF_INSN_INFO (link->ref))
>>>>>>>>>> +      {
>>>>>>>>>> +       rtx use_insn = DF_REF_INSN (link->ref);
>>>>>>>>>> +
>>>>>>>>>> +       if (GET_CODE (PATTERN (use_insn)) == SET
>>>>>>>>>> +           && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
>>>>>>>>>> +         {
>>>>>>>>>> +           if (GET_CODE (PATTERN (insn)) == SET
>>>>>>>>>> +               && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
>>>>>>>>>> +             {
>>>>>>>>>> +               rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
>>>>>>>>>> +
>>>>>>>>>> +               if (SUBREG_P (body))
>>>>>>>>>> +                 {
>>>>>>>>>> +                   compare_insn = use_insn;
>>>>>>>>>> +                   rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
>>>>>>>>>> +
>>>>>>>>>> +                   if (compare_insn
>>>>>>>>>> +                       && ((REGNO (XEXP (compare_body, 0)))
>>>>>>>>>> +                               == REGNO (SET_DEST (PATTERN (insn)))))
>>>>>>>>>> +                     insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>>>>>>>> +                 }
>>>>>>>>>> +              }
>>>>>>>>>> +           }
>>>>>>>>>> +
>>>>>>>>>> +       if (insn_is_store_p(use_insn)
>>>>>>>>>> +           && GET_CODE (PATTERN (insn)) == SET
>>>>>>>>>> +           && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
>>>>>>>>>> +         {
>>>>>>>>>> +           if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
>>>>>>>>>> +             {
>>>>>>>>>> +               if (insn_entry[INSN_UID(insn)].will_delete_chances)
>>>>>>>>>> +                 insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
>>>>>>>>>> +             }
>>>>>>>>>> +         }
>>>>>>>>>> +
>>>>>>>>>> +       if (insn_is_zext_p (insn))
>>>>>>>>>> +         {
>>>>>>>>>> +           if (GET_CODE (PATTERN (use_insn)) == SET
>>>>>>>>>> +               && REG_P (SET_SRC (PATTERN (use_insn))))
>>>>>>>>>> +             {
>>>>>>>>>> +               if (move_insn
>>>>>>>>>> +                   && REGNO (SET_SRC (PATTERN (use_insn)))
>>>>>>>>>> +                      == REGNO (SET_SRC (PATTERN (move_insn)))
>>>>>>>>>> +                   && insn_entry[INSN_UID(insn)].is_delete_move)
>>>>>>>>>> +                 {
>>>>>>>>>> +                   insn_entry[INSN_UID (insn)].is_move = 1;
>>>>>>>>>> +                   break;
>>>>>>>>>> +                 }
>>>>>>>>>> +                 else if (insn_entry[INSN_UID (insn)].will_delete)
>>>>>>>>>> +                   {
>>>>>>>>>> +                     move_insn = use_insn;
>>>>>>>>>> +                     insn_entry[INSN_UID(insn)].is_delete_move= 1;
>>>>>>>>>> +                   }
>>>>>>>>>> +             }
>>>>>>>>>> +         }
>>>>>>>>>> +
>>>>>>>>>> +       if (insn_is_zext_p (use_insn))
>>>>>>>>>> +         {
>>>>>>>>>> +           insn_entry[INSN_UID (use_insn)].is_zext = 1;
>>>>>>>>>> +           insn_entry[INSN_UID(use_insn)].is_relevant = 1;
>>>>>>>>>> +
>>>>>>>>>> +           if (insn_is_store_p (insn)
>>>>>>>>>> +               && insn_entry[INSN_UID (insn)].will_delete_chances)
>>>>>>>>>> +           {
>>>>>>>>>> +             insn_entry[INSN_UID (use_insn)].will_delete = 1;
>>>>>>>>>> +             insn_entry[INSN_UID (insn)].will_delete = 1;
>>>>>>>>>> +             insn_entry[INSN_UID( insn)].is_store = 1;
>>>>>>>>>> +           }
>>>>>>>>>> +
>>>>>>>>>> +          if (NONDEBUG_INSN_P (use_insn))
>>>>>>>>>> +            unionfind_union (insn_entry + INSN_UID (insn),
>>>>>>>>>> +                             insn_entry + INSN_UID (use_insn));
>>>>>>>>>> +       }
>>>>>>>>>> +      }
>>>>>>>>>> +
>>>>>>>>>> +    link = link->next;
>>>>>>>>>> +  }
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +/* Replace QImode extensions with copy operations.*/
>>>>>>>>>> +static void
>>>>>>>>>> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
>>>>>>>>>> +{
>>>>>>>>>> +  rtx_insn *insn = insn_entry[i].insn;
>>>>>>>>>> +  rtx body = PATTERN (insn);
>>>>>>>>>> +  rtx src_reg;
>>>>>>>>>> +  src_reg = XEXP (SET_SRC (body), 0);
>>>>>>>>>> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
>>>>>>>>>> +
>>>>>>>>>> +  if (GET_MODE(SET_DEST(body)) != DImode)
>>>>>>>>>> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
>>>>>>>>>> +
>>>>>>>>>> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
>>>>>>>>>> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
>>>>>>>>>> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
>>>>>>>>>> +  df_insn_rescan (new_insn);
>>>>>>>>>> +
>>>>>>>>>> +  df_insn_delete (insn);
>>>>>>>>>> +  remove_insn (insn);
>>>>>>>>>> +  insn->set_deleted ();
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +/* Main entry point for this pass.  */
>>>>>>>>>> +unsigned int
>>>>>>>>>> +rs6000_analyze_zext (function *fun)
>>>>>>>>>> +{
>>>>>>>>>> +  zext_web_entry *insn_entry;
>>>>>>>>>> +  basic_block bb;
>>>>>>>>>> +  rtx_insn *insn, *curr_insn = 0;
>>>>>>>>>> +
>>>>>>>>>> +  /* Dataflow analysis for use-def chains.  */
>>>>>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>>>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>>>>>>>> +  df_analyze ();
>>>>>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>>>>>>>> +
>>>>>>>>>> +  /* Rebuild ud- and du-chains.  */
>>>>>>>>>> +  df_remove_problem (df_chain);
>>>>>>>>>> +  df_process_deferred_rescans ();
>>>>>>>>>> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
>>>>>>>>>> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
>>>>>>>>>> +  df_analyze ();
>>>>>>>>>> +  df_set_flags (DF_DEFER_INSN_RESCAN);
>>>>>>>>>> +
>>>>>>>>>> +  /* Allocate structure to represent webs of insns.  */
>>>>>>>>>> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
>>>>>>>>>> +
>>>>>>>>>> +  /* Walk the insns to gather basic data.  */
>>>>>>>>>> +  FOR_ALL_BB_FN (bb, fun)
>>>>>>>>>> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
>>>>>>>>>> +    {
>>>>>>>>>> +      unsigned int uid = INSN_UID (insn);
>>>>>>>>>> +      if (NONDEBUG_INSN_P (insn))
>>>>>>>>>> +       {
>>>>>>>>>> +         insn_entry[uid].insn = insn;
>>>>>>>>>> +
>>>>>>>>>> +         if (GET_CODE (insn) == insn_is_store_p (insn))
>>>>>>>>>> +           {
>>>>>>>>>> +             insn_entry[uid].is_store = 1;
>>>>>>>>>> +             insn_entry[uid].is_relevant = 1;
>>>>>>>>>> +           }
>>>>>>>>>> +
>>>>>>>>>> +         /* Walk the uses and defs to identify the optimization
>>>>>>>>>> +            candidates.*/
>>>>>>>>>> +         struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
>>>>>>>>>> +         df_ref mention;
>>>>>>>>>> +
>>>>>>>>>> +         FOR_EACH_INSN_INFO_DEF (mention, insn_info)
>>>>>>>>>> +           {
>>>>>>>>>> +             insn_entry[uid].is_relevant = 1;
>>>>>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>>>>>>>> +             find_zero_ext_elimination_candidate (insn_entry, insn, mention);
>>>>>>>>>> +           }
>>>>>>>>>> +
>>>>>>>>>> +         if (insn_entry[uid].is_relevant)
>>>>>>>>>> +           {
>>>>>>>>>> +             /* Determine if this is a store.  */
>>>>>>>>>> +             insn_entry[uid].is_store = insn_is_store_p (insn);
>>>>>>>>>> +           }
>>>>>>>>>> +       }
>>>>>>>>>> +     }
>>>>>>>>>> +
>>>>>>>>>> +   unsigned e = get_max_uid (), i;
>>>>>>>>>> +
>>>>>>>>>> +   int store_index = -1;
>>>>>>>>>> +
>>>>>>>>>> +   /* Replace with copy operation.*/
>>>>>>>>>> +   for (i = 0; i < e; ++i)
>>>>>>>>>> +     {
>>>>>>>>>> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
>>>>>>>>>> +        store_index  = i;
>>>>>>>>>> +
>>>>>>>>>> +       if ((store_index != -1)
>>>>>>>>>> +            && insn_entry[i].is_move && insn_entry[i].will_delete)
>>>>>>>>>> +         {
>>>>>>>>>> +           replace_marked_insns (insn_entry, store_index);
>>>>>>>>>> +           replace_marked_insns (insn_entry, i);
>>>>>>>>>> +         }
>>>>>>>>>> +     }
>>>>>>>>>> +    /* Clean up.  */
>>>>>>>>>> +    free (insn_entry);
>>>>>>>>>> +
>>>>>>>>>> +    return 0;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> +const pass_data pass_data_analyze_zext =
>>>>>>>>>> +{
>>>>>>>>>> +  RTL_PASS, /* type */
>>>>>>>>>> +  "zext", /* name */
>>>>>>>>>> +  OPTGROUP_NONE, /* optinfo_flags */
>>>>>>>>>> +  TV_NONE, /* tv_id */
>>>>>>>>>> +  0, /* properties_required */
>>>>>>>>>> +  0, /* properties_provided */
>>>>>>>>>> +  0, /* properties_destroyed */
>>>>>>>>>> +  0, /* todo_flags_start */
>>>>>>>>>> +  TODO_df_finish, /* todo_flags_finish */
>>>>>>>>>> +};
>>>>>>>>>> +
>>>>>>>>>> +class pass_analyze_zext : public rtl_opt_pass
>>>>>>>>>> +{
>>>>>>>>>> +public:
>>>>>>>>>> +  pass_analyze_zext(gcc::context *ctxt)
>>>>>>>>>> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
>>>>>>>>>> +  {}
>>>>>>>>>> +
>>>>>>>>>> +  /* opt_pass methods: */
>>>>>>>>>> +  virtual bool gate (function *)
>>>>>>>>>> +    {
>>>>>>>>>> +      return (optimize > 0 );
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +  virtual unsigned int execute (function *fun)
>>>>>>>>>> +    {
>>>>>>>>>> +      return rs6000_analyze_zext (fun);
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +  opt_pass *clone ()
>>>>>>>>>> +    {
>>>>>>>>>> +      return new pass_analyze_zext (m_ctxt);
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +}; // class pass_analyze_zext
>>>>>>>>>> +
>>>>>>>>>> +rtl_opt_pass *
>>>>>>>>>> +make_pass_analyze_zext (gcc::context *ctxt)
>>>>>>>>>> +{
>>>>>>>>>> +  return new pass_analyze_zext (ctxt);
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>>>>>>>>>> index 8e0b0d022db..6541334bf2d 100644
>>>>>>>>>> --- a/gcc/config/rs6000/rs6000.cc
>>>>>>>>>> +++ b/gcc/config/rs6000/rs6000.cc
>>>>>>>>>> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
>>>>>>>>>>                                           bool);
>>>>>>>>>>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>>>>>>>>>>
>>>>>>>>>> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
>>>>>>>>>> +
>>>>>>>>>>  /* Hash table stuff for keeping track of TOC entries.  */
>>>>>>>>>>
>>>>>>>>>>  struct GTY((for_user)) toc_hash_struct
>>>>>>>>>> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
>>>>>>>>>> index f183b42ce1d..c1f61591d2f 100644
>>>>>>>>>> --- a/gcc/config/rs6000/t-rs6000
>>>>>>>>>> +++ b/gcc/config/rs6000/t-rs6000
>>>>>>>>>> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
>>>>>>>>>>         $(COMPILE) $<
>>>>>>>>>>         $(POSTCOMPILE)
>>>>>>>>>>
>>>>>>>>>> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
>>>>>>>>>> +       $(COMPILE) $<
>>>>>>>>>> +       $(POSTCOMPILE)
>>>>>>>>>> +
>>>>>>>>>> +
>>>>>>>>>>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
>>>>>>>>>>         $(COMPILE) $<
>>>>>>>>>>         $(POSTCOMPILE)
>>>>>>>>>> diff --git a/gcc/explow.cc b/gcc/explow.cc
>>>>>>>>>> index 32e9498ee07..316aa975e40 100644
>>>>>>>>>> --- a/gcc/explow.cc
>>>>>>>>>> +++ b/gcc/explow.cc
>>>>>>>>>> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
>>>>>>>>>>    if (! general_operand (x, VOIDmode))
>>>>>>>>>>      x = force_operand (x, temp);
>>>>>>>>>>
>>>>>>>>>> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
>>>>>>>>>> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
>>>>>>>>>> +              || GET_MODE (x) == VOIDmode);
>>>>>>>>>>    if (x != temp)
>>>>>>>>>>      emit_move_insn (temp, x);
>>>>>>>>>>    return temp;
>>>>>>>>>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>>>>>>>>>> index 15be1c8db99..6162ef92b88 100644
>>>>>>>>>> --- a/gcc/expr.cc
>>>>>>>>>> +++ b/gcc/expr.cc
>>>>>>>>>> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
>>>>>>>>>>    rtx y_cst = NULL_RTX;
>>>>>>>>>>    rtx_insn *last_insn;
>>>>>>>>>>    rtx set;
>>>>>>>>>> -
>>>>>>>>>>    gcc_assert (mode != BLKmode
>>>>>>>>>> -             && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
>>>>>>>>>> +             && (mode == DImode || GET_MODE (y) == mode
>>>>>>>>>> +             || GET_MODE (y) == VOIDmode));
>>>>>>>>>>
>>>>>>>>>>    /* If we have a copy that looks like one of the following patterns:
>>>>>>>>>>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
>>>>>>>>>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
>>>>>>>>>> index 4c641cab192..9d22fadc7ef 100644
>>>>>>>>>> --- a/gcc/optabs.cc
>>>>>>>>>> +++ b/gcc/optabs.cc
>>>>>>>>>> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
>>>>>>>>>>      input:
>>>>>>>>>>        gcc_assert (mode != VOIDmode);
>>>>>>>>>>        gcc_assert (GET_MODE (op->value) == VOIDmode
>>>>>>>>>> -                 || GET_MODE (op->value) == mode);
>>>>>>>>>> +                 || GET_MODE (op->value) == mode
>>>>>>>>>> +                 || mode == DImode);
>>>>>>>>>>        if (maybe_legitimize_operand_same_code (icode, opno, op))
>>>>>>>>>>         return true;
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> 2.31.1
>>>>>>>>>>
Jeff Law March 16, 2023, 2:48 p.m. UTC | #11
On 3/16/23 04:11, Ajit Agarwal via Gcc-patches wrote:
> 
> Hello Richard:
> 
> On 16/03/23 3:22 pm, Richard Biener wrote:
>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>
>>>
>>>
>>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>>
>>>>> Hello Richard:
>>>>>
>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>> <gcc-patches@gcc.gnu.org> wrote:
>>>>>>>
>>>>>>> Hello All:
>>>>>>>
>>>>>>>
>>>>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>>
>>>>>> What makes this so special that we cannot deal with it from generic code?
>>>>>> In particular we do have the REE pass, why is target specific
>>>>>> knowledge neccessary
>>>>>> to eliminate the extension?
>>>>>>
>>>>>
>>>>> For returning bool values and comparision with integers generates the following by all the rtl passes.
>>>>>
>>>>> set compare (subreg)
>>>>> set if_then_else
>>>>> Convert SImode -> QImode
>>>>> set zero_extend to SImode from QImode
>>>>> set return value 0 in one path of cfg.
>>>>> set return value 1 in other path of cfg.
>>>>>
>>>>> This pass replaces the above zero extension and conversion from QImode to DImode with copy operation to keep QImode in 64 bit registers in powerpc target.
>>>>
>>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>>> cannot even try to see what the actual RTL
>>>> looks like (without the pass).
>>>>
>>>
>>> Here is the PR with bugzilla.
>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>>
>>> I can add the attached testcase with this PR in the patch.
>>
>> I don't see any zero-extends there.
>>
> 
> Here is the testcase.
> 
> 
> bool (int a, int b)
> {
>            if (a > 2)
>                        return false;
>             if (b < 10)
>                         return true;
>               return false;
> }
> 
> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
> 
> Here is the rtl after cse.
> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
> (insn 15 12 16 3 (set (reg:CC 123)
>          (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>              (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>       (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>          (nil)))
> (insn 16 15 17 3 (set (reg:SI 124)
>          (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>       (nil))
> (insn 17 16 18 3 (set (reg:SI 122)
>          (if_then_else:SI (gt (reg:CC 123)
>                  (const_int 0 [0]))
>              (const_int 0 [0])
>              (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>       (expr_list:REG_DEAD (reg:SI 124)
>          (expr_list:REG_DEAD (reg:CC 123)
>              (nil))))
> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>          (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>       (expr_list:REG_DEAD (reg:SI 122)
>          (nil)))
>        ; pc falls through to BB 5
> (code_label 32 18 31 4 3 (nil) [1 uses])
> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>          (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>       (nil))
> (code_label 19 5 20 5 2 (nil) [0 uses])
> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>          (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
>       (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>          (nil)))
> (insn 22 21 26 5 (set (reg:DI 118 [ <retval> ])
>          (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>       (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
>          (nil)))
> (insn 26 22 27 5 (set (reg/i:DI 3 3)
>          (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>       (expr_list:REG_DEAD (reg:DI 118 [ <retval> ])
>          (nil)))
> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
>       (nil))
This looks like it'd be better addressed in REE.


We've got two paths to the zero_extend.  One sets (reg 117) from a 
constant.  The other sets (reg 117) from a (subreg:QI (reg:SI)).

Handling the constant is trivial.  For the other set, we can replace the 
subreg with the zero_extend.  Presumably we'd then proceed to try and 
eliminate the zero-extend by realizing both arms of the conditional move 
are constants and thus trivially handled.

While I don't think REE would handle all this today, fixing it to handle 
this case seems like it'd be better than doing a specialized pass in the 
ppc backend.

jeff
Surya Kumari Jangala March 17, 2023, 3:37 a.m. UTC | #12
The issue of suboptimal code exists for integer return values too, not just for bool return values. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784#c9
So the patch would need to take care of integer return values too.

On 16/03/23 10:50 am, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> 
> This patch eliminates unnecessary zero extension instruction from power generated assembly.
> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> Thanks & Regards
> Ajit
> 
> 	rs6000: suboptimal code for returning bool value on target ppc.
> 
> 	New pass to eliminate unnecessary zero extension. This pass
> 	is registered after cse rtl pass.
> 
> 	2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> 
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000-passes.def: Registered zero elimination
> 	pass.
> 	* config/rs6000/rs6000-zext-elim.cc: Add new pass.
> 	* config.gcc: Add new executable.
> 	* config/rs6000/rs6000-protos.h: Add new prototype for zero
> 	elimination pass.
> 	* config/rs6000/rs6000.cc: Add new prototype for zero
> 	elimination pass.
> 	* config/rs6000/t-rs6000: Add new rule.
> 	* expr.cc: Modified gcc assert.
> 	* explow.cc: Modified gcc assert.
> 	* optabs.cc: Modified gcc assert.
> ---
>  gcc/config.gcc                        |   4 +-
>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>  gcc/config/rs6000/rs6000-protos.h     |   1 +
>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
>  gcc/config/rs6000/rs6000.cc           |   2 +
>  gcc/config/rs6000/t-rs6000            |   5 +
>  gcc/explow.cc                         |   3 +-
>  gcc/expr.cc                           |   4 +-
>  gcc/optabs.cc                         |   3 +-
>  9 files changed, 379 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
> 
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index da3a6d3ba1f..e8ac9d882f0 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -503,7 +503,7 @@ or1k*-*-*)
>  	;;
>  powerpc*-*-*)
>  	cpu_type=rs6000
> -	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> +	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>  	extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>  	extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>  	extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
> @@ -538,7 +538,7 @@ riscv*)
>  	;;
>  rs6000*-*-*)
>  	extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
> -	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
> +	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
>  	extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>  	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
>  	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
> diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
> index ca899d5f7af..d7500feddf1 100644
> --- a/gcc/config/rs6000/rs6000-passes.def
> +++ b/gcc/config/rs6000/rs6000-passes.def
> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>       The power8 does not have instructions that automaticaly do the byte swaps
>       for loads and stores.  */
>    INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
> +
>  
>    /* Pass to do the PCREL_OPT optimization that combines the load of an
>       external symbol's address along with a single load or store using that
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 1a4fc1df668..f6cf2d673d4 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -340,6 +340,7 @@ namespace gcc { class context; }
>  class rtl_opt_pass;
>  
>  extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
> +extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
>  extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
>  extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
>  extern bool rs6000_quadword_masked_address_p (const_rtx exp);
> diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
> new file mode 100644
> index 00000000000..777c7a5a387
> --- /dev/null
> +++ b/gcc/config/rs6000/rs6000-zext-elim.cc
> @@ -0,0 +1,361 @@
> +/* Subroutine to eliminate redundant zero extend for power architecture.
> +   Copyright (C) 1991-2023 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published
> +   by the Free Software Foundation; either version 3, or (at your
> +   option) any later version.
> +
> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> +   License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with GCC; see the file COPYING3.  If not see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* This pass remove unnecessary zero extension instruction from
> +  power generated assembly. This pass is register after cse
> +  pass.
> +  Identifies the following sequence of instruction after cse
> +  rtl pass.
> +
> +  set compare (subreg)
> +  set if_then_else
> +  set SImode -> QImode
> +  set zero_extend to DImode from QImode
> +  set return value 0 in one path of cfg.
> +  set return value 1 in other path of cfg.
> +
> +  In cfgexpand pass QImode is generated with
> +  bool register value and this pass uses QI
> +  as 64 bit registers.
> +
> +  This pass replace copy operation from QImode to DImode
> +  and return appropriate return values.*/
> +
> +#define IN_TARGET_CODE 1
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "rtl.h"
> +#include "tree.h"
> +#include "memmodel.h"
> +#include "df.h"
> +#include "tm_p.h"
> +#include "ira.h"
> +#include "print-tree.h"
> +#include "varasm.h"
> +#include "explow.h"
> +#include "expr.h"
> +#include "output.h"
> +#include "tree-pass.h"
> +
> +/* This is based on the union-find logic in web.cc.  web_entry_base is
> +   defined in df.h.  */
> +class zext_web_entry : public web_entry_base
> +{
> + public:
> +  /* Pointer to the insn.  */
> +  rtx_insn *insn;
> +  unsigned int is_relevant : 1;
> +  /* Set if insn is a load.  */
> +  unsigned int is_load : 1;
> +  /* Set if insn is a store.  */
> +  unsigned int is_store : 1;
> +  unsigned int is_zext :1 ;
> +  unsigned int is_move :1;
> +  unsigned int is_delete_move :1;
> +  /* Set if this insn should be deleted.  */
> +  unsigned int will_delete : 1;
> +  unsigned int will_delete_chances : 1;
> +};
> +
> +/* Checks if instruction is zero extension
> + * with QIMode to DImode.*/
> +static unsigned int
> +insn_is_zext_p(rtx insn)
> +{
> +  rtx body = PATTERN (insn);
> +
> +  if (GET_CODE (body) == SET
> +      && GET_MODE(SET_DEST (body)) == DImode
> +      && GET_CODE(SET_SRC (body)) == ZERO_EXTEND)
> +  {
> +    rtx set = XEXP (SET_SRC (body), 0);
> +
> +    if (REG_P (set))
> +    {
> +      if (GET_MODE (set) == QImode) return 1;
> +    }
> +    else
> +      return 0;
> +  }
> +  return 0;
> +}
> +
> +/* Checks if instruction is SET operation with QImode.*/
> +static unsigned int
> +insn_is_store_p (rtx insn)
> +{
> +  rtx body = PATTERN (insn);
> +  if (GET_CODE (body) == SET
> +      && SUBREG_P(SET_SRC (body))
> +      && !CONST_INT_P(SET_SRC (body))
> +      && GET_MODE(XEXP (SET_SRC (body), 0)) == SImode
> +      && GET_MODE(SET_SRC (body)) == QImode)
> +    return 1;
> +
> +  return 0;
> +}
> +
> +/* Find out zero extension removal candidate with use-def web.*/
> +static void
> +find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
> +				     rtx insn, df_ref def)
> +{
> +  struct df_link *link = DF_REF_CHAIN (def);
> +
> +  rtx move_insn = NULL_RTX;
> +  rtx compare_insn = NULL_RTX;
> +
> +  while (link)
> +  {
> +    if (!DF_REF_INSN_INFO (link->ref))
> +      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
> +
> +    if (DF_REF_INSN_INFO (link->ref))
> +      {
> +	rtx use_insn = DF_REF_INSN (link->ref);
> +
> +	if (GET_CODE (PATTERN (use_insn)) == SET
> +	    && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
> +	  {
> +	    if (GET_CODE (PATTERN (insn)) == SET
> +		&& GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
> +	      {
> +		rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
> +
> +		if (SUBREG_P (body))
> +		  {
> +		    compare_insn = use_insn;
> +		    rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
> +
> +		    if (compare_insn
> +			&& ((REGNO (XEXP (compare_body, 0)))
> +				== REGNO (SET_DEST (PATTERN (insn)))))
> +		      insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> +		  }
> +	       }
> +	    }
> +
> +	if (insn_is_store_p(use_insn)
> +	    && GET_CODE (PATTERN (insn)) == SET
> +	    && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
> +	  {
> +	    if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
> +	      {
> +		if (insn_entry[INSN_UID(insn)].will_delete_chances)
> +		  insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
> +	      }
> +	  }
> +
> +	if (insn_is_zext_p (insn))
> +	  {
> +	    if (GET_CODE (PATTERN (use_insn)) == SET
> +		&& REG_P (SET_SRC (PATTERN (use_insn))))
> +	      {
> +		if (move_insn
> +		    && REGNO (SET_SRC (PATTERN (use_insn)))
> +		       == REGNO (SET_SRC (PATTERN (move_insn)))
> +		    && insn_entry[INSN_UID(insn)].is_delete_move)
> +		  {
> +		    insn_entry[INSN_UID (insn)].is_move = 1;
> +		    break;
> +		  }
> +		  else if (insn_entry[INSN_UID (insn)].will_delete)
> +		    {
> +		      move_insn = use_insn;
> +		      insn_entry[INSN_UID(insn)].is_delete_move= 1;
> +		    }
> +	      }
> +	  }
> +
> +	if (insn_is_zext_p (use_insn))
> +	  {
> +	    insn_entry[INSN_UID (use_insn)].is_zext = 1;
> +	    insn_entry[INSN_UID(use_insn)].is_relevant = 1;
> +
> +	    if (insn_is_store_p (insn)
> +		&& insn_entry[INSN_UID (insn)].will_delete_chances)
> +	    {
> +	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
> +	      insn_entry[INSN_UID (insn)].will_delete = 1;
> +	      insn_entry[INSN_UID( insn)].is_store = 1;
> +	    }
> +
> +	   if (NONDEBUG_INSN_P (use_insn))
> +	     unionfind_union (insn_entry + INSN_UID (insn),
> +			      insn_entry + INSN_UID (use_insn));
> +	}
> +      }
> +
> +    link = link->next;
> +  }
> +}
> +
> +/* Replace QImode extensions with copy operations.*/
> +static void
> +replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
> +{
> +  rtx_insn *insn = insn_entry[i].insn;
> +  rtx body = PATTERN (insn);
> +  rtx src_reg;
> +  src_reg = XEXP (SET_SRC (body), 0);
> +  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));
> +
> +  if (GET_MODE(SET_DEST(body)) != DImode)
> +    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
> +
> +  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
> +  rtx_insn *new_insn = emit_insn_before (copy, insn);
> +  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
> +  df_insn_rescan (new_insn);
> +
> +  df_insn_delete (insn);
> +  remove_insn (insn);
> +  insn->set_deleted ();
> +}
> +
> +/* Main entry point for this pass.  */
> +unsigned int
> +rs6000_analyze_zext (function *fun)
> +{
> +  zext_web_entry *insn_entry;
> +  basic_block bb;
> +  rtx_insn *insn, *curr_insn = 0;
> +
> +  /* Dataflow analysis for use-def chains.  */
> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> +  df_analyze ();
> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> +
> +  /* Rebuild ud- and du-chains.  */
> +  df_remove_problem (df_chain);
> +  df_process_deferred_rescans ();
> +  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
> +  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
> +  df_analyze ();
> +  df_set_flags (DF_DEFER_INSN_RESCAN);
> +
> +  /* Allocate structure to represent webs of insns.  */
> +  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
> +
> +  /* Walk the insns to gather basic data.  */
> +  FOR_ALL_BB_FN (bb, fun)
> +    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
> +    {
> +      unsigned int uid = INSN_UID (insn);
> +      if (NONDEBUG_INSN_P (insn))
> +	{
> +	  insn_entry[uid].insn = insn;
> +
> +	  if (GET_CODE (insn) == insn_is_store_p (insn))
> +	    {
> +	      insn_entry[uid].is_store = 1;
> +	      insn_entry[uid].is_relevant = 1;
> +	    }
> +
> +	  /* Walk the uses and defs to identify the optimization
> +	     candidates.*/
> +	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> +	  df_ref mention;
> +
> +	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
> +	    {
> +	      insn_entry[uid].is_relevant = 1;
> +	      insn_entry[uid].is_store = insn_is_store_p (insn);
> +	      find_zero_ext_elimination_candidate (insn_entry, insn, mention);
> +	    }
> +
> +	  if (insn_entry[uid].is_relevant)
> +	    {
> +	      /* Determine if this is a store.  */
> +	      insn_entry[uid].is_store = insn_is_store_p (insn);
> +	    }
> +	}
> +     }
> +
> +   unsigned e = get_max_uid (), i;
> +
> +   int store_index = -1;
> +
> +   /* Replace with copy operation.*/
> +   for (i = 0; i < e; ++i)
> +     {
> +       if (insn_entry[i].is_store && insn_entry[i].will_delete)
> +	 store_index  = i;
> +
> +	if ((store_index != -1)
> +	     && insn_entry[i].is_move && insn_entry[i].will_delete)
> +	  {
> +	    replace_marked_insns (insn_entry, store_index);
> +	    replace_marked_insns (insn_entry, i);
> +	  }
> +     }
> +    /* Clean up.  */
> +    free (insn_entry);
> +
> +    return 0;
> +}
> +
> +const pass_data pass_data_analyze_zext =
> +{
> +  RTL_PASS, /* type */
> +  "zext", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE, /* tv_id */
> +  0, /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  TODO_df_finish, /* todo_flags_finish */
> +};
> +
> +class pass_analyze_zext : public rtl_opt_pass
> +{
> +public:
> +  pass_analyze_zext(gcc::context *ctxt)
> +    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  virtual bool gate (function *)
> +    {
> +      return (optimize > 0 );
> +    }
> +
> +  virtual unsigned int execute (function *fun)
> +    {
> +      return rs6000_analyze_zext (fun);
> +    }
> +
> +  opt_pass *clone ()
> +    {
> +      return new pass_analyze_zext (m_ctxt);
> +    }
> +
> +}; // class pass_analyze_zext
> +
> +rtl_opt_pass *
> +make_pass_analyze_zext (gcc::context *ctxt)
> +{
> +  return new pass_analyze_zext (ctxt);
> +}
> +
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 8e0b0d022db..6541334bf2d 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
>  					  bool);
>  rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
>  
> +rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
> +
>  /* Hash table stuff for keeping track of TOC entries.  */
>  
>  struct GTY((for_user)) toc_hash_struct
> diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
> index f183b42ce1d..c1f61591d2f 100644
> --- a/gcc/config/rs6000/t-rs6000
> +++ b/gcc/config/rs6000/t-rs6000
> @@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
>  	$(COMPILE) $<
>  	$(POSTCOMPILE)
>  
> +rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
> +	$(COMPILE) $<
> +	$(POSTCOMPILE)
> +
> +
>  rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
>  	$(COMPILE) $<
>  	$(POSTCOMPILE)
> diff --git a/gcc/explow.cc b/gcc/explow.cc
> index 32e9498ee07..316aa975e40 100644
> --- a/gcc/explow.cc
> +++ b/gcc/explow.cc
> @@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
>    if (! general_operand (x, VOIDmode))
>      x = force_operand (x, temp);
>  
> -  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
> +  gcc_assert (mode == DImode || GET_MODE (x) == mode
> +	       || GET_MODE (x) == VOIDmode);
>    if (x != temp)
>      emit_move_insn (temp, x);
>    return temp;
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 15be1c8db99..6162ef92b88 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
>    rtx y_cst = NULL_RTX;
>    rtx_insn *last_insn;
>    rtx set;
> -
>    gcc_assert (mode != BLKmode
> -	      && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
> +	      && (mode == DImode || GET_MODE (y) == mode
> +	      || GET_MODE (y) == VOIDmode));
>  
>    /* If we have a copy that looks like one of the following patterns:
>         (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 4c641cab192..9d22fadc7ef 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
>      input:
>        gcc_assert (mode != VOIDmode);
>        gcc_assert (GET_MODE (op->value) == VOIDmode
> -		  || GET_MODE (op->value) == mode);
> +		  || GET_MODE (op->value) == mode
> +		  || mode == DImode);
>        if (maybe_legitimize_operand_same_code (icode, opno, op))
>  	return true;
>
Ajit Agarwal March 17, 2023, 11:49 a.m. UTC | #13
Hello Jeff:

On 16/03/23 8:18 pm, Jeff Law wrote:
> 
> 
> On 3/16/23 04:11, Ajit Agarwal via Gcc-patches wrote:
>>
>> Hello Richard:
>>
>> On 16/03/23 3:22 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>
>>>>
>>>>
>>>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>>>
>>>>>> Hello Richard:
>>>>>>
>>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>>> <gcc-patches@gcc.gnu.org> wrote:
>>>>>>>>
>>>>>>>> Hello All:
>>>>>>>>
>>>>>>>>
>>>>>>>> This patch eliminates unnecessary zero extension instruction from power generated assembly.
>>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>>>
>>>>>>> What makes this so special that we cannot deal with it from generic code?
>>>>>>> In particular we do have the REE pass, why is target specific
>>>>>>> knowledge neccessary
>>>>>>> to eliminate the extension?
>>>>>>>
>>>>>>
>>>>>> For returning bool values and comparision with integers generates the following by all the rtl passes.
>>>>>>
>>>>>> set compare (subreg)
>>>>>> set if_then_else
>>>>>> Convert SImode -> QImode
>>>>>> set zero_extend to SImode from QImode
>>>>>> set return value 0 in one path of cfg.
>>>>>> set return value 1 in other path of cfg.
>>>>>>
>>>>>> This pass replaces the above zero extension and conversion from QImode to DImode with copy operation to keep QImode in 64 bit registers in powerpc target.
>>>>>
>>>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>>>> cannot even try to see what the actual RTL
>>>>> looks like (without the pass).
>>>>>
>>>>
>>>> Here is the PR with bugzilla.
>>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>>>
>>>> I can add the attached testcase with this PR in the patch.
>>>
>>> I don't see any zero-extends there.
>>>
>>
>> Here is the testcase.
>>
>>
>> bool (int a, int b)
>> {
>>            if (a > 2)
>>                        return false;
>>             if (b < 10)
>>                         return true;
>>               return false;
>> }
>>
>> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>>
>> Here is the rtl after cse.
>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>> (insn 15 12 16 3 (set (reg:CC 123)
>>          (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>>              (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>>       (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>>          (nil)))
>> (insn 16 15 17 3 (set (reg:SI 124)
>>          (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>>       (nil))
>> (insn 17 16 18 3 (set (reg:SI 122)
>>          (if_then_else:SI (gt (reg:CC 123)
>>                  (const_int 0 [0]))
>>              (const_int 0 [0])
>>              (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>>       (expr_list:REG_DEAD (reg:SI 124)
>>          (expr_list:REG_DEAD (reg:CC 123)
>>              (nil))))
>> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>>          (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>>       (expr_list:REG_DEAD (reg:SI 122)
>>          (nil)))
>>        ; pc falls through to BB 5
>> (code_label 32 18 31 4 3 (nil) [1 uses])
>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>>          (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>>       (nil))
>> (code_label 19 5 20 5 2 (nil) [0 uses])
>> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>>          (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
>>       (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
>>          (nil)))
>> (insn 22 21 26 5 (set (reg:DI 118 [ <retval> ])
>>          (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>       (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
>>          (nil)))
>> (insn 26 22 27 5 (set (reg/i:DI 3 3)
>>          (reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
>>       (expr_list:REG_DEAD (reg:DI 118 [ <retval> ])
>>          (nil)))
>> (insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
>>       (nil))
> This looks like it'd be better addressed in REE.
> 
> 
> We've got two paths to the zero_extend.  One sets (reg 117) from a constant.  The other sets (reg 117) from a (subreg:QI (reg:SI)).
> 
> Handling the constant is trivial.  For the other set, we can replace the subreg with the zero_extend.  Presumably we'd then proceed to try and eliminate the zero-extend by realizing both arms of the conditional move are constants and thus trivially handled.
> 
> While I don't think REE would handle all this today, fixing it to handle this case seems like it'd be better than doing a specialized pass in the ppc backend.
> 
> jeff
> 

Thanks for your advice. At the input of the REE pass the RTL looks like the following: the zero_extend of (reg:QI 117) has already been converted to (and:DI (subreg:DI (reg:QI 117) 0) (const_int 1)), see insn 26 below.

This needs to be handled. I am working on handling this in the REE pass.

insn 44 43 18 3 (set (reg:SI 122)
        (if_then_else:SI (le:SI (reg:CC 130)
                (const_int 0 [0]))
            (reg:SI 129)
            (const_int 0 [0]))) "ext.cc":5:5 -1
     (nil))
(insn 18 44 40 3 (set (reg:QI 117 [ _1 ])
        (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
     (expr_list:REG_DEAD (reg:SI 122)
        (nil)))
(jump_insn 40 18 41 3 (set (pc)
        (label_ref 19)) -1
     (nil)
 -> 19)
(barrier 41 40 32)
(code_label 32 41 31 4 3 (nil) [1 uses])
(note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
        (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
     (nil))
(code_label 19 5 20 5 2 (nil) [1 uses])
(note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(note 21 20 26 5 NOTE_INSN_DELETED)
(insn 26 21 27 5 (set (reg/i:DI 3 %r3)
        (and:DI (subreg:DI (reg:QI 117 [ _1 ]) 0)
            (const_int 1 [0x1]))) "ext.cc":8:1 207 {anddi3_mask}
     (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
        (nil)))
(insn 27 26 0 5 (use (reg/i:DI 3 %r3)) "ext.cc":8:1 -1
     (nil))
"a-ext.cc.292r.split1" 92L, 4727C                     

Thanks & Regards
Ajit
Peter Bergner March 17, 2023, 9:20 p.m. UTC | #14
On 3/16/23 10:37 PM, Surya Kumari Jangala wrote:
> The issue of suboptimal code exists even for integer return value and not just bool return value. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784#c9 
> So the patch would need to take care of integer return values too.

Correct.  Basically, any time we return a value of an integral type (signed or
unsigned) that is smaller than the hard register we are returning it in, we can
get these unwanted sign/zero extensions.

I'm sure we have quite a few bugzillas mentioning the unneeded sign/zero extends.

Peter
Peter Bergner March 18, 2023, 3:53 a.m. UTC | #15
On 3/17/23 4:20 PM, Peter Bergner via Gcc-patches wrote:
> On 3/16/23 10:37 PM, Surya Kumari Jangala wrote:
>> The issue of suboptimal code exists even for integer return value and not just bool return value. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784#c9 
>> So the patch would need to take care of integer return values too.
> 
> Correct.  Basically any time we return an integral type (signed or unsigned)
> type is smaller than the hard register we are returning it in, we can get these
> unwanted sign/zero extensions.

I'm sorry, I didn't mean to imply every sign and zero extend is useless.
I just meant to say that there are many cases when these sign and zero extends
are not necessary.

Peter
diff mbox series

Patch

diff --git a/gcc/config.gcc b/gcc/config.gcc
index da3a6d3ba1f..e8ac9d882f0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -503,7 +503,7 @@  or1k*-*-*)
 	;;
 powerpc*-*-*)
 	cpu_type=rs6000
-	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
 	extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
 	extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
 	extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
@@ -538,7 +538,7 @@  riscv*)
 	;;
 rs6000*-*-*)
 	extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
-	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
 	extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
index ca899d5f7af..d7500feddf1 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -28,6 +28,8 @@  along with GCC; see the file COPYING3.  If not see
      The power8 does not have instructions that automaticaly do the byte swaps
      for loads and stores.  */
   INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
+
 
   /* Pass to do the PCREL_OPT optimization that combines the load of an
      external symbol's address along with a single load or store using that
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..f6cf2d673d4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -340,6 +340,7 @@  namespace gcc { class context; }
 class rtl_opt_pass;
 
 extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
 extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
 extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
 extern bool rs6000_quadword_masked_address_p (const_rtx exp);
diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
new file mode 100644
index 00000000000..777c7a5a387
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-zext-elim.cc
@@ -0,0 +1,361 @@ 
+/* Subroutine to eliminate redundant zero extend for power architecture.
+   Copyright (C) 1991-2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This pass removes unnecessary zero extension instructions from
+  the assembly generated for Power.  This pass is registered after
+  the cse pass.
+  It identifies the following sequence of instructions after the cse
+  rtl pass:
+
+  set compare (subreg)
+  set if_then_else
+  set SImode -> QImode
+  set zero_extend to DImode from QImode
+  set return value 0 in one path of cfg.
+  set return value 1 in other path of cfg.
+
+  In the cfgexpand pass, QImode is generated for the
+  bool register value, and this pass treats QI
+  as 64-bit registers.
+
+  This pass replaces the QImode to DImode conversion with a copy
+  operation and returns the appropriate return values.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+
+/* Per-insn bookkeeping record; the pass keeps an array of these indexed
+   by INSN_UID.  The union-find linkage comes from web_entry_base, which
+   is defined in df.h and follows the logic in web.cc.  */
+class zext_web_entry : public web_entry_base
+{
+public:
+  /* Pointer to the insn.  */
+  rtx_insn *insn;
+
+  /* Set for insns that have at least one def, and for zero extensions
+     reached through a def-use chain.  */
+  unsigned int is_relevant : 1;
+
+  /* Set if insn is a load.  NOTE(review): nothing in this file sets
+     this flag.  */
+  unsigned int is_load : 1;
+
+  /* Set if insn is a store, i.e. insn_is_store_p holds for it.  */
+  unsigned int is_store : 1;
+
+  /* Set if insn is a QImode to DImode zero extension that is used by
+     some def.  */
+  unsigned int is_zext : 1;
+
+  /* Set on a zero extension once a second register-copy use reading the
+     same source register as the recorded one has been found.  */
+  unsigned int is_move : 1;
+
+  /* Set on a zero extension when its first register-copy use has been
+     recorded while will_delete was already set.  */
+  unsigned int is_delete_move : 1;
+
+  /* Set if this insn should be deleted.  */
+  unsigned int will_delete : 1;
+
+  /* Set while the insn is still a possible candidate: marked on the
+     if_then_else fed by a compare of a subreg, then propagated to the
+     QImode store that uses it.  */
+  unsigned int will_delete_chances : 1;
+};
+
+/* Return 1 if INSN is a SET of a DImode destination whose source is a
+   ZERO_EXTEND of a QImode register, and 0 otherwise.  */
+static unsigned int
+insn_is_zext_p(rtx insn)
+{
+  rtx pat = PATTERN (insn);
+
+  /* Anything that is not "(set (x:DI) (zero_extend ...))" is rejected
+     up front.  */
+  if (GET_CODE (pat) != SET
+      || GET_MODE (SET_DEST (pat)) != DImode
+      || GET_CODE (SET_SRC (pat)) != ZERO_EXTEND)
+    return 0;
+
+  /* Only a QImode register operand qualifies.  */
+  rtx operand = XEXP (SET_SRC (pat), 0);
+  return REG_P (operand) && GET_MODE (operand) == QImode;
+}
+
+/* Return 1 if INSN is a SET whose source is a QImode SUBREG of an SImode
+   expression, and 0 otherwise.  */
+static unsigned int
+insn_is_store_p (rtx insn)
+{
+  rtx pat = PATTERN (insn);
+
+  if (GET_CODE (pat) != SET)
+    return 0;
+
+  rtx src = SET_SRC (pat);
+
+  /* A SUBREG is never a CONST_INT, so no separate constant test is
+     needed once this holds.  */
+  if (!SUBREG_P (src))
+    return 0;
+
+  return GET_MODE (XEXP (src, 0)) == SImode && GET_MODE (src) == QImode;
+}
+
+/* Find zero extension removal candidates by walking the def-use chain
+   of DEF, a definition made by INSN.
+
+   INSN_ENTRY is the per-insn table indexed by INSN_UID; this function
+   only updates flags in it (and unions entries), it does not change any
+   insn.  For every use reached from DEF, four independent patterns are
+   checked:
+
+   1. INSN sets a COMPARE of a SUBREG and the use is a SET of an
+      IF_THEN_ELSE: the use gets will_delete_chances.
+   2. INSN is an SImode SET of an IF_THEN_ELSE that already has
+      will_delete_chances and the use is a QImode store
+      (insn_is_store_p): the flag is propagated to the use.
+   3. INSN is a zero extension and the use is a register copy: the
+      first such copy is recorded, a later one reading the same source
+      register marks INSN as is_move and ends the walk.
+   4. The use is a zero extension: it is marked is_zext/is_relevant, and
+      if INSN is a QImode store with will_delete_chances, both insns get
+      will_delete; the two entries are then unioned.  */
+static void
+find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
+				     rtx insn, df_ref def)
+{
+  struct df_link *link = DF_REF_CHAIN (def);
+
+  /* First register-copy use seen for a zero extension (pattern 3).  */
+  rtx move_insn = NULL_RTX;
+  /* IF_THEN_ELSE use currently examined for pattern 1.  */
+  rtx compare_insn = NULL_RTX;
+
+  while (link)
+  {
+    /* A reference without insn info disqualifies INSN as a candidate.  */
+    if (!DF_REF_INSN_INFO (link->ref))
+      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
+
+    if (DF_REF_INSN_INFO (link->ref))
+      {
+	rtx use_insn = DF_REF_INSN (link->ref);
+
+	/* Pattern 1: compare of a subreg feeding an if_then_else.  */
+	if (GET_CODE (PATTERN (use_insn)) == SET
+	    && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
+	  {
+	    if (GET_CODE (PATTERN (insn)) == SET
+		&& GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
+	      {
+		rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
+
+		if (SUBREG_P (body))
+		  {
+		    compare_insn = use_insn;
+		    /* Operand 0 of the IF_THEN_ELSE, i.e. its condition.  */
+		    rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
+
+		    /* The condition must test the register INSN sets.
+		       NOTE(review): compare_insn has already been
+		       dereferenced above, so the null test here cannot
+		       fail; REGNO is also applied without a REG_P check
+		       on either operand -- confirm both are always
+		       registers at this point.  */
+		    if (compare_insn
+			&& ((REGNO (XEXP (compare_body, 0)))
+				== REGNO (SET_DEST (PATTERN (insn)))))
+		      insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
+		  }
+	       }
+	    }
+
+	/* Pattern 2: SImode if_then_else result narrowed by a QImode
+	   store; carry the candidate mark over to the store.  */
+	if (insn_is_store_p(use_insn)
+	    && GET_CODE (PATTERN (insn)) == SET
+	    && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
+	  {
+	    if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
+	      {
+		if (insn_entry[INSN_UID(insn)].will_delete_chances)
+		  insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
+	      }
+	  }
+
+	/* Pattern 3: INSN is the zero extension and USE_INSN copies a
+	   register.  */
+	if (insn_is_zext_p (insn))
+	  {
+	    if (GET_CODE (PATTERN (use_insn)) == SET
+		&& REG_P (SET_SRC (PATTERN (use_insn))))
+	      {
+		/* A second copy reading the same source register as the
+		   recorded one: mark INSN and stop walking the chain.  */
+		if (move_insn
+		    && REGNO (SET_SRC (PATTERN (use_insn)))
+		       == REGNO (SET_SRC (PATTERN (move_insn)))
+		    && insn_entry[INSN_UID(insn)].is_delete_move)
+		  {
+		    insn_entry[INSN_UID (insn)].is_move = 1;
+		    break;
+		  }
+		  /* Otherwise record this copy, but only once INSN has
+		     been marked for deletion.  */
+		  else if (insn_entry[INSN_UID (insn)].will_delete)
+		    {
+		      move_insn = use_insn;
+		      insn_entry[INSN_UID(insn)].is_delete_move= 1;
+		    }
+	      }
+	  }
+
+	/* Pattern 4: USE_INSN is the zero extension of INSN's result.  */
+	if (insn_is_zext_p (use_insn))
+	  {
+	    insn_entry[INSN_UID (use_insn)].is_zext = 1;
+	    insn_entry[INSN_UID(use_insn)].is_relevant = 1;
+
+	    /* A candidate QImode store feeding the extension: mark both
+	       for replacement.  */
+	    if (insn_is_store_p (insn)
+		&& insn_entry[INSN_UID (insn)].will_delete_chances)
+	    {
+	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
+	      insn_entry[INSN_UID (insn)].will_delete = 1;
+	      insn_entry[INSN_UID( insn)].is_store = 1;
+	    }
+
+	   /* Put the def and the extension into the same web.  */
+	   if (NONDEBUG_INSN_P (use_insn))
+	     unionfind_union (insn_entry + INSN_UID (insn),
+			      insn_entry + INSN_UID (use_insn));
+	}
+      }
+
+    link = link->next;
+  }
+}
+
+/* Replace the insn recorded in INSN_ENTRY[I] with a plain DImode copy.
+
+   Operand 0 of the old SET source and, if needed, the old destination
+   are retyped to DImode in place; a new "(set dest operand)" insn is
+   emitted in front of the old insn, which is then removed.  */
+static void
+replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
+{
+  rtx_insn *old_insn = insn_entry[i].insn;
+  rtx pat = PATTERN (old_insn);
+
+  /* Retype the inner operand of the source expression.  */
+  rtx inner = XEXP (SET_SRC (pat), 0);
+  set_mode_and_regno (inner, DImode, REGNO (inner));
+
+  /* Retype the destination unless it is DImode already.  */
+  rtx dest = SET_DEST (pat);
+  if (GET_MODE (dest) != DImode)
+    set_mode_and_regno (dest, DImode, REGNO (dest));
+
+  /* Emit the copy ahead of the old insn, in the same basic block, and
+     let df rescan it.  */
+  rtx copy_pat = gen_rtx_SET (dest, inner);
+  rtx_insn *copy_insn = emit_insn_before (copy_pat, old_insn);
+  set_block_for_insn (copy_insn, BLOCK_FOR_INSN (old_insn));
+  df_insn_rescan (copy_insn);
+
+  /* Drop the old insn from df and from the insn chain.  */
+  df_insn_delete (old_insn);
+  remove_insn (old_insn);
+  old_insn->set_deleted ();
+}
+
+/* Main entry point for this pass.
+
+   Builds DU/UD chains for FUN, walks every non-debug insn to record
+   which insns are QImode stores and which zero extensions they feed
+   (see find_zero_ext_elimination_candidate), and finally rewrites each
+   marked store / zero-extension pair as DImode copies.  Always
+   returns 0.  */
+unsigned int
+rs6000_analyze_zext (function *fun)
+{
+  zext_web_entry *insn_entry;
+  basic_block bb;
+  rtx_insn *insn, *curr_insn = 0;
+
+  /* Dataflow analysis for use-def chains.  */
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  /* Rebuild ud- and du-chains.
+     NOTE(review): no insn is changed between the analysis above and
+     this rebuild, so this second round looks redundant -- confirm and
+     drop it.  */
+  df_remove_problem (df_chain);
+  df_process_deferred_rescans ();
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  /* Allocate structure to represent webs of insns.  */
+  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
+
+  /* Walk the insns to gather basic data.  */
+  FOR_ALL_BB_FN (bb, fun)
+    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+      {
+	if (NONDEBUG_INSN_P (insn))
+	  {
+	    unsigned int uid = INSN_UID (insn);
+	    insn_entry[uid].insn = insn;
+
+	    /* Test the predicate itself.  Comparing it against
+	       GET_CODE (insn) could never match, because the predicate
+	       yields 0 or 1 while a non-debug insn's code is neither.
+	       is_store therefore always equals insn_is_store_p (insn)
+	       and needs no later re-assignment.  */
+	    if (insn_is_store_p (insn))
+	      {
+		insn_entry[uid].is_store = 1;
+		insn_entry[uid].is_relevant = 1;
+	      }
+
+	    /* Walk the defs to identify the optimization candidates.  */
+	    struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+	    df_ref mention;
+
+	    FOR_EACH_INSN_INFO_DEF (mention, insn_info)
+	      {
+		insn_entry[uid].is_relevant = 1;
+		find_zero_ext_elimination_candidate (insn_entry, insn,
+						     mention);
+	      }
+	  }
+      }
+
+  unsigned e = get_max_uid (), i;
+
+  /* UID of the most recent store marked for deletion, or -1.  */
+  int store_index = -1;
+
+  /* Replace with copy operation.
+     NOTE(review): a store is paired with the next marked zero extension
+     purely by UID order, and store_index is not reset after a
+     replacement, so a second marked extension would hand an already
+     deleted store to replace_marked_insns -- confirm this cannot
+     happen.  */
+  for (i = 0; i < e; ++i)
+    {
+      if (insn_entry[i].is_store && insn_entry[i].will_delete)
+	store_index = i;
+
+      if (store_index != -1
+	  && insn_entry[i].is_move && insn_entry[i].will_delete)
+	{
+	  replace_marked_insns (insn_entry, store_index);
+	  replace_marked_insns (insn_entry, i);
+	}
+    }
+
+  /* Clean up.  */
+  free (insn_entry);
+
+  return 0;
+}
+
+/* Static description of the zero extension elimination pass: an RTL
+   pass named "zext" with no option group, no timevar, no required,
+   provided or destroyed properties, and TODO_df_finish as its finishing
+   todo flag.  */
+const pass_data pass_data_analyze_zext =
+{
+  RTL_PASS, /* type */
+  "zext", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_df_finish, /* todo_flags_finish */
+};
+
+/* RTL pass object that runs rs6000_analyze_zext on a function whenever
+   optimization is enabled.  */
+class pass_analyze_zext : public rtl_opt_pass
+{
+public:
+  pass_analyze_zext (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_analyze_zext, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+
+  /* The pass runs only when optimizing.  */
+  virtual bool gate (function *)
+  {
+    return optimize > 0;
+  }
+
+  /* Hand FUN to the pass's main entry point and forward its result.  */
+  virtual unsigned int execute (function *fun)
+  {
+    unsigned int result = rs6000_analyze_zext (fun);
+    return result;
+  }
+
+  /* Create another instance bound to the same context.  */
+  opt_pass *clone ()
+  {
+    return new pass_analyze_zext (m_ctxt);
+  }
+
+}; // class pass_analyze_zext
+
+/* Factory for the zero extension elimination pass: return a newly
+   allocated pass_analyze_zext for CTXT.  */
+rtl_opt_pass *
+make_pass_analyze_zext (gcc::context *ctxt)
+{
+  rtl_opt_pass *zext_pass = new pass_analyze_zext (ctxt);
+  return zext_pass;
+}
+
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 8e0b0d022db..6541334bf2d 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1178,6 +1178,8 @@  static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
 					  bool);
 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
 
+rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
+
 /* Hash table stuff for keeping track of TOC entries.  */
 
 struct GTY((for_user)) toc_hash_struct
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index f183b42ce1d..c1f61591d2f 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -35,6 +35,11 @@  rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
+
 rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
 	$(COMPILE) $<
 	$(POSTCOMPILE)
diff --git a/gcc/explow.cc b/gcc/explow.cc
index 32e9498ee07..316aa975e40 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -654,7 +654,8 @@  copy_to_mode_reg (machine_mode mode, rtx x)
   if (! general_operand (x, VOIDmode))
     x = force_operand (x, temp);
 
-  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
+  gcc_assert (mode == DImode || GET_MODE (x) == mode
+	       || GET_MODE (x) == VOIDmode);
   if (x != temp)
     emit_move_insn (temp, x);
   return temp;
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 15be1c8db99..6162ef92b88 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -4223,9 +4223,9 @@  emit_move_insn (rtx x, rtx y)
   rtx y_cst = NULL_RTX;
   rtx_insn *last_insn;
   rtx set;
-
   gcc_assert (mode != BLKmode
-	      && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
+	      && (mode == DImode || GET_MODE (y) == mode
+	      || GET_MODE (y) == VOIDmode));
 
   /* If we have a copy that looks like one of the following patterns:
        (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 4c641cab192..9d22fadc7ef 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -7902,7 +7902,8 @@  maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
     input:
       gcc_assert (mode != VOIDmode);
       gcc_assert (GET_MODE (op->value) == VOIDmode
-		  || GET_MODE (op->value) == mode);
+		  || GET_MODE (op->value) == mode
+		  || mode == DImode);
       if (maybe_legitimize_operand_same_code (icode, opno, op))
 	return true;