diff mbox

using scratchpads to enhance RTL-level if-conversion: revised patch

Message ID 562FFC2D.1020602@yahoo.com
State New
Headers show

Commit Message

Abe Oct. 27, 2015, 10:35 p.m. UTC
Dear all,

Thanks for all your feedback.  I have integrated as much of it as I could in the available time.

The revised patch now allows the target to decide when to use scratchpads,
the global default being to do so when the profile data indicates it is probably good to do so.

Bootstrapped and make-checked on x86_64 GNU/Linux.

Regards,

Abe








2015-10-06  Abe  <abe_skolnik@yahoo.com>

         * ifcvt.c (noce_mem_write_may_go_wrong_even_with_scratchpads): New.
         (noce_mem_write_may_trap_or_fault_p_1): New.
         (noce_mem_write_may_trap_or_fault_p): Moved most of it into
         "noce_mem_write_may_trap_or_fault_p_1".
         (noce_process_if_block): Added scratchpad-based support for if-converting
         half hammocks with writes to destinations GCC says may "trap or fault".
         (if_convert): Ensure the scratchpad-support vector is cleared upon
         each entry into the RTL if-converter.
         * target.def (RTL_ifcvt_when_to_use_scratchpads): New DEFHOOKPOD.
         * target.h: New enum named "RTL_ifcvt_when_to_use_scratchpads".






 From c24c9b7528a2865efe49e5abb812bfbbac1a5e1c Mon Sep 17 00:00:00 2001
From: Abe <abe_skolnik@yahoo.com>
Date: Tue, 27 Oct 2015 17:12:44 -0500
Subject: [PATCH] Adding my RTL ifcvt work on top of Ville`s upstream commit.

---
  gcc/ifcvt.c    | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
  gcc/target.def |   5 ++
  gcc/target.h   |   6 ++
  3 files changed, 160 insertions(+), 20 deletions(-)

Comments

Bernd Schmidt Oct. 30, 2015, 2:08 p.m. UTC | #1
(Jakub Cc'd because of code he added for PR23567).

On 10/27/2015 11:35 PM, Abe wrote:
> Thanks for all your feedback.  I have integrated as much of it as I
> could in the available time.

Unfortunately not all of it - I still think we need to have a better 
strategy of selecting a scratchpad than a newly allocated stack slot. 
There are sufficiently many options.

>          * ifcvt.c (noce_mem_write_may_go_wrong_even_with_scratchpads):
> New.

ChangeLog doesn't correspond to the patch. If the function actually 
existed I'd reject it for a way overlong identifier.

>          * target.h: New enum named "RTL_ifcvt_when_to_use_scratchpads".

Please follow ChangeLog writing guidelines.

> -/* Return true if a write into MEM may trap or fault.  */
> +/* Return true if a write into MEM may trap or fault
> +   even in the presence of scratchpad support.  */

I still think this comment is fairly useless and needs to better 
describe what it is actually doing.

>   static bool
> -noce_mem_write_may_trap_or_fault_p (const_rtx mem)
> +noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)

For what this ends up doing, I think a name like "unsafe_address_p" 
would be better. Also, I think the code in there is really dubious - it 
tries to look for SYMBOL_REFs which are in decl_readonly_section, but 
shouldn't that be unnecessary given the test for MEM_READONLY_P? It's 
completely unreliable anyway since the address could be loaded into a 
register (on most targets it would be) and we'd never see the SYMBOL_REF 
in that function.

> +/* Return true if a write into MEM may trap or fault
> +   without scratchpad support.  */

Just keep the previous comment without mentioning scratchpads.

> +static bool
> +noce_mem_write_may_trap_or_fault_p (const_rtx mem)
> +{
> +  if (may_trap_or_fault_p (mem))
> +    return true;
> +
> +  return noce_mem_write_may_trap_or_fault_p_1 (mem);
> +}
> +      if (RTL_ifcvt_use_spads_as_per_profile
> +        == targetm.RTL_ifcvt_when_to_use_scratchpads
> +          && (PROFILE_ABSENT == profile_status_for_fn (cfun)
> +          || PROFILE_GUESSED == profile_status_for_fn (cfun)
> +          || predictable_edge_p (then_edge)
> +          || ! maybe_hot_bb_p (cfun, then_bb)))
> +        return FALSE;

I guess this is slightly better than no cost estimate at all.

> +      if (noce_mem_write_may_trap_or_fault_p_1 (orig_x)

So why do you want to call this function anyway? Doesn't the scratchpad 
technique protect against storing to a bad address?

> +      const size_t MEM_size = MEM_SIZE (orig_x);

No uppercase letters for variables and such. Just use "sz" or "size" for 
brevity.

> +          biggest_spad = assign_stack_local (GET_MODE (orig_x),

Still the same problems - this is the least attractive choice of a 
scratchpad location, and the code may end up allocating more scratchpads 
than you need.

> +      emit_insn_before_setloc (seq, if_info->jump,
> +                   INSN_LOCATION (if_info->insn_a));

Formatting? Could be mail client damage.

> +DEFHOOKPOD
> +(RTL_ifcvt_when_to_use_scratchpads,
> +"*",
> +enum RTL_ifcvt_when_to_use_scratchpads,
> RTL_ifcvt_use_spads_as_per_profile)
> +

No caps, and maybe a less clumsy name.

> +enum RTL_ifcvt_when_to_use_scratchpads {
> +  RTL_ifcvt_never_use_scratchpads = 0,
> +  RTL_ifcvt_always_use_scratchpads,
> +  RTL_ifcvt_use_spads_as_per_profile
> +};

Likewise. Maybe

enum ifcvt_scratchpads_strategy {
   scratchpad_never,
   scratchpad_always,
   scratchpad_unpredictable
};

So, still a NACK from my side.


Bernd
diff mbox

Patch

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 7ab738e..fb80ee9 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -56,6 +56,8 @@ 
  #include "rtl-iter.h"
  #include "ifcvt.h"

+#include <utility>
+
  #ifndef MAX_CONDITIONAL_EXECUTE
  #define MAX_CONDITIONAL_EXECUTE \
    (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +68,9 @@ 

  #define NULL_BLOCK	((basic_block) NULL)

+/* An arbitrary inclusive maximum size (in bytes) for each scratchpad.  */
+#define SCRATCHPAD_MAX_SIZE 128
+
  /* True if after combine pass.  */
  static bool ifcvt_after_combine;

@@ -110,6 +115,8 @@  static int dead_or_predicable (basic_block, basic_block, basic_block,
  			       edge, int);
  static void noce_emit_move_insn (rtx, rtx);
  static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
  
  /* Count the number of non-jump active insns in BB.  */

@@ -2828,19 +2835,16 @@  noce_operand_ok (const_rtx op)
    return ! may_trap_p (op);
  }

-/* Return true if a write into MEM may trap or fault.  */
+/* Return true if a write into MEM may trap or fault
+   even in the presence of scratchpad support.  */

  static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)
  {
-  rtx addr;
-
    if (MEM_READONLY_P (mem))
      return true;

-  if (may_trap_or_fault_p (mem))
-    return true;
-
+  rtx addr;
    addr = XEXP (mem, 0);

    /* Call target hook to avoid the effects of -fpic etc....  */
@@ -2881,6 +2885,18 @@  noce_mem_write_may_trap_or_fault_p (const_rtx mem)
    return false;
  }

+/* Return true if a write into MEM may trap or fault
+   without scratchpad support.  */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  if (may_trap_or_fault_p (mem))
+    return true;
+
+  return noce_mem_write_may_trap_or_fault_p_1 (mem);
+}
+
  /* Return whether we can use store speculation for MEM.  TOP_BB is the
     basic block above the conditional block where we are considering
     doing the speculative store.  We look for whether MEM is set
@@ -3031,7 +3047,7 @@  bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
     at converting the block.  */

  static int
-noce_process_if_block (struct noce_if_info *if_info)
+noce_process_if_block (struct noce_if_info *if_info, edge then_edge)
  {
    basic_block test_bb = if_info->test_bb;	/* test block */
    basic_block then_bb = if_info->then_bb;	/* THEN */
@@ -3047,7 +3063,7 @@  noce_process_if_block (struct noce_if_info *if_info)

       (1) if (...) x = a; else x = b;
       (2) x = b; if (...) x = a;
-     (3) if (...) x = a;   // as if with an initial x = x.
+     (3) if (...) x = a;

       The later patterns require jumps to be more expensive.
       For the if (...) x = a; else x = b; case we allow multiple insns
@@ -3200,17 +3216,127 @@  noce_process_if_block (struct noce_if_info *if_info)

    if (!set_b && MEM_P (orig_x))
      {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only.  If
-	 x is a read-only memory, then the program is valid only if we
-	 avoid the store into it.  If there are stores on both the
-	 THEN and ELSE arms, then we can go ahead with the conversion;
-	 either the program is broken, or the condition is always
-	 false such that the other memory is selected.  */
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we avoid
+	 the store into it.  If there are stores on both the THEN and ELSE arms,
+	 then we can go ahead with the conversion; either the program is broken,
+	 or the condition is always false such that the other memory is selected.
+	 The non-scratchpad-based conversion here has an implicit "else x = x;".  */
        if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  if (reload_completed
+	      || optimize < 2
+	      || optimize_function_for_size_p (cfun)
+	      || targetm.have_conditional_execution ()
+	      || !HAVE_conditional_move
+	      || (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1)))
+	      || RTL_ifcvt_never_use_scratchpads
+	           == targetm.RTL_ifcvt_when_to_use_scratchpads)
+	    return FALSE;
+
+
+	  if (RTL_ifcvt_use_spads_as_per_profile
+		== targetm.RTL_ifcvt_when_to_use_scratchpads
+	      && (PROFILE_ABSENT == profile_status_for_fn (cfun)
+		  || PROFILE_GUESSED == profile_status_for_fn (cfun)
+		  || predictable_edge_p (then_edge)
+		  || ! maybe_hot_bb_p (cfun, then_bb)))
+	    return FALSE;
+
+
+	  if (noce_mem_write_may_trap_or_fault_p_1 (orig_x)
+	      || !MEM_SIZE_KNOWN_P (orig_x))
+	    return FALSE;
+
+	  const size_t MEM_size = MEM_SIZE (orig_x);
+	  if (MEM_size > SCRATCHPAD_MAX_SIZE)
+	    return FALSE;
+
+	  rtx biggest_spad = 0;
+	  unsigned int biggest_spad_size = 0;
+	  if (size_t vec_len = scratchpads.length ())
+	    {
+	      std::pair<rtx, unsigned> tmp_pair = scratchpads[vec_len - 1];
+	      biggest_spad = tmp_pair.first;
+	      biggest_spad_size = tmp_pair.second;
+	    }
+	  if (MEM_size > biggest_spad_size)
+	    {
+	      biggest_spad_size = MEM_size;
+	      biggest_spad = assign_stack_local (GET_MODE (orig_x), MEM_size, 0);
+	      gcc_assert (biggest_spad);
+	      scratchpads.safe_push (std::make_pair (biggest_spad, MEM_size));
+	    }
+
+	  gcc_assert (biggest_spad);
+
+	  rtx reg_for_store_addr = gen_reg_rtx (Pmode);
+	  set_used_flags (reg_for_store_addr);
+
+	  start_sequence ();
+
+	  for (rtx_insn *insn = BB_HEAD (then_bb);
+	       insn && insn != insn_a && insn != BB_END (then_bb);
+	       insn = NEXT_INSN (insn))
+	    if (!(NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+	      duplicate_insn_chain (insn, insn);
+
+	  rtx target = noce_emit_cmove (if_info,
+					reg_for_store_addr,
+					GET_CODE (cond),
+					XEXP (cond, 0),
+					XEXP (cond, 1),
+					XEXP (orig_x, 0),
+					XEXP (biggest_spad, 0));
+
+	  if (!target)
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+	  if (target != reg_for_store_addr)
+	    noce_emit_move_insn (reg_for_store_addr, target);
+
+	  rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr);
+	  MEM_NOTRAP_P (mem) = MEM_NOTRAP_P (orig_x);
+	  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x);
+
+	  alias_set_type temp_alias_set = new_alias_set ();
+	  if (MEM_ALIAS_SET (orig_x))
+	    record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set);
+	  set_mem_alias_set (mem, temp_alias_set);
+
+	  set_mem_align (mem, MIN (MEM_ALIGN (biggest_spad),
+				   MEM_ALIGN (orig_x)));
+	  if (MEM_ADDR_SPACE (orig_x) != MEM_ADDR_SPACE (biggest_spad))
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+
+	  set_used_flags (mem);
+
+	  noce_emit_move_insn (mem, a);
+
+	  rtx_insn *seq = end_ifcvt_sequence (if_info);
+	  if (!seq)
+	    return FALSE;
+
+	  unshare_all_rtl_in_chain (seq);
+
+	  /* Prevent the code right after "success:"
+	     from throwing away the changes.  */
+	  x = orig_x;
+
+	  emit_insn_before_setloc (seq, if_info->jump,
+				   INSN_LOCATION (if_info->insn_a));
+	  goto success;
+
+	}

        /* Avoid store speculation: given "if (...) x = a" where x is a
  	 MEM, we only want to do the store if x is always set
@@ -3696,7 +3822,7 @@  noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,

    /* Do the real work.  */

-  if (noce_process_if_block (&if_info))
+  if (noce_process_if_block (&if_info, then_edge))
      return TRUE;

    if (HAVE_conditional_move
@@ -5003,6 +5129,9 @@  if_convert (bool after_combine)
    basic_block bb;
    int pass;

+  /* Ensure that we start the scratchpads data fresh each time.  */
+  scratchpads.truncate (0);
+
    if (optimize == 1)
      {
        df_live_add_problem ();
diff --git a/gcc/target.def b/gcc/target.def
index d29aad5..dc007e5 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5915,6 +5915,11 @@  HOOK_VECTOR_END (mode_switching)
  #include "target-insns.def"
  #undef DEF_TARGET_INSN

+DEFHOOKPOD
+(RTL_ifcvt_when_to_use_scratchpads,
+"*",
+enum RTL_ifcvt_when_to_use_scratchpads, RTL_ifcvt_use_spads_as_per_profile)
+
  /* Close the 'struct gcc_target' definition.  */
  HOOK_VECTOR_END (C90_EMPTY_HACK)

diff --git a/gcc/target.h b/gcc/target.h
index a79f424..bbe4814 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -187,6 +187,12 @@  enum vect_cost_model_location {
  #define DEFHOOK_UNDOC DEFHOOK
  #define HOOKSTRUCT(FRAGMENT) FRAGMENT

+enum RTL_ifcvt_when_to_use_scratchpads {
+  RTL_ifcvt_never_use_scratchpads = 0,
+  RTL_ifcvt_always_use_scratchpads,
+  RTL_ifcvt_use_spads_as_per_profile
+};
+
  #include "target.def"

  extern struct gcc_target targetm;