diff mbox series

Extend fnsummary to predict SRA oppurtunities

Message ID ZI9phUDoOu81ivtb@kam.mff.cuni.cz
State New
Headers show
Series Extend fnsummary to predict SRA oppurtunities | expand

Commit Message

Jan Hubicka June 18, 2023, 8:31 p.m. UTC
Hi,
this patch extends ipa-fnsummary to anticipate statements that will be removed
by SRA.  This is done by looking for calls passing addresses of automatic
variables.  In function body we look for dereferences from pointers of such
variables and mark them with new not_sra_candidate condition.

This is just first step which is overly optimistic.  We do not try to prove that
given automatic variable will not be SRAed even after inlining.  We now also
optimistically assume that the transformation will always happen.  I will restrict
this in a followup patch, but I think it is useful to gether some data on how
much code is affected by this.

This is motivated by PR109849 where we fail to fully inline push_back.
The patch alone does not solve the problem even for -O3, but improves
analysis in this case.

Bootstrapped/regtested x86_64-linux, commited.

gcc/ChangeLog:

	PR tree-optimization/109849
	* ipa-fnsummary.cc (evaluate_conditions_for_known_args): Add new parameter
	ES; handle ipa_predicate::not_sra_candidate.
	(evaluate_properties_for_edge): Pass es to
	evaluate_conditions_for_known_args.
	(ipa_fn_summary_t::duplicate): Handle sra candidates.
	(dump_ipa_call_summary): Dump points_to_possible_sra_candidate.
	(load_or_store_of_ptr_parameter): New function.
	(points_to_possible_sra_candidate_p): New function.
	(analyze_function_body): Initialize points_to_possible_sra_candidate;
	determine sra predicates.
	(estimate_ipcp_clone_size_and_time): Update call of
	evaluate_conditions_for_known_args.
	(remap_edge_params): Update points_to_possible_sra_candidate.
	(read_ipa_call_summary): Stream points_to_possible_sra_candidate
	(write_ipa_call_summary): Likewise.
	* ipa-predicate.cc (ipa_predicate::add_clause): Handle not_sra_candidate.
	(dump_condition): Dump it.
	* ipa-predicate.h (struct inline_param_summary): Add
	points_to_possible_sra_candidate.

gcc/testsuite/ChangeLog:

	PR tree-optimization/109849
	* g++.dg/ipa/devirt-45.C: Update template.

Comments

Jan Hubicka June 18, 2023, 8:41 p.m. UTC | #1
Hi,
as noticed by Jeff, this patch also triggers warning in one of LTO
testcases.  The testcase is reduced and warning seems legit, triggered
by extra inlining.  So I have just silenced it.

Honza

gcc/testsuite/ChangeLog:

	* gcc.dg/lto/20091013-1_0.c: Disable stringop-overread warning.

diff --git a/gcc/testsuite/gcc.dg/lto/20091013-1_0.c b/gcc/testsuite/gcc.dg/lto/20091013-1_0.c
index afceb2436cd..7737e252b99 100644
--- a/gcc/testsuite/gcc.dg/lto/20091013-1_0.c
+++ b/gcc/testsuite/gcc.dg/lto/20091013-1_0.c
@@ -2,7 +2,7 @@
 /* { dg-require-effective-target fpic } */
 /* { dg-require-effective-target ptr_eq_long } */
 /* { dg-lto-options {{-fPIC -r -nostdlib -flto} {-fPIC -r -nostdlib -O2 -flto}} } */
-/* { dg-extra-ld-options "-flinker-output=nolto-rel" } */
+/* { dg-extra-ld-options "-flinker-output=nolto-rel -Wno-stringop-overread" } */
 
 void * HeapAlloc(void*,unsigned int,unsigned long);
diff mbox series

Patch

diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc
index a301396fb3f..a5f5a50c8a5 100644
--- a/gcc/ipa-fnsummary.cc
+++ b/gcc/ipa-fnsummary.cc
@@ -371,7 +371,8 @@  evaluate_conditions_for_known_args (struct cgraph_node *node,
 				    bool inline_p,
 				    ipa_auto_call_arg_values *avals,
 				    clause_t *ret_clause,
-				    clause_t *ret_nonspec_clause)
+				    clause_t *ret_nonspec_clause,
+				    ipa_call_summary *es)
 {
   clause_t clause = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
   clause_t nonspec_clause = 1 << ipa_predicate::not_inlined_condition;
@@ -386,6 +387,17 @@  evaluate_conditions_for_known_args (struct cgraph_node *node,
       int j;
       struct expr_eval_op *op;
 
+      if (c->code == ipa_predicate::not_sra_candidate)
+	{
+	  if (!inline_p
+	      || !es
+	      || (int)es->param.length () <= c->operand_num
+	      || !es->param[c->operand_num].points_to_possible_sra_candidate)
+	    clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
+	  nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
+	  continue;
+	}
+
       if (c->agg_contents)
 	{
 	  if (c->code == ipa_predicate::changed
@@ -592,6 +604,7 @@  evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
   class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
   class ipa_edge_args *args;
+  class ipa_call_summary *es = NULL;
 
   if (clause_ptr)
     *clause_ptr = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
@@ -603,8 +616,8 @@  evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
     {
       struct cgraph_node *caller;
       class ipa_node_params *caller_parms_info, *callee_pi = NULL;
-      class ipa_call_summary *es = ipa_call_summaries->get (e);
       int i, count = ipa_get_cs_argument_count (args);
+      es = ipa_call_summaries->get (e);
 
       if (count)
 	{
@@ -720,7 +733,7 @@  evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
     }
 
   evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
-				      nonspec_clause_ptr);
+				      nonspec_clause_ptr, es);
 }
 
 
@@ -847,6 +860,7 @@  ipa_fn_summary_t::duplicate (cgraph_node *src,
 					  &possible_truths,
 					  /* We are going to specialize,
 					     so ignore nonspec truths.  */
+					  NULL,
 					  NULL);
 
       info->account_size_time (0, 0, true_pred, true_pred);
@@ -1035,6 +1049,9 @@  dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
 	    if (es->param[i].points_to_local_or_readonly_memory)
 	      fprintf (f, "%*s op%i points to local or readonly memory\n",
 		       indent + 2, "", i);
+	    if (es->param[i].points_to_possible_sra_candidate)
+	      fprintf (f, "%*s op%i points to possible sra candidate\n",
+		       indent + 2, "", i);
 	  }
       if (!edge->inline_failed)
 	{
@@ -1291,6 +1308,35 @@  unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
 				 size_p, &aggpos->by_ref);
 }
 
+/* If stmt is simple load or store of value pointed to by a function parmaeter,
+   return its index.  */
+
+static int
+load_or_store_of_ptr_parameter (ipa_func_body_info *fbi, gimple *stmt)
+{
+  if (!optimize)
+    return -1;
+  gassign *assign = dyn_cast <gassign *> (stmt);
+  if (!assign)
+    return -1;
+  tree param;
+  if (gimple_assign_load_p (stmt))
+    param = gimple_assign_rhs1 (stmt);
+  else if (gimple_store_p (stmt))
+    param = gimple_assign_lhs (stmt);
+  else
+    return -1;
+  tree base = get_base_address (param);
+  if (TREE_CODE (base) != MEM_REF
+      || TREE_CODE (TREE_OPERAND (base, 0)) != SSA_NAME
+      || !SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base, 0)))
+    return -1;
+  tree p = SSA_NAME_VAR (TREE_OPERAND (base, 0));
+  if (TREE_CODE (p) != PARM_DECL)
+    return -1;
+  return ipa_get_param_decl_index (fbi->info, p);
+}
+
 /* See if statement might disappear after inlining.
    0 - means not eliminated
    1 - half of statements goes away
@@ -2585,6 +2631,23 @@  points_to_local_or_readonly_memory_p (tree t)
   return false;
 }
 
+/* Return true if T is a pointer pointing to memory location that is possible
+   sra candidate if all functions it is passed to are inlined.  */
+
+static bool
+points_to_possible_sra_candidate_p (tree t)
+{
+  if (TREE_CODE (t) != ADDR_EXPR)
+    return false;
+
+  t = get_base_address (TREE_OPERAND (t, 0));
+
+  /* Automatic variables are fine.  */
+  if (DECL_P (t)
+      && auto_var_in_fn_p (t, current_function_decl))
+    return true;
+  return false;
+}
 
 /* Analyze function body for NODE.
    EARLY indicates run from early optimization pipeline.  */
@@ -2797,6 +2860,9 @@  analyze_function_body (struct cgraph_node *node, bool early)
 		      es->param[i].points_to_local_or_readonly_memory
 			 = points_to_local_or_readonly_memory_p
 			     (gimple_call_arg (stmt, i));
+		      es->param[i].points_to_possible_sra_candidate
+			 = points_to_possible_sra_candidate_p
+			     (gimple_call_arg (stmt, i));
 		    }
 		}
 	      /* We cannot setup VLA parameters during inlining.  */
@@ -2856,6 +2921,12 @@  analyze_function_body (struct cgraph_node *node, bool early)
 		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
 
 	      ipa_predicate p = bb_predicate & will_be_nonconstant;
+	      int parm = load_or_store_of_ptr_parameter (&fbi, stmt);
+	      ipa_predicate sra_predicate = true;
+	      if (parm != -1)
+		sra_predicate &= add_condition (info, params_summary, parm,
+						ptr_type_node, NULL,
+						ipa_predicate::not_sra_candidate, NULL, 0);
 
 	      /* We can ignore statement when we proved it is never going
 		 to happen, but we cannot do that for call statements
@@ -2875,7 +2946,7 @@  analyze_function_body (struct cgraph_node *node, bool early)
 		  if (prob)
 		    {
 		      ipa_predicate ip
-			= bb_predicate & ipa_predicate::not_inlined ();
+			= bb_predicate & ipa_predicate::not_inlined () & sra_predicate;
 		      info->account_size_time (this_size * prob,
 					       (final_time * prob) / 2, ip,
 					       p);
@@ -2883,7 +2954,7 @@  analyze_function_body (struct cgraph_node *node, bool early)
 		  if (prob != 2)
 		    info->account_size_time (this_size * (2 - prob),
 					     (final_time * (2 - prob) / 2),
-					     bb_predicate,
+					     bb_predicate & sra_predicate,
 					     p);
 		}
 
@@ -3930,7 +4001,7 @@  estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
   clause_t clause, nonspec_clause;
 
   evaluate_conditions_for_known_args (node, false, avals, &clause,
-				      &nonspec_clause);
+				      &nonspec_clause, NULL);
   ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
   ctx.estimate_size_and_time (estimates);
 }
@@ -4024,6 +4095,9 @@  remap_edge_params (struct cgraph_edge *inlined_edge,
 		  if (inlined_es
 			->param[id].points_to_local_or_readonly_memory)
 		    es->param[i].points_to_local_or_readonly_memory = true;
+		  if (inlined_es
+			->param[id].points_to_possible_sra_candidate)
+		    es->param[i].points_to_possible_sra_candidate = true;
 		}
 	      if (!es->param[i].points_to_local_or_readonly_memory
 		  && jfunc->type == IPA_JF_CONST
@@ -4420,8 +4494,11 @@  read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
       for (i = 0; i < length; i++)
 	{
 	  es->param[i].change_prob = streamer_read_uhwi (ib);
+	  bitpack_d bp = streamer_read_bitpack (ib);
 	  es->param[i].points_to_local_or_readonly_memory
-	    = streamer_read_uhwi (ib);
+	    = bp_unpack_value (&bp, 1);	
+	  es->param[i].points_to_possible_sra_candidate
+	    = bp_unpack_value (&bp, 1);	
 	}
     }
   else
@@ -4692,7 +4769,10 @@  write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
   for (i = 0; i < (int) es->param.length (); i++)
     {
       streamer_write_uhwi (ob, es->param[i].change_prob);
-      streamer_write_uhwi (ob, es->param[i].points_to_local_or_readonly_memory);
+      bp = bitpack_create (ob->main_stream);
+      bp_pack_value (&bp, es->param[i].points_to_local_or_readonly_memory, 1);
+      bp_pack_value (&bp, es->param[i].points_to_possible_sra_candidate, 1);
+      streamer_write_bitpack (&bp);
     }
 }
 
diff --git a/gcc/ipa-predicate.cc b/gcc/ipa-predicate.cc
index 29478692b17..09a253abc93 100644
--- a/gcc/ipa-predicate.cc
+++ b/gcc/ipa-predicate.cc
@@ -132,7 +132,7 @@  ipa_predicate::add_clause (conditions conditions, clause_t new_clause)
 	cc1 = &(*conditions)[c1 - ipa_predicate::first_dynamic_condition];
 	/* We have no way to represent !changed and !is_not_constant
 	   and thus there is no point for looking for them.  */
-	if (cc1->code == changed || cc1->code == is_not_constant)
+	if (cc1->code == changed || cc1->code == is_not_constant || cc1->code == not_sra_candidate)
 	  continue;
 	for (c2 = c1 + 1; c2 < num_conditions; c2++)
 	  if (new_clause & (1 << c2))
@@ -142,6 +142,7 @@  ipa_predicate::add_clause (conditions conditions, clause_t new_clause)
 	      if (cc1->operand_num == cc2->operand_num
 		  && vrp_operand_equal_p (cc1->val, cc2->val)
 		  && cc2->code != is_not_constant
+		  && cc2->code != not_sra_candidate
 		  && cc2->code != changed
 		  && expr_eval_ops_equal_p (cc1->param_ops, cc2->param_ops)
 		  && cc2->agg_contents == cc1->agg_contents
@@ -416,6 +417,11 @@  dump_condition (FILE *f, conditions conditions, int cond)
 	  fprintf (f, " changed");
 	  return;
 	}
+      if (c->code == ipa_predicate::not_sra_candidate)
+	{
+	  fprintf (f, " not sra candidate");
+	  return;
+	}
       fprintf (f, " %s ", op_symbol_code (c->code));
       print_generic_expr (f, c->val);
     }
diff --git a/gcc/ipa-predicate.h b/gcc/ipa-predicate.h
index 2882bf8e6f0..5abf253d096 100644
--- a/gcc/ipa-predicate.h
+++ b/gcc/ipa-predicate.h
@@ -78,16 +78,20 @@  struct inline_param_summary
      Value 0 is reserved for compile time invariants. */
   short change_prob;
   unsigned points_to_local_or_readonly_memory : 1;
+  unsigned points_to_possible_sra_candidate : 1;
   bool equal_to (const inline_param_summary &other) const
   {
     return change_prob == other.change_prob
 	   && points_to_local_or_readonly_memory
-	      == other.points_to_local_or_readonly_memory;
+	      == other.points_to_local_or_readonly_memory
+	   && points_to_possible_sra_candidate
+	      == other.points_to_possible_sra_candidate;
   }
   bool useless_p (void) const
   {
     return change_prob == REG_BR_PROB_BASE
-	   && !points_to_local_or_readonly_memory;
+	   && !points_to_local_or_readonly_memory
+	   && !points_to_possible_sra_candidate;
   }
 };
 
@@ -135,6 +139,9 @@  public:
      percentage of executions even when they are not compile time constants.  */
   static const tree_code changed = IDENTIFIER_NODE;
 
+  /* Special condition code we use to check that the parameter is likely SRA
+     candidate.  */
+  static const tree_code not_sra_candidate = TREE_LIST;
 
 
   /* Initialize predicate either to true of false depending on P.  */
diff --git a/gcc/testsuite/g++.dg/ipa/devirt-45.C b/gcc/testsuite/g++.dg/ipa/devirt-45.C
index ce415e7c003..c26be21964c 100644
--- a/gcc/testsuite/g++.dg/ipa/devirt-45.C
+++ b/gcc/testsuite/g++.dg/ipa/devirt-45.C
@@ -37,5 +37,5 @@  int main()
 }
 
 /* One invocation is A::foo () other is B::foo () even though the type is destroyed and rebuilt in test() */
-/* { dg-final { scan-ipa-dump-times "Discovered a virtual call to a known target\[^\\n\]*A::foo" 1 "inline"  } } */
+/* { dg-final { scan-ipa-dump-times "Discovered a virtual call to a known target\[^\\n\]*A::foo" 2 "inline"  } } */
 /* { dg-final { scan-ipa-dump-times "Discovered a virtual call to a known target\[^\\n\]*B::foo" 1 "inline"  } } */