diff mbox series

[6/6] Add heuristic to take into account void* pattern.

Message ID 07eda32b-a683-52e7-caa7-97620a74e049@theobroma-systems.com
State New
Headers show
Series Dead Field Elimination and Field Reorder | expand

Commit Message

Erick Ochoa Nov. 6, 2020, 2:31 p.m. UTC
From ccd82a7e484d9e4562c23f1b9cbebf3f47e2a822 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoa@theobroma-systems.com>
Date: Fri, 16 Oct 2020 08:49:08 +0200
Subject: [PATCH 6/6] Add heuristic to take into account void* pattern.

We add a heuristic in order to be able to transform functions which
receive void* arguments as a way to generalize over arguments. An
example of this is qsort. The heuristic works by first inspecting
leaves in the call graph. If the leaves only contain a reference
to a single RECORD_TYPE then we color the nodes in the call graph
as "casts are safe in this function and does not call external
visible functions". We propagate this property up the callgraph
until a fixed point is reached. This will later be changed to
use ipa-modref.

2020-11-04  Erick Ochoa  <erick.ochoa@theobroma-systems.com>

     * ipa-type-escape-analysis.c : Add new heuristic
     * ipa-field-reorder.c : Use heuristic
     * ipa-type-escape-analysis.h : Change signatures
---
  gcc/ipa-field-reorder.c        |   3 +-
  gcc/ipa-type-escape-analysis.c | 182 +++++++++++++++++++++++++++++++--
  gcc/ipa-type-escape-analysis.h |  72 ++++++++++++-
  3 files changed, 243 insertions(+), 14 deletions(-)


@@ -1154,7 +1215,7 @@ typedef std::map<tree, field_offsets_t> 
record_field_offset_map_t;

  // Partition types into escaping or non escaping sets.
  tpartitions_t
-partition_types_into_escaping_nonescaping ();
+partition_types_into_escaping_nonescaping (std::map<tree, bool>&);

  // Compute set of not escaping unaccessed fields
  record_field_offset_map_t
@@ -1163,5 +1224,6 @@ obtain_nonescaping_unaccessed_fields 
(tpartitions_t casting,
  				      int warning);

  extern bool detected_incompatible_syntax;
+std::map<tree, bool> get_whitelisted_nodes();

  #endif /* GCC_IPA_TYPE_ESCAPE_ANALYSIS_H */
diff mbox series

Patch

diff --git a/gcc/ipa-field-reorder.c b/gcc/ipa-field-reorder.c
index 9a28097b473..2f694cff7ea 100644
--- a/gcc/ipa-field-reorder.c
+++ b/gcc/ipa-field-reorder.c
@@ -588,8 +588,9 @@  lto_fr_execute ()
    log ("here in field reordering \n");
    // Analysis.
    detected_incompatible_syntax = false;
+  std::map<tree, bool> whitelisted = get_whitelisted_nodes();
    tpartitions_t escaping_nonescaping_sets
-    = partition_types_into_escaping_nonescaping ();
+    = partition_types_into_escaping_nonescaping (whitelisted);
    record_field_map_t record_field_map = find_fields_accessed ();
    record_field_offset_map_t record_field_offset_map
      = obtain_nonescaping_unaccessed_fields (escaping_nonescaping_sets,
diff --git a/gcc/ipa-type-escape-analysis.c b/gcc/ipa-type-escape-analysis.c
index 40dc89c51a2..2fc504ce6f5 100644
--- a/gcc/ipa-type-escape-analysis.c
+++ b/gcc/ipa-type-escape-analysis.c
@@ -104,6 +104,7 @@  along with GCC; see the file COPYING3.  If not see
  #include <map>
  #include <stack>
  #include <string>
+#include <queue>

  #include "config.h"
  #include "system.h"
@@ -249,6 +250,99 @@  lto_dfe_execute ()
    return 0;
  }

+/* Heuristic to determine if casting is allowed in a function.
+ * This heuristic attempts to allow casting in functions which follow the
+ * pattern where a struct pointer or array pointer is casted to void* or
+ * char*.  The heuristic works as follows:
+ *
+ * There is a simple per-function analysis that determines whether there
+ * is more than 1 type of struct referenced in the body of the method.
+ * If there is more than 1 type of struct referenced in the body,
+ * then the layout of the structures referenced within the body
+ * cannot be casted.  However, if there's only one type of struct 
referenced
+ * in the body of the function, casting is allowed in the function itself.
+ * The logic behind this is that the if the code follows good programming
+ * practices, the only way the memory should be accessed is via a singular
+ * type. There is also another requisite to this per-function analysis, and
+ * that is that the function can only call colored functions or functions
+ * which are available in the linking unit.
+ *
+ * Using this per-function analysis, we then start coloring leaf nodes 
in the
+ * call graph as ``safe'' or ``unsafe''.  The color is propagated to the
+ * callers of the functions until a fixed point is reached.
+ */
+std::map<tree, bool>
+get_whitelisted_nodes ()
+{
+  cgraph_node *node = NULL;
+  std::set<cgraph_node *> nodes;
+  std::set<cgraph_node *> leaf_nodes;
+  std::set<tree> leaf_nodes_decl;
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+  {
+    node->get_untransformed_body ();
+    nodes.insert(node);
+    if (node->callees) continue;
+
+    leaf_nodes.insert (node);
+    leaf_nodes_decl.insert (node->decl);
+  }
+
+  std::queue<cgraph_node *> worklist;
+  for (std::set<cgraph_node*>::iterator i = leaf_nodes.begin (),
+    e = leaf_nodes.end (); i != e; ++i)
+  {
+    if (dump_file) fprintf (dump_file, "is a leaf node %s\n", 
(*i)->name ());
+    worklist.push (*i);
+  }
+
+  for (std::set<cgraph_node*>::iterator i = nodes.begin (),
+    e = nodes.end (); i != e; ++i)
+  {
+    worklist.push (*i);
+  }
+
+  std::map<tree, bool> map;
+  while (!worklist.empty ())
+  {
+
+    if (detected_incompatible_syntax) return map;
+    cgraph_node *i = worklist.front ();
+    worklist.pop ();
+    if (dump_file) fprintf (dump_file, "analyzing %s %p\n", i->name (), 
(void*)i);
+    GimpleWhiteLister whitelister;
+    whitelister._walk_cnode (i);
+    bool no_external = whitelister.does_not_call_external_functions (i, 
map);
+    bool before_in_map = map.find (i->decl) != map.end ();
+    bool place_callers_in_worklist = !before_in_map;
+    if (!before_in_map)
+    {
+      map.insert(std::pair<tree, bool>(i->decl, no_external));
+    } else
+    {
+      map[i->decl] = no_external;
+    }
+    bool previous_value = map[i->decl];
+    place_callers_in_worklist |= previous_value != no_external;
+    if (previous_value != no_external)
+    {
+       // This ensures we are having a total order
+       // from no_external -> !no_external
+       gcc_assert (!previous_value);
+       gcc_assert (no_external);
+    }
+
+    for (cgraph_edge *e = i->callers; place_callers_in_worklist && e;
+      e = e->next_caller)
+    {
+      worklist.push (e->caller);
+    }
+  }
+
+  return map;
+
+}
+
  /*
   * Perform dead field elimination at link-time.
   * This transformation is composed of two main stages:
@@ -261,11 +355,14 @@  lto_dead_field_elimination ()
  {
    // Analysis.
    detected_incompatible_syntax = false;
+  std::map<tree, bool> whitelisted = get_whitelisted_nodes ();
    tpartitions_t escaping_nonescaping_sets
-    = partition_types_into_escaping_nonescaping ();
+    = partition_types_into_escaping_nonescaping (whitelisted);
+  if (detected_incompatible_syntax) return;
    record_field_map_t record_field_map = find_fields_accessed ();
+  if (detected_incompatible_syntax) return;
    record_field_offset_map_t record_field_offset_map
-    = obtain_nonescaping_unaccessed_fields (escaping_nonescaping_sets,
+    = obtain_nonescaping_unaccessed_fields (escaping_nonescaping_sets,
  					    record_field_map, OPT_Wdfa);
    if (detected_incompatible_syntax || record_field_offset_map.empty ())
      return;
@@ -301,11 +398,12 @@  
partition_types_into_record_reaching_or_non_record_reaching ()
   * which types are escaping AND are being casted.
   */
  tpartitions_t
-partition_types_into_escaping_nonescaping ()
+partition_types_into_escaping_nonescaping (std::map<tree, bool> 
&whitelisted)
  {
    tpartitions_t partitions
      = partition_types_into_record_reaching_or_non_record_reaching ();
-  GimpleCaster caster (partitions);
+  if (detected_incompatible_syntax) return partitions;
+  GimpleCaster caster (partitions, whitelisted);
    caster.walk ();

    partitions = caster.get_sets ();
@@ -1186,6 +1284,7 @@  GimpleWalker::walk ()

        if (dump_file)
  	dump_function_to_file (node->decl, dump_file, TDF_NONE);
+
        _walk_cnode (node);
        fndecls.insert (decl);
      }
@@ -1244,6 +1343,7 @@  void
  GimpleWalker::_walk_cnode (cgraph_node *cnode)
  {
    gcc_assert (cnode);
+  currently_walking = cnode;
    _walk_decl (cnode);
    _walk_locals (cnode);
    _walk_ssa_names (cnode);
@@ -1526,7 +1626,6 @@  GimpleWalker::_walk_gphi (__attribute__((unused)) 
gphi *g)
  {
  }

-
  void
  TypeCollector::collect (tree t)
  {
@@ -1741,6 +1840,7 @@  TypeCollector::_walk_RECORD_TYPE_post (tree t)
        i->second = true;
      }
    _collect_simple (t);
+
  }

  void
@@ -1788,9 +1888,30 @@  TypeCollector::_walk_METHOD_TYPE_pre (tree t)
  inline void
  ExprCollector::_walk_pre (tree e)
  {
+  if (!e) return;
    tree t = TREE_TYPE (e);
    gcc_assert (t);
    typeCollector.collect (t);
+
+  if (RECORD_TYPE != TREE_CODE (t)) return;
+
+  if (typeCollector.ptrset.records.empty ()) {
+    typeCollector.ptrset.records.insert (TYPE_MAIN_VARIANT (t));
+    return;
+  }
+
+  for (std::set<tree>::iterator
+	i = typeCollector.ptrset.records.begin (),
+	e = typeCollector.ptrset.records.end (); i != e; ++i)
+  {
+    tree r = *i;
+    TypeIncompleteEquality structuralEquality;
+    bool is_same = structuralEquality.equal (TYPE_MAIN_VARIANT (r), 
TYPE_MAIN_VARIANT (t));
+    if (is_same) continue;
+
+    TypeStringifier stringifier;
+    typeCollector.ptrset.records.insert (TYPE_MAIN_VARIANT (t));
+  }
  }

  /*
@@ -1804,6 +1925,7 @@  ExprCollector::_walk_pre (tree e)
  void
  GimpleTypeCollector::_walk_pre_tree (tree t)
  {
+  if (!t) return;
    exprCollector.walk (t);
  }

@@ -1880,6 +2002,17 @@  GimpleTypeCollector::_walk_pre_gcall (gcall *s)
    exprCollector.walk (lhs);
  }

+void
+GimpleTypeCollector::_walk_pre_gdebug (gdebug *s)
+{
+  if (!gimple_debug_bind_p(s)) return;
+  tree var = gimple_debug_bind_get_var(s);
+  if (var) {
+	  gcc_assert(TREE_TYPE(var));
+	  exprCollector.walk(var);
+  }
+}
+
  // Print working set.
  void
  GimpleTypeCollector::print_collected ()
@@ -2447,16 +2580,23 @@  GimpleCaster::_walk_pre_gassign (gassign *s)
    tree t_lhs = TREE_TYPE (lhs);
    tree t_rhs = TREE_TYPE (rhs);
    gcc_assert (t_lhs && t_rhs);
-  bool is_cast = !equality.equal (t_lhs, t_rhs);
+  bool is_cast = !equality.equal (TYPE_MAIN_VARIANT(t_lhs), 
TYPE_MAIN_VARIANT(t_rhs));
    // If it is cast, we might need to look at the definition of rhs
    // If the definition comes from a known function... then we are good...
    bool is_ssa = TREE_CODE (rhs) == SSA_NAME;
-  is_cast = is_ssa ? follow_def_to_find_if_really_cast (rhs) : is_cast;
+  is_cast = is_cast && is_ssa ? follow_def_to_find_if_really_cast (rhs) 
: is_cast;
+  // we allow casts to pointers...
+  is_cast = is_cast && !(t_lhs == ptr_type_node);
+

    reason.type_is_casted = is_cast;
+  bool in_map = _whitelisted.find (currently_walking->decl) != 
_whitelisted.end ();
+  bool whitelisted = in_map && _whitelisted[currently_walking->decl];
+  if (whitelisted) goto escaper_label;
    exprEscaper.typeEscaper.update (TREE_TYPE (lhs), reason);
    exprEscaper.typeEscaper.update (TREE_TYPE (rhs), reason);

+escaper_label:
    GimpleEscaper::_walk_pre_gassign (s);
  }

@@ -2479,9 +2619,19 @@  GimpleCaster::_walk_pre_gcall (gcall *s)
      return;

    tree f_t = TREE_TYPE (fn);
+  if (_whitelisted.find(fn) != _whitelisted.end() && _whitelisted[fn]) 
return;
+  bool in_map = _whitelisted.find(currently_walking->decl) != 
_whitelisted.end();
+  bool whitelisted = in_map && _whitelisted[currently_walking->decl];
+  if (whitelisted) return;
+
+  if (!currently_walking->callers) return;
+
+  if (!node && currently_walking->inlined_to) return;
+
    TypeIncompleteEquality equality;

    unsigned i = 0;
+  TypeStringifier stringifier;
    for (tree a = TYPE_ARG_TYPES (f_t); NULL_TREE != a; a = TREE_CHAIN (a))
      {
        tree formal_t = TREE_VALUE (a);
@@ -2494,6 +2644,7 @@  GimpleCaster::_walk_pre_gcall (gcall *s)
        tree real = gimple_call_arg (s, i);
        tree real_t = TREE_TYPE (real);
        const bool is_casted = !equality.equal (formal_t, real_t);
+      log("arg %s is casted: %s\n", 
stringifier.stringify(real_t).c_str(), is_casted ? "T" : "F");
        Reason arg_reason;
        arg_reason.type_is_casted = is_casted;
        exprEscaper.typeEscaper.update (TREE_TYPE (real), arg_reason);
@@ -3251,7 +3402,12 @@  TypeStructuralEquality::_equal_code (tree l, tree r)
    const enum tree_code code_l = TREE_CODE (l);
    const enum tree_code code_r = TREE_CODE (r);
    const bool equal = code_l == code_r;
-  return equal;
+  bool array_or_ptr_l = TREE_CODE (l) == ARRAY_TYPE
+    || TREE_CODE (l) == POINTER_TYPE;
+  bool array_or_ptr_r = TREE_CODE (r) == ARRAY_TYPE
+    || TREE_CODE (r) == POINTER_TYPE;
+  bool array_or_ptr = array_or_ptr_l && array_or_ptr_r;
+  return equal || array_or_ptr;
  }

  #define TSE_FUNC_DEF_SIMPLE(code) 					  \
@@ -3273,7 +3429,17 @@  bool
  TypeStructuralEquality::_equal_wrapper (tree l, tree r)
  {
    tree inner_l = TREE_TYPE (l);
+  if (TREE_CODE (inner_l) == ARRAY_TYPE
+    && TREE_CODE (TREE_TYPE (inner_l)) == POINTER_TYPE )
+  {
+    inner_l = TREE_TYPE(inner_l);
+  }
    tree inner_r = TREE_TYPE (r);
+  if (TREE_CODE (inner_r) == ARRAY_TYPE
+    && TREE_CODE(TREE_TYPE (inner_r)) == POINTER_TYPE )
+  {
+    inner_r = TREE_TYPE(inner_r);
+  }
    return _equal (inner_l, inner_r);
  }

diff --git a/gcc/ipa-type-escape-analysis.h b/gcc/ipa-type-escape-analysis.h
index f29127b7f03..35f96ff19d5 100644
--- a/gcc/ipa-type-escape-analysis.h
+++ b/gcc/ipa-type-escape-analysis.h
@@ -313,6 +313,8 @@  protected:
     */
    bool _deleted;

+  cgraph_node *currently_walking;
+
    /* Walk global variables.  */
    void _walk_globals ();

@@ -320,7 +322,7 @@  protected:
    virtual void _walk_global (varpool_node *v);

    /* Will walk declarations, locals, ssa names, and basic blocks.  */
-  void _walk_cnode (cgraph_node *cnode);
+  virtual void _walk_cnode (cgraph_node *cnode);

    /* This will walk the CNODE->decl.  */
    void _walk_decl (cgraph_node *cnode);
@@ -436,6 +438,9 @@  struct type_partitions_s
    /* The set of all non escaping types.  */
    tset_t non_escaping;

+  /* The set of all records. */
+  tset_t records;
+
    /* Determine if we have seen this type before.  */
    bool in_universe (tree) const;

@@ -491,8 +496,11 @@  private:
     * from PTR.
     * This datastructure persists across calls to collect.
     */
+public:
    tpartitions_t ptrset;

+private:
+
    /* Sanity check determines that the partitions are mutually
     * exclusive.
     */
@@ -645,9 +653,9 @@  public:
      return typeCollector.get_record_reaching_trees ();
    }

-private:
    TypeCollector typeCollector;

+private:
    /* Catch all callback for all nested expressions E.  */
    virtual void _walk_pre (tree e);
  };
@@ -673,9 +681,10 @@  public:
    // TODO: I believe this could be made const.
    void print_collected ();

-private:
    ExprCollector exprCollector;

+private:
+
    /* Call back for global variables.  */
    virtual void _walk_pre_tree (tree);

@@ -690,6 +699,55 @@  private:

    /* Call back for gcall.  */
    virtual void _walk_pre_gcall (gcall *s);
+
+  /* Call back for gdebug. */
+  virtual void _walk_pre_gdebug (gdebug *s);
+
+};
+
+class GimpleWhiteLister : GimpleTypeCollector
+{
+public:
+  GimpleWhiteLister ()
+  {};
+
+  bool _no_external = true;
+  bool does_not_call_external_functions (cgraph_node *c,
+	std::map<tree, bool> &whitelisted)
+  {
+    gcc_assert(c);
+
+    for (cgraph_edge *edge = c->callees; edge; edge = edge->next_callee)
+    {
+       cgraph_node *callee = edge->callee;
+       if (callee == c) continue;
+       bool in_map = whitelisted.find (callee->decl) != whitelisted.end();
+       if (!in_map) {
+	       return false;
+       }
+       bool w = whitelisted[callee->decl];
+       if (!w) {
+	       return false;
+       }
+    }
+
+    unsigned int how_many_records =
+	    exprCollector.typeCollector.ptrset.records.size ();
+    return how_many_records <= 1;
+
+  }
+
+  /* Will walk declarations, locals, ssa names, and basic blocks.  */
+  virtual void _walk_cnode (cgraph_node *cnode)
+  {
+    GimpleTypeCollector::_walk_cnode(cnode);
+  }
+
+private:
+  virtual void _walk_pre_gcall (__attribute__((unused))gcall *s)
+  {
+    this->_no_external = false;
+  }
  };

  // Reason for why a type is escaping
@@ -1001,10 +1059,13 @@  protected:
  class GimpleCaster : public GimpleEscaper
  {
  public:
-  GimpleCaster (tpartitions_t &types) : GimpleEscaper (types)
+  GimpleCaster (tpartitions_t &types, std::map<tree, bool> 
&whitelisted) : GimpleEscaper (types), _whitelisted(whitelisted)
  {};

  private:
+
+  std::map<tree, bool> &_whitelisted;
+
    /* Determine if cast comes from a known function.  */
    static bool follow_def_to_find_if_really_cast (tree);