Message ID | 1573867416-55618-29-git-send-email-dmalcolm@redhat.com |
---|---|
State | New |
Headers | show |
Series | RFC: Add a static analysis framework to GCC | expand |
On 11/15/19, David Malcolm <dmalcolm@redhat.com> wrote: > gcc/ChangeLog: > * analyzer/analyzer.cc: New file. > * analyzer/analyzer.h: New file. > --- > gcc/analyzer/analyzer.cc | 125 > ++++++++++++++++++++++++++++++++++++++++++++++ > gcc/analyzer/analyzer.h | 126 > +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 251 insertions(+) > create mode 100644 gcc/analyzer/analyzer.cc > create mode 100644 gcc/analyzer/analyzer.h > > diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc > new file mode 100644 > index 0000000..399925c > --- /dev/null > +++ b/gcc/analyzer/analyzer.cc > @@ -0,0 +1,125 @@ > +/* Utility functions for the analyzer. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalcolm@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#include "config.h" > +#include "gcc-plugin.h" > +#include "system.h" > +#include "coretypes.h" > +#include "tree.h" > +#include "gimple.h" > +#include "diagnostic.h" > +#include "intl.h" > +#include "analyzer/analyzer.h" > + > +/* Helper function for checkers. Is the CALL to the given function name? > */ > + > +bool > +is_named_call_p (const gcall *call, const char *funcname) > +{ > + gcc_assert (funcname); > + > + tree fndecl = gimple_call_fndecl (call); > + if (!fndecl) > + return false; > + > + return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname); > +} > + > +/* Helper function for checkers. Is the CALL to the given function name, > + and with the given number of arguments? */ > + > +bool > +is_named_call_p (const gcall *call, const char *funcname, > + unsigned int num_args) > +{ > + gcc_assert (funcname); > + > + if (!is_named_call_p (call, funcname)) > + return false; > + > + if (gimple_call_num_args (call) != num_args) > + return false; > + > + return true; > +} > + > +/* Return true if stmt is a setjmp call. */ > + > +bool > +is_setjmp_call_p (const gimple *stmt) > +{ > + /* TODO: is there a less hacky way to check for "setjmp"? */ > + if (const gcall *call = dyn_cast <const gcall *> (stmt)) > + if (is_named_call_p (call, "_setjmp", 1)) > + return true; > + > + return false; > +} > + > +/* Return true if stmt is a longjmp call. */ > + > +bool > +is_longjmp_call_p (const gcall *call) > +{ > + /* TODO: is there a less hacky way to check for "longjmp"? */ > + if (is_named_call_p (call, "longjmp", 2)) > + return true; > + > + return false; > +} > + > +/* Generate a label_text instance by formatting FMT, using a > + temporary clone of the global_dc's printer (thus using its > + formatting callbacks). > + > + Colorize if the global_dc supports colorization and CAN_COLORIZE is > + true. */ > + > +label_text > +make_label_text (bool can_colorize, const char *fmt, ...) > +{ > + pretty_printer *pp = global_dc->printer->clone (); > + pp_clear_output_area (pp); > + > + if (!can_colorize) > + pp_show_color (pp) = false; > + > + text_info ti; > + rich_location rich_loc (line_table, UNKNOWN_LOCATION); > + > + va_list ap; > + > + va_start (ap, fmt); > + > + ti.format_spec = _(fmt); > + ti.args_ptr = ≈ > + ti.err_no = 0; > + ti.x_data = NULL; > + ti.m_richloc = &rich_loc; > + > + pp_format (pp, &ti); > + pp_output_formatted_text (pp); > + > + va_end (ap); > + > + label_text result = label_text::take (xstrdup (pp_formatted_text (pp))); > + delete pp; > + return result; > +} > diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h > new file mode 100644 > index 0000000..ace8924 > --- /dev/null > +++ b/gcc/analyzer/analyzer.h > @@ -0,0 +1,126 @@ > +/* Utility functions for the analyzer. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalcolm@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#ifndef GCC_ANALYZER_ANALYZER_H > +#define GCC_ANALYZER_ANALYZER_H > + > +/* Forward decls of common types, with indentation to show inheritance. */ I'm wondering about the "with indentation to show inheritance" part... does that require tweaking any editor configuration files or adding /*INDENT-OFF*/ comments or anything to prevent automatic formatting tools from "fixing" the indentation to go back to the normal style of having everything be aligned? > + > +class graphviz_out; > +class supergraph; > +class supernode; > +class superedge; > + class cfg_superedge; > + class switch_cfg_superedge; > + class callgraph_superedge; > + class call_superedge; > + class return_superedge; > +class svalue; > + class region_svalue; > + class constant_svalue; > + class poisoned_svalue; > + class unknown_svalue; > + class setjmp_svalue; > +class region; > + class map_region; > + class symbolic_region; > +class region_model; > +class region_model_context; > + class impl_region_model_context; > +class constraint_manager; > +class equiv_class; > +struct model_merger; > +struct svalue_id_merger_mapping; > +struct canonicalization; > +class pending_diagnostic; > +class checker_path; > +class extrinsic_state; > +class sm_state_map; > +class stmt_finder; > +class program_point; > +class program_state; > +class exploded_graph; > +class exploded_node; > +class exploded_edge; > +class exploded_cluster; > +class exploded_path; > +class analysis_plan; > +class state_purge_map; > +class state_purge_per_ssa_name; > +class state_change; > + > +//////////////////////////////////////////////////////////////////////////// > + > +extern bool is_named_call_p (const gcall *call, const char *funcname); > +extern bool is_named_call_p (const gcall *call, const char *funcname, > + unsigned int num_args); > +extern bool is_setjmp_call_p (const gimple *stmt); > +extern bool is_longjmp_call_p (const gcall *call); > + > +extern void register_analyzer_pass (); > + > +extern label_text make_label_text (bool can_colorize, const char *fmt, > ...); > + > +//////////////////////////////////////////////////////////////////////////// > + > +/* An RAII-style class for pushing/popping cfun within a scope. > + Doing so ensures we get "In function " announcements > + from the diagnostics subsystem. */ > + > +class auto_cfun > +{ > +public: > + auto_cfun (function *fun) { push_cfun (fun); } > + ~auto_cfun () { pop_cfun (); } > +}; > + > +//////////////////////////////////////////////////////////////////////////// > + > +/* Begin suppressing -Wformat and -Wformat-extra-args. */ > + > +#define PUSH_IGNORE_WFORMAT \ > + _Pragma("GCC diagnostic push") \ > + _Pragma("GCC diagnostic ignored \"-Wformat\"") \ > + _Pragma("GCC diagnostic ignored \"-Wformat-extra-args\"") > + > +/* Finish suppressing -Wformat and -Wformat-extra-args. */ > + > +#define POP_IGNORE_WFORMAT \ > + _Pragma("GCC diagnostic pop") > + > +//////////////////////////////////////////////////////////////////////////// > + > +/* A template for creating hash traits for a POD type. */ > + > +template <typename Type> > +struct pod_hash_traits : typed_noop_remove<Type> > +{ > + typedef Type value_type; > + typedef Type compare_type; > + static inline hashval_t hash (value_type); > + static inline bool equal (const value_type &existing, > + const value_type &candidate); > + static inline void mark_deleted (Type &); > + static inline void mark_empty (Type &); > + static inline bool is_deleted (Type); > + static inline bool is_empty (Type); > +}; > + > +#endif /* GCC_ANALYZER_ANALYZER_H */ > -- > 1.8.5.3 > >
On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote: > gcc/ChangeLog: > * analyzer/analyzer.cc: New file. > * analyzer/analyzer.h: New file. > --- > gcc/analyzer/analyzer.cc | 125 > ++++++++++++++++++++++++++++++++++++++++++++++ > gcc/analyzer/analyzer.h | 126 > +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 251 insertions(+) > create mode 100644 gcc/analyzer/analyzer.cc > create mode 100644 gcc/analyzer/analyzer.h > > diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc > new file mode 100644 > index 0000000..399925c > --- /dev/null > +++ b/gcc/analyzer/analyzer.cc > @@ -0,0 +1,125 @@ > +/* Utility functions for the analyzer. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalcolm@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>;. */ > + > +#include "config.h" > +#include "gcc-plugin.h" > +#include "system.h" > +#include "coretypes.h" > +#include "tree.h" > +#include "gimple.h" > +#include "diagnostic.h" > +#include "intl.h" > +#include "analyzer/analyzer.h" > + > +/* Helper function for checkers. Is the CALL to the given function > name? */ > + > +bool > +is_named_call_p (const gcall *call, const char *funcname) > +{ > + gcc_assert (funcname); > + > + tree fndecl = gimple_call_fndecl (call); > + if (!fndecl) > + return false; > + > + return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), > funcname); > +} > + > +/* Helper function for checkers. Is the CALL to the given function > name, > + and with the given number of arguments? */ > + > +bool > +is_named_call_p (const gcall *call, const char *funcname, > + unsigned int num_args) > +{ > + gcc_assert (funcname); > + > + if (!is_named_call_p (call, funcname)) > + return false; > + > + if (gimple_call_num_args (call) != num_args) > + return false; > + > + return true; These seem generic enough to live elsewhere. > +} > + > +/* Return true if stmt is a setjmp call. */ > + > +bool > +is_setjmp_call_p (const gimple *stmt) > +{ > + /* TODO: is there a less hacky way to check for "setjmp"? */ > + if (const gcall *call = dyn_cast <const gcall *> (stmt)) > + if (is_named_call_p (call, "_setjmp", 1)) > + return true; > + > + return false; > +} > + > +/* Return true if stmt is a longjmp call. */ > + > +bool > +is_longjmp_call_p (const gcall *call) > +{ > + /* TODO: is there a less hacky way to check for "longjmp"? */ > + if (is_named_call_p (call, "longjmp", 2)) > + return true; > + > + return false; > +} I thought we already had routines to detect these. We certainly have *code* to detect them. If it's the former we really just want one implementation (that hands the various permutations we've seen through the years). If it's the latter, then we probably want to use these routines to simplify those implementations. > + > +/* Generate a label_text instance by formatting FMT, using a > + temporary clone of the global_dc's printer (thus using its > + formatting callbacks). > + > + Colorize if the global_dc supports colorization and CAN_COLORIZE > is > + true. */ > + > +label_text > +make_label_text (bool can_colorize, const char *fmt, ...) > +{ > + pretty_printer *pp = global_dc->printer->clone (); > + pp_clear_output_area (pp); > + > + if (!can_colorize) > + pp_show_color (pp) = false; > + > + text_info ti; > + rich_location rich_loc (line_table, UNKNOWN_LOCATION); > + > + va_list ap; > + > + va_start (ap, fmt); > + > + ti.format_spec = _(fmt); > + ti.args_ptr = ≈ > + ti.err_no = 0; > + ti.x_data = NULL; > + ti.m_richloc = &rich_loc; > + > + pp_format (pp, &ti); > + pp_output_formatted_text (pp); > + > + va_end (ap); > + > + label_text result = label_text::take (xstrdup (pp_formatted_text > (pp))); > + delete pp; > + return result; > +} Is this better in the pretty-printer infrastructure? Jeff
On Fri, 2019-12-06 at 22:38 -0500, Eric Gallager wrote: > On 11/15/19, David Malcolm <dmalcolm@redhat.com> wrote: [...] > > diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h > > new file mode 100644 > > index 0000000..ace8924 > > --- /dev/null > > +++ b/gcc/analyzer/analyzer.h > > @@ -0,0 +1,126 @@ > > +/* Utility functions for the analyzer. > > + Copyright (C) 2019 Free Software Foundation, Inc. > > + Contributed by David Malcolm <dmalcolm@redhat.com>. > > + > > +This file is part of GCC. > > + > > +GCC is free software; you can redistribute it and/or modify it > > +under the terms of the GNU General Public License as published by > > +the Free Software Foundation; either version 3, or (at your > > option) > > +any later version. > > + > > +GCC is distributed in the hope that it will be useful, but > > +WITHOUT ANY WARRANTY; without even the implied warranty of > > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > +General Public License for more details. > > + > > +You should have received a copy of the GNU General Public License > > +along with GCC; see the file COPYING3. If not see > > +<http://www.gnu.org/licenses/>;. */ > > + > > +#ifndef GCC_ANALYZER_ANALYZER_H > > +#define GCC_ANALYZER_ANALYZER_H > > + > > +/* Forward decls of common types, with indentation to show > > inheritance. */ > > I'm wondering about the "with indentation to show inheritance" > part... > does that require tweaking any editor configuration files or adding > /*INDENT-OFF*/ comments or anything to prevent automatic formatting > tools from "fixing" the indentation to go back to the normal style of > having everything be aligned? If we had some kind of automatic formatting then I guess it would, but I don't think we have such a system in place. [...]
On 11/15/19 6:23 PM, David Malcolm wrote: > gcc/ChangeLog: > * analyzer/analyzer.cc: New file. > * analyzer/analyzer.h: New file. Here are a few more comments/suggestions as I'm slowly making my way through the patch, partly for my own benefit. Hopefully they're at least moderately useful. > --- > gcc/analyzer/analyzer.cc | 125 ++++++++++++++++++++++++++++++++++++++++++++++ > gcc/analyzer/analyzer.h | 126 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 251 insertions(+) > create mode 100644 gcc/analyzer/analyzer.cc > create mode 100644 gcc/analyzer/analyzer.h > > diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc > new file mode 100644 > index 0000000..399925c > --- /dev/null > +++ b/gcc/analyzer/analyzer.cc > @@ -0,0 +1,125 @@ > +/* Utility functions for the analyzer. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalcolm@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#include "config.h" > +#include "gcc-plugin.h" > +#include "system.h" > +#include "coretypes.h" > +#include "tree.h" > +#include "gimple.h" > +#include "diagnostic.h" > +#include "intl.h" > +#include "analyzer/analyzer.h" > + > +/* Helper function for checkers. Is the CALL to the given function name? */ > + > +bool > +is_named_call_p (const gcall *call, const char *funcname) > +{ > + gcc_assert (funcname); > + > + tree fndecl = gimple_call_fndecl (call); > + if (!fndecl) > + return false; > + > + return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname); > +} > + > +/* Helper function for checkers. Is the CALL to the given function name, > + and with the given number of arguments? */ > + > +bool > +is_named_call_p (const gcall *call, const char *funcname, > + unsigned int num_args) > +{ > + gcc_assert (funcname); > + > + if (!is_named_call_p (call, funcname)) > + return false; > + > + if (gimple_call_num_args (call) != num_args) > + return false; > + > + return true; > +} > + > +/* Return true if stmt is a setjmp call. */ > + > +bool > +is_setjmp_call_p (const gimple *stmt) > +{ > + /* TODO: is there a less hacky way to check for "setjmp"? */ > + if (const gcall *call = dyn_cast <const gcall *> (stmt)) > + if (is_named_call_p (call, "_setjmp", 1)) > + return true; > + > + return false; > +} > + > +/* Return true if stmt is a longjmp call. */ > + > +bool > +is_longjmp_call_p (const gcall *call) > +{ > + /* TODO: is there a less hacky way to check for "longjmp"? */ > + if (is_named_call_p (call, "longjmp", 2)) > + return true; > + > + return false; > +} I haven't actually needed these helper functions myself but they seem quite generic and might make good additions to gimple.h. > + > +/* Generate a label_text instance by formatting FMT, using a > + temporary clone of the global_dc's printer (thus using its > + formatting callbacks). > + > + Colorize if the global_dc supports colorization and CAN_COLORIZE is > + true. */ > + > +label_text > +make_label_text (bool can_colorize, const char *fmt, ...) > +{ > + pretty_printer *pp = global_dc->printer->clone (); > + pp_clear_output_area (pp); > + > + if (!can_colorize) > + pp_show_color (pp) = false; > + > + text_info ti; > + rich_location rich_loc (line_table, UNKNOWN_LOCATION); > + > + va_list ap; > + > + va_start (ap, fmt); > + > + ti.format_spec = _(fmt); > + ti.args_ptr = ≈ > + ti.err_no = 0; > + ti.x_data = NULL; > + ti.m_richloc = &rich_loc; > + > + pp_format (pp, &ti); > + pp_output_formatted_text (pp); > + > + va_end (ap); > + > + label_text result = label_text::take (xstrdup (pp_formatted_text (pp))); > + delete pp; > + return result; > +} > diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h > new file mode 100644 > index 0000000..ace8924 > --- /dev/null > +++ b/gcc/analyzer/analyzer.h > @@ -0,0 +1,126 @@ > +/* Utility functions for the analyzer. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalcolm@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#ifndef GCC_ANALYZER_ANALYZER_H > +#define GCC_ANALYZER_ANALYZER_H > + > +/* Forward decls of common types, with indentation to show inheritance. */ > + > +class graphviz_out; > +class supergraph; > +class supernode; > +class superedge; > + class cfg_superedge; > + class switch_cfg_superedge; > + class callgraph_superedge; > + class call_superedge; > + class return_superedge; > +class svalue; > + class region_svalue; > + class constant_svalue; > + class poisoned_svalue; > + class unknown_svalue; > + class setjmp_svalue; > +class region; > + class map_region; > + class symbolic_region; > +class region_model; > +class region_model_context; > + class impl_region_model_context; > +class constraint_manager; > +class equiv_class; > +struct model_merger; > +struct svalue_id_merger_mapping; > +struct canonicalization; > +class pending_diagnostic; > +class checker_path; > +class extrinsic_state; > +class sm_state_map; > +class stmt_finder; > +class program_point; > +class program_state; > +class exploded_graph; > +class exploded_node; > +class exploded_edge; > +class exploded_cluster; > +class exploded_path; > +class analysis_plan; > +class state_purge_map; > +class state_purge_per_ssa_name; > +class state_change; > + > +//////////////////////////////////////////////////////////////////////////// > + > +extern bool is_named_call_p (const gcall *call, const char *funcname); > +extern bool is_named_call_p (const gcall *call, const char *funcname, > + unsigned int num_args); > +extern bool is_setjmp_call_p (const gimple *stmt); > +extern bool is_longjmp_call_p (const gcall *call); There are functions in GCC that match either is_xxx() and xxx_p() but by in my experience (and now also by my count) the latter is far more prevalent: 538 vs 3,172. I count just 28 functions that combine the two into is_xxx_p(). I think it would make sense to choose either of the two dominant forms over the is_xxx_p() combination here. (Ideally, there'd be just one style to make it easy to remember.) > + > +extern void register_analyzer_pass (); > + > +extern label_text make_label_text (bool can_colorize, const char *fmt, ...); > + > +//////////////////////////////////////////////////////////////////////////// > + > +/* An RAII-style class for pushing/popping cfun within a scope. > + Doing so ensures we get "In function " announcements > + from the diagnostics subsystem. */ > + > +class auto_cfun > +{ > +public: > + auto_cfun (function *fun) { push_cfun (fun); } > + ~auto_cfun () { pop_cfun (); } > +}; This also seems like a nice general-purpose utility that might be worth adding someplace more central. I thought I'd seen code like this in the compiler already but all I can find now is a plenty of calls to push_cfun and but just one to pop. > + > +//////////////////////////////////////////////////////////////////////////// > + > +/* Begin suppressing -Wformat and -Wformat-extra-args. */ > + > +#define PUSH_IGNORE_WFORMAT \ > + _Pragma("GCC diagnostic push") \ > + _Pragma("GCC diagnostic ignored \"-Wformat\"") \ > + _Pragma("GCC diagnostic ignored \"-Wformat-extra-args\"") > + > +/* Finish suppressing -Wformat and -Wformat-extra-args. */ > + > +#define POP_IGNORE_WFORMAT \ > + _Pragma("GCC diagnostic pop") > + > +//////////////////////////////////////////////////////////////////////////// > + > +/* A template for creating hash traits for a POD type. */ > + > +template <typename Type> > +struct pod_hash_traits : typed_noop_remove<Type> Would it make sense to add this to hash-traits.h? Martin > +{ > + typedef Type value_type; > + typedef Type compare_type; > + static inline hashval_t hash (value_type); > + static inline bool equal (const value_type &existing, > + const value_type &candidate); > + static inline void mark_deleted (Type &); > + static inline void mark_empty (Type &); > + static inline bool is_deleted (Type); > + static inline bool is_empty (Type); > +}; > + > +#endif /* GCC_ANALYZER_ANALYZER_H */ >
On 12/9/19, David Malcolm <dmalcolm@redhat.com> wrote: > On Fri, 2019-12-06 at 22:38 -0500, Eric Gallager wrote: >> On 11/15/19, David Malcolm <dmalcolm@redhat.com> wrote: > [...] >> > diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h >> > new file mode 100644 >> > index 0000000..ace8924 >> > --- /dev/null >> > +++ b/gcc/analyzer/analyzer.h >> > @@ -0,0 +1,126 @@ >> > +/* Utility functions for the analyzer. >> > + Copyright (C) 2019 Free Software Foundation, Inc. >> > + Contributed by David Malcolm <dmalcolm@redhat.com>. >> > + >> > +This file is part of GCC. >> > + >> > +GCC is free software; you can redistribute it and/or modify it >> > +under the terms of the GNU General Public License as published by >> > +the Free Software Foundation; either version 3, or (at your >> > option) >> > +any later version. >> > + >> > +GCC is distributed in the hope that it will be useful, but >> > +WITHOUT ANY WARRANTY; without even the implied warranty of >> > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> > +General Public License for more details. >> > + >> > +You should have received a copy of the GNU General Public License >> > +along with GCC; see the file COPYING3. If not see >> > +<http://www.gnu.org/licenses/>;. */ >> > + >> > +#ifndef GCC_ANALYZER_ANALYZER_H >> > +#define GCC_ANALYZER_ANALYZER_H >> > + >> > +/* Forward decls of common types, with indentation to show >> > inheritance. */ >> >> I'm wondering about the "with indentation to show inheritance" >> part... >> does that require tweaking any editor configuration files or adding >> /*INDENT-OFF*/ comments or anything to prevent automatic formatting >> tools from "fixing" the indentation to go back to the normal style of >> having everything be aligned? > > If we had some kind of automatic formatting then I guess it would, but > I don't think we have such a system in place. > Check the contrib directory; there's a clang-format file and a vimrc file in there that provide automatic formatting; do `make vimrc` and `make clang-format` from the top-level to use them. There's also the check_GNU_style scripts, but those just check & don't actually reformat, AFAIK... > [...] > >
On Sat, 2019-12-07 at 08:01 -0700, Jeff Law wrote: > On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote: [...] > > diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc > > new file mode 100644 > > index 0000000..399925c > > --- /dev/null > > +++ b/gcc/analyzer/analyzer.cc [...] > > + > > +/* Helper function for checkers. Is the CALL to the given > > function > > name? */ > > + > > +bool > > +is_named_call_p (const gcall *call, const char *funcname) > > +{ > > + gcc_assert (funcname); > > + > > + tree fndecl = gimple_call_fndecl (call); > > + if (!fndecl) > > + return false; > > + > > + return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), > > funcname); > > +} > > + > > +/* Helper function for checkers. Is the CALL to the given > > function > > name, > > + and with the given number of arguments? */ > > + > > +bool > > +is_named_call_p (const gcall *call, const char *funcname, > > + unsigned int num_args) > > +{ > > + gcc_assert (funcname); > > + > > + if (!is_named_call_p (call, funcname)) > > + return false; > > + > > + if (gimple_call_num_args (call) != num_args) > > + return false; > > + > > + return true; > These seem generic enough to live elsewhere. There's a potential can of worms here: these functions are used by the checkers to detect fndecls by name, so that checkers can associate behavior with them. Examples: * sm-malloc.c recognizes "malloc" and "free" as being special * sm-signal.c knows that fprintf is async-signal-unsafe (but currently doesn't know about any other fns; it's a proof-of-concept) * sm-file.c currently doesn't know anything about "__fsetlocking", which is one of the reasons the analyzer currently doesn't detect the leak in intl/localealias.c reported as PR 47170, as it assumes that the FILE * might have been closed and stops tracking state for it. etc. So we're going to have a lot more "recognize this function by name" just to finish the existing proof-of-concept checkers. In a perfect world, perhaps we'd have attributes for all of this, and the user's code and their system headers would have dutifully marked the pertinent decls with attributes, and the use of is_named_call_p could be thought of as a bug (or wart)... but we don't live in that perfect world, and a good static analyzer shouldn't need to rely on the user marking their code. There's also integration and chicken-and-egg issues with attributes where if we rely e.g. on the user's libc headers having attributes for the checker, then we need to coordinate with e.g. glibc to add the attributes, and implement them, and then the checker doesn't work if someone is using a different libc, etc, etc. So I think we want a concept of "known functions" in the analyzer, where the analyzer can have its own "on the side" model of APIs - perhaps a mixture of baked-in (e.g. for malloc/free), perhaps from an overridable config file, but I'm not quite sure what form it should take. Maybe even a pragma that lets us tag named functions with attributes, or somesuch. For now, however, I want to take the pragmatic approach, and use these functions as needed (and to review this as we gain experience with the analyzer). So I think I prefer to keep them in the analyzer subdir (but I'm happy to move them if you'd prefer that) Does the above sound sane? > > +} > > + > > +/* Return true if stmt is a setjmp call. */ > > + > > +bool > > +is_setjmp_call_p (const gimple *stmt) > > +{ > > + /* TODO: is there a less hacky way to check for "setjmp"? */ > > + if (const gcall *call = dyn_cast <const gcall *> (stmt)) > > + if (is_named_call_p (call, "_setjmp", 1)) > > + return true; > > + > > + return false; > > +} > > + > > +/* Return true if stmt is a longjmp call. */ > > + > > +bool > > +is_longjmp_call_p (const gcall *call) > > +{ > > + /* TODO: is there a less hacky way to check for "longjmp"? */ > > + if (is_named_call_p (call, "longjmp", 2)) > > + return true; > > + > > + return false; > > +} > I thought we already had routines to detect these. We certainly have > *code* to detect them. If it's the former we really just want one > implementation (that hands the various permutations we've seen > through > the years). If it's the latter, then we probably want to use these > routines to simplify those implementations. We have several: * calls.c has: * setjmp_call_p (const_tree fndecl) * special_function_p which does name matching on "setjmp", "sigsetjmp", "savectx", "vfork", "getcontext" and sets ECF_RETURNS_TWICE (dropping leading single and double underscores) * except.c has "tree setjmp_fn;" used with #ifdef DONT_USE_BUILTIN_SETJMP * omp-low.c has: setjmp_or_longjmp_p (const_tree fndecl) (and maybe more). I reviewed where I'm using the two the patch proposed adding above: * I use is_setjmp_call_p in diagnostic_manager::add_events_for_eedge for PK_BEFORE_STMT on the gcall to capture recording the setjmp buf (so that the event can be cross-referenced at the longjmp call), and in engine.cc for identifying that initial "store" call * I use is_longjmp_call_p in only one place, in engine.cc, for detecting the fn and handling the non-standard control flow As for the other functions identified in calls.c special_function_p: * "sigsetjmp": looks like I can generalize my code to handle this * "savectx": https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71876 suggests it's Solaris-specific. I have no idea of the signature or semantics, so I'm inclined not to handle it in the analyzer. * "vfork": appears to me to be an old performance hack from before COW pages. Semantics appear sufficiently different from setjmp/longjmp that it would need its own special-casing (and isn't a priority for me) * "getcontext"/"setcontext": have slightly different semantics to setjmp/longjmp (e.g. no val is passed like for longjmp; they can fail and set errno); again, would need their own special-casing, and again, these are not a priority for me) I think the requirements for the analyzer here differ from that of the rest of the compiler: the analyzer needs to capture semantics of the fns in terms of the effects on states and paths, which is rather different to that needed by code generation. For example the analyzer's special-casing captures subtleties like "Passing 0 as the val to longjmp leads to setjmp returning 1". So I'm thinking that for the next iteration of the kit I'll drop these two functions in favor of: setjmp_like_p longjmp_like_p to detect setjmp/sigsetjmp and longjmp/siglongjmp, specifically for the analyzer, so that it can trigger the special-casing for these. Does that sound reasonable? > > +/* Generate a label_text instance by formatting FMT, using a > > + temporary clone of the global_dc's printer (thus using its > > + formatting callbacks). > > + > > + Colorize if the global_dc supports colorization and > > CAN_COLORIZE > > is > > + true. */ > > + > > +label_text > > +make_label_text (bool can_colorize, const char *fmt, ...) > > +{ > > + pretty_printer *pp = global_dc->printer->clone (); > > + pp_clear_output_area (pp); > > + > > + if (!can_colorize) > > + pp_show_color (pp) = false; > > + > > + text_info ti; > > + rich_location rich_loc (line_table, UNKNOWN_LOCATION); > > + > > + va_list ap; > > + > > + va_start (ap, fmt); > > + > > + ti.format_spec = _(fmt); > > + ti.args_ptr = ≈ > > + ti.err_no = 0; > > + ti.x_data = NULL; > > + ti.m_richloc = &rich_loc; > > + > > + pp_format (pp, &ti); > > + pp_output_formatted_text (pp); > > + > > + va_end (ap); > > + > > + label_text result = label_text::take (xstrdup (pp_formatted_text > > (pp))); > > + delete pp; > > + return result; > > +} > Is this better in the pretty-printer infrastructure? This touches on another can of worms... it's rather clunky to be cloning the printer and using it to generate a temporary string. I've been thinking that maybe diagnostic_event::get_desc should instead print to a pp, which might avoid a lot of printer cloning, but I've attempted that before, unsuccessfully. I guess I'll try again; it feels cleaner (I think it required cloning the printer if we're going to use it from within a rich_location label, since otherwise the printing of the description interferes with that of diagnostic_show_locus, and we end up with garbled text). Dave
diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc new file mode 100644 index 0000000..399925c --- /dev/null +++ b/gcc/analyzer/analyzer.cc @@ -0,0 +1,125 @@ +/* Utility functions for the analyzer. + Copyright (C) 2019 Free Software Foundation, Inc. + Contributed by David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "gimple.h" +#include "diagnostic.h" +#include "intl.h" +#include "analyzer/analyzer.h" + +/* Helper function for checkers. Is the CALL to the given function name? */ + +bool +is_named_call_p (const gcall *call, const char *funcname) +{ + gcc_assert (funcname); + + tree fndecl = gimple_call_fndecl (call); + if (!fndecl) + return false; + + return 0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname); +} + +/* Helper function for checkers. Is the CALL to the given function name, + and with the given number of arguments? */ + +bool +is_named_call_p (const gcall *call, const char *funcname, + unsigned int num_args) +{ + gcc_assert (funcname); + + if (!is_named_call_p (call, funcname)) + return false; + + if (gimple_call_num_args (call) != num_args) + return false; + + return true; +} + +/* Return true if stmt is a setjmp call. */ + +bool +is_setjmp_call_p (const gimple *stmt) +{ + /* TODO: is there a less hacky way to check for "setjmp"? */ + if (const gcall *call = dyn_cast <const gcall *> (stmt)) + if (is_named_call_p (call, "_setjmp", 1)) + return true; + + return false; +} + +/* Return true if stmt is a longjmp call. */ + +bool +is_longjmp_call_p (const gcall *call) +{ + /* TODO: is there a less hacky way to check for "longjmp"? */ + if (is_named_call_p (call, "longjmp", 2)) + return true; + + return false; +} + +/* Generate a label_text instance by formatting FMT, using a + temporary clone of the global_dc's printer (thus using its + formatting callbacks). + + Colorize if the global_dc supports colorization and CAN_COLORIZE is + true. */ + +label_text +make_label_text (bool can_colorize, const char *fmt, ...) +{ + pretty_printer *pp = global_dc->printer->clone (); + pp_clear_output_area (pp); + + if (!can_colorize) + pp_show_color (pp) = false; + + text_info ti; + rich_location rich_loc (line_table, UNKNOWN_LOCATION); + + va_list ap; + + va_start (ap, fmt); + + ti.format_spec = _(fmt); + ti.args_ptr = ≈ + ti.err_no = 0; + ti.x_data = NULL; + ti.m_richloc = &rich_loc; + + pp_format (pp, &ti); + pp_output_formatted_text (pp); + + va_end (ap); + + label_text result = label_text::take (xstrdup (pp_formatted_text (pp))); + delete pp; + return result; +} diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h new file mode 100644 index 0000000..ace8924 --- /dev/null +++ b/gcc/analyzer/analyzer.h @@ -0,0 +1,126 @@ +/* Utility functions for the analyzer. + Copyright (C) 2019 Free Software Foundation, Inc. + Contributed by David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_ANALYZER_ANALYZER_H +#define GCC_ANALYZER_ANALYZER_H + +/* Forward decls of common types, with indentation to show inheritance. */ + +class graphviz_out; +class supergraph; +class supernode; +class superedge; + class cfg_superedge; + class switch_cfg_superedge; + class callgraph_superedge; + class call_superedge; + class return_superedge; +class svalue; + class region_svalue; + class constant_svalue; + class poisoned_svalue; + class unknown_svalue; + class setjmp_svalue; +class region; + class map_region; + class symbolic_region; +class region_model; +class region_model_context; + class impl_region_model_context; +class constraint_manager; +class equiv_class; +struct model_merger; +struct svalue_id_merger_mapping; +struct canonicalization; +class pending_diagnostic; +class checker_path; +class extrinsic_state; +class sm_state_map; +class stmt_finder; +class program_point; +class program_state; +class exploded_graph; +class exploded_node; +class exploded_edge; +class exploded_cluster; +class exploded_path; +class analysis_plan; +class state_purge_map; +class state_purge_per_ssa_name; +class state_change; + +//////////////////////////////////////////////////////////////////////////// + +extern bool is_named_call_p (const gcall *call, const char *funcname); +extern bool is_named_call_p (const gcall *call, const char *funcname, + unsigned int num_args); +extern bool is_setjmp_call_p (const gimple *stmt); +extern bool is_longjmp_call_p (const gcall *call); + +extern void register_analyzer_pass (); + +extern label_text make_label_text (bool can_colorize, const char *fmt, ...); + +//////////////////////////////////////////////////////////////////////////// + +/* An RAII-style class for pushing/popping cfun within a scope. + Doing so ensures we get "In function " announcements + from the diagnostics subsystem. */ + +class auto_cfun +{ +public: + auto_cfun (function *fun) { push_cfun (fun); } + ~auto_cfun () { pop_cfun (); } +}; + +//////////////////////////////////////////////////////////////////////////// + +/* Begin suppressing -Wformat and -Wformat-extra-args. */ + +#define PUSH_IGNORE_WFORMAT \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wformat\"") \ + _Pragma("GCC diagnostic ignored \"-Wformat-extra-args\"") + +/* Finish suppressing -Wformat and -Wformat-extra-args. */ + +#define POP_IGNORE_WFORMAT \ + _Pragma("GCC diagnostic pop") + +//////////////////////////////////////////////////////////////////////////// + +/* A template for creating hash traits for a POD type. */ + +template <typename Type> +struct pod_hash_traits : typed_noop_remove<Type> +{ + typedef Type value_type; + typedef Type compare_type; + static inline hashval_t hash (value_type); + static inline bool equal (const value_type &existing, + const value_type &candidate); + static inline void mark_deleted (Type &); + static inline void mark_empty (Type &); + static inline bool is_deleted (Type); + static inline bool is_empty (Type); +}; + +#endif /* GCC_ANALYZER_ANALYZER_H */