diff mbox series

[committed] analyzer: use dominator info in -Wanalyzer-deref-before-check [PR108455]

Message ID 20230119185619.78452-1-dmalcolm@redhat.com
State New
Headers show
Series [committed] analyzer: use dominator info in -Wanalyzer-deref-before-check [PR108455] | expand

Commit Message

David Malcolm Jan. 19, 2023, 6:56 p.m. UTC
My integration testing [1] of -fanalyzer in GCC 13 is showing a lot of
diagnostics from the new -Wanalyzer-deref-before-check warning on
real-world C projects, and most of these seem to be false positives.

This patch updates the warning to make it much less likely to fire:
- only intraprocedural cases are now reported
- reject cases in which there are control flow paths to the check
  that didn't come through the dereference, by looking at BB dominator
  information.  This fixes a false positive seen in git-2.39.0's
  pack-revindex.c: load_revindex_from_disk (PR analyzer/108455), in
  which a shared "cleanup:" section checks "data" for NULL, and
  depending on how much of the function is executed "data" might or
  might not have already been dereferenced.

The counts of -Wanalyzer-deref-before-check diagnostics in [1]
before/after this patch show this improvement:
  Known false positives:    6 ->  0  (-6)
  Known true positives:     1 ->  1
  Unclassified positives: 123 -> 63 (-60)

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-5261-g0d6f7b1dd62e9c.

[1] https://github.com/davidmalcolm/gcc-analyzer-integration-tests

gcc/analyzer/ChangeLog:
	PR analyzer/108455
	* analyzer.h (class checker_event): New forward decl.
	(class state_change_event): Indent.
	(class warning_event): New forward decl.
	* checker-event.cc (state_change_event::state_change_event): Add
	"enode" param.
	(warning_event::get_desc): Update for new param of
	evdesc::final_event ctor.
	* checker-event.h (state_change_event::state_change_event): Add
	"enode" param.
	(state_change_event::get_exploded_node): New accessor.
	(state_change_event::m_enode): New field.
	(warning_event::warning_event): New "enode" param.
	(warning_event::get_exploded_node): New accessor.
	(warning_event::m_enode): New field.
	* diagnostic-manager.cc
	(state_change_event_creator::on_global_state_change): Pass
	src_node to state_change_event ctor.
	(state_change_event_creator::on_state_change): Likewise.
	(null_assignment_sm_context::set_next_state): Pass NULL for
	new param of state_change_event ctor.
	* infinite-recursion.cc
	(infinite_recursion_diagnostic::add_final_event): Update for new
	param of warning_event ctor.
	* pending-diagnostic.cc (pending_diagnostic::add_final_event):
	Pass enode to warning_event ctor.
	* pending-diagnostic.h (evdesc::final_event): Add reference to
	warning_event.
	* sm-malloc.cc: Include "analyzer/checker-event.h" and
	"analyzer/exploded-graph.h".
	(deref_before_check::deref_before_check): Initialize new fields.
	(deref_before_check::emit): Reject warnings in which we were
	unable to determine the enodes of the dereference and the check.
	Reject warnings interprocedural warnings. Reject warnings in which
	the dereference doesn't dominate the check.
	(deref_before_check::describe_state_change): Set m_deref_enode.
	(deref_before_check::describe_final_event): Set m_check_enode.
	(deref_before_check::m_deref_enode): New field.
	(deref_before_check::m_check_enode): New field.

gcc/testsuite/ChangeLog:
	PR analyzer/108455
	* gcc.dg/analyzer/deref-before-check-1.c: Add test coverage
	involving dominance.
	* gcc.dg/analyzer/deref-before-check-pr108455-1.c: New test.
	* gcc.dg/analyzer/deref-before-check-pr108455-git-pack-revindex.c:
	New test.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/analyzer/analyzer.h                       |   4 +-
 gcc/analyzer/checker-event.cc                 |   8 +-
 gcc/analyzer/checker-event.h                  |  11 +-
 gcc/analyzer/diagnostic-manager.cc            |  12 +-
 gcc/analyzer/infinite-recursion.cc            |   3 +-
 gcc/analyzer/pending-diagnostic.cc            |   1 +
 gcc/analyzer/pending-diagnostic.h             |   6 +-
 gcc/analyzer/sm-malloc.cc                     |  35 ++++-
 .../gcc.dg/analyzer/deref-before-check-1.c    |  36 +++++
 .../analyzer/deref-before-check-pr108455-1.c  |  36 +++++
 ...-before-check-pr108455-git-pack-revindex.c | 133 ++++++++++++++++++
 11 files changed, 272 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-git-pack-revindex.c
diff mbox series

Patch

diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h
index bfd098b8613..8f79e4b5df5 100644
--- a/gcc/analyzer/analyzer.h
+++ b/gcc/analyzer/analyzer.h
@@ -93,7 +93,9 @@  class bounded_ranges_manager;
 class pending_diagnostic;
 class pending_note;
 struct event_loc_info;
-class state_change_event;
+class checker_event;
+  class state_change_event;
+  class warning_event;
 class checker_path;
 class extrinsic_state;
 class sm_state_map;
diff --git a/gcc/analyzer/checker-event.cc b/gcc/analyzer/checker-event.cc
index b0128e56d50..3612df7bd1d 100644
--- a/gcc/analyzer/checker-event.cc
+++ b/gcc/analyzer/checker-event.cc
@@ -410,7 +410,8 @@  state_change_event::state_change_event (const supernode *node,
 					state_machine::state_t from,
 					state_machine::state_t to,
 					const svalue *origin,
-					const program_state &dst_state)
+					const program_state &dst_state,
+					const exploded_node *enode)
 : checker_event (EK_STATE_CHANGE,
 		 event_loc_info (stmt->location,
 				 node->m_fun->decl,
@@ -418,7 +419,8 @@  state_change_event::state_change_event (const supernode *node,
   m_node (node), m_stmt (stmt), m_sm (sm),
   m_sval (sval), m_from (from), m_to (to),
   m_origin (origin),
-  m_dst_state (dst_state)
+  m_dst_state (dst_state),
+  m_enode (enode)
 {
 }
 
@@ -1159,7 +1161,7 @@  warning_event::get_desc (bool can_colorize) const
       tree var = fixup_tree_for_diagnostic (m_var);
       label_text ev_desc
 	= m_pending_diagnostic->describe_final_event
-	    (evdesc::final_event (can_colorize, var, m_state));
+	    (evdesc::final_event (can_colorize, var, m_state, *this));
       if (ev_desc.get ())
 	{
 	  if (m_sm && flag_analyzer_verbose_state_changes)
diff --git a/gcc/analyzer/checker-event.h b/gcc/analyzer/checker-event.h
index 429d4cbca81..5dd25cb0775 100644
--- a/gcc/analyzer/checker-event.h
+++ b/gcc/analyzer/checker-event.h
@@ -357,7 +357,8 @@  public:
 		      state_machine::state_t from,
 		      state_machine::state_t to,
 		      const svalue *origin,
-		      const program_state &dst_state);
+		      const program_state &dst_state,
+		      const exploded_node *enode);
 
   label_text get_desc (bool can_colorize) const final override;
   meaning get_meaning () const override;
@@ -367,6 +368,8 @@  public:
     return m_dst_state.get_current_function ();
   }
 
+  const exploded_node *get_exploded_node () const { return m_enode; }
+
   const supernode *m_node;
   const gimple *m_stmt;
   const state_machine &m_sm;
@@ -375,6 +378,7 @@  public:
   state_machine::state_t m_to;
   const svalue *m_origin;
   program_state m_dst_state;
+  const exploded_node *m_enode;
 };
 
 /* Subclass of checker_event; parent class for subclasses that relate to
@@ -668,9 +672,11 @@  class warning_event : public checker_event
 {
 public:
   warning_event (const event_loc_info &loc_info,
+		 const exploded_node *enode,
 		 const state_machine *sm,
 		 tree var, state_machine::state_t state)
   : checker_event (EK_WARNING, loc_info),
+    m_enode (enode),
     m_sm (sm), m_var (var), m_state (state)
   {
   }
@@ -678,7 +684,10 @@  public:
   label_text get_desc (bool can_colorize) const final override;
   meaning get_meaning () const override;
 
+  const exploded_node *get_exploded_node () const { return m_enode; }
+
 private:
+  const exploded_node *m_enode;
   const state_machine *m_sm;
   tree m_var;
   state_machine::state_t m_state;
diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc
index f4d35617ae3..1227d6c1151 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -1572,7 +1572,8 @@  public:
 					src_sm_val,
 					dst_sm_val,
 					NULL,
-					dst_state));
+					dst_state,
+					src_node));
     return false;
   }
 
@@ -1616,7 +1617,8 @@  public:
 					src_sm_val,
 					dst_sm_val,
 					dst_origin_sval,
-					dst_state));
+					dst_state,
+					src_node));
     return false;
   }
 
@@ -1760,7 +1762,8 @@  struct null_assignment_sm_context : public sm_context
 					var_new_sval,
 					from, to,
 					NULL,
-					*m_new_state));
+					*m_new_state,
+					NULL));
   }
 
   void set_next_state (const gimple *stmt,
@@ -1785,7 +1788,8 @@  struct null_assignment_sm_context : public sm_context
 					sval,
 					from, to,
 					NULL,
-					*m_new_state));
+					*m_new_state,
+					NULL));
   }
 
   void warn (const supernode *, const gimple *,
diff --git a/gcc/analyzer/infinite-recursion.cc b/gcc/analyzer/infinite-recursion.cc
index a49ad2d7118..c4e85bfac18 100644
--- a/gcc/analyzer/infinite-recursion.cc
+++ b/gcc/analyzer/infinite-recursion.cc
@@ -182,7 +182,7 @@  public:
   /* Customize the location where the warning_event appears, putting
      it at the topmost entrypoint to the function.  */
   void add_final_event (const state_machine *,
-			const exploded_node *,
+			const exploded_node *enode,
 			const gimple *,
 			tree,
 			state_machine::state_t,
@@ -195,6 +195,7 @@  public:
 			  ()->get_start_location (),
 			m_callee_fndecl,
 			m_new_entry_enode->get_stack_depth ()),
+	enode,
 	NULL, NULL, NULL));
   }
 
diff --git a/gcc/analyzer/pending-diagnostic.cc b/gcc/analyzer/pending-diagnostic.cc
index c5a0e9ce306..79e6c5528eb 100644
--- a/gcc/analyzer/pending-diagnostic.cc
+++ b/gcc/analyzer/pending-diagnostic.cc
@@ -232,6 +232,7 @@  pending_diagnostic::add_final_event (const state_machine *sm,
      (event_loc_info (get_stmt_location (stmt, enode->get_function ()),
 		      enode->get_function ()->decl,
 		      enode->get_stack_depth ()),
+      enode,
       sm, var, state));
 }
 
diff --git a/gcc/analyzer/pending-diagnostic.h b/gcc/analyzer/pending-diagnostic.h
index 6016bf69c9f..de79890b0e0 100644
--- a/gcc/analyzer/pending-diagnostic.h
+++ b/gcc/analyzer/pending-diagnostic.h
@@ -131,13 +131,15 @@  struct return_of_state : public event_desc
 struct final_event : public event_desc
 {
   final_event (bool colorize,
-	       tree expr, state_machine::state_t state)
+	       tree expr, state_machine::state_t state,
+	       const warning_event &event)
   : event_desc (colorize),
-    m_expr (expr), m_state (state)
+    m_expr (expr), m_state (state), m_event (event)
   {}
 
   tree m_expr;
   state_machine::state_t m_state;
+  const warning_event &m_event;
 };
 
 } /* end of namespace evdesc */
diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc
index 212e9c2c1e2..9aee810f818 100644
--- a/gcc/analyzer/sm-malloc.cc
+++ b/gcc/analyzer/sm-malloc.cc
@@ -45,6 +45,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "attribs.h"
 #include "analyzer/function-set.h"
 #include "analyzer/program-state.h"
+#include "analyzer/checker-event.h"
+#include "analyzer/exploded-graph.h"
 
 #if ENABLE_ANALYZER
 
@@ -1490,7 +1492,9 @@  class deref_before_check : public malloc_diagnostic
 {
 public:
   deref_before_check (const malloc_state_machine &sm, tree arg)
-  : malloc_diagnostic (sm, arg)
+  : malloc_diagnostic (sm, arg),
+    m_deref_enode (NULL),
+    m_check_enode (NULL)
   {}
 
   const char *get_kind () const final override { return "deref_before_check"; }
@@ -1502,6 +1506,31 @@  public:
 
   bool emit (rich_location *rich_loc) final override
   {
+    /* Don't emit the warning if we can't show where the deref
+       and the check occur.  */
+    if (!m_deref_enode)
+      return false;
+    if (!m_check_enode)
+      return false;
+    /* Only emit the warning for intraprocedural cases.  */
+    if (m_deref_enode->get_function () != m_check_enode->get_function ())
+      return false;
+    if (&m_deref_enode->get_point ().get_call_string ()
+	!= &m_check_enode->get_point ().get_call_string ())
+      return false;
+
+    /* Reject the warning if the deref's BB doesn't dominate that
+       of the check, so that we don't warn e.g. for shared cleanup
+       code that checks a pointer for NULL, when that code is sometimes
+       used before a deref and sometimes after.
+       Using the dominance code requires setting cfun.  */
+    auto_cfun sentinel (m_deref_enode->get_function ());
+    calculate_dominance_info (CDI_DOMINATORS);
+    if (!dominated_by_p (CDI_DOMINATORS,
+			 m_check_enode->get_supernode ()->m_bb,
+			 m_deref_enode->get_supernode ()->m_bb))
+      return false;
+
     if (m_arg)
       return warning_at (rich_loc, get_controlling_option (),
 			 "check of %qE for NULL after already"
@@ -1520,6 +1549,7 @@  public:
 	&& assumed_non_null_p (change.m_new_state))
       {
 	m_first_deref_event = change.m_event_id;
+	m_deref_enode = change.m_event.get_exploded_node ();
 	if (m_arg)
 	  return change.formatted_print ("pointer %qE is dereferenced here",
 					 m_arg);
@@ -1531,6 +1561,7 @@  public:
 
   label_text describe_final_event (const evdesc::final_event &ev) final override
   {
+    m_check_enode = ev.m_event.get_exploded_node ();
     if (m_first_deref_event.known_p ())
       {
 	if (m_arg)
@@ -1556,6 +1587,8 @@  public:
 
 private:
   diagnostic_event_id_t m_first_deref_event;
+  const exploded_node *m_deref_enode;
+  const exploded_node *m_check_enode;
 };
 
 /* struct allocation_state : public state_machine::state.  */
diff --git a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-1.c b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-1.c
index e1c7a00b97c..1b11da5d8e1 100644
--- a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-1.c
@@ -167,3 +167,39 @@  int test_checking_ptr_after_calling_deref (int *q)
     return 0;
   return v;
 }
+
+extern void foo ();
+extern void bar ();
+extern void baz ();
+
+int test_cfg_diamond_1 (int *p, int flag)
+{
+  int x;
+  x = *p; /* { dg-message "pointer 'p' is dereferenced here" } */
+  if (flag)
+    foo ();
+  else
+    bar ();
+  if (p) /* { dg-warning "check of 'p' for NULL after already dereferencing it" } */
+    {
+      baz ();
+    }
+  return x;
+}
+
+int test_cfg_diamond_2 (int *p, int flag)
+{
+  int x = 0;
+  if (flag)
+    foo ();
+  else
+    {
+      x = *p;
+      bar ();
+    }
+  if (p) /* { dg-bogus "check of 'p' for NULL after already dereferencing it" } */
+    {
+      baz ();
+    }
+  return x;
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-1.c b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-1.c
new file mode 100644
index 00000000000..d7d873edc51
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-1.c
@@ -0,0 +1,36 @@ 
+extern int could_fail_1 (void);
+extern void *could_fail_2 (int);
+extern void cleanup (void *);
+
+struct header {
+  int signature;
+};
+
+int test_1 (void) {
+  int fd, ret = 0;
+  void *data = ((void *)0);
+  struct header *hdr;
+
+  fd = could_fail_1 ();
+
+  if (fd < 0) {
+    ret = -1;
+    goto cleanup;
+  }
+
+  data = could_fail_2 (fd);
+  hdr = data;
+
+  if (hdr->signature != 42) {
+    ret = -2;
+    goto cleanup;
+  }
+
+cleanup:
+  if (ret) {
+    if (data) /* { dg-bogus "check of 'data' for NULL after already dereferencing it" } */
+      cleanup (data);
+  }
+
+  return ret;
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-git-pack-revindex.c b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-git-pack-revindex.c
new file mode 100644
index 00000000000..7553f86051d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108455-git-pack-revindex.c
@@ -0,0 +1,133 @@ 
+/* Reduced from git-2.39.0's pack-revindex.c  */
+
+typedef unsigned int __uint32_t;
+typedef unsigned long int __uintmax_t;
+typedef long int __off_t;
+typedef long int __off64_t;
+typedef __SIZE_TYPE__ size_t;
+typedef __off64_t off_t;
+typedef __uint32_t uint32_t;
+typedef __uintmax_t uintmax_t;
+
+struct stat {
+  /* [...snip...] */
+  __off_t st_size;
+  /* [...snip...] */
+};
+
+extern int close(int __fd);
+extern int fstat(int __fd, struct stat *__buf)
+  __attribute__((__nothrow__, __leaf__)) __attribute__((__nonnull__(2)));
+extern uint32_t default_swab32(uint32_t val);
+extern uint32_t git_bswap32(uint32_t x);
+__attribute__((__noreturn__)) void die(const char *err, ...)
+    __attribute__((format(printf, 1, 2)));
+int error(const char *err, ...) __attribute__((format(printf, 1, 2)));
+int error_errno(const char *err, ...) __attribute__((format(printf, 1, 2)));
+static inline int const_error(void) { return -1; }
+extern int munmap(void *__addr, size_t __len)
+    __attribute__((__nothrow__, __leaf__));
+extern size_t st_mult(size_t a, size_t b);
+extern void *xmmap(void *start, size_t length, int prot, int flags, int fd,
+		   off_t offset);
+extern size_t xsize_t(off_t len);
+
+extern char *gettext(const char *__msgid) __attribute__((__nothrow__, __leaf__))
+__attribute__((__format_arg__(1)));
+static inline __attribute__((format_arg(1))) const char *_(const char *msgid) {
+  if (!*msgid)
+    return "";
+  return gettext(msgid);
+}
+
+struct repository {
+  /* [...snip...] */
+  const struct git_hash_algo *hash_algo;
+  /* [...snip...] */
+};
+extern struct repository *the_repository;
+struct git_hash_algo {
+  /* [...snip...] */
+  size_t rawsz;
+  /* [...snip...] */
+};
+
+int git_open_cloexec(const char *name, int flags);
+
+struct revindex_header {
+  uint32_t signature;
+  uint32_t version;
+  uint32_t hash_id;
+};
+
+int load_revindex_from_disk(char *revindex_name, uint32_t num_objects,
+                            const uint32_t **data_p, size_t *len_p) {
+  int fd, ret = 0;
+  struct stat st;
+  void *data = ((void *)0);
+  size_t revindex_size;
+  struct revindex_header *hdr;
+
+  fd = git_open_cloexec(revindex_name, 00);
+
+  if (fd < 0) {
+    ret = -1;
+    goto cleanup;
+  }
+  if (fstat(fd, &st)) {
+    ret = (error_errno(_("failed to read %s"), revindex_name), const_error());
+    goto cleanup;
+  }
+
+  revindex_size = xsize_t(st.st_size);
+
+  if (revindex_size < ((12) + (2 * the_repository->hash_algo->rawsz))) {
+    ret = (error(_("reverse-index file %s is too small"), revindex_name),
+           const_error());
+    goto cleanup;
+  }
+
+  if (revindex_size - ((12) + (2 * the_repository->hash_algo->rawsz)) !=
+      st_mult(sizeof(uint32_t), num_objects)) {
+    ret = (error(_("reverse-index file %s is corrupt"), revindex_name),
+           const_error());
+    goto cleanup;
+  }
+
+  data = xmmap(((void *)0), revindex_size, 0x1, 0x02, fd, 0);
+  hdr = data;
+
+  if (git_bswap32(hdr->signature) != 0x52494458) {
+    ret =
+        (error(_("reverse-index file %s has unknown signature"), revindex_name),
+         const_error());
+    goto cleanup;
+  }
+  if (git_bswap32(hdr->version) != 1) {
+    ret = (error(_("reverse-index file %s has unsupported version %"
+                   "u"),
+                 revindex_name, git_bswap32(hdr->version)),
+           const_error());
+    goto cleanup;
+  }
+  if (!(git_bswap32(hdr->hash_id) == 1 || git_bswap32(hdr->hash_id) == 2)) {
+    ret = (error(_("reverse-index file %s has unsupported hash id %"
+                   "u"),
+                 revindex_name, git_bswap32(hdr->hash_id)),
+           const_error());
+    goto cleanup;
+  }
+
+cleanup:
+  if (ret) {
+    if (data) /* { dg-bogus "check of 'data' for NULL after already dereferencing it" } */
+      munmap(data, revindex_size);
+  } else {
+    *len_p = revindex_size;
+    *data_p = (const uint32_t *)data;
+  }
+
+  if (fd >= 0)
+    close(fd);
+  return ret;
+}