diff mbox series

[15/49] Add ordered_hash_map

Message ID 1573867416-55618-16-git-send-email-dmalcolm@redhat.com
State New
Headers show
Series RFC: Add a static analysis framework to GCC | expand

Commit Message

David Malcolm Nov. 16, 2019, 1:23 a.m. UTC
This patch adds an ordered_hash_map template, which is similar to
hash_map, but preserves insertion order.

gcc/ChangeLog:
	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.
	* ordered-hash-map-tests.cc: New file.
	* ordered-hash-map.h: New file.
	* selftest-run-tests.c (selftest::run_tests): Call
	selftest::ordered_hash_map_tests_cc_tests.
	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New
	decl.
---
 gcc/Makefile.in               |   1 +
 gcc/ordered-hash-map-tests.cc | 247 ++++++++++++++++++++++++++++++++++++++++++
 gcc/ordered-hash-map.h        | 184 +++++++++++++++++++++++++++++++
 gcc/selftest-run-tests.c      |   1 +
 gcc/selftest.h                |   1 +
 5 files changed, 434 insertions(+)
 create mode 100644 gcc/ordered-hash-map-tests.cc
 create mode 100644 gcc/ordered-hash-map.h

Comments

Martin Sebor Dec. 4, 2019, 5:59 p.m. UTC | #1
On 11/15/19 6:23 PM, David Malcolm wrote:
> This patch adds an ordered_hash_map template, which is similar to
> hash_map, but preserves insertion order.
> 
> gcc/ChangeLog:
> 	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.
> 	* ordered-hash-map-tests.cc: New file.
> 	* ordered-hash-map.h: New file.
> 	* selftest-run-tests.c (selftest::run_tests): Call
> 	selftest::ordered_hash_map_tests_cc_tests.
> 	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New
> 	decl.
> ---
>   gcc/Makefile.in               |   1 +
>   gcc/ordered-hash-map-tests.cc | 247 ++++++++++++++++++++++++++++++++++++++++++
>   gcc/ordered-hash-map.h        | 184 +++++++++++++++++++++++++++++++
>   gcc/selftest-run-tests.c      |   1 +
>   gcc/selftest.h                |   1 +
>   5 files changed, 434 insertions(+)
>   create mode 100644 gcc/ordered-hash-map-tests.cc
>   create mode 100644 gcc/ordered-hash-map.h
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index c87b00f..5feef6a 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1443,6 +1443,7 @@ OBJS = \
>   	optinfo-emit-json.o \
>   	options-save.o \
>   	opts-global.o \
> +	ordered-hash-map-tests.o \
>   	passes.o \
>   	plugin.o \
>   	postreload-gcse.o \
> diff --git a/gcc/ordered-hash-map-tests.cc b/gcc/ordered-hash-map-tests.cc
> new file mode 100644
> index 0000000..266f60c
> --- /dev/null
> +++ b/gcc/ordered-hash-map-tests.cc
> @@ -0,0 +1,247 @@
> +/* Unit tests for ordered-hash-map.h.
> +   Copyright (C) 2015-2019 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "tm.h"
> +#include "opts.h"
> +#include "hash-set.h"
> +#include "fixed-value.h"
> +#include "alias.h"
> +#include "flags.h"
> +#include "symtab.h"
> +#include "tree-core.h"
> +#include "stor-layout.h"
> +#include "tree.h"
> +#include "stringpool.h"
> +#include "ordered-hash-map.h"
> +#include "selftest.h"
> +
> +#if CHECKING_P
> +
> +namespace selftest {
> +
> +/* Populate *OUT_KVS with the key/value pairs of M.  */
> +
> +template <typename HashMap, typename Key, typename Value>
> +static void
> +get_kv_pairs (const HashMap &m,
> +	      auto_vec<std::pair<Key, Value> > *out_kvs)
> +{
> +  for (typename HashMap::iterator iter = m.begin ();
> +       iter != m.end ();
> +       ++iter)
> +    out_kvs->safe_push (std::make_pair ((*iter).first, (*iter).second));
> +}
> +
> +/* Construct an ordered_hash_map <const char *, int> and verify that
> +   various operations work correctly.  */
> +
> +static void
> +test_map_of_strings_to_int ()
> +{
> +  ordered_hash_map <const char *, int> m;
> +
> +  const char *ostrich = "ostrich";
> +  const char *elephant = "elephant";
> +  const char *ant = "ant";
> +  const char *spider = "spider";
> +  const char *millipede = "Illacme plenipes";
> +  const char *eric = "half a bee";
> +
> +  /* A fresh hash_map should be empty.  */
> +  ASSERT_EQ (0, m.elements ());
> +  ASSERT_EQ (NULL, m.get (ostrich));
> +
> +  /* Populate the hash_map.  */
> +  ASSERT_EQ (false, m.put (ostrich, 2));
> +  ASSERT_EQ (false, m.put (elephant, 4));
> +  ASSERT_EQ (false, m.put (ant, 6));
> +  ASSERT_EQ (false, m.put (spider, 8));
> +  ASSERT_EQ (false, m.put (millipede, 750));
> +  ASSERT_EQ (false, m.put (eric, 3));
> +
> +  /* Verify that we can recover the stored values.  */
> +  ASSERT_EQ (6, m.elements ());
> +  ASSERT_EQ (2, *m.get (ostrich));
> +  ASSERT_EQ (4, *m.get (elephant));
> +  ASSERT_EQ (6, *m.get (ant));
> +  ASSERT_EQ (8, *m.get (spider));
> +  ASSERT_EQ (750, *m.get (millipede));
> +  ASSERT_EQ (3, *m.get (eric));
> +
> +  /* Verify that the order of insertion is preserved.  */
> +  auto_vec<std::pair<const char *, int> > kvs;
> +  get_kv_pairs (m, &kvs);
> +  ASSERT_EQ (kvs.length (), 6);
> +  ASSERT_EQ (kvs[0].first, ostrich);
> +  ASSERT_EQ (kvs[0].second, 2);
> +  ASSERT_EQ (kvs[1].first, elephant);
> +  ASSERT_EQ (kvs[1].second, 4);
> +  ASSERT_EQ (kvs[2].first, ant);
> +  ASSERT_EQ (kvs[2].second, 6);
> +  ASSERT_EQ (kvs[3].first, spider);
> +  ASSERT_EQ (kvs[3].second, 8);
> +  ASSERT_EQ (kvs[4].first, millipede);
> +  ASSERT_EQ (kvs[4].second, 750);
> +  ASSERT_EQ (kvs[5].first, eric);
> +  ASSERT_EQ (kvs[5].second, 3);
> +}
> +
> +/* Construct an ordered_hash_map using int_hash and verify that various
> +   operations work correctly.  */
> +
> +static void
> +test_map_of_int_to_strings ()
> +{
> +  const int EMPTY = -1;
> +  const int DELETED = -2;
> +  typedef int_hash <int, EMPTY, DELETED> int_hash_t;
> +  ordered_hash_map <int_hash_t, const char *> m;
> +
> +  const char *ostrich = "ostrich";
> +  const char *elephant = "elephant";
> +  const char *ant = "ant";
> +  const char *spider = "spider";
> +  const char *millipede = "Illacme plenipes";
> +  const char *eric = "half a bee";
> +
> +  /* A fresh hash_map should be empty.  */
> +  ASSERT_EQ (0, m.elements ());
> +  ASSERT_EQ (NULL, m.get (2));
> +
> +  /* Populate the hash_map.  */
> +  ASSERT_EQ (false, m.put (2, ostrich));
> +  ASSERT_EQ (false, m.put (4, elephant));
> +  ASSERT_EQ (false, m.put (6, ant));
> +  ASSERT_EQ (false, m.put (8, spider));
> +  ASSERT_EQ (false, m.put (750, millipede));
> +  ASSERT_EQ (false, m.put (3, eric));
> +
> +  /* Verify that we can recover the stored values.  */
> +  ASSERT_EQ (6, m.elements ());
> +  ASSERT_EQ (*m.get (2), ostrich);
> +  ASSERT_EQ (*m.get (4), elephant);
> +  ASSERT_EQ (*m.get (6), ant);
> +  ASSERT_EQ (*m.get (8), spider);
> +  ASSERT_EQ (*m.get (750), millipede);
> +  ASSERT_EQ (*m.get (3), eric);
> +
> +  /* Verify that the order of insertion is preserved.  */
> +  auto_vec<std::pair<int, const char *> > kvs;
> +  get_kv_pairs (m, &kvs);
> +  ASSERT_EQ (kvs.length (), 6);
> +  ASSERT_EQ (kvs[0].first, 2);
> +  ASSERT_EQ (kvs[0].second, ostrich);
> +  ASSERT_EQ (kvs[1].first, 4);
> +  ASSERT_EQ (kvs[1].second, elephant);
> +  ASSERT_EQ (kvs[2].first, 6);
> +  ASSERT_EQ (kvs[2].second, ant);
> +  ASSERT_EQ (kvs[3].first, 8);
> +  ASSERT_EQ (kvs[3].second, spider);
> +  ASSERT_EQ (kvs[4].first, 750);
> +  ASSERT_EQ (kvs[4].second, millipede);
> +  ASSERT_EQ (kvs[5].first, 3);
> +  ASSERT_EQ (kvs[5].second, eric);
> +}
> +
> +/* Verify that we can remove items from an ordered_hash_map.  */
> +
> +static void
> +test_removal ()
> +{
> +  ordered_hash_map <const char *, int> m;
> +
> +  const char *ostrich = "ostrich";
> +  ASSERT_EQ (false, m.put (ostrich, 2));
> +
> +  ASSERT_EQ (1, m.elements ());
> +  ASSERT_EQ (2, *m.get (ostrich));
> +
> +  {
> +    auto_vec<std::pair<const char *, int> > kvs;
> +    get_kv_pairs (m, &kvs);
> +    ASSERT_EQ (kvs.length (), 1);
> +    ASSERT_EQ (kvs[0].first, ostrich);
> +    ASSERT_EQ (kvs[0].second, 2);
> +  }
> +
> +  m.remove (ostrich);
> +
> +  ASSERT_EQ (0, m.elements ());
> +  {
> +    auto_vec<std::pair<const char *, int> > kvs;
> +    get_kv_pairs (m, &kvs);
> +    ASSERT_EQ (kvs.length (), 0);
> +  }
> +
> +  /* Reinsertion (with a different value).  */
> +  ASSERT_EQ (false, m.put (ostrich, 42));
> +  ASSERT_EQ (1, m.elements ());
> +  ASSERT_EQ (42, *m.get (ostrich));
> +  {
> +    auto_vec<std::pair<const char *, int> > kvs;
> +    get_kv_pairs (m, &kvs);
> +    ASSERT_EQ (kvs.length (), 1);
> +    ASSERT_EQ (kvs[0].first, ostrich);
> +    ASSERT_EQ (kvs[0].second, 42);
> +  }
> +}
> +
> +/* Verify that ordered_hash_map's copy-ctor works.  */
> +
> +static void
> +test_copy_ctor ()
> +{
> +  ordered_hash_map <const char *, int> m;
> +
> +  const char *ostrich = "ostrich";
> +  ASSERT_EQ (false, m.put (ostrich, 2));
> +
> +  ASSERT_EQ (1, m.elements ());
> +  ASSERT_EQ (2, *m.get (ostrich));
> +
> +  ordered_hash_map <const char *, int> copy (m);
> +  ASSERT_EQ (1, copy.elements ());
> +  ASSERT_EQ (2, *copy.get (ostrich));
> +
> +  /* Remove from source.  */
> +  m.remove (ostrich);
> +  ASSERT_EQ (0, m.elements ());
> +
> +  /* Copy should be unaffected.  */
> +  ASSERT_EQ (1, copy.elements ());
> +  ASSERT_EQ (2, *copy.get (ostrich));
> +}
> +
> +/* Run all of the selftests within this file.  */
> +
> +void
> +ordered_hash_map_tests_cc_tests ()
> +{
> +  test_map_of_strings_to_int ();
> +  test_map_of_int_to_strings ();
> +  test_removal ();
> +  test_copy_ctor ();
> +}
> +
> +} // namespace selftest
> +
> +#endif /* CHECKING_P */
> diff --git a/gcc/ordered-hash-map.h b/gcc/ordered-hash-map.h
> new file mode 100644
> index 0000000..4e35cad
> --- /dev/null
> +++ b/gcc/ordered-hash-map.h
> @@ -0,0 +1,184 @@
> +/* A type-safe hash map that retains the insertion order of keys.
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +
> +#ifndef GCC_ORDERED_HASH_MAP_H
> +#define GCC_ORDERED_HASH_MAP_H
> +
> +/* Notes:
> +   - The keys must be PODs, since vec<> uses assignment to populate slots
> +     without properly initializing them.
> +   - doesn't have GTY support.
> +   - supports removal, but retains order of original insertion.
> +     (Removal might be better handled by using a doubly-linked list
> +     of nodes, holding the values).  */
> +
> +template<typename KeyId, typename Value,
> +	 typename Traits>
> +class ordered_hash_map
> +{
> +  typedef typename Traits::key_type Key;
> +
> +public:
> +  ordered_hash_map () {}
> +
> +  ordered_hash_map (const ordered_hash_map &other)

The container defines a copy-constructor but no copy assignment.
Is it safely assignable? (I don't think auto_vec is safely copyable
or assignable due to PR 90904.  It looks like the copy ctor works
around it below.)

I don't think I've made use of the hash_map copy ctor or copy
assignment but if it's anything like other GCC containers I'd
worry about it not doing the right thing, especially for non-
PODs.  I spent too much time chasing down miscompilations and
other problems due to bugs (or design limitations) in these
classes.

I'd far prefer to see us use libstdc++ containers in new code
than introduce new ones of our own.  They are better designed
and much better tested than these.  (I realize we're still
hampered by targeting C++ 98.)

Martin

> +  : m_map (other.m_map),
> +    m_keys (other.m_keys.length ()),
> +    m_key_index (other.m_key_index)
> +  {
> +     unsigned i;
> +     Key key;
> +     FOR_EACH_VEC_ELT (other.m_keys, i, key)
> +       m_keys.quick_push (key);
> +  }
> +
> +  /* If key K isn't already in the map add key K with value V to the map, and
> +     return false.  Otherwise set the value of the entry for key K to be V and
> +     return true.  */
> +
> +  bool put (const Key &k, const Value &v)
> +  {
> +    bool existed = m_map.put (k, v);
> +    if (!existed)
> +      {
> +        bool key_present;
> +        int &slot = m_key_index.get_or_insert (k, &key_present);
> +        if (!key_present)
> +          {
> +             slot = m_keys.length ();
> +             m_keys.safe_push (k);
> +          }
> +      }
> +    return existed;
> +  }
> +
> +  /* If the passed in key is in the map return its value otherwise NULL.  */
> +
> +  Value *get (const Key &k)
> +  {
> +    return m_map.get (k);
> +  }
> +
> +  /* Removing a key removes it from the map, but retains the insertion
> +     order.  */
> +
> +  void remove (const Key &k)
> +  {
> +     m_map.remove (k);
> +  }
> +
> +  size_t elements () const { return m_map.elements (); }
> +
> +  class iterator
> +  {
> +  public:
> +    explicit iterator (const ordered_hash_map &map, unsigned idx) :
> +      m_ordered_hash_map (map), m_idx (idx) {}
> +
> +    iterator &operator++ ()
> +    {
> +       /* Increment m_idx until we find a non-deleted element, or go beyond
> +	  the end.  */
> +       while (1)
> +	 {
> +	   ++m_idx;
> +	   if (valid_index_p ())
> +	     break;
> +	}
> +      return *this;
> +    }
> +
> +    /* Can't use std::pair here, because GCC before 4.3 don't handle
> +       std::pair where template parameters are references well.
> +       See PR86739.  */
> +    struct reference_pair {
> +      const Key &first;
> +      Value &second;
> +
> +      reference_pair (const Key &key, Value &value)
> +      : first (key), second (value) {}
> +
> +      template <typename K, typename V>
> +      operator std::pair<K, V> () const { return std::pair<K, V> (first, second); }
> +    };
> +
> +    reference_pair operator* ()
> +    {
> +      const Key &k = m_ordered_hash_map.m_keys[m_idx];
> +      Value *slot
> +        = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);
> +      gcc_assert (slot);
> +      return reference_pair (k, *slot);
> +    }
> +
> +    bool
> +    operator != (const iterator &other) const
> +    {
> +      return m_idx != other.m_idx;
> +    }
> +
> +    /* Treat one-beyond-the-end as valid, for handling the "end" case.  */
> +
> +    bool valid_index_p () const
> +    {
> +      if (m_idx > m_ordered_hash_map.m_keys.length ())
> +	return false;
> +      if (m_idx == m_ordered_hash_map.m_keys.length ())
> +	return true;
> +      const Key &k = m_ordered_hash_map.m_keys[m_idx];
> +      Value *slot
> +	= const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);
> +      return slot != NULL;
> +    }
> +
> +    const ordered_hash_map &m_ordered_hash_map;
> +    unsigned m_idx;
> +  };
> +
> +  /* Standard iterator retrieval methods.  */
> +
> +  iterator begin () const
> +  {
> +    iterator i = iterator (*this, 0);
> +    while (!i.valid_index_p () && i != end ())
> +      ++i;
> +    return i;
> +  }
> +  iterator end () const { return iterator (*this, m_keys.length ()); }
> +
> +private:
> +  /* The underlying map.  */
> +  hash_map<KeyId, Value, Traits> m_map;
> +
> +  /* The ordering of the keys.  */
> +  auto_vec<Key> m_keys;
> +
> +  /* For each key that's ever been in the map, its index within m_keys.  */
> +  hash_map<KeyId, int> m_key_index;
> +};
> +
> +/* Two-argument form.  */
> +
> +template<typename Key, typename Value,
> +	 typename Traits = simple_hashmap_traits<default_hash_traits<Key>,
> +						 Value> >
> +class ordered_hash_map;
> +
> +#endif /* GCC_ORDERED_HASH_MAP_H */
> diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
> index 92c8de7..f41244b 100644
> --- a/gcc/selftest-run-tests.c
> +++ b/gcc/selftest-run-tests.c
> @@ -76,6 +76,7 @@ selftest::run_tests ()
>     cgraph_c_tests ();
>     optinfo_emit_json_cc_tests ();
>     opt_problem_cc_tests ();
> +  ordered_hash_map_tests_cc_tests ();
>   
>     /* Mid-level data structures.  */
>     input_c_tests ();
> diff --git a/gcc/selftest.h b/gcc/selftest.h
> index c67b688..933d1cd 100644
> --- a/gcc/selftest.h
> +++ b/gcc/selftest.h
> @@ -242,6 +242,7 @@ extern void input_c_tests ();
>   extern void json_cc_tests ();
>   extern void opt_problem_cc_tests ();
>   extern void optinfo_emit_json_cc_tests ();
> +extern void ordered_hash_map_tests_cc_tests ();
>   extern void predict_c_tests ();
>   extern void pretty_print_c_tests ();
>   extern void read_rtl_function_c_tests ();
>
David Malcolm Jan. 8, 2020, 11:47 p.m. UTC | #2
On Wed, 2019-12-04 at 10:59 -0700, Martin Sebor wrote:
> On 11/15/19 6:23 PM, David Malcolm wrote:
> > This patch adds an ordered_hash_map template, which is similar to
> > hash_map, but preserves insertion order.
> > 
> > gcc/ChangeLog:
> > 	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.
> > 	* ordered-hash-map-tests.cc: New file.
> > 	* ordered-hash-map.h: New file.
> > 	* selftest-run-tests.c (selftest::run_tests): Call
> > 	selftest::ordered_hash_map_tests_cc_tests.
> > 	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New
> > 	decl.
> > ---
> >   gcc/Makefile.in               |   1 +
> >   gcc/ordered-hash-map-tests.cc | 247
> > ++++++++++++++++++++++++++++++++++++++++++
> >   gcc/ordered-hash-map.h        | 184
> > +++++++++++++++++++++++++++++++
> >   gcc/selftest-run-tests.c      |   1 +
> >   gcc/selftest.h                |   1 +
> >   5 files changed, 434 insertions(+)
> >   create mode 100644 gcc/ordered-hash-map-tests.cc
> >   create mode 100644 gcc/ordered-hash-map.h
> > 

[...]

> The container defines a copy-constructor but no copy assignment.
> Is it safely assignable? (I don't think auto_vec is safely copyable
> or assignable due to PR 90904.  It looks like the copy ctor works
> around it below.)

It's not safely assignable; I don't believe I'm using that (I am using
the copy ctor).

I can make it private or similar to ensure it's not used.

> I don't think I've made use of the hash_map copy ctor or copy
> assignment but if it's anything like other GCC containers I'd
> worry about it not doing the right thing, especially for non-
> PODs.  I spent too much time chasing down miscompilations and
> other problems due to bugs (or design limitations) in these
> classes.
> 
> I'd far prefer to see us use libstdc++ containers in new code
> than introduce new ones of our own.  They are better designed
> and much better tested than these.  (I realize we're still
> hampered by targeting C++ 98.)
> 
> Martin

[CCing Jonathan for libstdc++ expertise]

Is there such a container in libstdc++?

This patch implements a map that preserves insertion order when
iterating, without requiring the Key type to be comparable.

As far as I can tell, std::map instead is a map that requires the Key
type to be comparable, and uses that to implement a red-black tree
(rather than via hashing), and doesn't preserve insertion ordering.

I use this ordered_hash_map class later in various places in the
analyzer patch kit to ensure more deterministic results, so that
results aren't affected by hash values of possibly-changing pointer
values.

[...]

Dave
Jonathan Wakely Jan. 9, 2020, 11:56 a.m. UTC | #3
On 08/01/20 18:47 -0500, David Malcolm wrote:
>On Wed, 2019-12-04 at 10:59 -0700, Martin Sebor wrote:
>> On 11/15/19 6:23 PM, David Malcolm wrote:
>> > This patch adds an ordered_hash_map template, which is similar to
>> > hash_map, but preserves insertion order.
>> >
>> > gcc/ChangeLog:
>> > 	* Makefile.in (OBJS): Add ordered-hash-map-tests.o.
>> > 	* ordered-hash-map-tests.cc: New file.
>> > 	* ordered-hash-map.h: New file.
>> > 	* selftest-run-tests.c (selftest::run_tests): Call
>> > 	selftest::ordered_hash_map_tests_cc_tests.
>> > 	* selftest.h (selftest::ordered_hash_map_tests_cc_tests): New
>> > 	decl.
>> > ---
>> >   gcc/Makefile.in               |   1 +
>> >   gcc/ordered-hash-map-tests.cc | 247
>> > ++++++++++++++++++++++++++++++++++++++++++
>> >   gcc/ordered-hash-map.h        | 184
>> > +++++++++++++++++++++++++++++++
>> >   gcc/selftest-run-tests.c      |   1 +
>> >   gcc/selftest.h                |   1 +
>> >   5 files changed, 434 insertions(+)
>> >   create mode 100644 gcc/ordered-hash-map-tests.cc
>> >   create mode 100644 gcc/ordered-hash-map.h
>> >
>
>[...]
>
>> The container defines a copy-constructor but no copy assignment.
>> Is it safely assignable? (I don't think auto_vec is safely copyable
>> or assignable due to PR 90904.  It looks like the copy ctor works
>> around it below.)
>
>It's not safely assignable; I don't believe I'm using that (I am using
>the copy ctor).
>
>I can make it private or similar to ensure it's not used.

Yes, if the compiler-generated assignment isn't safe, please declare
(but don't define) a private assignment operator, to suppress the
compiler-generated one.

>> I don't think I've made use of the hash_map copy ctor or copy
>> assignment but if it's anything like other GCC containers I'd
>> worry about it not doing the right thing, especially for non-
>> PODs.  I spent too much time chasing down miscompilations and
>> other problems due to bugs (or design limitations) in these
>> classes.
>>
>> I'd far prefer to see us use libstdc++ containers in new code
>> than introduce new ones of our own.  They are better designed
>> and much better tested than these.  (I realize we're still
>> hampered by targeting C++ 98.)
>>
>> Martin
>
>[CCing Jonathan for libstdc++ expertise]
>
>Is there such a container in libstdc++?


>This patch implements a map that preserves insertion order when
>iterating, without requiring the Key type to be comparable.
>
>As far as I can tell, std::map instead is a map that requires the Key
>type to be comparable,

Right.

>and uses that to implement a red-black tree
>(rather than via hashing),

Right.

>and doesn't preserve insertion ordering.

std::map has unique keys anyway, so insertion order is irrelevant.
Iteration is done by key order.

std::multimap is the non-unique version, and preserves insertion order
for equivalent keys, but only since C++11. Before that it was
unspecified whether or not a duplicate key was inserted after existing
elements with equivalent keys.

>I use this ordered_hash_map class later in various places in the
>analyzer patch kit to ensure more deterministic results, so that
>results aren't affected by hash values of possibly-changing pointer
>values.

Nothing in the standard library provides that functionality out of the
box. The C++11 hash containers don't preserve order at all, even
equivalent elements can be reordered when insertion triggers
rehashing.

You could build something using a sequence container (vector, deque or
list) that preserves a stable order according to insertion order, and
then manually maintain a separate index of hash values used for
lookup. That might not make things any simpler (I haven't looked at
your actual patch).
diff mbox series

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index c87b00f..5feef6a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1443,6 +1443,7 @@  OBJS = \
 	optinfo-emit-json.o \
 	options-save.o \
 	opts-global.o \
+	ordered-hash-map-tests.o \
 	passes.o \
 	plugin.o \
 	postreload-gcse.o \
diff --git a/gcc/ordered-hash-map-tests.cc b/gcc/ordered-hash-map-tests.cc
new file mode 100644
index 0000000..266f60c
--- /dev/null
+++ b/gcc/ordered-hash-map-tests.cc
@@ -0,0 +1,247 @@ 
+/* Unit tests for ordered-hash-map.h.
+   Copyright (C) 2015-2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "opts.h"
+#include "hash-set.h"
+#include "fixed-value.h"
+#include "alias.h"
+#include "flags.h"
+#include "symtab.h"
+#include "tree-core.h"
+#include "stor-layout.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "ordered-hash-map.h"
+#include "selftest.h"
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Populate *OUT_KVS with the key/value pairs of M.  */
+
+template <typename HashMap, typename Key, typename Value>
+static void
+get_kv_pairs (const HashMap &m,
+	      auto_vec<std::pair<Key, Value> > *out_kvs)
+{
+  for (typename HashMap::iterator iter = m.begin ();
+       iter != m.end ();
+       ++iter)
+    out_kvs->safe_push (std::make_pair ((*iter).first, (*iter).second));
+}
+
+/* Construct an ordered_hash_map <const char *, int> and verify that
+   various operations work correctly.  */
+
+static void
+test_map_of_strings_to_int ()
+{
+  ordered_hash_map <const char *, int> m;
+
+  const char *ostrich = "ostrich";
+  const char *elephant = "elephant";
+  const char *ant = "ant";
+  const char *spider = "spider";
+  const char *millipede = "Illacme plenipes";
+  const char *eric = "half a bee";
+
+  /* A fresh hash_map should be empty.  */
+  ASSERT_EQ (0, m.elements ());
+  ASSERT_EQ (NULL, m.get (ostrich));
+
+  /* Populate the hash_map.  */
+  ASSERT_EQ (false, m.put (ostrich, 2));
+  ASSERT_EQ (false, m.put (elephant, 4));
+  ASSERT_EQ (false, m.put (ant, 6));
+  ASSERT_EQ (false, m.put (spider, 8));
+  ASSERT_EQ (false, m.put (millipede, 750));
+  ASSERT_EQ (false, m.put (eric, 3));
+
+  /* Verify that we can recover the stored values.  */
+  ASSERT_EQ (6, m.elements ());
+  ASSERT_EQ (2, *m.get (ostrich));
+  ASSERT_EQ (4, *m.get (elephant));
+  ASSERT_EQ (6, *m.get (ant));
+  ASSERT_EQ (8, *m.get (spider));
+  ASSERT_EQ (750, *m.get (millipede));
+  ASSERT_EQ (3, *m.get (eric));
+
+  /* Verify that the order of insertion is preserved.  */
+  auto_vec<std::pair<const char *, int> > kvs;
+  get_kv_pairs (m, &kvs);
+  ASSERT_EQ (kvs.length (), 6);
+  ASSERT_EQ (kvs[0].first, ostrich);
+  ASSERT_EQ (kvs[0].second, 2);
+  ASSERT_EQ (kvs[1].first, elephant);
+  ASSERT_EQ (kvs[1].second, 4);
+  ASSERT_EQ (kvs[2].first, ant);
+  ASSERT_EQ (kvs[2].second, 6);
+  ASSERT_EQ (kvs[3].first, spider);
+  ASSERT_EQ (kvs[3].second, 8);
+  ASSERT_EQ (kvs[4].first, millipede);
+  ASSERT_EQ (kvs[4].second, 750);
+  ASSERT_EQ (kvs[5].first, eric);
+  ASSERT_EQ (kvs[5].second, 3);
+}
+
+/* Construct an ordered_hash_map using int_hash and verify that various
+   operations work correctly.  */
+
+static void
+test_map_of_int_to_strings ()
+{
+  const int EMPTY = -1;
+  const int DELETED = -2;
+  typedef int_hash <int, EMPTY, DELETED> int_hash_t;
+  ordered_hash_map <int_hash_t, const char *> m;
+
+  const char *ostrich = "ostrich";
+  const char *elephant = "elephant";
+  const char *ant = "ant";
+  const char *spider = "spider";
+  const char *millipede = "Illacme plenipes";
+  const char *eric = "half a bee";
+
+  /* A fresh hash_map should be empty.  */
+  ASSERT_EQ (0, m.elements ());
+  ASSERT_EQ (NULL, m.get (2));
+
+  /* Populate the hash_map.  */
+  ASSERT_EQ (false, m.put (2, ostrich));
+  ASSERT_EQ (false, m.put (4, elephant));
+  ASSERT_EQ (false, m.put (6, ant));
+  ASSERT_EQ (false, m.put (8, spider));
+  ASSERT_EQ (false, m.put (750, millipede));
+  ASSERT_EQ (false, m.put (3, eric));
+
+  /* Verify that we can recover the stored values.  */
+  ASSERT_EQ (6, m.elements ());
+  ASSERT_EQ (*m.get (2), ostrich);
+  ASSERT_EQ (*m.get (4), elephant);
+  ASSERT_EQ (*m.get (6), ant);
+  ASSERT_EQ (*m.get (8), spider);
+  ASSERT_EQ (*m.get (750), millipede);
+  ASSERT_EQ (*m.get (3), eric);
+
+  /* Verify that the order of insertion is preserved.  */
+  auto_vec<std::pair<int, const char *> > kvs;
+  get_kv_pairs (m, &kvs);
+  ASSERT_EQ (kvs.length (), 6);
+  ASSERT_EQ (kvs[0].first, 2);
+  ASSERT_EQ (kvs[0].second, ostrich);
+  ASSERT_EQ (kvs[1].first, 4);
+  ASSERT_EQ (kvs[1].second, elephant);
+  ASSERT_EQ (kvs[2].first, 6);
+  ASSERT_EQ (kvs[2].second, ant);
+  ASSERT_EQ (kvs[3].first, 8);
+  ASSERT_EQ (kvs[3].second, spider);
+  ASSERT_EQ (kvs[4].first, 750);
+  ASSERT_EQ (kvs[4].second, millipede);
+  ASSERT_EQ (kvs[5].first, 3);
+  ASSERT_EQ (kvs[5].second, eric);
+}
+
+/* Verify that we can remove items from an ordered_hash_map.  */
+
+static void
+test_removal ()
+{
+  ordered_hash_map <const char *, int> m;
+
+  const char *ostrich = "ostrich";
+  ASSERT_EQ (false, m.put (ostrich, 2));
+
+  ASSERT_EQ (1, m.elements ());
+  ASSERT_EQ (2, *m.get (ostrich));
+
+  {
+    auto_vec<std::pair<const char *, int> > kvs;
+    get_kv_pairs (m, &kvs);
+    ASSERT_EQ (kvs.length (), 1);
+    ASSERT_EQ (kvs[0].first, ostrich);
+    ASSERT_EQ (kvs[0].second, 2);
+  }
+
+  m.remove (ostrich);
+
+  ASSERT_EQ (0, m.elements ());
+  {
+    auto_vec<std::pair<const char *, int> > kvs;
+    get_kv_pairs (m, &kvs);
+    ASSERT_EQ (kvs.length (), 0);
+  }
+
+  /* Reinsertion (with a different value).  */
+  ASSERT_EQ (false, m.put (ostrich, 42));
+  ASSERT_EQ (1, m.elements ());
+  ASSERT_EQ (42, *m.get (ostrich));
+  {
+    auto_vec<std::pair<const char *, int> > kvs;
+    get_kv_pairs (m, &kvs);
+    ASSERT_EQ (kvs.length (), 1);
+    ASSERT_EQ (kvs[0].first, ostrich);
+    ASSERT_EQ (kvs[0].second, 42);
+  }
+}
+
+/* Verify that ordered_hash_map's copy-ctor works.  */
+
+static void
+test_copy_ctor ()
+{
+  ordered_hash_map <const char *, int> m;
+
+  const char *ostrich = "ostrich";
+  ASSERT_EQ (false, m.put (ostrich, 2));
+
+  ASSERT_EQ (1, m.elements ());
+  ASSERT_EQ (2, *m.get (ostrich));
+
+  ordered_hash_map <const char *, int> copy (m);
+  ASSERT_EQ (1, copy.elements ());
+  ASSERT_EQ (2, *copy.get (ostrich));
+
+  /* Remove from source.  */
+  m.remove (ostrich);
+  ASSERT_EQ (0, m.elements ());
+
+  /* Copy should be unaffected.  */
+  ASSERT_EQ (1, copy.elements ());
+  ASSERT_EQ (2, *copy.get (ostrich));
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+ordered_hash_map_tests_cc_tests ()
+{
+  test_map_of_strings_to_int ();
+  test_map_of_int_to_strings ();
+  test_removal ();
+  test_copy_ctor ();
+}
+
+} // namespace selftest
+
+#endif /* CHECKING_P */
diff --git a/gcc/ordered-hash-map.h b/gcc/ordered-hash-map.h
new file mode 100644
index 0000000..4e35cad
--- /dev/null
+++ b/gcc/ordered-hash-map.h
@@ -0,0 +1,184 @@ 
+/* A type-safe hash map that retains the insertion order of keys.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifndef GCC_ORDERED_HASH_MAP_H
+#define GCC_ORDERED_HASH_MAP_H
+
+/* Notes:
+   - The keys must be PODs, since vec<> uses assignment to populate slots
+     without properly initializing them.
+   - doesn't have GTY support.
+   - supports removal, but retains order of original insertion.
+     (Removal might be better handled by using a doubly-linked list
+     of nodes, holding the values).  */
+
+template<typename KeyId, typename Value,
+	 typename Traits>
+class ordered_hash_map
+{
+  typedef typename Traits::key_type Key;
+
+public:
+  ordered_hash_map () {}
+
+  ordered_hash_map (const ordered_hash_map &other)
+  : m_map (other.m_map),
+    m_keys (other.m_keys.length ()),
+    m_key_index (other.m_key_index)
+  {
+     unsigned i;
+     Key key;
+     FOR_EACH_VEC_ELT (other.m_keys, i, key)
+       m_keys.quick_push (key);
+  }
+
+  /* If key K isn't already in the map add key K with value V to the map, and
+     return false.  Otherwise set the value of the entry for key K to be V and
+     return true.  */
+
+  bool put (const Key &k, const Value &v)
+  {
+    bool existed = m_map.put (k, v);
+    if (!existed)
+      {
+        bool key_present;
+        int &slot = m_key_index.get_or_insert (k, &key_present);
+        if (!key_present)
+          {
+             slot = m_keys.length ();
+             m_keys.safe_push (k);
+          }
+      }
+    return existed;
+  }
+
+  /* If the passed in key is in the map return its value otherwise NULL.  */
+
+  Value *get (const Key &k)
+  {
+    return m_map.get (k);
+  }
+
+  /* Removing a key removes it from the map, but retains the insertion
+     order.  */
+
+  void remove (const Key &k)
+  {
+     m_map.remove (k);
+  }
+
+  size_t elements () const { return m_map.elements (); }
+
+  class iterator
+  {
+  public:
+    explicit iterator (const ordered_hash_map &map, unsigned idx) :
+      m_ordered_hash_map (map), m_idx (idx) {}
+
+    iterator &operator++ ()
+    {
+       /* Increment m_idx until we find a non-deleted element, or go beyond
+	  the end.  */
+       while (1)
+	 {
+	   ++m_idx;
+	   if (valid_index_p ())
+	     break;
+	}
+      return *this;
+    }
+
+    /* Can't use std::pair here, because GCC before 4.3 don't handle
+       std::pair where template parameters are references well.
+       See PR86739.  */
+    struct reference_pair {
+      const Key &first;
+      Value &second;
+
+      reference_pair (const Key &key, Value &value)
+      : first (key), second (value) {}
+
+      template <typename K, typename V>
+      operator std::pair<K, V> () const { return std::pair<K, V> (first, second); }
+    };
+
+    reference_pair operator* ()
+    {
+      const Key &k = m_ordered_hash_map.m_keys[m_idx];
+      Value *slot
+        = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);
+      gcc_assert (slot);
+      return reference_pair (k, *slot);
+    }
+
+    bool
+    operator != (const iterator &other) const
+    {
+      return m_idx != other.m_idx;
+    }
+
+    /* Treat one-beyond-the-end as valid, for handling the "end" case.  */
+
+    bool valid_index_p () const
+    {
+      if (m_idx > m_ordered_hash_map.m_keys.length ())
+	return false;
+      if (m_idx == m_ordered_hash_map.m_keys.length ())
+	return true;
+      const Key &k = m_ordered_hash_map.m_keys[m_idx];
+      Value *slot
+	= const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k);
+      return slot != NULL;
+    }
+
+    const ordered_hash_map &m_ordered_hash_map;
+    unsigned m_idx;
+  };
+
+  /* Standard iterator retrieval methods.  */
+
+  iterator begin () const
+  {
+    iterator i = iterator (*this, 0);
+    while (!i.valid_index_p () && i != end ())
+      ++i;
+    return i;
+  }
+  iterator end () const { return iterator (*this, m_keys.length ()); }
+
+private:
+  /* The underlying map.  */
+  hash_map<KeyId, Value, Traits> m_map;
+
+  /* The ordering of the keys.  */
+  auto_vec<Key> m_keys;
+
+  /* For each key that's ever been in the map, its index within m_keys.  */
+  hash_map<KeyId, int> m_key_index;
+};
+
+/* Two-argument form.  */
+
+template<typename Key, typename Value,
+	 typename Traits = simple_hashmap_traits<default_hash_traits<Key>,
+						 Value> >
+class ordered_hash_map;
+
+#endif /* GCC_ORDERED_HASH_MAP_H */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 92c8de7..f41244b 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -76,6 +76,7 @@  selftest::run_tests ()
   cgraph_c_tests ();
   optinfo_emit_json_cc_tests ();
   opt_problem_cc_tests ();
+  ordered_hash_map_tests_cc_tests ();
 
   /* Mid-level data structures.  */
   input_c_tests ();
diff --git a/gcc/selftest.h b/gcc/selftest.h
index c67b688..933d1cd 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -242,6 +242,7 @@  extern void input_c_tests ();
 extern void json_cc_tests ();
 extern void opt_problem_cc_tests ();
 extern void optinfo_emit_json_cc_tests ();
+extern void ordered_hash_map_tests_cc_tests ();
 extern void predict_c_tests ();
 extern void pretty_print_c_tests ();
 extern void read_rtl_function_c_tests ();