Message ID | 1573867416-55618-16-git-send-email-dmalcolm@redhat.com |
---|---|
State | New |
Headers | show |
Series | RFC: Add a static analysis framework to GCC | expand |
On 11/15/19 6:23 PM, David Malcolm wrote: > This patch adds an ordered_hash_map template, which is similar to > hash_map, but preserves insertion order. > > gcc/ChangeLog: > * Makefile.in (OBJS): Add ordered-hash-map-tests.o. > * ordered-hash-map-tests.cc: New file. > * ordered-hash-map.h: New file. > * selftest-run-tests.c (selftest::run_tests): Call > selftest::ordered_hash_map_tests_cc_tests. > * selftest.h (selftest::ordered_hash_map_tests_cc_tests): New > decl. > --- > gcc/Makefile.in | 1 + > gcc/ordered-hash-map-tests.cc | 247 ++++++++++++++++++++++++++++++++++++++++++ > gcc/ordered-hash-map.h | 184 +++++++++++++++++++++++++++++++ > gcc/selftest-run-tests.c | 1 + > gcc/selftest.h | 1 + > 5 files changed, 434 insertions(+) > create mode 100644 gcc/ordered-hash-map-tests.cc > create mode 100644 gcc/ordered-hash-map.h > > diff --git a/gcc/Makefile.in b/gcc/Makefile.in > index c87b00f..5feef6a 100644 > --- a/gcc/Makefile.in > +++ b/gcc/Makefile.in > @@ -1443,6 +1443,7 @@ OBJS = \ > optinfo-emit-json.o \ > options-save.o \ > opts-global.o \ > + ordered-hash-map-tests.o \ > passes.o \ > plugin.o \ > postreload-gcse.o \ > diff --git a/gcc/ordered-hash-map-tests.cc b/gcc/ordered-hash-map-tests.cc > new file mode 100644 > index 0000000..266f60c > --- /dev/null > +++ b/gcc/ordered-hash-map-tests.cc > @@ -0,0 +1,247 @@ > +/* Unit tests for ordered-hash-map.h. > + Copyright (C) 2015-2019 Free Software Foundation, Inc. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "tm.h" > +#include "opts.h" > +#include "hash-set.h" > +#include "fixed-value.h" > +#include "alias.h" > +#include "flags.h" > +#include "symtab.h" > +#include "tree-core.h" > +#include "stor-layout.h" > +#include "tree.h" > +#include "stringpool.h" > +#include "ordered-hash-map.h" > +#include "selftest.h" > + > +#if CHECKING_P > + > +namespace selftest { > + > +/* Populate *OUT_KVS with the key/value pairs of M. */ > + > +template <typename HashMap, typename Key, typename Value> > +static void > +get_kv_pairs (const HashMap &m, > + auto_vec<std::pair<Key, Value> > *out_kvs) > +{ > + for (typename HashMap::iterator iter = m.begin (); > + iter != m.end (); > + ++iter) > + out_kvs->safe_push (std::make_pair ((*iter).first, (*iter).second)); > +} > + > +/* Construct an ordered_hash_map <const char *, int> and verify that > + various operations work correctly. */ > + > +static void > +test_map_of_strings_to_int () > +{ > + ordered_hash_map <const char *, int> m; > + > + const char *ostrich = "ostrich"; > + const char *elephant = "elephant"; > + const char *ant = "ant"; > + const char *spider = "spider"; > + const char *millipede = "Illacme plenipes"; > + const char *eric = "half a bee"; > + > + /* A fresh hash_map should be empty. */ > + ASSERT_EQ (0, m.elements ()); > + ASSERT_EQ (NULL, m.get (ostrich)); > + > + /* Populate the hash_map. */ > + ASSERT_EQ (false, m.put (ostrich, 2)); > + ASSERT_EQ (false, m.put (elephant, 4)); > + ASSERT_EQ (false, m.put (ant, 6)); > + ASSERT_EQ (false, m.put (spider, 8)); > + ASSERT_EQ (false, m.put (millipede, 750)); > + ASSERT_EQ (false, m.put (eric, 3)); > + > + /* Verify that we can recover the stored values. */ > + ASSERT_EQ (6, m.elements ()); > + ASSERT_EQ (2, *m.get (ostrich)); > + ASSERT_EQ (4, *m.get (elephant)); > + ASSERT_EQ (6, *m.get (ant)); > + ASSERT_EQ (8, *m.get (spider)); > + ASSERT_EQ (750, *m.get (millipede)); > + ASSERT_EQ (3, *m.get (eric)); > + > + /* Verify that the order of insertion is preserved. */ > + auto_vec<std::pair<const char *, int> > kvs; > + get_kv_pairs (m, &kvs); > + ASSERT_EQ (kvs.length (), 6); > + ASSERT_EQ (kvs[0].first, ostrich); > + ASSERT_EQ (kvs[0].second, 2); > + ASSERT_EQ (kvs[1].first, elephant); > + ASSERT_EQ (kvs[1].second, 4); > + ASSERT_EQ (kvs[2].first, ant); > + ASSERT_EQ (kvs[2].second, 6); > + ASSERT_EQ (kvs[3].first, spider); > + ASSERT_EQ (kvs[3].second, 8); > + ASSERT_EQ (kvs[4].first, millipede); > + ASSERT_EQ (kvs[4].second, 750); > + ASSERT_EQ (kvs[5].first, eric); > + ASSERT_EQ (kvs[5].second, 3); > +} > + > +/* Construct an ordered_hash_map using int_hash and verify that various > + operations work correctly. */ > + > +static void > +test_map_of_int_to_strings () > +{ > + const int EMPTY = -1; > + const int DELETED = -2; > + typedef int_hash <int, EMPTY, DELETED> int_hash_t; > + ordered_hash_map <int_hash_t, const char *> m; > + > + const char *ostrich = "ostrich"; > + const char *elephant = "elephant"; > + const char *ant = "ant"; > + const char *spider = "spider"; > + const char *millipede = "Illacme plenipes"; > + const char *eric = "half a bee"; > + > + /* A fresh hash_map should be empty. */ > + ASSERT_EQ (0, m.elements ()); > + ASSERT_EQ (NULL, m.get (2)); > + > + /* Populate the hash_map. */ > + ASSERT_EQ (false, m.put (2, ostrich)); > + ASSERT_EQ (false, m.put (4, elephant)); > + ASSERT_EQ (false, m.put (6, ant)); > + ASSERT_EQ (false, m.put (8, spider)); > + ASSERT_EQ (false, m.put (750, millipede)); > + ASSERT_EQ (false, m.put (3, eric)); > + > + /* Verify that we can recover the stored values. */ > + ASSERT_EQ (6, m.elements ()); > + ASSERT_EQ (*m.get (2), ostrich); > + ASSERT_EQ (*m.get (4), elephant); > + ASSERT_EQ (*m.get (6), ant); > + ASSERT_EQ (*m.get (8), spider); > + ASSERT_EQ (*m.get (750), millipede); > + ASSERT_EQ (*m.get (3), eric); > + > + /* Verify that the order of insertion is preserved. */ > + auto_vec<std::pair<int, const char *> > kvs; > + get_kv_pairs (m, &kvs); > + ASSERT_EQ (kvs.length (), 6); > + ASSERT_EQ (kvs[0].first, 2); > + ASSERT_EQ (kvs[0].second, ostrich); > + ASSERT_EQ (kvs[1].first, 4); > + ASSERT_EQ (kvs[1].second, elephant); > + ASSERT_EQ (kvs[2].first, 6); > + ASSERT_EQ (kvs[2].second, ant); > + ASSERT_EQ (kvs[3].first, 8); > + ASSERT_EQ (kvs[3].second, spider); > + ASSERT_EQ (kvs[4].first, 750); > + ASSERT_EQ (kvs[4].second, millipede); > + ASSERT_EQ (kvs[5].first, 3); > + ASSERT_EQ (kvs[5].second, eric); > +} > + > +/* Verify that we can remove items from an ordered_hash_map. */ > + > +static void > +test_removal () > +{ > + ordered_hash_map <const char *, int> m; > + > + const char *ostrich = "ostrich"; > + ASSERT_EQ (false, m.put (ostrich, 2)); > + > + ASSERT_EQ (1, m.elements ()); > + ASSERT_EQ (2, *m.get (ostrich)); > + > + { > + auto_vec<std::pair<const char *, int> > kvs; > + get_kv_pairs (m, &kvs); > + ASSERT_EQ (kvs.length (), 1); > + ASSERT_EQ (kvs[0].first, ostrich); > + ASSERT_EQ (kvs[0].second, 2); > + } > + > + m.remove (ostrich); > + > + ASSERT_EQ (0, m.elements ()); > + { > + auto_vec<std::pair<const char *, int> > kvs; > + get_kv_pairs (m, &kvs); > + ASSERT_EQ (kvs.length (), 0); > + } > + > + /* Reinsertion (with a different value). */ > + ASSERT_EQ (false, m.put (ostrich, 42)); > + ASSERT_EQ (1, m.elements ()); > + ASSERT_EQ (42, *m.get (ostrich)); > + { > + auto_vec<std::pair<const char *, int> > kvs; > + get_kv_pairs (m, &kvs); > + ASSERT_EQ (kvs.length (), 1); > + ASSERT_EQ (kvs[0].first, ostrich); > + ASSERT_EQ (kvs[0].second, 42); > + } > +} > + > +/* Verify that ordered_hash_map's copy-ctor works. */ > + > +static void > +test_copy_ctor () > +{ > + ordered_hash_map <const char *, int> m; > + > + const char *ostrich = "ostrich"; > + ASSERT_EQ (false, m.put (ostrich, 2)); > + > + ASSERT_EQ (1, m.elements ()); > + ASSERT_EQ (2, *m.get (ostrich)); > + > + ordered_hash_map <const char *, int> copy (m); > + ASSERT_EQ (1, copy.elements ()); > + ASSERT_EQ (2, *copy.get (ostrich)); > + > + /* Remove from source. */ > + m.remove (ostrich); > + ASSERT_EQ (0, m.elements ()); > + > + /* Copy should be unaffected. */ > + ASSERT_EQ (1, copy.elements ()); > + ASSERT_EQ (2, *copy.get (ostrich)); > +} > + > +/* Run all of the selftests within this file. */ > + > +void > +ordered_hash_map_tests_cc_tests () > +{ > + test_map_of_strings_to_int (); > + test_map_of_int_to_strings (); > + test_removal (); > + test_copy_ctor (); > +} > + > +} // namespace selftest > + > +#endif /* CHECKING_P */ > diff --git a/gcc/ordered-hash-map.h b/gcc/ordered-hash-map.h > new file mode 100644 > index 0000000..4e35cad > --- /dev/null > +++ b/gcc/ordered-hash-map.h > @@ -0,0 +1,184 @@ > +/* A type-safe hash map that retains the insertion order of keys. > + Copyright (C) 2019 Free Software Foundation, Inc. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > + > +#ifndef GCC_ORDERED_HASH_MAP_H > +#define GCC_ORDERED_HASH_MAP_H > + > +/* Notes: > + - The keys must be PODs, since vec<> uses assignment to populate slots > + without properly initializing them. > + - doesn't have GTY support. > + - supports removal, but retains order of original insertion. > + (Removal might be better handled by using a doubly-linked list > + of nodes, holding the values). */ > + > +template<typename KeyId, typename Value, > + typename Traits> > +class ordered_hash_map > +{ > + typedef typename Traits::key_type Key; > + > +public: > + ordered_hash_map () {} > + > + ordered_hash_map (const ordered_hash_map &other) The container defines a copy-constructor but no copy assignment. Is it safely assignable? (I don't think auto_vec is safely copyable or assignable due to PR 90904. It looks like the copy ctor works around it below.) I don't think I've made use of the hash_map copy ctor or copy assignment but if it's anything like other GCC containers I'd worry about it not doing the right thing, especially for non- PODs. I spent too much time chasing down miscompilations and other problems due to bugs (or design limitations) in these classes. I'd far prefer to see us use libstdc++ containers in new code than introduce new ones of our own. They are better designed and much better tested than these. (I realize we're still hampered by targeting C++ 98.) Martin > + : m_map (other.m_map), > + m_keys (other.m_keys.length ()), > + m_key_index (other.m_key_index) > + { > + unsigned i; > + Key key; > + FOR_EACH_VEC_ELT (other.m_keys, i, key) > + m_keys.quick_push (key); > + } > + > + /* If key K isn't already in the map add key K with value V to the map, and > + return false. Otherwise set the value of the entry for key K to be V and > + return true. */ > + > + bool put (const Key &k, const Value &v) > + { > + bool existed = m_map.put (k, v); > + if (!existed) > + { > + bool key_present; > + int &slot = m_key_index.get_or_insert (k, &key_present); > + if (!key_present) > + { > + slot = m_keys.length (); > + m_keys.safe_push (k); > + } > + } > + return existed; > + } > + > + /* If the passed in key is in the map return its value otherwise NULL. */ > + > + Value *get (const Key &k) > + { > + return m_map.get (k); > + } > + > + /* Removing a key removes it from the map, but retains the insertion > + order. */ > + > + void remove (const Key &k) > + { > + m_map.remove (k); > + } > + > + size_t elements () const { return m_map.elements (); } > + > + class iterator > + { > + public: > + explicit iterator (const ordered_hash_map &map, unsigned idx) : > + m_ordered_hash_map (map), m_idx (idx) {} > + > + iterator &operator++ () > + { > + /* Increment m_idx until we find a non-deleted element, or go beyond > + the end. */ > + while (1) > + { > + ++m_idx; > + if (valid_index_p ()) > + break; > + } > + return *this; > + } > + > + /* Can't use std::pair here, because GCC before 4.3 don't handle > + std::pair where template parameters are references well. > + See PR86739. */ > + struct reference_pair { > + const Key &first; > + Value &second; > + > + reference_pair (const Key &key, Value &value) > + : first (key), second (value) {} > + > + template <typename K, typename V> > + operator std::pair<K, V> () const { return std::pair<K, V> (first, second); } > + }; > + > + reference_pair operator* () > + { > + const Key &k = m_ordered_hash_map.m_keys[m_idx]; > + Value *slot > + = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k); > + gcc_assert (slot); > + return reference_pair (k, *slot); > + } > + > + bool > + operator != (const iterator &other) const > + { > + return m_idx != other.m_idx; > + } > + > + /* Treat one-beyond-the-end as valid, for handling the "end" case. */ > + > + bool valid_index_p () const > + { > + if (m_idx > m_ordered_hash_map.m_keys.length ()) > + return false; > + if (m_idx == m_ordered_hash_map.m_keys.length ()) > + return true; > + const Key &k = m_ordered_hash_map.m_keys[m_idx]; > + Value *slot > + = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k); > + return slot != NULL; > + } > + > + const ordered_hash_map &m_ordered_hash_map; > + unsigned m_idx; > + }; > + > + /* Standard iterator retrieval methods. */ > + > + iterator begin () const > + { > + iterator i = iterator (*this, 0); > + while (!i.valid_index_p () && i != end ()) > + ++i; > + return i; > + } > + iterator end () const { return iterator (*this, m_keys.length ()); } > + > +private: > + /* The underlying map. */ > + hash_map<KeyId, Value, Traits> m_map; > + > + /* The ordering of the keys. */ > + auto_vec<Key> m_keys; > + > + /* For each key that's ever been in the map, its index within m_keys. */ > + hash_map<KeyId, int> m_key_index; > +}; > + > +/* Two-argument form. */ > + > +template<typename Key, typename Value, > + typename Traits = simple_hashmap_traits<default_hash_traits<Key>, > + Value> > > +class ordered_hash_map; > + > +#endif /* GCC_ORDERED_HASH_MAP_H */ > diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c > index 92c8de7..f41244b 100644 > --- a/gcc/selftest-run-tests.c > +++ b/gcc/selftest-run-tests.c > @@ -76,6 +76,7 @@ selftest::run_tests () > cgraph_c_tests (); > optinfo_emit_json_cc_tests (); > opt_problem_cc_tests (); > + ordered_hash_map_tests_cc_tests (); > > /* Mid-level data structures. */ > input_c_tests (); > diff --git a/gcc/selftest.h b/gcc/selftest.h > index c67b688..933d1cd 100644 > --- a/gcc/selftest.h > +++ b/gcc/selftest.h > @@ -242,6 +242,7 @@ extern void input_c_tests (); > extern void json_cc_tests (); > extern void opt_problem_cc_tests (); > extern void optinfo_emit_json_cc_tests (); > +extern void ordered_hash_map_tests_cc_tests (); > extern void predict_c_tests (); > extern void pretty_print_c_tests (); > extern void read_rtl_function_c_tests (); >
On Wed, 2019-12-04 at 10:59 -0700, Martin Sebor wrote: > On 11/15/19 6:23 PM, David Malcolm wrote: > > This patch adds an ordered_hash_map template, which is similar to > > hash_map, but preserves insertion order. > > > > gcc/ChangeLog: > > * Makefile.in (OBJS): Add ordered-hash-map-tests.o. > > * ordered-hash-map-tests.cc: New file. > > * ordered-hash-map.h: New file. > > * selftest-run-tests.c (selftest::run_tests): Call > > selftest::ordered_hash_map_tests_cc_tests. > > * selftest.h (selftest::ordered_hash_map_tests_cc_tests): New > > decl. > > --- > > gcc/Makefile.in | 1 + > > gcc/ordered-hash-map-tests.cc | 247 > > ++++++++++++++++++++++++++++++++++++++++++ > > gcc/ordered-hash-map.h | 184 > > +++++++++++++++++++++++++++++++ > > gcc/selftest-run-tests.c | 1 + > > gcc/selftest.h | 1 + > > 5 files changed, 434 insertions(+) > > create mode 100644 gcc/ordered-hash-map-tests.cc > > create mode 100644 gcc/ordered-hash-map.h > > [...] > The container defines a copy-constructor but no copy assignment. > Is it safely assignable? (I don't think auto_vec is safely copyable > or assignable due to PR 90904. It looks like the copy ctor works > around it below.) It's not safely assignable; I don't believe I'm using that (I am using the copy ctor). I can make it private or similar to ensure it's not used. > I don't think I've made use of the hash_map copy ctor or copy > assignment but if it's anything like other GCC containers I'd > worry about it not doing the right thing, especially for non- > PODs. I spent too much time chasing down miscompilations and > other problems due to bugs (or design limitations) in these > classes. > > I'd far prefer to see us use libstdc++ containers in new code > than introduce new ones of our own. They are better designed > and much better tested than these. (I realize we're still > hampered by targeting C++ 98.) > > Martin [CCing Jonathan for libstdc++ expertise] Is there such a container in libstdc++? This patch implements a map that preserves insertion order when iterating, without requiring the Key type to be comparable. As far as I can tell, std::map instead is a map that requires the Key type to be comparable, and uses that to implement a red-black tree (rather than via hashing), and doesn't preserve insertion ordering. I use this ordered_hash_map class later in various places in the analyzer patch kit to ensure more deterministic results, so that results aren't affected by hash values of possibly-changing pointer values. [...] Dave
On 08/01/20 18:47 -0500, David Malcolm wrote: >On Wed, 2019-12-04 at 10:59 -0700, Martin Sebor wrote: >> On 11/15/19 6:23 PM, David Malcolm wrote: >> > This patch adds an ordered_hash_map template, which is similar to >> > hash_map, but preserves insertion order. >> > >> > gcc/ChangeLog: >> > * Makefile.in (OBJS): Add ordered-hash-map-tests.o. >> > * ordered-hash-map-tests.cc: New file. >> > * ordered-hash-map.h: New file. >> > * selftest-run-tests.c (selftest::run_tests): Call >> > selftest::ordered_hash_map_tests_cc_tests. >> > * selftest.h (selftest::ordered_hash_map_tests_cc_tests): New >> > decl. >> > --- >> > gcc/Makefile.in | 1 + >> > gcc/ordered-hash-map-tests.cc | 247 >> > ++++++++++++++++++++++++++++++++++++++++++ >> > gcc/ordered-hash-map.h | 184 >> > +++++++++++++++++++++++++++++++ >> > gcc/selftest-run-tests.c | 1 + >> > gcc/selftest.h | 1 + >> > 5 files changed, 434 insertions(+) >> > create mode 100644 gcc/ordered-hash-map-tests.cc >> > create mode 100644 gcc/ordered-hash-map.h >> > > >[...] > >> The container defines a copy-constructor but no copy assignment. >> Is it safely assignable? (I don't think auto_vec is safely copyable >> or assignable due to PR 90904. It looks like the copy ctor works >> around it below.) > >It's not safely assignable; I don't believe I'm using that (I am using >the copy ctor). > >I can make it private or similar to ensure it's not used. Yes, if the compiler-generated assignment isn't safe, please declare (but don't define) a private assignment operator, to suppress the compiler-generated one. >> I don't think I've made use of the hash_map copy ctor or copy >> assignment but if it's anything like other GCC containers I'd >> worry about it not doing the right thing, especially for non- >> PODs. I spent too much time chasing down miscompilations and >> other problems due to bugs (or design limitations) in these >> classes. >> >> I'd far prefer to see us use libstdc++ containers in new code >> than introduce new ones of our own. They are better designed >> and much better tested than these. (I realize we're still >> hampered by targeting C++ 98.) >> >> Martin > >[CCing Jonathan for libstdc++ expertise] > >Is there such a container in libstdc++? >This patch implements a map that preserves insertion order when >iterating, without requiring the Key type to be comparable. > >As far as I can tell, std::map instead is a map that requires the Key >type to be comparable, Right. >and uses that to implement a red-black tree >(rather than via hashing), Right. >and doesn't preserve insertion ordering. std::map has unique keys anyway, so insertion order is irrelevant. Iteration is done by key order. std::multimap is the non-unique version, and preserves insertion order for equivalent keys, but only since C++11. Before that it was unspecified whether or not a duplicate key was inserted after existing elements with equivalent keys. >I use this ordered_hash_map class later in various places in the >analyzer patch kit to ensure more deterministic results, so that >results aren't affected by hash values of possibly-changing pointer >values. Nothing in the standard library provides that functionality out of the box. The C++11 hash containers don't preserve order at all, even equivalent elements can be reordered when insertion triggers rehashing. You could build something using a sequence container (vector, deque or list) that preserves a stable order according to insertion order, and then manually maintain a separate index of hash values used for lookup. That might not make things any simpler (I haven't looked at your actual patch).
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index c87b00f..5feef6a 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1443,6 +1443,7 @@ OBJS = \ optinfo-emit-json.o \ options-save.o \ opts-global.o \ + ordered-hash-map-tests.o \ passes.o \ plugin.o \ postreload-gcse.o \ diff --git a/gcc/ordered-hash-map-tests.cc b/gcc/ordered-hash-map-tests.cc new file mode 100644 index 0000000..266f60c --- /dev/null +++ b/gcc/ordered-hash-map-tests.cc @@ -0,0 +1,247 @@ +/* Unit tests for ordered-hash-map.h. + Copyright (C) 2015-2019 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "opts.h" +#include "hash-set.h" +#include "fixed-value.h" +#include "alias.h" +#include "flags.h" +#include "symtab.h" +#include "tree-core.h" +#include "stor-layout.h" +#include "tree.h" +#include "stringpool.h" +#include "ordered-hash-map.h" +#include "selftest.h" + +#if CHECKING_P + +namespace selftest { + +/* Populate *OUT_KVS with the key/value pairs of M. */ + +template <typename HashMap, typename Key, typename Value> +static void +get_kv_pairs (const HashMap &m, + auto_vec<std::pair<Key, Value> > *out_kvs) +{ + for (typename HashMap::iterator iter = m.begin (); + iter != m.end (); + ++iter) + out_kvs->safe_push (std::make_pair ((*iter).first, (*iter).second)); +} + +/* Construct an ordered_hash_map <const char *, int> and verify that + various operations work correctly. */ + +static void +test_map_of_strings_to_int () +{ + ordered_hash_map <const char *, int> m; + + const char *ostrich = "ostrich"; + const char *elephant = "elephant"; + const char *ant = "ant"; + const char *spider = "spider"; + const char *millipede = "Illacme plenipes"; + const char *eric = "half a bee"; + + /* A fresh hash_map should be empty. */ + ASSERT_EQ (0, m.elements ()); + ASSERT_EQ (NULL, m.get (ostrich)); + + /* Populate the hash_map. */ + ASSERT_EQ (false, m.put (ostrich, 2)); + ASSERT_EQ (false, m.put (elephant, 4)); + ASSERT_EQ (false, m.put (ant, 6)); + ASSERT_EQ (false, m.put (spider, 8)); + ASSERT_EQ (false, m.put (millipede, 750)); + ASSERT_EQ (false, m.put (eric, 3)); + + /* Verify that we can recover the stored values. */ + ASSERT_EQ (6, m.elements ()); + ASSERT_EQ (2, *m.get (ostrich)); + ASSERT_EQ (4, *m.get (elephant)); + ASSERT_EQ (6, *m.get (ant)); + ASSERT_EQ (8, *m.get (spider)); + ASSERT_EQ (750, *m.get (millipede)); + ASSERT_EQ (3, *m.get (eric)); + + /* Verify that the order of insertion is preserved. */ + auto_vec<std::pair<const char *, int> > kvs; + get_kv_pairs (m, &kvs); + ASSERT_EQ (kvs.length (), 6); + ASSERT_EQ (kvs[0].first, ostrich); + ASSERT_EQ (kvs[0].second, 2); + ASSERT_EQ (kvs[1].first, elephant); + ASSERT_EQ (kvs[1].second, 4); + ASSERT_EQ (kvs[2].first, ant); + ASSERT_EQ (kvs[2].second, 6); + ASSERT_EQ (kvs[3].first, spider); + ASSERT_EQ (kvs[3].second, 8); + ASSERT_EQ (kvs[4].first, millipede); + ASSERT_EQ (kvs[4].second, 750); + ASSERT_EQ (kvs[5].first, eric); + ASSERT_EQ (kvs[5].second, 3); +} + +/* Construct an ordered_hash_map using int_hash and verify that various + operations work correctly. */ + +static void +test_map_of_int_to_strings () +{ + const int EMPTY = -1; + const int DELETED = -2; + typedef int_hash <int, EMPTY, DELETED> int_hash_t; + ordered_hash_map <int_hash_t, const char *> m; + + const char *ostrich = "ostrich"; + const char *elephant = "elephant"; + const char *ant = "ant"; + const char *spider = "spider"; + const char *millipede = "Illacme plenipes"; + const char *eric = "half a bee"; + + /* A fresh hash_map should be empty. */ + ASSERT_EQ (0, m.elements ()); + ASSERT_EQ (NULL, m.get (2)); + + /* Populate the hash_map. */ + ASSERT_EQ (false, m.put (2, ostrich)); + ASSERT_EQ (false, m.put (4, elephant)); + ASSERT_EQ (false, m.put (6, ant)); + ASSERT_EQ (false, m.put (8, spider)); + ASSERT_EQ (false, m.put (750, millipede)); + ASSERT_EQ (false, m.put (3, eric)); + + /* Verify that we can recover the stored values. */ + ASSERT_EQ (6, m.elements ()); + ASSERT_EQ (*m.get (2), ostrich); + ASSERT_EQ (*m.get (4), elephant); + ASSERT_EQ (*m.get (6), ant); + ASSERT_EQ (*m.get (8), spider); + ASSERT_EQ (*m.get (750), millipede); + ASSERT_EQ (*m.get (3), eric); + + /* Verify that the order of insertion is preserved. */ + auto_vec<std::pair<int, const char *> > kvs; + get_kv_pairs (m, &kvs); + ASSERT_EQ (kvs.length (), 6); + ASSERT_EQ (kvs[0].first, 2); + ASSERT_EQ (kvs[0].second, ostrich); + ASSERT_EQ (kvs[1].first, 4); + ASSERT_EQ (kvs[1].second, elephant); + ASSERT_EQ (kvs[2].first, 6); + ASSERT_EQ (kvs[2].second, ant); + ASSERT_EQ (kvs[3].first, 8); + ASSERT_EQ (kvs[3].second, spider); + ASSERT_EQ (kvs[4].first, 750); + ASSERT_EQ (kvs[4].second, millipede); + ASSERT_EQ (kvs[5].first, 3); + ASSERT_EQ (kvs[5].second, eric); +} + +/* Verify that we can remove items from an ordered_hash_map. */ + +static void +test_removal () +{ + ordered_hash_map <const char *, int> m; + + const char *ostrich = "ostrich"; + ASSERT_EQ (false, m.put (ostrich, 2)); + + ASSERT_EQ (1, m.elements ()); + ASSERT_EQ (2, *m.get (ostrich)); + + { + auto_vec<std::pair<const char *, int> > kvs; + get_kv_pairs (m, &kvs); + ASSERT_EQ (kvs.length (), 1); + ASSERT_EQ (kvs[0].first, ostrich); + ASSERT_EQ (kvs[0].second, 2); + } + + m.remove (ostrich); + + ASSERT_EQ (0, m.elements ()); + { + auto_vec<std::pair<const char *, int> > kvs; + get_kv_pairs (m, &kvs); + ASSERT_EQ (kvs.length (), 0); + } + + /* Reinsertion (with a different value). */ + ASSERT_EQ (false, m.put (ostrich, 42)); + ASSERT_EQ (1, m.elements ()); + ASSERT_EQ (42, *m.get (ostrich)); + { + auto_vec<std::pair<const char *, int> > kvs; + get_kv_pairs (m, &kvs); + ASSERT_EQ (kvs.length (), 1); + ASSERT_EQ (kvs[0].first, ostrich); + ASSERT_EQ (kvs[0].second, 42); + } +} + +/* Verify that ordered_hash_map's copy-ctor works. */ + +static void +test_copy_ctor () +{ + ordered_hash_map <const char *, int> m; + + const char *ostrich = "ostrich"; + ASSERT_EQ (false, m.put (ostrich, 2)); + + ASSERT_EQ (1, m.elements ()); + ASSERT_EQ (2, *m.get (ostrich)); + + ordered_hash_map <const char *, int> copy (m); + ASSERT_EQ (1, copy.elements ()); + ASSERT_EQ (2, *copy.get (ostrich)); + + /* Remove from source. */ + m.remove (ostrich); + ASSERT_EQ (0, m.elements ()); + + /* Copy should be unaffected. */ + ASSERT_EQ (1, copy.elements ()); + ASSERT_EQ (2, *copy.get (ostrich)); +} + +/* Run all of the selftests within this file. */ + +void +ordered_hash_map_tests_cc_tests () +{ + test_map_of_strings_to_int (); + test_map_of_int_to_strings (); + test_removal (); + test_copy_ctor (); +} + +} // namespace selftest + +#endif /* CHECKING_P */ diff --git a/gcc/ordered-hash-map.h b/gcc/ordered-hash-map.h new file mode 100644 index 0000000..4e35cad --- /dev/null +++ b/gcc/ordered-hash-map.h @@ -0,0 +1,184 @@ +/* A type-safe hash map that retains the insertion order of keys. + Copyright (C) 2019 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +#ifndef GCC_ORDERED_HASH_MAP_H +#define GCC_ORDERED_HASH_MAP_H + +/* Notes: + - The keys must be PODs, since vec<> uses assignment to populate slots + without properly initializing them. + - doesn't have GTY support. + - supports removal, but retains order of original insertion. + (Removal might be better handled by using a doubly-linked list + of nodes, holding the values). */ + +template<typename KeyId, typename Value, + typename Traits> +class ordered_hash_map +{ + typedef typename Traits::key_type Key; + +public: + ordered_hash_map () {} + + ordered_hash_map (const ordered_hash_map &other) + : m_map (other.m_map), + m_keys (other.m_keys.length ()), + m_key_index (other.m_key_index) + { + unsigned i; + Key key; + FOR_EACH_VEC_ELT (other.m_keys, i, key) + m_keys.quick_push (key); + } + + /* If key K isn't already in the map add key K with value V to the map, and + return false. Otherwise set the value of the entry for key K to be V and + return true. */ + + bool put (const Key &k, const Value &v) + { + bool existed = m_map.put (k, v); + if (!existed) + { + bool key_present; + int &slot = m_key_index.get_or_insert (k, &key_present); + if (!key_present) + { + slot = m_keys.length (); + m_keys.safe_push (k); + } + } + return existed; + } + + /* If the passed in key is in the map return its value otherwise NULL. */ + + Value *get (const Key &k) + { + return m_map.get (k); + } + + /* Removing a key removes it from the map, but retains the insertion + order. */ + + void remove (const Key &k) + { + m_map.remove (k); + } + + size_t elements () const { return m_map.elements (); } + + class iterator + { + public: + explicit iterator (const ordered_hash_map &map, unsigned idx) : + m_ordered_hash_map (map), m_idx (idx) {} + + iterator &operator++ () + { + /* Increment m_idx until we find a non-deleted element, or go beyond + the end. */ + while (1) + { + ++m_idx; + if (valid_index_p ()) + break; + } + return *this; + } + + /* Can't use std::pair here, because GCC before 4.3 don't handle + std::pair where template parameters are references well. + See PR86739. */ + struct reference_pair { + const Key &first; + Value &second; + + reference_pair (const Key &key, Value &value) + : first (key), second (value) {} + + template <typename K, typename V> + operator std::pair<K, V> () const { return std::pair<K, V> (first, second); } + }; + + reference_pair operator* () + { + const Key &k = m_ordered_hash_map.m_keys[m_idx]; + Value *slot + = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k); + gcc_assert (slot); + return reference_pair (k, *slot); + } + + bool + operator != (const iterator &other) const + { + return m_idx != other.m_idx; + } + + /* Treat one-beyond-the-end as valid, for handling the "end" case. */ + + bool valid_index_p () const + { + if (m_idx > m_ordered_hash_map.m_keys.length ()) + return false; + if (m_idx == m_ordered_hash_map.m_keys.length ()) + return true; + const Key &k = m_ordered_hash_map.m_keys[m_idx]; + Value *slot + = const_cast<ordered_hash_map &> (m_ordered_hash_map).get (k); + return slot != NULL; + } + + const ordered_hash_map &m_ordered_hash_map; + unsigned m_idx; + }; + + /* Standard iterator retrieval methods. */ + + iterator begin () const + { + iterator i = iterator (*this, 0); + while (!i.valid_index_p () && i != end ()) + ++i; + return i; + } + iterator end () const { return iterator (*this, m_keys.length ()); } + +private: + /* The underlying map. */ + hash_map<KeyId, Value, Traits> m_map; + + /* The ordering of the keys. */ + auto_vec<Key> m_keys; + + /* For each key that's ever been in the map, its index within m_keys. */ + hash_map<KeyId, int> m_key_index; +}; + +/* Two-argument form. */ + +template<typename Key, typename Value, + typename Traits = simple_hashmap_traits<default_hash_traits<Key>, + Value> > +class ordered_hash_map; + +#endif /* GCC_ORDERED_HASH_MAP_H */ diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c index 92c8de7..f41244b 100644 --- a/gcc/selftest-run-tests.c +++ b/gcc/selftest-run-tests.c @@ -76,6 +76,7 @@ selftest::run_tests () cgraph_c_tests (); optinfo_emit_json_cc_tests (); opt_problem_cc_tests (); + ordered_hash_map_tests_cc_tests (); /* Mid-level data structures. */ input_c_tests (); diff --git a/gcc/selftest.h b/gcc/selftest.h index c67b688..933d1cd 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -242,6 +242,7 @@ extern void input_c_tests (); extern void json_cc_tests (); extern void opt_problem_cc_tests (); extern void optinfo_emit_json_cc_tests (); +extern void ordered_hash_map_tests_cc_tests (); extern void predict_c_tests (); extern void pretty_print_c_tests (); extern void read_rtl_function_c_tests ();