//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the SparseSet class derived from the version described in
// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993.
//
// A sparse set holds a small number of objects identified by integer keys from
// a moderately sized universe. The sparse set uses more memory than other
// containers in order to provide faster operations.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_SPARSESET_H
#define LLVM_ADT_SPARSESET_H

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
#include <cstdlib>
#include <limits>
#include <utility>

namespace llvm {

/// SparseSetValTraits - Objects in a SparseSet are identified by keys that can
/// be uniquely converted to a small integer less than the set's universe. This
/// class allows the set to hold values that differ from the set's key type as
/// long as an index can still be derived from the value. SparseSet never
/// directly compares ValueT, only their indices, so it can map keys to
/// arbitrary values. SparseSetValTraits computes the index from the value
/// object. To compute the index from a key, SparseSet uses a separate
/// KeyFunctorT template argument.
///
/// A simple type declaration, SparseSet<Type>, handles these cases:
/// - unsigned key, identity index, identity value
/// - unsigned key, identity index, fat value providing getSparseSetIndex()
///
/// The type declaration SparseSet<Type, UnaryFunction> handles:
/// - unsigned key, remapped index, identity value (virtual registers)
/// - pointer key, pointer-derived index, identity value (node+ID)
/// - pointer key, pointer-derived index, fat value with getSparseSetIndex()
///
/// Only other, unexpected cases require specializing SparseSetValTraits.
///
/// For best results, ValueT should not require a destructor.
///
template<typename ValueT>
struct SparseSetValTraits {
  /// getValIndex - Return the sparse index of Val by asking the value itself.
  ///
  /// @param Val A value that provides a getSparseSetIndex() member.
  /// @returns Whatever Val.getSparseSetIndex() returns, as unsigned.
  ///
  static unsigned getValIndex(const ValueT &Val) {
    return Val.getSparseSetIndex();
  }
};

/// SparseSetValFunctor - Helper class for selecting SparseSetValTraits. The
/// generic implementation handles ValueT classes which either provide
/// getSparseSetIndex() or specialize SparseSetValTraits<>.
///
template<typename KeyT, typename ValueT, typename KeyFunctorT>
struct SparseSetValFunctor {
  /// Compute the index of Val through SparseSetValTraits<ValueT>. KeyT and
  /// KeyFunctorT are not used by this generic version.
  unsigned operator()(const ValueT &Val) const {
    return SparseSetValTraits<ValueT>::getValIndex(Val);
  }
};

/// SparseSetValFunctor<KeyT, KeyT> - Helper class for the common case of
/// identity key/value sets.
template<typename KeyT, typename KeyFunctorT>
struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> {
  /// The value is its own key, so the index comes from a default-constructed
  /// KeyFunctorT applied to it.
  unsigned operator()(const KeyT &Key) const {
    return KeyFunctorT()(Key);
  }
};

/// SparseSet - Fast set implementation for objects that can be identified by
/// small unsigned keys.
///
/// SparseSet allocates memory proportional to the size of the key universe, so
/// it is not recommended for building composite data structures. It is useful
/// for algorithms that require a single set with fast operations.
85 /// 86 /// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast 87 /// clear() and iteration as fast as a vector. The find(), insert(), and 88 /// erase() operations are all constant time, and typically faster than a hash 89 /// table. The iteration order doesn't depend on numerical key values, it only 90 /// depends on the order of insert() and erase() operations. When no elements 91 /// have been erased, the iteration order is the insertion order. 92 /// 93 /// Compared to BitVector, SparseSet<unsigned> uses 8x-40x more memory, but 94 /// offers constant-time clear() and size() operations as well as fast 95 /// iteration independent on the size of the universe. 96 /// 97 /// SparseSet contains a dense vector holding all the objects and a sparse 98 /// array holding indexes into the dense vector. Most of the memory is used by 99 /// the sparse array which is the size of the key universe. The SparseT 100 /// template parameter provides a space/speed tradeoff for sets holding many 101 /// elements. 102 /// 103 /// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse 104 /// array uses 4 x Universe bytes. 105 /// 106 /// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache 107 /// lines, but the sparse array is 4x smaller. N is the number of elements in 108 /// the set. 109 /// 110 /// For sets that may grow to thousands of elements, SparseT should be set to 111 /// uint16_t or uint32_t. 112 /// 113 /// @tparam ValueT The type of objects in the set. 114 /// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT. 115 /// @tparam SparseT An unsigned integer type. See above. 
116 /// 117 template<typename ValueT, 118 typename KeyFunctorT = llvm::identity<unsigned>, 119 typename SparseT = uint8_t> 120 class SparseSet { 121 static_assert(std::numeric_limits<SparseT>::is_integer && 122 !std::numeric_limits<SparseT>::is_signed, 123 "SparseT must be an unsigned integer type"); 124 125 typedef typename KeyFunctorT::argument_type KeyT; 126 typedef SmallVector<ValueT, 8> DenseT; 127 typedef unsigned size_type; 128 DenseT Dense; 129 SparseT *Sparse; 130 unsigned Universe; 131 KeyFunctorT KeyIndexOf; 132 SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf; 133 134 // Disable copy construction and assignment. 135 // This data structure is not meant to be used that way. 136 SparseSet(const SparseSet&) = delete; 137 SparseSet &operator=(const SparseSet&) = delete; 138 139 public: 140 typedef ValueT value_type; 141 typedef ValueT &reference; 142 typedef const ValueT &const_reference; 143 typedef ValueT *pointer; 144 typedef const ValueT *const_pointer; 145 146 SparseSet() : Sparse(nullptr), Universe(0) {} 147 ~SparseSet() { free(Sparse); } 148 149 /// setUniverse - Set the universe size which determines the largest key the 150 /// set can hold. The universe must be sized before any elements can be 151 /// added. 152 /// 153 /// @param U Universe size. All object keys must be less than U. 154 /// 155 void setUniverse(unsigned U) { 156 // It's not hard to resize the universe on a non-empty set, but it doesn't 157 // seem like a likely use case, so we can add that code when we need it. 158 assert(empty() && "Can only resize universe on an empty map"); 159 // Hysteresis prevents needless reallocations. 160 if (U >= Universe/4 && U <= Universe) 161 return; 162 free(Sparse); 163 // The Sparse array doesn't actually need to be initialized, so malloc 164 // would be enough here, but that will cause tools like valgrind to 165 // complain about branching on uninitialized data. 
166 Sparse = reinterpret_cast<SparseT*>(calloc(U, sizeof(SparseT))); 167 Universe = U; 168 } 169 170 // Import trivial vector stuff from DenseT. 171 typedef typename DenseT::iterator iterator; 172 typedef typename DenseT::const_iterator const_iterator; 173 174 const_iterator begin() const { return Dense.begin(); } 175 const_iterator end() const { return Dense.end(); } 176 iterator begin() { return Dense.begin(); } 177 iterator end() { return Dense.end(); } 178 179 /// empty - Returns true if the set is empty. 180 /// 181 /// This is not the same as BitVector::empty(). 182 /// 183 bool empty() const { return Dense.empty(); } 184 185 /// size - Returns the number of elements in the set. 186 /// 187 /// This is not the same as BitVector::size() which returns the size of the 188 /// universe. 189 /// 190 size_type size() const { return Dense.size(); } 191 192 /// clear - Clears the set. This is a very fast constant time operation. 193 /// 194 void clear() { 195 // Sparse does not need to be cleared, see find(). 196 Dense.clear(); 197 } 198 199 /// findIndex - Find an element by its index. 200 /// 201 /// @param Idx A valid index to find. 202 /// @returns An iterator to the element identified by key, or end(). 203 /// 204 iterator findIndex(unsigned Idx) { 205 assert(Idx < Universe && "Key out of range"); 206 const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u; 207 for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) { 208 const unsigned FoundIdx = ValIndexOf(Dense[i]); 209 assert(FoundIdx < Universe && "Invalid key in set. Did object mutate?"); 210 if (Idx == FoundIdx) 211 return begin() + i; 212 // Stride is 0 when SparseT >= unsigned. We don't need to loop. 213 if (!Stride) 214 break; 215 } 216 return end(); 217 } 218 219 /// find - Find an element by its key. 220 /// 221 /// @param Key A valid key to find. 222 /// @returns An iterator to the element identified by key, or end(). 
223 /// 224 iterator find(const KeyT &Key) { 225 return findIndex(KeyIndexOf(Key)); 226 } 227 228 const_iterator find(const KeyT &Key) const { 229 return const_cast<SparseSet*>(this)->findIndex(KeyIndexOf(Key)); 230 } 231 232 /// count - Returns 1 if this set contains an element identified by Key, 233 /// 0 otherwise. 234 /// 235 size_type count(const KeyT &Key) const { 236 return find(Key) == end() ? 0 : 1; 237 } 238 239 /// insert - Attempts to insert a new element. 240 /// 241 /// If Val is successfully inserted, return (I, true), where I is an iterator 242 /// pointing to the newly inserted element. 243 /// 244 /// If the set already contains an element with the same key as Val, return 245 /// (I, false), where I is an iterator pointing to the existing element. 246 /// 247 /// Insertion invalidates all iterators. 248 /// 249 std::pair<iterator, bool> insert(const ValueT &Val) { 250 unsigned Idx = ValIndexOf(Val); 251 iterator I = findIndex(Idx); 252 if (I != end()) 253 return std::make_pair(I, false); 254 Sparse[Idx] = size(); 255 Dense.push_back(Val); 256 return std::make_pair(end() - 1, true); 257 } 258 259 /// array subscript - If an element already exists with this key, return it. 260 /// Otherwise, automatically construct a new value from Key, insert it, 261 /// and return the newly inserted element. 262 ValueT &operator[](const KeyT &Key) { 263 return *insert(ValueT(Key)).first; 264 } 265 266 /// erase - Erases an existing element identified by a valid iterator. 267 /// 268 /// This invalidates all iterators, but erase() returns an iterator pointing 269 /// to the next element. This makes it possible to erase selected elements 270 /// while iterating over the set: 271 /// 272 /// for (SparseSet::iterator I = Set.begin(); I != Set.end();) 273 /// if (test(*I)) 274 /// I = Set.erase(I); 275 /// else 276 /// ++I; 277 /// 278 /// Note that end() changes when elements are erased, unlike std::list. 
279 /// 280 iterator erase(iterator I) { 281 assert(unsigned(I - begin()) < size() && "Invalid iterator"); 282 if (I != end() - 1) { 283 *I = Dense.back(); 284 unsigned BackIdx = ValIndexOf(Dense.back()); 285 assert(BackIdx < Universe && "Invalid key in set. Did object mutate?"); 286 Sparse[BackIdx] = I - begin(); 287 } 288 // This depends on SmallVector::pop_back() not invalidating iterators. 289 // std::vector::pop_back() doesn't give that guarantee. 290 Dense.pop_back(); 291 return I; 292 } 293 294 /// erase - Erases an element identified by Key, if it exists. 295 /// 296 /// @param Key The key identifying the element to erase. 297 /// @returns True when an element was erased, false if no element was found. 298 /// 299 bool erase(const KeyT &Key) { 300 iterator I = find(Key); 301 if (I == end()) 302 return false; 303 erase(I); 304 return true; 305 } 306 307 }; 308 309 } // end namespace llvm 310 311 #endif 312