• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_SPARSE_SET_H_
6 #define RE2_SPARSE_SET_H_
7 
8 // DESCRIPTION
9 //
10 // SparseSet(m) is a set of integers in [0, m).
11 // It requires sizeof(int)*m memory, but it provides
12 // fast iteration through the elements in the set and fast clearing
13 // of the set.
14 //
15 // Insertion and deletion are constant time operations.
16 //
17 // Allocating the set is a constant time operation
18 // when memory allocation is a constant time operation.
19 //
20 // Clearing the set is a constant time operation (unusual!).
21 //
22 // Iterating through the set is an O(n) operation, where n
23 // is the number of items in the set (not O(m)).
24 //
25 // The set iterator visits entries in the order they were first
26 // inserted into the set.  It is safe to add items to the set while
27 // using an iterator: the iterator will visit indices added to the set
28 // during the iteration, but will not re-visit indices whose values
29 // change after visiting.  Thus SparseSet can be a convenient
30 // implementation of a work queue.
31 //
32 // The SparseSet implementation is NOT thread-safe.  It is up to the
33 // caller to make sure only one thread is accessing the set.  (Typically
34 // these sets are temporary values and used in situations where speed is
35 // important.)
36 //
37 // The SparseSet interface does not present all the usual STL bells and
38 // whistles.
39 //
40 // Implemented with reference to Briggs & Torczon, An Efficient
41 // Representation for Sparse Sets, ACM Letters on Programming Languages
42 // and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.
43 //
44 // This is a specialization of sparse array; see sparse_array.h.
45 
46 // IMPLEMENTATION
47 //
48 // See sparse_array.h for implementation details.
49 
50 #include <assert.h>
51 #include <stdint.h>
52 
53 #include <algorithm>
54 #include <memory>
55 #include <utility>
56 
57 #include "re2/pod_array.h"
58 
59 // Doing this simplifies the logic below.
60 #ifndef __has_feature
61 #define __has_feature(x) 0
62 #endif
63 
64 #if __has_feature(memory_sanitizer)
65 #include <sanitizer/msan_interface.h>
66 #endif
67 
68 namespace re2 {
69 
70 template<typename Value>
71 class SparseSetT {
72  public:
73   SparseSetT();
74   explicit SparseSetT(int max_size);
75   ~SparseSetT();
76 
77   typedef int* iterator;
78   typedef const int* const_iterator;
79 
80   // Return the number of entries in the set.
size()81   int size() const {
82     return size_;
83   }
84 
85   // Indicate whether the set is empty.
empty()86   int empty() const {
87     return size_ == 0;
88   }
89 
90   // Iterate over the set.
begin()91   iterator begin() {
92     return dense_.data();
93   }
end()94   iterator end() {
95     return dense_.data() + size_;
96   }
97 
begin()98   const_iterator begin() const {
99     return dense_.data();
100   }
end()101   const_iterator end() const {
102     return dense_.data() + size_;
103   }
104 
105   // Change the maximum size of the set.
106   // Invalidates all iterators.
107   void resize(int new_max_size);
108 
109   // Return the maximum size of the set.
110   // Indices can be in the range [0, max_size).
max_size()111   int max_size() const {
112     if (dense_.data() != NULL)
113       return dense_.size();
114     else
115       return 0;
116   }
117 
118   // Clear the set.
clear()119   void clear() {
120     size_ = 0;
121   }
122 
123   // Check whether index i is in the set.
124   bool contains(int i) const;
125 
126   // Comparison function for sorting.
127   // Can sort the sparse set so that future iterations
128   // will visit indices in increasing order using
129   // std::sort(arr.begin(), arr.end(), arr.less);
130   static bool less(int a, int b);
131 
132  public:
133   // Insert index i into the set.
insert(int i)134   iterator insert(int i) {
135     return InsertInternal(true, i);
136   }
137 
138   // Insert index i into the set.
139   // Fast but unsafe: only use if contains(i) is false.
insert_new(int i)140   iterator insert_new(int i) {
141     return InsertInternal(false, i);
142   }
143 
144  private:
InsertInternal(bool allow_existing,int i)145   iterator InsertInternal(bool allow_existing, int i) {
146     DebugCheckInvariants();
147     if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
148       assert(false && "illegal index");
149       // Semantically, end() would be better here, but we already know
150       // the user did something stupid, so begin() insulates them from
151       // dereferencing an invalid pointer.
152       return begin();
153     }
154     if (!allow_existing) {
155       assert(!contains(i));
156       create_index(i);
157     } else {
158       if (!contains(i))
159         create_index(i);
160     }
161     DebugCheckInvariants();
162     return dense_.data() + sparse_[i];
163   }
164 
165   // Add the index i to the set.
166   // Only use if contains(i) is known to be false.
167   // This function is private, only intended as a helper
168   // for other methods.
169   void create_index(int i);
170 
171   // In debug mode, verify that some invariant properties of the class
172   // are being maintained. This is called at the end of the constructor
173   // and at the beginning and end of all public non-const member functions.
174   void DebugCheckInvariants() const;
175 
176   // Initializes memory for elements [min, max).
MaybeInitializeMemory(int min,int max)177   void MaybeInitializeMemory(int min, int max) {
178 #if __has_feature(memory_sanitizer)
179     __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
180 #elif defined(RE2_ON_VALGRIND)
181     for (int i = min; i < max; i++) {
182       sparse_[i] = 0xababababU;
183     }
184 #endif
185   }
186 
187   int size_ = 0;
188   PODArray<int> sparse_;
189   PODArray<int> dense_;
190 };
191 
192 template<typename Value>
193 SparseSetT<Value>::SparseSetT() = default;
194 
195 // Change the maximum size of the set.
196 // Invalidates all iterators.
197 template<typename Value>
resize(int new_max_size)198 void SparseSetT<Value>::resize(int new_max_size) {
199   DebugCheckInvariants();
200   if (new_max_size > max_size()) {
201     const int old_max_size = max_size();
202 
203     // Construct these first for exception safety.
204     PODArray<int> a(new_max_size);
205     PODArray<int> b(new_max_size);
206 
207     std::copy_n(sparse_.data(), old_max_size, a.data());
208     std::copy_n(dense_.data(), old_max_size, b.data());
209 
210     sparse_ = std::move(a);
211     dense_ = std::move(b);
212 
213     MaybeInitializeMemory(old_max_size, new_max_size);
214   }
215   if (size_ > new_max_size)
216     size_ = new_max_size;
217   DebugCheckInvariants();
218 }
219 
220 // Check whether index i is in the set.
221 template<typename Value>
contains(int i)222 bool SparseSetT<Value>::contains(int i) const {
223   assert(i >= 0);
224   assert(i < max_size());
225   if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
226     return false;
227   }
228   // Unsigned comparison avoids checking sparse_[i] < 0.
229   return (uint32_t)sparse_[i] < (uint32_t)size_ &&
230          dense_[sparse_[i]] == i;
231 }
232 
233 template<typename Value>
create_index(int i)234 void SparseSetT<Value>::create_index(int i) {
235   assert(!contains(i));
236   assert(size_ < max_size());
237   sparse_[i] = size_;
238   dense_[size_] = i;
239   size_++;
240 }
241 
SparseSetT(int max_size)242 template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
243     sparse_(max_size), dense_(max_size) {
244   MaybeInitializeMemory(size_, max_size);
245   DebugCheckInvariants();
246 }
247 
~SparseSetT()248 template<typename Value> SparseSetT<Value>::~SparseSetT() {
249   DebugCheckInvariants();
250 }
251 
DebugCheckInvariants()252 template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
253   assert(0 <= size_);
254   assert(size_ <= max_size());
255 }
256 
257 // Comparison function for sorting.
less(int a,int b)258 template<typename Value> bool SparseSetT<Value>::less(int a, int b) {
259   return a < b;
260 }
261 
262 typedef SparseSetT<void> SparseSet;
263 
264 }  // namespace re2
265 
266 #endif  // RE2_SPARSE_SET_H_
267