• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #ifndef PANDA_VERIFICATION_UTIL_INT_SET_H
16 #define PANDA_VERIFICATION_UTIL_INT_SET_H
17 
18 #include "bit_vector.h"
19 
20 namespace panda::verifier {
21 
22 /**
23  * @brief A set implementation for integral types which automatically switches between representations
24  * (e.g. std::unordered_set or a sorted vector for small sets, bitvector for large sets).
25  *
26  * @tparam T Element type
27  * @tparam THRESHOLD threshold for switching between representations
28  */
29 template <typename T, size_t THRESHOLD = 256>
30 class IntSet {
31 public:
IntSet()32     IntSet() : repr_ {MMakePandaUnique<SmallRepr>()} {};
33 
IntSet(const IntSet & other)34     IntSet(const IntSet &other) : repr_ {other.repr_->Clone()} {};
35     IntSet &operator=(const IntSet &other)
36     {
37         repr_ = other.repr_->Clone();
38         return *this;
39     }
40 
41     IntSet(IntSet &&) noexcept = default;
42     IntSet &operator=(IntSet &&) noexcept = default;
43 
44     ~IntSet() = default;
45 
Contains(T x)46     bool Contains(T x) const
47     {
48         return repr_->Contains(x);
49     }
50 
Size()51     size_t Size() const
52     {
53         return repr_->Size();
54     }
55 
Insert(T x)56     void Insert(T x)
57     {
58         repr_->Insert(x);
59         if (UNLIKELY(repr_->Type() == ReprType::SMALL && Size() >= THRESHOLD)) {
60             MoveToLargeRepr();
61         }
62     }
63 
64     template <bool known_to_be_sorted = false, typename Iter>
Insert(Iter begin,Iter end)65     void Insert(Iter begin, Iter end)
66     {
67         switch (repr_->Type()) {
68             case ReprType::SMALL:
69                 AsSmallRepr().template InsertManyImpl<known_to_be_sorted>(begin, end);
70                 if (UNLIKELY(Size() >= THRESHOLD)) {
71                     MoveToLargeRepr();
72                     if (begin != end) {
73                         // if we get here, repr is large now and there are remaining elements
74                         AsLargeRepr().template InsertManyImpl<known_to_be_sorted>(std::move(begin), std::move(end));
75                     }
76                 }
77                 return;
78             case ReprType::LARGE:
79                 AsLargeRepr().template InsertManyImpl<known_to_be_sorted>(std::move(begin), std::move(end));
80                 return;
81             default:
82                 UNREACHABLE();
83         }
84     }
85 
TheOnlyElement()86     Index<T> TheOnlyElement() const
87     {
88         return SwitchOnRepr(
89             [](const SmallRepr &repr) -> Index<T> {
90                 if (repr.Size() == 1) {
91                     return repr.repr_[0];
92                 }
93                 return {};
94             },
95             []([[maybe_unused]] const LargeRepr &repr) { return Index<T> {}; });
96     }
97 
98     template <size_t THRESHOLD2>
99     IntSet<T, THRESHOLD> operator&(const IntSet<T, THRESHOLD2> &other) const
100     {
101         return other.SwitchOnRepr([this](const auto &other_repr) { return repr_->Intersect(other_repr); });
102     }
103 
104     template <size_t THRESHOLD2>
105     IntSet<T, THRESHOLD> &operator&=(const IntSet<T, THRESHOLD2> &other)
106     {
107         // TODO if this case is checked separately, consider switching on all 4 options
108         if (repr_->Type() == ReprType::LARGE && other.repr_->Type() == ReprType::SMALL) {
109             *this = other & *this;
110         } else {
111             bool change_repr =
112                 other.SwitchOnRepr([this](const auto &other_repr) { return repr_->IntersectInPlace(other_repr); });
113             if (change_repr) {
114                 MoveToSmallRepr();
115             }
116         }
117         return *this;
118     }
119 
120     template <size_t THRESHOLD2>
121     IntSet<T, THRESHOLD> operator|(const IntSet<T, THRESHOLD2> &other) const
122     {
123         return other.SwitchOnRepr([this](const auto &other_repr) { return repr_->Union(other_repr); });
124     }
125 
126     template <size_t THRESHOLD2>
127     IntSet<T, THRESHOLD> &operator|=(const IntSet<T, THRESHOLD2> &other)
128     {
129         if (other.repr_->Type() == ReprType::SMALL) {
130             const auto &other_repr = other.AsSmallRepr().repr_;
131             Insert<true>(other_repr.cbegin(), other_repr.cend());
132         } else {
133             if (repr_->Type() == ReprType::SMALL) {
134                 *this = other | *this;
135             } else {
136                 AsLargeRepr().UnionInPlace(other.AsLargeRepr());
137             }
138         }
139         return *this;
140     }
141 
142     /// Returns a lambda repeated calls to which return ordered values of the intersection
143     template <size_t THRESHOLD2>
LazyIntersect(const IntSet<T,THRESHOLD2> & other)144     auto LazyIntersect(const IntSet<T, THRESHOLD2> &other) const
145     {
146         auto &&stream1 = AsStream();
147         auto &&stream2 = other.AsStream();
148         return [val1 = stream1(), val2 = stream2(), stream1 = std::move(stream1),
149                 stream2 = std::move(stream2)]() mutable -> Index<T> {
150             while (val1.IsValid() && val2.IsValid()) {
151                 if (val1 < val2) {
152                     val1 = stream1();
153                 } else if (val1 > val2) {
154                     val2 = stream2();
155                 } else {
156                     auto tmp = val1;
157                     val1 = stream1();
158                     val2 = stream2();
159                     return tmp;
160                 }
161             }
162             return {};
163         };
164     }
165 
166     template <typename Handler>
ForAll(Handler && handler)167     bool ForAll(Handler &&handler) const
168     {
169         return SwitchOnRepr(
170             [handler = std::forward<Handler>(handler)](const auto &repr) { return repr.ForAll(handler); });
171     }
172 
AsStream()173     std::function<Index<T>()> AsStream() const
174     {
175         return SwitchOnRepr([](const auto &repr) { return std::function<Index<T>()>(repr.AsStream()); });
176     }
177 
178     template <size_t THRESHOLD2>
179     bool operator==(const IntSet<T, THRESHOLD2> &rhs) const
180     {
181         ReprType lhs_type = repr_->Type();
182         ReprType rhs_type = rhs.repr_->Type();
183         if (lhs_type == ReprType::SMALL && rhs_type == ReprType::SMALL) {
184             return AsSmallRepr().repr_ == rhs.AsSmallRepr().repr_;
185         } else if (lhs_type == ReprType::LARGE && rhs_type == ReprType::LARGE) {
186             return AsLargeRepr().repr_ == rhs.AsLargeRepr().repr_;
187         } else {
188             if (Size() != rhs.Size()) {
189                 return false;
190             }
191             auto lhs_stream {AsStream()};
192             auto rhs_stream {rhs.AsStream()};
193             auto lhs_val {lhs_stream()};
194             auto rhs_val {rhs_stream()};
195             while (lhs_val.IsValid() && rhs_val.IsValid()) {
196                 if (lhs_val != rhs_val) {
197                     return false;
198                 }
199                 lhs_val = lhs_stream();
200                 rhs_val = rhs_stream();
201             }
202             return lhs_val == rhs_val;
203         }
204     }
205 
206     template <size_t THRESHOLD2>
207     bool operator!=(const IntSet<T, THRESHOLD2> &rhs) const
208     {
209         return !(*this == rhs);
210     }
211 
212 private:
213     enum class ReprType { SMALL, LARGE };
214 
215     class SmallRepr;
216     class LargeRepr;
217 
218     class Repr {
219     public:
220         virtual ReprType Type() const = 0;
221         virtual bool Contains(T x) const = 0;
222         virtual void Insert(T x) = 0;
223         virtual size_t Size() const = 0;
224         virtual IntSet<T, THRESHOLD> Intersect(const SmallRepr &other) const = 0;
225         virtual IntSet<T, THRESHOLD> Intersect(const LargeRepr &other) const = 0;
226         // returns true if repr should be changed (from Large to Small)
227         virtual bool IntersectInPlace(const SmallRepr &other) = 0;
228         virtual bool IntersectInPlace(const LargeRepr &other) = 0;
229         virtual IntSet<T, THRESHOLD> Union(const SmallRepr &other) const = 0;
230         virtual IntSet<T, THRESHOLD> Union(const LargeRepr &other) const = 0;
231         virtual MPandaUniquePtr<Repr> Clone() const = 0;
232         virtual ~Repr() = default;
233     };
234 
235     class SmallRepr final : public Repr {
236     public:
237         SmallRepr() = default;
SmallRepr(MPandaVector<T> set)238         SmallRepr(MPandaVector<T> set) : repr_ {set} {};
239 
Type()240         ReprType Type() const override
241         {
242             return ReprType::SMALL;
243         }
244 
Contains(T x)245         bool Contains(T x) const override
246         {
247             return std::binary_search(repr_.begin(), repr_.end(), x);
248         }
249 
Insert(T x)250         void Insert(T x) override
251         {
252             Insert(x, 0);
253         }
254 
Size()255         size_t Size() const override
256         {
257             return repr_.size();
258         }
259 
Intersect(const SmallRepr & other)260         IntSet<T, THRESHOLD> Intersect(const SmallRepr &other) const override
261         {
262             if (other.Size() < Size()) {
263                 return other.Intersect(*this);
264             } else {
265                 MPandaVector<T> result;
266                 std::set_intersection(repr_.begin(), repr_.end(), other.repr_.begin(), other.repr_.end(),
267                                       std::back_inserter(result));
268                 return {result};
269             }
270         }
271 
Intersect(const LargeRepr & other)272         IntSet<T, THRESHOLD> Intersect(const LargeRepr &other) const override
273         {
274             MPandaVector<T> result;
275             for (T value : repr_) {
276                 if (other.Contains(value)) {
277                     result.push_back(value);
278                 }
279             }
280             return {result};
281         }
282 
IntersectInPlace(const SmallRepr & other)283         bool IntersectInPlace(const SmallRepr &other) override
284         {
285             repr_.erase(
286                 std::remove_if(repr_.begin(), repr_.end(),
287                                [&, other_iter = other.repr_.begin(), other_end = other.repr_.end()](T x) mutable {
288                                    other_iter = std::lower_bound(other_iter, other_end, x);
289                                    return other_iter == other_end || *other_iter != x;
290                                }),
291                 repr_.end());
292             return false;
293         }
294 
IntersectInPlace(const LargeRepr & other)295         bool IntersectInPlace(const LargeRepr &other) override
296         {
297             repr_.erase(std::remove_if(repr_.begin(), repr_.end(), [&](T x) { return !other.Contains(x); }),
298                         repr_.end());
299             return false;
300         }
301 
Union(const SmallRepr & other)302         IntSet<T, THRESHOLD> Union(const SmallRepr &other) const override
303         {
304             MPandaVector<T> result;
305             std::set_union(repr_.begin(), repr_.end(), other.repr_.begin(), other.repr_.end(),
306                            std::back_inserter(result));
307             if (result.size() < THRESHOLD) {
308                 return result;
309             } else {
310                 return VectorToBitVector(result);
311             }
312         }
313 
Union(const LargeRepr & other)314         IntSet<T, THRESHOLD> Union(const LargeRepr &other) const override
315         {
316             return other.Union(*this);
317         }
318 
Clone()319         MPandaUniquePtr<Repr> Clone() const override
320         {
321             return MMakePandaUnique<SmallRepr>(repr_);
322         }
323 
MaxElem()324         T MaxElem() const
325         {
326             return *repr_.rbegin();
327         }
328 
329         template <bool known_to_be_sorted, typename Iter>
InsertManyImpl(Iter & begin,const Iter & end)330         void InsertManyImpl(Iter &begin, const Iter &end)
331         {
332             size_t sz = Size();
333             size_t lower_bound = 0;
334             while (sz < THRESHOLD) {
335                 for (size_t i = sz; i < THRESHOLD; i++) {
336                     if (begin == end) {
337                         return;
338                     }
339                     if (known_to_be_sorted) {
340                         lower_bound = Insert(*begin, lower_bound);
341                     } else {
342                         Insert(*begin, 0);
343                     }
344                     ++begin;
345                 }
346                 sz = Size();
347             }
348         }
349 
350         template <typename Handler>
ForAll(Handler && handler)351         bool ForAll(Handler &&handler) const
352         {
353             for (T value : repr_) {
354                 if (!handler(value)) {
355                     return false;
356                 }
357             }
358             return true;
359         }
360 
AsStream()361         auto AsStream() const
362         {
363             return [i = size_t(0), this]() mutable -> Index<T> {
364                 if (i < repr_.size()) {
365                     return repr_[i++];
366                 } else {
367                     return {};
368                 }
369             };
370         }
371 
372     private:
Insert(T x,size_t lower_bound)373         size_t Insert(T x, size_t lower_bound)
374         {
375             auto iter = std::lower_bound(repr_.begin() + lower_bound, repr_.end(), x);
376             auto new_lower_bound = static_cast<size_t>(iter - repr_.begin());
377             if (iter == repr_.end()) {
378                 repr_.push_back(x);
379             } else if (*iter != x) {
380                 repr_.insert(iter, x);
381             }
382             return new_lower_bound;
383         }
384 
385         MPandaVector<T> repr_;
386         friend class IntSet;
387     };
388 
389     class LargeRepr final : public Repr {
390     public:
LargeRepr(BitVector set)391         LargeRepr(BitVector set) : repr_ {set} {};
392 
Type()393         ReprType Type() const override
394         {
395             return ReprType::LARGE;
396         }
397 
Contains(T x)398         bool Contains(T x) const override
399         {
400             return x < repr_.size() && repr_[x];
401         }
402 
Insert(T x)403         void Insert(T x) override
404         {
405             if (x >= repr_.size()) {
406                 // clang-tidy under GCC bug, static_cast<size_t>(x) * 3U / 2U is enough
407                 repr_.resize(std::max(static_cast<size_t>(x) * 3U / 2U, THRESHOLD));
408             }
409             repr_.Set(x);
410         }
411 
Size()412         size_t Size() const override
413         {
414             return repr_.SetBitsCount();
415         }
416 
Intersect(const SmallRepr & other)417         IntSet<T, THRESHOLD> Intersect(const SmallRepr &other) const override
418         {
419             return other.Intersect(*this);
420         }
421 
Intersect(const LargeRepr & other)422         IntSet<T, THRESHOLD> Intersect(const LargeRepr &other) const override
423         {
424             BitVector res = repr_ & other.repr_;
425             if (res.SetBitsCount() >= THRESHOLD) {
426                 return res;
427             } else {
428                 return BitVectorToVector(res);
429             }
430         }
431 
IntersectInPlace(const SmallRepr & other)432         bool IntersectInPlace(const SmallRepr &other) override
433         {
434             if (other.Size() == 0) {
435                 repr_ = BitVector(0);
436             } else {
437                 size_t other_bv_size = other.MaxElem() + 1;
438                 BitVector bv(other_bv_size);
439                 for (T x : other.repr_) {
440                     bv.Set(x);
441                 }
442                 ResizeDownOnly(other_bv_size);
443                 repr_ &= bv;
444             }
445             return true;
446         }
447 
IntersectInPlace(const LargeRepr & other)448         bool IntersectInPlace(const LargeRepr &other) override
449         {
450             ResizeDownOnly(other.repr_.size());
451             repr_ &= other.repr_;
452             return Size() < THRESHOLD;
453         }
454 
Union(const SmallRepr & other)455         IntSet<T, THRESHOLD> Union(const SmallRepr &other) const override
456         {
457             IntSet<T, THRESHOLD> result {Clone()};
458             result.Insert<true>(other.repr_.cbegin(), other.repr_.cend());
459             return result;
460         }
461 
Union(const LargeRepr & other)462         IntSet<T, THRESHOLD> Union(const LargeRepr &other) const override
463         {
464             return {repr_ | other.repr_};
465         }
466 
UnionInPlace(const LargeRepr & other)467         void UnionInPlace(const LargeRepr &other)
468         {
469             ResizeUpOnly(other.repr_.size());
470             repr_ |= other.repr_;
471         }
472 
Clone()473         MPandaUniquePtr<Repr> Clone() const override
474         {
475             return MMakePandaUnique<LargeRepr>(repr_);
476         }
477 
478         template <bool known_to_be_sorted, typename Iter>
InsertManyImpl(Iter begin,Iter end)479         void InsertManyImpl(Iter begin, Iter end)
480         {
481             while (begin != end) {
482                 Insert(*begin);
483                 ++begin;
484             }
485         }
486 
487         template <typename Handler>
ForAll(Handler && handler)488         bool ForAll(Handler &&handler) const
489         {
490             return repr_.for_all_idx_of<1>(std::forward<Handler>(handler));
491         }
492 
AsStream()493         auto AsStream() const
494         {
495             return repr_.LazyIndicesOf<1>();
496         }
497 
498     private:
499         BitVector repr_;
500 
ResizeDownOnly(size_t sz)501         void ResizeDownOnly(size_t sz)
502         {
503             if (sz < repr_.size()) {
504                 repr_.resize(sz);
505             }
506         }
507 
ResizeUpOnly(size_t sz)508         void ResizeUpOnly(size_t sz)
509         {
510             if (sz > repr_.size()) {
511                 repr_.resize(sz);
512             }
513         }
514 
515         friend class IntSet;
516     };
517 
518     friend class SmallRepr;
519     friend class LargeRepr;
520 
521     MPandaUniquePtr<Repr> repr_;
522 
IntSet(MPandaVector<T> set)523     IntSet(MPandaVector<T> set) : repr_ {MMakePandaUnique<SmallRepr>(set)} {};
IntSet(BitVector set)524     IntSet(BitVector set) : repr_ {MMakePandaUnique<LargeRepr>(set)} {};
IntSet(MPandaUniquePtr<Repr> && repr)525     IntSet(MPandaUniquePtr<Repr> &&repr) : repr_ {std::move(repr)} {};
526 
527     // unsafe!
AsSmallRepr()528     const SmallRepr &AsSmallRepr() const
529     {
530         return *static_cast<const SmallRepr *>(repr_.get());
531     }
532 
AsLargeRepr()533     const LargeRepr &AsLargeRepr() const
534     {
535         return *static_cast<const LargeRepr *>(repr_.get());
536     }
537 
AsSmallRepr()538     SmallRepr &AsSmallRepr()
539     {
540         return *static_cast<SmallRepr *>(repr_.get());
541     }
542 
AsLargeRepr()543     LargeRepr &AsLargeRepr()
544     {
545         return *static_cast<LargeRepr *>(repr_.get());
546     }
547 
548     template <typename SmallCase, typename LargeCase>
SwitchOnRepr(SmallCase && smallCase,LargeCase && largeCase)549     auto SwitchOnRepr(SmallCase &&smallCase, LargeCase &&largeCase) const
550     {
551         switch (repr_->Type()) {
552             case ReprType::SMALL:
553                 return smallCase(AsSmallRepr());
554             case ReprType::LARGE:
555                 return largeCase(AsLargeRepr());
556             default:
557                 UNREACHABLE();
558         }
559     }
560 
561     template <typename CommonCase>
SwitchOnRepr(CommonCase && commonCase)562     auto SwitchOnRepr(CommonCase &&commonCase) const
563     {
564         return SwitchOnRepr(commonCase, commonCase);
565     }
566 
MoveToLargeRepr()567     void MoveToLargeRepr()
568     {
569         repr_ = MMakePandaUnique<LargeRepr>(VectorToBitVector(AsSmallRepr().repr_));
570     }
571 
MoveToSmallRepr()572     void MoveToSmallRepr()
573     {
574         repr_ = MMakePandaUnique<SmallRepr>(BitVectorToVector(AsLargeRepr().repr_));
575     }
576 
BitVectorToVector(const BitVector & bv)577     static MPandaVector<T> BitVectorToVector(const BitVector &bv)
578     {
579         MPandaVector<T> res;
580         bv.for_all_idx_of<1>([&](size_t idx) {
581             res.push_back(idx);
582             return true;
583         });
584         return res;
585     }
586 
VectorToBitVector(const MPandaVector<T> & vec)587     static BitVector VectorToBitVector(const MPandaVector<T> &vec)
588     {
589         BitVector bv(*vec.rbegin() * 3 / 2);
590         for (T y : vec) {
591             bv.Set(y);
592         }
593         return bv;
594     }
595 };
596 
597 template <typename T, size_t THRESHOLD>
598 std::ostream &operator<<(std::ostream &os, const IntSet<T, THRESHOLD> &set)
599 {
600     os << "IntSet{";
601     bool first = true;
602     set.ForAll([&](T value) {
603         if (first) {
604             first = false;
605         } else {
606             os << " ";
607         }
608         os << value;
609         return true;
610     });
611     os << "}";
612     return os;
613 }
614 
615 }  // namespace panda::verifier
616 
617 #endif  // PANDA_VERIFICATION_UTIL_INT_SET_H
618