1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 #ifndef PANDA_VERIFICATION_UTIL_INT_SET_H 16 #define PANDA_VERIFICATION_UTIL_INT_SET_H 17 18 #include "bit_vector.h" 19 20 namespace panda::verifier { 21 22 /** 23 * @brief A set implementation for integral types which automatically switches between representations 24 * (e.g. std::unordered_set or a sorted vector for small sets, bitvector for large sets). 25 * 26 * @tparam T Element type 27 * @tparam THRESHOLD threshold for switching between representations 28 */ 29 template <typename T, size_t THRESHOLD = 256> 30 class IntSet { 31 public: IntSet()32 IntSet() : repr_ {MMakePandaUnique<SmallRepr>()} {}; 33 IntSet(const IntSet & other)34 IntSet(const IntSet &other) : repr_ {other.repr_->Clone()} {}; 35 IntSet &operator=(const IntSet &other) 36 { 37 repr_ = other.repr_->Clone(); 38 return *this; 39 } 40 41 IntSet(IntSet &&) noexcept = default; 42 IntSet &operator=(IntSet &&) noexcept = default; 43 44 ~IntSet() = default; 45 Contains(T x)46 bool Contains(T x) const 47 { 48 return repr_->Contains(x); 49 } 50 Size()51 size_t Size() const 52 { 53 return repr_->Size(); 54 } 55 Insert(T x)56 void Insert(T x) 57 { 58 repr_->Insert(x); 59 if (UNLIKELY(repr_->Type() == ReprType::SMALL && Size() >= THRESHOLD)) { 60 MoveToLargeRepr(); 61 } 62 } 63 64 template <bool known_to_be_sorted = false, typename Iter> Insert(Iter begin,Iter end)65 void Insert(Iter begin, Iter end) 66 { 67 switch (repr_->Type()) { 68 case ReprType::SMALL: 69 AsSmallRepr().template InsertManyImpl<known_to_be_sorted>(begin, end); 70 if (UNLIKELY(Size() >= THRESHOLD)) { 71 MoveToLargeRepr(); 72 if (begin != end) { 73 // if we get here, repr is large now and there are remaining elements 74 AsLargeRepr().template InsertManyImpl<known_to_be_sorted>(std::move(begin), std::move(end)); 75 } 76 } 77 return; 78 case ReprType::LARGE: 79 AsLargeRepr().template InsertManyImpl<known_to_be_sorted>(std::move(begin), std::move(end)); 80 return; 81 default: 82 UNREACHABLE(); 83 } 84 } 85 TheOnlyElement()86 Index<T> TheOnlyElement() const 87 { 88 return SwitchOnRepr( 89 [](const SmallRepr &repr) -> Index<T> { 90 if (repr.Size() == 1) { 91 return repr.repr_[0]; 92 } 93 return {}; 94 }, 95 []([[maybe_unused]] const LargeRepr &repr) { return Index<T> {}; }); 96 } 97 98 template <size_t THRESHOLD2> 99 IntSet<T, THRESHOLD> operator&(const IntSet<T, THRESHOLD2> &other) const 100 { 101 return other.SwitchOnRepr([this](const auto &other_repr) { return repr_->Intersect(other_repr); }); 102 } 103 104 template <size_t THRESHOLD2> 105 IntSet<T, THRESHOLD> &operator&=(const IntSet<T, THRESHOLD2> &other) 106 { 107 // TODO if this case is checked separately, consider switching on all 4 options 108 if (repr_->Type() == ReprType::LARGE && other.repr_->Type() == ReprType::SMALL) { 109 *this = other & *this; 110 } else { 111 bool change_repr = 112 other.SwitchOnRepr([this](const auto &other_repr) { return repr_->IntersectInPlace(other_repr); }); 113 if (change_repr) { 114 MoveToSmallRepr(); 115 } 116 } 117 return *this; 118 } 119 120 template <size_t THRESHOLD2> 121 IntSet<T, THRESHOLD> operator|(const IntSet<T, THRESHOLD2> &other) const 122 { 123 return other.SwitchOnRepr([this](const auto &other_repr) { return repr_->Union(other_repr); }); 124 } 125 126 template <size_t THRESHOLD2> 127 IntSet<T, THRESHOLD> &operator|=(const IntSet<T, THRESHOLD2> &other) 128 { 129 if (other.repr_->Type() == ReprType::SMALL) { 130 const auto &other_repr = other.AsSmallRepr().repr_; 131 Insert<true>(other_repr.cbegin(), other_repr.cend()); 132 } else { 133 if (repr_->Type() == ReprType::SMALL) { 134 *this = other | *this; 135 } else { 136 AsLargeRepr().UnionInPlace(other.AsLargeRepr()); 137 } 138 } 139 return *this; 140 } 141 142 /// Returns a lambda repeated calls to which return ordered values of the intersection 143 template <size_t THRESHOLD2> LazyIntersect(const IntSet<T,THRESHOLD2> & other)144 auto LazyIntersect(const IntSet<T, THRESHOLD2> &other) const 145 { 146 auto &&stream1 = AsStream(); 147 auto &&stream2 = other.AsStream(); 148 return [val1 = stream1(), val2 = stream2(), stream1 = std::move(stream1), 149 stream2 = std::move(stream2)]() mutable -> Index<T> { 150 while (val1.IsValid() && val2.IsValid()) { 151 if (val1 < val2) { 152 val1 = stream1(); 153 } else if (val1 > val2) { 154 val2 = stream2(); 155 } else { 156 auto tmp = val1; 157 val1 = stream1(); 158 val2 = stream2(); 159 return tmp; 160 } 161 } 162 return {}; 163 }; 164 } 165 166 template <typename Handler> ForAll(Handler && handler)167 bool ForAll(Handler &&handler) const 168 { 169 return SwitchOnRepr( 170 [handler = std::forward<Handler>(handler)](const auto &repr) { return repr.ForAll(handler); }); 171 } 172 AsStream()173 std::function<Index<T>()> AsStream() const 174 { 175 return SwitchOnRepr([](const auto &repr) { return std::function<Index<T>()>(repr.AsStream()); }); 176 } 177 178 template <size_t THRESHOLD2> 179 bool operator==(const IntSet<T, THRESHOLD2> &rhs) const 180 { 181 ReprType lhs_type = repr_->Type(); 182 ReprType rhs_type = rhs.repr_->Type(); 183 if (lhs_type == ReprType::SMALL && rhs_type == ReprType::SMALL) { 184 return AsSmallRepr().repr_ == rhs.AsSmallRepr().repr_; 185 } else if (lhs_type == ReprType::LARGE && rhs_type == ReprType::LARGE) { 186 return AsLargeRepr().repr_ == rhs.AsLargeRepr().repr_; 187 } else { 188 if (Size() != rhs.Size()) { 189 return false; 190 } 191 auto lhs_stream {AsStream()}; 192 auto rhs_stream {rhs.AsStream()}; 193 auto lhs_val {lhs_stream()}; 194 auto rhs_val {rhs_stream()}; 195 while (lhs_val.IsValid() && rhs_val.IsValid()) { 196 if (lhs_val != rhs_val) { 197 return false; 198 } 199 lhs_val = lhs_stream(); 200 rhs_val = rhs_stream(); 201 } 202 return lhs_val == rhs_val; 203 } 204 } 205 206 template <size_t THRESHOLD2> 207 bool operator!=(const IntSet<T, THRESHOLD2> &rhs) const 208 { 209 return !(*this == rhs); 210 } 211 212 private: 213 enum class ReprType { SMALL, LARGE }; 214 215 class SmallRepr; 216 class LargeRepr; 217 218 class Repr { 219 public: 220 virtual ReprType Type() const = 0; 221 virtual bool Contains(T x) const = 0; 222 virtual void Insert(T x) = 0; 223 virtual size_t Size() const = 0; 224 virtual IntSet<T, THRESHOLD> Intersect(const SmallRepr &other) const = 0; 225 virtual IntSet<T, THRESHOLD> Intersect(const LargeRepr &other) const = 0; 226 // returns true if repr should be changed (from Large to Small) 227 virtual bool IntersectInPlace(const SmallRepr &other) = 0; 228 virtual bool IntersectInPlace(const LargeRepr &other) = 0; 229 virtual IntSet<T, THRESHOLD> Union(const SmallRepr &other) const = 0; 230 virtual IntSet<T, THRESHOLD> Union(const LargeRepr &other) const = 0; 231 virtual MPandaUniquePtr<Repr> Clone() const = 0; 232 virtual ~Repr() = default; 233 }; 234 235 class SmallRepr final : public Repr { 236 public: 237 SmallRepr() = default; SmallRepr(MPandaVector<T> set)238 SmallRepr(MPandaVector<T> set) : repr_ {set} {}; 239 Type()240 ReprType Type() const override 241 { 242 return ReprType::SMALL; 243 } 244 Contains(T x)245 bool Contains(T x) const override 246 { 247 return std::binary_search(repr_.begin(), repr_.end(), x); 248 } 249 Insert(T x)250 void Insert(T x) override 251 { 252 Insert(x, 0); 253 } 254 Size()255 size_t Size() const override 256 { 257 return repr_.size(); 258 } 259 Intersect(const SmallRepr & other)260 IntSet<T, THRESHOLD> Intersect(const SmallRepr &other) const override 261 { 262 if (other.Size() < Size()) { 263 return other.Intersect(*this); 264 } else { 265 MPandaVector<T> result; 266 std::set_intersection(repr_.begin(), repr_.end(), other.repr_.begin(), other.repr_.end(), 267 std::back_inserter(result)); 268 return {result}; 269 } 270 } 271 Intersect(const LargeRepr & other)272 IntSet<T, THRESHOLD> Intersect(const LargeRepr &other) const override 273 { 274 MPandaVector<T> result; 275 for (T value : repr_) { 276 if (other.Contains(value)) { 277 result.push_back(value); 278 } 279 } 280 return {result}; 281 } 282 IntersectInPlace(const SmallRepr & other)283 bool IntersectInPlace(const SmallRepr &other) override 284 { 285 repr_.erase( 286 std::remove_if(repr_.begin(), repr_.end(), 287 [&, other_iter = other.repr_.begin(), other_end = other.repr_.end()](T x) mutable { 288 other_iter = std::lower_bound(other_iter, other_end, x); 289 return other_iter == other_end || *other_iter != x; 290 }), 291 repr_.end()); 292 return false; 293 } 294 IntersectInPlace(const LargeRepr & other)295 bool IntersectInPlace(const LargeRepr &other) override 296 { 297 repr_.erase(std::remove_if(repr_.begin(), repr_.end(), [&](T x) { return !other.Contains(x); }), 298 repr_.end()); 299 return false; 300 } 301 Union(const SmallRepr & other)302 IntSet<T, THRESHOLD> Union(const SmallRepr &other) const override 303 { 304 MPandaVector<T> result; 305 std::set_union(repr_.begin(), repr_.end(), other.repr_.begin(), other.repr_.end(), 306 std::back_inserter(result)); 307 if (result.size() < THRESHOLD) { 308 return result; 309 } else { 310 return VectorToBitVector(result); 311 } 312 } 313 Union(const LargeRepr & other)314 IntSet<T, THRESHOLD> Union(const LargeRepr &other) const override 315 { 316 return other.Union(*this); 317 } 318 Clone()319 MPandaUniquePtr<Repr> Clone() const override 320 { 321 return MMakePandaUnique<SmallRepr>(repr_); 322 } 323 MaxElem()324 T MaxElem() const 325 { 326 return *repr_.rbegin(); 327 } 328 329 template <bool known_to_be_sorted, typename Iter> InsertManyImpl(Iter & begin,const Iter & end)330 void InsertManyImpl(Iter &begin, const Iter &end) 331 { 332 size_t sz = Size(); 333 size_t lower_bound = 0; 334 while (sz < THRESHOLD) { 335 for (size_t i = sz; i < THRESHOLD; i++) { 336 if (begin == end) { 337 return; 338 } 339 if (known_to_be_sorted) { 340 lower_bound = Insert(*begin, lower_bound); 341 } else { 342 Insert(*begin, 0); 343 } 344 ++begin; 345 } 346 sz = Size(); 347 } 348 } 349 350 template <typename Handler> ForAll(Handler && handler)351 bool ForAll(Handler &&handler) const 352 { 353 for (T value : repr_) { 354 if (!handler(value)) { 355 return false; 356 } 357 } 358 return true; 359 } 360 AsStream()361 auto AsStream() const 362 { 363 return [i = size_t(0), this]() mutable -> Index<T> { 364 if (i < repr_.size()) { 365 return repr_[i++]; 366 } else { 367 return {}; 368 } 369 }; 370 } 371 372 private: Insert(T x,size_t lower_bound)373 size_t Insert(T x, size_t lower_bound) 374 { 375 auto iter = std::lower_bound(repr_.begin() + lower_bound, repr_.end(), x); 376 auto new_lower_bound = static_cast<size_t>(iter - repr_.begin()); 377 if (iter == repr_.end()) { 378 repr_.push_back(x); 379 } else if (*iter != x) { 380 repr_.insert(iter, x); 381 } 382 return new_lower_bound; 383 } 384 385 MPandaVector<T> repr_; 386 friend class IntSet; 387 }; 388 389 class LargeRepr final : public Repr { 390 public: LargeRepr(BitVector set)391 LargeRepr(BitVector set) : repr_ {set} {}; 392 Type()393 ReprType Type() const override 394 { 395 return ReprType::LARGE; 396 } 397 Contains(T x)398 bool Contains(T x) const override 399 { 400 return x < repr_.size() && repr_[x]; 401 } 402 Insert(T x)403 void Insert(T x) override 404 { 405 if (x >= repr_.size()) { 406 // clang-tidy under GCC bug, static_cast<size_t>(x) * 3U / 2U is enough 407 repr_.resize(std::max(static_cast<size_t>(x) * 3U / 2U, THRESHOLD)); 408 } 409 repr_.Set(x); 410 } 411 Size()412 size_t Size() const override 413 { 414 return repr_.SetBitsCount(); 415 } 416 Intersect(const SmallRepr & other)417 IntSet<T, THRESHOLD> Intersect(const SmallRepr &other) const override 418 { 419 return other.Intersect(*this); 420 } 421 Intersect(const LargeRepr & other)422 IntSet<T, THRESHOLD> Intersect(const LargeRepr &other) const override 423 { 424 BitVector res = repr_ & other.repr_; 425 if (res.SetBitsCount() >= THRESHOLD) { 426 return res; 427 } else { 428 return BitVectorToVector(res); 429 } 430 } 431 IntersectInPlace(const SmallRepr & other)432 bool IntersectInPlace(const SmallRepr &other) override 433 { 434 if (other.Size() == 0) { 435 repr_ = BitVector(0); 436 } else { 437 size_t other_bv_size = other.MaxElem() + 1; 438 BitVector bv(other_bv_size); 439 for (T x : other.repr_) { 440 bv.Set(x); 441 } 442 ResizeDownOnly(other_bv_size); 443 repr_ &= bv; 444 } 445 return true; 446 } 447 IntersectInPlace(const LargeRepr & other)448 bool IntersectInPlace(const LargeRepr &other) override 449 { 450 ResizeDownOnly(other.repr_.size()); 451 repr_ &= other.repr_; 452 return Size() < THRESHOLD; 453 } 454 Union(const SmallRepr & other)455 IntSet<T, THRESHOLD> Union(const SmallRepr &other) const override 456 { 457 IntSet<T, THRESHOLD> result {Clone()}; 458 result.Insert<true>(other.repr_.cbegin(), other.repr_.cend()); 459 return result; 460 } 461 Union(const LargeRepr & other)462 IntSet<T, THRESHOLD> Union(const LargeRepr &other) const override 463 { 464 return {repr_ | other.repr_}; 465 } 466 UnionInPlace(const LargeRepr & other)467 void UnionInPlace(const LargeRepr &other) 468 { 469 ResizeUpOnly(other.repr_.size()); 470 repr_ |= other.repr_; 471 } 472 Clone()473 MPandaUniquePtr<Repr> Clone() const override 474 { 475 return MMakePandaUnique<LargeRepr>(repr_); 476 } 477 478 template <bool known_to_be_sorted, typename Iter> InsertManyImpl(Iter begin,Iter end)479 void InsertManyImpl(Iter begin, Iter end) 480 { 481 while (begin != end) { 482 Insert(*begin); 483 ++begin; 484 } 485 } 486 487 template <typename Handler> ForAll(Handler && handler)488 bool ForAll(Handler &&handler) const 489 { 490 return repr_.for_all_idx_of<1>(std::forward<Handler>(handler)); 491 } 492 AsStream()493 auto AsStream() const 494 { 495 return repr_.LazyIndicesOf<1>(); 496 } 497 498 private: 499 BitVector repr_; 500 ResizeDownOnly(size_t sz)501 void ResizeDownOnly(size_t sz) 502 { 503 if (sz < repr_.size()) { 504 repr_.resize(sz); 505 } 506 } 507 ResizeUpOnly(size_t sz)508 void ResizeUpOnly(size_t sz) 509 { 510 if (sz > repr_.size()) { 511 repr_.resize(sz); 512 } 513 } 514 515 friend class IntSet; 516 }; 517 518 friend class SmallRepr; 519 friend class LargeRepr; 520 521 MPandaUniquePtr<Repr> repr_; 522 IntSet(MPandaVector<T> set)523 IntSet(MPandaVector<T> set) : repr_ {MMakePandaUnique<SmallRepr>(set)} {}; IntSet(BitVector set)524 IntSet(BitVector set) : repr_ {MMakePandaUnique<LargeRepr>(set)} {}; IntSet(MPandaUniquePtr<Repr> && repr)525 IntSet(MPandaUniquePtr<Repr> &&repr) : repr_ {std::move(repr)} {}; 526 527 // unsafe! AsSmallRepr()528 const SmallRepr &AsSmallRepr() const 529 { 530 return *static_cast<const SmallRepr *>(repr_.get()); 531 } 532 AsLargeRepr()533 const LargeRepr &AsLargeRepr() const 534 { 535 return *static_cast<const LargeRepr *>(repr_.get()); 536 } 537 AsSmallRepr()538 SmallRepr &AsSmallRepr() 539 { 540 return *static_cast<SmallRepr *>(repr_.get()); 541 } 542 AsLargeRepr()543 LargeRepr &AsLargeRepr() 544 { 545 return *static_cast<LargeRepr *>(repr_.get()); 546 } 547 548 template <typename SmallCase, typename LargeCase> SwitchOnRepr(SmallCase && smallCase,LargeCase && largeCase)549 auto SwitchOnRepr(SmallCase &&smallCase, LargeCase &&largeCase) const 550 { 551 switch (repr_->Type()) { 552 case ReprType::SMALL: 553 return smallCase(AsSmallRepr()); 554 case ReprType::LARGE: 555 return largeCase(AsLargeRepr()); 556 default: 557 UNREACHABLE(); 558 } 559 } 560 561 template <typename CommonCase> SwitchOnRepr(CommonCase && commonCase)562 auto SwitchOnRepr(CommonCase &&commonCase) const 563 { 564 return SwitchOnRepr(commonCase, commonCase); 565 } 566 MoveToLargeRepr()567 void MoveToLargeRepr() 568 { 569 repr_ = MMakePandaUnique<LargeRepr>(VectorToBitVector(AsSmallRepr().repr_)); 570 } 571 MoveToSmallRepr()572 void MoveToSmallRepr() 573 { 574 repr_ = MMakePandaUnique<SmallRepr>(BitVectorToVector(AsLargeRepr().repr_)); 575 } 576 BitVectorToVector(const BitVector & bv)577 static MPandaVector<T> BitVectorToVector(const BitVector &bv) 578 { 579 MPandaVector<T> res; 580 bv.for_all_idx_of<1>([&](size_t idx) { 581 res.push_back(idx); 582 return true; 583 }); 584 return res; 585 } 586 VectorToBitVector(const MPandaVector<T> & vec)587 static BitVector VectorToBitVector(const MPandaVector<T> &vec) 588 { 589 BitVector bv(*vec.rbegin() * 3 / 2); 590 for (T y : vec) { 591 bv.Set(y); 592 } 593 return bv; 594 } 595 }; 596 597 template <typename T, size_t THRESHOLD> 598 std::ostream &operator<<(std::ostream &os, const IntSet<T, THRESHOLD> &set) 599 { 600 os << "IntSet{"; 601 bool first = true; 602 set.ForAll([&](T value) { 603 if (first) { 604 first = false; 605 } else { 606 os << " "; 607 } 608 os << value; 609 return true; 610 }); 611 os << "}"; 612 return os; 613 } 614 615 } // namespace panda::verifier 616 617 #endif // PANDA_VERIFICATION_UTIL_INT_SET_H 618