1 /* 2 * Copyright © 2012,2017 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_HH 28 #define HB_SET_HH 29 30 #include "hb.hh" 31 32 33 /* 34 * hb_set_t 35 */ 36 37 /* TODO Keep a free-list so we can free pages that are completely zeroed. At that 38 * point maybe also use a sentinel value for "all-1" pages? */ 39 40 struct hb_set_t 41 { 42 HB_NO_COPY_ASSIGN (hb_set_t); hb_set_thb_set_t43 hb_set_t () { init (); } ~hb_set_thb_set_t44 ~hb_set_t () { fini (); } 45 46 struct page_map_t 47 { cmphb_set_t::page_map_t48 int cmp (const page_map_t &o) const { return (int) o.major - (int) major; } 49 50 uint32_t major; 51 uint32_t index; 52 }; 53 54 struct page_t 55 { init0hb_set_t::page_t56 void init0 () { v.clear (); } init1hb_set_t::page_t57 void init1 () { v.clear (0xFF); } 58 lenhb_set_t::page_t59 unsigned int len () const 60 { return ARRAY_LENGTH_CONST (v); } 61 is_emptyhb_set_t::page_t62 bool is_empty () const 63 { 64 for (unsigned int i = 0; i < len (); i++) 65 if (v[i]) 66 return false; 67 return true; 68 } 69 addhb_set_t::page_t70 void add (hb_codepoint_t g) { elt (g) |= mask (g); } delhb_set_t::page_t71 void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } hashb_set_t::page_t72 bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); } 73 add_rangehb_set_t::page_t74 void add_range (hb_codepoint_t a, hb_codepoint_t b) 75 { 76 elt_t *la = &elt (a); 77 elt_t *lb = &elt (b); 78 if (la == lb) 79 *la |= (mask (b) << 1) - mask(a); 80 else 81 { 82 *la |= ~(mask (a) - 1); 83 la++; 84 85 memset (la, 0xff, (char *) lb - (char *) la); 86 87 *lb |= ((mask (b) << 1) - 1); 88 } 89 } 90 is_equalhb_set_t::page_t91 bool is_equal (const page_t *other) const 92 { 93 return 0 == hb_memcmp (&v, &other->v, sizeof (v)); 94 } 95 get_populationhb_set_t::page_t96 unsigned int get_population () const 97 { 98 unsigned int pop = 0; 99 for (unsigned int i = 0; i < len (); i++) 100 pop += hb_popcount (v[i]); 101 return pop; 102 } 103 nexthb_set_t::page_t104 bool next (hb_codepoint_t *codepoint) const 105 { 106 unsigned int m = (*codepoint + 1) & MASK; 107 if (!m) 108 { 109 *codepoint = INVALID; 110 return false; 111 } 112 unsigned int i = m / ELT_BITS; 113 unsigned int j = m & ELT_MASK; 114 115 const elt_t vv = v[i] & ~((elt_t (1) << j) - 1); 116 for (const elt_t *p = &vv; i < len (); p = &v[++i]) 117 if (*p) 118 { 119 *codepoint = i * ELT_BITS + elt_get_min (*p); 120 return true; 121 } 122 123 *codepoint = INVALID; 124 return false; 125 } previoushb_set_t::page_t126 bool previous (hb_codepoint_t *codepoint) const 127 { 128 unsigned int m = (*codepoint - 1) & MASK; 129 if (m == MASK) 130 { 131 *codepoint = INVALID; 132 return false; 133 } 134 unsigned int i = m / ELT_BITS; 135 unsigned int j = m & ELT_MASK; 136 137 const elt_t vv = v[i] & ((elt_t (1) << (j + 1)) - 1); 138 for (const elt_t *p = &vv; (int) i >= 0; p = &v[--i]) 139 if (*p) 140 { 141 *codepoint = i * ELT_BITS + elt_get_max (*p); 142 return true; 143 } 144 145 *codepoint = INVALID; 146 return false; 147 } get_minhb_set_t::page_t148 hb_codepoint_t get_min () const 149 { 150 for (unsigned int i = 0; i < len (); i++) 151 if (v[i]) 152 return i * ELT_BITS + elt_get_min (v[i]); 153 return INVALID; 154 } get_maxhb_set_t::page_t155 hb_codepoint_t get_max () const 156 { 157 for (int i = len () - 1; i >= 0; i--) 158 if (v[i]) 159 return i * ELT_BITS + elt_get_max (v[i]); 160 return 0; 161 } 162 163 typedef unsigned long long elt_t; 164 enum { PAGE_BITS = 512 }; 165 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); 166 elt_get_minhb_set_t::page_t167 static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); } elt_get_maxhb_set_t::page_t168 static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; } 169 170 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; 171 172 enum { ELT_BITS = sizeof (elt_t) * 8 }; 173 enum { ELT_MASK = ELT_BITS - 1 }; 174 enum { BITS = sizeof (vector_t) * 8 }; 175 enum { MASK = BITS - 1 }; 176 static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, ""); 177 elthb_set_t::page_t178 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } elthb_set_t::page_t179 elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } maskhb_set_t::page_t180 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } 181 182 vector_t v; 183 }; 184 static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, ""); 185 186 hb_object_header_t header; 187 bool successful; /* Allocations successful */ 188 mutable unsigned int population; 189 hb_vector_t<page_map_t, 1> page_map; 190 hb_vector_t<page_t, 1> pages; 191 init_shallowhb_set_t192 void init_shallow () 193 { 194 successful = true; 195 population = 0; 196 page_map.init (); 197 pages.init (); 198 } inithb_set_t199 void init () 200 { 201 hb_object_init (this); 202 init_shallow (); 203 } fini_shallowhb_set_t204 void fini_shallow () 205 { 206 population = 0; 207 page_map.fini (); 208 pages.fini (); 209 } finihb_set_t210 void fini () 211 { 212 hb_object_fini (this); 213 fini_shallow (); 214 } 215 in_errorhb_set_t216 bool in_error () const { return !successful; } 217 resizehb_set_t218 bool resize (unsigned int count) 219 { 220 if (unlikely (!successful)) return false; 221 if (!pages.resize (count) || !page_map.resize (count)) 222 { 223 pages.resize (page_map.len); 224 successful = false; 225 return false; 226 } 227 return true; 228 } 229 clearhb_set_t230 void clear () 231 { 232 if (unlikely (hb_object_is_immutable (this))) 233 return; 234 successful = true; 235 population = 0; 236 page_map.resize (0); 237 pages.resize (0); 238 } is_emptyhb_set_t239 bool is_empty () const 240 { 241 unsigned int count = pages.len; 242 for (unsigned int i = 0; i < count; i++) 243 if (!pages[i].is_empty ()) 244 return false; 245 return true; 246 } 247 dirtyhb_set_t248 void dirty () { population = (unsigned int) -1; } 249 addhb_set_t250 void add (hb_codepoint_t g) 251 { 252 if (unlikely (!successful)) return; 253 if (unlikely (g == INVALID)) return; 254 dirty (); 255 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 256 page->add (g); 257 } add_rangehb_set_t258 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 259 { 260 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 261 if (unlikely (a > b || a == INVALID || b == INVALID)) return false; 262 dirty (); 263 unsigned int ma = get_major (a); 264 unsigned int mb = get_major (b); 265 if (ma == mb) 266 { 267 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 268 page->add_range (a, b); 269 } 270 else 271 { 272 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 273 page->add_range (a, major_start (ma + 1) - 1); 274 275 for (unsigned int m = ma + 1; m < mb; m++) 276 { 277 page = page_for_insert (major_start (m)); if (unlikely (!page)) return false; 278 page->init1 (); 279 } 280 281 page = page_for_insert (b); if (unlikely (!page)) return false; 282 page->add_range (major_start (mb), b); 283 } 284 return true; 285 } 286 287 template <typename T> add_arrayhb_set_t288 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 289 { 290 if (unlikely (!successful)) return; 291 if (!count) return; 292 dirty (); 293 hb_codepoint_t g = *array; 294 while (count) 295 { 296 unsigned int m = get_major (g); 297 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 298 unsigned int start = major_start (m); 299 unsigned int end = major_start (m + 1); 300 do 301 { 302 page->add (g); 303 304 array = (const T *) ((const char *) array + stride); 305 count--; 306 } 307 while (count && (g = *array, start <= g && g < end)); 308 } 309 } 310 311 /* Might return false if array looks unsorted. 312 * Used for faster rejection of corrupt data. */ 313 template <typename T> add_sorted_arrayhb_set_t314 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 315 { 316 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 317 if (!count) return true; 318 dirty (); 319 hb_codepoint_t g = *array; 320 hb_codepoint_t last_g = g; 321 while (count) 322 { 323 unsigned int m = get_major (g); 324 page_t *page = page_for_insert (g); if (unlikely (!page)) return false; 325 unsigned int end = major_start (m + 1); 326 do 327 { 328 /* If we try harder we can change the following comparison to <=; 329 * Not sure if it's worth it. */ 330 if (g < last_g) return false; 331 last_g = g; 332 page->add (g); 333 334 array = (const T *) ((const char *) array + stride); 335 count--; 336 } 337 while (count && (g = *array, g < end)); 338 } 339 return true; 340 } 341 delhb_set_t342 void del (hb_codepoint_t g) 343 { 344 /* TODO perform op even if !successful. */ 345 if (unlikely (!successful)) return; 346 page_t *page = page_for (g); 347 if (!page) 348 return; 349 dirty (); 350 page->del (g); 351 } del_rangehb_set_t352 void del_range (hb_codepoint_t a, hb_codepoint_t b) 353 { 354 /* TODO perform op even if !successful. */ 355 /* TODO Optimize, like add_range(). */ 356 if (unlikely (!successful)) return; 357 for (unsigned int i = a; i < b + 1; i++) 358 del (i); 359 } hashb_set_t360 bool has (hb_codepoint_t g) const 361 { 362 const page_t *page = page_for (g); 363 if (!page) 364 return false; 365 return page->has (g); 366 } intersectshb_set_t367 bool intersects (hb_codepoint_t first, 368 hb_codepoint_t last) const 369 { 370 hb_codepoint_t c = first - 1; 371 return next (&c) && c <= last; 372 } sethb_set_t373 void set (const hb_set_t *other) 374 { 375 if (unlikely (!successful)) return; 376 unsigned int count = other->pages.len; 377 if (!resize (count)) 378 return; 379 population = other->population; 380 memcpy ((void *) pages, (const void *) other->pages, count * pages.item_size); 381 memcpy ((void *) page_map, (const void *) other->page_map, count * page_map.item_size); 382 } 383 is_equalhb_set_t384 bool is_equal (const hb_set_t *other) const 385 { 386 if (get_population () != other->get_population ()) 387 return false; 388 389 unsigned int na = pages.len; 390 unsigned int nb = other->pages.len; 391 392 unsigned int a = 0, b = 0; 393 for (; a < na && b < nb; ) 394 { 395 if (page_at (a).is_empty ()) { a++; continue; } 396 if (other->page_at (b).is_empty ()) { b++; continue; } 397 if (page_map[a].major != other->page_map[b].major || 398 !page_at (a).is_equal (&other->page_at (b))) 399 return false; 400 a++; 401 b++; 402 } 403 for (; a < na; a++) 404 if (!page_at (a).is_empty ()) { return false; } 405 for (; b < nb; b++) 406 if (!other->page_at (b).is_empty ()) { return false; } 407 408 return true; 409 } 410 is_subsethb_set_t411 bool is_subset (const hb_set_t *larger_set) const 412 { 413 if (get_population () > larger_set->get_population ()) 414 return false; 415 416 /* TODO Optimize to use pages. */ 417 hb_codepoint_t c = INVALID; 418 while (next (&c)) 419 if (!larger_set->has (c)) 420 return false; 421 422 return true; 423 } 424 425 template <class Op> processhb_set_t426 void process (const hb_set_t *other) 427 { 428 if (unlikely (!successful)) return; 429 430 dirty (); 431 432 unsigned int na = pages.len; 433 unsigned int nb = other->pages.len; 434 unsigned int next_page = na; 435 436 unsigned int count = 0, newCount = 0; 437 unsigned int a = 0, b = 0; 438 for (; a < na && b < nb; ) 439 { 440 if (page_map[a].major == other->page_map[b].major) 441 { 442 count++; 443 a++; 444 b++; 445 } 446 else if (page_map[a].major < other->page_map[b].major) 447 { 448 if (Op::passthru_left) 449 count++; 450 a++; 451 } 452 else 453 { 454 if (Op::passthru_right) 455 count++; 456 b++; 457 } 458 } 459 if (Op::passthru_left) 460 count += na - a; 461 if (Op::passthru_right) 462 count += nb - b; 463 464 if (count > pages.len) 465 if (!resize (count)) 466 return; 467 newCount = count; 468 469 /* Process in-place backward. */ 470 a = na; 471 b = nb; 472 for (; a && b; ) 473 { 474 if (page_map[a - 1].major == other->page_map[b - 1].major) 475 { 476 a--; 477 b--; 478 count--; 479 page_map[count] = page_map[a]; 480 Op::process (page_at (count).v, page_at (a).v, other->page_at (b).v); 481 } 482 else if (page_map[a - 1].major > other->page_map[b - 1].major) 483 { 484 a--; 485 if (Op::passthru_left) 486 { 487 count--; 488 page_map[count] = page_map[a]; 489 } 490 } 491 else 492 { 493 b--; 494 if (Op::passthru_right) 495 { 496 count--; 497 page_map[count].major = other->page_map[b].major; 498 page_map[count].index = next_page++; 499 page_at (count).v = other->page_at (b).v; 500 } 501 } 502 } 503 if (Op::passthru_left) 504 while (a) 505 { 506 a--; 507 count--; 508 page_map[count] = page_map [a]; 509 } 510 if (Op::passthru_right) 511 while (b) 512 { 513 b--; 514 count--; 515 page_map[count].major = other->page_map[b].major; 516 page_map[count].index = next_page++; 517 page_at (count).v = other->page_at (b).v; 518 } 519 assert (!count); 520 if (pages.len > newCount) 521 resize (newCount); 522 } 523 union_hb_set_t524 void union_ (const hb_set_t *other) 525 { 526 process<HbOpOr> (other); 527 } intersecthb_set_t528 void intersect (const hb_set_t *other) 529 { 530 process<HbOpAnd> (other); 531 } subtracthb_set_t532 void subtract (const hb_set_t *other) 533 { 534 process<HbOpMinus> (other); 535 } symmetric_differencehb_set_t536 void symmetric_difference (const hb_set_t *other) 537 { 538 process<HbOpXor> (other); 539 } nexthb_set_t540 bool next (hb_codepoint_t *codepoint) const 541 { 542 if (unlikely (*codepoint == INVALID)) { 543 *codepoint = get_min (); 544 return *codepoint != INVALID; 545 } 546 547 page_map_t map = {get_major (*codepoint), 0}; 548 unsigned int i; 549 page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST); 550 if (i < page_map.len && page_map[i].major == map.major) 551 { 552 if (pages[page_map[i].index].next (codepoint)) 553 { 554 *codepoint += page_map[i].major * page_t::PAGE_BITS; 555 return true; 556 } 557 i++; 558 } 559 for (; i < page_map.len; i++) 560 { 561 hb_codepoint_t m = pages[page_map[i].index].get_min (); 562 if (m != INVALID) 563 { 564 *codepoint = page_map[i].major * page_t::PAGE_BITS + m; 565 return true; 566 } 567 } 568 *codepoint = INVALID; 569 return false; 570 } previoushb_set_t571 bool previous (hb_codepoint_t *codepoint) const 572 { 573 if (unlikely (*codepoint == INVALID)) { 574 *codepoint = get_max (); 575 return *codepoint != INVALID; 576 } 577 578 page_map_t map = {get_major (*codepoint), 0}; 579 unsigned int i; 580 page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST); 581 if (i < page_map.len && page_map[i].major == map.major) 582 { 583 if (pages[page_map[i].index].previous (codepoint)) 584 { 585 *codepoint += page_map[i].major * page_t::PAGE_BITS; 586 return true; 587 } 588 } 589 i--; 590 for (; (int) i >= 0; i--) 591 { 592 hb_codepoint_t m = pages[page_map[i].index].get_max (); 593 if (m != INVALID) 594 { 595 *codepoint = page_map[i].major * page_t::PAGE_BITS + m; 596 return true; 597 } 598 } 599 *codepoint = INVALID; 600 return false; 601 } next_rangehb_set_t602 bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 603 { 604 hb_codepoint_t i; 605 606 i = *last; 607 if (!next (&i)) 608 { 609 *last = *first = INVALID; 610 return false; 611 } 612 613 /* TODO Speed up. */ 614 *last = *first = i; 615 while (next (&i) && i == *last + 1) 616 (*last)++; 617 618 return true; 619 } previous_rangehb_set_t620 bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const 621 { 622 hb_codepoint_t i; 623 624 i = *first; 625 if (!previous (&i)) 626 { 627 *last = *first = INVALID; 628 return false; 629 } 630 631 /* TODO Speed up. */ 632 *last = *first = i; 633 while (previous (&i) && i == *first - 1) 634 (*first)--; 635 636 return true; 637 } 638 get_populationhb_set_t639 unsigned int get_population () const 640 { 641 if (population != (unsigned int) -1) 642 return population; 643 644 unsigned int pop = 0; 645 unsigned int count = pages.len; 646 for (unsigned int i = 0; i < count; i++) 647 pop += pages[i].get_population (); 648 649 population = pop; 650 return pop; 651 } get_minhb_set_t652 hb_codepoint_t get_min () const 653 { 654 unsigned int count = pages.len; 655 for (unsigned int i = 0; i < count; i++) 656 if (!page_at (i).is_empty ()) 657 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min (); 658 return INVALID; 659 } get_maxhb_set_t660 hb_codepoint_t get_max () const 661 { 662 unsigned int count = pages.len; 663 for (int i = count - 1; i >= 0; i++) 664 if (!page_at (i).is_empty ()) 665 return page_map[(unsigned) i].major * page_t::PAGE_BITS + page_at (i).get_max (); 666 return INVALID; 667 } 668 669 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 670 page_for_inserthb_set_t671 page_t *page_for_insert (hb_codepoint_t g) 672 { 673 page_map_t map = {get_major (g), pages.len}; 674 unsigned int i; 675 if (!page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST)) 676 { 677 if (!resize (pages.len + 1)) 678 return nullptr; 679 680 pages[map.index].init0 (); 681 memmove (page_map + i + 1, 682 page_map + i, 683 (page_map.len - 1 - i) * page_map.item_size); 684 page_map[i] = map; 685 } 686 return &pages[page_map[i].index]; 687 } page_forhb_set_t688 page_t *page_for (hb_codepoint_t g) 689 { 690 page_map_t key = {get_major (g)}; 691 const page_map_t *found = page_map.bsearch (key); 692 if (found) 693 return &pages[found->index]; 694 return nullptr; 695 } page_forhb_set_t696 const page_t *page_for (hb_codepoint_t g) const 697 { 698 page_map_t key = {get_major (g)}; 699 const page_map_t *found = page_map.bsearch (key); 700 if (found) 701 return &pages[found->index]; 702 return nullptr; 703 } page_athb_set_t704 page_t &page_at (unsigned int i) { return pages[page_map[i].index]; } page_athb_set_t705 const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; } get_majorhb_set_t706 unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; } major_starthb_set_t707 hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; } 708 }; 709 710 711 #endif /* HB_SET_HH */ 712