1 /* 2 * Copyright © 2012,2017 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_PRIVATE_HH 28 #define HB_SET_PRIVATE_HH 29 30 #include "hb-private.hh" 31 #include "hb-object-private.hh" 32 33 34 /* 35 * hb_set_t 36 */ 37 38 /* TODO Keep a free-list so we can free pages that are completely zeroed. At that 39 * point maybe also use a sentinel value for "all-1" pages? */ 40 41 struct hb_set_t 42 { 43 struct page_map_t 44 { cmphb_set_t::page_map_t45 inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; } 46 47 uint32_t major; 48 uint32_t index; 49 }; 50 51 struct page_t 52 { init0hb_set_t::page_t53 inline void init0 (void) { memset (&v, 0, sizeof (v)); } init1hb_set_t::page_t54 inline void init1 (void) { memset (&v, 0xff, sizeof (v)); } 55 lenhb_set_t::page_t56 inline unsigned int len (void) const 57 { return ARRAY_LENGTH_CONST (v); } 58 is_emptyhb_set_t::page_t59 inline bool is_empty (void) const 60 { 61 for (unsigned int i = 0; i < len (); i++) 62 if (v[i]) 63 return false; 64 return true; 65 } 66 addhb_set_t::page_t67 inline void add (hb_codepoint_t g) { elt (g) |= mask (g); } delhb_set_t::page_t68 inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } hashb_set_t::page_t69 inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); } 70 add_rangehb_set_t::page_t71 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) 72 { 73 elt_t *la = &elt (a); 74 elt_t *lb = &elt (b); 75 if (la == lb) 76 *la |= (mask (b) << 1) - mask(a); 77 else 78 { 79 *la |= ~(mask (a) - 1); 80 la++; 81 82 memset (la, 0xff, (char *) lb - (char *) la); 83 84 *lb |= ((mask (b) << 1) - 1); 85 } 86 } 87 is_equalhb_set_t::page_t88 inline bool is_equal (const page_t *other) const 89 { 90 return 0 == memcmp (&v, &other->v, sizeof (v)); 91 } 92 get_populationhb_set_t::page_t93 inline unsigned int get_population (void) const 94 { 95 unsigned int pop = 0; 96 for (unsigned int i = 0; i < len (); i++) 97 pop += _hb_popcount (v[i]); 98 return pop; 99 } 100 nexthb_set_t::page_t101 inline bool next (hb_codepoint_t *codepoint) const 102 { 103 unsigned int m = (*codepoint + 1) & MASK; 104 if (!m) 105 { 106 *codepoint = INVALID; 107 return false; 108 } 109 unsigned int i = m / ELT_BITS; 110 unsigned int j = m & ELT_MASK; 111 112 for (; j < ELT_BITS; j++) 113 if (v[i] & (elt_t (1) << j)) 114 goto found; 115 for (i++; i < len (); i++) 116 if (v[i]) 117 for (j = 0; j < ELT_BITS; j++) 118 if (v[i] & (elt_t (1) << j)) 119 goto found; 120 121 *codepoint = INVALID; 122 return false; 123 124 found: 125 *codepoint = i * ELT_BITS + j; 126 return true; 127 } get_minhb_set_t::page_t128 inline hb_codepoint_t get_min (void) const 129 { 130 for (unsigned int i = 0; i < len (); i++) 131 if (v[i]) 132 { 133 elt_t e = v[i]; 134 for (unsigned int j = 0; j < ELT_BITS; j++) 135 if (e & (elt_t (1) << j)) 136 return i * ELT_BITS + j; 137 } 138 return INVALID; 139 } get_maxhb_set_t::page_t140 inline hb_codepoint_t get_max (void) const 141 { 142 for (int i = len () - 1; i >= 0; i--) 143 if (v[i]) 144 { 145 elt_t e = v[i]; 146 for (int j = ELT_BITS - 1; j >= 0; j--) 147 if (e & (elt_t (1) << j)) 148 return i * ELT_BITS + j; 149 } 150 return 0; 151 } 152 153 static const unsigned int PAGE_BITS = 8192; /* Use to tune. */ 154 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); 155 156 typedef uint64_t elt_t; 157 158 #if 0 && HAVE_VECTOR_SIZE 159 /* The vectorized version does not work with clang as non-const 160 * elt() errs "non-const reference cannot bind to vector element". */ 161 typedef elt_t vector_t __attribute__((vector_size (PAGE_BITS / 8))); 162 #else 163 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; 164 #endif 165 166 vector_t v; 167 168 static const unsigned int ELT_BITS = sizeof (elt_t) * 8; 169 static const unsigned int ELT_MASK = ELT_BITS - 1; 170 static const unsigned int BITS = sizeof (vector_t) * 8; 171 static const unsigned int MASK = BITS - 1; 172 static_assert (PAGE_BITS == BITS, ""); 173 elthb_set_t::page_t174 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } elthb_set_t::page_t175 elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } maskhb_set_t::page_t176 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } 177 }; 178 static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, ""); 179 180 hb_object_header_t header; 181 ASSERT_POD (); 182 bool in_error; 183 hb_prealloced_array_t<page_map_t, 8> page_map; 184 hb_prealloced_array_t<page_t, 1> pages; 185 inithb_set_t186 inline void init (void) 187 { 188 page_map.init (); 189 pages.init (); 190 } finishhb_set_t191 inline void finish (void) 192 { 193 page_map.finish (); 194 pages.finish (); 195 } 196 resizehb_set_t197 inline bool resize (unsigned int count) 198 { 199 if (unlikely (in_error)) return false; 200 if (!pages.resize (count) || !page_map.resize (count)) 201 { 202 pages.resize (page_map.len); 203 in_error = true; 204 return false; 205 } 206 return true; 207 } 208 clearhb_set_t209 inline void clear (void) { 210 if (unlikely (hb_object_is_inert (this))) 211 return; 212 in_error = false; 213 page_map.resize (0); 214 pages.resize (0); 215 } is_emptyhb_set_t216 inline bool is_empty (void) const { 217 unsigned int count = pages.len; 218 for (unsigned int i = 0; i < count; i++) 219 if (!pages[i].is_empty ()) 220 return false; 221 return true; 222 } 223 addhb_set_t224 inline void add (hb_codepoint_t g) 225 { 226 if (unlikely (in_error)) return; 227 if (unlikely (g == INVALID)) return; 228 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 229 page->add (g); 230 } add_rangehb_set_t231 inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) 232 { 233 if (unlikely (in_error)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 234 if (unlikely (a > b || a == INVALID || b == INVALID)) return false; 235 unsigned int ma = get_major (a); 236 unsigned int mb = get_major (b); 237 if (ma == mb) 238 { 239 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 240 page->add_range (a, b); 241 } 242 else 243 { 244 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 245 page->add_range (a, major_start (ma + 1) - 1); 246 247 for (unsigned int m = ma + 1; m < mb; m++) 248 { 249 page = page_for_insert (major_start (m)); if (unlikely (!page)) return false; 250 page->init1 (); 251 } 252 253 page = page_for_insert (b); if (unlikely (!page)) return false; 254 page->add_range (major_start (mb), b); 255 } 256 return true; 257 } 258 259 template <typename T> add_arrayhb_set_t260 inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 261 { 262 if (unlikely (in_error)) return; 263 if (!count) return; 264 hb_codepoint_t g = *array; 265 while (count) 266 { 267 unsigned int m = get_major (g); 268 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 269 unsigned int start = major_start (m); 270 unsigned int end = major_start (m + 1); 271 do 272 { 273 page->add (g); 274 275 array++; 276 count--; 277 } 278 while (count && (g = *array, start <= g && g < end)); 279 } 280 } 281 282 /* Might return false if array looks unsorted. 283 * Used for faster rejection of corrupt data. */ 284 template <typename T> add_sorted_arrayhb_set_t285 inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 286 { 287 if (unlikely (in_error)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 288 if (!count) return true; 289 hb_codepoint_t g = *array; 290 hb_codepoint_t last_g = g; 291 while (count) 292 { 293 unsigned int m = get_major (g); 294 page_t *page = page_for_insert (g); if (unlikely (!page)) return false; 295 unsigned int end = major_start (m + 1); 296 do 297 { 298 /* If we try harder we can change the following comparison to <=; 299 * Not sure if it's worth it. */ 300 if (g < last_g) return false; 301 last_g = g; 302 page->add (g); 303 304 array++; 305 count--; 306 } 307 while (count && (g = *array, g < end)); 308 } 309 return true; 310 } 311 delhb_set_t312 inline void del (hb_codepoint_t g) 313 { 314 if (unlikely (in_error)) return; 315 page_t *p = page_for (g); 316 if (!p) 317 return; 318 p->del (g); 319 } del_rangehb_set_t320 inline void del_range (hb_codepoint_t a, hb_codepoint_t b) 321 { 322 /* TODO Optimize, like add_range(). */ 323 if (unlikely (in_error)) return; 324 for (unsigned int i = a; i < b + 1; i++) 325 del (i); 326 } hashb_set_t327 inline bool has (hb_codepoint_t g) const 328 { 329 const page_t *p = page_for (g); 330 if (!p) 331 return false; 332 return p->has (g); 333 } intersectshb_set_t334 inline bool intersects (hb_codepoint_t first, 335 hb_codepoint_t last) const 336 { 337 hb_codepoint_t c = first - 1; 338 return next (&c) && c <= last; 339 } sethb_set_t340 inline void set (const hb_set_t *other) 341 { 342 if (unlikely (in_error)) return; 343 unsigned int count = other->pages.len; 344 if (!resize (count)) 345 return; 346 347 memcpy (pages.array, other->pages.array, count * sizeof (pages.array[0])); 348 memcpy (page_map.array, other->page_map.array, count * sizeof (page_map.array[0])); 349 } 350 is_equalhb_set_t351 inline bool is_equal (const hb_set_t *other) const 352 { 353 unsigned int na = pages.len; 354 unsigned int nb = other->pages.len; 355 356 unsigned int a = 0, b = 0; 357 for (; a < na && b < nb; ) 358 { 359 if (page_at (a).is_empty ()) { a++; continue; } 360 if (other->page_at (b).is_empty ()) { b++; continue; } 361 if (page_map[a].major != other->page_map[b].major || 362 !page_at (a).is_equal (&other->page_at (b))) 363 return false; 364 a++; 365 b++; 366 } 367 for (; a < na; a++) 368 if (!page_at (a).is_empty ()) { return false; } 369 for (; b < nb; b++) 370 if (!other->page_at (b).is_empty ()) { return false; } 371 372 return true; 373 } 374 375 template <class Op> processhb_set_t376 inline void process (const hb_set_t *other) 377 { 378 if (unlikely (in_error)) return; 379 380 unsigned int na = pages.len; 381 unsigned int nb = other->pages.len; 382 383 unsigned int count = 0; 384 unsigned int a = 0, b = 0; 385 for (; a < na && b < nb; ) 386 { 387 if (page_map[a].major == other->page_map[b].major) 388 { 389 count++; 390 a++; 391 b++; 392 } 393 else if (page_map[a].major < other->page_map[b].major) 394 { 395 if (Op::passthru_left) 396 count++; 397 a++; 398 } 399 else 400 { 401 if (Op::passthru_right) 402 count++; 403 b++; 404 } 405 } 406 if (Op::passthru_left) 407 count += na - a; 408 if (Op::passthru_right) 409 count += nb - b; 410 411 if (!resize (count)) 412 return; 413 414 /* Process in-place backward. */ 415 a = na; 416 b = nb; 417 for (; a && b; ) 418 { 419 if (page_map[a - 1].major == other->page_map[b - 1].major) 420 { 421 a--; 422 b--; 423 Op::process (page_at (--count).v, page_at (a).v, other->page_at (b).v); 424 } 425 else if (page_map[a - 1].major > other->page_map[b - 1].major) 426 { 427 a--; 428 if (Op::passthru_left) 429 page_at (--count).v = page_at (a).v; 430 } 431 else 432 { 433 b--; 434 if (Op::passthru_right) 435 page_at (--count).v = other->page_at (b).v; 436 } 437 } 438 if (Op::passthru_left) 439 while (a) 440 page_at (--count).v = page_at (--a).v; 441 if (Op::passthru_right) 442 while (b) 443 page_at (--count).v = other->page_at (--b).v; 444 assert (!count); 445 } 446 union_hb_set_t447 inline void union_ (const hb_set_t *other) 448 { 449 process<HbOpOr> (other); 450 } intersecthb_set_t451 inline void intersect (const hb_set_t *other) 452 { 453 process<HbOpAnd> (other); 454 } subtracthb_set_t455 inline void subtract (const hb_set_t *other) 456 { 457 process<HbOpMinus> (other); 458 } symmetric_differencehb_set_t459 inline void symmetric_difference (const hb_set_t *other) 460 { 461 process<HbOpXor> (other); 462 } nexthb_set_t463 inline bool next (hb_codepoint_t *codepoint) const 464 { 465 if (unlikely (*codepoint == INVALID)) { 466 *codepoint = get_min (); 467 return *codepoint != INVALID; 468 } 469 470 page_map_t map = {get_major (*codepoint), 0}; 471 unsigned int i; 472 page_map.bfind (&map, &i); 473 if (i < page_map.len) 474 { 475 if (pages[page_map[i].index].next (codepoint)) 476 { 477 *codepoint += page_map[i].major * page_t::PAGE_BITS; 478 return true; 479 } 480 i++; 481 } 482 for (; i < page_map.len; i++) 483 { 484 hb_codepoint_t m = pages[page_map[i].index].get_min (); 485 if (m != INVALID) 486 { 487 *codepoint = page_map[i].major * page_t::PAGE_BITS + m; 488 return true; 489 } 490 } 491 *codepoint = INVALID; 492 return false; 493 } next_rangehb_set_t494 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 495 { 496 hb_codepoint_t i; 497 498 i = *last; 499 if (!next (&i)) 500 { 501 *last = *first = INVALID; 502 return false; 503 } 504 505 *last = *first = i; 506 while (next (&i) && i == *last + 1) 507 (*last)++; 508 509 return true; 510 } 511 get_populationhb_set_t512 inline unsigned int get_population (void) const 513 { 514 unsigned int pop = 0; 515 unsigned int count = pages.len; 516 for (unsigned int i = 0; i < count; i++) 517 pop += pages[i].get_population (); 518 return pop; 519 } get_minhb_set_t520 inline hb_codepoint_t get_min (void) const 521 { 522 unsigned int count = pages.len; 523 for (unsigned int i = 0; i < count; i++) 524 if (!page_at (i).is_empty ()) 525 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min (); 526 return INVALID; 527 } get_maxhb_set_t528 inline hb_codepoint_t get_max (void) const 529 { 530 unsigned int count = pages.len; 531 for (int i = count - 1; i >= 0; i++) 532 if (!page_at (i).is_empty ()) 533 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_max (); 534 return INVALID; 535 } 536 537 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 538 page_for_inserthb_set_t539 inline page_t *page_for_insert (hb_codepoint_t g) 540 { 541 page_map_t map = {get_major (g), pages.len}; 542 unsigned int i; 543 if (!page_map.bfind (&map, &i)) 544 { 545 if (!resize (pages.len + 1)) 546 return nullptr; 547 548 pages[map.index].init0 (); 549 memmove (&page_map[i + 1], &page_map[i], (page_map.len - 1 - i) * sizeof (page_map[0])); 550 page_map[i] = map; 551 } 552 return &pages[page_map[i].index]; 553 } page_forhb_set_t554 inline page_t *page_for (hb_codepoint_t g) 555 { 556 page_map_t key = {get_major (g)}; 557 const page_map_t *found = page_map.bsearch (&key); 558 if (found) 559 return &pages[found->index]; 560 return nullptr; 561 } page_forhb_set_t562 inline const page_t *page_for (hb_codepoint_t g) const 563 { 564 page_map_t key = {get_major (g)}; 565 const page_map_t *found = page_map.bsearch (&key); 566 if (found) 567 return &pages[found->index]; 568 return nullptr; 569 } page_athb_set_t570 inline page_t &page_at (unsigned int i) { return pages[page_map[i].index]; } page_athb_set_t571 inline const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; } get_majorhb_set_t572 inline unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; } major_starthb_set_t573 inline hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; } 574 }; 575 576 577 #endif /* HB_SET_PRIVATE_HH */ 578