1 /* 2 * Copyright © 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_PRIVATE_HH 28 #define HB_SET_PRIVATE_HH 29 30 #include "hb-private.hh" 31 #include "hb-object-private.hh" 32 33 34 /* 35 * The set digests here implement various "filters" that support 36 * "approximate member query". Conceptually these are like Bloom 37 * Filter and Quotient Filter, however, much smaller, faster, and 38 * designed to fit the requirements of our uses for glyph coverage 39 * queries. 40 * 41 * Our filters are highly accurate if the lookup covers fairly local 42 * set of glyphs, but fully flooded and ineffective if coverage is 43 * all over the place. 44 * 45 * The frozen-set can be used instead of a digest, to trade more 46 * memory for 100% accuracy, but in practice, that doesn't look like 47 * an attractive trade-off. 48 */ 49 50 template <typename mask_t, unsigned int shift> 51 struct hb_set_digest_lowest_bits_t 52 { 53 ASSERT_POD (); 54 55 static const unsigned int mask_bytes = sizeof (mask_t); 56 static const unsigned int mask_bits = sizeof (mask_t) * 8; 57 static const unsigned int num_bits = 0 58 + (mask_bytes >= 1 ? 3 : 0) 59 + (mask_bytes >= 2 ? 1 : 0) 60 + (mask_bytes >= 4 ? 1 : 0) 61 + (mask_bytes >= 8 ? 1 : 0) 62 + (mask_bytes >= 16? 1 : 0) 63 + 0; 64 65 ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8); 66 ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8); 67 inithb_set_digest_lowest_bits_t68 inline void init (void) { 69 mask = 0; 70 } 71 addhb_set_digest_lowest_bits_t72 inline void add (hb_codepoint_t g) { 73 mask |= mask_for (g); 74 } 75 add_rangehb_set_digest_lowest_bits_t76 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { 77 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 78 mask = (mask_t) -1; 79 else { 80 mask_t ma = mask_for (a); 81 mask_t mb = mask_for (b); 82 mask |= mb + (mb - ma) - (mb < ma); 83 } 84 } 85 may_havehb_set_digest_lowest_bits_t86 inline bool may_have (hb_codepoint_t g) const { 87 return !!(mask & mask_for (g)); 88 } 89 90 private: 91 mask_forhb_set_digest_lowest_bits_t92 static inline mask_t mask_for (hb_codepoint_t g) { 93 return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); 94 } 95 mask_t mask; 96 }; 97 98 template <typename head_t, typename tail_t> 99 struct hb_set_digest_combiner_t 100 { 101 ASSERT_POD (); 102 inithb_set_digest_combiner_t103 inline void init (void) { 104 head.init (); 105 tail.init (); 106 } 107 addhb_set_digest_combiner_t108 inline void add (hb_codepoint_t g) { 109 head.add (g); 110 tail.add (g); 111 } 112 add_rangehb_set_digest_combiner_t113 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { 114 head.add_range (a, b); 115 tail.add_range (a, b); 116 } 117 may_havehb_set_digest_combiner_t118 inline bool may_have (hb_codepoint_t g) const { 119 return head.may_have (g) && tail.may_have (g); 120 } 121 122 private: 123 head_t head; 124 tail_t tail; 125 }; 126 127 128 /* 129 * hb_set_digest_t 130 * 131 * This is a combination of digests that performs "best". 132 * There is not much science to this: it's a result of intuition 133 * and testing. 134 */ 135 typedef hb_set_digest_combiner_t 136 < 137 hb_set_digest_lowest_bits_t<unsigned long, 4>, 138 hb_set_digest_combiner_t 139 < 140 hb_set_digest_lowest_bits_t<unsigned long, 0>, 141 hb_set_digest_lowest_bits_t<unsigned long, 9> 142 > 143 > hb_set_digest_t; 144 145 146 147 /* 148 * hb_set_t 149 */ 150 151 152 /* TODO Make this faster and memmory efficient. */ 153 154 struct hb_set_t 155 { 156 friend struct hb_frozen_set_t; 157 158 hb_object_header_t header; 159 ASSERT_POD (); 160 bool in_error; 161 inithb_set_t162 inline void init (void) { 163 hb_object_init (this); 164 clear (); 165 } finihb_set_t166 inline void fini (void) { 167 } clearhb_set_t168 inline void clear (void) { 169 if (unlikely (hb_object_is_inert (this))) 170 return; 171 in_error = false; 172 memset (elts, 0, sizeof elts); 173 } is_emptyhb_set_t174 inline bool is_empty (void) const { 175 for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++) 176 if (elts[i]) 177 return false; 178 return true; 179 } addhb_set_t180 inline void add (hb_codepoint_t g) 181 { 182 if (unlikely (in_error)) return; 183 if (unlikely (g == INVALID)) return; 184 if (unlikely (g > MAX_G)) return; 185 elt (g) |= mask (g); 186 } add_rangehb_set_t187 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) 188 { 189 if (unlikely (in_error)) return; 190 /* TODO Speedup */ 191 for (unsigned int i = a; i < b + 1; i++) 192 add (i); 193 } delhb_set_t194 inline void del (hb_codepoint_t g) 195 { 196 if (unlikely (in_error)) return; 197 if (unlikely (g > MAX_G)) return; 198 elt (g) &= ~mask (g); 199 } del_rangehb_set_t200 inline void del_range (hb_codepoint_t a, hb_codepoint_t b) 201 { 202 if (unlikely (in_error)) return; 203 /* TODO Speedup */ 204 for (unsigned int i = a; i < b + 1; i++) 205 del (i); 206 } hashb_set_t207 inline bool has (hb_codepoint_t g) const 208 { 209 if (unlikely (g > MAX_G)) return false; 210 return !!(elt (g) & mask (g)); 211 } intersectshb_set_t212 inline bool intersects (hb_codepoint_t first, 213 hb_codepoint_t last) const 214 { 215 if (unlikely (first > MAX_G)) return false; 216 if (unlikely (last > MAX_G)) last = MAX_G; 217 unsigned int end = last + 1; 218 for (hb_codepoint_t i = first; i < end; i++) 219 if (has (i)) 220 return true; 221 return false; 222 } is_equalhb_set_t223 inline bool is_equal (const hb_set_t *other) const 224 { 225 for (unsigned int i = 0; i < ELTS; i++) 226 if (elts[i] != other->elts[i]) 227 return false; 228 return true; 229 } sethb_set_t230 inline void set (const hb_set_t *other) 231 { 232 if (unlikely (in_error)) return; 233 for (unsigned int i = 0; i < ELTS; i++) 234 elts[i] = other->elts[i]; 235 } union_hb_set_t236 inline void union_ (const hb_set_t *other) 237 { 238 if (unlikely (in_error)) return; 239 for (unsigned int i = 0; i < ELTS; i++) 240 elts[i] |= other->elts[i]; 241 } intersecthb_set_t242 inline void intersect (const hb_set_t *other) 243 { 244 if (unlikely (in_error)) return; 245 for (unsigned int i = 0; i < ELTS; i++) 246 elts[i] &= other->elts[i]; 247 } subtracthb_set_t248 inline void subtract (const hb_set_t *other) 249 { 250 if (unlikely (in_error)) return; 251 for (unsigned int i = 0; i < ELTS; i++) 252 elts[i] &= ~other->elts[i]; 253 } symmetric_differencehb_set_t254 inline void symmetric_difference (const hb_set_t *other) 255 { 256 if (unlikely (in_error)) return; 257 for (unsigned int i = 0; i < ELTS; i++) 258 elts[i] ^= other->elts[i]; 259 } inverthb_set_t260 inline void invert (void) 261 { 262 if (unlikely (in_error)) return; 263 for (unsigned int i = 0; i < ELTS; i++) 264 elts[i] = ~elts[i]; 265 } nexthb_set_t266 inline bool next (hb_codepoint_t *codepoint) const 267 { 268 if (unlikely (*codepoint == INVALID)) { 269 hb_codepoint_t i = get_min (); 270 if (i != INVALID) { 271 *codepoint = i; 272 return true; 273 } else { 274 *codepoint = INVALID; 275 return false; 276 } 277 } 278 for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++) 279 if (has (i)) { 280 *codepoint = i; 281 return true; 282 } 283 *codepoint = INVALID; 284 return false; 285 } next_rangehb_set_t286 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 287 { 288 hb_codepoint_t i; 289 290 i = *last; 291 if (!next (&i)) 292 { 293 *last = *first = INVALID; 294 return false; 295 } 296 297 *last = *first = i; 298 while (next (&i) && i == *last + 1) 299 (*last)++; 300 301 return true; 302 } 303 get_populationhb_set_t304 inline unsigned int get_population (void) const 305 { 306 unsigned int count = 0; 307 for (unsigned int i = 0; i < ELTS; i++) 308 count += _hb_popcount32 (elts[i]); 309 return count; 310 } get_minhb_set_t311 inline hb_codepoint_t get_min (void) const 312 { 313 for (unsigned int i = 0; i < ELTS; i++) 314 if (elts[i]) 315 for (unsigned int j = 0; j < BITS; j++) 316 if (elts[i] & (1u << j)) 317 return i * BITS + j; 318 return INVALID; 319 } get_maxhb_set_t320 inline hb_codepoint_t get_max (void) const 321 { 322 for (unsigned int i = ELTS; i; i--) 323 if (elts[i - 1]) 324 for (unsigned int j = BITS; j; j--) 325 if (elts[i - 1] & (1u << (j - 1))) 326 return (i - 1) * BITS + (j - 1); 327 return INVALID; 328 } 329 330 typedef uint32_t elt_t; 331 static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */ 332 static const unsigned int SHIFT = 5; 333 static const unsigned int BITS = (1 << SHIFT); 334 static const unsigned int MASK = BITS - 1; 335 static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS; 336 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 337 elthb_set_t338 elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; } elthb_set_t339 elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } maskhb_set_t340 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } 341 342 elt_t elts[ELTS]; /* XXX 8kb */ 343 344 ASSERT_STATIC (sizeof (elt_t) * 8 == BITS); 345 ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G); 346 }; 347 348 struct hb_frozen_set_t 349 { 350 static const unsigned int SHIFT = hb_set_t::SHIFT; 351 static const unsigned int BITS = hb_set_t::BITS; 352 static const unsigned int MASK = hb_set_t::MASK; 353 typedef hb_set_t::elt_t elt_t; 354 inithb_frozen_set_t355 inline void init (const hb_set_t &set) 356 { 357 start = count = 0; 358 elts = NULL; 359 360 unsigned int max = set.get_max (); 361 if (max == set.INVALID) 362 return; 363 unsigned int min = set.get_min (); 364 const elt_t &min_elt = set.elt (min); 365 366 start = min & ~MASK; 367 count = max - start + 1; 368 unsigned int num_elts = (count + BITS - 1) / BITS; 369 unsigned int elts_size = num_elts * sizeof (elt_t); 370 elts = (elt_t *) malloc (elts_size); 371 if (unlikely (!elts)) 372 { 373 start = count = 0; 374 return; 375 } 376 memcpy (elts, &min_elt, elts_size); 377 } 378 finihb_frozen_set_t379 inline void fini (void) 380 { 381 if (elts) 382 free (elts); 383 } 384 hashb_frozen_set_t385 inline bool has (hb_codepoint_t g) const 386 { 387 /* hb_codepoint_t is unsigned. */ 388 g -= start; 389 if (unlikely (g > count)) return false; 390 return !!(elt (g) & mask (g)); 391 } 392 elthb_frozen_set_t393 elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } maskhb_frozen_set_t394 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } 395 396 private: 397 hb_codepoint_t start, count; 398 elt_t *elts; 399 }; 400 401 402 #endif /* HB_SET_PRIVATE_HH */ 403