1 /* 2 * Copyright © 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_PRIVATE_HH 28 #define HB_SET_PRIVATE_HH 29 30 #include "hb-private.hh" 31 #include "hb-object-private.hh" 32 33 34 /* 35 * The set digests here implement various "filters" that support 36 * "approximate member query". Conceptually these are like Bloom 37 * Filter and Quotient Filter, however, much smaller, faster, and 38 * designed to fit the requirements of our uses for glyph coverage 39 * queries. As a result, our filters have much higher. 40 */ 41 42 template <typename mask_t, unsigned int shift> 43 struct hb_set_digest_lowest_bits_t 44 { 45 ASSERT_POD (); 46 47 static const unsigned int mask_bytes = sizeof (mask_t); 48 static const unsigned int mask_bits = sizeof (mask_t) * 8; 49 static const unsigned int num_bits = 0 50 + (mask_bytes >= 1 ? 3 : 0) 51 + (mask_bytes >= 2 ? 1 : 0) 52 + (mask_bytes >= 4 ? 1 : 0) 53 + (mask_bytes >= 8 ? 1 : 0) 54 + (mask_bytes >= 16? 1 : 0) 55 + 0; 56 57 ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8); 58 ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8); 59 inithb_set_digest_lowest_bits_t60 inline void init (void) { 61 mask = 0; 62 } 63 addhb_set_digest_lowest_bits_t64 inline void add (hb_codepoint_t g) { 65 mask |= mask_for (g); 66 } 67 add_rangehb_set_digest_lowest_bits_t68 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { 69 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 70 mask = (mask_t) -1; 71 else { 72 mask_t ma = mask_for (a); 73 mask_t mb = mask_for (b); 74 mask |= mb + (mb - ma) - (mb < ma); 75 } 76 } 77 may_havehb_set_digest_lowest_bits_t78 inline bool may_have (hb_codepoint_t g) const { 79 return !!(mask & mask_for (g)); 80 } 81 82 private: 83 mask_forhb_set_digest_lowest_bits_t84 static inline mask_t mask_for (hb_codepoint_t g) { 85 return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); 86 } 87 mask_t mask; 88 }; 89 90 template <typename head_t, typename tail_t> 91 struct hb_set_digest_combiner_t 92 { 93 ASSERT_POD (); 94 inithb_set_digest_combiner_t95 inline void init (void) { 96 head.init (); 97 tail.init (); 98 } 99 addhb_set_digest_combiner_t100 inline void add (hb_codepoint_t g) { 101 head.add (g); 102 tail.add (g); 103 } 104 add_rangehb_set_digest_combiner_t105 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) { 106 head.add_range (a, b); 107 tail.add_range (a, b); 108 } 109 may_havehb_set_digest_combiner_t110 inline bool may_have (hb_codepoint_t g) const { 111 return head.may_have (g) && tail.may_have (g); 112 } 113 114 private: 115 head_t head; 116 tail_t tail; 117 }; 118 119 120 /* 121 * hb_set_digest_t 122 * 123 * This is a combination of digests that performs "best". 124 * There is not much science to this: it's a result of intuition 125 * and testing. 126 */ 127 typedef hb_set_digest_combiner_t 128 < 129 hb_set_digest_lowest_bits_t<unsigned long, 4>, 130 hb_set_digest_combiner_t 131 < 132 hb_set_digest_lowest_bits_t<unsigned long, 0>, 133 hb_set_digest_lowest_bits_t<unsigned long, 9> 134 > 135 > hb_set_digest_t; 136 137 138 139 /* 140 * hb_set_t 141 */ 142 143 144 /* TODO Make this faster and memmory efficient. */ 145 146 struct hb_set_t 147 { 148 hb_object_header_t header; 149 ASSERT_POD (); 150 bool in_error; 151 inithb_set_t152 inline void init (void) { 153 header.init (); 154 clear (); 155 } finihb_set_t156 inline void fini (void) { 157 } clearhb_set_t158 inline void clear (void) { 159 if (unlikely (hb_object_is_inert (this))) 160 return; 161 in_error = false; 162 memset (elts, 0, sizeof elts); 163 } is_emptyhb_set_t164 inline bool is_empty (void) const { 165 for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++) 166 if (elts[i]) 167 return false; 168 return true; 169 } addhb_set_t170 inline void add (hb_codepoint_t g) 171 { 172 if (unlikely (in_error)) return; 173 if (unlikely (g == INVALID)) return; 174 if (unlikely (g > MAX_G)) return; 175 elt (g) |= mask (g); 176 } add_rangehb_set_t177 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) 178 { 179 if (unlikely (in_error)) return; 180 /* TODO Speedup */ 181 for (unsigned int i = a; i < b + 1; i++) 182 add (i); 183 } delhb_set_t184 inline void del (hb_codepoint_t g) 185 { 186 if (unlikely (in_error)) return; 187 if (unlikely (g > MAX_G)) return; 188 elt (g) &= ~mask (g); 189 } del_rangehb_set_t190 inline void del_range (hb_codepoint_t a, hb_codepoint_t b) 191 { 192 if (unlikely (in_error)) return; 193 /* TODO Speedup */ 194 for (unsigned int i = a; i < b + 1; i++) 195 del (i); 196 } hashb_set_t197 inline bool has (hb_codepoint_t g) const 198 { 199 if (unlikely (g > MAX_G)) return false; 200 return !!(elt (g) & mask (g)); 201 } intersectshb_set_t202 inline bool intersects (hb_codepoint_t first, 203 hb_codepoint_t last) const 204 { 205 if (unlikely (first > MAX_G)) return false; 206 if (unlikely (last > MAX_G)) last = MAX_G; 207 unsigned int end = last + 1; 208 for (hb_codepoint_t i = first; i < end; i++) 209 if (has (i)) 210 return true; 211 return false; 212 } is_equalhb_set_t213 inline bool is_equal (const hb_set_t *other) const 214 { 215 for (unsigned int i = 0; i < ELTS; i++) 216 if (elts[i] != other->elts[i]) 217 return false; 218 return true; 219 } sethb_set_t220 inline void set (const hb_set_t *other) 221 { 222 if (unlikely (in_error)) return; 223 for (unsigned int i = 0; i < ELTS; i++) 224 elts[i] = other->elts[i]; 225 } union_hb_set_t226 inline void union_ (const hb_set_t *other) 227 { 228 if (unlikely (in_error)) return; 229 for (unsigned int i = 0; i < ELTS; i++) 230 elts[i] |= other->elts[i]; 231 } intersecthb_set_t232 inline void intersect (const hb_set_t *other) 233 { 234 if (unlikely (in_error)) return; 235 for (unsigned int i = 0; i < ELTS; i++) 236 elts[i] &= other->elts[i]; 237 } subtracthb_set_t238 inline void subtract (const hb_set_t *other) 239 { 240 if (unlikely (in_error)) return; 241 for (unsigned int i = 0; i < ELTS; i++) 242 elts[i] &= ~other->elts[i]; 243 } symmetric_differencehb_set_t244 inline void symmetric_difference (const hb_set_t *other) 245 { 246 if (unlikely (in_error)) return; 247 for (unsigned int i = 0; i < ELTS; i++) 248 elts[i] ^= other->elts[i]; 249 } inverthb_set_t250 inline void invert (void) 251 { 252 if (unlikely (in_error)) return; 253 for (unsigned int i = 0; i < ELTS; i++) 254 elts[i] = ~elts[i]; 255 } nexthb_set_t256 inline bool next (hb_codepoint_t *codepoint) const 257 { 258 if (unlikely (*codepoint == INVALID)) { 259 hb_codepoint_t i = get_min (); 260 if (i != INVALID) { 261 *codepoint = i; 262 return true; 263 } else { 264 *codepoint = INVALID; 265 return false; 266 } 267 } 268 for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++) 269 if (has (i)) { 270 *codepoint = i; 271 return true; 272 } 273 *codepoint = INVALID; 274 return false; 275 } next_rangehb_set_t276 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 277 { 278 hb_codepoint_t i; 279 280 i = *last; 281 if (!next (&i)) 282 { 283 *last = *first = INVALID; 284 return false; 285 } 286 287 *last = *first = i; 288 while (next (&i) && i == *last + 1) 289 (*last)++; 290 291 return true; 292 } 293 get_populationhb_set_t294 inline unsigned int get_population (void) const 295 { 296 unsigned int count = 0; 297 for (unsigned int i = 0; i < ELTS; i++) 298 count += _hb_popcount32 (elts[i]); 299 return count; 300 } get_minhb_set_t301 inline hb_codepoint_t get_min (void) const 302 { 303 for (unsigned int i = 0; i < ELTS; i++) 304 if (elts[i]) 305 for (unsigned int j = 0; j < BITS; j++) 306 if (elts[i] & (1 << j)) 307 return i * BITS + j; 308 return INVALID; 309 } get_maxhb_set_t310 inline hb_codepoint_t get_max (void) const 311 { 312 for (unsigned int i = ELTS; i; i--) 313 if (elts[i - 1]) 314 for (unsigned int j = BITS; j; j--) 315 if (elts[i - 1] & (1 << (j - 1))) 316 return (i - 1) * BITS + (j - 1); 317 return INVALID; 318 } 319 320 typedef uint32_t elt_t; 321 static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */ 322 static const unsigned int SHIFT = 5; 323 static const unsigned int BITS = (1 << SHIFT); 324 static const unsigned int MASK = BITS - 1; 325 static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS; 326 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 327 elthb_set_t328 elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; } elthb_set_t329 elt_t elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } maskhb_set_t330 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } 331 332 elt_t elts[ELTS]; /* XXX 8kb */ 333 334 ASSERT_STATIC (sizeof (elt_t) * 8 == BITS); 335 ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G); 336 }; 337 338 339 340 #endif /* HB_SET_PRIVATE_HH */ 341