/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"
#include "hb-machinery.hh"

/*
 * The set-digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The way these are used is that the filter is first populated by
 * a lookup's or subtable's Coverage table(s), and then when we
 * want to apply the lookup or subtable to a glyph, before trying
 * to apply, we ask the filter if the glyph may be covered.
If it's 48 * not, we return early. We can also match a digest against another 49 * digest. 50 * 51 * We use these filters at three levels: 52 * - If the digest for all the glyphs in the buffer as a whole 53 * does not match the digest for the lookup, skip the lookup. 54 * - For each glyph, if it doesn't match the lookup digest, 55 * skip it. 56 * - For each glyph, if it doesn't match the subtable digest, 57 * skip it. 58 * 59 * The main filter we use is a combination of three bits-pattern 60 * filters. A bits-pattern filter checks a number of bits (5 or 6) 61 * of the input number (glyph-id in this case) and checks whether 62 * its pattern is amongst the patterns of any of the accepted values. 63 * The accepted patterns are represented as a "long" integer. The 64 * check is done using four bitwise operations only. 65 */ 66 67 template <typename mask_t, unsigned int shift> 68 struct hb_set_digest_bits_pattern_t 69 { 70 static constexpr unsigned mask_bytes = sizeof (mask_t); 71 static constexpr unsigned mask_bits = sizeof (mask_t) * 8; 72 static constexpr unsigned num_bits = 0 73 + (mask_bytes >= 1 ? 3 : 0) 74 + (mask_bytes >= 2 ? 1 : 0) 75 + (mask_bytes >= 4 ? 1 : 0) 76 + (mask_bytes >= 8 ? 1 : 0) 77 + (mask_bytes >= 16? 
1 : 0) 78 + 0; 79 80 static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); 81 static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); 82 inithb_set_digest_bits_pattern_t83 void init () { mask = 0; } 84 addhb_set_digest_bits_pattern_t85 void add (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; } 86 addhb_set_digest_bits_pattern_t87 void add (hb_codepoint_t g) { mask |= mask_for (g); } 88 add_rangehb_set_digest_bits_pattern_t89 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 90 { 91 if (mask == (mask_t) -1) return false; 92 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 93 { 94 mask = (mask_t) -1; 95 return false; 96 } 97 else 98 { 99 mask_t ma = mask_for (a); 100 mask_t mb = mask_for (b); 101 mask |= mb + (mb - ma) - (mb < ma); 102 return true; 103 } 104 } 105 106 template <typename T> add_arrayhb_set_digest_bits_pattern_t107 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 108 { 109 for (unsigned int i = 0; i < count; i++) 110 { 111 add (*array); 112 array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); 113 } 114 } 115 template <typename T> add_arrayhb_set_digest_bits_pattern_t116 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 117 template <typename T> add_sorted_arrayhb_set_digest_bits_pattern_t118 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 119 { 120 add_array (array, count, stride); 121 return true; 122 } 123 template <typename T> add_sorted_arrayhb_set_digest_bits_pattern_t124 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 125 may_havehb_set_digest_bits_pattern_t126 bool may_have (const hb_set_digest_bits_pattern_t &o) const 127 { return mask & o.mask; } 128 may_havehb_set_digest_bits_pattern_t129 bool may_have (hb_codepoint_t g) const 130 { return mask & mask_for (g); } 131 132 private: 133 mask_forhb_set_digest_bits_pattern_t134 static 
mask_t mask_for (hb_codepoint_t g) 135 { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } 136 mask_t mask; 137 }; 138 139 template <typename head_t, typename tail_t> 140 struct hb_set_digest_combiner_t 141 { inithb_set_digest_combiner_t142 void init () 143 { 144 head.init (); 145 tail.init (); 146 } 147 addhb_set_digest_combiner_t148 void add (const hb_set_digest_combiner_t &o) 149 { 150 head.add (o.head); 151 tail.add (o.tail); 152 } 153 addhb_set_digest_combiner_t154 void add (hb_codepoint_t g) 155 { 156 head.add (g); 157 tail.add (g); 158 } 159 add_rangehb_set_digest_combiner_t160 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 161 { 162 return (int) head.add_range (a, b) | (int) tail.add_range (a, b); 163 } 164 template <typename T> add_arrayhb_set_digest_combiner_t165 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 166 { 167 head.add_array (array, count, stride); 168 tail.add_array (array, count, stride); 169 } 170 template <typename T> add_arrayhb_set_digest_combiner_t171 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 172 template <typename T> add_sorted_arrayhb_set_digest_combiner_t173 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 174 { 175 return head.add_sorted_array (array, count, stride) && 176 tail.add_sorted_array (array, count, stride); 177 } 178 template <typename T> add_sorted_arrayhb_set_digest_combiner_t179 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 180 may_havehb_set_digest_combiner_t181 bool may_have (const hb_set_digest_combiner_t &o) const 182 { 183 return head.may_have (o.head) && tail.may_have (o.tail); 184 } 185 may_havehb_set_digest_combiner_t186 bool may_have (hb_codepoint_t g) const 187 { 188 return head.may_have (g) && tail.may_have (g); 189 } 190 191 private: 192 head_t head; 193 tail_t tail; 194 }; 195 196 197 /* 198 * hb_set_digest_t 199 * 
200 * This is a combination of digests that performs "best". 201 * There is not much science to this: it's a result of intuition 202 * and testing. 203 */ 204 using hb_set_digest_t = 205 hb_set_digest_combiner_t 206 < 207 hb_set_digest_bits_pattern_t<unsigned long, 4>, 208 hb_set_digest_combiner_t 209 < 210 hb_set_digest_bits_pattern_t<unsigned long, 0>, 211 hb_set_digest_bits_pattern_t<unsigned long, 9> 212 > 213 > 214 ; 215 216 217 #endif /* HB_SET_DIGEST_HH */ 218