1 /* 2 * Copyright © 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_DIGEST_HH 28 #define HB_SET_DIGEST_HH 29 30 #include "hb.hh" 31 #include "hb-machinery.hh" 32 33 /* 34 * The set-digests here implement various "filters" that support 35 * "approximate member query". Conceptually these are like Bloom 36 * Filter and Quotient Filter, however, much smaller, faster, and 37 * designed to fit the requirements of our uses for glyph coverage 38 * queries. 39 * 40 * Our filters are highly accurate if the lookup covers fairly local 41 * set of glyphs, but fully flooded and ineffective if coverage is 42 * all over the place. 43 * 44 * The way these are used is that the filter is first populated by 45 * a lookup's or subtable's Coverage table(s), and then when we 46 * want to apply the lookup or subtable to a glyph, before trying 47 * to apply, we ask the filter if the glyph may be covered. If it's 48 * not, we return early. 49 * 50 * We use these filters both at the lookup-level, and then again, 51 * at the subtable-level. Both have performance win. 52 * 53 * The main filter we use is a combination of three bits-pattern 54 * filters. A bits-pattern filter checks a number of bits (5 or 6) 55 * of the input number (glyph-id in this case) and checks whether 56 * its pattern is amongst the patterns of any of the accepted values. 57 * The accepted patterns are represented as a "long" integer. The 58 * check is done using four bitwise operations only. 59 */ 60 61 template <typename mask_t, unsigned int shift> 62 struct hb_set_digest_bits_pattern_t 63 { 64 static constexpr unsigned mask_bytes = sizeof (mask_t); 65 static constexpr unsigned mask_bits = sizeof (mask_t) * 8; 66 static constexpr unsigned num_bits = 0 67 + (mask_bytes >= 1 ? 3 : 0) 68 + (mask_bytes >= 2 ? 1 : 0) 69 + (mask_bytes >= 4 ? 1 : 0) 70 + (mask_bytes >= 8 ? 1 : 0) 71 + (mask_bytes >= 16? 1 : 0) 72 + 0; 73 74 static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); 75 static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); 76 inithb_set_digest_bits_pattern_t77 void init () { mask = 0; } 78 addhb_set_digest_bits_pattern_t79 void add (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; } 80 addhb_set_digest_bits_pattern_t81 void add (hb_codepoint_t g) { mask |= mask_for (g); } 82 add_rangehb_set_digest_bits_pattern_t83 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 84 { 85 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 86 mask = (mask_t) -1; 87 else { 88 mask_t ma = mask_for (a); 89 mask_t mb = mask_for (b); 90 mask |= mb + (mb - ma) - (mb < ma); 91 } 92 return true; 93 } 94 95 template <typename T> add_arrayhb_set_digest_bits_pattern_t96 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 97 { 98 for (unsigned int i = 0; i < count; i++) 99 { 100 add (*array); 101 array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); 102 } 103 } 104 template <typename T> add_arrayhb_set_digest_bits_pattern_t105 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 106 template <typename T> add_sorted_arrayhb_set_digest_bits_pattern_t107 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 108 { 109 add_array (array, count, stride); 110 return true; 111 } 112 template <typename T> add_sorted_arrayhb_set_digest_bits_pattern_t113 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 114 may_havehb_set_digest_bits_pattern_t115 bool may_have (const hb_set_digest_bits_pattern_t &o) const 116 { return mask & o.mask; } 117 may_havehb_set_digest_bits_pattern_t118 bool may_have (hb_codepoint_t g) const 119 { return mask & mask_for (g); } 120 121 private: 122 mask_forhb_set_digest_bits_pattern_t123 static mask_t mask_for (hb_codepoint_t g) 124 { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } 125 mask_t mask; 126 }; 127 128 template <typename head_t, typename tail_t> 129 struct hb_set_digest_combiner_t 130 { inithb_set_digest_combiner_t131 void init () 132 { 133 head.init (); 134 tail.init (); 135 } 136 addhb_set_digest_combiner_t137 void add (const hb_set_digest_combiner_t &o) 138 { 139 head.add (o.head); 140 tail.add (o.tail); 141 } 142 addhb_set_digest_combiner_t143 void add (hb_codepoint_t g) 144 { 145 head.add (g); 146 tail.add (g); 147 } 148 add_rangehb_set_digest_combiner_t149 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 150 { 151 return head.add_range (a, b) && 152 tail.add_range (a, b); 153 } 154 template <typename T> add_arrayhb_set_digest_combiner_t155 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 156 { 157 head.add_array (array, count, stride); 158 tail.add_array (array, count, stride); 159 } 160 template <typename T> add_arrayhb_set_digest_combiner_t161 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 162 template <typename T> add_sorted_arrayhb_set_digest_combiner_t163 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 164 { 165 return head.add_sorted_array (array, count, stride) && 166 tail.add_sorted_array (array, count, stride); 167 } 168 template <typename T> add_sorted_arrayhb_set_digest_combiner_t169 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 170 may_havehb_set_digest_combiner_t171 bool may_have (const hb_set_digest_combiner_t &o) const 172 { 173 return head.may_have (o.head) && tail.may_have (o.tail); 174 } 175 may_havehb_set_digest_combiner_t176 bool may_have (hb_codepoint_t g) const 177 { 178 return head.may_have (g) && tail.may_have (g); 179 } 180 181 private: 182 head_t head; 183 tail_t tail; 184 }; 185 186 187 /* 188 * hb_set_digest_t 189 * 190 * This is a combination of digests that performs "best". 191 * There is not much science to this: it's a result of intuition 192 * and testing. 193 */ 194 using hb_set_digest_t = 195 hb_set_digest_combiner_t 196 < 197 hb_set_digest_bits_pattern_t<unsigned long, 4>, 198 hb_set_digest_combiner_t 199 < 200 hb_set_digest_bits_pattern_t<unsigned long, 0>, 201 hb_set_digest_bits_pattern_t<unsigned long, 9> 202 > 203 > 204 ; 205 206 207 #endif /* HB_SET_DIGEST_HH */ 208