1 /* 2 * Copyright © 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_DIGEST_HH 28 #define HB_SET_DIGEST_HH 29 30 #include "hb.hh" 31 32 /* 33 * The set digests here implement various "filters" that support 34 * "approximate member query". Conceptually these are like Bloom 35 * Filter and Quotient Filter, however, much smaller, faster, and 36 * designed to fit the requirements of our uses for glyph coverage 37 * queries. 38 * 39 * Our filters are highly accurate if the lookup covers fairly local 40 * set of glyphs, but fully flooded and ineffective if coverage is 41 * all over the place. 42 * 43 * The frozen-set can be used instead of a digest, to trade more 44 * memory for 100% accuracy, but in practice, that doesn't look like 45 * an attractive trade-off. 46 */ 47 48 template <typename mask_t, unsigned int shift> 49 struct hb_set_digest_lowest_bits_t 50 { 51 static constexpr unsigned mask_bytes = sizeof (mask_t); 52 static constexpr unsigned mask_bits = sizeof (mask_t) * 8; 53 static constexpr unsigned num_bits = 0 54 + (mask_bytes >= 1 ? 3 : 0) 55 + (mask_bytes >= 2 ? 1 : 0) 56 + (mask_bytes >= 4 ? 1 : 0) 57 + (mask_bytes >= 8 ? 1 : 0) 58 + (mask_bytes >= 16? 1 : 0) 59 + 0; 60 61 static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); 62 static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); 63 inithb_set_digest_lowest_bits_t64 void init () { mask = 0; } 65 addhb_set_digest_lowest_bits_t66 void add (hb_codepoint_t g) { mask |= mask_for (g); } 67 add_rangehb_set_digest_lowest_bits_t68 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 69 { 70 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 71 mask = (mask_t) -1; 72 else { 73 mask_t ma = mask_for (a); 74 mask_t mb = mask_for (b); 75 mask |= mb + (mb - ma) - (mb < ma); 76 } 77 return true; 78 } 79 80 template <typename T> add_arrayhb_set_digest_lowest_bits_t81 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 82 { 83 for (unsigned int i = 0; i < count; i++) 84 { 85 add (*array); 86 array = (const T *) (stride + (const char *) array); 87 } 88 } 89 template <typename T> add_arrayhb_set_digest_lowest_bits_t90 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 91 template <typename T> add_sorted_arrayhb_set_digest_lowest_bits_t92 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 93 { 94 for (unsigned int i = 0; i < count; i++) 95 { 96 add (*array); 97 array = (const T *) (stride + (const char *) array); 98 } 99 return true; 100 } 101 template <typename T> add_sorted_arrayhb_set_digest_lowest_bits_t102 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 103 may_havehb_set_digest_lowest_bits_t104 bool may_have (hb_codepoint_t g) const 105 { return !!(mask & mask_for (g)); } 106 107 private: 108 mask_forhb_set_digest_lowest_bits_t109 static mask_t mask_for (hb_codepoint_t g) 110 { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } 111 mask_t mask; 112 }; 113 114 template <typename head_t, typename tail_t> 115 struct hb_set_digest_combiner_t 116 { inithb_set_digest_combiner_t117 void init () 118 { 119 head.init (); 120 tail.init (); 121 } 122 addhb_set_digest_combiner_t123 void add (hb_codepoint_t g) 124 { 125 head.add (g); 126 tail.add (g); 127 } 128 add_rangehb_set_digest_combiner_t129 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 130 { 131 head.add_range (a, b); 132 tail.add_range (a, b); 133 return true; 134 } 135 template <typename T> add_arrayhb_set_digest_combiner_t136 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 137 { 138 head.add_array (array, count, stride); 139 tail.add_array (array, count, stride); 140 } 141 template <typename T> add_arrayhb_set_digest_combiner_t142 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } 143 template <typename T> add_sorted_arrayhb_set_digest_combiner_t144 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 145 { 146 head.add_sorted_array (array, count, stride); 147 tail.add_sorted_array (array, count, stride); 148 return true; 149 } 150 template <typename T> add_sorted_arrayhb_set_digest_combiner_t151 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } 152 may_havehb_set_digest_combiner_t153 bool may_have (hb_codepoint_t g) const 154 { 155 return head.may_have (g) && tail.may_have (g); 156 } 157 158 private: 159 head_t head; 160 tail_t tail; 161 }; 162 163 164 /* 165 * hb_set_digest_t 166 * 167 * This is a combination of digests that performs "best". 168 * There is not much science to this: it's a result of intuition 169 * and testing. 170 */ 171 using hb_set_digest_t = 172 hb_set_digest_combiner_t 173 < 174 hb_set_digest_lowest_bits_t<unsigned long, 4>, 175 hb_set_digest_combiner_t 176 < 177 hb_set_digest_lowest_bits_t<unsigned long, 0>, 178 hb_set_digest_lowest_bits_t<unsigned long, 9> 179 > 180 > 181 ; 182 183 184 #endif /* HB_SET_DIGEST_HH */ 185