1 /* 2 * Copyright © 2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_DIGEST_HH 28 #define HB_SET_DIGEST_HH 29 30 #include "hb.hh" 31 32 /* 33 * The set digests here implement various "filters" that support 34 * "approximate member query". Conceptually these are like Bloom 35 * Filter and Quotient Filter, however, much smaller, faster, and 36 * designed to fit the requirements of our uses for glyph coverage 37 * queries. 38 * 39 * Our filters are highly accurate if the lookup covers fairly local 40 * set of glyphs, but fully flooded and ineffective if coverage is 41 * all over the place. 42 * 43 * The frozen-set can be used instead of a digest, to trade more 44 * memory for 100% accuracy, but in practice, that doesn't look like 45 * an attractive trade-off. 46 */ 47 48 template <typename mask_t, unsigned int shift> 49 struct hb_set_digest_lowest_bits_t 50 { 51 enum { mask_bytes = sizeof (mask_t) }; 52 enum { mask_bits = sizeof (mask_t) * 8 }; 53 enum { num_bits = 0 54 + (mask_bytes >= 1 ? 3 : 0) 55 + (mask_bytes >= 2 ? 1 : 0) 56 + (mask_bytes >= 4 ? 1 : 0) 57 + (mask_bytes >= 8 ? 1 : 0) 58 + (mask_bytes >= 16? 1 : 0) 59 + 0 }; 60 61 static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); 62 static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); 63 inithb_set_digest_lowest_bits_t64 void init () { mask = 0; } 65 addhb_set_digest_lowest_bits_t66 void add (hb_codepoint_t g) { mask |= mask_for (g); } 67 add_rangehb_set_digest_lowest_bits_t68 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 69 { 70 if ((b >> shift) - (a >> shift) >= mask_bits - 1) 71 mask = (mask_t) -1; 72 else { 73 mask_t ma = mask_for (a); 74 mask_t mb = mask_for (b); 75 mask |= mb + (mb - ma) - (mb < ma); 76 } 77 return true; 78 } 79 80 template <typename T> add_arrayhb_set_digest_lowest_bits_t81 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 82 { 83 for (unsigned int i = 0; i < count; i++) 84 { 85 add (*array); 86 array = (const T *) (stride + (const char *) array); 87 } 88 } 89 template <typename T> add_sorted_arrayhb_set_digest_lowest_bits_t90 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 91 { 92 for (unsigned int i = 0; i < count; i++) 93 { 94 add (*array); 95 array = (const T *) (stride + (const char *) array); 96 } 97 return true; 98 } 99 may_havehb_set_digest_lowest_bits_t100 bool may_have (hb_codepoint_t g) const 101 { return !!(mask & mask_for (g)); } 102 103 private: 104 mask_forhb_set_digest_lowest_bits_t105 static mask_t mask_for (hb_codepoint_t g) 106 { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } 107 mask_t mask; 108 }; 109 110 template <typename head_t, typename tail_t> 111 struct hb_set_digest_combiner_t 112 { inithb_set_digest_combiner_t113 void init () 114 { 115 head.init (); 116 tail.init (); 117 } 118 addhb_set_digest_combiner_t119 void add (hb_codepoint_t g) 120 { 121 head.add (g); 122 tail.add (g); 123 } 124 add_rangehb_set_digest_combiner_t125 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 126 { 127 head.add_range (a, b); 128 tail.add_range (a, b); 129 return true; 130 } 131 template <typename T> add_arrayhb_set_digest_combiner_t132 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 133 { 134 head.add_array (array, count, stride); 135 tail.add_array (array, count, stride); 136 } 137 template <typename T> add_sorted_arrayhb_set_digest_combiner_t138 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 139 { 140 head.add_sorted_array (array, count, stride); 141 tail.add_sorted_array (array, count, stride); 142 return true; 143 } 144 may_havehb_set_digest_combiner_t145 bool may_have (hb_codepoint_t g) const 146 { 147 return head.may_have (g) && tail.may_have (g); 148 } 149 150 private: 151 head_t head; 152 tail_t tail; 153 }; 154 155 156 /* 157 * hb_set_digest_t 158 * 159 * This is a combination of digests that performs "best". 160 * There is not much science to this: it's a result of intuition 161 * and testing. 162 */ 163 typedef hb_set_digest_combiner_t 164 < 165 hb_set_digest_lowest_bits_t<unsigned long, 4>, 166 hb_set_digest_combiner_t 167 < 168 hb_set_digest_lowest_bits_t<unsigned long, 0>, 169 hb_set_digest_lowest_bits_t<unsigned long, 9> 170 > 171 > hb_set_digest_t; 172 173 174 #endif /* HB_SET_DIGEST_HH */ 175