/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"

/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The frozen-set can be used instead of a digest, to trade more
 * memory for 100% accuracy, but in practice, that doesn't look like
 * an attractive trade-off.
 */

48 template <typename mask_t, unsigned int shift>
49 struct hb_set_digest_lowest_bits_t
50 {
51   static constexpr unsigned mask_bytes = sizeof (mask_t);
52   static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
53   static constexpr unsigned num_bits = 0
54 				     + (mask_bytes >= 1 ? 3 : 0)
55 				     + (mask_bytes >= 2 ? 1 : 0)
56 				     + (mask_bytes >= 4 ? 1 : 0)
57 				     + (mask_bytes >= 8 ? 1 : 0)
58 				     + (mask_bytes >= 16? 1 : 0)
59 				     + 0;
60 
61   static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
62   static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
63 
inithb_set_digest_lowest_bits_t64   void init () { mask = 0; }
65 
addhb_set_digest_lowest_bits_t66   void add (hb_codepoint_t g) { mask |= mask_for (g); }
67 
add_rangehb_set_digest_lowest_bits_t68   bool add_range (hb_codepoint_t a, hb_codepoint_t b)
69   {
70     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
71       mask = (mask_t) -1;
72     else {
73       mask_t ma = mask_for (a);
74       mask_t mb = mask_for (b);
75       mask |= mb + (mb - ma) - (mb < ma);
76     }
77     return true;
78   }
79 
80   template <typename T>
add_arrayhb_set_digest_lowest_bits_t81   void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
82   {
83     for (unsigned int i = 0; i < count; i++)
84     {
85       add (*array);
86       array = (const T *) (stride + (const char *) array);
87     }
88   }
89   template <typename T>
add_arrayhb_set_digest_lowest_bits_t90   void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
91   template <typename T>
add_sorted_arrayhb_set_digest_lowest_bits_t92   bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
93   {
94     for (unsigned int i = 0; i < count; i++)
95     {
96       add (*array);
97       array = (const T *) (stride + (const char *) array);
98     }
99     return true;
100   }
101   template <typename T>
add_sorted_arrayhb_set_digest_lowest_bits_t102   bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
103 
may_havehb_set_digest_lowest_bits_t104   bool may_have (hb_codepoint_t g) const
105   { return !!(mask & mask_for (g)); }
106 
107   private:
108 
mask_forhb_set_digest_lowest_bits_t109   static mask_t mask_for (hb_codepoint_t g)
110   { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
111   mask_t mask;
112 };
113 
114 template <typename head_t, typename tail_t>
115 struct hb_set_digest_combiner_t
116 {
inithb_set_digest_combiner_t117   void init ()
118   {
119     head.init ();
120     tail.init ();
121   }
122 
addhb_set_digest_combiner_t123   void add (hb_codepoint_t g)
124   {
125     head.add (g);
126     tail.add (g);
127   }
128 
add_rangehb_set_digest_combiner_t129   bool add_range (hb_codepoint_t a, hb_codepoint_t b)
130   {
131     head.add_range (a, b);
132     tail.add_range (a, b);
133     return true;
134   }
135   template <typename T>
add_arrayhb_set_digest_combiner_t136   void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
137   {
138     head.add_array (array, count, stride);
139     tail.add_array (array, count, stride);
140   }
141   template <typename T>
add_arrayhb_set_digest_combiner_t142   void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
143   template <typename T>
add_sorted_arrayhb_set_digest_combiner_t144   bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
145   {
146     head.add_sorted_array (array, count, stride);
147     tail.add_sorted_array (array, count, stride);
148     return true;
149   }
150   template <typename T>
add_sorted_arrayhb_set_digest_combiner_t151   bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
152 
may_havehb_set_digest_combiner_t153   bool may_have (hb_codepoint_t g) const
154   {
155     return head.may_have (g) && tail.may_have (g);
156   }
157 
158   private:
159   head_t head;
160   tail_t tail;
161 };


/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
171 using hb_set_digest_t =
172   hb_set_digest_combiner_t
173   <
174     hb_set_digest_lowest_bits_t<unsigned long, 4>,
175     hb_set_digest_combiner_t
176     <
177       hb_set_digest_lowest_bits_t<unsigned long, 0>,
178       hb_set_digest_lowest_bits_t<unsigned long, 9>
179     >
180   >
181 ;


#endif /* HB_SET_DIGEST_HH */