• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2011,2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
26 
27 #ifndef HB_UTF_PRIVATE_HH
28 #define HB_UTF_PRIVATE_HH
29 
30 #include "hb-private.hh"
31 
32 
/* UTF-8 */
34 
/*
 * Classify a UTF-8 lead byte.
 *
 * Char: value of the first byte of a sequence.  It is evaluated several
 *       times, so it must be free of side effects.
 * Mask: lvalue; receives the bit mask that extracts the payload bits of the
 *       lead byte, or 0 when Char cannot start a sequence.
 * Len:  lvalue; receives the total sequence length in bytes (1 to 4), or 0
 *       when Char is a continuation byte (10xxxxxx) or has its five top
 *       bits set (0xf8..0xff).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and stays correct inside an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define HB_UTF8_COMPUTE(Char, Mask, Len) \
  do { \
    if ((Char) < 128) { (Len) = 1; (Mask) = 0x7f; } \
    else if (((Char) & 0xe0) == 0xc0) { (Len) = 2; (Mask) = 0x1f; } \
    else if (((Char) & 0xf0) == 0xe0) { (Len) = 3; (Mask) = 0x0f; } \
    else if (((Char) & 0xf8) == 0xf0) { (Len) = 4; (Mask) = 0x07; } \
    else { (Len) = 0; (Mask) = 0; } \
  } while (0)
41 
42 static inline const uint8_t *
hb_utf_next(const uint8_t * text,const uint8_t * end,hb_codepoint_t * unicode)43 hb_utf_next (const uint8_t *text,
44 	     const uint8_t *end,
45 	     hb_codepoint_t *unicode)
46 {
47   hb_codepoint_t c = *text, mask;
48   unsigned int len;
49 
50   /* TODO check for overlong sequences? */
51 
52   HB_UTF8_COMPUTE (c, mask, len);
53   if (unlikely (!len || (unsigned int) (end - text) < len)) {
54     *unicode = -1;
55     return text + 1;
56   } else {
57     hb_codepoint_t result;
58     unsigned int i;
59     result = c & mask;
60     for (i = 1; i < len; i++)
61       {
62 	if (unlikely ((text[i] & 0xc0) != 0x80))
63 	  {
64 	    *unicode = -1;
65 	    return text + 1;
66 	  }
67 	result <<= 6;
68 	result |= (text[i] & 0x3f);
69       }
70     *unicode = result;
71     return text + len;
72   }
73 }
74 
75 static inline const uint8_t *
hb_utf_prev(const uint8_t * text,const uint8_t * start,hb_codepoint_t * unicode)76 hb_utf_prev (const uint8_t *text,
77 	     const uint8_t *start,
78 	     hb_codepoint_t *unicode)
79 {
80   const uint8_t *end = text--;
81   while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
82     text--;
83 
84   hb_codepoint_t c = *text, mask;
85   unsigned int len;
86 
87   /* TODO check for overlong sequences? */
88 
89   HB_UTF8_COMPUTE (c, mask, len);
90   if (unlikely (!len || (unsigned int) (end - text) != len)) {
91     *unicode = -1;
92     return end - 1;
93   } else {
94     hb_codepoint_t result;
95     unsigned int i;
96     result = c & mask;
97     for (i = 1; i < len; i++)
98       {
99 	result <<= 6;
100 	result |= (text[i] & 0x3f);
101       }
102     *unicode = result;
103     return text;
104   }
105 }
106 
107 
/*
 * Length, in bytes, of a NUL-terminated UTF-8 string (terminator excluded).
 * This counts code units, not code points.
 */
static inline unsigned int
hb_utf_strlen (const uint8_t *text)
{
  const char *bytes = (const char *) text;
  size_t count = strlen (bytes);
  return count;
}
113 
114 
/* UTF-16 */
116 
117 static inline const uint16_t *
hb_utf_next(const uint16_t * text,const uint16_t * end,hb_codepoint_t * unicode)118 hb_utf_next (const uint16_t *text,
119 	     const uint16_t *end,
120 	     hb_codepoint_t *unicode)
121 {
122   hb_codepoint_t c = *text++;
123 
124   if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff)))
125   {
126     /* high surrogate */
127     hb_codepoint_t l;
128     if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff))))
129     {
130       /* low surrogate */
131       *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00);
132        text++;
133     } else
134       *unicode = -1;
135   } else
136     *unicode = c;
137 
138   return text;
139 }
140 
141 static inline const uint16_t *
hb_utf_prev(const uint16_t * text,const uint16_t * start,hb_codepoint_t * unicode)142 hb_utf_prev (const uint16_t *text,
143 	     const uint16_t *start,
144 	     hb_codepoint_t *unicode)
145 {
146   hb_codepoint_t c = *--text;
147 
148   if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff)))
149   {
150     /* low surrogate */
151     hb_codepoint_t h;
152     if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff))))
153     {
154       /* high surrogate */
155       *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
156        text--;
157     } else
158       *unicode = -1;
159   } else
160     *unicode = c;
161 
162   return text;
163 }
164 
165 
/*
 * Number of 16-bit code units before the terminating zero unit.
 * This counts code units, not code points: a surrogate pair counts as two.
 */
static inline unsigned int
hb_utf_strlen (const uint16_t *text)
{
  const uint16_t *cursor = text;

  while (*cursor != 0)
    cursor++;

  return (unsigned int) (cursor - text);
}
173 
174 
/* UTF-32 */
176 
177 static inline const uint32_t *
hb_utf_next(const uint32_t * text,const uint32_t * end HB_UNUSED,hb_codepoint_t * unicode)178 hb_utf_next (const uint32_t *text,
179 	     const uint32_t *end HB_UNUSED,
180 	     hb_codepoint_t *unicode)
181 {
182   *unicode = *text++;
183   return text;
184 }
185 
186 static inline const uint32_t *
hb_utf_prev(const uint32_t * text,const uint32_t * start HB_UNUSED,hb_codepoint_t * unicode)187 hb_utf_prev (const uint32_t *text,
188 	     const uint32_t *start HB_UNUSED,
189 	     hb_codepoint_t *unicode)
190 {
191   *unicode = *--text;
192   return text;
193 }
194 
/*
 * Number of 32-bit code units before the terminating zero unit.
 */
static inline unsigned int
hb_utf_strlen (const uint32_t *text)
{
  const uint32_t *cursor = text;

  while (*cursor != 0)
    cursor++;

  return (unsigned int) (cursor - text);
}
202 
203 
204 #endif /* HB_UTF_PRIVATE_HH */
205