• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include <uchar.h>
19 
20 #include <gtest/gtest.h>
21 
22 #include <errno.h>
23 #include <limits.h>
24 #include <locale.h>
25 #include <stdint.h>
26 
// Checks the storage size of the two <uchar.h> code unit types.
TEST(uchar, sizeof_uchar_t) {
  const size_t utf16_unit_size = sizeof(char16_t);
  const size_t utf32_unit_size = sizeof(char32_t);

  EXPECT_EQ(2U, utf16_unit_size);
  EXPECT_EQ(4U, utf32_unit_size);
}
31 
// Exercises c32rtomb when the conversion state already holds a partially
// decoded multibyte character left behind by mbrtoc32.
TEST(uchar, start_state) {
  const size_t kIncomplete = static_cast<size_t>(-2);
  const size_t kIllegal = static_cast<size_t>(-1);

  char encoded[MB_LEN_MAX];
  mbstate_t state;

  // Encoding a character while the state is non-initial must be rejected.
  memset(&state, 0, sizeof(state));
  EXPECT_EQ(kIncomplete, mbrtoc32(nullptr, "\xc2", 1, &state));
  errno = 0;
  EXPECT_EQ(kIllegal, c32rtomb(encoded, 0x00a2, &state));
  EXPECT_EQ(EILSEQ, errno);

  // A null output buffer resets the shift state instead of failing...
  memset(&state, 0, sizeof(state));
  EXPECT_EQ(kIncomplete, mbrtoc32(nullptr, "\xc2", 1, &state));
  EXPECT_EQ(1U, c32rtomb(nullptr, 0x00a2, &state));
  EXPECT_TRUE(mbsinit(&state));

  // ...and so does encoding the null character.
  memset(&state, 0, sizeof(state));
  EXPECT_EQ(kIncomplete, mbrtoc32(nullptr, "\xf0\xa4", 1, &state));
  EXPECT_EQ(1U, c32rtomb(encoded, L'\0', &state));
  EXPECT_TRUE(mbsinit(&state));
}
55 
// c16rtomb called with a null output buffer is expected to report one byte,
// for both the null character and an ASCII character.
TEST(uchar, c16rtomb_null_out) {
  const size_t nul_length = c16rtomb(nullptr, L'\0', nullptr);
  EXPECT_EQ(1U, nul_length);

  const size_t ascii_length = c16rtomb(nullptr, L'h', nullptr);
  EXPECT_EQ(1U, ascii_length);
}
60 
// Encoding the null character into a real buffer is expected to produce
// exactly one byte.
TEST(uchar, c16rtomb_null_char) {
  char encoded[MB_LEN_MAX];
  const size_t written = c16rtomb(encoded, L'\0', nullptr);
  EXPECT_EQ(1U, written);
}
65 
// Encodes UTF-16 code units into UTF-8 of every length, including a 4-byte
// sequence assembled from a surrogate pair.
TEST(uchar, c16rtomb) {
  char utf8[MB_LEN_MAX];

  // ASCII is encoded before the locale is switched as well.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(1U, c16rtomb(utf8, L'h', nullptr));
  EXPECT_EQ('h', utf8[0]);

  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  // One byte: 'h'.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(1U, c16rtomb(utf8, L'h', nullptr));
  EXPECT_EQ('h', utf8[0]);

  // Two bytes: U+00A2.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(2U, c16rtomb(utf8, 0x00a2, nullptr));
  EXPECT_EQ('\xc2', utf8[0]);
  EXPECT_EQ('\xa2', utf8[1]);

  // Three bytes: U+20AC.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(3U, c16rtomb(utf8, 0x20ac, nullptr));
  EXPECT_EQ('\xe2', utf8[0]);
  EXPECT_EQ('\x82', utf8[1]);
  EXPECT_EQ('\xac', utf8[2]);

  // Four bytes: the leading surrogate yields nothing, the trailing surrogate
  // then yields the whole sequence.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(0U, c16rtomb(utf8, 0xdbea, nullptr));
  EXPECT_EQ(4U, c16rtomb(utf8, 0xdfcd, nullptr));
  EXPECT_EQ('\xf4', utf8[0]);
  EXPECT_EQ('\x8a', utf8[1]);
  EXPECT_EQ('\xaf', utf8[2]);
  EXPECT_EQ('\x8d', utf8[3]);
}
100 
// Malformed surrogate usage must make c16rtomb fail.
TEST(uchar, c16rtomb_invalid) {
  const size_t kFailure = static_cast<size_t>(-1);
  char utf8[MB_LEN_MAX];

  // A trailing surrogate with no leading surrogate before it.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(kFailure, c16rtomb(utf8, 0xdfcd, nullptr));

  // A leading surrogate followed by another leading surrogate.
  EXPECT_EQ(0U, c16rtomb(utf8, 0xdbea, nullptr));
  EXPECT_EQ(kFailure, c16rtomb(utf8, 0xdbea, nullptr));
}
110 
// With every pointer argument null and a zero length, mbrtoc16 is expected to
// return 0.
TEST(uchar, mbrtoc16_null) {
  const size_t consumed = mbrtoc16(nullptr, nullptr, 0, nullptr);
  ASSERT_EQ(0U, consumed);
}
114 
// A zero-length input is expected to consume nothing and leave the output
// untouched.
TEST(uchar, mbrtoc16_zero_len) {
  char16_t decoded = L'x';

  // The sentinel value must survive a zero-length call.
  ASSERT_EQ(0U, mbrtoc16(&decoded, "hello", 0, nullptr));
  ASSERT_EQ(L'x', decoded);

  // Repeating the zero-length call, even on an empty string, still gives 0.
  ASSERT_EQ(0U, mbrtoc16(&decoded, "hello", 0, nullptr));
  ASSERT_EQ(0U, mbrtoc16(&decoded, "", 0, nullptr));

  // Allowing a single byte decodes the first character.
  ASSERT_EQ(1U, mbrtoc16(&decoded, "hello", 1, nullptr));
  ASSERT_EQ(L'h', decoded);
}
127 
// Decodes complete UTF-8 sequences of every length into UTF-16 code units and
// checks that an illegal 5-byte sequence is rejected with EILSEQ.
TEST(uchar, mbrtoc16) {
  char16_t out;

  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  // 1-byte UTF-8.
  ASSERT_EQ(1U, mbrtoc16(&out, "abcdef", 6, nullptr));
  ASSERT_EQ(L'a', out);
  // 2-byte UTF-8.
  ASSERT_EQ(2U, mbrtoc16(&out, "\xc2\xa2" "cdef", 6, nullptr));
  ASSERT_EQ(static_cast<char16_t>(0x00a2), out);
  // 3-byte UTF-8.
  ASSERT_EQ(3U, mbrtoc16(&out, "\xe2\x82\xac" "def", 6, nullptr));
  ASSERT_EQ(static_cast<char16_t>(0x20ac), out);
  // 4-byte UTF-8 will be returned as a surrogate pair...
  // This literal holds only the four sequence bytes plus its terminator, so
  // the length passed must not claim more than the array contains.
  ASSERT_EQ(static_cast<size_t>(-3),
            mbrtoc16(&out, "\xf4\x8a\xaf\x8d", 4, nullptr));
  ASSERT_EQ(static_cast<char16_t>(0xdbea), out);
  ASSERT_EQ(4U, mbrtoc16(&out, "\xf4\x8a\xaf\x8d" "ef", 6, nullptr));
  ASSERT_EQ(static_cast<char16_t>(0xdfcd), out);
  // Illegal 5-byte UTF-8.
  errno = 0;
  ASSERT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\xf8\xa1\xa2\xa3\xa4", 5, nullptr));
  ASSERT_EQ(EILSEQ, errno);
}
154 
// A 4-byte sequence whose payload is too small for its length must be
// rejected by mbrtoc16 as an encoding error.
TEST(uchar, mbrtoc16_reserved_range) {
  char16_t out;
  // The length matches the four sequence bytes; the literal contains nothing
  // beyond them except its terminator.
  errno = 0;
  ASSERT_EQ(static_cast<size_t>(-1),
            mbrtoc16(&out, "\xf0\x80\xbf\xbf", 4, nullptr));
  ASSERT_EQ(EILSEQ, errno);
}
160 
// A 4-byte sequence that decodes past U+10FFFF must be rejected by mbrtoc16
// as an encoding error.
TEST(uchar, mbrtoc16_beyond_range) {
  char16_t out;
  // The length matches the four sequence bytes; the literal contains nothing
  // beyond them except its terminator.
  errno = 0;
  ASSERT_EQ(static_cast<size_t>(-1),
            mbrtoc16(&out, "\xf5\x80\x80\x80", 4, nullptr));
  ASSERT_EQ(EILSEQ, errno);
}
166 
test_mbrtoc16_incomplete(mbstate_t * ps)167 void test_mbrtoc16_incomplete(mbstate_t* ps) {
168   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
169   uselocale(LC_GLOBAL_LOCALE);
170 
171   char16_t out;
172   // 2-byte UTF-8.
173   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps));
174   ASSERT_EQ(1U, mbrtoc16(&out, "\xa2" "cdef", 5, ps));
175   ASSERT_EQ(static_cast<char16_t>(0x00a2), out);
176   ASSERT_TRUE(mbsinit(ps));
177   // 3-byte UTF-8.
178   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xe2", 1, ps));
179   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x82", 1, ps));
180   ASSERT_EQ(1U, mbrtoc16(&out, "\xac" "def", 4, ps));
181   ASSERT_EQ(static_cast<char16_t>(0x20ac), out);
182   ASSERT_TRUE(mbsinit(ps));
183   // 4-byte UTF-8.
184   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xf4", 1, ps));
185   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\x8a\xaf", 2, ps));
186   ASSERT_EQ(static_cast<size_t>(-3), mbrtoc16(&out, "\x8d" "ef", 3, ps));
187   ASSERT_EQ(static_cast<char16_t>(0xdbea), out);
188   ASSERT_EQ(1U, mbrtoc16(&out, "\x80" "ef", 3, ps));
189   ASSERT_EQ(static_cast<char16_t>(0xdfcd), out);
190   ASSERT_TRUE(mbsinit(ps));
191 
192   // Invalid 2-byte
193   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc16(&out, "\xc2", 1, ps));
194   errno = 0;
195   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc16(&out, "\x20" "cdef", 5, ps));
196   ASSERT_EQ(EILSEQ, errno);
197 }
198 
// Runs the piecewise mbrtoc16 checks twice: with a caller-owned, zeroed
// conversion state and then with a null state pointer.
TEST(uchar, mbrtoc16_incomplete) {
  mbstate_t caller_state;
  memset(&caller_state, 0, sizeof(caller_state));
  test_mbrtoc16_incomplete(&caller_state);

  test_mbrtoc16_incomplete(nullptr);
}
206 
// Encodes UTF-32 code points into UTF-8 of every length and checks that an
// invalid code point fails with EILSEQ.
TEST(uchar, c32rtomb) {
  // A null output buffer is expected to report one byte.
  EXPECT_EQ(1U, c32rtomb(nullptr, L'\0', nullptr));
  EXPECT_EQ(1U, c32rtomb(nullptr, L'h', nullptr));

  char utf8[MB_LEN_MAX];

  // The null character overwrites only the first byte of a 0x01-filled buffer.
  memset(utf8, 1, sizeof(utf8));
  EXPECT_EQ(1U, c32rtomb(utf8, L'\0', nullptr));
  EXPECT_EQ('\0', utf8[0]);
  EXPECT_EQ('\x01', utf8[1]);

  // ASCII is encoded before the locale is switched as well.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(1U, c32rtomb(utf8, L'h', nullptr));
  EXPECT_EQ('h', utf8[0]);

  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  // One byte: 'h'.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(1U, c32rtomb(utf8, L'h', nullptr));
  EXPECT_EQ('h', utf8[0]);

  // Two bytes: U+00A2.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(2U, c32rtomb(utf8, 0x00a2, nullptr));
  EXPECT_EQ('\xc2', utf8[0]);
  EXPECT_EQ('\xa2', utf8[1]);

  // Three bytes: U+20AC.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(3U, c32rtomb(utf8, 0x20ac, nullptr));
  EXPECT_EQ('\xe2', utf8[0]);
  EXPECT_EQ('\x82', utf8[1]);
  EXPECT_EQ('\xac', utf8[2]);

  // Four bytes: U+24B62.
  memset(utf8, 0, sizeof(utf8));
  EXPECT_EQ(4U, c32rtomb(utf8, 0x24b62, nullptr));
  EXPECT_EQ('\xf0', utf8[0]);
  EXPECT_EQ('\xa4', utf8[1]);
  EXPECT_EQ('\xad', utf8[2]);
  EXPECT_EQ('\xa2', utf8[3]);

  // 0xffffffff is not an encodable code point.
  errno = 0;
  EXPECT_EQ(static_cast<size_t>(-1), c32rtomb(utf8, 0xffffffff, nullptr));
  EXPECT_EQ(EILSEQ, errno);
}
252 
// U+FFFE and U+FFFF must still be decoded successfully by mbrtoc32.
TEST(uchar, mbrtoc32_valid_non_characters) {
  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  char32_t decoded[8] = {};

  // U+FFFE.
  ASSERT_EQ(3U, mbrtoc32(decoded, "\xef\xbf\xbe", 3, nullptr));
  ASSERT_EQ(0xfffeU, decoded[0]);

  // U+FFFF.
  ASSERT_EQ(3U, mbrtoc32(decoded, "\xef\xbf\xbf", 3, nullptr));
  ASSERT_EQ(0xffffU, decoded[0]);
}
263 
// A 4-byte sequence that decodes past U+10FFFF must fail with EILSEQ.
TEST(uchar, mbrtoc32_out_of_range) {
  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  const size_t kIllegal = static_cast<size_t>(-1);
  char32_t decoded[8] = {};

  errno = 0;
  ASSERT_EQ(kIllegal, mbrtoc32(decoded, "\xf5\x80\x80\x80", 4, nullptr));
  ASSERT_EQ(EILSEQ, errno);
}
273 
// Covers mbrtoc32 with zero-length input, null arguments, complete UTF-8
// sequences of every length, and illegal sequences.
TEST(uchar, mbrtoc32) {
  const size_t kIllegal = static_cast<size_t>(-1);
  char32_t decoded[8];

  // A zero-length call must leave the sentinel value untouched.
  decoded[0] = L'x';
  ASSERT_EQ(0U, mbrtoc32(decoded, "hello", 0, nullptr));
  ASSERT_EQ(static_cast<char32_t>(L'x'), decoded[0]);

  // Zero length keeps returning 0; one byte decodes the first character.
  ASSERT_EQ(0U, mbrtoc32(decoded, "hello", 0, nullptr));
  ASSERT_EQ(0U, mbrtoc32(decoded, "", 0, nullptr));
  ASSERT_EQ(1U, mbrtoc32(decoded, "hello", 1, nullptr));
  ASSERT_EQ(static_cast<char32_t>(L'h'), decoded[0]);

  // The same three calls with a null output pointer.
  ASSERT_EQ(0U, mbrtoc32(nullptr, "hello", 0, nullptr));
  ASSERT_EQ(0U, mbrtoc32(nullptr, "", 0, nullptr));
  ASSERT_EQ(1U, mbrtoc32(nullptr, "hello", 1, nullptr));

  // Every pointer argument null.
  ASSERT_EQ(0U, mbrtoc32(nullptr, nullptr, 0, nullptr));

  ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
  uselocale(LC_GLOBAL_LOCALE);

  // One byte: 'a'.
  ASSERT_EQ(1U, mbrtoc32(decoded, "abcdef", 6, nullptr));
  ASSERT_EQ(static_cast<char32_t>(L'a'), decoded[0]);

  // Two bytes: U+00A2.
  ASSERT_EQ(2U, mbrtoc32(decoded, "\xc2\xa2" "cdef", 6, nullptr));
  ASSERT_EQ(static_cast<char32_t>(0x00a2), decoded[0]);

  // Three bytes: U+20AC.
  ASSERT_EQ(3U, mbrtoc32(decoded, "\xe2\x82\xac" "def", 6, nullptr));
  ASSERT_EQ(static_cast<char32_t>(0x20ac), decoded[0]);

  // Four bytes: U+24B62.
  ASSERT_EQ(4U, mbrtoc32(decoded, "\xf0\xa4\xad\xa2" "ef", 6, nullptr));
  ASSERT_EQ(static_cast<char32_t>(0x24b62), decoded[0]);

#if defined(__BIONIC__) // glibc allows this.
  // A 5-byte sequence is illegal.
  errno = 0;
  ASSERT_EQ(kIllegal, mbrtoc32(decoded, "\xf8\xa1\xa2\xa3\xa4" "f", 6, nullptr));
  ASSERT_EQ(EILSEQ, errno);
#endif

  // An over-long encoding is illegal.
  errno = 0;
  ASSERT_EQ(kIllegal, mbrtoc32(decoded, "\xf0\x82\x82\xac" "ef", 6, nullptr));
  ASSERT_EQ(EILSEQ, errno);
}
318 
test_mbrtoc32_incomplete(mbstate_t * ps)319 void test_mbrtoc32_incomplete(mbstate_t* ps) {
320   ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
321   uselocale(LC_GLOBAL_LOCALE);
322 
323   char32_t out;
324   // 2-byte UTF-8.
325   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps));
326   ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "cdef", 5, ps));
327   ASSERT_EQ(static_cast<char32_t>(0x00a2), out);
328   ASSERT_TRUE(mbsinit(ps));
329   // 3-byte UTF-8.
330   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xe2", 1, ps));
331   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\x82", 1, ps));
332   ASSERT_EQ(1U, mbrtoc32(&out, "\xac" "def", 4, ps));
333   ASSERT_EQ(static_cast<char32_t>(0x20ac), out);
334   ASSERT_TRUE(mbsinit(ps));
335   // 4-byte UTF-8.
336   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xf0", 1, ps));
337   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xa4\xad", 2, ps));
338   ASSERT_EQ(1U, mbrtoc32(&out, "\xa2" "ef", 3, ps));
339   ASSERT_EQ(static_cast<char32_t>(0x24b62), out);
340   ASSERT_TRUE(mbsinit(ps));
341 
342   // Invalid 2-byte
343   ASSERT_EQ(static_cast<size_t>(-2), mbrtoc32(&out, "\xc2", 1, ps));
344   errno = 0;
345   ASSERT_EQ(static_cast<size_t>(-1), mbrtoc32(&out, "\x20" "cdef", 5, ps));
346   ASSERT_EQ(EILSEQ, errno);
347 }
348 
// Runs the piecewise mbrtoc32 checks twice: with a caller-owned, zeroed
// conversion state and then with a null state pointer.
TEST(uchar, mbrtoc32_incomplete) {
  mbstate_t caller_state;
  memset(&caller_state, 0, sizeof(caller_state));
  test_mbrtoc32_incomplete(&caller_state);

  test_mbrtoc32_incomplete(nullptr);
}
356