1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1998-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*
9 * File test.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/01/2000 Madhu Creation
15 *******************************************************************************
16 */
17
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/utf16.h"
21 #include "unicode/utf_old.h"
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "cintltst.h"
25 #include <stdio.h>
26
// Obsolete macro from obsolete unicode/utf_old.h, for some old test data.
// Only defined here when no earlier header supplied it, so the expected-result
// tables in this file can always name it.
#ifndef UTF_ERROR_VALUE
# define UTF_ERROR_VALUE 0xffff
#endif
31
32 #if !U_HIDE_OBSOLETE_UTF_OLD_H
/**
 * Prints every code unit of a NUL-terminated UChar string to stdout,
 * each one as hexadecimal followed by a single space.
 *
 * @param uchars NUL-terminated string to dump.
 */
static void printUChars(const UChar *uchars) {
    /* Measure once; calling u_strlen() in the loop condition rescans the string on every pass. */
    const int32_t length = u_strlen(uchars);
    /* 32-bit index: a 16-bit counter would overflow on strings longer than 32767 units. */
    int32_t i;
    for(i = 0; i < length; i++) {
        printf("%x ", uchars[i]);
    }
}
39 #endif
40
/* Forward declarations of the test cases that addUTF16Test() registers. */
static void TestCodeUnitValues(void);
static void TestCharLength(void);
static void TestGetChar(void);
static void TestNextPrevChar(void);
static void TestNulTerminated(void);
static void TestFwdBack(void);
static void TestSetChar(void);
static void TestAppendChar(void);
static void TestAppend(void);
static void TestSurrogate(void);

/* Declared without static, unlike the test cases above. */
void addUTF16Test(TestNode** root);
53
54 void
addUTF16Test(TestNode ** root)55 addUTF16Test(TestNode** root)
56 {
57 addTest(root, &TestCodeUnitValues, "utf16tst/TestCodeUnitValues");
58 addTest(root, &TestCharLength, "utf16tst/TestCharLength");
59 addTest(root, &TestGetChar, "utf16tst/TestGetChar");
60 addTest(root, &TestNextPrevChar, "utf16tst/TestNextPrevChar");
61 addTest(root, &TestNulTerminated, "utf16tst/TestNulTerminated");
62 addTest(root, &TestFwdBack, "utf16tst/TestFwdBack");
63 addTest(root, &TestSetChar, "utf16tst/TestSetChar");
64 addTest(root, &TestAppendChar, "utf16tst/TestAppendChar");
65 addTest(root, &TestAppend, "utf16tst/TestAppend");
66 addTest(root, &TestSurrogate, "utf16tst/TestSurrogate");
67 }
68
TestCodeUnitValues()69 static void TestCodeUnitValues()
70 {
71 static uint16_t codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
72
73 int16_t i;
74 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
75 UChar c=codeunit[i];
76 log_verbose("Testing code unit value of %x\n", c);
77 if(i<4){
78 if(
79 #if !U_HIDE_OBSOLETE_UTF_OLD_H
80 !UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || UTF16_IS_TRAIL(c) ||
81 #endif
82 !U16_IS_SINGLE(c) || U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
83 log_err("ERROR: %x is a single character\n", c);
84 }
85 }
86 if(i >= 4 && i< 8){
87 if(
88 #if !U_HIDE_OBSOLETE_UTF_OLD_H
89 !UTF16_IS_LEAD(c) || UTF16_IS_SINGLE(c) || UTF16_IS_TRAIL(c) ||
90 #endif
91 !U16_IS_LEAD(c) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c)){
92 log_err("ERROR: %x is a first surrogate\n", c);
93 }
94 }
95 if(i >= 8 && i< 12){
96 if(
97 #if !U_HIDE_OBSOLETE_UTF_OLD_H
98 !UTF16_IS_TRAIL(c) || UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) ||
99 #endif
100 !U16_IS_TRAIL(c) || U16_IS_SINGLE(c) || U16_IS_LEAD(c)) {
101 log_err("ERROR: %x is a second surrogate\n", c);
102 }
103 }
104 }
105 }
106
TestCharLength()107 static void TestCharLength()
108 {
109 static uint32_t codepoint[]={
110 1, 0x0061,
111 1, 0xe065,
112 1, 0x20ac,
113 2, 0x20402,
114 2, 0x23456,
115 2, 0x24506,
116 2, 0x20402,
117 2, 0x10402,
118 1, 0xd7ff,
119 1, 0xe000
120 };
121
122 int16_t i;
123 #if !U_HIDE_OBSOLETE_UTF_OLD_H
124 UBool multiple;
125 #endif
126 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
127 UChar32 c=codepoint[i+1];
128 if(
129 #if !U_HIDE_OBSOLETE_UTF_OLD_H
130 UTF16_CHAR_LENGTH(c) != (uint16_t)codepoint[i] ||
131 #endif
132 U16_LENGTH(c) != (uint16_t)codepoint[i]) {
133 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
134 }else{
135 log_verbose("The no: of code units for %lx is %d\n",c, U16_LENGTH(c));
136 }
137 #if !U_HIDE_OBSOLETE_UTF_OLD_H
138 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
139 if(UTF16_NEED_MULTIPLE_UCHAR(c) != multiple){
140 log_err("ERROR: UTF16_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
141 }
142 #endif
143 }
144 }
145
TestGetChar()146 static void TestGetChar()
147 {
148 static UChar input[]={
149 /* code unit,*/
150 0xdc00,
151 0x20ac,
152 0xd841,
153 0x61,
154 0xd841,
155 0xdc02,
156 0xd842,
157 0xdc06,
158 0,
159 0xd842,
160 0xd7ff,
161 0xdc41,
162 0xe000,
163 0xd800
164 };
165 static UChar32 result[]={
166 /*codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict)*/
167 (UChar32)0xfca10000, 0xdc00, UTF_ERROR_VALUE,
168 0x20ac, 0x20ac, 0x20ac,
169 0x12861, 0xd841, UTF_ERROR_VALUE,
170 0x61, 0x61, 0x61,
171 0x20402, 0x20402, 0x20402,
172 0x20402, 0x20402, 0x20402,
173 0x20806, 0x20806, 0x20806,
174 0x20806, 0x20806, 0x20806,
175 0x00, 0x00, 0x00,
176 0x203ff, 0xd842, UTF_ERROR_VALUE,
177 0xd7ff, 0xd7ff, 0xd7ff,
178 0xfc41, 0xdc41, UTF_ERROR_VALUE,
179 0xe000, 0xe000, 0xe000,
180 0x11734, 0xd800, UTF_ERROR_VALUE
181 };
182 uint16_t i=0;
183 UChar32 c, expected;
184 uint16_t offset=0;
185 for(offset=0; offset<UPRV_LENGTHOF(input); offset++) {
186 if(0<offset && offset<UPRV_LENGTHOF(input)-1){
187 #if !U_HIDE_OBSOLETE_UTF_OLD_H
188 UTF16_GET_CHAR_UNSAFE(input, offset, c);
189 if(c != result[i]){
190 log_err("ERROR: UTF16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
191 }
192 #endif
193 U16_GET_UNSAFE(input, offset, c);
194 if(c != result[i]){
195 log_err("ERROR: U16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
196 }
197 }
198 expected=result[i+1];
199 #if !U_HIDE_OBSOLETE_UTF_OLD_H
200 UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, FALSE);
201 if(c != expected) {
202 log_err("ERROR: UTF16_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
203 }
204 #endif
205 U16_GET(input, 0, offset, UPRV_LENGTHOF(input), c);
206 if(c != expected) {
207 log_err("ERROR: U16_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
208 }
209
210 U16_GET_OR_FFFD(input, 0, offset, UPRV_LENGTHOF(input), c);
211 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
212 if(c != expected) {
213 log_err("ERROR: U16_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
214 }
215 #if !U_HIDE_OBSOLETE_UTF_OLD_H
216 UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, TRUE);
217 if(c != result[i+2]){
218 log_err("ERROR: UTF16_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
219 }
220 #endif
221 i=(uint16_t)(i+3);
222 }
223 }
224
TestNextPrevChar()225 static void TestNextPrevChar(){
226
227 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
228 static UChar32 result[]={
229 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
230 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000,
231 0x10000, 0x10000, 0x10000, 0x120400, 0xdc00, UTF_ERROR_VALUE,
232 0xdc00, 0xdc00, UTF_ERROR_VALUE, 0x20441, 0x20441, 0x20441,
233 0x10ffff, 0x10ffff, 0x10ffff, 0xd841, 0xd841, UTF_ERROR_VALUE,
234 0xdfff, 0xdfff, UTF_ERROR_VALUE, 0xd7ff, 0xd7ff, 0xd7ff,
235 0x0062, 0x0062, 0x0062, 0xd841, 0xd841, UTF_ERROR_VALUE,
236 0x1ffff, 0xd841, UTF_ERROR_VALUE, 0x0062, 0x0062, 0x0062,
237 0xd7ff, 0xd7ff, 0xd7ff, 0x10ffff, 0x10ffff, 0x10ffff,
238 0x20441, 0x20441, 0x20441, 0xdbff, 0xdbff, UTF_ERROR_VALUE,
239 0xdc41, 0xdc41, UTF_ERROR_VALUE, 0x10000, 0x10000, 0x10000,
240 0xdc00, 0xdc00, UTF_ERROR_VALUE, 0xd800, 0xd800, UTF_ERROR_VALUE,
241 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061
242 };
243 static uint16_t movedOffset[]={
244 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
245 1, 1, 1, 11, 11, 11,
246 3, 3, 3, 9, 10 , 10,
247 3, 3, 3, 8, 8, 8,
248 5, 5, 4, 8, 8, 8,
249 5, 5, 5, 7, 7, 7,
250 6, 6, 6, 6, 6, 6,
251 8, 7, 7, 5, 5, 5,
252 8, 8, 8, 3, 3, 3,
253 10, 10, 10, 3, 3, 3,
254 10, 10, 10, 1, 1, 1,
255 11, 11, 11, 1, 1, 1,
256 12, 12, 12, 0, 0, 0,
257 };
258
259
260 UChar32 c=0x0000, expected;
261 uint16_t i=0;
262 uint16_t offset=0, setOffset=0;
263 for(offset=0; offset<UPRV_LENGTHOF(input); offset++){
264 setOffset=offset;
265 #if !U_HIDE_OBSOLETE_UTF_OLD_H
266 UTF16_NEXT_CHAR_UNSAFE(input, setOffset, c);
267 if(setOffset != movedOffset[i]){
268 log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
269 offset, movedOffset[i], setOffset);
270 }
271 if(c != result[i]){
272 log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
273 }
274 #endif
275 setOffset=offset;
276 U16_NEXT_UNSAFE(input, setOffset, c);
277 if(setOffset != movedOffset[i]){
278 log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
279 offset, movedOffset[i], setOffset);
280 }
281 if(c != result[i]){
282 log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
283 }
284 expected=result[i+1];
285 #if !U_HIDE_OBSOLETE_UTF_OLD_H
286 setOffset=offset;
287 UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, FALSE);
288 if(setOffset != movedOffset[i+1]){
289 log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
290 offset, movedOffset[i+1], setOffset);
291 }
292 if(c != expected) {
293 log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
294 }
295 #endif
296 setOffset=offset;
297 U16_NEXT(input, setOffset, UPRV_LENGTHOF(input), c);
298 if(setOffset != movedOffset[i+1]){
299 log_err("ERROR: U16_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
300 offset, movedOffset[i+1], setOffset);
301 }
302 if(c != expected){
303 log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
304 }
305
306 setOffset=offset;
307 U16_NEXT_OR_FFFD(input, setOffset, UPRV_LENGTHOF(input), c);
308 if(setOffset != movedOffset[i+1]){
309 log_err("ERROR: U16_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
310 offset, movedOffset[i+1], setOffset);
311 }
312 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
313 if(c != expected){
314 log_err("ERROR: U16_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
315 }
316 #if !U_HIDE_OBSOLETE_UTF_OLD_H
317 setOffset=offset;
318 UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, TRUE);
319 if(setOffset != movedOffset[i+1]){
320 log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
321 offset, movedOffset[i+2], setOffset);
322 }
323 if(c != result[i+2]){
324 log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
325 }
326 #endif
327 i=(uint16_t)(i+6);
328 }
329 i=0;
330 for(offset=(uint16_t)UPRV_LENGTHOF(input); offset > 0; --offset){
331 setOffset=offset;
332 #if !U_HIDE_OBSOLETE_UTF_OLD_H
333 UTF16_PREV_CHAR_UNSAFE(input, setOffset, c);
334 if(setOffset != movedOffset[i+3]){
335 log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
336 offset, movedOffset[i+3], setOffset);
337 }
338 if(c != result[i+3]){
339 log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
340 }
341 #endif
342 setOffset=offset;
343 U16_PREV_UNSAFE(input, setOffset, c);
344 if(setOffset != movedOffset[i+3]){
345 log_err("ERROR: U16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
346 offset, movedOffset[i+3], setOffset);
347 }
348 if(c != result[i+3]){
349 log_err("ERROR: U16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
350 }
351 #if !U_HIDE_OBSOLETE_UTF_OLD_H
352 setOffset=offset;
353 UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
354 if(setOffset != movedOffset[i+4]){
355 log_err("ERROR: UTF16_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
356 offset, movedOffset[i+4], setOffset);
357 }
358 if(c != result[i+4]){
359 log_err("ERROR: UTF16_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
360 }
361 #endif
362 setOffset=offset;
363 U16_PREV(input, 0, setOffset, c);
364 if(setOffset != movedOffset[i+4]){
365 log_err("ERROR: U16_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
366 offset, movedOffset[i+4], setOffset);
367 }
368 expected = result[i+4];
369 if(c != expected) {
370 log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
371 }
372
373 setOffset=offset;
374 U16_PREV_OR_FFFD(input, 0, setOffset, c);
375 if(setOffset != movedOffset[i+4]){
376 log_err("ERROR: U16_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
377 offset, movedOffset[i+4], setOffset);
378 }
379 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
380 if(c != expected) {
381 log_err("ERROR: U16_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
382 }
383 #if !U_HIDE_OBSOLETE_UTF_OLD_H
384 setOffset=offset;
385 UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE);
386 if(setOffset != movedOffset[i+5]){
387 log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
388 offset, movedOffset[i+5], setOffset);
389 }
390 if(c != result[i+5]){
391 log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
392 }
393 #endif
394 i=(uint16_t)(i+6);
395 }
396
397 }
398
399 /* keep this in sync with utf8tst.c's TestNulTerminated() */
TestNulTerminated()400 static void TestNulTerminated() {
401 static const UChar input[]={
402 /* 0 */ 0x61,
403 /* 1 */ 0xd801, 0xdc01,
404 /* 3 */ 0xdc01,
405 /* 4 */ 0x62,
406 /* 5 */ 0xd801,
407 /* 6 */ 0x00
408 /* 7 */
409 };
410 static const UChar32 result[]={
411 0x61,
412 0x10401,
413 0xdc01,
414 0x62,
415 0xd801,
416 0
417 };
418
419 UChar32 c, c2, expected;
420 int32_t i0, i=0, j, k, expectedIndex;
421 int32_t cpIndex=0;
422 do {
423 i0=i;
424 U16_NEXT(input, i, -1, c);
425 expected=result[cpIndex];
426 if(c!=expected) {
427 log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
428 }
429 j=i0;
430 U16_NEXT_OR_FFFD(input, j, -1, c);
431 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
432 if(c!=expected) {
433 log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
434 }
435 if(j!=i) {
436 log_err("U16_NEXT_OR_FFFD() moved to index %d but U16_NEXT() moved to %d\n", j, i);
437 }
438 j=i0;
439 U16_FWD_1(input, j, -1);
440 if(j!=i) {
441 log_err("U16_FWD_1() moved to index %d but U16_NEXT() moved to %d\n", j, i);
442 }
443 ++cpIndex;
444 /*
445 * Move by this many code points from the start.
446 * U16_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
447 */
448 expectedIndex= (c==0) ? i-1 : i;
449 k=0;
450 U16_FWD_N(input, k, -1, cpIndex);
451 if(k!=expectedIndex) {
452 log_err("U16_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
453 }
454 } while(c!=0);
455
456 i=0;
457 do {
458 j=i0=i;
459 U16_NEXT(input, i, -1, c);
460 do {
461 U16_GET(input, 0, j, -1, c2);
462 if(c2!=c) {
463 log_err("U16_NEXT(from %d)=U+%04x != U+%04x=U16_GET(at %d)\n", i0, c, c2, j);
464 }
465 U16_GET_OR_FFFD(input, 0, j, -1, c2);
466 expected= U_IS_SURROGATE(c) ? 0xfffd : c;
467 if(c2!=expected) {
468 log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U16_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
469 }
470 /* U16_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
471 k=j+1;
472 U16_SET_CP_LIMIT(input, 0, k, -1);
473 if(k!=i) {
474 log_err("U16_NEXT() moved to %d but U16_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
475 }
476 } while(++j<i);
477 } while(c!=0);
478 }
479
TestFwdBack()480 static void TestFwdBack(){
481 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
482 static uint16_t fwd_unsafe[] ={1, 3, 5, 6, 8, 10, 11, 12};
483 static uint16_t fwd_safe[] ={1, 3, 5, 6, 7, 8, 10, 11, 12};
484 static uint16_t back_unsafe[]={11, 9, 8, 7, 6, 5, 3, 1, 0};
485 static uint16_t back_safe[] ={11, 10, 8, 7, 6, 5, 3, 1, 0};
486
487 static uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1};
488 static uint16_t fwd_N_unsafe[] ={0, 1, 5, 10, 11};
489 static uint16_t fwd_N_safe[] ={0, 1, 5, 8, 10, 12, 12}; /*safe macro keeps it at the end of the string */
490 static uint16_t back_N_unsafe[]={12, 11, 8, 5, 3};
491 static uint16_t back_N_safe[] ={12, 11, 8, 5, 3, 0, 0};
492
493 uint16_t offunsafe=0, offsafe=0;
494 uint16_t i=0;
495 #if !U_HIDE_OBSOLETE_UTF_OLD_H
496 while(offunsafe < UPRV_LENGTHOF(input)){
497 UTF16_FWD_1_UNSAFE(input, offunsafe);
498 if(offunsafe != fwd_unsafe[i]){
499 log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
500 }
501 i++;
502 }
503 #endif
504 offunsafe=0, offsafe=0;
505 i=0;
506 while(offunsafe < UPRV_LENGTHOF(input)){
507 U16_FWD_1_UNSAFE(input, offunsafe);
508 if(offunsafe != fwd_unsafe[i]){
509 log_err("ERROR: U16_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
510 }
511 i++;
512 }
513 #if !U_HIDE_OBSOLETE_UTF_OLD_H
514 offunsafe=0, offsafe=0;
515 i=0;
516 while(offsafe < UPRV_LENGTHOF(input)){
517 UTF16_FWD_1_SAFE(input, offsafe, UPRV_LENGTHOF(input));
518 if(offsafe != fwd_safe[i]){
519 log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
520 }
521 i++;
522 }
523 #endif
524 offunsafe=0, offsafe=0;
525 i=0;
526 while(offsafe < UPRV_LENGTHOF(input)){
527 U16_FWD_1(input, offsafe, UPRV_LENGTHOF(input));
528 if(offsafe != fwd_safe[i]){
529 log_err("ERROR: U16_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
530 }
531 i++;
532 }
533 #if !U_HIDE_OBSOLETE_UTF_OLD_H
534 offunsafe=UPRV_LENGTHOF(input);
535 offsafe=UPRV_LENGTHOF(input);
536 i=0;
537 while(offunsafe > 0){
538 UTF16_BACK_1_UNSAFE(input, offunsafe);
539 if(offunsafe != back_unsafe[i]){
540 log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
541 }
542 i++;
543 }
544 #endif
545 offunsafe=UPRV_LENGTHOF(input);
546 offsafe=UPRV_LENGTHOF(input);
547 i=0;
548 while(offunsafe > 0){
549 U16_BACK_1_UNSAFE(input, offunsafe);
550 if(offunsafe != back_unsafe[i]){
551 log_err("ERROR: U16_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
552 }
553 i++;
554 }
555 #if !U_HIDE_OBSOLETE_UTF_OLD_H
556 offunsafe=UPRV_LENGTHOF(input);
557 offsafe=UPRV_LENGTHOF(input);
558 i=0;
559 while(offsafe > 0){
560 UTF16_BACK_1_SAFE(input,0, offsafe);
561 if(offsafe != back_safe[i]){
562 log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
563 }
564 i++;
565 }
566 #endif
567 offunsafe=UPRV_LENGTHOF(input);
568 offsafe=UPRV_LENGTHOF(input);
569 i=0;
570 while(offsafe > 0){
571 U16_BACK_1(input,0, offsafe);
572 if(offsafe != back_safe[i]){
573 log_err("ERROR: U16_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
574 }
575 i++;
576 }
577
578 offunsafe=0;
579 offsafe=0;
580 #if !U_HIDE_OBSOLETE_UTF_OLD_H
581 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/
582 UTF16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
583 if(offunsafe != fwd_N_unsafe[i]){
584 log_err("ERROR: Forward_N_unsafe offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
585 }
586 }
587 #endif
588 offunsafe=0;
589 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/
590 U16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
591 if(offunsafe != fwd_N_unsafe[i]){
592 log_err("ERROR: U16_FWD_N_UNSAFE offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
593 }
594 }
595 #if !U_HIDE_OBSOLETE_UTF_OLD_H
596 offsafe=0;
597 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
598 UTF16_FWD_N_SAFE(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]);
599 if(offsafe != fwd_N_safe[i]){
600 log_err("ERROR: Forward_N_safe offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
601 }
602 }
603 #endif
604 offsafe=0;
605 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
606 U16_FWD_N(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]);
607 if(offsafe != fwd_N_safe[i]){
608 log_err("ERROR: U16_FWD_N offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
609 }
610 }
611 #if !U_HIDE_OBSOLETE_UTF_OLD_H
612 offunsafe=UPRV_LENGTHOF(input);
613 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){
614 UTF16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
615 if(offunsafe != back_N_unsafe[i]){
616 log_err("ERROR: backward_N_unsafe offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
617 }
618 }
619 #endif
620 offunsafe=UPRV_LENGTHOF(input);
621 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){
622 U16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
623 if(offunsafe != back_N_unsafe[i]){
624 log_err("ERROR: U16_BACK_N_UNSAFE offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
625 }
626 }
627 #if !U_HIDE_OBSOLETE_UTF_OLD_H
628 offsafe=UPRV_LENGTHOF(input);
629 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
630 UTF16_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
631 if(offsafe != back_N_safe[i]){
632 log_err("ERROR: backward_N_safe offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
633 }
634 }
635 #endif
636 offsafe=UPRV_LENGTHOF(input);
637 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
638 U16_BACK_N(input, 0, offsafe, Nvalue[i]);
639 if(offsafe != back_N_safe[i]){
640 log_err("ERROR: U16_BACK_N offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
641 }
642 }
643 }
644
TestSetChar()645 static void TestSetChar(){
646 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
647 static uint16_t start_unsafe[]={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 9, 11};
648 static uint16_t start_safe[] ={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 10, 11};
649 static uint16_t limit_unsafe[]={0, 1, 3, 3, 5, 5, 6, 8, 8, 10, 10, 11};
650 static uint16_t limit_safe[] ={0, 1, 3, 3, 5, 5, 6, 7, 8, 10, 10, 11};
651
652 uint16_t i=0;
653 uint16_t offset=0, setOffset=0;
654 for(offset=0; offset<UPRV_LENGTHOF(input); offset++){
655 #if !U_HIDE_OBSOLETE_UTF_OLD_H
656 setOffset=offset;
657 UTF16_SET_CHAR_START_UNSAFE(input, setOffset);
658 if(setOffset != start_unsafe[i]){
659 log_err("ERROR: UTF16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
660 }
661 #endif
662 setOffset=offset;
663 U16_SET_CP_START_UNSAFE(input, setOffset);
664 if(setOffset != start_unsafe[i]){
665 log_err("ERROR: U16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
666 }
667 #if !U_HIDE_OBSOLETE_UTF_OLD_H
668 setOffset=offset;
669 UTF16_SET_CHAR_START_SAFE(input, 0, setOffset);
670 if(setOffset != start_safe[i]){
671 log_err("ERROR: UTF16_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
672 }
673 #endif
674 setOffset=offset;
675 U16_SET_CP_START(input, 0, setOffset);
676 if(setOffset != start_safe[i]){
677 log_err("ERROR: U16_SET_CHAR_START failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
678 }
679
680 if (offset > 0) {
681 #if !U_HIDE_OBSOLETE_UTF_OLD_H
682 setOffset=offset;
683 UTF16_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
684 if(setOffset != limit_unsafe[i]){
685 log_err("ERROR: UTF16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
686 }
687 #endif
688 setOffset=offset;
689 U16_SET_CP_LIMIT_UNSAFE(input, setOffset);
690 if(setOffset != limit_unsafe[i]){
691 log_err("ERROR: U16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
692 }
693 }
694
695 setOffset=offset;
696 U16_SET_CP_LIMIT(input,0, setOffset, UPRV_LENGTHOF(input));
697 if(setOffset != limit_safe[i]){
698 log_err("ERROR: U16_SET_CHAR_LIMIT failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_safe[i], setOffset);
699 }
700
701 i++;
702 }
703 }
704
TestAppendChar()705 static void TestAppendChar(){
706 #if !U_HIDE_OBSOLETE_UTF_OLD_H
707 static UChar s[5]={0x0061, 0x0062, 0x0063, 0x0064, 0x0000};
708 static uint32_t test[]={
709 /*append-position(unsafe), CHAR to be appended */
710 0, 0x20441,
711 2, 0x0028,
712 2, 0xdc00,
713 3, 0xd800,
714 1, 0x20402,
715
716 /*append-position(safe), CHAR to be appended */
717 0, 0x20441,
718 2, 0xdc00,
719 3, 0xd800,
720 1, 0x20402,
721 3, 0x20402,
722 3, 0x10402,
723 2, 0x10402,
724
725 };
726 static uint16_t movedOffset[]={
727 /*offset-moved-to(unsafe)*/
728 2, /*for append-pos: 0 , CHAR 0x20441*/
729 3,
730 3,
731 4,
732 3,
733 /*offse-moved-to(safe)*/
734 2, /*for append-pos: 0, CHAR 0x20441*/
735 3,
736 4,
737 3,
738 4,
739 4,
740 4
741 };
742
743 static UChar result[][5]={
744 /*unsafe*/
745 {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000},
746 {0x0061, 0x0062, 0x0028, 0x0064, 0x0000},
747 {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000},
748 {0x0061, 0x0062, 0x0063, 0xd800, 0x0000},
749 {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000},
750
751 /*safe*/
752 {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000},
753 {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000},
754 {0x0061, 0x0062, 0x0063, 0xd800, 0x0000},
755 {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000},
756 {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000},
757 {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000},
758 {0x0061, 0x0062, 0xd801, 0xdc02, 0x0000},
759
760
761 };
762 uint16_t i, count=0;
763 UChar *str=(UChar*)malloc(sizeof(UChar) * (u_strlen(s)+1));
764 uint16_t offset;
765 for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){
766 if(count<5){
767 u_strcpy(str, s);
768 offset=(uint16_t)test[i];
769 UTF16_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);
770 if(offset != movedOffset[count]){
771 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
772 count, movedOffset[count], offset);
773
774 }
775 if(u_strcmp(str, result[count]) !=0){
776 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed for count=%d. Expected:", count);
777 printUChars(result[count]);
778 printf("\nGot:");
779 printUChars(str);
780 printf("\n");
781 }
782 }else{
783 u_strcpy(str, s);
784 offset=(uint16_t)test[i];
785 UTF16_APPEND_CHAR_SAFE(str, offset, (uint16_t)u_strlen(str), test[i+1]);
786 if(offset != movedOffset[count]){
787 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
788 count, movedOffset[count], offset);
789
790 }
791 if(u_strcmp(str, result[count]) !=0){
792 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed for count=%d. Expected:", count);
793 printUChars(result[count]);
794 printf("\nGot:");
795 printUChars(str);
796 printf("\n");
797 }
798 }
799 count++;
800 }
801 free(str);
802 #endif
803 }
804
TestAppend()805 static void TestAppend() {
806 static const UChar32 codePoints[]={
807 0x61, 0xdf, 0x901, 0x3040,
808 0xac00, 0xd800, 0xdbff, 0xdcde,
809 0xdffd, 0xe000, 0xffff, 0x10000,
810 0x12345, 0xe0021, 0x10ffff, 0x110000,
811 0x234567, 0x7fffffff, -1, -1000,
812 0, 0x400
813 };
814 static const UChar expectUnsafe[]={
815 0x61, 0xdf, 0x901, 0x3040,
816 0xac00, 0xd800, 0xdbff, 0xdcde,
817 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
818 0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
819 /* none from this line */
820 0, 0x400
821 }, expectSafe[]={
822 0x61, 0xdf, 0x901, 0x3040,
823 0xac00, 0xd800, 0xdbff, 0xdcde,
824 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
825 0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
826 /* none from this line */
827 0, 0x400
828 };
829
830 UChar buffer[100];
831 UChar32 c;
832 int32_t i, length;
833 UBool isError, expectIsError, wrongIsError;
834
835 length=0;
836 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
837 c=codePoints[i];
838 if(c<0 || 0x10ffff<c) {
839 continue; /* skip non-code points for U16_APPEND_UNSAFE */
840 }
841
842 U16_APPEND_UNSAFE(buffer, length, c);
843 }
844 if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) {
845 log_err("U16_APPEND_UNSAFE did not generate the expected output\n");
846 }
847
848 length=0;
849 wrongIsError=FALSE;
850 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
851 c=codePoints[i];
852 expectIsError= c<0 || 0x10ffff<c; /* || U_IS_SURROGATE(c); */ /* surrogates in UTF-32 shouldn't be used, but it's okay to pass them around internally. */
853 isError=FALSE;
854
855 U16_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError);
856 wrongIsError|= isError!=expectIsError;
857 }
858 if(wrongIsError) {
859 log_err("U16_APPEND did not set isError correctly\n");
860 }
861 if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) {
862 log_err("U16_APPEND did not generate the expected output\n");
863 }
864 }
865
TestSurrogate()866 static void TestSurrogate(){
867 static UChar32 s[] = {0x10000, 0x10ffff, 0x50000, 0x100000, 0x1abcd};
868 int i = 0;
869 while (i < 5) {
870 UChar first = U16_LEAD(s[i]);
871 UChar second = U16_TRAIL(s[i]);
872 /* algorithm from the Unicode consortium */
873 UChar firstresult = (UChar)(((s[i] - 0x10000) / 0x400) + 0xD800);
874 UChar secondresult = (UChar)(((s[i] - 0x10000) % 0x400) + 0xDC00);
875
876 if (
877 #if !U_HIDE_OBSOLETE_UTF_OLD_H
878 first != UTF16_LEAD(s[i]) || first != UTF_FIRST_SURROGATE(s[i]) ||
879 #endif
880 first != firstresult) {
881 log_err("Failure in first surrogate in 0x%x expected to be 0x%x\n",
882 s[i], firstresult);
883 }
884 if (
885 #if !U_HIDE_OBSOLETE_UTF_OLD_H
886 second != UTF16_TRAIL(s[i]) || second != UTF_SECOND_SURROGATE(s[i]) ||
887 #endif
888 second != secondresult) {
889 log_err("Failure in second surrogate in 0x%x expected to be 0x%x\n",
890 s[i], secondresult);
891 }
892 i ++;
893 }
894 }
895