1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1998-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*
9 * File test.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/01/2000 Madhu Creation
15 *******************************************************************************
16 */
17
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/utf16.h"
21 #include "unicode/utf_old.h"
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "cintltst.h"
25 #include <stdbool.h>
26 #include <stdio.h>
27
// Obsolete macro from obsolete unicode/utf_old.h, for some old test data.
// The #ifndef guard supplies the value only when no earlier header has
// defined it; the expected-result tables in this file reference it.
#ifndef UTF_ERROR_VALUE
#   define UTF_ERROR_VALUE 0xffff
#endif
32
33 #if !U_HIDE_OBSOLETE_UTF_OLD_H
printUChars(const UChar * uchars)34 static void printUChars(const UChar *uchars) {
35 int16_t i=0;
36 for(i=0; i<u_strlen(uchars); i++) {
37 printf("%x ", *(uchars+i));
38 }
39 }
40 #endif
41
/* Forward declarations of the file-local test cases that addUTF16Test() registers. */
static void TestCodeUnitValues(void);
static void TestCharLength(void);
static void TestGetChar(void);
static void TestNextPrevChar(void);
static void TestNulTerminated(void);
static void TestFwdBack(void);
static void TestSetChar(void);
static void TestAppendChar(void);
static void TestAppend(void);
static void TestSurrogate(void);

/* Registration entry point; it has external linkage (not static), presumably so the test driver can call it. */
void addUTF16Test(TestNode** root);
54
55 void
addUTF16Test(TestNode ** root)56 addUTF16Test(TestNode** root)
57 {
58 addTest(root, &TestCodeUnitValues, "utf16tst/TestCodeUnitValues");
59 addTest(root, &TestCharLength, "utf16tst/TestCharLength");
60 addTest(root, &TestGetChar, "utf16tst/TestGetChar");
61 addTest(root, &TestNextPrevChar, "utf16tst/TestNextPrevChar");
62 addTest(root, &TestNulTerminated, "utf16tst/TestNulTerminated");
63 addTest(root, &TestFwdBack, "utf16tst/TestFwdBack");
64 addTest(root, &TestSetChar, "utf16tst/TestSetChar");
65 addTest(root, &TestAppendChar, "utf16tst/TestAppendChar");
66 addTest(root, &TestAppend, "utf16tst/TestAppend");
67 addTest(root, &TestSurrogate, "utf16tst/TestSurrogate");
68 }
69
TestCodeUnitValues()70 static void TestCodeUnitValues()
71 {
72 static uint16_t codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
73
74 int16_t i;
75 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
76 UChar c=codeunit[i];
77 log_verbose("Testing code unit value of %x\n", c);
78 if(i<4){
79 if(
80 #if !U_HIDE_OBSOLETE_UTF_OLD_H
81 !UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || UTF16_IS_TRAIL(c) ||
82 #endif
83 !U16_IS_SINGLE(c) || U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
84 log_err("ERROR: %x is a single character\n", c);
85 }
86 }
87 if(i >= 4 && i< 8){
88 if(
89 #if !U_HIDE_OBSOLETE_UTF_OLD_H
90 !UTF16_IS_LEAD(c) || UTF16_IS_SINGLE(c) || UTF16_IS_TRAIL(c) ||
91 #endif
92 !U16_IS_LEAD(c) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c)){
93 log_err("ERROR: %x is a first surrogate\n", c);
94 }
95 }
96 if(i >= 8 && i< 12){
97 if(
98 #if !U_HIDE_OBSOLETE_UTF_OLD_H
99 !UTF16_IS_TRAIL(c) || UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) ||
100 #endif
101 !U16_IS_TRAIL(c) || U16_IS_SINGLE(c) || U16_IS_LEAD(c)) {
102 log_err("ERROR: %x is a second surrogate\n", c);
103 }
104 }
105 }
106 }
107
TestCharLength()108 static void TestCharLength()
109 {
110 static uint32_t codepoint[]={
111 1, 0x0061,
112 1, 0xe065,
113 1, 0x20ac,
114 2, 0x20402,
115 2, 0x23456,
116 2, 0x24506,
117 2, 0x20402,
118 2, 0x10402,
119 1, 0xd7ff,
120 1, 0xe000
121 };
122
123 int16_t i;
124 #if !U_HIDE_OBSOLETE_UTF_OLD_H
125 UBool multiple;
126 #endif
127 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
128 UChar32 c=codepoint[i+1];
129 if(
130 #if !U_HIDE_OBSOLETE_UTF_OLD_H
131 UTF16_CHAR_LENGTH(c) != (uint16_t)codepoint[i] ||
132 #endif
133 U16_LENGTH(c) != (uint16_t)codepoint[i]) {
134 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
135 }else{
136 log_verbose("The no: of code units for %lx is %d\n",c, U16_LENGTH(c));
137 }
138 #if !U_HIDE_OBSOLETE_UTF_OLD_H
139 multiple=(UBool)(codepoint[i] == 1 ? false : true);
140 if(UTF16_NEED_MULTIPLE_UCHAR(c) != multiple){
141 log_err("ERROR: UTF16_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
142 }
143 #endif
144 }
145 }
146
TestGetChar()147 static void TestGetChar()
148 {
149 static UChar input[]={
150 /* code unit,*/
151 0xdc00,
152 0x20ac,
153 0xd841,
154 0x61,
155 0xd841,
156 0xdc02,
157 0xd842,
158 0xdc06,
159 0,
160 0xd842,
161 0xd7ff,
162 0xdc41,
163 0xe000,
164 0xd800
165 };
166 static UChar32 result[]={
167 /*codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict)*/
168 (UChar32)0xfca10000, 0xdc00, UTF_ERROR_VALUE,
169 0x20ac, 0x20ac, 0x20ac,
170 0x12861, 0xd841, UTF_ERROR_VALUE,
171 0x61, 0x61, 0x61,
172 0x20402, 0x20402, 0x20402,
173 0x20402, 0x20402, 0x20402,
174 0x20806, 0x20806, 0x20806,
175 0x20806, 0x20806, 0x20806,
176 0x00, 0x00, 0x00,
177 0x203ff, 0xd842, UTF_ERROR_VALUE,
178 0xd7ff, 0xd7ff, 0xd7ff,
179 0xfc41, 0xdc41, UTF_ERROR_VALUE,
180 0xe000, 0xe000, 0xe000,
181 0x11734, 0xd800, UTF_ERROR_VALUE
182 };
183 uint16_t i=0;
184 UChar32 c, expected;
185 uint16_t offset=0;
186 for(offset=0; offset<UPRV_LENGTHOF(input); offset++) {
187 if(0<offset && offset<UPRV_LENGTHOF(input)-1){
188 #if !U_HIDE_OBSOLETE_UTF_OLD_H
189 UTF16_GET_CHAR_UNSAFE(input, offset, c);
190 if(c != result[i]){
191 log_err("ERROR: UTF16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
192 }
193 #endif
194 U16_GET_UNSAFE(input, offset, c);
195 if(c != result[i]){
196 log_err("ERROR: U16_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
197 }
198 }
199 expected=result[i+1];
200 #if !U_HIDE_OBSOLETE_UTF_OLD_H
201 UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, false);
202 if(c != expected) {
203 log_err("ERROR: UTF16_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
204 }
205 #endif
206 U16_GET(input, 0, offset, UPRV_LENGTHOF(input), c);
207 if(c != expected) {
208 log_err("ERROR: U16_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
209 }
210
211 U16_GET_OR_FFFD(input, 0, offset, UPRV_LENGTHOF(input), c);
212 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
213 if(c != expected) {
214 log_err("ERROR: U16_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
215 }
216 #if !U_HIDE_OBSOLETE_UTF_OLD_H
217 UTF16_GET_CHAR_SAFE(input, 0, offset, UPRV_LENGTHOF(input), c, true);
218 if(c != result[i+2]){
219 log_err("ERROR: UTF16_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
220 }
221 #endif
222 i=(uint16_t)(i+3);
223 }
224 }
225
TestNextPrevChar()226 static void TestNextPrevChar(){
227
228 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
229 static UChar32 result[]={
230 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
231 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000,
232 0x10000, 0x10000, 0x10000, 0x120400, 0xdc00, UTF_ERROR_VALUE,
233 0xdc00, 0xdc00, UTF_ERROR_VALUE, 0x20441, 0x20441, 0x20441,
234 0x10ffff, 0x10ffff, 0x10ffff, 0xd841, 0xd841, UTF_ERROR_VALUE,
235 0xdfff, 0xdfff, UTF_ERROR_VALUE, 0xd7ff, 0xd7ff, 0xd7ff,
236 0x0062, 0x0062, 0x0062, 0xd841, 0xd841, UTF_ERROR_VALUE,
237 0x1ffff, 0xd841, UTF_ERROR_VALUE, 0x0062, 0x0062, 0x0062,
238 0xd7ff, 0xd7ff, 0xd7ff, 0x10ffff, 0x10ffff, 0x10ffff,
239 0x20441, 0x20441, 0x20441, 0xdbff, 0xdbff, UTF_ERROR_VALUE,
240 0xdc41, 0xdc41, UTF_ERROR_VALUE, 0x10000, 0x10000, 0x10000,
241 0xdc00, 0xdc00, UTF_ERROR_VALUE, 0xd800, 0xd800, UTF_ERROR_VALUE,
242 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061
243 };
244 static uint16_t movedOffset[]={
245 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
246 1, 1, 1, 11, 11, 11,
247 3, 3, 3, 9, 10 , 10,
248 3, 3, 3, 8, 8, 8,
249 5, 5, 4, 8, 8, 8,
250 5, 5, 5, 7, 7, 7,
251 6, 6, 6, 6, 6, 6,
252 8, 7, 7, 5, 5, 5,
253 8, 8, 8, 3, 3, 3,
254 10, 10, 10, 3, 3, 3,
255 10, 10, 10, 1, 1, 1,
256 11, 11, 11, 1, 1, 1,
257 12, 12, 12, 0, 0, 0,
258 };
259
260
261 UChar32 c=0x0000, expected;
262 uint16_t i=0;
263 uint16_t offset=0, setOffset=0;
264 for(offset=0; offset<UPRV_LENGTHOF(input); offset++){
265 setOffset=offset;
266 #if !U_HIDE_OBSOLETE_UTF_OLD_H
267 UTF16_NEXT_CHAR_UNSAFE(input, setOffset, c);
268 if(setOffset != movedOffset[i]){
269 log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
270 offset, movedOffset[i], setOffset);
271 }
272 if(c != result[i]){
273 log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
274 }
275 #endif
276 setOffset=offset;
277 U16_NEXT_UNSAFE(input, setOffset, c);
278 if(setOffset != movedOffset[i]){
279 log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
280 offset, movedOffset[i], setOffset);
281 }
282 if(c != result[i]){
283 log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
284 }
285 expected=result[i+1];
286 #if !U_HIDE_OBSOLETE_UTF_OLD_H
287 setOffset=offset;
288 UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, false);
289 if(setOffset != movedOffset[i+1]){
290 log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
291 offset, movedOffset[i+1], setOffset);
292 }
293 if(c != expected) {
294 log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
295 }
296 #endif
297 setOffset=offset;
298 U16_NEXT(input, setOffset, UPRV_LENGTHOF(input), c);
299 if(setOffset != movedOffset[i+1]){
300 log_err("ERROR: U16_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
301 offset, movedOffset[i+1], setOffset);
302 }
303 if(c != expected){
304 log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
305 }
306
307 setOffset=offset;
308 U16_NEXT_OR_FFFD(input, setOffset, UPRV_LENGTHOF(input), c);
309 if(setOffset != movedOffset[i+1]){
310 log_err("ERROR: U16_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
311 offset, movedOffset[i+1], setOffset);
312 }
313 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
314 if(c != expected){
315 log_err("ERROR: U16_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
316 }
317 #if !U_HIDE_OBSOLETE_UTF_OLD_H
318 setOffset=offset;
319 UTF16_NEXT_CHAR_SAFE(input, setOffset, UPRV_LENGTHOF(input), c, true);
320 if(setOffset != movedOffset[i+1]){
321 log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
322 offset, movedOffset[i+2], setOffset);
323 }
324 if(c != result[i+2]){
325 log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
326 }
327 #endif
328 i=(uint16_t)(i+6);
329 }
330 i=0;
331 for(offset=(uint16_t)UPRV_LENGTHOF(input); offset > 0; --offset){
332 setOffset=offset;
333 #if !U_HIDE_OBSOLETE_UTF_OLD_H
334 UTF16_PREV_CHAR_UNSAFE(input, setOffset, c);
335 if(setOffset != movedOffset[i+3]){
336 log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
337 offset, movedOffset[i+3], setOffset);
338 }
339 if(c != result[i+3]){
340 log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
341 }
342 #endif
343 setOffset=offset;
344 U16_PREV_UNSAFE(input, setOffset, c);
345 if(setOffset != movedOffset[i+3]){
346 log_err("ERROR: U16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
347 offset, movedOffset[i+3], setOffset);
348 }
349 if(c != result[i+3]){
350 log_err("ERROR: U16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
351 }
352 #if !U_HIDE_OBSOLETE_UTF_OLD_H
353 setOffset=offset;
354 UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, false);
355 if(setOffset != movedOffset[i+4]){
356 log_err("ERROR: UTF16_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
357 offset, movedOffset[i+4], setOffset);
358 }
359 if(c != result[i+4]){
360 log_err("ERROR: UTF16_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
361 }
362 #endif
363 setOffset=offset;
364 U16_PREV(input, 0, setOffset, c);
365 if(setOffset != movedOffset[i+4]){
366 log_err("ERROR: U16_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
367 offset, movedOffset[i+4], setOffset);
368 }
369 expected = result[i+4];
370 if(c != expected) {
371 log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
372 }
373
374 setOffset=offset;
375 U16_PREV_OR_FFFD(input, 0, setOffset, c);
376 if(setOffset != movedOffset[i+4]){
377 log_err("ERROR: U16_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
378 offset, movedOffset[i+4], setOffset);
379 }
380 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
381 if(c != expected) {
382 log_err("ERROR: U16_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
383 }
384 #if !U_HIDE_OBSOLETE_UTF_OLD_H
385 setOffset=offset;
386 UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, true);
387 if(setOffset != movedOffset[i+5]){
388 log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
389 offset, movedOffset[i+5], setOffset);
390 }
391 if(c != result[i+5]){
392 log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
393 }
394 #endif
395 i=(uint16_t)(i+6);
396 }
397
398 }
399
400 /* keep this in sync with utf8tst.c's TestNulTerminated() */
TestNulTerminated()401 static void TestNulTerminated() {
402 static const UChar input[]={
403 /* 0 */ 0x61,
404 /* 1 */ 0xd801, 0xdc01,
405 /* 3 */ 0xdc01,
406 /* 4 */ 0x62,
407 /* 5 */ 0xd801,
408 /* 6 */ 0x00
409 /* 7 */
410 };
411 static const UChar32 result[]={
412 0x61,
413 0x10401,
414 0xdc01,
415 0x62,
416 0xd801,
417 0
418 };
419
420 UChar32 c, c2, expected;
421 int32_t i0, i=0, j, k, expectedIndex;
422 int32_t cpIndex=0;
423 do {
424 i0=i;
425 U16_NEXT(input, i, -1, c);
426 expected=result[cpIndex];
427 if(c!=expected) {
428 log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
429 }
430 j=i0;
431 U16_NEXT_OR_FFFD(input, j, -1, c);
432 if(U_IS_SURROGATE(expected)) { expected=0xfffd; }
433 if(c!=expected) {
434 log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
435 }
436 if(j!=i) {
437 log_err("U16_NEXT_OR_FFFD() moved to index %d but U16_NEXT() moved to %d\n", j, i);
438 }
439 j=i0;
440 U16_FWD_1(input, j, -1);
441 if(j!=i) {
442 log_err("U16_FWD_1() moved to index %d but U16_NEXT() moved to %d\n", j, i);
443 }
444 ++cpIndex;
445 /*
446 * Move by this many code points from the start.
447 * U16_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
448 */
449 expectedIndex= (c==0) ? i-1 : i;
450 k=0;
451 U16_FWD_N(input, k, -1, cpIndex);
452 if(k!=expectedIndex) {
453 log_err("U16_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
454 }
455 } while(c!=0);
456
457 i=0;
458 do {
459 j=i0=i;
460 U16_NEXT(input, i, -1, c);
461 do {
462 U16_GET(input, 0, j, -1, c2);
463 if(c2!=c) {
464 log_err("U16_NEXT(from %d)=U+%04x != U+%04x=U16_GET(at %d)\n", i0, c, c2, j);
465 }
466 U16_GET_OR_FFFD(input, 0, j, -1, c2);
467 expected= U_IS_SURROGATE(c) ? 0xfffd : c;
468 if(c2!=expected) {
469 log_err("U16_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U16_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
470 }
471 /* U16_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
472 k=j+1;
473 U16_SET_CP_LIMIT(input, 0, k, -1);
474 if(k!=i) {
475 log_err("U16_NEXT() moved to %d but U16_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
476 }
477 } while(++j<i);
478 } while(c!=0);
479 }
480
TestFwdBack()481 static void TestFwdBack(){
482 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
483 static uint16_t fwd_unsafe[] ={1, 3, 5, 6, 8, 10, 11, 12};
484 static uint16_t fwd_safe[] ={1, 3, 5, 6, 7, 8, 10, 11, 12};
485 static uint16_t back_unsafe[]={11, 9, 8, 7, 6, 5, 3, 1, 0};
486 static uint16_t back_safe[] ={11, 10, 8, 7, 6, 5, 3, 1, 0};
487
488 static uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1};
489 static uint16_t fwd_N_unsafe[] ={0, 1, 5, 10, 11};
490 static uint16_t fwd_N_safe[] ={0, 1, 5, 8, 10, 12, 12}; /*safe macro keeps it at the end of the string */
491 static uint16_t back_N_unsafe[]={12, 11, 8, 5, 3};
492 static uint16_t back_N_safe[] ={12, 11, 8, 5, 3, 0, 0};
493
494 uint16_t offunsafe=0, offsafe=0;
495 uint16_t i=0;
496 #if !U_HIDE_OBSOLETE_UTF_OLD_H
497 while(offunsafe < UPRV_LENGTHOF(input)){
498 UTF16_FWD_1_UNSAFE(input, offunsafe);
499 if(offunsafe != fwd_unsafe[i]){
500 log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
501 }
502 i++;
503 }
504 #endif
505 offunsafe=0, offsafe=0;
506 i=0;
507 while(offunsafe < UPRV_LENGTHOF(input)){
508 U16_FWD_1_UNSAFE(input, offunsafe);
509 if(offunsafe != fwd_unsafe[i]){
510 log_err("ERROR: U16_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
511 }
512 i++;
513 }
514 #if !U_HIDE_OBSOLETE_UTF_OLD_H
515 offunsafe=0, offsafe=0;
516 i=0;
517 while(offsafe < UPRV_LENGTHOF(input)){
518 UTF16_FWD_1_SAFE(input, offsafe, UPRV_LENGTHOF(input));
519 if(offsafe != fwd_safe[i]){
520 log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
521 }
522 i++;
523 }
524 #endif
525 offunsafe=0, offsafe=0;
526 i=0;
527 while(offsafe < UPRV_LENGTHOF(input)){
528 U16_FWD_1(input, offsafe, UPRV_LENGTHOF(input));
529 if(offsafe != fwd_safe[i]){
530 log_err("ERROR: U16_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
531 }
532 i++;
533 }
534 #if !U_HIDE_OBSOLETE_UTF_OLD_H
535 offunsafe=UPRV_LENGTHOF(input);
536 offsafe=UPRV_LENGTHOF(input);
537 i=0;
538 while(offunsafe > 0){
539 UTF16_BACK_1_UNSAFE(input, offunsafe);
540 if(offunsafe != back_unsafe[i]){
541 log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
542 }
543 i++;
544 }
545 #endif
546 offunsafe=UPRV_LENGTHOF(input);
547 offsafe=UPRV_LENGTHOF(input);
548 i=0;
549 while(offunsafe > 0){
550 U16_BACK_1_UNSAFE(input, offunsafe);
551 if(offunsafe != back_unsafe[i]){
552 log_err("ERROR: U16_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
553 }
554 i++;
555 }
556 #if !U_HIDE_OBSOLETE_UTF_OLD_H
557 offunsafe=UPRV_LENGTHOF(input);
558 offsafe=UPRV_LENGTHOF(input);
559 i=0;
560 while(offsafe > 0){
561 UTF16_BACK_1_SAFE(input,0, offsafe);
562 if(offsafe != back_safe[i]){
563 log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
564 }
565 i++;
566 }
567 #endif
568 offunsafe=UPRV_LENGTHOF(input);
569 offsafe=UPRV_LENGTHOF(input);
570 i=0;
571 while(offsafe > 0){
572 U16_BACK_1(input,0, offsafe);
573 if(offsafe != back_safe[i]){
574 log_err("ERROR: U16_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
575 }
576 i++;
577 }
578
579 offunsafe=0;
580 offsafe=0;
581 #if !U_HIDE_OBSOLETE_UTF_OLD_H
582 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/
583 UTF16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
584 if(offunsafe != fwd_N_unsafe[i]){
585 log_err("ERROR: Forward_N_unsafe offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
586 }
587 }
588 #endif
589 offunsafe=0;
590 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/
591 U16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
592 if(offunsafe != fwd_N_unsafe[i]){
593 log_err("ERROR: U16_FWD_N_UNSAFE offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
594 }
595 }
596 #if !U_HIDE_OBSOLETE_UTF_OLD_H
597 offsafe=0;
598 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
599 UTF16_FWD_N_SAFE(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]);
600 if(offsafe != fwd_N_safe[i]){
601 log_err("ERROR: Forward_N_safe offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
602 }
603 }
604 #endif
605 offsafe=0;
606 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
607 U16_FWD_N(input, offsafe, UPRV_LENGTHOF(input), Nvalue[i]);
608 if(offsafe != fwd_N_safe[i]){
609 log_err("ERROR: U16_FWD_N offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
610 }
611 }
612 #if !U_HIDE_OBSOLETE_UTF_OLD_H
613 offunsafe=UPRV_LENGTHOF(input);
614 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){
615 UTF16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
616 if(offunsafe != back_N_unsafe[i]){
617 log_err("ERROR: backward_N_unsafe offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
618 }
619 }
620 #endif
621 offunsafe=UPRV_LENGTHOF(input);
622 for(i=0; i<UPRV_LENGTHOF(Nvalue)-2; i++){
623 U16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
624 if(offunsafe != back_N_unsafe[i]){
625 log_err("ERROR: U16_BACK_N_UNSAFE offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
626 }
627 }
628 #if !U_HIDE_OBSOLETE_UTF_OLD_H
629 offsafe=UPRV_LENGTHOF(input);
630 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
631 UTF16_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
632 if(offsafe != back_N_safe[i]){
633 log_err("ERROR: backward_N_safe offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
634 }
635 }
636 #endif
637 offsafe=UPRV_LENGTHOF(input);
638 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
639 U16_BACK_N(input, 0, offsafe, Nvalue[i]);
640 if(offsafe != back_N_safe[i]){
641 log_err("ERROR: U16_BACK_N offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
642 }
643 }
644 }
645
TestSetChar()646 static void TestSetChar(){
647 static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
648 static uint16_t start_unsafe[]={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 9, 11};
649 static uint16_t start_safe[] ={0, 1, 1, 3, 3, 5, 6, 7, 8, 8, 10, 11};
650 static uint16_t limit_unsafe[]={0, 1, 3, 3, 5, 5, 6, 8, 8, 10, 10, 11};
651 static uint16_t limit_safe[] ={0, 1, 3, 3, 5, 5, 6, 7, 8, 10, 10, 11};
652
653 uint16_t i=0;
654 uint16_t offset=0, setOffset=0;
655 for(offset=0; offset<UPRV_LENGTHOF(input); offset++){
656 #if !U_HIDE_OBSOLETE_UTF_OLD_H
657 setOffset=offset;
658 UTF16_SET_CHAR_START_UNSAFE(input, setOffset);
659 if(setOffset != start_unsafe[i]){
660 log_err("ERROR: UTF16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
661 }
662 #endif
663 setOffset=offset;
664 U16_SET_CP_START_UNSAFE(input, setOffset);
665 if(setOffset != start_unsafe[i]){
666 log_err("ERROR: U16_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_unsafe[i], setOffset);
667 }
668 #if !U_HIDE_OBSOLETE_UTF_OLD_H
669 setOffset=offset;
670 UTF16_SET_CHAR_START_SAFE(input, 0, setOffset);
671 if(setOffset != start_safe[i]){
672 log_err("ERROR: UTF16_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
673 }
674 #endif
675 setOffset=offset;
676 U16_SET_CP_START(input, 0, setOffset);
677 if(setOffset != start_safe[i]){
678 log_err("ERROR: U16_SET_CHAR_START failed for offset=%ld. Expected:%lx Got:%lx\n", offset, start_safe[i], setOffset);
679 }
680
681 if (offset > 0) {
682 #if !U_HIDE_OBSOLETE_UTF_OLD_H
683 setOffset=offset;
684 UTF16_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
685 if(setOffset != limit_unsafe[i]){
686 log_err("ERROR: UTF16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
687 }
688 #endif
689 setOffset=offset;
690 U16_SET_CP_LIMIT_UNSAFE(input, setOffset);
691 if(setOffset != limit_unsafe[i]){
692 log_err("ERROR: U16_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_unsafe[i], setOffset);
693 }
694 }
695
696 setOffset=offset;
697 U16_SET_CP_LIMIT(input,0, setOffset, UPRV_LENGTHOF(input));
698 if(setOffset != limit_safe[i]){
699 log_err("ERROR: U16_SET_CHAR_LIMIT failed for offset=%ld. Expected:%lx Got:%lx\n", offset, limit_safe[i], setOffset);
700 }
701
702 i++;
703 }
704 }
705
TestAppendChar()706 static void TestAppendChar(){
707 #if !U_HIDE_OBSOLETE_UTF_OLD_H
708 static UChar s[5]={0x0061, 0x0062, 0x0063, 0x0064, 0x0000};
709 static uint32_t test[]={
710 /*append-position(unsafe), CHAR to be appended */
711 0, 0x20441,
712 2, 0x0028,
713 2, 0xdc00,
714 3, 0xd800,
715 1, 0x20402,
716
717 /*append-position(safe), CHAR to be appended */
718 0, 0x20441,
719 2, 0xdc00,
720 3, 0xd800,
721 1, 0x20402,
722 3, 0x20402,
723 3, 0x10402,
724 2, 0x10402,
725
726 };
727 static uint16_t movedOffset[]={
728 /*offset-moved-to(unsafe)*/
729 2, /*for append-pos: 0 , CHAR 0x20441*/
730 3,
731 3,
732 4,
733 3,
734 /*offse-moved-to(safe)*/
735 2, /*for append-pos: 0, CHAR 0x20441*/
736 3,
737 4,
738 3,
739 4,
740 4,
741 4
742 };
743
744 static UChar result[][5]={
745 /*unsafe*/
746 {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000},
747 {0x0061, 0x0062, 0x0028, 0x0064, 0x0000},
748 {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000},
749 {0x0061, 0x0062, 0x0063, 0xd800, 0x0000},
750 {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000},
751
752 /*safe*/
753 {0xd841, 0xdc41, 0x0063, 0x0064, 0x0000},
754 {0x0061, 0x0062, 0xdc00, 0x0064, 0x0000},
755 {0x0061, 0x0062, 0x0063, 0xd800, 0x0000},
756 {0x0061, 0xd841, 0xdc02, 0x0064, 0x0000},
757 {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000},
758 {0x0061, 0x0062, 0x0063, UTF_ERROR_VALUE, 0x0000},
759 {0x0061, 0x0062, 0xd801, 0xdc02, 0x0000},
760
761
762 };
763 uint16_t i, count=0;
764 UChar *str=(UChar*)malloc(sizeof(UChar) * (u_strlen(s)+1));
765 uint16_t offset;
766 for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){
767 if(count<5){
768 u_strcpy(str, s);
769 offset=(uint16_t)test[i];
770 UTF16_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);
771 if(offset != movedOffset[count]){
772 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
773 count, movedOffset[count], offset);
774
775 }
776 if(u_strcmp(str, result[count]) !=0){
777 log_err("ERROR: UTF16_APPEND_CHAR_UNSAFE failed for count=%d. Expected:", count);
778 printUChars(result[count]);
779 printf("\nGot:");
780 printUChars(str);
781 printf("\n");
782 }
783 }else{
784 u_strcpy(str, s);
785 offset=(uint16_t)test[i];
786 UTF16_APPEND_CHAR_SAFE(str, offset, (uint16_t)u_strlen(str), test[i+1]);
787 if(offset != movedOffset[count]){
788 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
789 count, movedOffset[count], offset);
790
791 }
792 if(u_strcmp(str, result[count]) !=0){
793 log_err("ERROR: UTF16_APPEND_CHAR_SAFE failed for count=%d. Expected:", count);
794 printUChars(result[count]);
795 printf("\nGot:");
796 printUChars(str);
797 printf("\n");
798 }
799 }
800 count++;
801 }
802 free(str);
803 #endif
804 }
805
TestAppend()806 static void TestAppend() {
807 static const UChar32 codePoints[]={
808 0x61, 0xdf, 0x901, 0x3040,
809 0xac00, 0xd800, 0xdbff, 0xdcde,
810 0xdffd, 0xe000, 0xffff, 0x10000,
811 0x12345, 0xe0021, 0x10ffff, 0x110000,
812 0x234567, 0x7fffffff, -1, -1000,
813 0, 0x400
814 };
815 static const UChar expectUnsafe[]={
816 0x61, 0xdf, 0x901, 0x3040,
817 0xac00, 0xd800, 0xdbff, 0xdcde,
818 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
819 0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
820 /* none from this line */
821 0, 0x400
822 }, expectSafe[]={
823 0x61, 0xdf, 0x901, 0x3040,
824 0xac00, 0xd800, 0xdbff, 0xdcde,
825 0xdffd, 0xe000, 0xffff, 0xd800, 0xdc00,
826 0xd808, 0xdf45, 0xdb40, 0xdc21, 0xdbff, 0xdfff, /* not 0x110000 */
827 /* none from this line */
828 0, 0x400
829 };
830
831 UChar buffer[100];
832 UChar32 c;
833 int32_t i, length;
834 UBool isError, expectIsError, wrongIsError;
835
836 length=0;
837 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
838 c=codePoints[i];
839 if(c<0 || 0x10ffff<c) {
840 continue; /* skip non-code points for U16_APPEND_UNSAFE */
841 }
842
843 U16_APPEND_UNSAFE(buffer, length, c);
844 }
845 if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length*U_SIZEOF_UCHAR)) {
846 log_err("U16_APPEND_UNSAFE did not generate the expected output\n");
847 }
848
849 length=0;
850 wrongIsError=false;
851 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
852 c=codePoints[i];
853 expectIsError= c<0 || 0x10ffff<c; /* || U_IS_SURROGATE(c); */ /* surrogates in UTF-32 shouldn't be used, but it's okay to pass them around internally. */
854 isError=false;
855
856 U16_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError);
857 wrongIsError|= isError!=expectIsError;
858 }
859 if(wrongIsError) {
860 log_err("U16_APPEND did not set isError correctly\n");
861 }
862 if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length*U_SIZEOF_UCHAR)) {
863 log_err("U16_APPEND did not generate the expected output\n");
864 }
865 }
866
TestSurrogate()867 static void TestSurrogate(){
868 static UChar32 s[] = {0x10000, 0x10ffff, 0x50000, 0x100000, 0x1abcd};
869 int i = 0;
870 while (i < 5) {
871 UChar first = U16_LEAD(s[i]);
872 UChar second = U16_TRAIL(s[i]);
873 /* algorithm from the Unicode consortium */
874 UChar firstresult = (UChar)(((s[i] - 0x10000) / 0x400) + 0xD800);
875 UChar secondresult = (UChar)(((s[i] - 0x10000) % 0x400) + 0xDC00);
876
877 if (
878 #if !U_HIDE_OBSOLETE_UTF_OLD_H
879 first != UTF16_LEAD(s[i]) || first != UTF_FIRST_SURROGATE(s[i]) ||
880 #endif
881 first != firstresult) {
882 log_err("Failure in first surrogate in 0x%x expected to be 0x%x\n",
883 s[i], firstresult);
884 }
885 if (
886 #if !U_HIDE_OBSOLETE_UTF_OLD_H
887 second != UTF16_TRAIL(s[i]) || second != UTF_SECOND_SURROGATE(s[i]) ||
888 #endif
889 second != secondresult) {
890 log_err("Failure in second surrogate in 0x%x expected to be 0x%x\n",
891 s[i], secondresult);
892 }
893 i ++;
894 }
895 }
896