• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File CUCDTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API, added tests for string functions
15 ********************************************************************************
16 */
17 
18 #include <string.h>
19 #include <math.h>
20 #include <stdlib.h>
21 
22 #include "unicode/utypes.h"
23 #include "unicode/uchar.h"
24 #include "unicode/putil.h"
25 #include "unicode/ustring.h"
26 #include "unicode/uloc.h"
27 #include "unicode/unorm2.h"
28 #include "unicode/utf16.h"
29 #include "unicode/utf_old.h"
30 #include "cintltst.h"
31 #include "putilimp.h"
32 #include "uparse.h"
33 #include "ucase.h"
34 #include "ubidi_props.h"
35 #include "uprops.h"
36 #include "uset_imp.h"
37 #include "usc_impl.h"
38 #include "udatamem.h"
39 #include "cucdapi.h"
40 #include "cmemory.h"
41 
42 /* prototypes --------------------------------------------------------------- */
43 
44 static void TestUpperLower(void);
45 static void TestLetterNumber(void);
46 static void TestMisc(void);
47 static void TestPOSIX(void);
48 static void TestControlPrint(void);
49 static void TestIdentifier(void);
50 static void TestUnicodeData(void);
51 static void TestCodeUnit(void);
52 static void TestCodePoint(void);
53 static void TestCharLength(void);
54 static void TestCharNames(void);
55 static void TestUCharFromNameUnderflow(void);
56 static void TestMirroring(void);
57 static void TestUScriptRunAPI(void);
58 static void TestAdditionalProperties(void);
59 static void TestNumericProperties(void);
60 static void TestPropertyNames(void);
61 static void TestPropertyValues(void);
62 static void TestConsistency(void);
63 static void TestUBiDiProps(void);
64 static void TestCaseFolding(void);
65 
66 /* internal methods used */
67 static int32_t MakeProp(char* str);
68 static int32_t MakeDir(char* str);
69 
70 /* helpers ------------------------------------------------------------------ */
71 
72 static void
parseUCDFile(const char * filename,char * fields[][2],int32_t fieldCount,UParseLineFn * lineFn,void * context,UErrorCode * pErrorCode)73 parseUCDFile(const char *filename,
74              char *fields[][2], int32_t fieldCount,
75              UParseLineFn *lineFn, void *context,
76              UErrorCode *pErrorCode) {
77     char path[256];
78     char backupPath[256];
79 
80     if(U_FAILURE(*pErrorCode)) {
81         return;
82     }
83 
84     /* Look inside ICU_DATA first */
85     strcpy(path, u_getDataDirectory());
86     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
87     strcat(path, filename);
88 
89     /* As a fallback, try to guess where the source data was located
90      *    at the time ICU was built, and look there.
91      */
92     strcpy(backupPath, ctest_dataSrcDir());
93     strcat(backupPath, U_FILE_SEP_STRING);
94     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
95     strcat(backupPath, filename);
96 
97     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
98     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
99         *pErrorCode=U_ZERO_ERROR;
100         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
101     }
102     if(U_FAILURE(*pErrorCode)) {
103         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
104     }
105 }
106 
107 /* test data ---------------------------------------------------------------- */
108 
/*
 * Two-letter general category abbreviations, concatenated without
 * separators. The order matches the entries of tagValues[].
 */
static const char tagStrings[] =
    "MnMcMe"        /* marks */
    "NdNlNo"        /* numbers */
    "ZsZlZp"        /* separators */
    "CcCfCsCoCn"    /* controls, format, surrogate, private use, unassigned */
    "LuLlLtLmLo"    /* letters */
    "PcPdPsPePo"    /* punctuation */
    "SmScSkSo"      /* symbols */
    "PiPf";         /* initial/final quote punctuation */
110 static const int32_t tagValues[] =
111     {
112     /* Mn */ U_NON_SPACING_MARK,
113     /* Mc */ U_COMBINING_SPACING_MARK,
114     /* Me */ U_ENCLOSING_MARK,
115     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
116     /* Nl */ U_LETTER_NUMBER,
117     /* No */ U_OTHER_NUMBER,
118     /* Zs */ U_SPACE_SEPARATOR,
119     /* Zl */ U_LINE_SEPARATOR,
120     /* Zp */ U_PARAGRAPH_SEPARATOR,
121     /* Cc */ U_CONTROL_CHAR,
122     /* Cf */ U_FORMAT_CHAR,
123     /* Cs */ U_SURROGATE,
124     /* Co */ U_PRIVATE_USE_CHAR,
125     /* Cn */ U_UNASSIGNED,
126     /* Lu */ U_UPPERCASE_LETTER,
127     /* Ll */ U_LOWERCASE_LETTER,
128     /* Lt */ U_TITLECASE_LETTER,
129     /* Lm */ U_MODIFIER_LETTER,
130     /* Lo */ U_OTHER_LETTER,
131     /* Pc */ U_CONNECTOR_PUNCTUATION,
132     /* Pd */ U_DASH_PUNCTUATION,
133     /* Ps */ U_START_PUNCTUATION,
134     /* Pe */ U_END_PUNCTUATION,
135     /* Po */ U_OTHER_PUNCTUATION,
136     /* Sm */ U_MATH_SYMBOL,
137     /* Sc */ U_CURRENCY_SYMBOL,
138     /* Sk */ U_MODIFIER_SYMBOL,
139     /* So */ U_OTHER_SYMBOL,
140     /* Pi */ U_INITIAL_PUNCTUATION,
141     /* Pf */ U_FINAL_PUNCTUATION
142     };
143 
/*
 * Bidi class abbreviations; each entry holds at most four characters
 * plus the terminating NUL.
 */
static const char dirStrings[][5] = {
    "L",   "R",   "EN",  "ES",  "ET",
    "AN",  "CS",  "B",   "S",   "WS",
    "ON",  "LRE", "LRO", "AL",  "RLE",
    "RLO", "PDF", "NSM", "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI", "LRI", "RLI", "PDI"
};
170 
171 void addUnicodeTest(TestNode** root);
172 
addUnicodeTest(TestNode ** root)173 void addUnicodeTest(TestNode** root)
174 {
175     addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
176     addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
177     addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
178     addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
179     addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
180     addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
181     addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
182     addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
183     addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
184     addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
185     addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
186     addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
187     addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
188     addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
189     addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
190     addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
191     addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
192     addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
193     addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
194     addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
195     addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
196     addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
197     addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
198     addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
199     addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
200     addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
201 }
202 
203 /*==================================================== */
204 /* test u_toupper() and u_tolower()                    */
205 /*==================================================== */
TestUpperLower()206 static void TestUpperLower()
207 {
208     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
209     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
210     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
211     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
212     int32_t i;
213 
214     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
215     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
216 
217 /*
218 Checks LetterLike Symbols which were previously a source of confusion
219 [Bertrand A. D. 02/04/98]
220 */
221     for (i=0x2100;i<0x2138;i++)
222     {
223         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
224         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
225         {
226             if (i != (int)u_tolower(i)) /* itself */
227                 log_err("Failed case conversion with itself: U+%04x\n", i);
228             if (i != (int)u_toupper(i))
229                 log_err("Failed case conversion with itself: U+%04x\n", i);
230         }
231     }
232 
233     for(i=0; i < u_strlen(upper); i++){
234         if(u_tolower(upper[i]) != lower[i]){
235             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
236         }
237     }
238 
239     log_verbose("testing upper lower\n");
240     for (i = 0; i < 21; i++) {
241 
242         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
243         {
244             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
245         }
246         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
247          {
248             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
249         }
250         else if (upperTest[i] != u_tolower(lowerTest[i]))
251         {
252             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
253         }
254         else if (lowerTest[i] != u_toupper(upperTest[i]))
255          {
256             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
257         }
258         else if (upperTest[i] != u_tolower(upperTest[i]))
259         {
260             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
261         }
262         else if (lowerTest[i] != u_toupper(lowerTest[i]))
263         {
264             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
265         }
266     }
267     log_verbose("done testing upper lower\n");
268 
269     log_verbose("testing u_istitle\n");
270     {
271         static const UChar expected[] = {
272             0x1F88,
273             0x1F89,
274             0x1F8A,
275             0x1F8B,
276             0x1F8C,
277             0x1F8D,
278             0x1F8E,
279             0x1F8F,
280             0x1F88,
281             0x1F89,
282             0x1F8A,
283             0x1F8B,
284             0x1F8C,
285             0x1F8D,
286             0x1F8E,
287             0x1F8F,
288             0x1F98,
289             0x1F99,
290             0x1F9A,
291             0x1F9B,
292             0x1F9C,
293             0x1F9D,
294             0x1F9E,
295             0x1F9F,
296             0x1F98,
297             0x1F99,
298             0x1F9A,
299             0x1F9B,
300             0x1F9C,
301             0x1F9D,
302             0x1F9E,
303             0x1F9F,
304             0x1FA8,
305             0x1FA9,
306             0x1FAA,
307             0x1FAB,
308             0x1FAC,
309             0x1FAD,
310             0x1FAE,
311             0x1FAF,
312             0x1FA8,
313             0x1FA9,
314             0x1FAA,
315             0x1FAB,
316             0x1FAC,
317             0x1FAD,
318             0x1FAE,
319             0x1FAF,
320             0x1FBC,
321             0x1FBC,
322             0x1FCC,
323             0x1FCC,
324             0x1FFC,
325             0x1FFC,
326         };
327         int32_t num = UPRV_LENGTHOF(expected);
328         for(i=0; i<num; i++){
329             if(!u_istitle(expected[i])){
330                 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
331             }
332         }
333 
334     }
335 }
336 
337 /* compare two sets and verify that their difference or intersection is empty */
338 static UBool
showADiffB(const USet * a,const USet * b,const char * a_name,const char * b_name,UBool expect,UBool diffIsError)339 showADiffB(const USet *a, const USet *b,
340            const char *a_name, const char *b_name,
341            UBool expect, UBool diffIsError) {
342     USet *aa;
343     int32_t i, start, end, length;
344     UErrorCode errorCode;
345 
346     /*
347      * expect:
348      * TRUE  -> a-b should be empty, that is, b should contain all of a
349      * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
350      */
351     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
352         return TRUE;
353     }
354 
355     /* clone a to aa because a is const */
356     aa=uset_open(1, 0);
357     if(aa==NULL) {
358         /* unusual problem - out of memory? */
359         return FALSE;
360     }
361     uset_addAll(aa, a);
362 
363     /* compute the set in question */
364     if(expect) {
365         /* a-b */
366         uset_removeAll(aa, b);
367     } else {
368         /* a&b */
369         uset_retainAll(aa, b);
370     }
371 
372     /* aa is not empty because of the initial tests above; show its contents */
373     errorCode=U_ZERO_ERROR;
374     i=0;
375     for(;;) {
376         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
377         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
378             break; /* done */
379         }
380         if(U_FAILURE(errorCode)) {
381             log_err("error comparing %s with %s at difference item %d: %s\n",
382                 a_name, b_name, i, u_errorName(errorCode));
383             break;
384         }
385         if(length!=0) {
386             break; /* done with code points, got a string or -1 */
387         }
388 
389         if(diffIsError) {
390             if(expect) {
391                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
392             } else {
393                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
394             }
395         } else {
396             if(expect) {
397                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
398             } else {
399                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
400             }
401         }
402 
403         ++i;
404     }
405 
406     uset_close(aa);
407     return FALSE;
408 }
409 
410 static UBool
showAMinusB(const USet * a,const USet * b,const char * a_name,const char * b_name,UBool diffIsError)411 showAMinusB(const USet *a, const USet *b,
412             const char *a_name, const char *b_name,
413             UBool diffIsError) {
414     return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
415 }
416 
417 static UBool
showAIntersectB(const USet * a,const USet * b,const char * a_name,const char * b_name,UBool diffIsError)418 showAIntersectB(const USet *a, const USet *b,
419                 const char *a_name, const char *b_name,
420                 UBool diffIsError) {
421     return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
422 }
423 
424 static UBool
compareUSets(const USet * a,const USet * b,const char * a_name,const char * b_name,UBool diffIsError)425 compareUSets(const USet *a, const USet *b,
426              const char *a_name, const char *b_name,
427              UBool diffIsError) {
428     /*
429      * Use an arithmetic & not a logical && so that both branches
430      * are always taken and all differences are shown.
431      */
432     return
433         showAMinusB(a, b, a_name, b_name, diffIsError) &
434         showAMinusB(b, a, b_name, a_name, diffIsError);
435 }
436 
437 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
TestLetterNumber()438 static void TestLetterNumber()
439 {
440     UChar i = 0x0000;
441 
442     log_verbose("Testing for isalpha\n");
443     for (i = 0x0041; i < 0x005B; i++) {
444         if (!u_isalpha(i))
445         {
446             log_err("Failed isLetter test at  %.4X\n", i);
447         }
448     }
449     for (i = 0x0660; i < 0x066A; i++) {
450         if (u_isalpha(i))
451         {
452             log_err("Failed isLetter test with numbers at %.4X\n", i);
453         }
454     }
455 
456     log_verbose("Testing for isdigit\n");
457     for (i = 0x0660; i < 0x066A; i++) {
458         if (!u_isdigit(i))
459         {
460             log_verbose("Failed isNumber test at %.4X\n", i);
461         }
462     }
463 
464     log_verbose("Testing for isalnum\n");
465     for (i = 0x0041; i < 0x005B; i++) {
466         if (!u_isalnum(i))
467         {
468             log_err("Failed isAlNum test at  %.4X\n", i);
469         }
470     }
471     for (i = 0x0660; i < 0x066A; i++) {
472         if (!u_isalnum(i))
473         {
474             log_err("Failed isAlNum test at  %.4X\n", i);
475         }
476     }
477 
478     {
479         /*
480          * The following checks work only starting from Unicode 4.0.
481          * Check the version number here.
482          */
483         static UVersionInfo u401={ 4, 0, 1, 0 };
484         UVersionInfo version;
485         u_getUnicodeVersion(version);
486         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
487             return;
488         }
489     }
490 
491     {
492         /*
493          * Sanity check:
494          * Verify that exactly the digit characters have decimal digit values.
495          * This assumption is used in the implementation of u_digit()
496          * (which checks nt=de)
497          * compared with the parallel java.lang.Character.digit()
498          * (which checks Nd).
499          *
500          * This was not true in Unicode 3.2 and earlier.
501          * Unicode 4.0 fixed discrepancies.
502          * Unicode 4.0.1 re-introduced problems in this area due to an
503          * unintentionally incomplete last-minute change.
504          */
505         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
506         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
507 
508         USet *digits, *decimalValues;
509         UErrorCode errorCode;
510 
511         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
512         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
513         errorCode=U_ZERO_ERROR;
514         digits=uset_openPattern(digitsPattern, 6, &errorCode);
515         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
516 
517         if(U_SUCCESS(errorCode)) {
518             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
519         }
520 
521         uset_close(digits);
522         uset_close(decimalValues);
523     }
524 }
525 
/*
 * Calls propFn on each of the sampleCharsLength code points in sampleChars
 * and logs an error, naming the function as propName, for every code point
 * whose result differs from expected.
 */
static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
                                const UChar32 *sampleChars, int32_t sampleCharsLength,
                                UBool expected) {
    int32_t index = 0;

    while (index < sampleCharsLength) {
        const UChar32 sample = sampleChars[index++];
        const UBool actual = propFn(sample);

        if (actual == expected) {
            continue;
        }
        log_err("error: character property function %s(U+%04x)=%d is wrong\n",
                propName, sample, actual);
    }
}
538 
539 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
TestMisc()540 static void TestMisc()
541 {
542     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
543     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
544     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
545     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
546     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
547     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
548 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
549     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
550     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
551     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
552     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
553 
554     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
555 
556     uint32_t mask;
557 
558     int32_t i;
559     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
560     UVersionInfo realVersion;
561 
562     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
563 
564     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
565     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
566 
567     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
568                         sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
569     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
570                         sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
571 
572     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
573                         sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE);
574     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
575                         sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE);
576 
577     testSampleCharProps(u_isdefined, "u_isdefined",
578                         sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);
579     testSampleCharProps(u_isdefined, "u_isdefined",
580                         sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);
581 
582     testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE);
583     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE);
584 
585     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE);
586     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE);
587 
588     for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
589         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
590             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
591                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
592         }
593     }
594 
595     /* Tests the ICU version #*/
596     u_getVersion(realVersion);
597     u_versionToString(realVersion, icuVersion);
598     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
599     {
600         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
601     }
602 #if defined(ICU_VERSION)
603     /* test only happens where we have configure.in with VERSION - sanity check. */
604     if(strcmp(U_ICU_VERSION, ICU_VERSION))
605     {
606         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
607     }
608 #endif
609 
610     /* test U_GC_... */
611     if(
612         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
613         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
614         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
615         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
616         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
617         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
618     ) {
619         log_err("error: U_GET_GC_MASK does not work properly\n");
620     }
621 
622     mask=0;
623     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
624 
625     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
626     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
627     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
628     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
629     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
630 
631     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
632     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
633     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
634 
635     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
636     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
637     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
638 
639     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
640     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
641     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
642 
643     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
644     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
645     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
646     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
647 
648     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
649     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
650     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
651     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
652     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
653 
654     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
655     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
656     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
657     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
658 
659     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
660     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
661 
662     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
663         log_err("error: problems with U_GC_XX_MASK constants\n");
664     }
665 
666     mask=0;
667     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
668     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
669     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
670     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
671     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
672     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
673     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
674 
675     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
676         log_err("error: problems with U_GC_Y_MASK constants\n");
677     }
678     {
679         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
680         for(i=0; i<10; i++){
681             if(digit[i]!=u_forDigit(i,10)){
682                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
683             }
684         }
685     }
686 
687     /* test u_digit() */
688     {
689         static const struct {
690             UChar32 c;
691             int8_t radix, value;
692         } data[]={
693             /* base 16 */
694             { 0x0031, 16, 1 },
695             { 0x0038, 16, 8 },
696             { 0x0043, 16, 12 },
697             { 0x0066, 16, 15 },
698             { 0x00e4, 16, -1 },
699             { 0x0662, 16, 2 },
700             { 0x06f5, 16, 5 },
701             { 0xff13, 16, 3 },
702             { 0xff41, 16, 10 },
703 
704             /* base 8 */
705             { 0x0031, 8, 1 },
706             { 0x0038, 8, -1 },
707             { 0x0043, 8, -1 },
708             { 0x0066, 8, -1 },
709             { 0x00e4, 8, -1 },
710             { 0x0662, 8, 2 },
711             { 0x06f5, 8, 5 },
712             { 0xff13, 8, 3 },
713             { 0xff41, 8, -1 },
714 
715             /* base 36 */
716             { 0x5a, 36, 35 },
717             { 0x7a, 36, 35 },
718             { 0xff3a, 36, 35 },
719             { 0xff5a, 36, 35 },
720 
721             /* wrong radix values */
722             { 0x0031, 1, -1 },
723             { 0xff3a, 37, -1 }
724         };
725 
726         for(i=0; i<UPRV_LENGTHOF(data); ++i) {
727             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
728                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
729                         data[i].c,
730                         data[i].radix,
731                         u_digit(data[i].c, data[i].radix),
732                         data[i].value);
733             }
734         }
735     }
736 }
737 
738 /* test C/POSIX-style functions --------------------------------------------- */
739 
740 /* bit flags */
741 #define ISAL     1
742 #define ISLO     2
743 #define ISUP     4
744 
745 #define ISDI     8
746 #define ISXD  0x10
747 
748 #define ISAN  0x20
749 
750 #define ISPU  0x40
751 #define ISGR  0x80
752 #define ISPR 0x100
753 
754 #define ISSP 0x200
755 #define ISBL 0x400
756 #define ISCN 0x800
757 
758 /* C/POSIX-style functions, in the same order as the bit flags */
759 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
760 
761 static const struct {
762     IsPOSIXClass *fn;
763     const char *name;
764 } posixClasses[]={
765     { u_isalpha, "isalpha" },
766     { u_islower, "islower" },
767     { u_isupper, "isupper" },
768     { u_isdigit, "isdigit" },
769     { u_isxdigit, "isxdigit" },
770     { u_isalnum, "isalnum" },
771     { u_ispunct, "ispunct" },
772     { u_isgraph, "isgraph" },
773     { u_isprint, "isprint" },
774     { u_isspace, "isspace" },
775     { u_isblank, "isblank" },
776     { u_iscntrl, "iscntrl" }
777 };
778 
779 static const struct {
780     UChar32 c;
781     uint32_t posixResults;
782 } posixData[]={
783     { 0x0008,                                                        ISCN },    /* backspace */
784     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
785     { 0x000a,                                              ISSP|     ISCN },    /* LF */
786     { 0x000c,                                              ISSP|     ISCN },    /* FF */
787     { 0x000d,                                              ISSP|     ISCN },    /* CR */
788     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
789     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
790     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
791     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
792     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
793     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
794     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
795     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
796     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
797     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
798     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
799     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
800     { 0x0600,                                                        ISCN },    /* arabic number sign */
801     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
802     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
803     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
804     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
805     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
806     { 0x200b,                                                        ISCN },    /* ZWSP */
807   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
808     { 0x200e,                                                        ISCN },    /* LRM */
809     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
810     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
811     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
812     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
813     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
814     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
815     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
816     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
817 };
818 
819 static void
TestPOSIX()820 TestPOSIX() {
821     uint32_t mask;
822     int32_t cl, i;
823     UBool expect;
824 
825     mask=1;
826     for(cl=0; cl<12; ++cl) {
827         for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
828             expect=(UBool)((posixData[i].posixResults&mask)!=0);
829             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
830                 log_err("u_%s(U+%04x)=%s is wrong\n",
831                     posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
832             }
833         }
834         mask<<=1;
835     }
836 }
837 
838 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
TestControlPrint()839 static void TestControlPrint()
840 {
841     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
842     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
843     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
844     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
845     UChar32 c;
846 
847     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE);
848     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE);
849 
850     testSampleCharProps(u_isprint, "u_isprint",
851                         samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);
852     testSampleCharProps(u_isprint, "u_isprint",
853                         sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE);
854 
855     /* test all ISO 8 controls */
856     for(c=0; c<=0x9f; ++c) {
857         if(c==0x20) {
858             /* skip ASCII graphic characters and continue with DEL */
859             c=0x7f;
860         }
861         if(!u_iscntrl(c)) {
862             log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
863         }
864         if(!u_isISOControl(c)) {
865             log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
866         }
867         if(u_isprint(c)) {
868             log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
869         }
870     }
871 
872     /* test all Latin-1 graphic characters */
873     for(c=0x20; c<=0xff; ++c) {
874         if(c==0x7f) {
875             c=0xa0;
876         } else if(c==0xad) {
877             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
878             ++c;
879         }
880         if(!u_isprint(c)) {
881             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
882         }
883     }
884 }
885 
886 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
TestIdentifier()887 static void TestIdentifier()
888 {
889     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
890     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
891     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
892     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
893     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
894     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
895     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
896     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
897     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
898     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
899 
900     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
901                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
902     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
903                         sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE);
904 
905     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
906                         sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE);
907     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
908                         sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);
909 
910     /* IDPart should imply IDStart */
911     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
912                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
913 
914     testSampleCharProps(u_isIDStart, "u_isIDStart",
915                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
916     testSampleCharProps(u_isIDStart, "u_isIDStart",
917                         sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE);
918 
919     testSampleCharProps(u_isIDPart, "u_isIDPart",
920                         sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);
921     testSampleCharProps(u_isIDPart, "u_isIDPart",
922                         sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE);
923 
924     /* IDPart should imply IDStart */
925     testSampleCharProps(u_isIDPart, "u_isIDPart",
926                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
927 
928     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
929                         sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);
930     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
931                         sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE);
932 }
933 
934 /* for each line of UnicodeData.txt, check some of the properties */
935 typedef struct UnicodeDataContext {
936 #if UCONFIG_NO_NORMALIZATION
937     const void *dummy;
938 #else
939     const UNormalizer2 *nfc;
940     const UNormalizer2 *nfkc;
941 #endif
942 } UnicodeDataContext;
943 
944 /*
945  * ### TODO
946  * This test fails incorrectly if the First or Last code point of a repetitive area
947  * is overridden, which is allowed and is encouraged for the PUAs.
948  * Currently, this means that both area First/Last and override lines are
949  * tested against the properties from the API,
950  * and the area boundary will not match and cause an error.
951  *
952  * This function should detect area boundaries and skip them for the test of individual
953  * code points' properties.
954  * Then it should check that the areas contain all the same properties except where overridden.
955  * For this, it would have had to set a flag for which code points were listed explicitly.
956  */
957 static void U_CALLCONV
unicodeDataLineFn(void * context,char * fields[][2],int32_t fieldCount,UErrorCode * pErrorCode)958 unicodeDataLineFn(void *context,
959                   char *fields[][2], int32_t fieldCount,
960                   UErrorCode *pErrorCode)
961 {
962     char buffer[100];
963     const char *d;
964     char *end;
965     uint32_t value;
966     UChar32 c;
967     int32_t i;
968     int8_t type;
969     int32_t dt;
970     UChar dm[32], s[32];
971     int32_t dmLength, length;
972 
973 #if !UCONFIG_NO_NORMALIZATION
974     const UNormalizer2 *nfc, *nfkc;
975 #endif
976 
977     /* get the character code, field 0 */
978     c=strtoul(fields[0][0], &end, 16);
979     if(end<=fields[0][0] || end!=fields[0][1]) {
980         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
981         return;
982     }
983     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
984         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
985         return;
986     }
987 
988     /* get general category, field 2 */
989     *fields[2][1]=0;
990     type = (int8_t)tagValues[MakeProp(fields[2][0])];
991     if(u_charType(c)!=type) {
992         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
993     }
994     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
995         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
996     }
997 
998     /* get canonical combining class, field 3 */
999     value=strtoul(fields[3][0], &end, 10);
1000     if(end<=fields[3][0] || end!=fields[3][1]) {
1001         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1002         return;
1003     }
1004     if(value>255) {
1005         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1006         return;
1007     }
1008 #if !UCONFIG_NO_NORMALIZATION
1009     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1010         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1011     }
1012     nfkc=((UnicodeDataContext *)context)->nfkc;
1013     if(value!=unorm2_getCombiningClass(nfkc, c)) {
1014         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1015     }
1016 #endif
1017 
1018     /* get BiDi category, field 4 */
1019     *fields[4][1]=0;
1020     i=MakeDir(fields[4][0]);
1021     if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1022         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1023     }
1024 
1025     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1026     d=NULL;
1027     if(fields[5][0]==fields[5][1]) {
1028         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1029         if(c==0xac00 || c==0xd7a3) {
1030             dt=U_DT_CANONICAL;
1031         } else {
1032             dt=U_DT_NONE;
1033         }
1034     } else {
1035         d=fields[5][0];
1036         *fields[5][1]=0;
1037         dt=UCHAR_INVALID_CODE;
1038         if(*d=='<') {
1039             end=strchr(++d, '>');
1040             if(end!=NULL) {
1041                 *end=0;
1042                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1043                 d=u_skipWhitespace(end+1);
1044             }
1045         } else {
1046             dt=U_DT_CANONICAL;
1047         }
1048     }
1049     if(dt>U_DT_NONE) {
1050         if(c==0xac00) {
1051             dm[0]=0x1100;
1052             dm[1]=0x1161;
1053             dm[2]=0;
1054             dmLength=2;
1055         } else if(c==0xd7a3) {
1056             dm[0]=0xd788;
1057             dm[1]=0x11c2;
1058             dm[2]=0;
1059             dmLength=2;
1060         } else {
1061             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1062         }
1063     } else {
1064         dmLength=-1;
1065     }
1066     if(dt<0 || U_FAILURE(*pErrorCode)) {
1067         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1068         return;
1069     }
1070 #if !UCONFIG_NO_NORMALIZATION
1071     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1072     if(i!=dt) {
1073         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1074     }
1075     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1076     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1077     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1078         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1079                 "or the Decomposition_Mapping is different (%s)\n",
1080                 c, length, dmLength, u_errorName(*pErrorCode));
1081         return;
1082     }
1083     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1084     if(dt!=U_DT_CANONICAL) {
1085         dmLength=-1;
1086     }
1087     nfc=((UnicodeDataContext *)context)->nfc;
1088     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1089     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1090         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1091                 "or the Decomposition_Mapping is different (%s)\n",
1092                 c, length, dmLength, u_errorName(*pErrorCode));
1093         return;
1094     }
1095     /* recompose */
1096     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1097         UChar32 a, b, composite;
1098         i=0;
1099         U16_NEXT(dm, i, dmLength, a);
1100         U16_NEXT(dm, i, dmLength, b);
1101         /* i==dmLength */
1102         composite=unorm2_composePair(nfc, a, b);
1103         if(composite!=c) {
1104             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1105                     (long)c, (long)a, (long)b, (long)composite);
1106         }
1107         /*
1108          * Note: NFKC has fewer round-trip mappings than NFC,
1109          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1110          */
1111     }
1112 #endif
1113 
1114     /* get ISO Comment, field 11 */
1115     *fields[11][1]=0;
1116     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1117     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1118         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1119             c, u_errorName(*pErrorCode),
1120             U_FAILURE(*pErrorCode) ? buffer : "[error]",
1121             fields[11][0]);
1122     }
1123 
1124     /* get uppercase mapping, field 12 */
1125     if(fields[12][0]!=fields[12][1]) {
1126         value=strtoul(fields[12][0], &end, 16);
1127         if(end!=fields[12][1]) {
1128             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1129             return;
1130         }
1131         if((UChar32)value!=u_toupper(c)) {
1132             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1133         }
1134     } else {
1135         /* no case mapping: the API must map the code point to itself */
1136         if(c!=u_toupper(c)) {
1137             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1138         }
1139     }
1140 
1141     /* get lowercase mapping, field 13 */
1142     if(fields[13][0]!=fields[13][1]) {
1143         value=strtoul(fields[13][0], &end, 16);
1144         if(end!=fields[13][1]) {
1145             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1146             return;
1147         }
1148         if((UChar32)value!=u_tolower(c)) {
1149             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1150         }
1151     } else {
1152         /* no case mapping: the API must map the code point to itself */
1153         if(c!=u_tolower(c)) {
1154             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1155         }
1156     }
1157 
1158     /* get titlecase mapping, field 14 */
1159     if(fields[14][0]!=fields[14][1]) {
1160         value=strtoul(fields[14][0], &end, 16);
1161         if(end!=fields[14][1]) {
1162             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1163             return;
1164         }
1165         if((UChar32)value!=u_totitle(c)) {
1166             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1167         }
1168     } else {
1169         /* no case mapping: the API must map the code point to itself */
1170         if(c!=u_totitle(c)) {
1171             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1172         }
1173     }
1174 }
1175 
1176 static UBool U_CALLCONV
enumTypeRange(const void * context,UChar32 start,UChar32 limit,UCharCategory type)1177 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1178     static const UChar32 test[][2]={
1179         {0x41, U_UPPERCASE_LETTER},
1180         {0x308, U_NON_SPACING_MARK},
1181         {0xfffe, U_GENERAL_OTHER_TYPES},
1182         {0xe0041, U_FORMAT_CHAR},
1183         {0xeffff, U_UNASSIGNED}
1184     };
1185 
1186     int32_t i, count;
1187 
1188     if(0!=strcmp((const char *)context, "a1")) {
1189         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1190         return FALSE;
1191     }
1192 
1193     count=UPRV_LENGTHOF(test);
1194     for(i=0; i<count; ++i) {
1195         if(start<=test[i][0] && test[i][0]<limit) {
1196             if(type!=(UCharCategory)test[i][1]) {
1197                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1198                         start, limit, (long)type, test[i][0], test[i][1]);
1199             }
1200             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1201             return i==(count-1) ? FALSE : TRUE;
1202         }
1203     }
1204 
1205     if(start>test[count-1][0]) {
1206         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1207                 start, limit, (long)type);
1208         return FALSE;
1209     }
1210 
1211     return TRUE;
1212 }
1213 
1214 static UBool U_CALLCONV
enumDefaultsRange(const void * context,UChar32 start,UChar32 limit,UCharCategory type)1215 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1216     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1217     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1218         { 0x0590, U_LEFT_TO_RIGHT },
1219         { 0x0600, U_RIGHT_TO_LEFT },
1220         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1221         { 0x0860, U_RIGHT_TO_LEFT },
1222         { 0x0870, U_RIGHT_TO_LEFT_ARABIC },  // Unicode 10 changes U+0860..U+086F from R to AL.
1223         { 0x08A0, U_RIGHT_TO_LEFT },
1224         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1225         { 0x20A0, U_LEFT_TO_RIGHT },
1226         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1227         { 0xFB1D, U_LEFT_TO_RIGHT },
1228         { 0xFB50, U_RIGHT_TO_LEFT },
1229         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1230         { 0xFE70, U_LEFT_TO_RIGHT },
1231         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1232         { 0x10800, U_LEFT_TO_RIGHT },
1233         { 0x11000, U_RIGHT_TO_LEFT },
1234         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1235         { 0x1EE00, U_RIGHT_TO_LEFT },
1236         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1237         { 0x1F000, U_RIGHT_TO_LEFT },
1238         { 0x110000, U_LEFT_TO_RIGHT }
1239     };
1240 
1241     UChar32 c;
1242     int32_t i;
1243     UCharDirection shouldBeDir;
1244 
1245     /*
1246      * LineBreak.txt specifies:
1247      *   #  - Assigned characters that are not listed explicitly are given the value
1248      *   #    "AL".
1249      *   #  - Unassigned characters are given the value "XX".
1250      *
1251      * PUA characters are listed explicitly with "XX".
1252      * Verify that no assigned character has "XX".
1253      */
1254     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1255         c=start;
1256         while(c<limit) {
1257             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1258                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1259             }
1260             ++c;
1261         }
1262     }
1263 
1264     /*
1265      * Verify default Bidi classes.
1266      * See DerivedBidiClass.txt, especially for unassigned code points.
1267      */
1268     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1269         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1270         c=start;
1271         for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
1272             if((int32_t)c<defaultBidi[i][0]) {
1273                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1274                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1275                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1276                     } else {
1277                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1278                     }
1279 
1280                     if( u_charDirection(c)!=shouldBeDir ||
1281                         u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1282                     ) {
1283                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1284                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1285                     }
1286                     ++c;
1287                 }
1288             }
1289         }
1290     }
1291 
1292     return TRUE;
1293 }
1294 
1295 /* tests for several properties */
TestUnicodeData()1296 static void TestUnicodeData()
1297 {
1298     UVersionInfo expectVersionArray;
1299     UVersionInfo versionArray;
1300     char *fields[15][2];
1301     UErrorCode errorCode;
1302     UChar32 c;
1303     int8_t type;
1304 
1305     UnicodeDataContext context;
1306 
1307     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1308     u_getUnicodeVersion(versionArray);
1309     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1310     {
1311         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1312         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1313     }
1314 
1315 #if defined(ICU_UNICODE_VERSION)
1316     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1317     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1318     {
1319          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1320     }
1321 #endif
1322 
1323     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1324         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1325     }
1326 
1327     errorCode=U_ZERO_ERROR;
1328 #if !UCONFIG_NO_NORMALIZATION
1329     context.nfc=unorm2_getNFCInstance(&errorCode);
1330     context.nfkc=unorm2_getNFKCInstance(&errorCode);
1331     if(U_FAILURE(errorCode)) {
1332         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1333         return;
1334     }
1335 #endif
1336     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1337     if(U_FAILURE(errorCode)) {
1338         return; /* if we couldn't parse UnicodeData.txt, we should return */
1339     }
1340 
1341     /* sanity check on repeated properties */
1342     for(c=0xfffe; c<=0x10ffff;) {
1343         type=u_charType(c);
1344         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1345             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1346         }
1347         if(type!=U_UNASSIGNED) {
1348             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1349         }
1350         if((c&0xffff)==0xfffe) {
1351             ++c;
1352         } else {
1353             c+=0xffff;
1354         }
1355     }
1356 
1357     /* test that PUA is not "unassigned" */
1358     for(c=0xe000; c<=0x10fffd;) {
1359         type=u_charType(c);
1360         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1361             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1362         }
1363         if(type==U_UNASSIGNED) {
1364             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1365         } else if(type!=U_PRIVATE_USE_CHAR) {
1366             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1367         }
1368         if(c==0xf8ff) {
1369             c=0xf0000;
1370         } else if(c==0xffffd) {
1371             c=0x100000;
1372         } else {
1373             ++c;
1374         }
1375     }
1376 
1377     /* test u_enumCharTypes() */
1378     u_enumCharTypes(enumTypeRange, "a1");
1379 
1380     /* check default properties */
1381     u_enumCharTypes(enumDefaultsRange, NULL);
1382 }
1383 
TestCodeUnit()1384 static void TestCodeUnit(){
1385     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1386 
1387     int32_t i;
1388 
1389     for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
1390         UChar c=codeunit[i];
1391         if(i<4){
1392             if(!(U16_IS_SINGLE(c)) || (U16_IS_LEAD(c)) || (U16_IS_TRAIL(c)) ||
1393                     U16_IS_SURROGATE(c) || U_IS_SURROGATE(c)) {
1394                 log_err("ERROR: U+%04x is a single", c);
1395             }
1396 
1397         }
1398         if(i >= 4 && i< 8){
1399             if(!(U16_IS_LEAD(c)) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c) ||
1400                     !U16_IS_SURROGATE(c) || !U_IS_SURROGATE(c)){
1401                 log_err("ERROR: U+%04x is a first surrogate", c);
1402             }
1403         }
1404         if(i >= 8 && i< 12){
1405             if(!(U16_IS_TRAIL(c)) || U16_IS_SINGLE(c) || U16_IS_LEAD(c) ||
1406                     !U16_IS_SURROGATE(c) || !U_IS_SURROGATE(c)){
1407                 log_err("ERROR: U+%04x is a second surrogate", c);
1408             }
1409         }
1410 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1411         if(i<4){
1412             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1413                 log_err("ERROR: U+%04x is a single", c);
1414             }
1415 
1416         }
1417         if(i >= 4 && i< 8){
1418             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1419                 log_err("ERROR: U+%04x is a first surrogate", c);
1420             }
1421         }
1422         if(i >= 8 && i< 12){
1423             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1424                 log_err("ERROR: U+%04x is a second surrogate", c);
1425             }
1426         }
1427 #endif
1428     }
1429 }
1430 
TestCodePoint()1431 static void TestCodePoint(){
1432     const UChar32 codePoint[]={
1433         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1434         0xd800,
1435         0xdbff,
1436         0xdc00,
1437         0xdfff,
1438         0xdc04,
1439         0xd821,
1440         /*not a surrogate, valid, isUnicodeChar , not Error*/
1441         0x20ac,
1442         0xd7ff,
1443         0xe000,
1444         0xe123,
1445         0x0061,
1446         0xe065,
1447         0x20402,
1448         0x24506,
1449         0x23456,
1450         0x20402,
1451         0x10402,
1452         0x23456,
1453         /*not a surrogate, not valid, isUnicodeChar, isError */
1454         0x0015,
1455         0x009f,
1456         /*not a surrogate, not valid, not isUnicodeChar, isError */
1457         0xffff,
1458         0xfffe,
1459     };
1460     int32_t i;
1461     for(i=0; i<UPRV_LENGTHOF(codePoint); i++) {
1462         UChar32 c=codePoint[i];
1463         if(i<6) {
1464             if(!U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)) {
1465                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1466             }
1467             if(U_IS_UNICODE_CHAR(c)) {
1468                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1469             }
1470         } else if(i >=6 && i<18) {
1471             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1472                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1473             }
1474             if(!U_IS_UNICODE_CHAR(c)) {
1475                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1476             }
1477         } else if(i >=18 && i<20) {
1478             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1479                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1480             }
1481             if(!U_IS_UNICODE_CHAR(c)) {
1482                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1483             }
1484         } else if(i >=18 && i<UPRV_LENGTHOF(codePoint)) {
1485             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1486                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1487             }
1488             if(U_IS_UNICODE_CHAR(c)) {
1489                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1490             }
1491         }
1492 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1493         if(i<6){
1494             if(!UTF_IS_SURROGATE(c)){
1495                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1496             }
1497             if(UTF_IS_VALID(c)){
1498                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1499             }
1500             if(UTF_IS_UNICODE_CHAR(c)){
1501                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1502             }
1503             if(UTF_IS_ERROR(c)){
1504                 log_err("ERROR: isError() failed for U+%04x\n", c);
1505             }
1506         }else if(i >=6 && i<18){
1507             if(UTF_IS_SURROGATE(c)){
1508                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1509             }
1510             if(!UTF_IS_VALID(c)){
1511                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1512             }
1513             if(!UTF_IS_UNICODE_CHAR(c)){
1514                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1515             }
1516             if(UTF_IS_ERROR(c)){
1517                 log_err("ERROR: isError() failed for U+%04x\n", c);
1518             }
1519         }else if(i >=18 && i<20){
1520             if(UTF_IS_SURROGATE(c)){
1521                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1522             }
1523             if(UTF_IS_VALID(c)){
1524                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1525             }
1526             if(!UTF_IS_UNICODE_CHAR(c)){
1527                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1528             }
1529             if(!UTF_IS_ERROR(c)){
1530                 log_err("ERROR: isError() failed for U+%04x\n", c);
1531             }
1532         }
1533         else if(i >=18 && i<UPRV_LENGTHOF(codePoint)){
1534             if(UTF_IS_SURROGATE(c)){
1535                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1536             }
1537             if(UTF_IS_VALID(c)){
1538                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1539             }
1540             if(UTF_IS_UNICODE_CHAR(c)){
1541                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1542             }
1543             if(!UTF_IS_ERROR(c)){
1544                 log_err("ERROR: isError() failed for U+%04x\n", c);
1545             }
1546         }
1547 #endif
1548     }
1549 
1550     if(
1551         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1552         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1553         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1554         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1555     ) {
1556         log_err("error with U_IS_BMP()\n");
1557     }
1558 
1559     if(
1560         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1561         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1562         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1563         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1564     ) {
1565         log_err("error with U_IS_SUPPLEMENTARY()\n");
1566     }
1567 }
1568 
TestCharLength()1569 static void TestCharLength()
1570 {
1571     const int32_t codepoint[]={
1572         1, 0x0061,
1573         1, 0xe065,
1574         1, 0x20ac,
1575         2, 0x20402,
1576         2, 0x23456,
1577         2, 0x24506,
1578         2, 0x20402,
1579         2, 0x10402,
1580         1, 0xd7ff,
1581         1, 0xe000
1582     };
1583 
1584     int32_t i;
1585 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1586     UBool multiple;
1587 #endif
1588     for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
1589         UChar32 c=codepoint[i+1];
1590         if(
1591 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1592                 UTF_CHAR_LENGTH(c) != codepoint[i] ||
1593 #endif
1594                 U16_LENGTH(c) != codepoint[i]) {
1595             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1596         }
1597 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1598         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1599         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1600             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1601         }
1602 #endif
1603     }
1604 }
1605 
1606 /*internal functions ----*/
MakeProp(char * str)1607 static int32_t MakeProp(char* str)
1608 {
1609     int32_t result = 0;
1610     char* matchPosition =0;
1611 
1612     matchPosition = strstr(tagStrings, str);
1613     if (matchPosition == 0)
1614     {
1615         log_err("unrecognized type letter ");
1616         log_err(str);
1617     }
1618     else
1619         result = (int32_t)((matchPosition - tagStrings) / 2);
1620     return result;
1621 }
1622 
MakeDir(char * str)1623 static int32_t MakeDir(char* str)
1624 {
1625     int32_t pos = 0;
1626     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1627         if (strcmp(str, dirStrings[pos]) == 0) {
1628             return pos;
1629         }
1630     }
1631     return -1;
1632 }
1633 
1634 /* test u_charName() -------------------------------------------------------- */
1635 
/*
 * Expected character names used by the u_charName()/u_enumCharNames() tests.
 * For each code point: the modern name, the Unicode 1.0 name (always empty
 * here), the extended name, and the name alias (NULL when there is none).
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    { 0x0061,
      "LATIN SMALL LETTER A", "",
      "LATIN SMALL LETTER A",
      NULL },
    { 0x01a2,
      "LATIN CAPITAL LETTER OI", "",
      "LATIN CAPITAL LETTER OI",
      "LATIN CAPITAL LETTER GHA" },
    { 0x0284,
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
      "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
      NULL },
    { 0x0fd0,
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
      "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
      "TIBETAN MARK BKA- SHOG GI MGO RGYAN" },
    { 0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401", NULL },
    { 0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED", NULL },
    { 0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA", NULL },
    { 0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH", NULL },
    { 0xd800, "", "", "<lead surrogate-D800>", NULL },
    { 0xdc00, "", "", "<trail surrogate-DC00>", NULL },
    { 0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS", NULL },
    { 0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN", NULL },
    { 0xffff, "", "", "<noncharacter-FFFF>", NULL },
    { 0x1d0c5,
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
      "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
      "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS" },
    { 0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456", NULL }
};
1663 
1664 static UBool
enumCharNamesFn(void * context,UChar32 code,UCharNameChoice nameChoice,const char * name,int32_t length)1665 enumCharNamesFn(void *context,
1666                 UChar32 code, UCharNameChoice nameChoice,
1667                 const char *name, int32_t length) {
1668     int32_t *pCount=(int32_t *)context;
1669     const char *expected;
1670     int i;
1671 
1672     if(length<=0 || length!=(int32_t)strlen(name)) {
1673         /* should not be called with an empty string or invalid length */
1674         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1675         return TRUE;
1676     }
1677 
1678     ++*pCount;
1679     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1680         if(code==(UChar32)names[i].code) {
1681             switch (nameChoice) {
1682                 case U_EXTENDED_CHAR_NAME:
1683                     if(0!=strcmp(name, names[i].extName)) {
1684                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1685                     }
1686                     break;
1687                 case U_UNICODE_CHAR_NAME:
1688                     if(0!=strcmp(name, names[i].name)) {
1689                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1690                     }
1691                     break;
1692                 case U_UNICODE_10_CHAR_NAME:
1693                     expected=names[i].oldName;
1694                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
1695                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1696                     }
1697                     break;
1698                 case U_CHAR_NAME_ALIAS:
1699                     expected=names[i].alias;
1700                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1701                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1702                     }
1703                     break;
1704                 case U_CHAR_NAME_CHOICE_COUNT:
1705                     break;
1706             }
1707             break;
1708         }
1709     }
1710     return TRUE;
1711 }
1712 
1713 struct enumExtCharNamesContext {
1714     uint32_t length;
1715     int32_t last;
1716 };
1717 
1718 static UBool
enumExtCharNamesFn(void * context,UChar32 code,UCharNameChoice nameChoice,const char * name,int32_t length)1719 enumExtCharNamesFn(void *context,
1720                 UChar32 code, UCharNameChoice nameChoice,
1721                 const char *name, int32_t length) {
1722     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1723 
1724     if (ecncp->last != (int32_t) code - 1) {
1725         if (ecncp->last < 0) {
1726             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1727         } else {
1728             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1729         }
1730     }
1731     ecncp->last = (int32_t) code;
1732 
1733     if (!*name) {
1734         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1735     }
1736 
1737     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1738 }
1739 
1740 /**
1741  * This can be made more efficient by moving it into putil.c and having
1742  * it directly access the ebcdic translation tables.
1743  * TODO: If we get this method in putil.c, then delete it from here.
1744  */
1745 static UChar
u_charToUChar(char c)1746 u_charToUChar(char c) {
1747     UChar uc;
1748     u_charsToUChars(&c, &uc, 1);
1749     return uc;
1750 }
1751 
1752 static void
TestCharNames()1753 TestCharNames() {
1754     static char name[80];
1755     UErrorCode errorCode=U_ZERO_ERROR;
1756     struct enumExtCharNamesContext extContext;
1757     const char *expected;
1758     int32_t length;
1759     UChar32 c;
1760     int32_t i;
1761 
1762     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1763     length=uprv_getMaxCharNameLength();
1764     if(length==0) {
1765         /* no names data available */
1766         return;
1767     }
1768     if(length<83) { /* Unicode 3.2 max char name length */
1769         log_err("uprv_getMaxCharNameLength()=%d is too short");
1770     }
1771     /* ### TODO same tests for max ISO comment length as for max name length */
1772 
1773     log_verbose("Testing u_charName()\n");
1774     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1775         /* modern Unicode character name */
1776         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1777         if(U_FAILURE(errorCode)) {
1778             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1779             return;
1780         }
1781         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1782             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1783         }
1784 
1785         /* find the modern name */
1786         if (*names[i].name) {
1787             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1788             if(U_FAILURE(errorCode)) {
1789                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1790                 return;
1791             }
1792             if(c!=(UChar32)names[i].code) {
1793                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1794             }
1795         }
1796 
1797         /* Unicode 1.0 character name */
1798         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1799         if(U_FAILURE(errorCode)) {
1800             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1801             return;
1802         }
1803         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1804             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1805         }
1806 
1807         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1808         if(names[i].oldName[0]!=0 /* && length>0 */) {
1809             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1810             if(U_FAILURE(errorCode)) {
1811                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1812                 return;
1813             }
1814             if(c!=(UChar32)names[i].code) {
1815                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1816             }
1817         }
1818 
1819         /* Unicode character name alias */
1820         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1821         if(U_FAILURE(errorCode)) {
1822             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1823             return;
1824         }
1825         expected=names[i].alias;
1826         if(expected==NULL) {
1827             expected="";
1828         }
1829         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1830             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1831                     names[i].code, name, length, expected);
1832         }
1833 
1834         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1835         if(expected[0]!=0 /* && length>0 */) {
1836             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1837             if(U_FAILURE(errorCode)) {
1838                 log_err("u_charFromName(%s - alias) error %s\n",
1839                         expected, u_errorName(errorCode));
1840                 return;
1841             }
1842             if(c!=(UChar32)names[i].code) {
1843                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1844                         expected, c, names[i].code);
1845             }
1846         }
1847     }
1848 
1849     /* test u_enumCharNames() */
1850     length=0;
1851     errorCode=U_ZERO_ERROR;
1852     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1853     if(U_FAILURE(errorCode) || length<94140) {
1854         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1855     }
1856 
1857     extContext.length = 0;
1858     extContext.last = -1;
1859     errorCode=U_ZERO_ERROR;
1860     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1861     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1862         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1863     }
1864 
1865     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1866     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1867         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1868     }
1869 
1870     /* Test getCharNameCharacters */
1871     if(!getTestOption(QUICK_OPTION)) {
1872         enum { BUFSIZE = 256 };
1873         UErrorCode ec = U_ZERO_ERROR;
1874         char buf[BUFSIZE];
1875         int32_t maxLength;
1876         UChar32 cp;
1877         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1878         int32_t l1, l2;
1879         UBool map[256];
1880         UBool ok;
1881 
1882         USet* set = uset_open(1, 0); /* empty set */
1883         USet* dumb = uset_open(1, 0); /* empty set */
1884 
1885         /*
1886          * uprv_getCharNameCharacters() will likely return more lowercase
1887          * letters than actual character names contain because
1888          * it includes all the characters in lowercased names of
1889          * general categories, for the full possible set of extended names.
1890          */
1891         {
1892             USetAdder sa={
1893                 NULL,
1894                 uset_add,
1895                 uset_addRange,
1896                 uset_addString,
1897                 NULL /* don't need remove() */
1898             };
1899             sa.set=set;
1900             uprv_getCharNameCharacters(&sa);
1901         }
1902 
1903         /* build set the dumb (but sure-fire) way */
1904         for (i=0; i<256; ++i) {
1905             map[i] = FALSE;
1906         }
1907 
1908         maxLength=0;
1909         for (cp=0; cp<0x110000; ++cp) {
1910             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1911                                      buf, BUFSIZE, &ec);
1912             if (U_FAILURE(ec)) {
1913                 log_err("FAIL: u_charName failed when it shouldn't\n");
1914                 uset_close(set);
1915                 uset_close(dumb);
1916                 return;
1917             }
1918             if(len>maxLength) {
1919                 maxLength=len;
1920             }
1921 
1922             for (i=0; i<len; ++i) {
1923                 if (!map[(uint8_t) buf[i]]) {
1924                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1925                     map[(uint8_t) buf[i]] = TRUE;
1926                 }
1927             }
1928 
1929             /* test for leading/trailing whitespace */
1930             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1931                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1932             }
1933         }
1934 
1935         if(map[(uint8_t)'\t']) {
1936             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1937         }
1938 
1939         length=uprv_getMaxCharNameLength();
1940         if(length!=maxLength) {
1941             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1942                     length, maxLength);
1943         }
1944 
1945         /* compare the sets.  Where is my uset_equals?!! */
1946         ok=TRUE;
1947         for(i=0; i<256; ++i) {
1948             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1949                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1950                     /* ignore lowercase a-z that are in set but not in dumb */
1951                     ok=TRUE;
1952                 } else {
1953                     ok=FALSE;
1954                     break;
1955                 }
1956             }
1957         }
1958 
1959         l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1960         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1961         if (U_FAILURE(ec)) {
1962             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1963             uset_close(set);
1964             uset_close(dumb);
1965             return;
1966         }
1967 
1968         if (l1 >= BUFSIZE) {
1969             l1 = BUFSIZE-1;
1970             pat[l1] = 0;
1971         }
1972         if (l2 >= BUFSIZE) {
1973             l2 = BUFSIZE-1;
1974             dumbPat[l2] = 0;
1975         }
1976 
1977         if (!ok) {
1978             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1979                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1980         } else if(getTestOption(VERBOSITY_OPTION)) {
1981             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1982         }
1983 
1984         uset_close(set);
1985         uset_close(dumb);
1986     }
1987 
1988     /* ### TODO: test error cases and other interesting things */
1989 }
1990 
1991 static void
TestUCharFromNameUnderflow()1992 TestUCharFromNameUnderflow() {
1993     // Ticket #10889: Underflow crash when there is no dash.
1994     UErrorCode errorCode=U_ZERO_ERROR;
1995     UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
1996     if(U_SUCCESS(errorCode)) {
1997         log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1998     }
1999 
2000     // Test related edge cases.
2001     errorCode=U_ZERO_ERROR;
2002     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
2003     if(U_SUCCESS(errorCode)) {
2004         log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
2005     }
2006 
2007     errorCode=U_ZERO_ERROR;
2008     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
2009     if(U_SUCCESS(errorCode)) {
2010         log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
2011     }
2012 
2013     errorCode=U_ZERO_ERROR;
2014     c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
2015     if(U_SUCCESS(errorCode)) {
2016         log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
2017     }
2018 }
2019 
2020 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
2021 
2022 static void
TestMirroring()2023 TestMirroring() {
2024     USet *set;
2025     UErrorCode errorCode;
2026 
2027     UChar32 start, end, c2, c3;
2028     int32_t i;
2029 
2030     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
2031 
2032     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
2033 
2034     log_verbose("Testing u_isMirrored()\n");
2035     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
2036          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
2037         )
2038     ) {
2039         log_err("u_isMirrored() does not work correctly\n");
2040     }
2041 
2042     log_verbose("Testing u_charMirror()\n");
2043     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
2044          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
2045          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
2046          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2047          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
2048          )
2049     ) {
2050         log_err("u_charMirror() does not work correctly\n");
2051     }
2052 
2053     /* verify that Bidi_Mirroring_Glyph roundtrips */
2054     errorCode=U_ZERO_ERROR;
2055     set=uset_openPattern(mirroredPattern, 17, &errorCode);
2056 
2057     if (U_FAILURE(errorCode)) {
2058         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
2059     } else {
2060         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
2061             do {
2062                 c2=u_charMirror(start);
2063                 c3=u_charMirror(c2);
2064                 if(c3!=start) {
2065                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
2066                 }
2067                 c3=u_getBidiPairedBracket(start);
2068                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
2069                     if(c3!=start) {
2070                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
2071                                 (long)start);
2072                     }
2073                 } else {
2074                     if(c3!=c2) {
2075                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
2076                                 (long)start, (long)c2);
2077                     }
2078                 }
2079             } while(++start<=end);
2080         }
2081     }
2082 
2083     uset_close(set);
2084 }
2085 
2086 
2087 struct RunTestData
2088 {
2089     const char *runText;
2090     UScriptCode runCode;
2091 };
2092 
2093 typedef struct RunTestData RunTestData;
2094 
2095 static void
CheckScriptRuns(UScriptRun * scriptRun,int32_t * runStarts,const RunTestData * testData,int32_t nRuns,const char * prefix)2096 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2097                 const char *prefix)
2098 {
2099     int32_t run, runStart, runLimit;
2100     UScriptCode runCode;
2101 
2102     /* iterate over all the runs */
2103     run = 0;
2104     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2105         if (runStart != runStarts[run]) {
2106             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2107                 prefix, run, runStarts[run], runStart);
2108         }
2109 
2110         if (runLimit != runStarts[run + 1]) {
2111             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2112                 prefix, run, runStarts[run + 1], runLimit);
2113         }
2114 
2115         if (runCode != testData[run].runCode) {
2116             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2117                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2118         }
2119 
2120         run += 1;
2121 
2122         /* stop when we've seen all the runs we expect to see */
2123         if (run >= nRuns) {
2124             break;
2125         }
2126     }
2127 
2128     /* Complain if we didn't see then number of runs we expected */
2129     if (run != nRuns) {
2130         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2131     }
2132 }
2133 
2134 static void
TestUScriptRunAPI()2135 TestUScriptRunAPI()
2136 {
2137     static const RunTestData testData1[] = {
2138         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2139         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2140         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2141         {"English (", USCRIPT_LATIN},
2142         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2143         {") ", USCRIPT_LATIN},
2144         {"\\u6F22\\u5B75", USCRIPT_HAN},
2145         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2146         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2147         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2148     };
2149 
2150     static const RunTestData testData2[] = {
2151        {"((((((((((abc))))))))))", USCRIPT_LATIN}
2152     };
2153 
2154     static const struct {
2155       const RunTestData *testData;
2156       int32_t nRuns;
2157     } testDataEntries[] = {
2158         {testData1, UPRV_LENGTHOF(testData1)},
2159         {testData2, UPRV_LENGTHOF(testData2)}
2160     };
2161 
2162     static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
2163     int32_t testEntry;
2164 
2165     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2166         UChar testString[1024];
2167         int32_t runStarts[256];
2168         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2169         const RunTestData *testData = testDataEntries[testEntry].testData;
2170 
2171         int32_t run, stringLimit;
2172         UScriptRun *scriptRun = NULL;
2173         UErrorCode err;
2174 
2175         /*
2176          * Fill in the test string and the runStarts array.
2177          */
2178         stringLimit = 0;
2179         for (run = 0; run < nTestRuns; run += 1) {
2180             runStarts[run] = stringLimit;
2181             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2182             /*stringLimit -= 1;*/
2183         }
2184 
2185         /* The limit of the last run */
2186         runStarts[nTestRuns] = stringLimit;
2187 
2188         /*
2189          * Make sure that calling uscript_OpenRun with a NULL text pointer
2190          * and a non-zero text length returns the correct error.
2191          */
2192         err = U_ZERO_ERROR;
2193         scriptRun = uscript_openRun(NULL, stringLimit, &err);
2194 
2195         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2196             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2197         }
2198 
2199         if (scriptRun != NULL) {
2200             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2201             uscript_closeRun(scriptRun);
2202         }
2203 
2204         /*
2205          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2206          * and a zero text length returns the correct error.
2207          */
2208         err = U_ZERO_ERROR;
2209         scriptRun = uscript_openRun(testString, 0, &err);
2210 
2211         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2212             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2213         }
2214 
2215         if (scriptRun != NULL) {
2216             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2217             uscript_closeRun(scriptRun);
2218         }
2219 
2220         /*
2221          * Make sure that calling uscript_openRun with a NULL text pointer
2222          * and a zero text length doesn't return an error.
2223          */
2224         err = U_ZERO_ERROR;
2225         scriptRun = uscript_openRun(NULL, 0, &err);
2226 
2227         if (U_FAILURE(err)) {
2228             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2229         }
2230 
2231         /* Make sure that the empty iterator doesn't find any runs */
2232         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2233             log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2234         }
2235 
2236         /*
2237          * Make sure that calling uscript_setRunText with a NULL text pointer
2238          * and a non-zero text length returns the correct error.
2239          */
2240         err = U_ZERO_ERROR;
2241         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2242 
2243         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2244             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2245         }
2246 
2247         /*
2248          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2249          * and a zero text length returns the correct error.
2250          */
2251         err = U_ZERO_ERROR;
2252         uscript_setRunText(scriptRun, testString, 0, &err);
2253 
2254         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2255             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2256         }
2257 
2258         /*
2259          * Now call uscript_setRunText on the empty iterator
2260          * and make sure that it works.
2261          */
2262         err = U_ZERO_ERROR;
2263         uscript_setRunText(scriptRun, testString, stringLimit, &err);
2264 
2265         if (U_FAILURE(err)) {
2266             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2267         } else {
2268             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2269         }
2270 
2271         uscript_closeRun(scriptRun);
2272 
2273         /*
2274          * Now open an interator over the testString
2275          * using uscript_openRun and make sure that it works
2276          */
2277         scriptRun = uscript_openRun(testString, stringLimit, &err);
2278 
2279         if (U_FAILURE(err)) {
2280             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2281         } else {
2282             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2283         }
2284 
2285         /* Now reset the iterator, and make sure
2286          * that it still works.
2287          */
2288         uscript_resetRun(scriptRun);
2289 
2290         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2291 
2292         /* Close the iterator */
2293         uscript_closeRun(scriptRun);
2294     }
2295 }
2296 
2297 /* test additional, non-core properties */
2298 static void
TestAdditionalProperties()2299 TestAdditionalProperties() {
2300     /* test data for u_charAge() */
2301     static const struct {
2302         UChar32 c;
2303         UVersionInfo version;
2304     } charAges[]={
2305         {0x41,    { 1, 1, 0, 0 }},
2306         {0xffff,  { 1, 1, 0, 0 }},
2307         {0x20ab,  { 2, 0, 0, 0 }},
2308         {0x2fffe, { 2, 0, 0, 0 }},
2309         {0x20ac,  { 2, 1, 0, 0 }},
2310         {0xfb1d,  { 3, 0, 0, 0 }},
2311         {0x3f4,   { 3, 1, 0, 0 }},
2312         {0x10300, { 3, 1, 0, 0 }},
2313         {0x220,   { 3, 2, 0, 0 }},
2314         {0xff60,  { 3, 2, 0, 0 }}
2315     };
2316 
2317     /* test data for u_hasBinaryProperty() */
2318     static const int32_t
2319     props[][3]={ /* code point, property, value */
2320         { 0x0627, UCHAR_ALPHABETIC, TRUE },
2321         { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2322         { 0x2028, UCHAR_ALPHABETIC, FALSE },
2323 
2324         { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2325         { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2326 
2327         { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2328         { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2329 
2330         { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2331         { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2332 
2333         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2334         { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2335         { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2336         { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2337         { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2338 
2339         { 0x058a, UCHAR_DASH, TRUE },
2340         { 0x007e, UCHAR_DASH, FALSE },
2341 
2342         { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2343         { 0x3000, UCHAR_DIACRITIC, FALSE },
2344 
2345         { 0x0e46, UCHAR_EXTENDER, TRUE },
2346         { 0x0020, UCHAR_EXTENDER, FALSE },
2347 
2348 #if !UCONFIG_NO_NORMALIZATION
2349         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2350         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2351         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2352 
2353         { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2354         { 0x0308, UCHAR_NFD_INERT, FALSE },
2355 
2356         { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2357         { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2358 
2359         { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2360         { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2361         { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2362         { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2363         { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2364         { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2365 
2366         { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2367         { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2368 
2369         { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2370         { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2371         { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2372         { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2373         { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2374         { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2375 #endif
2376 
2377         { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2378         { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2379         { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2380 
2381         { 0x30fb, UCHAR_HYPHEN, TRUE },
2382         { 0xfe58, UCHAR_HYPHEN, FALSE },
2383 
2384         { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2385         { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2386         { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2387 
2388         { 0x2172, UCHAR_ID_START, TRUE },
2389         { 0x007a, UCHAR_ID_START, TRUE },
2390         { 0x0039, UCHAR_ID_START, FALSE },
2391 
2392         { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2393         { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2394         { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2395 
2396         { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2397         { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2398 
2399         { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2400         { 0x0345, UCHAR_LOWERCASE, TRUE },
2401         { 0x0030, UCHAR_LOWERCASE, FALSE },
2402 
2403         { 0x1d7a9, UCHAR_MATH, TRUE },
2404         { 0x2135, UCHAR_MATH, TRUE },
2405         { 0x0062, UCHAR_MATH, FALSE },
2406 
2407         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2408         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2409         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2410 
2411         { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2412         { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2413         { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2414 
2415         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2416         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2417 
2418         { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2419         { 0x2162, UCHAR_UPPERCASE, TRUE },
2420         { 0x0345, UCHAR_UPPERCASE, FALSE },
2421 
2422         { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2423         { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2424         { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2425 
2426         { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2427         { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2428         { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2429 
2430         { 0x16ee, UCHAR_XID_START, TRUE },
2431         { 0x23456, UCHAR_XID_START, TRUE },
2432         { 0x1d1aa, UCHAR_XID_START, FALSE },
2433 
2434         /*
2435          * Version break:
2436          * The following properties are only supported starting with the
2437          * Unicode version indicated in the second field.
2438          */
2439         { -1, 0x320, 0 },
2440 
2441         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2442         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2443         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2444 
2445         { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2446         { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2447         { 0xe0001, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2448         { 0xe0100, UCHAR_DEPRECATED, FALSE },
2449 
2450         { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2451         { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2452         { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2453         { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2454 
2455         { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2456         { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2457         { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2458         { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2459 
2460         { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2461         { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2462 
2463         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2464         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2465 
2466         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2467         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2468 
2469         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2470         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2471 
2472         { 0x2e9b, UCHAR_RADICAL, TRUE },
2473         { 0x4e00, UCHAR_RADICAL, FALSE },
2474 
2475         { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2476         { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2477 
2478         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2479         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2480 
2481         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2482 
2483         { 0x002e, UCHAR_S_TERM, TRUE },
2484         { 0x0061, UCHAR_S_TERM, FALSE },
2485 
2486         { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2487         { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2488         { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2489         { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2490 
2491         /* enum/integer type properties */
2492 
2493         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2494         /* test default Bidi classes for unassigned code points */
2495         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2496         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2497         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2498         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2499         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2500         { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2501         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2502         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2503         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2504         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2505         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2506 
2507         { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2508         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2509         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2510         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2511         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2512         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2513         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2514 
2515         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2516         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2517         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2518         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2519         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2520         { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2521         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2522         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2523         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2524         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2525         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2526 
2527         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2528         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2529 
2530         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2531         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2532         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2533         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2534         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2535         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2536         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2537         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2538         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2539 
2540         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2541         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2542         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2543         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2544         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2545         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2546         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2547         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2548         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2549         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2550         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2551         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2552         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2553         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2554         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2555         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2556         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2557 
2558         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2559         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2560         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2561 
2562         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2563         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2564         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2565         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2566         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2567 
2568         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2569         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2570         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2571         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2572         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2573         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2574         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2575         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2576 
2577         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2578         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2579         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2580         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2581         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2582         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2583         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2584         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2585         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2586         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2587         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2588         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2589         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2590         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2591         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2592         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2593 
2594         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2595 
2596         /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2597 
2598         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2599         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2600         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2601         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2602         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2603         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2604         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2605 
2606         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2607         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2608         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2609         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2610 
2611         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2612         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2613         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2614         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2615         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2616         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2617 
2618         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2619         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2620         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2621         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2622 
2623         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2624         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2625         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2626         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2627         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2628         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2629         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2630 
2631         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2632         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2633         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2634         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2635 
2636         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2637         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2638         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2639         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2640 
2641         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2642         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2643         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2644         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2645         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2646 
2647         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2648 
2649         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2650 
2651         { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2652         { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2653         { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2654 
2655         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2656         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2657         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2658         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2659         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2660 
2661         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2662         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2663         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2664 
2665         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2666         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2667         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2668         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2669 
2670         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2671         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2672         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2673         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2674         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2675         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2676 
2677         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2678         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2679         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2680         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2681 
2682         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2683         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2684         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2685         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2686 
2687         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2688         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2689         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2690         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2691 
2692         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2693 
2694         /* unassigned code points in new default Bidi R blocks */
2695         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2696         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2697 
2698         /* test some script codes >127 */
2699         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2700         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2701         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2702 
2703         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2704 
2705         /* value changed in Unicode 6.0 */
2706         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2707 
2708         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2709 
2710         /* unassigned code points in new/changed default Bidi AL blocks */
2711         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2712         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2713 
2714         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2715 
2716         /* unassigned code points in the currency symbols block now default to ET */
2717         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2718         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2719 
2720         /* new property in Unicode 6.3 */
2721         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2722         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2723         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2724         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2725         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2726         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2727 
2728         { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
2729 
2730         /* new character range with Joining_Group values */
2731         { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2732         { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
2733         { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
2734         { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
2735         { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2736 
2737         { -1, 0xa00, 0 },  // version break for Unicode 10
2738 
2739         { 0x1F1E5, UCHAR_REGIONAL_INDICATOR, FALSE },
2740         { 0x1F1E7, UCHAR_REGIONAL_INDICATOR, TRUE },
2741         { 0x1F1FF, UCHAR_REGIONAL_INDICATOR, TRUE },
2742         { 0x1F200, UCHAR_REGIONAL_INDICATOR, FALSE },
2743 
2744         { 0x0600, UCHAR_PREPENDED_CONCATENATION_MARK, TRUE },
2745         { 0x0606, UCHAR_PREPENDED_CONCATENATION_MARK, FALSE },
2746         { 0x110BD, UCHAR_PREPENDED_CONCATENATION_MARK, TRUE },
2747 
2748         /* undefined UProperty values */
2749         { 0x61, 0x4a7, 0 },
2750         { 0x234bc, 0x15ed, 0 }
2751     };
2752 
2753     UVersionInfo version;
2754     UChar32 c;
2755     int32_t i, result, uVersion;
2756     UProperty which;
2757 
2758     /* what is our Unicode version? */
2759     u_getUnicodeVersion(version);
2760     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2761 
2762     u_charAge(0x20, version);
2763     if(version[0]==0) {
2764         /* no additional properties available */
2765         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2766         return;
2767     }
2768 
2769     /* test u_charAge() */
2770     for(i=0; i<UPRV_LENGTHOF(charAges); ++i) {
2771         u_charAge(charAges[i].c, version);
2772         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2773             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2774                 charAges[i].c,
2775                 version[0], version[1], version[2], version[3],
2776                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2777         }
2778     }
2779 
2780     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2781         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2782         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2783         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2784         u_getIntPropertyMinValue(0x2345)!=0
2785     ) {
2786         log_err("error: u_getIntPropertyMinValue() wrong\n");
2787     }
2788     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2789         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2790     }
2791     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2792         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2793     }
2794     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2795         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2796     }
2797     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2798         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2799     }
2800     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2801         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2802     }
2803     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2804         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2805     }
2806     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2807         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2808     }
2809     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2810         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2811     }
2812     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2813         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2814     }
2815     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2816         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2817     }
2818     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2819         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2820     }
2821     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2822         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2823     }
2824     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2825         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2826     }
2827     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2828         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2829     }
2830     /*JB#2410*/
2831     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2832         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2833     }
2834     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2835         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2836     }
2837     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2838         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2839     }
2840     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2841         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2842     }
2843     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2844         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2845     }
2846 
2847     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2848     for(i=0; i<UPRV_LENGTHOF(props); ++i) {
2849         const char *whichName;
2850 
2851         if(props[i][0]<0) {
2852             /* Unicode version break */
2853             if(uVersion<props[i][1]) {
2854                 break; /* do not test properties that are not yet supported */
2855             } else {
2856                 continue; /* skip this row */
2857             }
2858         }
2859 
2860         c=(UChar32)props[i][0];
2861         which=(UProperty)props[i][1];
2862         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2863 
2864         if(which<UCHAR_INT_START) {
2865             result=u_hasBinaryProperty(c, which);
2866             if(result!=props[i][2]) {
2867                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2868                         c, whichName, result, i);
2869             }
2870         }
2871 
2872         result=u_getIntPropertyValue(c, which);
2873         if(result!=props[i][2]) {
2874             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2875                     c, whichName, result, props[i][2], i);
2876         }
2877 
2878         /* test separate functions, too */
2879         switch((UProperty)props[i][1]) {
2880         case UCHAR_ALPHABETIC:
2881             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2882                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2883                         props[i][0], result, i);
2884             }
2885             break;
2886         case UCHAR_LOWERCASE:
2887             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2888                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2889                         props[i][0], result, i);
2890             }
2891             break;
2892         case UCHAR_UPPERCASE:
2893             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2894                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2895                         props[i][0], result, i);
2896             }
2897             break;
2898         case UCHAR_WHITE_SPACE:
2899             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2900                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2901                         props[i][0], result, i);
2902             }
2903             break;
2904         default:
2905             break;
2906         }
2907     }
2908 }
2909 
2910 static void
TestNumericProperties(void)2911 TestNumericProperties(void) {
2912     /* see UnicodeData.txt, DerivedNumericValues.txt */
2913     static const struct {
2914         UChar32 c;
2915         int32_t type;
2916         double numValue;
2917     } values[]={
2918         { 0x0F33, U_NT_NUMERIC, -1./2. },
2919         { 0x0C66, U_NT_DECIMAL, 0 },
2920         { 0x96f6, U_NT_NUMERIC, 0 },
2921         { 0xa833, U_NT_NUMERIC, 1./16. },
2922         { 0x2152, U_NT_NUMERIC, 1./10. },
2923         { 0x2151, U_NT_NUMERIC, 1./9. },
2924         { 0x1245f, U_NT_NUMERIC, 1./8. },
2925         { 0x2150, U_NT_NUMERIC, 1./7. },
2926         { 0x2159, U_NT_NUMERIC, 1./6. },
2927         { 0x09f6, U_NT_NUMERIC, 3./16. },
2928         { 0x2155, U_NT_NUMERIC, 1./5. },
2929         { 0x00BD, U_NT_NUMERIC, 1./2. },
2930         { 0x0031, U_NT_DECIMAL, 1. },
2931         { 0x4e00, U_NT_NUMERIC, 1. },
2932         { 0x58f1, U_NT_NUMERIC, 1. },
2933         { 0x10320, U_NT_NUMERIC, 1. },
2934         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2935         { 0x00B2, U_NT_DIGIT, 2. },
2936         { 0x5f10, U_NT_NUMERIC, 2. },
2937         { 0x1813, U_NT_DECIMAL, 3. },
2938         { 0x5f0e, U_NT_NUMERIC, 3. },
2939         { 0x2173, U_NT_NUMERIC, 4. },
2940         { 0x8086, U_NT_NUMERIC, 4. },
2941         { 0x278E, U_NT_DIGIT, 5. },
2942         { 0x1D7F2, U_NT_DECIMAL, 6. },
2943         { 0x247A, U_NT_DIGIT, 7. },
2944         { 0x7396, U_NT_NUMERIC, 9. },
2945         { 0x1372, U_NT_NUMERIC, 10. },
2946         { 0x216B, U_NT_NUMERIC, 12. },
2947         { 0x16EE, U_NT_NUMERIC, 17. },
2948         { 0x249A, U_NT_NUMERIC, 19. },
2949         { 0x303A, U_NT_NUMERIC, 30. },
2950         { 0x5345, U_NT_NUMERIC, 30. },
2951         { 0x32B2, U_NT_NUMERIC, 37. },
2952         { 0x1375, U_NT_NUMERIC, 40. },
2953         { 0x10323, U_NT_NUMERIC, 50. },
2954         { 0x0BF1, U_NT_NUMERIC, 100. },
2955         { 0x964c, U_NT_NUMERIC, 100. },
2956         { 0x217E, U_NT_NUMERIC, 500. },
2957         { 0x2180, U_NT_NUMERIC, 1000. },
2958         { 0x4edf, U_NT_NUMERIC, 1000. },
2959         { 0x2181, U_NT_NUMERIC, 5000. },
2960         { 0x137C, U_NT_NUMERIC, 10000. },
2961         { 0x4e07, U_NT_NUMERIC, 10000. },
2962         { 0x12432, U_NT_NUMERIC, 216000. },
2963         { 0x12433, U_NT_NUMERIC, 432000. },
2964         { 0x4ebf, U_NT_NUMERIC, 100000000. },
2965         { 0x5146, U_NT_NUMERIC, 1000000000000. },
2966         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2967         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2968         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2969         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2970         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2971         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2972         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2973         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2974     };
2975 
2976     double nv;
2977     UChar32 c;
2978     int32_t i, type;
2979 
2980     for(i=0; i<UPRV_LENGTHOF(values); ++i) {
2981         c=values[i].c;
2982         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2983         nv=u_getNumericValue(c);
2984 
2985         if(type!=values[i].type) {
2986             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2987         }
2988         if(0.000001 <= fabs(nv - values[i].numValue)) {
2989             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2990         }
2991     }
2992 }
2993 
2994 /**
2995  * Test the property names and property value names API.
2996  */
2997 static void
TestPropertyNames(void)2998 TestPropertyNames(void) {
2999     int32_t p, v, choice=0, rev;
3000     UBool atLeastSomething = FALSE;
3001 
3002     for (p=0; ; ++p) {
3003         UProperty propEnum = (UProperty)p;
3004         UBool sawProp = FALSE;
3005         if(p > 10 && !atLeastSomething) {
3006           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
3007           return;
3008         }
3009 
3010         for (choice=0; ; ++choice) {
3011             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
3012             if (name) {
3013                 if (!sawProp)
3014                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
3015                 log_verbose("%d=\"%s\"", choice, name);
3016                 sawProp = TRUE;
3017                 atLeastSomething = TRUE;
3018 
3019                 /* test reverse mapping */
3020                 rev = u_getPropertyEnum(name);
3021                 if (rev != p) {
3022                     log_err("Property round-trip failure: %d -> %s -> %d\n",
3023                             p, name, rev);
3024                 }
3025             }
3026             if (!name && choice>0) break;
3027         }
3028         if (sawProp) {
3029             /* looks like a valid property; check the values */
3030             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3031             int32_t max = 0;
3032             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
3033                 max = 255;
3034             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
3035                 /* it's far too slow to iterate all the way up to
3036                    the real max, U_GC_P_MASK */
3037                 max = U_GC_NL_MASK;
3038             } else if (p == UCHAR_BLOCK) {
3039                 /* UBlockCodes, unlike other values, start at 1 */
3040                 max = 1;
3041             }
3042             log_verbose("\n");
3043             for (v=-1; ; ++v) {
3044                 UBool sawValue = FALSE;
3045                 for (choice=0; ; ++choice) {
3046                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
3047                     if (vname) {
3048                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
3049                         log_verbose("%d=\"%s\"", choice, vname);
3050                         sawValue = TRUE;
3051 
3052                         /* test reverse mapping */
3053                         rev = u_getPropertyValueEnum(propEnum, vname);
3054                         if (rev != v) {
3055                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
3056                                     pname, v, vname, rev);
3057                         }
3058                     }
3059                     if (!vname && choice>0) break;
3060                 }
3061                 if (sawValue) {
3062                     log_verbose("\n");
3063                 }
3064                 if (!sawValue && v>=max) break;
3065             }
3066         }
3067         if (!sawProp) {
3068             if (p>=UCHAR_STRING_LIMIT) {
3069                 break;
3070             } else if (p>=UCHAR_DOUBLE_LIMIT) {
3071                 p = UCHAR_STRING_START - 1;
3072             } else if (p>=UCHAR_MASK_LIMIT) {
3073                 p = UCHAR_DOUBLE_START - 1;
3074             } else if (p>=UCHAR_INT_LIMIT) {
3075                 p = UCHAR_MASK_START - 1;
3076             } else if (p>=UCHAR_BINARY_LIMIT) {
3077                 p = UCHAR_INT_START - 1;
3078             }
3079         }
3080     }
3081 }
3082 
3083 /**
3084  * Test the property values API.  See JB#2410.
3085  */
3086 static void
TestPropertyValues(void)3087 TestPropertyValues(void) {
3088     int32_t i, p, min, max;
3089     UErrorCode ec;
3090 
3091     /* Min should be 0 for everything. */
3092     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
3093     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
3094         UProperty propEnum = (UProperty)p;
3095         min = u_getIntPropertyMinValue(propEnum);
3096         if (min != 0) {
3097             if (p == UCHAR_BLOCK) {
3098                 /* This is okay...for now.  See JB#2487.
3099                    TODO Update this for JB#2487. */
3100             } else {
3101                 const char* name;
3102                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3103                 if (name == NULL)
3104                     name = "<ERROR>";
3105                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3106                         name, min);
3107             }
3108         }
3109     }
3110 
3111     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3112         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3113         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3114     }
3115 
3116     /* Max should be -1 for invalid properties. */
3117     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3118     if (max != -1) {
3119         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3120                 max);
3121     }
3122 
3123     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3124     for (i=0; i<2; ++i) {
3125         int32_t script;
3126         const char* desc;
3127         ec = U_ZERO_ERROR;
3128         switch (i) {
3129         case 0:
3130             script = uscript_getScript(-1, &ec);
3131             desc = "uscript_getScript(-1)";
3132             break;
3133         case 1:
3134             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3135             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3136             break;
3137         default:
3138             log_err("Internal test error. Too many scripts\n");
3139             return;
3140         }
3141         /* We don't explicitly test ec.  It should be U_FAILURE but it
3142            isn't documented as such. */
3143         if (script != (int32_t)USCRIPT_INVALID_CODE) {
3144             log_err("FAIL: %s = %d, exp. 0\n",
3145                     desc, script);
3146         }
3147     }
3148 }
3149 
3150 /* various tests for consistency of UCD data and API behavior */
3151 static void
TestConsistency()3152 TestConsistency() {
3153     char buffer[300];
3154     USet *set1, *set2, *set3, *set4;
3155     UErrorCode errorCode;
3156 
3157     UChar32 start, end;
3158     int32_t i, length;
3159 
3160     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3161     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3162     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3163     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3164     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3165 
3166     U_STRING_DECL(mathBlocksPattern,
3167         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3168         214);
3169     U_STRING_DECL(mathPattern, "[:Math:]", 8);
3170     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3171     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3172     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3173 
3174     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3175     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3176     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3177     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3178     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3179 
3180     U_STRING_INIT(mathBlocksPattern,
3181         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3182         214);
3183     U_STRING_INIT(mathPattern, "[:Math:]", 8);
3184     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3185     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3186     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3187 
3188     /*
3189      * It used to be that UCD.html and its precursors said
3190      * "Those dashes used to mark connections between pieces of words,
3191      *  plus the Katakana middle dot."
3192      *
3193      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3194      * but not from Hyphen.
3195      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3196      * Therefore, do not show errors when testing the Hyphen property.
3197      */
3198     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3199                 "known to the UTC and not considered errors.\n");
3200 
3201     errorCode=U_ZERO_ERROR;
3202     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3203     set2=uset_openPattern(dashPattern, 8, &errorCode);
3204     if(U_SUCCESS(errorCode)) {
3205         /* remove the Katakana middle dot(s) from set1 */
3206         uset_remove(set1, 0x30fb);
3207         uset_remove(set1, 0xff65); /* halfwidth variant */
3208         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3209     } else {
3210         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3211     }
3212 
3213     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3214     set3=uset_openPattern(formatPattern, 6, &errorCode);
3215     set4=uset_openPattern(alphaPattern, 14, &errorCode);
3216     if(U_SUCCESS(errorCode)) {
3217         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3218         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3219         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3220     } else {
3221         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3222     }
3223 
3224     uset_close(set1);
3225     uset_close(set2);
3226     uset_close(set3);
3227     uset_close(set4);
3228 
3229     /*
3230      * Check that each lowercase character has "small" in its name
3231      * and not "capital".
3232      * There are some such characters, some of which seem odd.
3233      * Use the verbose flag to see these notices.
3234      */
3235     errorCode=U_ZERO_ERROR;
3236     set1=uset_openPattern(lowerPattern, 13, &errorCode);
3237     if(U_SUCCESS(errorCode)) {
3238         for(i=0;; ++i) {
3239             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3240             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3241                 break; /* done */
3242             }
3243             if(U_FAILURE(errorCode)) {
3244                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3245                         i, u_errorName(errorCode));
3246                 break;
3247             }
3248             if(length!=0) {
3249                 break; /* done with code points, got a string or -1 */
3250             }
3251 
3252             while(start<=end) {
3253                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3254                 if(U_FAILURE(errorCode)) {
3255                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3256                     errorCode=U_ZERO_ERROR;
3257                 }
3258                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3259                     strstr(buffer, "SMALL CAPITAL")==NULL
3260                 ) {
3261                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3262                 }
3263                 ++start;
3264             }
3265         }
3266     } else {
3267         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3268     }
3269     uset_close(set1);
3270 
3271     /* verify that all assigned characters in Math blocks are exactly Math characters */
3272     errorCode=U_ZERO_ERROR;
3273     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3274     set2=uset_openPattern(mathPattern, 8, &errorCode);
3275     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3276     if(U_SUCCESS(errorCode)) {
3277         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3278         uset_complement(set3);      /* assigned characters */
3279         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3280         compareUSets(set1, set2,
3281                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
3282                      TRUE);
3283     } else {
3284         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3285     }
3286     uset_close(set1);
3287     uset_close(set2);
3288     uset_close(set3);
3289 
3290     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3291     errorCode=U_ZERO_ERROR;
3292     set1=uset_openPattern(unknownPattern, 14, &errorCode);
3293     set2=uset_openPattern(reservedPattern, 20, &errorCode);
3294     if(U_SUCCESS(errorCode)) {
3295         compareUSets(set1, set2,
3296                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3297                      TRUE);
3298     } else {
3299         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3300     }
3301     uset_close(set1);
3302     uset_close(set2);
3303 }
3304 
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
3315 #define HARDCODED_DATA_4497 1
3316 
3317 /* API coverage for ubidi_props.c */
TestUBiDiProps()3318 static void TestUBiDiProps() {
3319 #if !HARDCODED_DATA_4497
3320     UDataMemory *pData;
3321     UBiDiProps *bdp;
3322     const UBiDiProps *cbdp;
3323     UErrorCode errorCode;
3324 
3325     /* coverage for ubidi_openBinary() */
3326     errorCode=U_ZERO_ERROR;
3327     pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3328     if(U_FAILURE(errorCode)) {
3329         log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3330                     u_errorName(errorCode));
3331         return;
3332     }
3333 
3334     bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3335     if(U_FAILURE(errorCode)) {
3336         log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3337                 u_errorName(errorCode));
3338         udata_close(pData);
3339         return;
3340     }
3341 
3342     if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3343         log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3344     }
3345 
3346     ubidi_closeProps(bdp);
3347     udata_close(pData);
3348 
3349     /* coverage for ubidi_getDummy() */
3350     errorCode=U_ZERO_ERROR;
3351     cbdp=ubidi_getDummy(&errorCode);
3352     if(ubidi_getClass(cbdp, 0x20)!=0) {
3353         log_err("ubidi_getClass(dummy, space)!=0\n");
3354     }
3355 #endif
3356 }
3357 
3358 /* test case folding, compare return values with CaseFolding.txt ------------ */
3359 
/*
 * Bit flags recording which kinds of case folding have already been
 * tested for a character; CF_ALL is the union of the three single flags.
 */
enum {
    CF_SIMPLE=0x1,
    CF_FULL=0x2,
    CF_TURKIC=0x4,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3367 
3368 static void
testFold(UChar32 c,int which,UChar32 simple,UChar32 turkic,const UChar * full,int32_t fullLength,const UChar * turkicFull,int32_t turkicFullLength)3369 testFold(UChar32 c, int which,
3370          UChar32 simple, UChar32 turkic,
3371          const UChar *full, int32_t fullLength,
3372          const UChar *turkicFull, int32_t turkicFullLength) {
3373     UChar s[2], t[32];
3374     UChar32 c2;
3375     int32_t length, length2;
3376 
3377     UErrorCode errorCode=U_ZERO_ERROR;
3378 
3379     length=0;
3380     U16_APPEND_UNSAFE(s, length, c);
3381 
3382     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3383         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3384     }
3385     if((which&CF_FULL)!=0) {
3386         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
3387         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3388             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3389         }
3390     }
3391     if((which&CF_TURKIC)!=0) {
3392         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3393             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3394         }
3395 
3396         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3397         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3398             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3399         }
3400     }
3401 }
3402 
3403 /* test that c case-folds to itself */
3404 static void
testFoldToSelf(UChar32 c,int which)3405 testFoldToSelf(UChar32 c, int which) {
3406     UChar s[2];
3407     int32_t length;
3408 
3409     length=0;
3410     U16_APPEND_UNSAFE(s, length, c);
3411     testFold(c, which, c, c, s, length, s, length);
3412 }
3413 
3414 struct CaseFoldingData {
3415     USet *notSeen;
3416     UChar32 prev, prevSimple;
3417     UChar prevFull[32];
3418     int32_t prevFullLength;
3419     int which;
3420 };
3421 typedef struct CaseFoldingData CaseFoldingData;
3422 
3423 static void U_CALLCONV
caseFoldingLineFn(void * context,char * fields[][2],int32_t fieldCount,UErrorCode * pErrorCode)3424 caseFoldingLineFn(void *context,
3425                   char *fields[][2], int32_t fieldCount,
3426                   UErrorCode *pErrorCode) {
3427     CaseFoldingData *pData=(CaseFoldingData *)context;
3428     char *end;
3429     UChar full[32];
3430     UChar32 c, prev, simple;
3431     int32_t count;
3432     int which;
3433     char status;
3434 
3435     /* get code point */
3436     const char *s=u_skipWhitespace(fields[0][0]);
3437     if(0==strncmp(s, "0000..10FFFF", 12)) {
3438         /*
3439          * Ignore the line
3440          * # @missing: 0000..10FFFF; C; <code point>
3441          * because maps-to-self is already our default, and this line breaks this parser.
3442          */
3443         return;
3444     }
3445     c=(UChar32)strtoul(s, &end, 16);
3446     end=(char *)u_skipWhitespace(end);
3447     if(end<=fields[0][0] || end!=fields[0][1]) {
3448         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3449         *pErrorCode=U_PARSE_ERROR;
3450         return;
3451     }
3452 
3453     /* get the status of this mapping */
3454     status=*u_skipWhitespace(fields[1][0]);
3455     if(status!='C' && status!='S' && status!='F' && status!='T') {
3456         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3457         *pErrorCode=U_PARSE_ERROR;
3458         return;
3459     }
3460 
3461     /* get the mapping */
3462     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3463     if(U_FAILURE(*pErrorCode)) {
3464         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3465         return;
3466     }
3467 
3468     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3469     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3470         simple=c;
3471     }
3472 
3473     if(c!=(prev=pData->prev)) {
3474         /*
3475          * Test remaining mappings for the previous code point.
3476          * If a turkic folding was not mentioned, then it should fold the same
3477          * as the regular simple case folding.
3478          */
3479         UChar prevString[2];
3480         int32_t length;
3481 
3482         length=0;
3483         U16_APPEND_UNSAFE(prevString, length, prev);
3484         testFold(prev, (~pData->which)&CF_ALL,
3485                  prev, pData->prevSimple,
3486                  prevString, length,
3487                  pData->prevFull, pData->prevFullLength);
3488         pData->prev=pData->prevSimple=c;
3489         length=0;
3490         U16_APPEND_UNSAFE(pData->prevFull, length, c);
3491         pData->prevFullLength=length;
3492         pData->which=0;
3493     }
3494 
3495     /*
3496      * Turn the status into a bit set of case foldings to test.
3497      * Remember non-Turkic case foldings as defaults for Turkic mode.
3498      */
3499     switch(status) {
3500     case 'C':
3501         which=CF_SIMPLE|CF_FULL;
3502         pData->prevSimple=simple;
3503         u_memcpy(pData->prevFull, full, count);
3504         pData->prevFullLength=count;
3505         break;
3506     case 'S':
3507         which=CF_SIMPLE;
3508         pData->prevSimple=simple;
3509         break;
3510     case 'F':
3511         which=CF_FULL;
3512         u_memcpy(pData->prevFull, full, count);
3513         pData->prevFullLength=count;
3514         break;
3515     case 'T':
3516         which=CF_TURKIC;
3517         break;
3518     default:
3519         which=0;
3520         break; /* won't happen because of test above */
3521     }
3522 
3523     testFold(c, which, simple, simple, full, count, full, count);
3524 
3525     /* remember which case foldings of c have been tested */
3526     pData->which|=which;
3527 
3528     /* remove c from the set of ones not mentioned in CaseFolding.txt */
3529     uset_remove(pData->notSeen, c);
3530 }
3531 
3532 static void
TestCaseFolding()3533 TestCaseFolding() {
3534     CaseFoldingData data={ NULL };
3535     char *fields[3][2];
3536     UErrorCode errorCode;
3537 
3538     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3539 
3540     errorCode=U_ZERO_ERROR;
3541     /* test BMP & plane 1 - nothing interesting above */
3542     data.notSeen=uset_open(0, 0x1ffff);
3543     data.prevFullLength=1; /* length of full case folding of U+0000 */
3544 
3545     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3546     if(U_SUCCESS(errorCode)) {
3547         int32_t i, start, end;
3548 
3549         /* add a pseudo-last line to finish testing of the actual last one */
3550         fields[0][0]=lastLine;
3551         fields[0][1]=lastLine+6;
3552         fields[1][0]=lastLine+7;
3553         fields[1][1]=lastLine+9;
3554         fields[2][0]=lastLine+10;
3555         fields[2][1]=lastLine+17;
3556         caseFoldingLineFn(&data, fields, 3, &errorCode);
3557 
3558         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3559         for(i=0;
3560             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3561                 U_SUCCESS(errorCode);
3562             ++i
3563         ) {
3564             do {
3565                 testFoldToSelf(start, CF_ALL);
3566             } while(++start<=end);
3567         }
3568     }
3569 
3570     uset_close(data.notSeen);
3571 }
3572