1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2016, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7
8 #include "unicode/ustring.h"
9 #include "unicode/uchar.h"
10 #include "unicode/ucpmap.h"
11 #include "unicode/uniset.h"
12 #include "unicode/putil.h"
13 #include "unicode/uscript.h"
14 #include "unicode/uset.h"
15 #include "cstring.h"
16 #include "hash.h"
17 #include "patternprops.h"
18 #include "normalizer2impl.h"
19 #include "testutil.h"
20 #include "uparse.h"
21 #include "ucdtest.h"
22
// Property names that occur in the parsed data files but are deliberately
// not tested; the UnicodeTest constructor pre-registers them so that they
// are never reported as unknown.
// Both the characters and the pointers are const: the table is read-only.
static const char *const ignorePropNames[]={
    "FC_NFKC",
    "NFD_QC",
    "NFC_QC",
    "NFKD_QC",
    "NFKC_QC",
    "Expands_On_NFD",
    "Expands_On_NFC",
    "Expands_On_NFKD",
    "Expands_On_NFKC",
    "NFKC_CF"
};
35
UnicodeTest()36 UnicodeTest::UnicodeTest()
37 {
38 UErrorCode errorCode=U_ZERO_ERROR;
39 unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode);
40 if(U_FAILURE(errorCode)) {
41 delete unknownPropertyNames;
42 unknownPropertyNames=NULL;
43 }
44 // Ignore some property names altogether.
45 for(int32_t i=0; i<UPRV_LENGTHOF(ignorePropNames); ++i) {
46 unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV), 1, errorCode);
47 }
48 }
49
~UnicodeTest()50 UnicodeTest::~UnicodeTest()
51 {
52 delete unknownPropertyNames;
53 }
54
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)55 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
56 {
57 if(exec) {
58 logln("TestSuite UnicodeTest: ");
59 }
60 TESTCASE_AUTO_BEGIN;
61 TESTCASE_AUTO(TestAdditionalProperties);
62 TESTCASE_AUTO(TestBinaryValues);
63 TESTCASE_AUTO(TestConsistency);
64 TESTCASE_AUTO(TestPatternProperties);
65 TESTCASE_AUTO(TestScriptMetadata);
66 TESTCASE_AUTO(TestBidiPairedBracketType);
67 TESTCASE_AUTO(TestEmojiProperties);
68 TESTCASE_AUTO(TestEmojiPropertiesOfStrings);
69 TESTCASE_AUTO(TestIndicPositionalCategory);
70 TESTCASE_AUTO(TestIndicSyllabicCategory);
71 TESTCASE_AUTO(TestVerticalOrientation);
72 TESTCASE_AUTO(TestDefaultScriptExtensions);
73 TESTCASE_AUTO(TestInvalidCodePointFolding);
74 #if !UCONFIG_NO_NORMALIZATION
75 TESTCASE_AUTO(TestBinaryCharacterProperties);
76 TESTCASE_AUTO(TestIntCharacterProperties);
77 #endif
78 TESTCASE_AUTO_END;
79 }
80
81 //====================================================
82 // private data used by the tests
83 //====================================================
84
85 // test DerivedCoreProperties.txt -------------------------------------------
86
87 // copied from genprops.c
88 static int32_t
getTokenIndex(const char * const tokens[],int32_t countTokens,const char * s)89 getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) {
90 const char *t, *z;
91 int32_t i, j;
92
93 s=u_skipWhitespace(s);
94 for(i=0; i<countTokens; ++i) {
95 t=tokens[i];
96 if(t!=NULL) {
97 for(j=0;; ++j) {
98 if(t[j]!=0) {
99 if(s[j]!=t[j]) {
100 break;
101 }
102 } else {
103 z=u_skipWhitespace(s+j);
104 if(*z==';' || *z==0) {
105 return i;
106 } else {
107 break;
108 }
109 }
110 }
111 }
112 }
113 return -1;
114 }
115
116 static const char *const
117 derivedPropsNames[]={
118 "Math",
119 "Alphabetic",
120 "Lowercase",
121 "Uppercase",
122 "ID_Start",
123 "ID_Continue",
124 "XID_Start",
125 "XID_Continue",
126 "Default_Ignorable_Code_Point",
127 "Full_Composition_Exclusion",
128 "Grapheme_Extend",
129 "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */
130 "Grapheme_Base",
131 "Cased",
132 "Case_Ignorable",
133 "Changes_When_Lowercased",
134 "Changes_When_Uppercased",
135 "Changes_When_Titlecased",
136 "Changes_When_Casefolded",
137 "Changes_When_Casemapped",
138 "Changes_When_NFKC_Casefolded"
139 };
140
141 static const UProperty
142 derivedPropsIndex[]={
143 UCHAR_MATH,
144 UCHAR_ALPHABETIC,
145 UCHAR_LOWERCASE,
146 UCHAR_UPPERCASE,
147 UCHAR_ID_START,
148 UCHAR_ID_CONTINUE,
149 UCHAR_XID_START,
150 UCHAR_XID_CONTINUE,
151 UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
152 UCHAR_FULL_COMPOSITION_EXCLUSION,
153 UCHAR_GRAPHEME_EXTEND,
154 UCHAR_GRAPHEME_LINK,
155 UCHAR_GRAPHEME_BASE,
156 UCHAR_CASED,
157 UCHAR_CASE_IGNORABLE,
158 UCHAR_CHANGES_WHEN_LOWERCASED,
159 UCHAR_CHANGES_WHEN_UPPERCASED,
160 UCHAR_CHANGES_WHEN_TITLECASED,
161 UCHAR_CHANGES_WHEN_CASEFOLDED,
162 UCHAR_CHANGES_WHEN_CASEMAPPED,
163 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
164 };
165
166 static int32_t numErrors[UPRV_LENGTHOF(derivedPropsIndex)]={ 0 };
167
168 enum { MAX_ERRORS=50 };
169
170 U_CFUNC void U_CALLCONV
derivedPropsLineFn(void * context,char * fields[][2],int32_t,UErrorCode * pErrorCode)171 derivedPropsLineFn(void *context,
172 char *fields[][2], int32_t /* fieldCount */,
173 UErrorCode *pErrorCode)
174 {
175 UnicodeTest *me=(UnicodeTest *)context;
176 uint32_t start, end;
177 int32_t i;
178
179 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
180 if(U_FAILURE(*pErrorCode)) {
181 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or DerivedNormalizationProps.txt field 0 at %s\n", fields[0][0]);
182 return;
183 }
184
185 /* parse derived binary property name, ignore unknown names */
186 i=getTokenIndex(derivedPropsNames, UPRV_LENGTHOF(derivedPropsNames), fields[1][0]);
187 if(i<0) {
188 UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0]));
189 propName.trim();
190 if(me->unknownPropertyNames->find(propName)==NULL) {
191 UErrorCode errorCode=U_ZERO_ERROR;
192 me->unknownPropertyNames->puti(propName, 1, errorCode);
193 me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]);
194 }
195 return;
196 }
197
198 me->derivedProps[i].add(start, end);
199 }
200
TestAdditionalProperties()201 void UnicodeTest::TestAdditionalProperties() {
202 #if !UCONFIG_NO_NORMALIZATION
203 // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt
204 if(UPRV_LENGTHOF(derivedProps)<UPRV_LENGTHOF(derivedPropsNames)) {
205 errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n",
206 UPRV_LENGTHOF(derivedPropsNames));
207 return;
208 }
209 if(UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)) {
210 errln("error in ucdtest.cpp: UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)\n");
211 return;
212 }
213
214 char path[500];
215 if(getUnidataPath(path) == NULL) {
216 errln("unable to find path to source/data/unidata/");
217 return;
218 }
219 char *basename=strchr(path, 0);
220 strcpy(basename, "DerivedCoreProperties.txt");
221
222 char *fields[2][2];
223 UErrorCode errorCode=U_ZERO_ERROR;
224 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
225 if(U_FAILURE(errorCode)) {
226 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
227 return;
228 }
229
230 strcpy(basename, "DerivedNormalizationProps.txt");
231 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
232 if(U_FAILURE(errorCode)) {
233 errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode));
234 return;
235 }
236
237 // now we have all derived core properties in the UnicodeSets
238 // run them all through the API
239 int32_t rangeCount, range;
240 uint32_t i;
241 UChar32 start, end;
242
243 // test all true properties
244 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
245 rangeCount=derivedProps[i].getRangeCount();
246 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
247 start=derivedProps[i].getRangeStart(range);
248 end=derivedProps[i].getRangeEnd(range);
249 for(; start<=end; ++start) {
250 if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) {
251 dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==false is wrong", start, derivedPropsNames[i]);
252 if(++numErrors[i]>=MAX_ERRORS) {
253 dataerrln("Too many errors, moving to the next test");
254 break;
255 }
256 }
257 }
258 }
259 }
260
261 // invert all properties
262 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
263 derivedProps[i].complement();
264 }
265
266 // test all false properties
267 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
268 rangeCount=derivedProps[i].getRangeCount();
269 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
270 start=derivedProps[i].getRangeStart(range);
271 end=derivedProps[i].getRangeEnd(range);
272 for(; start<=end; ++start) {
273 if(u_hasBinaryProperty(start, derivedPropsIndex[i])) {
274 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==true is wrong\n", start, derivedPropsNames[i]);
275 if(++numErrors[i]>=MAX_ERRORS) {
276 errln("Too many errors, moving to the next test");
277 break;
278 }
279 }
280 }
281 }
282 }
283 #endif /* !UCONFIG_NO_NORMALIZATION */
284 }
285
TestBinaryValues()286 void UnicodeTest::TestBinaryValues() {
287 /*
288 * Unicode 5.1 explicitly defines binary property value aliases.
289 * Verify that they are all recognized.
290 */
291 UErrorCode errorCode=U_ZERO_ERROR;
292 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode);
293 if(U_FAILURE(errorCode)) {
294 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCode));
295 return;
296 }
297
298 static const char *const falseValues[]={ "N", "No", "F", "False" };
299 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
300 int32_t i;
301 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
302 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
303 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_INV));
304 errorCode=U_ZERO_ERROR;
305 UnicodeSet set(pattern, errorCode);
306 if(U_FAILURE(errorCode)) {
307 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i], u_errorName(errorCode));
308 continue;
309 }
310 set.complement();
311 if(set!=alpha) {
312 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alphabetic:])\n", falseValues[i]);
313 }
314 }
315 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
316 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
317 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_INV));
318 errorCode=U_ZERO_ERROR;
319 UnicodeSet set(pattern, errorCode);
320 if(U_FAILURE(errorCode)) {
321 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i], u_errorName(errorCode));
322 continue;
323 }
324 if(set!=alpha) {
325 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues[i]);
326 }
327 }
328 }
329
TestConsistency()330 void UnicodeTest::TestConsistency() {
331 #if !UCONFIG_NO_NORMALIZATION
332 /*
333 * Test for an example that getCanonStartSet() delivers
334 * all characters that compose from the input one,
335 * even in multiple steps.
336 * For example, the set for "I" (0049) should contain both
337 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
338 * In general, the set for the middle such character should be a subset
339 * of the set for the first.
340 */
341 IcuTestErrorCode errorCode(*this, "TestConsistency");
342 const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
343 const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
344 if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) {
345 dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n",
346 errorCode.errorName());
347 errorCode.reset();
348 return;
349 }
350
351 UnicodeSet set1, set2;
352 if (nfcImpl->getCanonStartSet(0x49, set1)) {
353 /* enumerate all characters that are plausible to be latin letters */
354 for(UChar start=0xa0; start<0x2000; ++start) {
355 UnicodeString decomp=nfd->normalize(UnicodeString(start), errorCode);
356 if(decomp.length()>1 && decomp[0]==0x49) {
357 set2.add(start);
358 }
359 }
360
361 if (set1!=set2) {
362 errln("[canon start set of 0049] != [all c with canon decomp with 0049]");
363 }
364 // This was available in cucdtst.c but the test had to move to intltest
365 // because the new internal normalization functions are in C++.
366 //compareUSets(set1, set2,
367 // "[canon start set of 0049]", "[all c with canon decomp with 0049]",
368 // true);
369 } else {
370 errln("NFC.getCanonStartSet() returned false");
371 }
372 #endif
373 }
374
375 /**
376 * Test various implementations of Pattern_Syntax & Pattern_White_Space.
377 */
TestPatternProperties()378 void UnicodeTest::TestPatternProperties() {
379 IcuTestErrorCode errorCode(*this, "TestPatternProperties()");
380 UnicodeSet syn_pp;
381 UnicodeSet syn_prop(UNICODE_STRING_SIMPLE("[:Pattern_Syntax:]"), errorCode);
382 UnicodeSet syn_list(
383 "[!-/\\:-@\\[-\\^`\\{-~"
384 "\\u00A1-\\u00A7\\u00A9\\u00AB\\u00AC\\u00AE\\u00B0\\u00B1\\u00B6\\u00BB\\u00BF\\u00D7\\u00F7"
385 "\\u2010-\\u2027\\u2030-\\u203E\\u2041-\\u2053\\u2055-\\u205E\\u2190-\\u245F\\u2500-\\u2775"
386 "\\u2794-\\u2BFF\\u2E00-\\u2E7F\\u3001-\\u3003\\u3008-\\u3020\\u3030\\uFD3E\\uFD3F\\uFE45\\uFE46]", errorCode);
387 UnicodeSet ws_pp;
388 UnicodeSet ws_prop(UNICODE_STRING_SIMPLE("[:Pattern_White_Space:]"), errorCode);
389 UnicodeSet ws_list(UNICODE_STRING_SIMPLE("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"), errorCode);
390 UnicodeSet syn_ws_pp;
391 UnicodeSet syn_ws_prop(syn_prop);
392 syn_ws_prop.addAll(ws_prop);
393 for(UChar32 c=0; c<=0xffff; ++c) {
394 if(PatternProps::isSyntax(c)) {
395 syn_pp.add(c);
396 }
397 if(PatternProps::isWhiteSpace(c)) {
398 ws_pp.add(c);
399 }
400 if(PatternProps::isSyntaxOrWhiteSpace(c)) {
401 syn_ws_pp.add(c);
402 }
403 }
404 compareUSets(syn_pp, syn_prop,
405 "PatternProps.isSyntax()", "[:Pattern_Syntax:]", true);
406 compareUSets(syn_pp, syn_list,
407 "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", true);
408 compareUSets(ws_pp, ws_prop,
409 "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", true);
410 compareUSets(ws_pp, ws_list,
411 "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", true);
412 compareUSets(syn_ws_pp, syn_ws_prop,
413 "PatternProps.isSyntaxOrWhiteSpace()",
414 "[[:Pattern_Syntax:][:Pattern_White_Space:]]", true);
415 }
416
417 // So far only minimal port of Java & cucdtst.c compareUSets().
418 UBool
compareUSets(const UnicodeSet & a,const UnicodeSet & b,const char * a_name,const char * b_name,UBool diffIsError)419 UnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b,
420 const char *a_name, const char *b_name,
421 UBool diffIsError) {
422 UBool same= a==b;
423 if(!same && diffIsError) {
424 errln("Sets are different: %s vs. %s\n", a_name, b_name);
425 }
426 return same;
427 }
428
429 namespace {
430
431 /**
432 * Maps a special script code to the most common script of its encoded characters.
433 */
getCharScript(UScriptCode script)434 UScriptCode getCharScript(UScriptCode script) {
435 switch(script) {
436 case USCRIPT_HAN_WITH_BOPOMOFO:
437 case USCRIPT_SIMPLIFIED_HAN:
438 case USCRIPT_TRADITIONAL_HAN:
439 return USCRIPT_HAN;
440 case USCRIPT_JAPANESE:
441 return USCRIPT_HIRAGANA;
442 case USCRIPT_JAMO:
443 case USCRIPT_KOREAN:
444 return USCRIPT_HANGUL;
445 case USCRIPT_SYMBOLS_EMOJI:
446 return USCRIPT_SYMBOLS;
447 default:
448 return script;
449 }
450 }
451
452 } // namespace
453
TestScriptMetadata()454 void UnicodeTest::TestScriptMetadata() {
455 IcuTestErrorCode errorCode(*this, "TestScriptMetadata()");
456 UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode);
457 // So far, sample characters are uppercase.
458 // Georgian is special.
459 UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode);
460 for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) {
461 UScriptCode sc = (UScriptCode)sci;
462 // Run the test with -v to see which script has failures:
463 // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 6 FAIL
464 logln(uscript_getShortName(sc));
465 UScriptUsage usage = uscript_getUsage(sc);
466 UnicodeString sample = uscript_getSampleUnicodeString(sc);
467 UnicodeSet scriptSet;
468 scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode);
469 if(usage == USCRIPT_USAGE_NOT_ENCODED) {
470 assertTrue("not encoded, no sample", sample.isEmpty());
471 assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc));
472 assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc));
473 assertFalse("not encoded, not cased", uscript_isCased(sc));
474 assertTrue("not encoded, no characters", scriptSet.isEmpty());
475 } else {
476 assertFalse("encoded, has a sample character", sample.isEmpty());
477 UChar32 firstChar = sample.char32At(0);
478 UScriptCode charScript = getCharScript(sc);
479 assertEquals("script(sample(script))",
480 (int32_t)charScript, (int32_t)uscript_getScript(firstChar, errorCode));
481 assertEquals("RTL vs. set", (UBool)rtl.contains(firstChar), (UBool)uscript_isRightToLeft(sc));
482 assertEquals("cased vs. set", (UBool)cased.contains(firstChar), (UBool)uscript_isCased(sc));
483 assertEquals("encoded, has characters", (UBool)(sc == charScript), (UBool)(!scriptSet.isEmpty()));
484 if(uscript_isRightToLeft(sc)) {
485 rtl.removeAll(scriptSet);
486 }
487 if(uscript_isCased(sc)) {
488 cased.removeAll(scriptSet);
489 }
490 }
491 }
492 UnicodeString pattern;
493 assertEquals("no remaining RTL characters",
494 UnicodeString("[]"), rtl.toPattern(pattern));
495 assertEquals("no remaining cased characters",
496 UnicodeString("[]"), cased.toPattern(pattern));
497
498 assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN));
499 assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI));
500 assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN));
501 }
502
TestBidiPairedBracketType()503 void UnicodeTest::TestBidiPairedBracketType() {
504 // BidiBrackets-6.3.0.txt says:
505 //
506 // The set of code points listed in this file was originally derived
507 // using the character properties General_Category (gc), Bidi_Class (bc),
508 // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows:
509 // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe,
510 // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket
511 // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type
512 // property values are Open and Close, respectively.
513 IcuTestErrorCode errorCode(*this, "TestBidiPairedBracketType()");
514 UnicodeSet bpt("[:^bpt=n:]", errorCode);
515 assertTrue("bpt!=None is not empty", !bpt.isEmpty());
516 // The following should always be true.
517 UnicodeSet mirrored("[:Bidi_M:]", errorCode);
518 UnicodeSet other_neutral("[:bc=ON:]", errorCode);
519 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt));
520 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt));
521 // The following are true at least initially in Unicode 6.3.
522 UnicodeSet bpt_open("[:bpt=o:]", errorCode);
523 UnicodeSet bpt_close("[:bpt=c:]", errorCode);
524 UnicodeSet ps("[:Ps:]", errorCode);
525 UnicodeSet pe("[:Pe:]", errorCode);
526 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open));
527 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close));
528 }
529
TestEmojiProperties()530 void UnicodeTest::TestEmojiProperties() {
531 assertFalse("space is not Emoji", u_hasBinaryProperty(0x20, UCHAR_EMOJI));
532 assertTrue("shooting star is Emoji", u_hasBinaryProperty(0x1F320, UCHAR_EMOJI));
533 IcuTestErrorCode errorCode(*this, "TestEmojiProperties()");
534 UnicodeSet emoji("[:Emoji:]", errorCode);
535 assertTrue("lots of Emoji", emoji.size() > 700);
536
537 assertTrue("shooting star is Emoji_Presentation",
538 u_hasBinaryProperty(0x1F320, UCHAR_EMOJI_PRESENTATION));
539 assertTrue("Fitzpatrick 6 is Emoji_Modifier",
540 u_hasBinaryProperty(0x1F3FF, UCHAR_EMOJI_MODIFIER));
541 assertTrue("happy person is Emoji_Modifier_Base",
542 u_hasBinaryProperty(0x1F64B, UCHAR_EMOJI_MODIFIER_BASE));
543 assertTrue("asterisk is Emoji_Component",
544 u_hasBinaryProperty(0x2A, UCHAR_EMOJI_COMPONENT));
545 assertTrue("copyright is Extended_Pictographic",
546 u_hasBinaryProperty(0xA9, UCHAR_EXTENDED_PICTOGRAPHIC));
547 }
548
549 namespace {
550
hbp(const UChar * s,int32_t length,UProperty which)551 UBool hbp(const UChar *s, int32_t length, UProperty which) {
552 return u_stringHasBinaryProperty(s, length, which);
553 }
554
hbp(const UChar * s,UProperty which)555 UBool hbp(const UChar *s, UProperty which) {
556 return u_stringHasBinaryProperty(s, -1, which);
557 }
558
559 } // namespace
560
TestEmojiPropertiesOfStrings()561 void UnicodeTest::TestEmojiPropertiesOfStrings() {
562 // Property of code points, for coverage
563 assertFalse("null is not Ideographic", hbp(nullptr, 1, UCHAR_IDEOGRAPHIC));
564 assertFalse("null/0 is not Ideographic", hbp(nullptr, -1, UCHAR_IDEOGRAPHIC));
565 assertFalse("empty string is not Ideographic", hbp(u"", 0, UCHAR_IDEOGRAPHIC));
566 assertFalse("empty string/0 is not Ideographic", hbp(u"", -1, UCHAR_IDEOGRAPHIC));
567 assertFalse("L is not Ideographic", hbp(u"L", 1, UCHAR_IDEOGRAPHIC));
568 assertFalse("L/0 is not Ideographic", hbp(u"L", -1, UCHAR_IDEOGRAPHIC));
569 assertTrue("U+4E02 is Ideographic", hbp(u"丂", 1, UCHAR_IDEOGRAPHIC));
570 assertTrue("U+4E02/0 is Ideographic", hbp(u"丂", -1, UCHAR_IDEOGRAPHIC));
571 assertFalse("2*U+4E02 is not Ideographic", hbp(u"丂丂", 2, UCHAR_IDEOGRAPHIC));
572 assertFalse("2*U+4E02/0 is not Ideographic", hbp(u"丂丂", -1, UCHAR_IDEOGRAPHIC));
573 assertFalse("bicycle is not Ideographic", hbp(u"", 2, UCHAR_IDEOGRAPHIC));
574 assertFalse("bicycle/0 is not Ideographic", hbp(u"", -1, UCHAR_IDEOGRAPHIC));
575 assertTrue("U+23456 is Ideographic", hbp(u"\U00023456", 2, UCHAR_IDEOGRAPHIC));
576 assertTrue("U+23456/0 is Ideographic", hbp(u"\U00023456", -1, UCHAR_IDEOGRAPHIC));
577
578 // Property of (code points and) strings
579 assertFalse("null is not Basic_Emoji", hbp(nullptr, 1, UCHAR_BASIC_EMOJI));
580 assertFalse("null/0 is not Basic_Emoji", hbp(nullptr, -1, UCHAR_BASIC_EMOJI));
581 assertFalse("empty string is not Basic_Emoji", hbp(u"", 0, UCHAR_BASIC_EMOJI));
582 assertFalse("empty string/0 is not Basic_Emoji", hbp(u"", -1, UCHAR_BASIC_EMOJI));
583 assertFalse("L is not Basic_Emoji", hbp(u"L", 1, UCHAR_BASIC_EMOJI));
584 assertFalse("L/0 is not Basic_Emoji", hbp(u"L", -1, UCHAR_BASIC_EMOJI));
585 assertFalse("U+4E02 is not Basic_Emoji", hbp(u"丂", 1, UCHAR_BASIC_EMOJI));
586 assertFalse("U+4E02/0 is not Basic_Emoji", hbp(u"丂", -1, UCHAR_BASIC_EMOJI));
587 assertTrue("bicycle is Basic_Emoji", hbp(u"", 2, UCHAR_BASIC_EMOJI));
588 assertTrue("bicycle/0 is Basic_Emoji", hbp(u"", -1, UCHAR_BASIC_EMOJI));
589 assertFalse("2*bicycle is Basic_Emoji", hbp(u"", 4, UCHAR_BASIC_EMOJI));
590 assertFalse("2*bicycle/0 is Basic_Emoji", hbp(u"", -1, UCHAR_BASIC_EMOJI));
591 assertFalse("U+23456 is not Basic_Emoji", hbp(u"\U00023456", 2, UCHAR_BASIC_EMOJI));
592 assertFalse("U+23456/0 is not Basic_Emoji", hbp(u"\U00023456", -1, UCHAR_BASIC_EMOJI));
593
594 assertFalse("stopwatch is not Basic_Emoji", hbp(u"⏱", 1, UCHAR_BASIC_EMOJI));
595 assertFalse("stopwatch/0 is not Basic_Emoji", hbp(u"⏱", -1, UCHAR_BASIC_EMOJI));
596 assertTrue("stopwatch+emoji is Basic_Emoji", hbp(u"⏱\uFE0F", 2, UCHAR_BASIC_EMOJI));
597 assertTrue("stopwatch+emoji/0 is Basic_Emoji", hbp(u"⏱\uFE0F", -1, UCHAR_BASIC_EMOJI));
598
599 assertFalse("chipmunk is not Basic_Emoji", hbp(u"", UCHAR_BASIC_EMOJI));
600 assertTrue("chipmunk+emoji is Basic_Emoji", hbp(u"\uFE0F", UCHAR_BASIC_EMOJI));
601 assertFalse("chipmunk+2*emoji is not Basic_Emoji", hbp(u"\uFE0F\uFE0F", UCHAR_BASIC_EMOJI));
602
603 // Properties of strings (only)
604 assertFalse("4+emoji is not Emoji_Keycap_Sequence",
605 hbp(u"4\uFE0F", UCHAR_EMOJI_KEYCAP_SEQUENCE));
606 assertTrue("4+emoji+keycap is Emoji_Keycap_Sequence",
607 hbp(u"4\uFE0F\u20E3", UCHAR_EMOJI_KEYCAP_SEQUENCE));
608
609 assertFalse("[B] is not RGI_Emoji_Flag_Sequence",
610 hbp(u"\U0001F1E7", UCHAR_RGI_EMOJI_FLAG_SEQUENCE));
611 assertTrue("[BE] is RGI_Emoji_Flag_Sequence",
612 hbp(u"", UCHAR_RGI_EMOJI_FLAG_SEQUENCE));
613
614 assertFalse("[flag] is not RGI_Emoji_Tag_Sequence",
615 hbp(u"\U0001F3F4", UCHAR_RGI_EMOJI_TAG_SEQUENCE));
616 assertTrue("[Scotland] is RGI_Emoji_Tag_Sequence",
617 hbp(u"", UCHAR_RGI_EMOJI_TAG_SEQUENCE));
618
619 assertFalse("bicyclist is not RGI_Emoji_Modifier_Sequence",
620 hbp(u"", UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE));
621 assertTrue("bicyclist+medium is RGI_Emoji_Modifier_Sequence",
622 hbp(u"\U0001F3FD", UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE));
623
624 assertFalse("woman+dark+ZWJ is not RGI_Emoji_ZWJ_Sequence",
625 hbp(u"\U0001F3FF\u200D", UCHAR_RGI_EMOJI_ZWJ_SEQUENCE));
626 assertTrue("woman pilot: dark skin tone is RGI_Emoji_ZWJ_Sequence",
627 hbp(u"\U0001F3FF\u200D✈\uFE0F", UCHAR_RGI_EMOJI_ZWJ_SEQUENCE));
628
629 // RGI_Emoji = all of the above
630 assertFalse("stopwatch is not RGI_Emoji", hbp(u"⏱", UCHAR_RGI_EMOJI));
631 assertTrue("stopwatch+emoji is RGI_Emoji", hbp(u"⏱\uFE0F", UCHAR_RGI_EMOJI));
632
633 assertFalse("chipmunk is not RGI_Emoji", hbp(u"", UCHAR_RGI_EMOJI));
634 assertTrue("chipmunk+emoji is RGI_Emoji", hbp(u"\uFE0F", UCHAR_RGI_EMOJI));
635
636 assertFalse("4+emoji is not RGI_Emoji", hbp(u"4\uFE0F", UCHAR_RGI_EMOJI));
637 assertTrue("4+emoji+keycap is RGI_Emoji", hbp(u"4\uFE0F\u20E3", UCHAR_RGI_EMOJI));
638
639 assertFalse("[B] is not RGI_Emoji", hbp(u"\U0001F1E7", UCHAR_RGI_EMOJI));
640 assertTrue("[BE] is RGI_Emoji", hbp(u"", UCHAR_RGI_EMOJI));
641
642 assertTrue("[flag] is RGI_Emoji", hbp(u"\U0001F3F4", UCHAR_RGI_EMOJI));
643 assertTrue("[Scotland] is RGI_Emoji", hbp(u"", UCHAR_RGI_EMOJI));
644
645 assertTrue("bicyclist is RGI_Emoji", hbp(u"", UCHAR_RGI_EMOJI));
646 assertTrue("bicyclist+medium is RGI_Emoji", hbp(u"\U0001F3FD", UCHAR_RGI_EMOJI));
647
648 assertFalse("woman+dark+ZWJ is not RGI_Emoji", hbp(u"\U0001F3FF\u200D", UCHAR_RGI_EMOJI));
649 assertTrue("woman pilot: dark skin tone is RGI_Emoji",
650 hbp(u"\U0001F3FF\u200D✈\uFE0F", UCHAR_RGI_EMOJI));
651
652 // UnicodeSet with properties of strings
653 IcuTestErrorCode errorCode(*this, "TestEmojiPropertiesOfStrings()");
654 UnicodeSet basic("[:Basic_Emoji:]", errorCode);
655 UnicodeSet keycaps("[:Emoji_Keycap_Sequence:]", errorCode);
656 UnicodeSet modified("[:RGI_Emoji_Modifier_Sequence:]", errorCode);
657 UnicodeSet flags("[:RGI_Emoji_Flag_Sequence:]", errorCode);
658 UnicodeSet tags("[:RGI_Emoji_Tag_Sequence:]", errorCode);
659 UnicodeSet combos("[:RGI_Emoji_ZWJ_Sequence:]", errorCode);
660 UnicodeSet rgi("[:RGI_Emoji:]", errorCode);
661 if (errorCode.errDataIfFailureAndReset("UnicodeSets")) {
662 return;
663 }
664
665 // union of all sets except for "rgi" -- should be the same as "rgi"
666 UnicodeSet all(basic);
667 all.addAll(keycaps).addAll(modified).addAll(flags).addAll(tags).addAll(combos);
668
669 UnicodeSet basicOnlyCp(basic);
670 basicOnlyCp.removeAllStrings();
671
672 UnicodeSet rgiOnlyCp(rgi);
673 rgiOnlyCp.removeAllStrings();
674
675 assertTrue("lots of Basic_Emoji", basic.size() > 1000);
676 assertEquals("12 Emoji_Keycap_Sequence", 12, keycaps.size());
677 assertTrue("lots of RGI_Emoji_Modifier_Sequence", modified.size() > 600);
678 assertTrue("lots of RGI_Emoji_Flag_Sequence", flags.size() > 250);
679 assertTrue("some RGI_Emoji_Tag_Sequence", tags.size() >= 3);
680 assertTrue("lots of RGI_Emoji_ZWJ_Sequence", combos.size() > 1300);
681 assertTrue("lots of RGI_Emoji", rgi.size() > 3000);
682
683 assertTrue("lots of Basic_Emoji code points", basicOnlyCp.size() > 1000);
684 assertTrue("Basic_Emoji.hasStrings()", basic.hasStrings());
685 assertEquals("no Emoji_Keycap_Sequence code points", 0, keycaps.getRangeCount());
686 assertEquals("lots of RGI_Emoji_Modifier_Sequence", 0, modified.getRangeCount());
687 assertEquals("lots of RGI_Emoji_Flag_Sequence", 0, flags.getRangeCount());
688 assertEquals("some RGI_Emoji_Tag_Sequence", 0, tags.getRangeCount());
689 assertEquals("lots of RGI_Emoji_ZWJ_Sequence", 0, combos.getRangeCount());
690
691 assertTrue("lots of RGI_Emoji code points", rgiOnlyCp.size() > 1000);
692 assertTrue("RGI_Emoji.hasStrings()", rgi.hasStrings());
693 assertEquals("RGI_Emoji/only-cp.size() == Basic_Emoji/only-cp.size()",
694 rgiOnlyCp.size(), basicOnlyCp.size());
695 assertTrue("RGI_Emoji/only-cp == Basic_Emoji/only-cp", rgiOnlyCp == basicOnlyCp);
696 assertEquals("RGI_Emoji.size() == union.size()", rgi.size(), all.size());
697 assertTrue("RGI_Emoji == union", rgi == all);
698
699 assertTrue("Basic_Emoji.contains(stopwatch+emoji)", basic.contains(u"⏱\uFE0F"));
700 assertTrue("Basic_Emoji.contains(chipmunk+emoji)", basic.contains(u"\uFE0F"));
701 assertTrue("Emoji_Keycap_Sequence.contains(4+emoji+keycap)",
702 keycaps.contains(u"4\uFE0F\u20E3"));
703 assertTrue("RGI_Emoji_Flag_Sequence.contains([BE])", flags.contains(u""));
704 assertTrue("RGI_Emoji_Tag_Sequence.contains([Scotland])", tags.contains(u""));
705 assertTrue("RGI_Emoji_Modifier_Sequence.contains(bicyclist+medium)",
706 modified.contains(u"\U0001F3FD"));
707 assertTrue("RGI_Emoji_ZWJ_Sequence.contains(woman pilot: dark skin tone)",
708 combos.contains(u"\U0001F3FF\u200D✈\uFE0F"));
709 assertTrue("RGI_Emoji.contains(stopwatch+emoji)", rgi.contains(u"⏱\uFE0F"));
710 assertTrue("RGI_Emoji.contains(chipmunk+emoji)", rgi.contains(u"\uFE0F"));
711 assertTrue("RGI_Emoji.contains(4+emoji+keycap)", rgi.contains(u"4\uFE0F\u20E3"));
712 assertTrue("RGI_Emoji.contains([BE] is RGI_Emoji)", rgi.contains(u""));
713 assertTrue("RGI_Emoji.contains([flag])", rgi.contains(u"\U0001F3F4"));
714 assertTrue("RGI_Emoji.contains([Scotland])", rgi.contains(u""));
715 assertTrue("RGI_Emoji.contains(bicyclist)", rgi.contains(u""));
716 assertTrue("RGI_Emoji.contains(bicyclist+medium)", rgi.contains(u"\U0001F3FD"));
717 assertTrue("RGI_Emoji.contains(woman pilot: dark skin tone)", rgi.contains(u"\U0001F3FF\u200D✈\uFE0F"));
718 }
719
TestIndicPositionalCategory()720 void UnicodeTest::TestIndicPositionalCategory() {
721 IcuTestErrorCode errorCode(*this, "TestIndicPositionalCategory()");
722 UnicodeSet na(u"[:InPC=NA:]", errorCode);
723 assertTrue("mostly NA", 1000000 <= na.size() && na.size() <= UCHAR_MAX_VALUE - 500);
724 UnicodeSet vol(u"[:InPC=Visual_Order_Left:]", errorCode);
725 assertTrue("some Visual_Order_Left", 19 <= vol.size() && vol.size() <= 100);
726 assertEquals("U+08FF: NA", U_INPC_NA,
727 u_getIntPropertyValue(0x08FF, UCHAR_INDIC_POSITIONAL_CATEGORY));
728 assertEquals("U+0900: Top", U_INPC_TOP,
729 u_getIntPropertyValue(0x0900, UCHAR_INDIC_POSITIONAL_CATEGORY));
730 assertEquals("U+10A06: Overstruck", U_INPC_OVERSTRUCK,
731 u_getIntPropertyValue(0x10A06, UCHAR_INDIC_POSITIONAL_CATEGORY));
732 }
733
TestIndicSyllabicCategory()734 void UnicodeTest::TestIndicSyllabicCategory() {
735 IcuTestErrorCode errorCode(*this, "TestIndicSyllabicCategory()");
736 UnicodeSet other(u"[:InSC=Other:]", errorCode);
737 assertTrue("mostly Other", 1000000 <= other.size() && other.size() <= UCHAR_MAX_VALUE - 500);
738 UnicodeSet ava(u"[:InSC=Avagraha:]", errorCode);
739 assertTrue("some Avagraha", 16 <= ava.size() && ava.size() <= 100);
740 assertEquals("U+08FF: Other", U_INSC_OTHER,
741 u_getIntPropertyValue(0x08FF, UCHAR_INDIC_SYLLABIC_CATEGORY));
742 assertEquals("U+0900: Bindu", U_INSC_BINDU,
743 u_getIntPropertyValue(0x0900, UCHAR_INDIC_SYLLABIC_CATEGORY));
744 assertEquals("U+11065: Brahmi_Joining_Number", U_INSC_BRAHMI_JOINING_NUMBER,
745 u_getIntPropertyValue(0x11065, UCHAR_INDIC_SYLLABIC_CATEGORY));
746 }
747
TestVerticalOrientation()748 void UnicodeTest::TestVerticalOrientation() {
749 IcuTestErrorCode errorCode(*this, "TestVerticalOrientation()");
750 UnicodeSet r(u"[:vo=R:]", errorCode);
751 assertTrue("mostly R", 0xc0000 <= r.size() && r.size() <= 0xd0000);
752 UnicodeSet u(u"[:vo=U:]", errorCode);
753 assertTrue("much U", 0x40000 <= u.size() && u.size() <= 0x50000);
754 UnicodeSet tu(u"[:vo=Tu:]", errorCode);
755 assertTrue("some Tu", 147 <= tu.size() && tu.size() <= 300);
756 assertEquals("U+0E01: Rotated", U_VO_ROTATED,
757 u_getIntPropertyValue(0x0E01, UCHAR_VERTICAL_ORIENTATION));
758 assertEquals("U+3008: Transformed_Rotated", U_VO_TRANSFORMED_ROTATED,
759 u_getIntPropertyValue(0x3008, UCHAR_VERTICAL_ORIENTATION));
760 assertEquals("U+33333: Upright", U_VO_UPRIGHT,
761 u_getIntPropertyValue(0x33333, UCHAR_VERTICAL_ORIENTATION));
762 }
763
TestDefaultScriptExtensions()764 void UnicodeTest::TestDefaultScriptExtensions() {
765 // Block 3000..303F CJK Symbols and Punctuation defaults to scx=Bopo Hang Hani Hira Kana Yiii
766 // but some of its characters revert to scx=<script> which is usually Common.
767 IcuTestErrorCode errorCode(*this, "TestDefaultScriptExtensions()");
768 UScriptCode scx[20];
769 scx[0] = USCRIPT_INVALID_CODE;
770 assertEquals("U+3000 num scx", 1, // IDEOGRAPHIC SPACE
771 uscript_getScriptExtensions(0x3000, scx, UPRV_LENGTHOF(scx), errorCode));
772 assertEquals("U+3000 num scx[0]", USCRIPT_COMMON, scx[0]);
773 scx[0] = USCRIPT_INVALID_CODE;
774 assertEquals("U+3012 num scx", 1, // POSTAL MARK
775 uscript_getScriptExtensions(0x3012, scx, UPRV_LENGTHOF(scx), errorCode));
776 assertEquals("U+3012 num scx[0]", USCRIPT_COMMON, scx[0]);
777 }
778
TestInvalidCodePointFolding(void)779 void UnicodeTest::TestInvalidCodePointFolding(void) {
780 // Test behavior when an invalid code point is passed to u_foldCase
781 static const UChar32 invalidCodePoints[] = {
782 0xD800, // lead surrogate
783 0xDFFF, // trail surrogate
784 0xFDD0, // noncharacter
785 0xFFFF, // noncharacter
786 0x110000, // out of range
787 -1 // negative
788 };
789 for (int32_t i=0; i<UPRV_LENGTHOF(invalidCodePoints); ++i) {
790 UChar32 cp = invalidCodePoints[i];
791 assertEquals("Invalid code points should be echoed back",
792 cp, u_foldCase(cp, U_FOLD_CASE_DEFAULT));
793 assertEquals("Invalid code points should be echoed back",
794 cp, u_foldCase(cp, U_FOLD_CASE_EXCLUDE_SPECIAL_I));
795 }
796 }
797
TestBinaryCharacterProperties()798 void UnicodeTest::TestBinaryCharacterProperties() {
799 #if !UCONFIG_NO_NORMALIZATION
800 IcuTestErrorCode errorCode(*this, "TestBinaryCharacterProperties()");
801 // Spot-check getBinaryPropertySet() vs. hasBinaryProperty().
802 for (int32_t prop = 0; prop < UCHAR_BINARY_LIMIT; ++prop) {
803 const USet *uset = u_getBinaryPropertySet((UProperty)prop, errorCode);
804 if (errorCode.errIfFailureAndReset("u_getBinaryPropertySet(%d)", (int)prop)) {
805 continue;
806 }
807 const UnicodeSet &set = *UnicodeSet::fromUSet(uset);
808 int32_t count = set.getRangeCount();
809 if (count == 0) {
810 assertFalse(UnicodeString("!hasBinaryProperty(U+0020, ") + prop + u")",
811 u_hasBinaryProperty(0x20, (UProperty)prop));
812 assertFalse(UnicodeString("!hasBinaryProperty(U+0061, ") + prop + u")",
813 u_hasBinaryProperty(0x61, (UProperty)prop));
814 assertFalse(UnicodeString("!hasBinaryProperty(U+4E00, ") + prop + u")",
815 u_hasBinaryProperty(0x4e00, (UProperty)prop));
816 } else {
817 UChar32 c = set.getRangeStart(0);
818 if (c > 0) {
819 assertFalse(
820 UnicodeString("!hasBinaryProperty(") + TestUtility::hex(c - 1) +
821 u", " + prop + u")",
822 u_hasBinaryProperty(c - 1, (UProperty)prop));
823 }
824 assertTrue(
825 UnicodeString("hasBinaryProperty(") + TestUtility::hex(c) +
826 u", " + prop + u")",
827 u_hasBinaryProperty(c, (UProperty)prop));
828 c = set.getRangeEnd(count - 1);
829 assertTrue(
830 UnicodeString("hasBinaryProperty(") + TestUtility::hex(c) +
831 u", " + prop + u")",
832 u_hasBinaryProperty(c, (UProperty)prop));
833 if (c < 0x10ffff) {
834 assertFalse(
835 UnicodeString("!hasBinaryProperty(") + TestUtility::hex(c + 1) +
836 u", " + prop + u")",
837 u_hasBinaryProperty(c + 1, (UProperty)prop));
838 }
839 }
840 }
841 #endif
842 }
843
TestIntCharacterProperties()844 void UnicodeTest::TestIntCharacterProperties() {
845 #if !UCONFIG_NO_NORMALIZATION
846 IcuTestErrorCode errorCode(*this, "TestIntCharacterProperties()");
847 // Spot-check getIntPropertyMap() vs. getIntPropertyValue().
848 for (int32_t prop = UCHAR_INT_START; prop < UCHAR_INT_LIMIT; ++prop) {
849 const UCPMap *map = u_getIntPropertyMap((UProperty)prop, errorCode);
850 if (errorCode.errIfFailureAndReset("u_getIntPropertyMap(%d)", (int)prop)) {
851 continue;
852 }
853 uint32_t value;
854 UChar32 end = ucpmap_getRange(map, 0, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value);
855 assertTrue("int property first range", end >= 0);
856 UChar32 c = end / 2;
857 assertEquals(UnicodeString("int property first range value at ") + TestUtility::hex(c),
858 u_getIntPropertyValue(c, (UProperty)prop), value);
859 end = ucpmap_getRange(map, 0x5000, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value);
860 assertTrue("int property later range", end >= 0);
861 assertEquals(UnicodeString("int property later range value at ") + TestUtility::hex(end),
862 u_getIntPropertyValue(end, (UProperty)prop), value);
863 // ucpmap_get() API coverage
864 // TODO: move to cucdtst.c
865 assertEquals(
866 "int property upcmap_get(U+0061)",
867 u_getIntPropertyValue(0x61, (UProperty)prop), ucpmap_get(map, 0x61));
868 }
869 #endif
870 }
871