1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 #include "unicode/ustring.h"
8 #include "unicode/uchar.h"
9 #include "unicode/uniset.h"
10 #include "unicode/putil.h"
11 #include "cstring.h"
12 #include "uparse.h"
13 #include "ucdtest.h"
14
// Number of elements of a true array (not a pointer), as an int32_t.
// The expansion and the macro argument are fully parenthesized so that the
// result is a single operand regardless of the surrounding expression.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
16
UnicodeTest()17 UnicodeTest::UnicodeTest()
18 {
19 }
20
~UnicodeTest()21 UnicodeTest::~UnicodeTest()
22 {
23 }
24
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)25 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
26 {
27 if (exec) logln("TestSuite UnicodeTest: ");
28 switch (index) {
29 case 0: name = "TestAdditionalProperties"; if(exec) TestAdditionalProperties(); break;
30 case 1: name = "TestBinaryValues"; if(exec) TestBinaryValues(); break;
31 default: name = ""; break; //needed to end loop
32 }
33 }
34
35 //====================================================
36 // private data used by the tests
37 //====================================================
38
39 // test DerivedCoreProperties.txt -------------------------------------------
40
41 // copied from genprops.c
42 static int32_t
getTokenIndex(const char * const tokens[],int32_t countTokens,const char * s)43 getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) {
44 const char *t, *z;
45 int32_t i, j;
46
47 s=u_skipWhitespace(s);
48 for(i=0; i<countTokens; ++i) {
49 t=tokens[i];
50 if(t!=NULL) {
51 for(j=0;; ++j) {
52 if(t[j]!=0) {
53 if(s[j]!=t[j]) {
54 break;
55 }
56 } else {
57 z=u_skipWhitespace(s+j);
58 if(*z==';' || *z==0) {
59 return i;
60 } else {
61 break;
62 }
63 }
64 }
65 }
66 }
67 return -1;
68 }
69
// Property names looked up in field 1 of DerivedCoreProperties.txt lines.
// The order must match derivedCorePropsIndex[]: entry i here names the
// property whose UProperty constant is entry i there.
static const char *const derivedCorePropsNames[]={
    "Math", "Alphabetic",
    "Lowercase", "Uppercase",
    "ID_Start", "ID_Continue",
    "XID_Start", "XID_Continue",
    "Default_Ignorable_Code_Point",
    "Grapheme_Extend",
    "Grapheme_Link",   /* Unicode 5 moves this property here from PropList.txt */
    "Grapheme_Base"
};
85
86 static const UProperty
87 derivedCorePropsIndex[]={
88 UCHAR_MATH,
89 UCHAR_ALPHABETIC,
90 UCHAR_LOWERCASE,
91 UCHAR_UPPERCASE,
92 UCHAR_ID_START,
93 UCHAR_ID_CONTINUE,
94 UCHAR_XID_START,
95 UCHAR_XID_CONTINUE,
96 UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
97 UCHAR_GRAPHEME_EXTEND,
98 UCHAR_GRAPHEME_LINK,
99 UCHAR_GRAPHEME_BASE
100 };
101
102 U_CFUNC void U_CALLCONV
derivedCorePropsLineFn(void * context,char * fields[][2],int32_t,UErrorCode * pErrorCode)103 derivedCorePropsLineFn(void *context,
104 char *fields[][2], int32_t /* fieldCount */,
105 UErrorCode *pErrorCode)
106 {
107 UnicodeTest *me=(UnicodeTest *)context;
108 uint32_t start, end;
109 int32_t i;
110
111 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
112 if(U_FAILURE(*pErrorCode)) {
113 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt field 0 at %s\n", fields[0][0]);
114 return;
115 }
116
117 /* parse derived binary property name, ignore unknown names */
118 i=getTokenIndex(derivedCorePropsNames, LENGTHOF(derivedCorePropsNames), fields[1][0]);
119 if(i<0) {
120 me->errln("UnicodeTest warning: unknown property name '%s' in \n", fields[1][0]);
121 return;
122 }
123
124 me->derivedCoreProps[i].add(start, end);
125 }
126
TestAdditionalProperties()127 void UnicodeTest::TestAdditionalProperties() {
128 // test DerivedCoreProperties.txt
129 if(LENGTHOF(derivedCoreProps)<LENGTHOF(derivedCorePropsNames)) {
130 errln("error: UnicodeTest::derivedCoreProps[] too short, need at least %d UnicodeSets\n",
131 LENGTHOF(derivedCorePropsNames));
132 return;
133 }
134 if(LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)) {
135 errln("error in ucdtest.cpp: LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)\n");
136 return;
137 }
138
139 char newPath[256];
140 char backupPath[256];
141 char *fields[2][2];
142 UErrorCode errorCode=U_ZERO_ERROR;
143
144 /* Look inside ICU_DATA first */
145 strcpy(newPath, pathToDataDirectory());
146 strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");
147
148 // As a fallback, try to guess where the source data was located
149 // at the time ICU was built, and look there.
150 # ifdef U_TOPSRCDIR
151 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
152 # else
153 strcpy(backupPath, loadTestData(errorCode));
154 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
155 # endif
156 strcat(backupPath, U_FILE_SEP_STRING);
157 strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");
158
159 u_parseDelimitedFile(newPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode);
160
161 if(errorCode==U_FILE_ACCESS_ERROR) {
162 errorCode=U_ZERO_ERROR;
163 u_parseDelimitedFile(backupPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode);
164 }
165 if(U_FAILURE(errorCode)) {
166 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
167 return;
168 }
169
170 // now we have all derived core properties in the UnicodeSets
171 // run them all through the API
172 int32_t rangeCount, range;
173 uint32_t i;
174 UChar32 start, end;
175 int32_t noErrors = 0;
176
177 // test all TRUE properties
178 for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
179 rangeCount=derivedCoreProps[i].getRangeCount();
180 for(range=0; range<rangeCount; ++range) {
181 start=derivedCoreProps[i].getRangeStart(range);
182 end=derivedCoreProps[i].getRangeEnd(range);
183 for(; start<=end; ++start) {
184 if(!u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
185 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong\n", start, derivedCorePropsNames[i]);
186 if(noErrors++ > 100) {
187 errln("Too many errors, moving to the next test");
188 break;
189 }
190 }
191 }
192 }
193 }
194
195 noErrors = 0;
196 // invert all properties
197 for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
198 derivedCoreProps[i].complement();
199 }
200
201 // test all FALSE properties
202 for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
203 rangeCount=derivedCoreProps[i].getRangeCount();
204 for(range=0; range<rangeCount; ++range) {
205 start=derivedCoreProps[i].getRangeStart(range);
206 end=derivedCoreProps[i].getRangeEnd(range);
207 for(; start<=end; ++start) {
208 if(u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
209 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedCorePropsNames[i]);
210 if(noErrors++ > 100) {
211 errln("Too many errors, moving to the next test");
212 break;
213 }
214 }
215 }
216 }
217 }
218 }
219
TestBinaryValues()220 void UnicodeTest::TestBinaryValues() {
221 /*
222 * Unicode 5.1 explicitly defines binary property value aliases.
223 * Verify that they are all recognized.
224 */
225 UErrorCode errorCode=U_ZERO_ERROR;
226 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode);
227 if(U_FAILURE(errorCode)) {
228 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCode));
229 return;
230 }
231
232 static const char *const falseValues[]={ "N", "No", "F", "False" };
233 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
234 int32_t i;
235 for(i=0; i<LENGTHOF(falseValues); ++i) {
236 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
237 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_INV));
238 errorCode=U_ZERO_ERROR;
239 UnicodeSet set(pattern, errorCode);
240 if(U_FAILURE(errorCode)) {
241 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i], u_errorName(errorCode));
242 continue;
243 }
244 set.complement();
245 if(set!=alpha) {
246 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alphabetic:])\n", falseValues[i]);
247 }
248 }
249 for(i=0; i<LENGTHOF(trueValues); ++i) {
250 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
251 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_INV));
252 errorCode=U_ZERO_ERROR;
253 UnicodeSet set(pattern, errorCode);
254 if(U_FAILURE(errorCode)) {
255 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i], u_errorName(errorCode));
256 continue;
257 }
258 if(set!=alpha) {
259 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues[i]);
260 }
261 }
262 }
263