• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2009, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 #include "unicode/ustring.h"
8 #include "unicode/uchar.h"
9 #include "unicode/uniset.h"
10 #include "unicode/putil.h"
11 #include "cstring.h"
12 #include "uparse.h"
13 #include "ucdtest.h"
14 
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The argument and the whole expansion are parenthesized so the macro
 * behaves as a single expression regardless of the surrounding operators
 * or of the form of the argument expression.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
16 
UnicodeTest()17 UnicodeTest::UnicodeTest()
18 {
19 }
20 
~UnicodeTest()21 UnicodeTest::~UnicodeTest()
22 {
23 }
24 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)25 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
26 {
27     if (exec) logln("TestSuite UnicodeTest: ");
28     switch (index) {
29         case 0: name = "TestAdditionalProperties"; if(exec) TestAdditionalProperties(); break;
30         case 1: name = "TestBinaryValues"; if(exec) TestBinaryValues(); break;
31         default: name = ""; break; //needed to end loop
32     }
33 }
34 
35 //====================================================
36 // private data used by the tests
37 //====================================================
38 
39 // test DerivedCoreProperties.txt -------------------------------------------
40 
41 // copied from genprops.c
42 static int32_t
getTokenIndex(const char * const tokens[],int32_t countTokens,const char * s)43 getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) {
44     const char *t, *z;
45     int32_t i, j;
46 
47     s=u_skipWhitespace(s);
48     for(i=0; i<countTokens; ++i) {
49         t=tokens[i];
50         if(t!=NULL) {
51             for(j=0;; ++j) {
52                 if(t[j]!=0) {
53                     if(s[j]!=t[j]) {
54                         break;
55                     }
56                 } else {
57                     z=u_skipWhitespace(s+j);
58                     if(*z==';' || *z==0) {
59                         return i;
60                     } else {
61                         break;
62                     }
63                 }
64             }
65         }
66     }
67     return -1;
68 }
69 
// Property names looked up in field 1 of each DerivedCoreProperties.txt line.
// The order must stay parallel to derivedCorePropsIndex[].
static const char *const derivedCorePropsNames[]=
{
    "Math",
    "Alphabetic",
    "Lowercase",
    "Uppercase",
    "ID_Start",
    "ID_Continue",
    "XID_Start",
    "XID_Continue",
    "Default_Ignorable_Code_Point",
    "Grapheme_Extend",
    /* Unicode 5 moves this property here from PropList.txt */
    "Grapheme_Link",
    "Grapheme_Base"
};
85 
86 static const UProperty
87 derivedCorePropsIndex[]={
88     UCHAR_MATH,
89     UCHAR_ALPHABETIC,
90     UCHAR_LOWERCASE,
91     UCHAR_UPPERCASE,
92     UCHAR_ID_START,
93     UCHAR_ID_CONTINUE,
94     UCHAR_XID_START,
95     UCHAR_XID_CONTINUE,
96     UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
97     UCHAR_GRAPHEME_EXTEND,
98     UCHAR_GRAPHEME_LINK,
99     UCHAR_GRAPHEME_BASE
100 };
101 
102 U_CFUNC void U_CALLCONV
derivedCorePropsLineFn(void * context,char * fields[][2],int32_t,UErrorCode * pErrorCode)103 derivedCorePropsLineFn(void *context,
104                         char *fields[][2], int32_t /* fieldCount */,
105                         UErrorCode *pErrorCode)
106 {
107     UnicodeTest *me=(UnicodeTest *)context;
108     uint32_t start, end;
109     int32_t i;
110 
111     u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
112     if(U_FAILURE(*pErrorCode)) {
113         me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt field 0 at %s\n", fields[0][0]);
114         return;
115     }
116 
117     /* parse derived binary property name, ignore unknown names */
118     i=getTokenIndex(derivedCorePropsNames, LENGTHOF(derivedCorePropsNames), fields[1][0]);
119     if(i<0) {
120         me->errln("UnicodeTest warning: unknown property name '%s' in \n", fields[1][0]);
121         return;
122     }
123 
124     me->derivedCoreProps[i].add(start, end);
125 }
126 
TestAdditionalProperties()127 void UnicodeTest::TestAdditionalProperties() {
128     // test DerivedCoreProperties.txt
129     if(LENGTHOF(derivedCoreProps)<LENGTHOF(derivedCorePropsNames)) {
130         errln("error: UnicodeTest::derivedCoreProps[] too short, need at least %d UnicodeSets\n",
131               LENGTHOF(derivedCorePropsNames));
132         return;
133     }
134     if(LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)) {
135         errln("error in ucdtest.cpp: LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)\n");
136         return;
137     }
138 
139     char newPath[256];
140     char backupPath[256];
141     char *fields[2][2];
142     UErrorCode errorCode=U_ZERO_ERROR;
143 
144     /* Look inside ICU_DATA first */
145     strcpy(newPath, pathToDataDirectory());
146     strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");
147 
148     // As a fallback, try to guess where the source data was located
149     // at the time ICU was built, and look there.
150 #   ifdef U_TOPSRCDIR
151         strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
152 #   else
153         strcpy(backupPath, loadTestData(errorCode));
154         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
155 #   endif
156     strcat(backupPath, U_FILE_SEP_STRING);
157     strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");
158 
159     u_parseDelimitedFile(newPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode);
160 
161     if(errorCode==U_FILE_ACCESS_ERROR) {
162         errorCode=U_ZERO_ERROR;
163         u_parseDelimitedFile(backupPath, ';', fields, 2, derivedCorePropsLineFn, this, &errorCode);
164     }
165     if(U_FAILURE(errorCode)) {
166         errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
167         return;
168     }
169 
170     // now we have all derived core properties in the UnicodeSets
171     // run them all through the API
172     int32_t rangeCount, range;
173     uint32_t i;
174     UChar32 start, end;
175     int32_t noErrors = 0;
176 
177     // test all TRUE properties
178     for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
179         rangeCount=derivedCoreProps[i].getRangeCount();
180         for(range=0; range<rangeCount; ++range) {
181             start=derivedCoreProps[i].getRangeStart(range);
182             end=derivedCoreProps[i].getRangeEnd(range);
183             for(; start<=end; ++start) {
184                 if(!u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
185                     errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong\n", start, derivedCorePropsNames[i]);
186                     if(noErrors++ > 100) {
187                       errln("Too many errors, moving to the next test");
188                       break;
189                     }
190                 }
191             }
192         }
193     }
194 
195     noErrors = 0;
196     // invert all properties
197     for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
198         derivedCoreProps[i].complement();
199     }
200 
201     // test all FALSE properties
202     for(i=0; i<LENGTHOF(derivedCorePropsNames); ++i) {
203         rangeCount=derivedCoreProps[i].getRangeCount();
204         for(range=0; range<rangeCount; ++range) {
205             start=derivedCoreProps[i].getRangeStart(range);
206             end=derivedCoreProps[i].getRangeEnd(range);
207             for(; start<=end; ++start) {
208                 if(u_hasBinaryProperty(start, derivedCorePropsIndex[i])) {
209                     errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedCorePropsNames[i]);
210                     if(noErrors++ > 100) {
211                       errln("Too many errors, moving to the next test");
212                       break;
213                     }
214                 }
215             }
216         }
217     }
218 }
219 
TestBinaryValues()220 void UnicodeTest::TestBinaryValues() {
221     /*
222      * Unicode 5.1 explicitly defines binary property value aliases.
223      * Verify that they are all recognized.
224      */
225     UErrorCode errorCode=U_ZERO_ERROR;
226     UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode);
227     if(U_FAILURE(errorCode)) {
228         dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCode));
229         return;
230     }
231 
232     static const char *const falseValues[]={ "N", "No", "F", "False" };
233     static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
234     int32_t i;
235     for(i=0; i<LENGTHOF(falseValues); ++i) {
236         UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
237         pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_INV));
238         errorCode=U_ZERO_ERROR;
239         UnicodeSet set(pattern, errorCode);
240         if(U_FAILURE(errorCode)) {
241             errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i], u_errorName(errorCode));
242             continue;
243         }
244         set.complement();
245         if(set!=alpha) {
246             errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alphabetic:])\n", falseValues[i]);
247         }
248     }
249     for(i=0; i<LENGTHOF(trueValues); ++i) {
250         UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
251         pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_INV));
252         errorCode=U_ZERO_ERROR;
253         UnicodeSet set(pattern, errorCode);
254         if(U_FAILURE(errorCode)) {
255             errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i], u_errorName(errorCode));
256             continue;
257         }
258         if(set!=alpha) {
259             errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues[i]);
260         }
261     }
262 }
263