• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2003-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  testidn.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2003-02-06
14 *   created by: Ram Viswanadha
15 *
 *   This test reads the rfc3491.txt file from the sprep data directory,
 *   parses it, and checks that the Nameprep StringPrep profile loaded by ICU
 *   (USPREP_RFC3491_NAMEPREP) reports the same types and mappings as the
 *   entries listed in that file.
20 */
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
25 
26 #define USPREP_TYPE_NAMES_ARRAY
27 
28 #include "unicode/uchar.h"
29 #include "unicode/putil.h"
30 #include "cmemory.h"
31 #include "cstring.h"
32 #include "unicode/udata.h"
33 #include "unicode/utf16.h"
34 #include "unewdata.h"
35 #include "uoptions.h"
36 #include "uparse.h"
37 #include "utrie.h"
38 #include "umutex.h"
39 #include "sprpimpl.h"
40 #include "testidna.h"
41 #include "punyref.h"
42 #include <stdlib.h>
43 
44 UBool beVerbose=FALSE, haveCopyright=TRUE;
45 
46 /* prototypes --------------------------------------------------------------- */
47 
48 
49 static void
50 parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
51 
52 static void
53 compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
54                UStringPrepType option);
55 
56 static void
57 compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
58 
59 static void
60 testAllCodepoints(TestIDNA& test);
61 
62 static TestIDNA* pTestIDNA =NULL;
63 
64 static const char* fileNames[] = {
65                                     "rfc3491.txt"
66                                  };
67 static const UTrie *idnTrie              = NULL;
68 static const int32_t *indexes            = NULL;
69 static const uint16_t *mappingData       = NULL;
70 /* -------------------------------------------------------------------------- */
71 
72 /* file definitions */
73 #define DATA_TYPE "icu"
74 
75 #define SPREP_DIR "sprep"
76 
77 extern int
testData(TestIDNA & test)78 testData(TestIDNA& test) {
79     char *basename=NULL;
80     UErrorCode errorCode=U_ZERO_ERROR;
81     char *saveBasename =NULL;
82 
83     LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode));
84     if(U_FAILURE(errorCode)){
85         test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
86         return errorCode;
87     }
88 
89     char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
90     //TODO get the srcDir dynamically
91     const char *srcDir=IntlTest::pathToDataDirectory();
92 
93     idnTrie     = &profile->sprepTrie;
94     indexes     = profile->indexes;
95     mappingData = profile->mappingData;
96 
97     //initialize
98     pTestIDNA = &test;
99 
100     /* prepare the filename beginning with the source dir */
101     if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
102         filename[0] = 0x2E;
103         filename[1] = U_FILE_SEP_CHAR;
104         uprv_strcpy(filename+2,srcDir);
105     }else{
106         uprv_strcpy(filename, srcDir);
107     }
108     basename=filename+uprv_strlen(filename);
109     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
110         *basename++=U_FILE_SEP_CHAR;
111     }
112 
113     /* process unassigned */
114     basename=filename+uprv_strlen(filename);
115     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
116         *basename++=U_FILE_SEP_CHAR;
117     }
118 
119     /* first copy misc directory */
120     saveBasename = basename;
121     uprv_strcpy(basename,SPREP_DIR);
122     basename = basename + uprv_strlen(SPREP_DIR);
123     *basename++=U_FILE_SEP_CHAR;
124 
125     /* process unassigned */
126     uprv_strcpy(basename,fileNames[0]);
127     parseMappings(filename,TRUE, test,&errorCode);
128     if(U_FAILURE(errorCode)) {
129         test.errln( "Could not open file %s for reading \n", filename);
130         return errorCode;
131     }
132 
133     testAllCodepoints(test);
134 
135     pTestIDNA = NULL;
136     free(filename);
137     return errorCode;
138 }
139 U_CDECL_BEGIN
140 
141 static void U_CALLCONV
strprepProfileLineFn(void *,char * fields[][2],int32_t fieldCount,UErrorCode * pErrorCode)142 strprepProfileLineFn(void * /*context*/,
143               char *fields[][2], int32_t fieldCount,
144               UErrorCode *pErrorCode) {
145     uint32_t mapping[40];
146     char *end, *map;
147     uint32_t code;
148     int32_t length;
149    /*UBool* mapWithNorm = (UBool*) context;*/
150     const char* typeName;
151     uint32_t rangeStart=0,rangeEnd =0;
152     const char *s;
153 
154     s = u_skipWhitespace(fields[0][0]);
155     if (*s == '@') {
156         /* a special directive introduced in 4.2 */
157         return;
158     }
159 
160     if(fieldCount != 3){
161         *pErrorCode = U_INVALID_FORMAT_ERROR;
162         return;
163     }
164 
165     typeName = fields[2][0];
166     map = fields[1][0];
167 
168     if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
169 
170         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
171 
172         /* store the range */
173         compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
174 
175     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
176 
177         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
178 
179         /* store the range */
180         compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
181 
182     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
183         /* get the character code, field 0 */
184         code=(uint32_t)uprv_strtoul(s, &end, 16);
185 
186         /* parse the mapping string */
187         length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
188 
189         /* store the mapping */
190         compareMapping(code,mapping, length,USPREP_MAP);
191 
192     }else{
193         *pErrorCode = U_INVALID_FORMAT_ERROR;
194     }
195 
196 }
197 
198 U_CDECL_END
199 
200 static void
parseMappings(const char * filename,UBool reportError,TestIDNA & test,UErrorCode * pErrorCode)201 parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
202     char *fields[3][2];
203 
204     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
205         return;
206     }
207 
208     u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
209 
210     //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
211 
212     if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
213         test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
214     }
215 }
216 
217 
218 static inline UStringPrepType
getValues(uint32_t result,int32_t & value,UBool & isIndex)219 getValues(uint32_t result, int32_t& value, UBool& isIndex){
220 
221     UStringPrepType type;
222 
223     if(result == 0){
224         /*
225          * Initial value stored in the mapping table
226          * just return USPREP_TYPE_LIMIT .. so that
227          * the source codepoint is copied to the destination
228          */
229         type = USPREP_TYPE_LIMIT;
230         isIndex =FALSE;
231         value = 0;
232     }else if(result >= _SPREP_TYPE_THRESHOLD){
233         type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
234         isIndex =FALSE;
235         value = 0;
236     }else{
237         /* get the state */
238         type = USPREP_MAP;
239         /* ascertain if the value is index or delta */
240         if(result & 0x02){
241             isIndex = TRUE;
242             value = result  >> 2; //mask off the lower 2 bits and shift
243 
244         }else{
245             isIndex = FALSE;
246             value = (int16_t)result;
247             value =  (value >> 2);
248 
249         }
250         if((result>>2) == _SPREP_MAX_INDEX_VALUE){
251             type = USPREP_DELETE;
252             isIndex =FALSE;
253             value = 0;
254         }
255     }
256     return type;
257 }
258 
259 
260 
261 static void
testAllCodepoints(TestIDNA & test)262 testAllCodepoints(TestIDNA& test){
263     /*
264     {
265         UChar str[19] = {
266                             0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
267                             0x070F,//prohibited
268                             0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
269                         };
270         uint32_t in[19] = {0};
271         UErrorCode status = U_ZERO_ERROR;
272         int32_t inLength=0, outLength=100;
273         char output[100] = {0};
274         punycode_status error;
275         u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
276 
277         error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
278         printf(output);
279 
280     }
281     */
282 
283     uint32_t i = 0;
284     int32_t unassigned      = 0;
285     int32_t prohibited      = 0;
286     int32_t mappedWithNorm  = 0;
287     int32_t mapped          = 0;
288     int32_t noValueInTrie   = 0;
289 
290     UStringPrepType type;
291     int32_t value;
292     UBool isIndex = FALSE;
293 
294     for(i=0;i<=0x10FFFF;i++){
295         uint32_t result = 0;
296         UTRIE_GET16(idnTrie,i, result);
297         type = getValues(result,value, isIndex);
298         if(type != USPREP_TYPE_LIMIT ){
299             if(type == USPREP_UNASSIGNED){
300                 unassigned++;
301             }
302             if(type == USPREP_PROHIBITED){
303                 prohibited++;
304             }
305             if(type == USPREP_MAP){
306                 mapped++;
307             }
308         }else{
309             noValueInTrie++;
310             if(result > 0){
311                 test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
312             }
313         }
314     }
315 
316     test.logln("Number of Unassinged code points : %i \n",unassigned);
317     test.logln("Number of Prohibited code points : %i \n",prohibited);
318     test.logln("Number of Mapped code points : %i \n",mapped);
319     test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
320     test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
321 
322 
323 }
324 
325 static void
compareMapping(uint32_t codepoint,uint32_t * mapping,int32_t mapLength,UStringPrepType type)326 compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
327                UStringPrepType type){
328     uint32_t result = 0;
329     UTRIE_GET16(idnTrie,codepoint, result);
330 
331     int32_t length=0;
332     UBool isIndex;
333     UStringPrepType retType;
334     int32_t value, index=0, delta=0;
335 
336     retType = getValues(result,value,isIndex);
337 
338 
339     if(type != retType && retType != USPREP_DELETE){
340 
341         pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
342 
343     }
344 
345     if(isIndex){
346         index = value;
347         if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
348                  index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
349             length = 1;
350         }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
351                  index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
352             length = 2;
353         }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
354                  index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
355             length = 3;
356         }else{
357             length = mappingData[index++];
358         }
359     }else{
360         delta = value;
361         length = (retType == USPREP_DELETE)? 0 :  1;
362     }
363 
364     int32_t realLength =0;
365     /* figure out the real length */
366     for(int32_t j=0; j<mapLength; j++){
367         if(mapping[j] > 0xFFFF){
368             realLength +=2;
369         }else{
370             realLength++;
371         }
372     }
373 
374     if(realLength != length){
375         pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
376     }
377 
378     if(isIndex){
379         for(int8_t i =0; i< mapLength; i++){
380             if(mapping[i] <= 0xFFFF){
381                 if(mappingData[index+i] != (uint16_t)mapping[i]){
382                     pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
383                 }
384             }else{
385                 UChar lead  = U16_LEAD(mapping[i]);
386                 UChar trail = U16_TRAIL(mapping[i]);
387                 if(mappingData[index+i] != lead ||
388                     mappingData[index+i+1] != trail){
389                     pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
390                 }
391             }
392         }
393     }else{
394         if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
395             pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
396         }
397     }
398 
399 }
400 
401 static void
compareFlagsForRange(uint32_t start,uint32_t end,UStringPrepType type)402 compareFlagsForRange(uint32_t start, uint32_t end,
403                      UStringPrepType type){
404 
405     uint32_t result =0 ;
406     UStringPrepType retType;
407     UBool isIndex=FALSE;
408     int32_t value=0;
409 /*
410     // supplementary code point
411     UChar __lead16=U16_LEAD(0x2323E);
412     int32_t __offset;
413 
414     // get data for lead surrogate
415     (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
416     __offset=(&idnTrie)->getFoldingOffset(result);
417 
418     // get the real data from the folded lead/trail units
419     if(__offset>0) {
420         (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
421     } else {
422         (result)=(uint32_t)((&idnTrie)->initialValue);
423     }
424 
425     UTRIE_GET16(&idnTrie,0x2323E, result);
426 */
427     while(start < end+1){
428         UTRIE_GET16(idnTrie,start, result);
429         retType = getValues(result,value,isIndex);
430         if(result > _SPREP_TYPE_THRESHOLD){
431             if(retType != type){
432                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
433             }
434         }else{
435             if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
436                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
437             }
438         }
439 
440         start++;
441     }
442 
443 }
444 
445 
446 #endif /* #if !UCONFIG_NO_IDNA */
447 
448 /*
449  * Hey, Emacs, please set the following:
450  *
451  * Local Variables:
452  * indent-tabs-mode: nil
453  * End:
454  *
455  */
456