• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003-2013, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  testidn.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2003-02-06
16 *   created by: Ram Viswanadha
17 *
18 *   This test reads the Nameprep source data file (sprep/rfc3491.txt),
19 *   parses it, and checks every entry against the data of the
20 *   USPREP_RFC3491_NAMEPREP StringPrep profile that is loaded at run time.
21 *   It does not write any files.
22 */
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
27 
28 #define USPREP_TYPE_NAMES_ARRAY
29 
30 #include "unicode/uchar.h"
31 #include "unicode/putil.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "unicode/udata.h"
35 #include "unicode/utf16.h"
36 #include "unewdata.h"
37 #include "uoptions.h"
38 #include "uparse.h"
39 #include "utrie.h"
40 #include "umutex.h"
41 #include "sprpimpl.h"
42 #include "testidna.h"
43 #include "punyref.h"
44 #include <stdlib.h>
45 
/* Global flags with external linkage; nothing in this file reads them. */
UBool beVerbose=FALSE, haveCopyright=TRUE;

/* prototypes --------------------------------------------------------------- */


/* Parses one ';'-delimited data file and checks each line via strprepProfileLineFn. */
static void
parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);

/* Checks the mapping stored for one code point against the expected mapping. */
static void
compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
               UStringPrepType option);

/* Checks the type stored for every code point in [start, end]. */
static void
compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);

/* Walks U+0000..U+10FFFF and logs how many code points fall into each type. */
static void
testAllCodepoints(TestIDNA& test);

/* Test object used by the compare* helpers to report errors; set and cleared by testData(). */
static TestIDNA* pTestIDNA =NULL;

/* Data file(s), looked up inside the SPREP_DIR subdirectory of the test data directory. */
static const char* fileNames[] = {
                                    "rfc3491.txt"
                                 };
/* Views into the profile opened by testData(); only meaningful while testData() is running. */
static const UTrie *idnTrie              = NULL;
static const int32_t *indexes            = NULL;
static const uint16_t *mappingData       = NULL;
/* -------------------------------------------------------------------------- */

/* file definitions */
/* Not referenced by the code in this file. */
#define DATA_TYPE "icu"

/* Subdirectory of the test data directory that holds the files listed in fileNames. */
#define SPREP_DIR "sprep"
78 
79 extern int
testData(TestIDNA & test)80 testData(TestIDNA& test) {
81     char *basename=NULL;
82     UErrorCode errorCode=U_ZERO_ERROR;
83     char *saveBasename =NULL;
84 
85     LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode));
86     if(U_FAILURE(errorCode)){
87         test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
88         return errorCode;
89     }
90 
91     char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
92     //TODO get the srcDir dynamically
93     const char *srcDir=IntlTest::pathToDataDirectory();
94 
95     idnTrie     = &profile->sprepTrie;
96     indexes     = profile->indexes;
97     mappingData = profile->mappingData;
98 
99     //initialize
100     pTestIDNA = &test;
101 
102     /* prepare the filename beginning with the source dir */
103     if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
104         filename[0] = 0x2E;
105         filename[1] = U_FILE_SEP_CHAR;
106         uprv_strcpy(filename+2,srcDir);
107     }else{
108         uprv_strcpy(filename, srcDir);
109     }
110     basename=filename+uprv_strlen(filename);
111     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
112         *basename++=U_FILE_SEP_CHAR;
113     }
114 
115     /* process unassigned */
116     basename=filename+uprv_strlen(filename);
117     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
118         *basename++=U_FILE_SEP_CHAR;
119     }
120 
121     /* first copy misc directory */
122     saveBasename = basename;
123     (void)saveBasename;    // Suppress set but not used warning.
124     uprv_strcpy(basename,SPREP_DIR);
125     basename = basename + uprv_strlen(SPREP_DIR);
126     *basename++=U_FILE_SEP_CHAR;
127 
128     /* process unassigned */
129     uprv_strcpy(basename,fileNames[0]);
130     parseMappings(filename,TRUE, test,&errorCode);
131     if(U_FAILURE(errorCode)) {
132         test.errln( "Could not open file %s for reading \n", filename);
133         return errorCode;
134     }
135 
136     testAllCodepoints(test);
137 
138     pTestIDNA = NULL;
139     free(filename);
140     return errorCode;
141 }
142 U_CDECL_BEGIN
143 
144 static void U_CALLCONV
strprepProfileLineFn(void *,char * fields[][2],int32_t fieldCount,UErrorCode * pErrorCode)145 strprepProfileLineFn(void * /*context*/,
146               char *fields[][2], int32_t fieldCount,
147               UErrorCode *pErrorCode) {
148     uint32_t mapping[40];
149     char *end, *map;
150     uint32_t code;
151     int32_t length;
152    /*UBool* mapWithNorm = (UBool*) context;*/
153     const char* typeName;
154     uint32_t rangeStart=0,rangeEnd =0;
155     const char *s;
156 
157     s = u_skipWhitespace(fields[0][0]);
158     if (*s == '@') {
159         /* a special directive introduced in 4.2 */
160         return;
161     }
162 
163     if(fieldCount != 3){
164         *pErrorCode = U_INVALID_FORMAT_ERROR;
165         return;
166     }
167 
168     typeName = fields[2][0];
169     map = fields[1][0];
170 
171     if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
172 
173         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
174 
175         /* store the range */
176         compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
177 
178     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
179 
180         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
181 
182         /* store the range */
183         compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
184 
185     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
186         /* get the character code, field 0 */
187         code=(uint32_t)uprv_strtoul(s, &end, 16);
188 
189         /* parse the mapping string */
190         length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
191 
192         /* store the mapping */
193         compareMapping(code,mapping, length,USPREP_MAP);
194 
195     }else{
196         *pErrorCode = U_INVALID_FORMAT_ERROR;
197     }
198 
199 }
200 
201 U_CDECL_END
202 
203 static void
parseMappings(const char * filename,UBool reportError,TestIDNA & test,UErrorCode * pErrorCode)204 parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
205     char *fields[3][2];
206 
207     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
208         return;
209     }
210 
211     u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
212 
213     //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
214 
215     if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
216         test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
217     }
218 }
219 
220 
221 static inline UStringPrepType
getValues(uint32_t result,int32_t & value,UBool & isIndex)222 getValues(uint32_t result, int32_t& value, UBool& isIndex){
223 
224     UStringPrepType type;
225 
226     if(result == 0){
227         /*
228          * Initial value stored in the mapping table
229          * just return USPREP_TYPE_LIMIT .. so that
230          * the source codepoint is copied to the destination
231          */
232         type = USPREP_TYPE_LIMIT;
233         isIndex =FALSE;
234         value = 0;
235     }else if(result >= _SPREP_TYPE_THRESHOLD){
236         type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
237         isIndex =FALSE;
238         value = 0;
239     }else{
240         /* get the state */
241         type = USPREP_MAP;
242         /* ascertain if the value is index or delta */
243         if(result & 0x02){
244             isIndex = TRUE;
245             value = result  >> 2; //mask off the lower 2 bits and shift
246 
247         }else{
248             isIndex = FALSE;
249             value = (int16_t)result;
250             value =  (value >> 2);
251 
252         }
253         if((result>>2) == _SPREP_MAX_INDEX_VALUE){
254             type = USPREP_DELETE;
255             isIndex =FALSE;
256             value = 0;
257         }
258     }
259     return type;
260 }
261 
262 
263 
264 static void
testAllCodepoints(TestIDNA & test)265 testAllCodepoints(TestIDNA& test){
266     /*
267     {
268         UChar str[19] = {
269                             0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
270                             0x070F,//prohibited
271                             0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
272                         };
273         uint32_t in[19] = {0};
274         UErrorCode status = U_ZERO_ERROR;
275         int32_t inLength=0, outLength=100;
276         char output[100] = {0};
277         punycode_status error;
278         u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
279 
280         error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
281         printf(output);
282 
283     }
284     */
285 
286     uint32_t i = 0;
287     int32_t unassigned      = 0;
288     int32_t prohibited      = 0;
289     int32_t mappedWithNorm  = 0;
290     int32_t mapped          = 0;
291     int32_t noValueInTrie   = 0;
292 
293     UStringPrepType type;
294     int32_t value;
295     UBool isIndex = FALSE;
296 
297     for(i=0;i<=0x10FFFF;i++){
298         uint32_t result = 0;
299         UTRIE_GET16(idnTrie,i, result);
300         type = getValues(result,value, isIndex);
301         if(type != USPREP_TYPE_LIMIT ){
302             if(type == USPREP_UNASSIGNED){
303                 unassigned++;
304             }
305             if(type == USPREP_PROHIBITED){
306                 prohibited++;
307             }
308             if(type == USPREP_MAP){
309                 mapped++;
310             }
311         }else{
312             noValueInTrie++;
313             if(result > 0){
314                 test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
315             }
316         }
317     }
318 
319     test.logln("Number of Unassinged code points : %i \n",unassigned);
320     test.logln("Number of Prohibited code points : %i \n",prohibited);
321     test.logln("Number of Mapped code points : %i \n",mapped);
322     test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
323     test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
324 
325 
326 }
327 
328 static void
compareMapping(uint32_t codepoint,uint32_t * mapping,int32_t mapLength,UStringPrepType type)329 compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
330                UStringPrepType type){
331     uint32_t result = 0;
332     UTRIE_GET16(idnTrie,codepoint, result);
333 
334     int32_t length=0;
335     UBool isIndex;
336     UStringPrepType retType;
337     int32_t value, index=0, delta=0;
338 
339     retType = getValues(result,value,isIndex);
340 
341 
342     if(type != retType && retType != USPREP_DELETE){
343 
344         pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
345 
346     }
347 
348     if(isIndex){
349         index = value;
350         if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
351                  index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
352             length = 1;
353         }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
354                  index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
355             length = 2;
356         }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
357                  index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
358             length = 3;
359         }else{
360             length = mappingData[index++];
361         }
362     }else{
363         delta = value;
364         length = (retType == USPREP_DELETE)? 0 :  1;
365     }
366 
367     int32_t realLength =0;
368     /* figure out the real length */
369     for(int32_t j=0; j<mapLength; j++){
370         if(mapping[j] > 0xFFFF){
371             realLength +=2;
372         }else{
373             realLength++;
374         }
375     }
376 
377     if(realLength != length){
378         pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
379     }
380 
381     if(isIndex){
382         for(int8_t i =0; i< mapLength; i++){
383             if(mapping[i] <= 0xFFFF){
384                 if(mappingData[index+i] != (uint16_t)mapping[i]){
385                     pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
386                 }
387             }else{
388                 UChar lead  = U16_LEAD(mapping[i]);
389                 UChar trail = U16_TRAIL(mapping[i]);
390                 if(mappingData[index+i] != lead ||
391                     mappingData[index+i+1] != trail){
392                     pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
393                 }
394             }
395         }
396     }else{
397         if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
398             pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
399         }
400     }
401 
402 }
403 
404 static void
compareFlagsForRange(uint32_t start,uint32_t end,UStringPrepType type)405 compareFlagsForRange(uint32_t start, uint32_t end,
406                      UStringPrepType type){
407 
408     uint32_t result =0 ;
409     UStringPrepType retType;
410     UBool isIndex=FALSE;
411     int32_t value=0;
412 /*
413     // supplementary code point
414     UChar __lead16=U16_LEAD(0x2323E);
415     int32_t __offset;
416 
417     // get data for lead surrogate
418     (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
419     __offset=(&idnTrie)->getFoldingOffset(result);
420 
421     // get the real data from the folded lead/trail units
422     if(__offset>0) {
423         (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
424     } else {
425         (result)=(uint32_t)((&idnTrie)->initialValue);
426     }
427 
428     UTRIE_GET16(&idnTrie,0x2323E, result);
429 */
430     while(start < end+1){
431         UTRIE_GET16(idnTrie,start, result);
432         retType = getValues(result,value,isIndex);
433         if(result > _SPREP_TYPE_THRESHOLD){
434             if(retType != type){
435                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
436             }
437         }else{
438             if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
439                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
440             }
441         }
442 
443         start++;
444     }
445 
446 }
447 
448 
449 #endif /* #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION */
450 
451 /*
452  * Hey, Emacs, please set the following:
453  *
454  * Local Variables:
455  * indent-tabs-mode: nil
456  * End:
457  *
458  */
459