• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2009, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  usprep.cpp
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003jul2
14  *   created by: Ram Viswanadha
15  */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_IDNA
20 
21 #include "unicode/usprep.h"
22 
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "unormimp.h"
36 #include "ubidi_props.h"
37 
38 U_CDECL_BEGIN
39 
40 /*
41 Static cache for already opened StringPrep profiles
42 */
43 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
44 
45 static UMTX usprepMutex = NULL;
46 
47 /* format version of spp file */
48 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
49 
50 /* the Unicode version of the sprep data */
51 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
52 
53 /* Profile names must be aligned to UStringPrepProfileType */
54 static const char *PROFILE_NAMES[] = {
55     "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
56     "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
57     "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
58     "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
59     "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
60     "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
61     "rfc3722",      /* USPREP_RFC3722_ISCSI */
62     "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
63     "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
64     "rfc4011",      /* USPREP_RFC4011_MIB */
65     "rfc4013",      /* USPREP_RFC4013_SASLPREP */
66     "rfc4505",      /* USPREP_RFC4505_TRACE */
67     "rfc4518",      /* USPREP_RFC4518_LDAP */
68     "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
69 };
70 
71 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)72 isSPrepAcceptable(void * /* context */,
73              const char * /* type */,
74              const char * /* name */,
75              const UDataInfo *pInfo) {
76     if(
77         pInfo->size>=20 &&
78         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
79         pInfo->charsetFamily==U_CHARSET_FAMILY &&
80         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
81         pInfo->dataFormat[1]==0x50 &&
82         pInfo->dataFormat[2]==0x52 &&
83         pInfo->dataFormat[3]==0x50 &&
84         pInfo->formatVersion[0]==3 &&
85         pInfo->formatVersion[2]==UTRIE_SHIFT &&
86         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
87     ) {
88         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
89         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
90         return TRUE;
91     } else {
92         return FALSE;
93     }
94 }
95 
96 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)97 getSPrepFoldingOffset(uint32_t data) {
98 
99     return (int32_t)data;
100 
101 }
102 
103 /* hashes an entry  */
104 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)105 hashEntry(const UHashTok parm) {
106     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
107     UHashTok namekey, pathkey;
108     namekey.pointer = b->name;
109     pathkey.pointer = b->path;
110     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
111 }
112 
113 /* compares two entries */
114 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)115 compareEntries(const UHashTok p1, const UHashTok p2) {
116     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
117     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
118     UHashTok name1, name2, path1, path2;
119     name1.pointer = b1->name;
120     name2.pointer = b2->name;
121     path1.pointer = b1->path;
122     path2.pointer = b2->path;
123     return ((UBool)(uhash_compareChars(name1, name2) &
124         uhash_compareChars(path1, path2)));
125 }
126 
127 static void
usprep_unload(UStringPrepProfile * data)128 usprep_unload(UStringPrepProfile* data){
129     udata_close(data->sprepData);
130 }
131 
132 static int32_t
usprep_internal_flushCache(UBool noRefCount)133 usprep_internal_flushCache(UBool noRefCount){
134     UStringPrepProfile *profile = NULL;
135     UStringPrepKey  *key  = NULL;
136     int32_t pos = -1;
137     int32_t deletedNum = 0;
138     const UHashElement *e;
139 
140     /*
141      * if shared data hasn't even been lazy evaluated yet
142      * return 0
143      */
144     umtx_lock(&usprepMutex);
145     if (SHARED_DATA_HASHTABLE == NULL) {
146         umtx_unlock(&usprepMutex);
147         return 0;
148     }
149 
150     /*creates an enumeration to iterate through every element in the table */
151     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
152     {
153         profile = (UStringPrepProfile *) e->value.pointer;
154         key  = (UStringPrepKey *) e->key.pointer;
155 
156         if ((noRefCount== FALSE && profile->refCount == 0) ||
157              noRefCount== TRUE) {
158             deletedNum++;
159             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
160 
161             /* unload the data */
162             usprep_unload(profile);
163 
164             if(key->name != NULL) {
165                 uprv_free(key->name);
166                 key->name=NULL;
167             }
168             if(key->path != NULL) {
169                 uprv_free(key->path);
170                 key->path=NULL;
171             }
172             uprv_free(profile);
173             uprv_free(key);
174         }
175 
176     }
177     umtx_unlock(&usprepMutex);
178 
179     return deletedNum;
180 }
181 
182 /* Works just like ucnv_flushCache()
183 static int32_t
184 usprep_flushCache(){
185     return usprep_internal_flushCache(FALSE);
186 }
187 */
188 
usprep_cleanup(void)189 static UBool U_CALLCONV usprep_cleanup(void){
190     if (SHARED_DATA_HASHTABLE != NULL) {
191         usprep_internal_flushCache(TRUE);
192         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
193             uhash_close(SHARED_DATA_HASHTABLE);
194             SHARED_DATA_HASHTABLE = NULL;
195         }
196     }
197 
198     umtx_destroy(&usprepMutex);             /* Don't worry about destroying the mutex even  */
199                                             /*  if the hash table still exists.  The mutex  */
200                                             /*  will lazily re-init  itself if needed.      */
201     return (SHARED_DATA_HASHTABLE == NULL);
202 }
203 U_CDECL_END
204 
205 static void
usprep_init()206 usprep_init() {
207     umtx_init(&usprepMutex);
208 }
209 
210 /** Initializes the cache for resources */
211 static void
initCache(UErrorCode * status)212 initCache(UErrorCode *status) {
213     UBool makeCache;
214     UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE ==  NULL), makeCache);
215     if(makeCache) {
216         UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
217         if (U_SUCCESS(*status)) {
218             umtx_lock(&usprepMutex);
219             if(SHARED_DATA_HASHTABLE == NULL) {
220                 SHARED_DATA_HASHTABLE = newCache;
221                 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
222                 newCache = NULL;
223             }
224             umtx_unlock(&usprepMutex);
225         }
226         if(newCache != NULL) {
227             uhash_close(newCache);
228         }
229     }
230 }
231 
232 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)233 loadData(UStringPrepProfile* profile,
234          const char* path,
235          const char* name,
236          const char* type,
237          UErrorCode* errorCode) {
238     /* load Unicode SPREP data from file */
239     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
240     UDataMemory *dataMemory;
241     const int32_t *p=NULL;
242     const uint8_t *pb;
243     UVersionInfo normUnicodeVersion;
244     int32_t normUniVer, sprepUniVer, normCorrVer;
245 
246     if(errorCode==NULL || U_FAILURE(*errorCode)) {
247         return 0;
248     }
249 
250     /* open the data outside the mutex block */
251     //TODO: change the path
252     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
253     if(U_FAILURE(*errorCode)) {
254         return FALSE;
255     }
256 
257     p=(const int32_t *)udata_getMemory(dataMemory);
258     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
259     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
260     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
261 
262 
263     if(U_FAILURE(*errorCode)) {
264         udata_close(dataMemory);
265         return FALSE;
266     }
267 
268     /* in the mutex block, set the data for this process */
269     umtx_lock(&usprepMutex);
270     if(profile->sprepData==NULL) {
271         profile->sprepData=dataMemory;
272         dataMemory=NULL;
273         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
274         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
275     } else {
276         p=(const int32_t *)udata_getMemory(profile->sprepData);
277     }
278     umtx_unlock(&usprepMutex);
279     /* initialize some variables */
280     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
281 
282     unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
283     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
284                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
285     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
286                   (dataVersion[2] << 8 ) + (dataVersion[3]);
287     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
288 
289     if(U_FAILURE(*errorCode)){
290         udata_close(dataMemory);
291         return FALSE;
292     }
293     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
294         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
295         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
296       ){
297         *errorCode = U_INVALID_FORMAT_ERROR;
298         udata_close(dataMemory);
299         return FALSE;
300     }
301     profile->isDataLoaded = TRUE;
302 
303     /* if a different thread set it first, then close the extra data */
304     if(dataMemory!=NULL) {
305         udata_close(dataMemory); /* NULL if it was set correctly */
306     }
307 
308 
309     return profile->isDataLoaded;
310 }
311 
312 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)313 usprep_getProfile(const char* path,
314                   const char* name,
315                   UErrorCode *status){
316 
317     UStringPrepProfile* profile = NULL;
318 
319     initCache(status);
320 
321     if(U_FAILURE(*status)){
322         return NULL;
323     }
324 
325     UStringPrepKey stackKey;
326     /*
327      * const is cast way to save malloc, strcpy and free calls
328      * we use the passed in pointers for fetching the data from the
329      * hash table which is safe
330      */
331     stackKey.name = (char*) name;
332     stackKey.path = (char*) path;
333 
334     /* fetch the data from the cache */
335     umtx_lock(&usprepMutex);
336     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
337     umtx_unlock(&usprepMutex);
338 
339     if(profile == NULL){
340         UStringPrepKey* key   = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
341         if(key == NULL){
342             *status = U_MEMORY_ALLOCATION_ERROR;
343             return NULL;
344         }
345         /* else load the data and put the data in the cache */
346         profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
347         if(profile == NULL){
348             *status = U_MEMORY_ALLOCATION_ERROR;
349             uprv_free(key);
350             return NULL;
351         }
352 
353         /* initialize the data struct members */
354         uprv_memset(profile->indexes,0,sizeof(profile->indexes));
355         profile->mappingData = NULL;
356         profile->sprepData   = NULL;
357         profile->refCount    = 0;
358 
359         /* initialize the  key memebers */
360         key->name  = (char*) uprv_malloc(uprv_strlen(name)+1);
361         if(key->name == NULL){
362             *status = U_MEMORY_ALLOCATION_ERROR;
363             uprv_free(key);
364             uprv_free(profile);
365             return NULL;
366         }
367 
368         uprv_strcpy(key->name, name);
369 
370         key->path=NULL;
371 
372         if(path != NULL){
373             key->path      = (char*) uprv_malloc(uprv_strlen(path)+1);
374             if(key->path == NULL){
375                 *status = U_MEMORY_ALLOCATION_ERROR;
376                 uprv_free(key->name);
377                 uprv_free(key);
378                 uprv_free(profile);
379                 return NULL;
380             }
381             uprv_strcpy(key->path, path);
382         }
383 
384         /* load the data */
385         if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
386             uprv_free(key->path);
387             uprv_free(key->name);
388             uprv_free(key);
389             uprv_free(profile);
390             return NULL;
391         }
392 
393         /* get the options */
394         profile->doNFKC            = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
395         profile->checkBiDi         = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
396 
397         if(profile->checkBiDi) {
398             profile->bdp = ubidi_getSingleton(status);
399             if(U_FAILURE(*status)) {
400                 usprep_unload(profile);
401                 uprv_free(key->path);
402                 uprv_free(key->name);
403                 uprv_free(key);
404                 uprv_free(profile);
405                 return NULL;
406             }
407         } else {
408             profile->bdp = NULL;
409         }
410 
411         umtx_lock(&usprepMutex);
412         /* add the data object to the cache */
413         uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
414         umtx_unlock(&usprepMutex);
415     }
416     umtx_lock(&usprepMutex);
417     /* increment the refcount */
418     profile->refCount++;
419     umtx_unlock(&usprepMutex);
420 
421     return profile;
422 }
423 
424 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)425 usprep_open(const char* path,
426             const char* name,
427             UErrorCode* status){
428 
429     if(status == NULL || U_FAILURE(*status)){
430         return NULL;
431     }
432     /* initialize the mutex */
433     usprep_init();
434 
435     /* initialize the profile struct members */
436     return usprep_getProfile(path,name,status);
437 }
438 
439 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)440 usprep_openByType(UStringPrepProfileType type,
441 				  UErrorCode* status) {
442     if(status == NULL || U_FAILURE(*status)){
443         return NULL;
444     }
445     int32_t index = (int32_t)type;
446     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
447         *status = U_ILLEGAL_ARGUMENT_ERROR;
448         return NULL;
449     }
450     return usprep_open(NULL, PROFILE_NAMES[index], status);
451 }
452 
453 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)454 usprep_close(UStringPrepProfile* profile){
455     if(profile==NULL){
456         return;
457     }
458 
459     umtx_lock(&usprepMutex);
460     /* decrement the ref count*/
461     if(profile->refCount > 0){
462         profile->refCount--;
463     }
464     umtx_unlock(&usprepMutex);
465 
466 }
467 
468 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)469 uprv_syntaxError(const UChar* rules,
470                  int32_t pos,
471                  int32_t rulesLen,
472                  UParseError* parseError){
473     if(parseError == NULL){
474         return;
475     }
476     parseError->offset = pos;
477     parseError->line = 0 ; // we are not using line numbers
478 
479     // for pre-context
480     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
481     int32_t limit = pos;
482 
483     u_memcpy(parseError->preContext,rules+start,limit-start);
484     //null terminate the buffer
485     parseError->preContext[limit-start] = 0;
486 
487     // for post-context; include error rules[pos]
488     start = pos;
489     limit = start + (U_PARSE_CONTEXT_LEN-1);
490     if (limit > rulesLen) {
491         limit = rulesLen;
492     }
493     if (start < rulesLen) {
494         u_memcpy(parseError->postContext,rules+start,limit-start);
495     }
496     //null terminate the buffer
497     parseError->postContext[limit-start]= 0;
498 }
499 
500 
501 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)502 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
503 
504     UStringPrepType type;
505     if(trieWord == 0){
506         /*
507          * Initial value stored in the mapping table
508          * just return USPREP_TYPE_LIMIT .. so that
509          * the source codepoint is copied to the destination
510          */
511         type = USPREP_TYPE_LIMIT;
512         isIndex =FALSE;
513         value = 0;
514     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
515         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
516         isIndex =FALSE;
517         value = 0;
518     }else{
519         /* get the type */
520         type = USPREP_MAP;
521         /* ascertain if the value is index or delta */
522         if(trieWord & 0x02){
523             isIndex = TRUE;
524             value = trieWord  >> 2; //mask off the lower 2 bits and shift
525         }else{
526             isIndex = FALSE;
527             value = (int16_t)trieWord;
528             value =  (value >> 2);
529         }
530 
531         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
532             type = USPREP_DELETE;
533             isIndex =FALSE;
534             value = 0;
535         }
536     }
537     return type;
538 }
539 
540 
541 
542 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)543 usprep_map(  const UStringPrepProfile* profile,
544              const UChar* src, int32_t srcLength,
545              UChar* dest, int32_t destCapacity,
546              int32_t options,
547              UParseError* parseError,
548              UErrorCode* status ){
549 
550     uint16_t result;
551     int32_t destIndex=0;
552     int32_t srcIndex;
553     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
554     UStringPrepType type;
555     int16_t value;
556     UBool isIndex;
557     const int32_t* indexes = profile->indexes;
558 
559     // no error checking the caller check for error and arguments
560     // no string length check the caller finds out the string length
561 
562     for(srcIndex=0;srcIndex<srcLength;){
563         UChar32 ch;
564 
565         U16_NEXT(src,srcIndex,srcLength,ch);
566 
567         result=0;
568 
569         UTRIE_GET16(&profile->sprepTrie,ch,result);
570 
571         type = getValues(result, value, isIndex);
572 
573         // check if the source codepoint is unassigned
574         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
575 
576             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
577             *status = U_STRINGPREP_UNASSIGNED_ERROR;
578             return 0;
579 
580         }else if(type == USPREP_MAP){
581 
582             int32_t index, length;
583 
584             if(isIndex){
585                 index = value;
586                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
587                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
588                     length = 1;
589                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
590                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
591                     length = 2;
592                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
593                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
594                     length = 3;
595                 }else{
596                     length = profile->mappingData[index++];
597 
598                 }
599 
600                 /* copy mapping to destination */
601                 for(int32_t i=0; i< length; i++){
602                     if(destIndex < destCapacity  ){
603                         dest[destIndex] = profile->mappingData[index+i];
604                     }
605                     destIndex++; /* for pre-flighting */
606                 }
607                 continue;
608             }else{
609                 // subtract the delta to arrive at the code point
610                 ch -= value;
611             }
612 
613         }else if(type==USPREP_DELETE){
614              // just consume the codepoint and contine
615             continue;
616         }
617         //copy the code point into destination
618         if(ch <= 0xFFFF){
619             if(destIndex < destCapacity ){
620                 dest[destIndex] = (UChar)ch;
621             }
622             destIndex++;
623         }else{
624             if(destIndex+1 < destCapacity ){
625                 dest[destIndex]   = U16_LEAD(ch);
626                 dest[destIndex+1] = U16_TRAIL(ch);
627             }
628             destIndex +=2;
629         }
630 
631     }
632 
633     return u_terminateUChars(dest, destCapacity, destIndex, status);
634 }
635 
636 
637 static int32_t
usprep_normalize(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode * status)638 usprep_normalize(   const UChar* src, int32_t srcLength,
639                     UChar* dest, int32_t destCapacity,
640                     UErrorCode* status ){
641     /*
642      * Option UNORM_BEFORE_PRI_29:
643      *
644      * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
645      * requires strict adherence to Unicode 3.2 normalization,
646      * including buggy composition from before fixing Public Review Issue #29.
647      * Note that this results in some valid but nonsensical text to be
648      * either corrupted or rejected, depending on the text.
649      * See http://www.unicode.org/review/resolved-pri.html#pri29
650      * See unorm.cpp and cnormtst.c
651      */
652     return unorm_normalize(
653         src, srcLength,
654         UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
655         dest, destCapacity,
656         status);
657 }
658 
659 
660  /*
661    1) Map -- For each character in the input, check if it has a mapping
662       and, if so, replace it with its mapping.
663 
664    2) Normalize -- Possibly normalize the result of step 1 using Unicode
665       normalization.
666 
667    3) Prohibit -- Check for any characters that are not allowed in the
668       output.  If any are found, return an error.
669 
670    4) Check bidi -- Possibly check for right-to-left characters, and if
671       any are found, make sure that the whole string satisfies the
672       requirements for bidirectional strings.  If the string does not
673       satisfy the requirements for bidirectional strings, return an
674       error.
675       [Unicode3.2] defines several bidirectional categories; each character
676        has one bidirectional category assigned to it.  For the purposes of
677        the requirements below, an "RandALCat character" is a character that
678        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
679        is a character that has Unicode bidirectional category "L".  Note
680 
681 
682        that there are many characters which fall in neither of the above
683        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
684        this because they have bidirectional category "EN".
685 
686        In any profile that specifies bidirectional character handling, all
687        three of the following requirements MUST be met:
688 
689        1) The characters in section 5.8 MUST be prohibited.
690 
691        2) If a string contains any RandALCat character, the string MUST NOT
692           contain any LCat character.
693 
694        3) If a string contains any RandALCat character, a RandALCat
695           character MUST be the first character of the string, and a
696           RandALCat character MUST be the last character of the string.
697 */
698 
699 #define MAX_STACK_BUFFER_SIZE 300
700 
701 
702 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)703 usprep_prepare(   const UStringPrepProfile* profile,
704                   const UChar* src, int32_t srcLength,
705                   UChar* dest, int32_t destCapacity,
706                   int32_t options,
707                   UParseError* parseError,
708                   UErrorCode* status ){
709 
710     // check error status
711     if(status == NULL || U_FAILURE(*status)){
712         return 0;
713     }
714 
715     //check arguments
716     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
717         *status=U_ILLEGAL_ARGUMENT_ERROR;
718         return 0;
719     }
720 
721     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
722     UChar *b1 = b1Stack, *b2 = b2Stack;
723     int32_t b1Len, b2Len=0,
724             b1Capacity = MAX_STACK_BUFFER_SIZE ,
725             b2Capacity = MAX_STACK_BUFFER_SIZE;
726     uint16_t result;
727     int32_t b2Index = 0;
728     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
729     UBool leftToRight=FALSE, rightToLeft=FALSE;
730     int32_t rtlPos =-1, ltrPos =-1;
731 
732     //get the string length
733     if(srcLength == -1){
734         srcLength = u_strlen(src);
735     }
736     // map
737     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
738 
739     if(*status == U_BUFFER_OVERFLOW_ERROR){
740         // redo processing of string
741         /* we do not have enough room so grow the buffer*/
742         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
743         if(b1==NULL){
744             *status = U_MEMORY_ALLOCATION_ERROR;
745             goto CLEANUP;
746         }
747 
748         *status = U_ZERO_ERROR; // reset error
749 
750         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
751 
752     }
753 
754     // normalize
755     if(profile->doNFKC == TRUE){
756         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
757 
758         if(*status == U_BUFFER_OVERFLOW_ERROR){
759             // redo processing of string
760             /* we do not have enough room so grow the buffer*/
761             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
762             if(b2==NULL){
763                 *status = U_MEMORY_ALLOCATION_ERROR;
764                 goto CLEANUP;
765             }
766 
767             *status = U_ZERO_ERROR; // reset error
768 
769             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
770 
771         }
772 
773     }else{
774         b2 = b1;
775         b2Len = b1Len;
776     }
777 
778 
779     if(U_FAILURE(*status)){
780         goto CLEANUP;
781     }
782 
783     UChar32 ch;
784     UStringPrepType type;
785     int16_t value;
786     UBool isIndex;
787 
788     // Prohibit and checkBiDi in one pass
789     for(b2Index=0; b2Index<b2Len;){
790 
791         ch = 0;
792 
793         U16_NEXT(b2, b2Index, b2Len, ch);
794 
795         UTRIE_GET16(&profile->sprepTrie,ch,result);
796 
797         type = getValues(result, value, isIndex);
798 
799         if( type == USPREP_PROHIBITED ||
800             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
801            ){
802             *status = U_STRINGPREP_PROHIBITED_ERROR;
803             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
804             goto CLEANUP;
805         }
806 
807         if(profile->checkBiDi) {
808             direction = ubidi_getClass(profile->bdp, ch);
809             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
810                 firstCharDir = direction;
811             }
812             if(direction == U_LEFT_TO_RIGHT){
813                 leftToRight = TRUE;
814                 ltrPos = b2Index-1;
815             }
816             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
817                 rightToLeft = TRUE;
818                 rtlPos = b2Index-1;
819             }
820         }
821     }
822     if(profile->checkBiDi == TRUE){
823         // satisfy 2
824         if( leftToRight == TRUE && rightToLeft == TRUE){
825             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
826             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
827             goto CLEANUP;
828         }
829 
830         //satisfy 3
831         if( rightToLeft == TRUE &&
832             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
833               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
834            ){
835             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
836             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
837             return FALSE;
838         }
839     }
840     if(b2Len>0 && b2Len <= destCapacity){
841         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
842     }
843 
844 CLEANUP:
845     if(b1!=b1Stack){
846         uprv_free(b1);
847         b1=NULL;
848     }
849 
850     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
851         uprv_free(b2);
852         b2=NULL;
853     }
854     return u_terminateUChars(dest, destCapacity, b2Len, status);
855 }
856 
857 
858 /* data swapping ------------------------------------------------------------ */
859 
860 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)861 usprep_swap(const UDataSwapper *ds,
862             const void *inData, int32_t length, void *outData,
863             UErrorCode *pErrorCode) {
864     const UDataInfo *pInfo;
865     int32_t headerSize;
866 
867     const uint8_t *inBytes;
868     uint8_t *outBytes;
869 
870     const int32_t *inIndexes;
871     int32_t indexes[16];
872 
873     int32_t i, offset, count, size;
874 
875     /* udata_swapDataHeader checks the arguments */
876     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
877     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
878         return 0;
879     }
880 
881     /* check data format and format version */
882     pInfo=(const UDataInfo *)((const char *)inData+4);
883     if(!(
884         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
885         pInfo->dataFormat[1]==0x50 &&
886         pInfo->dataFormat[2]==0x52 &&
887         pInfo->dataFormat[3]==0x50 &&
888         pInfo->formatVersion[0]==3
889     )) {
890         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
891                          pInfo->dataFormat[0], pInfo->dataFormat[1],
892                          pInfo->dataFormat[2], pInfo->dataFormat[3],
893                          pInfo->formatVersion[0]);
894         *pErrorCode=U_UNSUPPORTED_ERROR;
895         return 0;
896     }
897 
898     inBytes=(const uint8_t *)inData+headerSize;
899     outBytes=(uint8_t *)outData+headerSize;
900 
901     inIndexes=(const int32_t *)inBytes;
902 
903     if(length>=0) {
904         length-=headerSize;
905         if(length<16*4) {
906             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
907                              length);
908             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
909             return 0;
910         }
911     }
912 
913     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
914     for(i=0; i<16; ++i) {
915         indexes[i]=udata_readInt32(ds, inIndexes[i]);
916     }
917 
918     /* calculate the total length of the data */
919     size=
920         16*4+ /* size of indexes[] */
921         indexes[_SPREP_INDEX_TRIE_SIZE]+
922         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
923 
924     if(length>=0) {
925         if(length<size) {
926             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
927                              length);
928             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
929             return 0;
930         }
931 
932         /* copy the data for inaccessible bytes */
933         if(inBytes!=outBytes) {
934             uprv_memcpy(outBytes, inBytes, size);
935         }
936 
937         offset=0;
938 
939         /* swap the int32_t indexes[] */
940         count=16*4;
941         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
942         offset+=count;
943 
944         /* swap the UTrie */
945         count=indexes[_SPREP_INDEX_TRIE_SIZE];
946         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
947         offset+=count;
948 
949         /* swap the uint16_t mappingTable[] */
950         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
951         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
952         offset+=count;
953     }
954 
955     return headerSize+size;
956 }
957 
958 #endif /* #if !UCONFIG_NO_IDNA */
959