• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2007, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  usprep.cpp
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003jul2
14  *   created by: Ram Viswanadha
15  */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_IDNA
20 
21 #include "unicode/usprep.h"
22 
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "unormimp.h"
36 #include "ubidi_props.h"
37 
38 U_CDECL_BEGIN
39 
40 /*
41 Static cache for already opened StringPrep profiles
42 */
43 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
44 
45 static UMTX usprepMutex = NULL;
46 
47 /* format version of spp file */
48 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
49 
50 /* the Unicode version of the sprep data */
51 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
52 
53 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)54 isSPrepAcceptable(void * /* context */,
55              const char * /* type */,
56              const char * /* name */,
57              const UDataInfo *pInfo) {
58     if(
59         pInfo->size>=20 &&
60         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
61         pInfo->charsetFamily==U_CHARSET_FAMILY &&
62         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
63         pInfo->dataFormat[1]==0x50 &&
64         pInfo->dataFormat[2]==0x52 &&
65         pInfo->dataFormat[3]==0x50 &&
66         pInfo->formatVersion[0]==3 &&
67         pInfo->formatVersion[2]==UTRIE_SHIFT &&
68         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
69     ) {
70         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
71         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
72         return TRUE;
73     } else {
74         return FALSE;
75     }
76 }
77 
78 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)79 getSPrepFoldingOffset(uint32_t data) {
80 
81     return (int32_t)data;
82 
83 }
84 
85 /* hashes an entry  */
86 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)87 hashEntry(const UHashTok parm) {
88     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
89     UHashTok namekey, pathkey;
90     namekey.pointer = b->name;
91     pathkey.pointer = b->path;
92     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
93 }
94 
95 /* compares two entries */
96 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)97 compareEntries(const UHashTok p1, const UHashTok p2) {
98     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
99     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
100     UHashTok name1, name2, path1, path2;
101     name1.pointer = b1->name;
102     name2.pointer = b2->name;
103     path1.pointer = b1->path;
104     path2.pointer = b2->path;
105     return ((UBool)(uhash_compareChars(name1, name2) &
106         uhash_compareChars(path1, path2)));
107 }
108 
109 static void
usprep_unload(UStringPrepProfile * data)110 usprep_unload(UStringPrepProfile* data){
111     udata_close(data->sprepData);
112 }
113 
114 static int32_t
usprep_internal_flushCache(UBool noRefCount)115 usprep_internal_flushCache(UBool noRefCount){
116     UStringPrepProfile *profile = NULL;
117     UStringPrepKey  *key  = NULL;
118     int32_t pos = -1;
119     int32_t deletedNum = 0;
120     const UHashElement *e;
121 
122     /*
123      * if shared data hasn't even been lazy evaluated yet
124      * return 0
125      */
126     umtx_lock(&usprepMutex);
127     if (SHARED_DATA_HASHTABLE == NULL) {
128         umtx_unlock(&usprepMutex);
129         return 0;
130     }
131 
132     /*creates an enumeration to iterate through every element in the table */
133     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
134     {
135         profile = (UStringPrepProfile *) e->value.pointer;
136         key  = (UStringPrepKey *) e->key.pointer;
137 
138         if ((noRefCount== FALSE && profile->refCount == 0) ||
139              noRefCount== TRUE) {
140             deletedNum++;
141             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
142 
143             /* unload the data */
144             usprep_unload(profile);
145 
146             if(key->name != NULL) {
147                 uprv_free(key->name);
148                 key->name=NULL;
149             }
150             if(key->path != NULL) {
151                 uprv_free(key->path);
152                 key->path=NULL;
153             }
154             uprv_free(profile);
155             uprv_free(key);
156         }
157 
158     }
159     umtx_unlock(&usprepMutex);
160 
161     return deletedNum;
162 }
163 
164 /* Works just like ucnv_flushCache()
165 static int32_t
166 usprep_flushCache(){
167     return usprep_internal_flushCache(FALSE);
168 }
169 */
170 
usprep_cleanup(void)171 static UBool U_CALLCONV usprep_cleanup(void){
172     if (SHARED_DATA_HASHTABLE != NULL) {
173         usprep_internal_flushCache(TRUE);
174         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
175             uhash_close(SHARED_DATA_HASHTABLE);
176             SHARED_DATA_HASHTABLE = NULL;
177         }
178     }
179 
180     umtx_destroy(&usprepMutex);             /* Don't worry about destroying the mutex even  */
181                                             /*  if the hash table still exists.  The mutex  */
182                                             /*  will lazily re-init  itself if needed.      */
183     return (SHARED_DATA_HASHTABLE == NULL);
184 }
185 U_CDECL_END
186 
187 static void
usprep_init()188 usprep_init() {
189     umtx_init(&usprepMutex);
190 }
191 
192 /** Initializes the cache for resources */
193 static void
initCache(UErrorCode * status)194 initCache(UErrorCode *status) {
195     UBool makeCache;
196     UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE ==  NULL), makeCache);
197     if(makeCache) {
198         UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
199         if (U_SUCCESS(*status)) {
200             umtx_lock(&usprepMutex);
201             if(SHARED_DATA_HASHTABLE == NULL) {
202                 SHARED_DATA_HASHTABLE = newCache;
203                 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
204                 newCache = NULL;
205             }
206             umtx_unlock(&usprepMutex);
207             if(newCache != NULL) {
208                 uhash_close(newCache);
209             }
210         }
211     }
212 }
213 
214 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)215 loadData(UStringPrepProfile* profile,
216          const char* path,
217          const char* name,
218          const char* type,
219          UErrorCode* errorCode) {
220     /* load Unicode SPREP data from file */
221     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
222     UDataMemory *dataMemory;
223     const int32_t *p=NULL;
224     const uint8_t *pb;
225     UVersionInfo normUnicodeVersion;
226     int32_t normUniVer, sprepUniVer, normCorrVer;
227 
228     if(errorCode==NULL || U_FAILURE(*errorCode)) {
229         return 0;
230     }
231 
232     /* open the data outside the mutex block */
233     //TODO: change the path
234     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
235     if(U_FAILURE(*errorCode)) {
236         return FALSE;
237     }
238 
239     p=(const int32_t *)udata_getMemory(dataMemory);
240     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
241     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
242     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
243 
244 
245     if(U_FAILURE(*errorCode)) {
246         udata_close(dataMemory);
247         return FALSE;
248     }
249 
250     /* in the mutex block, set the data for this process */
251     umtx_lock(&usprepMutex);
252     if(profile->sprepData==NULL) {
253         profile->sprepData=dataMemory;
254         dataMemory=NULL;
255         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
256         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
257     } else {
258         p=(const int32_t *)udata_getMemory(profile->sprepData);
259     }
260     umtx_unlock(&usprepMutex);
261     /* initialize some variables */
262     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
263 
264     unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
265     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
266                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
267     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
268                   (dataVersion[2] << 8 ) + (dataVersion[3]);
269     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
270 
271     if(U_FAILURE(*errorCode)){
272         udata_close(dataMemory);
273         return FALSE;
274     }
275     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
276         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
277         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
278       ){
279         *errorCode = U_INVALID_FORMAT_ERROR;
280         udata_close(dataMemory);
281         return FALSE;
282     }
283     profile->isDataLoaded = TRUE;
284 
285     /* if a different thread set it first, then close the extra data */
286     if(dataMemory!=NULL) {
287         udata_close(dataMemory); /* NULL if it was set correctly */
288     }
289 
290 
291     return profile->isDataLoaded;
292 }
293 
294 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)295 usprep_getProfile(const char* path,
296                   const char* name,
297                   UErrorCode *status){
298 
299     UStringPrepProfile* profile = NULL;
300 
301     initCache(status);
302 
303     if(U_FAILURE(*status)){
304         return NULL;
305     }
306 
307     UStringPrepKey stackKey;
308     /*
309      * const is cast way to save malloc, strcpy and free calls
310      * we use the passed in pointers for fetching the data from the
311      * hash table which is safe
312      */
313     stackKey.name = (char*) name;
314     stackKey.path = (char*) path;
315 
316     /* fetch the data from the cache */
317     umtx_lock(&usprepMutex);
318     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
319     umtx_unlock(&usprepMutex);
320 
321     if(profile == NULL){
322         UStringPrepKey* key   = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
323         if(key == NULL){
324             *status = U_MEMORY_ALLOCATION_ERROR;
325             return NULL;
326         }
327         /* else load the data and put the data in the cache */
328         profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
329         if(profile == NULL){
330             *status = U_MEMORY_ALLOCATION_ERROR;
331             uprv_free(key);
332             return NULL;
333         }
334 
335         /* initialize the data struct members */
336         uprv_memset(profile->indexes,0,sizeof(profile->indexes));
337         profile->mappingData = NULL;
338         profile->sprepData   = NULL;
339         profile->refCount    = 0;
340 
341         /* initialize the  key memebers */
342         key->name  = (char*) uprv_malloc(uprv_strlen(name)+1);
343         if(key->name == NULL){
344             *status = U_MEMORY_ALLOCATION_ERROR;
345             uprv_free(key);
346             uprv_free(profile);
347             return NULL;
348         }
349 
350         uprv_strcpy(key->name, name);
351 
352         key->path=NULL;
353 
354         if(path != NULL){
355             key->path      = (char*) uprv_malloc(uprv_strlen(path)+1);
356             if(key->path == NULL){
357                 *status = U_MEMORY_ALLOCATION_ERROR;
358                 uprv_free(key->name);
359                 uprv_free(key);
360                 uprv_free(profile);
361                 return NULL;
362             }
363             uprv_strcpy(key->path, path);
364         }
365 
366         /* load the data */
367         if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
368             uprv_free(key->path);
369             uprv_free(key->name);
370             uprv_free(key);
371             uprv_free(profile);
372             return NULL;
373         }
374 
375         /* get the options */
376         profile->doNFKC            = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
377         profile->checkBiDi         = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
378 
379         if(profile->checkBiDi) {
380             profile->bdp = ubidi_getSingleton(status);
381             if(U_FAILURE(*status)) {
382                 usprep_unload(profile);
383                 uprv_free(key->path);
384                 uprv_free(key->name);
385                 uprv_free(key);
386                 uprv_free(profile);
387                 return NULL;
388             }
389         } else {
390             profile->bdp = NULL;
391         }
392 
393         umtx_lock(&usprepMutex);
394         /* add the data object to the cache */
395         uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
396         umtx_unlock(&usprepMutex);
397     }
398     umtx_lock(&usprepMutex);
399     /* increment the refcount */
400     profile->refCount++;
401     umtx_unlock(&usprepMutex);
402 
403     return profile;
404 }
405 
406 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)407 usprep_open(const char* path,
408             const char* name,
409             UErrorCode* status){
410 
411     if(status == NULL || U_FAILURE(*status)){
412         return NULL;
413     }
414     /* initialize the mutex */
415     usprep_init();
416 
417     /* initialize the profile struct members */
418     return usprep_getProfile(path,name,status);
419 }
420 
421 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)422 usprep_close(UStringPrepProfile* profile){
423     if(profile==NULL){
424         return;
425     }
426 
427     umtx_lock(&usprepMutex);
428     /* decrement the ref count*/
429     if(profile->refCount > 0){
430         profile->refCount--;
431     }
432     umtx_unlock(&usprepMutex);
433 
434 }
435 
436 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)437 uprv_syntaxError(const UChar* rules,
438                  int32_t pos,
439                  int32_t rulesLen,
440                  UParseError* parseError){
441     if(parseError == NULL){
442         return;
443     }
444     parseError->offset = pos;
445     parseError->line = 0 ; // we are not using line numbers
446 
447     // for pre-context
448     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
449     int32_t limit = pos;
450 
451     u_memcpy(parseError->preContext,rules+start,limit-start);
452     //null terminate the buffer
453     parseError->preContext[limit-start] = 0;
454 
455     // for post-context; include error rules[pos]
456     start = pos;
457     limit = start + (U_PARSE_CONTEXT_LEN-1);
458     if (limit > rulesLen) {
459         limit = rulesLen;
460     }
461     if (start < rulesLen) {
462         u_memcpy(parseError->postContext,rules+start,limit-start);
463     }
464     //null terminate the buffer
465     parseError->postContext[limit-start]= 0;
466 }
467 
468 
469 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)470 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
471 
472     UStringPrepType type;
473     if(trieWord == 0){
474         /*
475          * Initial value stored in the mapping table
476          * just return USPREP_TYPE_LIMIT .. so that
477          * the source codepoint is copied to the destination
478          */
479         type = USPREP_TYPE_LIMIT;
480         isIndex =FALSE;
481         value = 0;
482     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
483         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
484         isIndex =FALSE;
485         value = 0;
486     }else{
487         /* get the type */
488         type = USPREP_MAP;
489         /* ascertain if the value is index or delta */
490         if(trieWord & 0x02){
491             isIndex = TRUE;
492             value = trieWord  >> 2; //mask off the lower 2 bits and shift
493         }else{
494             isIndex = FALSE;
495             value = (int16_t)trieWord;
496             value =  (value >> 2);
497         }
498 
499         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
500             type = USPREP_DELETE;
501             isIndex =FALSE;
502             value = 0;
503         }
504     }
505     return type;
506 }
507 
508 
509 
510 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)511 usprep_map(  const UStringPrepProfile* profile,
512              const UChar* src, int32_t srcLength,
513              UChar* dest, int32_t destCapacity,
514              int32_t options,
515              UParseError* parseError,
516              UErrorCode* status ){
517 
518     uint16_t result;
519     int32_t destIndex=0;
520     int32_t srcIndex;
521     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
522     UStringPrepType type;
523     int16_t value;
524     UBool isIndex;
525     const int32_t* indexes = profile->indexes;
526 
527     // no error checking the caller check for error and arguments
528     // no string length check the caller finds out the string length
529 
530     for(srcIndex=0;srcIndex<srcLength;){
531         UChar32 ch;
532 
533         U16_NEXT(src,srcIndex,srcLength,ch);
534 
535         result=0;
536 
537         UTRIE_GET16(&profile->sprepTrie,ch,result);
538 
539         type = getValues(result, value, isIndex);
540 
541         // check if the source codepoint is unassigned
542         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
543 
544             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
545             *status = U_STRINGPREP_UNASSIGNED_ERROR;
546             return 0;
547 
548         }else if(type == USPREP_MAP){
549 
550             int32_t index, length;
551 
552             if(isIndex){
553                 index = value;
554                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
555                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
556                     length = 1;
557                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
558                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
559                     length = 2;
560                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
561                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
562                     length = 3;
563                 }else{
564                     length = profile->mappingData[index++];
565 
566                 }
567 
568                 /* copy mapping to destination */
569                 for(int32_t i=0; i< length; i++){
570                     if(destIndex < destCapacity  ){
571                         dest[destIndex] = profile->mappingData[index+i];
572                     }
573                     destIndex++; /* for pre-flighting */
574                 }
575                 continue;
576             }else{
577                 // subtract the delta to arrive at the code point
578                 ch -= value;
579             }
580 
581         }else if(type==USPREP_DELETE){
582              // just consume the codepoint and contine
583             continue;
584         }
585         //copy the code point into destination
586         if(ch <= 0xFFFF){
587             if(destIndex < destCapacity ){
588                 dest[destIndex] = (UChar)ch;
589             }
590             destIndex++;
591         }else{
592             if(destIndex+1 < destCapacity ){
593                 dest[destIndex]   = U16_LEAD(ch);
594                 dest[destIndex+1] = U16_TRAIL(ch);
595             }
596             destIndex +=2;
597         }
598 
599     }
600 
601     return u_terminateUChars(dest, destCapacity, destIndex, status);
602 }
603 
604 
605 static int32_t
usprep_normalize(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode * status)606 usprep_normalize(   const UChar* src, int32_t srcLength,
607                     UChar* dest, int32_t destCapacity,
608                     UErrorCode* status ){
609     /*
610      * Option UNORM_BEFORE_PRI_29:
611      *
612      * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
613      * requires strict adherence to Unicode 3.2 normalization,
614      * including buggy composition from before fixing Public Review Issue #29.
615      * Note that this results in some valid but nonsensical text to be
616      * either corrupted or rejected, depending on the text.
617      * See http://www.unicode.org/review/resolved-pri.html#pri29
618      * See unorm.cpp and cnormtst.c
619      */
620     return unorm_normalize(
621         src, srcLength,
622         UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
623         dest, destCapacity,
624         status);
625 }
626 
627 
628  /*
629    1) Map -- For each character in the input, check if it has a mapping
630       and, if so, replace it with its mapping.
631 
632    2) Normalize -- Possibly normalize the result of step 1 using Unicode
633       normalization.
634 
635    3) Prohibit -- Check for any characters that are not allowed in the
636       output.  If any are found, return an error.
637 
638    4) Check bidi -- Possibly check for right-to-left characters, and if
639       any are found, make sure that the whole string satisfies the
640       requirements for bidirectional strings.  If the string does not
641       satisfy the requirements for bidirectional strings, return an
642       error.
643       [Unicode3.2] defines several bidirectional categories; each character
644        has one bidirectional category assigned to it.  For the purposes of
645        the requirements below, an "RandALCat character" is a character that
646        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
647        is a character that has Unicode bidirectional category "L".  Note
648 
649 
650        that there are many characters which fall in neither of the above
651        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
652        this because they have bidirectional category "EN".
653 
654        In any profile that specifies bidirectional character handling, all
655        three of the following requirements MUST be met:
656 
657        1) The characters in section 5.8 MUST be prohibited.
658 
659        2) If a string contains any RandALCat character, the string MUST NOT
660           contain any LCat character.
661 
662        3) If a string contains any RandALCat character, a RandALCat
663           character MUST be the first character of the string, and a
664           RandALCat character MUST be the last character of the string.
665 */
666 
667 #define MAX_STACK_BUFFER_SIZE 300
668 
669 
670 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)671 usprep_prepare(   const UStringPrepProfile* profile,
672                   const UChar* src, int32_t srcLength,
673                   UChar* dest, int32_t destCapacity,
674                   int32_t options,
675                   UParseError* parseError,
676                   UErrorCode* status ){
677 
678     // check error status
679     if(status == NULL || U_FAILURE(*status)){
680         return 0;
681     }
682 
683     //check arguments
684     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
685         *status=U_ILLEGAL_ARGUMENT_ERROR;
686         return 0;
687     }
688 
689     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
690     UChar *b1 = b1Stack, *b2 = b2Stack;
691     int32_t b1Len, b2Len=0,
692             b1Capacity = MAX_STACK_BUFFER_SIZE ,
693             b2Capacity = MAX_STACK_BUFFER_SIZE;
694     uint16_t result;
695     int32_t b2Index = 0;
696     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
697     UBool leftToRight=FALSE, rightToLeft=FALSE;
698     int32_t rtlPos =-1, ltrPos =-1;
699 
700     //get the string length
701     if(srcLength == -1){
702         srcLength = u_strlen(src);
703     }
704     // map
705     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
706 
707     if(*status == U_BUFFER_OVERFLOW_ERROR){
708         // redo processing of string
709         /* we do not have enough room so grow the buffer*/
710         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
711         if(b1==NULL){
712             *status = U_MEMORY_ALLOCATION_ERROR;
713             goto CLEANUP;
714         }
715 
716         *status = U_ZERO_ERROR; // reset error
717 
718         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
719 
720     }
721 
722     // normalize
723     if(profile->doNFKC == TRUE){
724         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
725 
726         if(*status == U_BUFFER_OVERFLOW_ERROR){
727             // redo processing of string
728             /* we do not have enough room so grow the buffer*/
729             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
730             if(b2==NULL){
731                 *status = U_MEMORY_ALLOCATION_ERROR;
732                 goto CLEANUP;
733             }
734 
735             *status = U_ZERO_ERROR; // reset error
736 
737             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
738 
739         }
740 
741     }else{
742         b2 = b1;
743         b2Len = b1Len;
744     }
745 
746 
747     if(U_FAILURE(*status)){
748         goto CLEANUP;
749     }
750 
751     UChar32 ch;
752     UStringPrepType type;
753     int16_t value;
754     UBool isIndex;
755 
756     // Prohibit and checkBiDi in one pass
757     for(b2Index=0; b2Index<b2Len;){
758 
759         ch = 0;
760 
761         U16_NEXT(b2, b2Index, b2Len, ch);
762 
763         UTRIE_GET16(&profile->sprepTrie,ch,result);
764 
765         type = getValues(result, value, isIndex);
766 
767         if( type == USPREP_PROHIBITED ||
768             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
769            ){
770             *status = U_STRINGPREP_PROHIBITED_ERROR;
771             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
772             goto CLEANUP;
773         }
774 
775         if(profile->checkBiDi) {
776             direction = ubidi_getClass(profile->bdp, ch);
777             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
778                 firstCharDir = direction;
779             }
780             if(direction == U_LEFT_TO_RIGHT){
781                 leftToRight = TRUE;
782                 ltrPos = b2Index-1;
783             }
784             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
785                 rightToLeft = TRUE;
786                 rtlPos = b2Index-1;
787             }
788         }
789     }
790     if(profile->checkBiDi == TRUE){
791         // satisfy 2
792         if( leftToRight == TRUE && rightToLeft == TRUE){
793             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
794             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
795             goto CLEANUP;
796         }
797 
798         //satisfy 3
799         if( rightToLeft == TRUE &&
800             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
801               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
802            ){
803             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
804             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
805             return FALSE;
806         }
807     }
808     if(b2Len>0 && b2Len <= destCapacity){
809         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
810     }
811 
812 CLEANUP:
813     if(b1!=b1Stack){
814         uprv_free(b1);
815         b1=NULL;
816     }
817 
818     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
819         uprv_free(b2);
820         b2=NULL;
821     }
822     return u_terminateUChars(dest, destCapacity, b2Len, status);
823 }
824 
825 
826 /* data swapping ------------------------------------------------------------ */
827 
828 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)829 usprep_swap(const UDataSwapper *ds,
830             const void *inData, int32_t length, void *outData,
831             UErrorCode *pErrorCode) {
832     const UDataInfo *pInfo;
833     int32_t headerSize;
834 
835     const uint8_t *inBytes;
836     uint8_t *outBytes;
837 
838     const int32_t *inIndexes;
839     int32_t indexes[16];
840 
841     int32_t i, offset, count, size;
842 
843     /* udata_swapDataHeader checks the arguments */
844     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
845     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
846         return 0;
847     }
848 
849     /* check data format and format version */
850     pInfo=(const UDataInfo *)((const char *)inData+4);
851     if(!(
852         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
853         pInfo->dataFormat[1]==0x50 &&
854         pInfo->dataFormat[2]==0x52 &&
855         pInfo->dataFormat[3]==0x50 &&
856         pInfo->formatVersion[0]==3
857     )) {
858         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
859                          pInfo->dataFormat[0], pInfo->dataFormat[1],
860                          pInfo->dataFormat[2], pInfo->dataFormat[3],
861                          pInfo->formatVersion[0]);
862         *pErrorCode=U_UNSUPPORTED_ERROR;
863         return 0;
864     }
865 
866     inBytes=(const uint8_t *)inData+headerSize;
867     outBytes=(uint8_t *)outData+headerSize;
868 
869     inIndexes=(const int32_t *)inBytes;
870 
871     if(length>=0) {
872         length-=headerSize;
873         if(length<16*4) {
874             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
875                              length);
876             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
877             return 0;
878         }
879     }
880 
881     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
882     for(i=0; i<16; ++i) {
883         indexes[i]=udata_readInt32(ds, inIndexes[i]);
884     }
885 
886     /* calculate the total length of the data */
887     size=
888         16*4+ /* size of indexes[] */
889         indexes[_SPREP_INDEX_TRIE_SIZE]+
890         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
891 
892     if(length>=0) {
893         if(length<size) {
894             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
895                              length);
896             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
897             return 0;
898         }
899 
900         /* copy the data for inaccessible bytes */
901         if(inBytes!=outBytes) {
902             uprv_memcpy(outBytes, inBytes, size);
903         }
904 
905         offset=0;
906 
907         /* swap the int32_t indexes[] */
908         count=16*4;
909         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
910         offset+=count;
911 
912         /* swap the UTrie */
913         count=indexes[_SPREP_INDEX_TRIE_SIZE];
914         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
915         offset+=count;
916 
917         /* swap the uint16_t mappingTable[] */
918         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
919         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
920         offset+=count;
921     }
922 
923     return headerSize+size;
924 }
925 
926 #endif /* #if !UCONFIG_NO_IDNA */
927