• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  usprep.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003jul2
 *   created by: Ram Viswanadha
 */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_IDNA
20 
21 #include "unicode/usprep.h"
22 
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "ubidi_props.h"
36 
37 U_NAMESPACE_USE
38 
39 U_CDECL_BEGIN
40 
41 /*
42 Static cache for already opened StringPrep profiles
43 */
44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
45 
46 static UMTX usprepMutex = NULL;
47 
48 /* format version of spp file */
49 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
50 
51 /* the Unicode version of the sprep data */
52 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
53 
54 /* Profile names must be aligned to UStringPrepProfileType */
55 static const char *PROFILE_NAMES[] = {
56     "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
57     "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
58     "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
59     "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
60     "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
61     "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
62     "rfc3722",      /* USPREP_RFC3722_ISCSI */
63     "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
64     "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
65     "rfc4011",      /* USPREP_RFC4011_MIB */
66     "rfc4013",      /* USPREP_RFC4013_SASLPREP */
67     "rfc4505",      /* USPREP_RFC4505_TRACE */
68     "rfc4518",      /* USPREP_RFC4518_LDAP */
69     "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
70 };
71 
72 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)73 isSPrepAcceptable(void * /* context */,
74              const char * /* type */,
75              const char * /* name */,
76              const UDataInfo *pInfo) {
77     if(
78         pInfo->size>=20 &&
79         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
80         pInfo->charsetFamily==U_CHARSET_FAMILY &&
81         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
82         pInfo->dataFormat[1]==0x50 &&
83         pInfo->dataFormat[2]==0x52 &&
84         pInfo->dataFormat[3]==0x50 &&
85         pInfo->formatVersion[0]==3 &&
86         pInfo->formatVersion[2]==UTRIE_SHIFT &&
87         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
88     ) {
89         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
90         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
91         return TRUE;
92     } else {
93         return FALSE;
94     }
95 }
96 
97 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)98 getSPrepFoldingOffset(uint32_t data) {
99 
100     return (int32_t)data;
101 
102 }
103 
104 /* hashes an entry  */
105 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)106 hashEntry(const UHashTok parm) {
107     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
108     UHashTok namekey, pathkey;
109     namekey.pointer = b->name;
110     pathkey.pointer = b->path;
111     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
112 }
113 
114 /* compares two entries */
115 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)116 compareEntries(const UHashTok p1, const UHashTok p2) {
117     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
118     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
119     UHashTok name1, name2, path1, path2;
120     name1.pointer = b1->name;
121     name2.pointer = b2->name;
122     path1.pointer = b1->path;
123     path2.pointer = b2->path;
124     return ((UBool)(uhash_compareChars(name1, name2) &
125         uhash_compareChars(path1, path2)));
126 }
127 
128 static void
usprep_unload(UStringPrepProfile * data)129 usprep_unload(UStringPrepProfile* data){
130     udata_close(data->sprepData);
131 }
132 
133 static int32_t
usprep_internal_flushCache(UBool noRefCount)134 usprep_internal_flushCache(UBool noRefCount){
135     UStringPrepProfile *profile = NULL;
136     UStringPrepKey  *key  = NULL;
137     int32_t pos = -1;
138     int32_t deletedNum = 0;
139     const UHashElement *e;
140 
141     /*
142      * if shared data hasn't even been lazy evaluated yet
143      * return 0
144      */
145     umtx_lock(&usprepMutex);
146     if (SHARED_DATA_HASHTABLE == NULL) {
147         umtx_unlock(&usprepMutex);
148         return 0;
149     }
150 
151     /*creates an enumeration to iterate through every element in the table */
152     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
153     {
154         profile = (UStringPrepProfile *) e->value.pointer;
155         key  = (UStringPrepKey *) e->key.pointer;
156 
157         if ((noRefCount== FALSE && profile->refCount == 0) ||
158              noRefCount== TRUE) {
159             deletedNum++;
160             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
161 
162             /* unload the data */
163             usprep_unload(profile);
164 
165             if(key->name != NULL) {
166                 uprv_free(key->name);
167                 key->name=NULL;
168             }
169             if(key->path != NULL) {
170                 uprv_free(key->path);
171                 key->path=NULL;
172             }
173             uprv_free(profile);
174             uprv_free(key);
175         }
176 
177     }
178     umtx_unlock(&usprepMutex);
179 
180     return deletedNum;
181 }
182 
183 /* Works just like ucnv_flushCache()
184 static int32_t
185 usprep_flushCache(){
186     return usprep_internal_flushCache(FALSE);
187 }
188 */
189 
usprep_cleanup(void)190 static UBool U_CALLCONV usprep_cleanup(void){
191     if (SHARED_DATA_HASHTABLE != NULL) {
192         usprep_internal_flushCache(TRUE);
193         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
194             uhash_close(SHARED_DATA_HASHTABLE);
195             SHARED_DATA_HASHTABLE = NULL;
196         }
197     }
198 
199     umtx_destroy(&usprepMutex);             /* Don't worry about destroying the mutex even  */
200                                             /*  if the hash table still exists.  The mutex  */
201                                             /*  will lazily re-init  itself if needed.      */
202     return (SHARED_DATA_HASHTABLE == NULL);
203 }
204 U_CDECL_END
205 
206 
207 /** Initializes the cache for resources */
208 static void
initCache(UErrorCode * status)209 initCache(UErrorCode *status) {
210     UBool makeCache;
211     UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE ==  NULL), makeCache);
212     if(makeCache) {
213         UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
214         if (U_SUCCESS(*status)) {
215             umtx_lock(&usprepMutex);
216             if(SHARED_DATA_HASHTABLE == NULL) {
217                 SHARED_DATA_HASHTABLE = newCache;
218                 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
219                 newCache = NULL;
220             }
221             umtx_unlock(&usprepMutex);
222         }
223         if(newCache != NULL) {
224             uhash_close(newCache);
225         }
226     }
227 }
228 
229 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)230 loadData(UStringPrepProfile* profile,
231          const char* path,
232          const char* name,
233          const char* type,
234          UErrorCode* errorCode) {
235     /* load Unicode SPREP data from file */
236     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
237     UDataMemory *dataMemory;
238     const int32_t *p=NULL;
239     const uint8_t *pb;
240     UVersionInfo normUnicodeVersion;
241     int32_t normUniVer, sprepUniVer, normCorrVer;
242 
243     if(errorCode==NULL || U_FAILURE(*errorCode)) {
244         return 0;
245     }
246 
247     /* open the data outside the mutex block */
248     //TODO: change the path
249     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
250     if(U_FAILURE(*errorCode)) {
251         return FALSE;
252     }
253 
254     p=(const int32_t *)udata_getMemory(dataMemory);
255     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
256     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
257     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
258 
259 
260     if(U_FAILURE(*errorCode)) {
261         udata_close(dataMemory);
262         return FALSE;
263     }
264 
265     /* in the mutex block, set the data for this process */
266     umtx_lock(&usprepMutex);
267     if(profile->sprepData==NULL) {
268         profile->sprepData=dataMemory;
269         dataMemory=NULL;
270         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
271         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
272     } else {
273         p=(const int32_t *)udata_getMemory(profile->sprepData);
274     }
275     umtx_unlock(&usprepMutex);
276     /* initialize some variables */
277     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
278 
279     u_getUnicodeVersion(normUnicodeVersion);
280     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
281                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
282     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
283                   (dataVersion[2] << 8 ) + (dataVersion[3]);
284     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
285 
286     if(U_FAILURE(*errorCode)){
287         udata_close(dataMemory);
288         return FALSE;
289     }
290     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
291         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
292         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
293       ){
294         *errorCode = U_INVALID_FORMAT_ERROR;
295         udata_close(dataMemory);
296         return FALSE;
297     }
298     profile->isDataLoaded = TRUE;
299 
300     /* if a different thread set it first, then close the extra data */
301     if(dataMemory!=NULL) {
302         udata_close(dataMemory); /* NULL if it was set correctly */
303     }
304 
305 
306     return profile->isDataLoaded;
307 }
308 
309 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)310 usprep_getProfile(const char* path,
311                   const char* name,
312                   UErrorCode *status){
313 
314     UStringPrepProfile* profile = NULL;
315 
316     initCache(status);
317 
318     if(U_FAILURE(*status)){
319         return NULL;
320     }
321 
322     UStringPrepKey stackKey;
323     /*
324      * const is cast way to save malloc, strcpy and free calls
325      * we use the passed in pointers for fetching the data from the
326      * hash table which is safe
327      */
328     stackKey.name = (char*) name;
329     stackKey.path = (char*) path;
330 
331     /* fetch the data from the cache */
332     umtx_lock(&usprepMutex);
333     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
334     if(profile != NULL) {
335         profile->refCount++;
336     }
337     umtx_unlock(&usprepMutex);
338 
339     if(profile == NULL) {
340         /* else load the data and put the data in the cache */
341         LocalMemory<UStringPrepProfile> newProfile;
342         if(newProfile.allocateInsteadAndReset() == NULL) {
343             *status = U_MEMORY_ALLOCATION_ERROR;
344             return NULL;
345         }
346 
347         /* load the data */
348         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
349             return NULL;
350         }
351 
352         /* get the options */
353         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
354         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
355 
356         if(newProfile->checkBiDi) {
357             newProfile->bdp = ubidi_getSingleton();
358         }
359 
360         LocalMemory<UStringPrepKey> key;
361         LocalMemory<char> keyName;
362         LocalMemory<char> keyPath;
363         if( key.allocateInsteadAndReset() == NULL ||
364             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
365             (path != NULL &&
366              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
367          ) {
368             *status = U_MEMORY_ALLOCATION_ERROR;
369             usprep_unload(newProfile.getAlias());
370             return NULL;
371         }
372 
373         umtx_lock(&usprepMutex);
374         // If another thread already inserted the same key/value, refcount and cleanup our thread data
375         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
376         if(profile != NULL) {
377             profile->refCount++;
378             usprep_unload(newProfile.getAlias());
379         }
380         else {
381             /* initialize the key members */
382             key->name = keyName.orphan();
383             uprv_strcpy(key->name, name);
384             if(path != NULL){
385                 key->path = keyPath.orphan();
386                 uprv_strcpy(key->path, path);
387             }
388             profile = newProfile.orphan();
389 
390             /* add the data object to the cache */
391             profile->refCount = 1;
392             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
393         }
394         umtx_unlock(&usprepMutex);
395     }
396 
397     return profile;
398 }
399 
400 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)401 usprep_open(const char* path,
402             const char* name,
403             UErrorCode* status){
404 
405     if(status == NULL || U_FAILURE(*status)){
406         return NULL;
407     }
408 
409     /* initialize the profile struct members */
410     return usprep_getProfile(path,name,status);
411 }
412 
413 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)414 usprep_openByType(UStringPrepProfileType type,
415 				  UErrorCode* status) {
416     if(status == NULL || U_FAILURE(*status)){
417         return NULL;
418     }
419     int32_t index = (int32_t)type;
420     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
421         *status = U_ILLEGAL_ARGUMENT_ERROR;
422         return NULL;
423     }
424     return usprep_open(NULL, PROFILE_NAMES[index], status);
425 }
426 
427 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)428 usprep_close(UStringPrepProfile* profile){
429     if(profile==NULL){
430         return;
431     }
432 
433     umtx_lock(&usprepMutex);
434     /* decrement the ref count*/
435     if(profile->refCount > 0){
436         profile->refCount--;
437     }
438     umtx_unlock(&usprepMutex);
439 
440 }
441 
442 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)443 uprv_syntaxError(const UChar* rules,
444                  int32_t pos,
445                  int32_t rulesLen,
446                  UParseError* parseError){
447     if(parseError == NULL){
448         return;
449     }
450     parseError->offset = pos;
451     parseError->line = 0 ; // we are not using line numbers
452 
453     // for pre-context
454     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
455     int32_t limit = pos;
456 
457     u_memcpy(parseError->preContext,rules+start,limit-start);
458     //null terminate the buffer
459     parseError->preContext[limit-start] = 0;
460 
461     // for post-context; include error rules[pos]
462     start = pos;
463     limit = start + (U_PARSE_CONTEXT_LEN-1);
464     if (limit > rulesLen) {
465         limit = rulesLen;
466     }
467     if (start < rulesLen) {
468         u_memcpy(parseError->postContext,rules+start,limit-start);
469     }
470     //null terminate the buffer
471     parseError->postContext[limit-start]= 0;
472 }
473 
474 
475 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)476 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
477 
478     UStringPrepType type;
479     if(trieWord == 0){
480         /*
481          * Initial value stored in the mapping table
482          * just return USPREP_TYPE_LIMIT .. so that
483          * the source codepoint is copied to the destination
484          */
485         type = USPREP_TYPE_LIMIT;
486         isIndex =FALSE;
487         value = 0;
488     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
489         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
490         isIndex =FALSE;
491         value = 0;
492     }else{
493         /* get the type */
494         type = USPREP_MAP;
495         /* ascertain if the value is index or delta */
496         if(trieWord & 0x02){
497             isIndex = TRUE;
498             value = trieWord  >> 2; //mask off the lower 2 bits and shift
499         }else{
500             isIndex = FALSE;
501             value = (int16_t)trieWord;
502             value =  (value >> 2);
503         }
504 
505         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
506             type = USPREP_DELETE;
507             isIndex =FALSE;
508             value = 0;
509         }
510     }
511     return type;
512 }
513 
514 
515 
516 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)517 usprep_map(  const UStringPrepProfile* profile,
518              const UChar* src, int32_t srcLength,
519              UChar* dest, int32_t destCapacity,
520              int32_t options,
521              UParseError* parseError,
522              UErrorCode* status ){
523 
524     uint16_t result;
525     int32_t destIndex=0;
526     int32_t srcIndex;
527     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
528     UStringPrepType type;
529     int16_t value;
530     UBool isIndex;
531     const int32_t* indexes = profile->indexes;
532 
533     // no error checking the caller check for error and arguments
534     // no string length check the caller finds out the string length
535 
536     for(srcIndex=0;srcIndex<srcLength;){
537         UChar32 ch;
538 
539         U16_NEXT(src,srcIndex,srcLength,ch);
540 
541         result=0;
542 
543         UTRIE_GET16(&profile->sprepTrie,ch,result);
544 
545         type = getValues(result, value, isIndex);
546 
547         // check if the source codepoint is unassigned
548         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
549 
550             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
551             *status = U_STRINGPREP_UNASSIGNED_ERROR;
552             return 0;
553 
554         }else if(type == USPREP_MAP){
555 
556             int32_t index, length;
557 
558             if(isIndex){
559                 index = value;
560                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
561                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
562                     length = 1;
563                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
564                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
565                     length = 2;
566                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
567                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
568                     length = 3;
569                 }else{
570                     length = profile->mappingData[index++];
571 
572                 }
573 
574                 /* copy mapping to destination */
575                 for(int32_t i=0; i< length; i++){
576                     if(destIndex < destCapacity  ){
577                         dest[destIndex] = profile->mappingData[index+i];
578                     }
579                     destIndex++; /* for pre-flighting */
580                 }
581                 continue;
582             }else{
583                 // subtract the delta to arrive at the code point
584                 ch -= value;
585             }
586 
587         }else if(type==USPREP_DELETE){
588              // just consume the codepoint and contine
589             continue;
590         }
591         //copy the code point into destination
592         if(ch <= 0xFFFF){
593             if(destIndex < destCapacity ){
594                 dest[destIndex] = (UChar)ch;
595             }
596             destIndex++;
597         }else{
598             if(destIndex+1 < destCapacity ){
599                 dest[destIndex]   = U16_LEAD(ch);
600                 dest[destIndex+1] = U16_TRAIL(ch);
601             }
602             destIndex +=2;
603         }
604 
605     }
606 
607     return u_terminateUChars(dest, destCapacity, destIndex, status);
608 }
609 
610 
611 static int32_t
usprep_normalize(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode * status)612 usprep_normalize(   const UChar* src, int32_t srcLength,
613                     UChar* dest, int32_t destCapacity,
614                     UErrorCode* status ){
615     return unorm_normalize(
616         src, srcLength,
617         UNORM_NFKC, UNORM_UNICODE_3_2,
618         dest, destCapacity,
619         status);
620 }
621 
622 
 /*
   1) Map -- For each character in the input, check if it has a mapping
      and, if so, replace it with its mapping.

   2) Normalize -- Possibly normalize the result of step 1 using Unicode
      normalization.

   3) Prohibit -- Check for any characters that are not allowed in the
      output.  If any are found, return an error.

   4) Check bidi -- Possibly check for right-to-left characters, and if
      any are found, make sure that the whole string satisfies the
      requirements for bidirectional strings.  If the string does not
      satisfy the requirements for bidirectional strings, return an
      error.
      [Unicode3.2] defines several bidirectional categories; each character
       has one bidirectional category assigned to it.  For the purposes of
       the requirements below, an "RandALCat character" is a character that
       has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
       definitions; Latin digits (<U+0030> through <U+0039>) are examples of
       this because they have bidirectional category "EN".

       In any profile that specifies bidirectional character handling, all
       three of the following requirements MUST be met:

       1) The characters in section 5.8 MUST be prohibited.

       2) If a string contains any RandALCat character, the string MUST NOT
          contain any LCat character.

       3) If a string contains any RandALCat character, a RandALCat
          character MUST be the first character of the string, and a
          RandALCat character MUST be the last character of the string.
*/
661 
662 #define MAX_STACK_BUFFER_SIZE 300
663 
664 
665 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)666 usprep_prepare(   const UStringPrepProfile* profile,
667                   const UChar* src, int32_t srcLength,
668                   UChar* dest, int32_t destCapacity,
669                   int32_t options,
670                   UParseError* parseError,
671                   UErrorCode* status ){
672 
673     // check error status
674     if(status == NULL || U_FAILURE(*status)){
675         return 0;
676     }
677 
678     //check arguments
679     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
680         *status=U_ILLEGAL_ARGUMENT_ERROR;
681         return 0;
682     }
683 
684     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
685     UChar *b1 = b1Stack, *b2 = b2Stack;
686     int32_t b1Len, b2Len=0,
687             b1Capacity = MAX_STACK_BUFFER_SIZE ,
688             b2Capacity = MAX_STACK_BUFFER_SIZE;
689     uint16_t result;
690     int32_t b2Index = 0;
691     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
692     UBool leftToRight=FALSE, rightToLeft=FALSE;
693     int32_t rtlPos =-1, ltrPos =-1;
694 
695     //get the string length
696     if(srcLength == -1){
697         srcLength = u_strlen(src);
698     }
699     // map
700     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
701 
702     if(*status == U_BUFFER_OVERFLOW_ERROR){
703         // redo processing of string
704         /* we do not have enough room so grow the buffer*/
705         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
706         if(b1==NULL){
707             *status = U_MEMORY_ALLOCATION_ERROR;
708             goto CLEANUP;
709         }
710 
711         *status = U_ZERO_ERROR; // reset error
712 
713         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
714 
715     }
716 
717     // normalize
718     if(profile->doNFKC == TRUE){
719         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
720 
721         if(*status == U_BUFFER_OVERFLOW_ERROR){
722             // redo processing of string
723             /* we do not have enough room so grow the buffer*/
724             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
725             if(b2==NULL){
726                 *status = U_MEMORY_ALLOCATION_ERROR;
727                 goto CLEANUP;
728             }
729 
730             *status = U_ZERO_ERROR; // reset error
731 
732             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
733 
734         }
735 
736     }else{
737         b2 = b1;
738         b2Len = b1Len;
739     }
740 
741 
742     if(U_FAILURE(*status)){
743         goto CLEANUP;
744     }
745 
746     UChar32 ch;
747     UStringPrepType type;
748     int16_t value;
749     UBool isIndex;
750 
751     // Prohibit and checkBiDi in one pass
752     for(b2Index=0; b2Index<b2Len;){
753 
754         ch = 0;
755 
756         U16_NEXT(b2, b2Index, b2Len, ch);
757 
758         UTRIE_GET16(&profile->sprepTrie,ch,result);
759 
760         type = getValues(result, value, isIndex);
761 
762         if( type == USPREP_PROHIBITED ||
763             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
764            ){
765             *status = U_STRINGPREP_PROHIBITED_ERROR;
766             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
767             goto CLEANUP;
768         }
769 
770         if(profile->checkBiDi) {
771             direction = ubidi_getClass(profile->bdp, ch);
772             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
773                 firstCharDir = direction;
774             }
775             if(direction == U_LEFT_TO_RIGHT){
776                 leftToRight = TRUE;
777                 ltrPos = b2Index-1;
778             }
779             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
780                 rightToLeft = TRUE;
781                 rtlPos = b2Index-1;
782             }
783         }
784     }
785     if(profile->checkBiDi == TRUE){
786         // satisfy 2
787         if( leftToRight == TRUE && rightToLeft == TRUE){
788             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
789             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
790             goto CLEANUP;
791         }
792 
793         //satisfy 3
794         if( rightToLeft == TRUE &&
795             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
796               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
797            ){
798             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
799             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
800             return FALSE;
801         }
802     }
803     if(b2Len>0 && b2Len <= destCapacity){
804         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
805     }
806 
807 CLEANUP:
808     if(b1!=b1Stack){
809         uprv_free(b1);
810         b1=NULL;
811     }
812 
813     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
814         uprv_free(b2);
815         b2=NULL;
816     }
817     return u_terminateUChars(dest, destCapacity, b2Len, status);
818 }
819 
820 
821 /* data swapping ------------------------------------------------------------ */
822 
823 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)824 usprep_swap(const UDataSwapper *ds,
825             const void *inData, int32_t length, void *outData,
826             UErrorCode *pErrorCode) {
827     const UDataInfo *pInfo;
828     int32_t headerSize;
829 
830     const uint8_t *inBytes;
831     uint8_t *outBytes;
832 
833     const int32_t *inIndexes;
834     int32_t indexes[16];
835 
836     int32_t i, offset, count, size;
837 
838     /* udata_swapDataHeader checks the arguments */
839     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
840     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
841         return 0;
842     }
843 
844     /* check data format and format version */
845     pInfo=(const UDataInfo *)((const char *)inData+4);
846     if(!(
847         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
848         pInfo->dataFormat[1]==0x50 &&
849         pInfo->dataFormat[2]==0x52 &&
850         pInfo->dataFormat[3]==0x50 &&
851         pInfo->formatVersion[0]==3
852     )) {
853         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
854                          pInfo->dataFormat[0], pInfo->dataFormat[1],
855                          pInfo->dataFormat[2], pInfo->dataFormat[3],
856                          pInfo->formatVersion[0]);
857         *pErrorCode=U_UNSUPPORTED_ERROR;
858         return 0;
859     }
860 
861     inBytes=(const uint8_t *)inData+headerSize;
862     outBytes=(uint8_t *)outData+headerSize;
863 
864     inIndexes=(const int32_t *)inBytes;
865 
866     if(length>=0) {
867         length-=headerSize;
868         if(length<16*4) {
869             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
870                              length);
871             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
872             return 0;
873         }
874     }
875 
876     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
877     for(i=0; i<16; ++i) {
878         indexes[i]=udata_readInt32(ds, inIndexes[i]);
879     }
880 
881     /* calculate the total length of the data */
882     size=
883         16*4+ /* size of indexes[] */
884         indexes[_SPREP_INDEX_TRIE_SIZE]+
885         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
886 
887     if(length>=0) {
888         if(length<size) {
889             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
890                              length);
891             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
892             return 0;
893         }
894 
895         /* copy the data for inaccessible bytes */
896         if(inBytes!=outBytes) {
897             uprv_memcpy(outBytes, inBytes, size);
898         }
899 
900         offset=0;
901 
902         /* swap the int32_t indexes[] */
903         count=16*4;
904         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
905         offset+=count;
906 
907         /* swap the UTrie */
908         count=indexes[_SPREP_INDEX_TRIE_SIZE];
909         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
910         offset+=count;
911 
912         /* swap the uint16_t mappingTable[] */
913         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
914         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
915         offset+=count;
916     }
917 
918     return headerSize+size;
919 }
920 
921 #endif /* #if !UCONFIG_NO_IDNA */
922