• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2016, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  usprep.cpp
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003jul2
16  *   created by: Ram Viswanadha
17  */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_IDNA
22 
23 #include "unicode/usprep.h"
24 
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39 
40 U_NAMESPACE_USE
41 
42 U_CDECL_BEGIN
43 
44 /*
45 Static cache for already opened StringPrep profiles.
46 Keys are UStringPrepKey (name + path), values are UStringPrepProfile pointers.
   The table is created lazily through gSharedDataInitOnce.
*/
47 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48 static icu::UInitOnce gSharedDataInitOnce;
49 
/* Held around cache lookups/insertions and profile refCount changes in this file. */
50 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
51 
52 /* format version of spp file */
53 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
54 
55 /* the Unicode version of the sprep data; overwritten by isSPrepAcceptable() each time a data item is accepted */
56 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57 
58 /* Profile names must be aligned to UStringPrepProfileType: usprep_openByType() uses the enum value as an index into this table */
59 static const char * const PROFILE_NAMES[] = {
60     "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
61     "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
62     "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
63     "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
64     "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
65     "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
66     "rfc3722",      /* USPREP_RFC3722_ISCSI */
67     "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
68     "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
69     "rfc4011",      /* USPREP_RFC4011_MIB */
70     "rfc4013",      /* USPREP_RFC4013_SASLPREP */
71     "rfc4505",      /* USPREP_RFC4505_TRACE */
72     "rfc4518",      /* USPREP_RFC4518_LDAP */
73     "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
74 };
75 
76 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)77 isSPrepAcceptable(void * /* context */,
78              const char * /* type */,
79              const char * /* name */,
80              const UDataInfo *pInfo) {
81     if(
82         pInfo->size>=20 &&
83         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
84         pInfo->charsetFamily==U_CHARSET_FAMILY &&
85         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
86         pInfo->dataFormat[1]==0x50 &&
87         pInfo->dataFormat[2]==0x52 &&
88         pInfo->dataFormat[3]==0x50 &&
89         pInfo->formatVersion[0]==3 &&
90         pInfo->formatVersion[2]==UTRIE_SHIFT &&
91         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
92     ) {
93         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
95         return TRUE;
96     } else {
97         return FALSE;
98     }
99 }
100 
101 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)102 getSPrepFoldingOffset(uint32_t data) {
103 
104     return (int32_t)data;
105 
106 }
107 
108 /* hashes an entry  */
109 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)110 hashEntry(const UHashTok parm) {
111     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112     UHashTok namekey, pathkey;
113     namekey.pointer = b->name;
114     pathkey.pointer = b->path;
115     uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
116             37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
117     return static_cast<int32_t>(unsignedHash);
118 }
119 
120 /* compares two entries */
121 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)122 compareEntries(const UHashTok p1, const UHashTok p2) {
123     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
124     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
125     UHashTok name1, name2, path1, path2;
126     name1.pointer = b1->name;
127     name2.pointer = b2->name;
128     path1.pointer = b1->path;
129     path2.pointer = b2->path;
130     return ((UBool)(uhash_compareChars(name1, name2) &
131         uhash_compareChars(path1, path2)));
132 }
133 
134 static void
usprep_unload(UStringPrepProfile * data)135 usprep_unload(UStringPrepProfile* data){
136     udata_close(data->sprepData);
137 }
138 
139 static int32_t
usprep_internal_flushCache(UBool noRefCount)140 usprep_internal_flushCache(UBool noRefCount){
141     UStringPrepProfile *profile = NULL;
142     UStringPrepKey  *key  = NULL;
143     int32_t pos = UHASH_FIRST;
144     int32_t deletedNum = 0;
145     const UHashElement *e;
146 
147     /*
148      * if shared data hasn't even been lazy evaluated yet
149      * return 0
150      */
151     umtx_lock(&usprepMutex);
152     if (SHARED_DATA_HASHTABLE == NULL) {
153         umtx_unlock(&usprepMutex);
154         return 0;
155     }
156 
157     /*creates an enumeration to iterate through every element in the table */
158     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
159     {
160         profile = (UStringPrepProfile *) e->value.pointer;
161         key  = (UStringPrepKey *) e->key.pointer;
162 
163         if ((noRefCount== FALSE && profile->refCount == 0) ||
164              noRefCount== TRUE) {
165             deletedNum++;
166             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
167 
168             /* unload the data */
169             usprep_unload(profile);
170 
171             if(key->name != NULL) {
172                 uprv_free(key->name);
173                 key->name=NULL;
174             }
175             if(key->path != NULL) {
176                 uprv_free(key->path);
177                 key->path=NULL;
178             }
179             uprv_free(profile);
180             uprv_free(key);
181         }
182 
183     }
184     umtx_unlock(&usprepMutex);
185 
186     return deletedNum;
187 }
188 
189 /* Works just like ucnv_flushCache()
190 static int32_t
191 usprep_flushCache(){
192     return usprep_internal_flushCache(FALSE);
193 }
194 */
195 
usprep_cleanup(void)196 static UBool U_CALLCONV usprep_cleanup(void){
197     if (SHARED_DATA_HASHTABLE != NULL) {
198         usprep_internal_flushCache(TRUE);
199         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
200             uhash_close(SHARED_DATA_HASHTABLE);
201             SHARED_DATA_HASHTABLE = NULL;
202         }
203     }
204     gSharedDataInitOnce.reset();
205     return (SHARED_DATA_HASHTABLE == NULL);
206 }
207 U_CDECL_END
208 
209 
210 /** Initializes the cache for resources */
211 static void U_CALLCONV
createCache(UErrorCode & status)212 createCache(UErrorCode &status) {
213     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
214     if (U_FAILURE(status)) {
215         SHARED_DATA_HASHTABLE = NULL;
216     }
217     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
218 }
219 
220 static void
initCache(UErrorCode * status)221 initCache(UErrorCode *status) {
222     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
223 }
224 
225 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)226 loadData(UStringPrepProfile* profile,
227          const char* path,
228          const char* name,
229          const char* type,
230          UErrorCode* errorCode) {
231     /* load Unicode SPREP data from file */
232     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
233     UDataMemory *dataMemory;
234     const int32_t *p=NULL;
235     const uint8_t *pb;
236     UVersionInfo normUnicodeVersion;
237     int32_t normUniVer, sprepUniVer, normCorrVer;
238 
239     if(errorCode==NULL || U_FAILURE(*errorCode)) {
240         return 0;
241     }
242 
243     /* open the data outside the mutex block */
244     //TODO: change the path
245     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
246     if(U_FAILURE(*errorCode)) {
247         return FALSE;
248     }
249 
250     p=(const int32_t *)udata_getMemory(dataMemory);
251     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
252     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
253     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
254 
255 
256     if(U_FAILURE(*errorCode)) {
257         udata_close(dataMemory);
258         return FALSE;
259     }
260 
261     /* in the mutex block, set the data for this process */
262     umtx_lock(&usprepMutex);
263     if(profile->sprepData==NULL) {
264         profile->sprepData=dataMemory;
265         dataMemory=NULL;
266         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
267         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
268     } else {
269         p=(const int32_t *)udata_getMemory(profile->sprepData);
270     }
271     umtx_unlock(&usprepMutex);
272     /* initialize some variables */
273     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
274 
275     u_getUnicodeVersion(normUnicodeVersion);
276     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
277                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
278     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
279                   (dataVersion[2] << 8 ) + (dataVersion[3]);
280     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
281 
282     if(U_FAILURE(*errorCode)){
283         udata_close(dataMemory);
284         return FALSE;
285     }
286     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
287         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
288         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
289       ){
290         *errorCode = U_INVALID_FORMAT_ERROR;
291         udata_close(dataMemory);
292         return FALSE;
293     }
294     profile->isDataLoaded = TRUE;
295 
296     /* if a different thread set it first, then close the extra data */
297     if(dataMemory!=NULL) {
298         udata_close(dataMemory); /* NULL if it was set correctly */
299     }
300 
301 
302     return profile->isDataLoaded;
303 }
304 
305 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)306 usprep_getProfile(const char* path,
307                   const char* name,
308                   UErrorCode *status){
309 
310     UStringPrepProfile* profile = NULL;
311 
312     initCache(status);
313 
314     if(U_FAILURE(*status)){
315         return NULL;
316     }
317 
318     UStringPrepKey stackKey;
319     /*
320      * const is cast way to save malloc, strcpy and free calls
321      * we use the passed in pointers for fetching the data from the
322      * hash table which is safe
323      */
324     stackKey.name = (char*) name;
325     stackKey.path = (char*) path;
326 
327     /* fetch the data from the cache */
328     umtx_lock(&usprepMutex);
329     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
330     if(profile != NULL) {
331         profile->refCount++;
332     }
333     umtx_unlock(&usprepMutex);
334 
335     if(profile == NULL) {
336         /* else load the data and put the data in the cache */
337         LocalMemory<UStringPrepProfile> newProfile;
338         if(newProfile.allocateInsteadAndReset() == NULL) {
339             *status = U_MEMORY_ALLOCATION_ERROR;
340             return NULL;
341         }
342 
343         /* load the data */
344         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
345             return NULL;
346         }
347 
348         /* get the options */
349         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
350         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
351 
352         LocalMemory<UStringPrepKey> key;
353         LocalMemory<char> keyName;
354         LocalMemory<char> keyPath;
355         if( key.allocateInsteadAndReset() == NULL ||
356             keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
357             (path != NULL &&
358              keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
359          ) {
360             *status = U_MEMORY_ALLOCATION_ERROR;
361             usprep_unload(newProfile.getAlias());
362             return NULL;
363         }
364 
365         umtx_lock(&usprepMutex);
366         // If another thread already inserted the same key/value, refcount and cleanup our thread data
367         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
368         if(profile != NULL) {
369             profile->refCount++;
370             usprep_unload(newProfile.getAlias());
371         }
372         else {
373             /* initialize the key members */
374             key->name = keyName.orphan();
375             uprv_strcpy(key->name, name);
376             if(path != NULL){
377                 key->path = keyPath.orphan();
378                 uprv_strcpy(key->path, path);
379             }
380             profile = newProfile.orphan();
381 
382             /* add the data object to the cache */
383             profile->refCount = 1;
384             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
385         }
386         umtx_unlock(&usprepMutex);
387     }
388 
389     return profile;
390 }
391 
392 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)393 usprep_open(const char* path,
394             const char* name,
395             UErrorCode* status){
396 
397     if(status == NULL || U_FAILURE(*status)){
398         return NULL;
399     }
400 
401     /* initialize the profile struct members */
402     return usprep_getProfile(path,name,status);
403 }
404 
405 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)406 usprep_openByType(UStringPrepProfileType type,
407 				  UErrorCode* status) {
408     if(status == NULL || U_FAILURE(*status)){
409         return NULL;
410     }
411     int32_t index = (int32_t)type;
412     if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
413         *status = U_ILLEGAL_ARGUMENT_ERROR;
414         return NULL;
415     }
416     return usprep_open(NULL, PROFILE_NAMES[index], status);
417 }
418 
419 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)420 usprep_close(UStringPrepProfile* profile){
421     if(profile==NULL){
422         return;
423     }
424 
425     umtx_lock(&usprepMutex);
426     /* decrement the ref count*/
427     if(profile->refCount > 0){
428         profile->refCount--;
429     }
430     umtx_unlock(&usprepMutex);
431 
432 }
433 
434 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)435 uprv_syntaxError(const UChar* rules,
436                  int32_t pos,
437                  int32_t rulesLen,
438                  UParseError* parseError){
439     if(parseError == NULL){
440         return;
441     }
442     parseError->offset = pos;
443     parseError->line = 0 ; // we are not using line numbers
444 
445     // for pre-context
446     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
447     int32_t limit = pos;
448 
449     u_memcpy(parseError->preContext,rules+start,limit-start);
450     //null terminate the buffer
451     parseError->preContext[limit-start] = 0;
452 
453     // for post-context; include error rules[pos]
454     start = pos;
455     limit = start + (U_PARSE_CONTEXT_LEN-1);
456     if (limit > rulesLen) {
457         limit = rulesLen;
458     }
459     if (start < rulesLen) {
460         u_memcpy(parseError->postContext,rules+start,limit-start);
461     }
462     //null terminate the buffer
463     parseError->postContext[limit-start]= 0;
464 }
465 
466 
467 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)468 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
469 
470     UStringPrepType type;
471     if(trieWord == 0){
472         /*
473          * Initial value stored in the mapping table
474          * just return USPREP_TYPE_LIMIT .. so that
475          * the source codepoint is copied to the destination
476          */
477         type = USPREP_TYPE_LIMIT;
478         isIndex =FALSE;
479         value = 0;
480     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
481         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
482         isIndex =FALSE;
483         value = 0;
484     }else{
485         /* get the type */
486         type = USPREP_MAP;
487         /* ascertain if the value is index or delta */
488         if(trieWord & 0x02){
489             isIndex = TRUE;
490             value = trieWord  >> 2; //mask off the lower 2 bits and shift
491         }else{
492             isIndex = FALSE;
493             value = (int16_t)trieWord;
494             value =  (value >> 2);
495         }
496 
497         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
498             type = USPREP_DELETE;
499             isIndex =FALSE;
500             value = 0;
501         }
502     }
503     return type;
504 }
505 
506 // TODO: change to writing to UnicodeString not UChar *
507 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)508 usprep_map(  const UStringPrepProfile* profile,
509              const UChar* src, int32_t srcLength,
510              UChar* dest, int32_t destCapacity,
511              int32_t options,
512              UParseError* parseError,
513              UErrorCode* status ){
514 
515     uint16_t result;
516     int32_t destIndex=0;
517     int32_t srcIndex;
518     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519     UStringPrepType type;
520     int16_t value;
521     UBool isIndex;
522     const int32_t* indexes = profile->indexes;
523 
524     // no error checking the caller check for error and arguments
525     // no string length check the caller finds out the string length
526 
527     for(srcIndex=0;srcIndex<srcLength;){
528         UChar32 ch;
529 
530         U16_NEXT(src,srcIndex,srcLength,ch);
531 
532         result=0;
533 
534         UTRIE_GET16(&profile->sprepTrie,ch,result);
535 
536         type = getValues(result, value, isIndex);
537 
538         // check if the source codepoint is unassigned
539         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
540 
541             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542             *status = U_STRINGPREP_UNASSIGNED_ERROR;
543             return 0;
544 
545         }else if(type == USPREP_MAP){
546 
547             int32_t index, length;
548 
549             if(isIndex){
550                 index = value;
551                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553                     length = 1;
554                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556                     length = 2;
557                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559                     length = 3;
560                 }else{
561                     length = profile->mappingData[index++];
562 
563                 }
564 
565                 /* copy mapping to destination */
566                 for(int32_t i=0; i< length; i++){
567                     if(destIndex < destCapacity  ){
568                         dest[destIndex] = profile->mappingData[index+i];
569                     }
570                     destIndex++; /* for pre-flighting */
571                 }
572                 continue;
573             }else{
574                 // subtract the delta to arrive at the code point
575                 ch -= value;
576             }
577 
578         }else if(type==USPREP_DELETE){
579              // just consume the codepoint and contine
580             continue;
581         }
582         //copy the code point into destination
583         if(ch <= 0xFFFF){
584             if(destIndex < destCapacity ){
585                 dest[destIndex] = (UChar)ch;
586             }
587             destIndex++;
588         }else{
589             if(destIndex+1 < destCapacity ){
590                 dest[destIndex]   = U16_LEAD(ch);
591                 dest[destIndex+1] = U16_TRAIL(ch);
592             }
593             destIndex +=2;
594         }
595 
596     }
597 
598     return u_terminateUChars(dest, destCapacity, destIndex, status);
599 }
600 
601 /*
602    1) Map -- For each character in the input, check if it has a mapping
603       and, if so, replace it with its mapping.
604 
605    2) Normalize -- Possibly normalize the result of step 1 using Unicode
606       normalization.
607 
608    3) Prohibit -- Check for any characters that are not allowed in the
609       output.  If any are found, return an error.
610 
611    4) Check bidi -- Possibly check for right-to-left characters, and if
612       any are found, make sure that the whole string satisfies the
613       requirements for bidirectional strings.  If the string does not
614       satisfy the requirements for bidirectional strings, return an
615       error.
616       [Unicode3.2] defines several bidirectional categories; each character
617        has one bidirectional category assigned to it.  For the purposes of
618        the requirements below, an "RandALCat character" is a character that
619        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
620        is a character that has Unicode bidirectional category "L".  Note
621 
622 
623        that there are many characters which fall in neither of the above
624        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
625        this because they have bidirectional category "EN".
626 
627        In any profile that specifies bidirectional character handling, all
628        three of the following requirements MUST be met:
629 
630        1) The characters in section 5.8 MUST be prohibited.
631 
632        2) If a string contains any RandALCat character, the string MUST NOT
633           contain any LCat character.
634 
635        3) If a string contains any RandALCat character, a RandALCat
636           character MUST be the first character of the string, and a
637           RandALCat character MUST be the last character of the string.
638 */
639 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)640 usprep_prepare(   const UStringPrepProfile* profile,
641                   const UChar* src, int32_t srcLength,
642                   UChar* dest, int32_t destCapacity,
643                   int32_t options,
644                   UParseError* parseError,
645                   UErrorCode* status ){
646 
647     // check error status
648     if(U_FAILURE(*status)){
649         return 0;
650     }
651 
652     //check arguments
653     if(profile==NULL ||
654             (src==NULL ? srcLength!=0 : srcLength<-1) ||
655             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
656         *status=U_ILLEGAL_ARGUMENT_ERROR;
657         return 0;
658     }
659 
660     //get the string length
661     if(srcLength < 0){
662         srcLength = u_strlen(src);
663     }
664     // map
665     UnicodeString s1;
666     UChar *b1 = s1.getBuffer(srcLength);
667     if(b1==NULL){
668         *status = U_MEMORY_ALLOCATION_ERROR;
669         return 0;
670     }
671     int32_t b1Len = usprep_map(profile, src, srcLength,
672                                b1, s1.getCapacity(), options, parseError, status);
673     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
674 
675     if(*status == U_BUFFER_OVERFLOW_ERROR){
676         // redo processing of string
677         /* we do not have enough room so grow the buffer*/
678         b1 = s1.getBuffer(b1Len);
679         if(b1==NULL){
680             *status = U_MEMORY_ALLOCATION_ERROR;
681             return 0;
682         }
683 
684         *status = U_ZERO_ERROR; // reset error
685         b1Len = usprep_map(profile, src, srcLength,
686                            b1, s1.getCapacity(), options, parseError, status);
687         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
688     }
689     if(U_FAILURE(*status)){
690         return 0;
691     }
692 
693     // normalize
694     UnicodeString s2;
695     if(profile->doNFKC){
696         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
697         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
698         if(U_FAILURE(*status)){
699             return 0;
700         }
701         fn2.normalize(s1, s2, *status);
702     }else{
703         s2.fastCopyFrom(s1);
704     }
705     if(U_FAILURE(*status)){
706         return 0;
707     }
708 
709     // Prohibit and checkBiDi in one pass
710     const UChar *b2 = s2.getBuffer();
711     int32_t b2Len = s2.length();
712     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
713     UBool leftToRight=FALSE, rightToLeft=FALSE;
714     int32_t rtlPos =-1, ltrPos =-1;
715 
716     for(int32_t b2Index=0; b2Index<b2Len;){
717         UChar32 ch = 0;
718         U16_NEXT(b2, b2Index, b2Len, ch);
719 
720         uint16_t result;
721         UTRIE_GET16(&profile->sprepTrie,ch,result);
722 
723         int16_t value;
724         UBool isIndex;
725         UStringPrepType type = getValues(result, value, isIndex);
726 
727         if( type == USPREP_PROHIBITED ||
728             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
729            ){
730             *status = U_STRINGPREP_PROHIBITED_ERROR;
731             uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
732             return 0;
733         }
734 
735         if(profile->checkBiDi) {
736             direction = ubidi_getClass(ch);
737             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
738                 firstCharDir = direction;
739             }
740             if(direction == U_LEFT_TO_RIGHT){
741                 leftToRight = TRUE;
742                 ltrPos = b2Index-1;
743             }
744             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
745                 rightToLeft = TRUE;
746                 rtlPos = b2Index-1;
747             }
748         }
749     }
750     if(profile->checkBiDi == TRUE){
751         // satisfy 2
752         if( leftToRight == TRUE && rightToLeft == TRUE){
753             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
754             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
755             return 0;
756         }
757 
758         //satisfy 3
759         if( rightToLeft == TRUE &&
760             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
761               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
762            ){
763             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
764             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
765             return FALSE;
766         }
767     }
768     return s2.extract(dest, destCapacity, *status);
769 }
770 
771 
772 /* data swapping ------------------------------------------------------------ */
773 
774 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)775 usprep_swap(const UDataSwapper *ds,
776             const void *inData, int32_t length, void *outData,
777             UErrorCode *pErrorCode) {
778     const UDataInfo *pInfo;
779     int32_t headerSize;
780 
781     const uint8_t *inBytes;
782     uint8_t *outBytes;
783 
784     const int32_t *inIndexes;
785     int32_t indexes[16];
786 
787     int32_t i, offset, count, size;
788 
789     /* udata_swapDataHeader checks the arguments */
790     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
791     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
792         return 0;
793     }
794 
795     /* check data format and format version */
796     pInfo=(const UDataInfo *)((const char *)inData+4);
797     if(!(
798         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
799         pInfo->dataFormat[1]==0x50 &&
800         pInfo->dataFormat[2]==0x52 &&
801         pInfo->dataFormat[3]==0x50 &&
802         pInfo->formatVersion[0]==3
803     )) {
804         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
805                          pInfo->dataFormat[0], pInfo->dataFormat[1],
806                          pInfo->dataFormat[2], pInfo->dataFormat[3],
807                          pInfo->formatVersion[0]);
808         *pErrorCode=U_UNSUPPORTED_ERROR;
809         return 0;
810     }
811 
812     inBytes=(const uint8_t *)inData+headerSize;
813     outBytes=(uint8_t *)outData+headerSize;
814 
815     inIndexes=(const int32_t *)inBytes;
816 
817     if(length>=0) {
818         length-=headerSize;
819         if(length<16*4) {
820             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
821                              length);
822             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
823             return 0;
824         }
825     }
826 
827     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
828     for(i=0; i<16; ++i) {
829         indexes[i]=udata_readInt32(ds, inIndexes[i]);
830     }
831 
832     /* calculate the total length of the data */
833     size=
834         16*4+ /* size of indexes[] */
835         indexes[_SPREP_INDEX_TRIE_SIZE]+
836         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
837 
838     if(length>=0) {
839         if(length<size) {
840             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
841                              length);
842             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
843             return 0;
844         }
845 
846         /* copy the data for inaccessible bytes */
847         if(inBytes!=outBytes) {
848             uprv_memcpy(outBytes, inBytes, size);
849         }
850 
851         offset=0;
852 
853         /* swap the int32_t indexes[] */
854         count=16*4;
855         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
856         offset+=count;
857 
858         /* swap the UTrie */
859         count=indexes[_SPREP_INDEX_TRIE_SIZE];
860         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
861         offset+=count;
862 
863         /* swap the uint16_t mappingTable[] */
864         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
865         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
866         //offset+=count;
867     }
868 
869     return headerSize+size;
870 }
871 
872 #endif /* #if !UCONFIG_NO_IDNA */
873