• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2016, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  usprep.cpp
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003jul2
16  *   created by: Ram Viswanadha
17  */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_IDNA
22 
23 #include "unicode/usprep.h"
24 
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39 
40 U_NAMESPACE_USE
41 
42 U_CDECL_BEGIN
43 
44 /*
45 Static cache for already opened StringPrep profiles
46 */
47 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48 static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER;
49 
50 static UMutex usprepMutex;
51 /* format version of spp file */
52 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
53 
54 /* the Unicode version of the sprep data */
55 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
56 
/*
 * Data file names of the built-in profiles, indexed by UStringPrepProfileType.
 * The order of the entries must stay aligned with that enum because
 * usprep_openByType() uses the enum value directly as an index.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
74 
75 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)76 isSPrepAcceptable(void * /* context */,
77              const char * /* type */,
78              const char * /* name */,
79              const UDataInfo *pInfo) {
80     if(
81         pInfo->size>=20 &&
82         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
83         pInfo->charsetFamily==U_CHARSET_FAMILY &&
84         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
85         pInfo->dataFormat[1]==0x50 &&
86         pInfo->dataFormat[2]==0x52 &&
87         pInfo->dataFormat[3]==0x50 &&
88         pInfo->formatVersion[0]==3 &&
89         pInfo->formatVersion[2]==UTRIE_SHIFT &&
90         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
91     ) {
92         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
93         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
94         return TRUE;
95     } else {
96         return FALSE;
97     }
98 }
99 
100 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)101 getSPrepFoldingOffset(uint32_t data) {
102 
103     return (int32_t)data;
104 
105 }
106 
107 /* hashes an entry  */
108 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)109 hashEntry(const UHashTok parm) {
110     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
111     UHashTok namekey, pathkey;
112     namekey.pointer = b->name;
113     pathkey.pointer = b->path;
114     uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
115             37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
116     return static_cast<int32_t>(unsignedHash);
117 }
118 
119 /* compares two entries */
120 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)121 compareEntries(const UHashTok p1, const UHashTok p2) {
122     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
123     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
124     UHashTok name1, name2, path1, path2;
125     name1.pointer = b1->name;
126     name2.pointer = b2->name;
127     path1.pointer = b1->path;
128     path2.pointer = b2->path;
129     return ((UBool)(uhash_compareChars(name1, name2) &
130         uhash_compareChars(path1, path2)));
131 }
132 
133 static void
usprep_unload(UStringPrepProfile * data)134 usprep_unload(UStringPrepProfile* data){
135     udata_close(data->sprepData);
136 }
137 
138 static int32_t
usprep_internal_flushCache(UBool noRefCount)139 usprep_internal_flushCache(UBool noRefCount){
140     UStringPrepProfile *profile = NULL;
141     UStringPrepKey  *key  = NULL;
142     int32_t pos = UHASH_FIRST;
143     int32_t deletedNum = 0;
144     const UHashElement *e;
145 
146     /*
147      * if shared data hasn't even been lazy evaluated yet
148      * return 0
149      */
150     umtx_lock(&usprepMutex);
151     if (SHARED_DATA_HASHTABLE == NULL) {
152         umtx_unlock(&usprepMutex);
153         return 0;
154     }
155 
156     /*creates an enumeration to iterate through every element in the table */
157     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
158     {
159         profile = (UStringPrepProfile *) e->value.pointer;
160         key  = (UStringPrepKey *) e->key.pointer;
161 
162         if ((noRefCount== FALSE && profile->refCount == 0) ||
163              noRefCount== TRUE) {
164             deletedNum++;
165             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
166 
167             /* unload the data */
168             usprep_unload(profile);
169 
170             if(key->name != NULL) {
171                 uprv_free(key->name);
172                 key->name=NULL;
173             }
174             if(key->path != NULL) {
175                 uprv_free(key->path);
176                 key->path=NULL;
177             }
178             uprv_free(profile);
179             uprv_free(key);
180         }
181 
182     }
183     umtx_unlock(&usprepMutex);
184 
185     return deletedNum;
186 }
187 
188 /* Works just like ucnv_flushCache()
189 static int32_t
190 usprep_flushCache(){
191     return usprep_internal_flushCache(FALSE);
192 }
193 */
194 
usprep_cleanup(void)195 static UBool U_CALLCONV usprep_cleanup(void){
196     if (SHARED_DATA_HASHTABLE != NULL) {
197         usprep_internal_flushCache(TRUE);
198         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
199             uhash_close(SHARED_DATA_HASHTABLE);
200             SHARED_DATA_HASHTABLE = NULL;
201         }
202     }
203     gSharedDataInitOnce.reset();
204     return (SHARED_DATA_HASHTABLE == NULL);
205 }
206 U_CDECL_END
207 
208 
209 /** Initializes the cache for resources */
210 static void U_CALLCONV
createCache(UErrorCode & status)211 createCache(UErrorCode &status) {
212     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
213     if (U_FAILURE(status)) {
214         SHARED_DATA_HASHTABLE = NULL;
215     }
216     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
217 }
218 
219 static void
initCache(UErrorCode * status)220 initCache(UErrorCode *status) {
221     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
222 }
223 
224 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)225 loadData(UStringPrepProfile* profile,
226          const char* path,
227          const char* name,
228          const char* type,
229          UErrorCode* errorCode) {
230     /* load Unicode SPREP data from file */
231     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
232     UDataMemory *dataMemory;
233     const int32_t *p=NULL;
234     const uint8_t *pb;
235     UVersionInfo normUnicodeVersion;
236     int32_t normUniVer, sprepUniVer, normCorrVer;
237 
238     if(errorCode==NULL || U_FAILURE(*errorCode)) {
239         return 0;
240     }
241 
242     /* open the data outside the mutex block */
243     //TODO: change the path
244     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
245     if(U_FAILURE(*errorCode)) {
246         return FALSE;
247     }
248 
249     p=(const int32_t *)udata_getMemory(dataMemory);
250     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
251     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
252     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
253 
254 
255     if(U_FAILURE(*errorCode)) {
256         udata_close(dataMemory);
257         return FALSE;
258     }
259 
260     /* in the mutex block, set the data for this process */
261     umtx_lock(&usprepMutex);
262     if(profile->sprepData==NULL) {
263         profile->sprepData=dataMemory;
264         dataMemory=NULL;
265         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
266         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
267     } else {
268         p=(const int32_t *)udata_getMemory(profile->sprepData);
269     }
270     umtx_unlock(&usprepMutex);
271     /* initialize some variables */
272     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
273 
274     u_getUnicodeVersion(normUnicodeVersion);
275     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
276                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
277     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
278                   (dataVersion[2] << 8 ) + (dataVersion[3]);
279     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
280 
281     if(U_FAILURE(*errorCode)){
282         udata_close(dataMemory);
283         return FALSE;
284     }
285     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
286         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
287         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
288       ){
289         *errorCode = U_INVALID_FORMAT_ERROR;
290         udata_close(dataMemory);
291         return FALSE;
292     }
293     profile->isDataLoaded = TRUE;
294 
295     /* if a different thread set it first, then close the extra data */
296     if(dataMemory!=NULL) {
297         udata_close(dataMemory); /* NULL if it was set correctly */
298     }
299 
300 
301     return profile->isDataLoaded;
302 }
303 
304 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)305 usprep_getProfile(const char* path,
306                   const char* name,
307                   UErrorCode *status){
308 
309     UStringPrepProfile* profile = NULL;
310 
311     initCache(status);
312 
313     if(U_FAILURE(*status)){
314         return NULL;
315     }
316 
317     UStringPrepKey stackKey;
318     /*
319      * const is cast way to save malloc, strcpy and free calls
320      * we use the passed in pointers for fetching the data from the
321      * hash table which is safe
322      */
323     stackKey.name = (char*) name;
324     stackKey.path = (char*) path;
325 
326     /* fetch the data from the cache */
327     umtx_lock(&usprepMutex);
328     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
329     if(profile != NULL) {
330         profile->refCount++;
331     }
332     umtx_unlock(&usprepMutex);
333 
334     if(profile == NULL) {
335         /* else load the data and put the data in the cache */
336         LocalMemory<UStringPrepProfile> newProfile;
337         if(newProfile.allocateInsteadAndReset() == NULL) {
338             *status = U_MEMORY_ALLOCATION_ERROR;
339             return NULL;
340         }
341 
342         /* load the data */
343         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
344             return NULL;
345         }
346 
347         /* get the options */
348         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
349         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
350 
351         LocalMemory<UStringPrepKey> key;
352         LocalMemory<char> keyName;
353         LocalMemory<char> keyPath;
354         if( key.allocateInsteadAndReset() == NULL ||
355             keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
356             (path != NULL &&
357              keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
358          ) {
359             *status = U_MEMORY_ALLOCATION_ERROR;
360             usprep_unload(newProfile.getAlias());
361             return NULL;
362         }
363 
364         umtx_lock(&usprepMutex);
365         // If another thread already inserted the same key/value, refcount and cleanup our thread data
366         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
367         if(profile != NULL) {
368             profile->refCount++;
369             usprep_unload(newProfile.getAlias());
370         }
371         else {
372             /* initialize the key members */
373             key->name = keyName.orphan();
374             uprv_strcpy(key->name, name);
375             if(path != NULL){
376                 key->path = keyPath.orphan();
377                 uprv_strcpy(key->path, path);
378             }
379             profile = newProfile.orphan();
380 
381             /* add the data object to the cache */
382             profile->refCount = 1;
383             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
384         }
385         umtx_unlock(&usprepMutex);
386     }
387 
388     return profile;
389 }
390 
391 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)392 usprep_open(const char* path,
393             const char* name,
394             UErrorCode* status){
395 
396     if(status == NULL || U_FAILURE(*status)){
397         return NULL;
398     }
399 
400     /* initialize the profile struct members */
401     return usprep_getProfile(path,name,status);
402 }
403 
404 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)405 usprep_openByType(UStringPrepProfileType type,
406 				  UErrorCode* status) {
407     if(status == NULL || U_FAILURE(*status)){
408         return NULL;
409     }
410     int32_t index = (int32_t)type;
411     if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
412         *status = U_ILLEGAL_ARGUMENT_ERROR;
413         return NULL;
414     }
415     return usprep_open(NULL, PROFILE_NAMES[index], status);
416 }
417 
418 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)419 usprep_close(UStringPrepProfile* profile){
420     if(profile==NULL){
421         return;
422     }
423 
424     umtx_lock(&usprepMutex);
425     /* decrement the ref count*/
426     if(profile->refCount > 0){
427         profile->refCount--;
428     }
429     umtx_unlock(&usprepMutex);
430 
431 }
432 
433 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)434 uprv_syntaxError(const UChar* rules,
435                  int32_t pos,
436                  int32_t rulesLen,
437                  UParseError* parseError){
438     if(parseError == NULL){
439         return;
440     }
441     parseError->offset = pos;
442     parseError->line = 0 ; // we are not using line numbers
443 
444     // for pre-context
445     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
446     int32_t limit = pos;
447 
448     u_memcpy(parseError->preContext,rules+start,limit-start);
449     //null terminate the buffer
450     parseError->preContext[limit-start] = 0;
451 
452     // for post-context; include error rules[pos]
453     start = pos;
454     limit = start + (U_PARSE_CONTEXT_LEN-1);
455     if (limit > rulesLen) {
456         limit = rulesLen;
457     }
458     if (start < rulesLen) {
459         u_memcpy(parseError->postContext,rules+start,limit-start);
460     }
461     //null terminate the buffer
462     parseError->postContext[limit-start]= 0;
463 }
464 
465 
466 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)467 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
468 
469     UStringPrepType type;
470     if(trieWord == 0){
471         /*
472          * Initial value stored in the mapping table
473          * just return USPREP_TYPE_LIMIT .. so that
474          * the source codepoint is copied to the destination
475          */
476         type = USPREP_TYPE_LIMIT;
477         isIndex =FALSE;
478         value = 0;
479     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
480         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
481         isIndex =FALSE;
482         value = 0;
483     }else{
484         /* get the type */
485         type = USPREP_MAP;
486         /* ascertain if the value is index or delta */
487         if(trieWord & 0x02){
488             isIndex = TRUE;
489             value = trieWord  >> 2; //mask off the lower 2 bits and shift
490         }else{
491             isIndex = FALSE;
492             value = (int16_t)trieWord;
493             value =  (value >> 2);
494         }
495 
496         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
497             type = USPREP_DELETE;
498             isIndex =FALSE;
499             value = 0;
500         }
501     }
502     return type;
503 }
504 
505 // TODO: change to writing to UnicodeString not UChar *
506 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)507 usprep_map(  const UStringPrepProfile* profile,
508              const UChar* src, int32_t srcLength,
509              UChar* dest, int32_t destCapacity,
510              int32_t options,
511              UParseError* parseError,
512              UErrorCode* status ){
513 
514     uint16_t result;
515     int32_t destIndex=0;
516     int32_t srcIndex;
517     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
518     UStringPrepType type;
519     int16_t value;
520     UBool isIndex;
521     const int32_t* indexes = profile->indexes;
522 
523     // no error checking the caller check for error and arguments
524     // no string length check the caller finds out the string length
525 
526     for(srcIndex=0;srcIndex<srcLength;){
527         UChar32 ch;
528 
529         U16_NEXT(src,srcIndex,srcLength,ch);
530 
531         result=0;
532 
533         UTRIE_GET16(&profile->sprepTrie,ch,result);
534 
535         type = getValues(result, value, isIndex);
536 
537         // check if the source codepoint is unassigned
538         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
539 
540             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
541             *status = U_STRINGPREP_UNASSIGNED_ERROR;
542             return 0;
543 
544         }else if(type == USPREP_MAP){
545 
546             int32_t index, length;
547 
548             if(isIndex){
549                 index = value;
550                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
551                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
552                     length = 1;
553                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
554                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
555                     length = 2;
556                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
557                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
558                     length = 3;
559                 }else{
560                     length = profile->mappingData[index++];
561 
562                 }
563 
564                 /* copy mapping to destination */
565                 for(int32_t i=0; i< length; i++){
566                     if(destIndex < destCapacity  ){
567                         dest[destIndex] = profile->mappingData[index+i];
568                     }
569                     destIndex++; /* for pre-flighting */
570                 }
571                 continue;
572             }else{
573                 // subtract the delta to arrive at the code point
574                 ch -= value;
575             }
576 
577         }else if(type==USPREP_DELETE){
578              // just consume the codepoint and continue
579             continue;
580         }
581         //copy the code point into destination
582         if(ch <= 0xFFFF){
583             if(destIndex < destCapacity ){
584                 dest[destIndex] = (UChar)ch;
585             }
586             destIndex++;
587         }else{
588             if(destIndex+1 < destCapacity ){
589                 dest[destIndex]   = U16_LEAD(ch);
590                 dest[destIndex+1] = U16_TRAIL(ch);
591             }
592             destIndex +=2;
593         }
594 
595     }
596 
597     return u_terminateUChars(dest, destCapacity, destIndex, status);
598 }
599 
600 /*
601    1) Map -- For each character in the input, check if it has a mapping
602       and, if so, replace it with its mapping.
603 
604    2) Normalize -- Possibly normalize the result of step 1 using Unicode
605       normalization.
606 
607    3) Prohibit -- Check for any characters that are not allowed in the
608       output.  If any are found, return an error.
609 
610    4) Check bidi -- Possibly check for right-to-left characters, and if
611       any are found, make sure that the whole string satisfies the
612       requirements for bidirectional strings.  If the string does not
613       satisfy the requirements for bidirectional strings, return an
614       error.
615       [Unicode3.2] defines several bidirectional categories; each character
616        has one bidirectional category assigned to it.  For the purposes of
617        the requirements below, an "RandALCat character" is a character that
618        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
619        is a character that has Unicode bidirectional category "L".  Note
620 
621 
622        that there are many characters which fall in neither of the above
623        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
624        this because they have bidirectional category "EN".
625 
626        In any profile that specifies bidirectional character handling, all
627        three of the following requirements MUST be met:
628 
629        1) The characters in section 5.8 MUST be prohibited.
630 
631        2) If a string contains any RandALCat character, the string MUST NOT
632           contain any LCat character.
633 
634        3) If a string contains any RandALCat character, a RandALCat
635           character MUST be the first character of the string, and a
636           RandALCat character MUST be the last character of the string.
637 */
638 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)639 usprep_prepare(   const UStringPrepProfile* profile,
640                   const UChar* src, int32_t srcLength,
641                   UChar* dest, int32_t destCapacity,
642                   int32_t options,
643                   UParseError* parseError,
644                   UErrorCode* status ){
645 
646     // check error status
647     if(U_FAILURE(*status)){
648         return 0;
649     }
650 
651     //check arguments
652     if(profile==NULL ||
653             (src==NULL ? srcLength!=0 : srcLength<-1) ||
654             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
655         *status=U_ILLEGAL_ARGUMENT_ERROR;
656         return 0;
657     }
658 
659     //get the string length
660     if(srcLength < 0){
661         srcLength = u_strlen(src);
662     }
663     // map
664     UnicodeString s1;
665     UChar *b1 = s1.getBuffer(srcLength);
666     if(b1==NULL){
667         *status = U_MEMORY_ALLOCATION_ERROR;
668         return 0;
669     }
670     int32_t b1Len = usprep_map(profile, src, srcLength,
671                                b1, s1.getCapacity(), options, parseError, status);
672     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
673 
674     if(*status == U_BUFFER_OVERFLOW_ERROR){
675         // redo processing of string
676         /* we do not have enough room so grow the buffer*/
677         b1 = s1.getBuffer(b1Len);
678         if(b1==NULL){
679             *status = U_MEMORY_ALLOCATION_ERROR;
680             return 0;
681         }
682 
683         *status = U_ZERO_ERROR; // reset error
684         b1Len = usprep_map(profile, src, srcLength,
685                            b1, s1.getCapacity(), options, parseError, status);
686         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
687     }
688     if(U_FAILURE(*status)){
689         return 0;
690     }
691 
692     // normalize
693     UnicodeString s2;
694     if(profile->doNFKC){
695         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
696         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
697         if(U_FAILURE(*status)){
698             return 0;
699         }
700         fn2.normalize(s1, s2, *status);
701     }else{
702         s2.fastCopyFrom(s1);
703     }
704     if(U_FAILURE(*status)){
705         return 0;
706     }
707 
708     // Prohibit and checkBiDi in one pass
709     const UChar *b2 = s2.getBuffer();
710     int32_t b2Len = s2.length();
711     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
712     UBool leftToRight=FALSE, rightToLeft=FALSE;
713     int32_t rtlPos =-1, ltrPos =-1;
714 
715     for(int32_t b2Index=0; b2Index<b2Len;){
716         UChar32 ch = 0;
717         U16_NEXT(b2, b2Index, b2Len, ch);
718 
719         uint16_t result;
720         UTRIE_GET16(&profile->sprepTrie,ch,result);
721 
722         int16_t value;
723         UBool isIndex;
724         UStringPrepType type = getValues(result, value, isIndex);
725 
726         if( type == USPREP_PROHIBITED ||
727             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
728            ){
729             *status = U_STRINGPREP_PROHIBITED_ERROR;
730             uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
731             return 0;
732         }
733 
734         if(profile->checkBiDi) {
735             direction = ubidi_getClass(ch);
736             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
737                 firstCharDir = direction;
738             }
739             if(direction == U_LEFT_TO_RIGHT){
740                 leftToRight = TRUE;
741                 ltrPos = b2Index-1;
742             }
743             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
744                 rightToLeft = TRUE;
745                 rtlPos = b2Index-1;
746             }
747         }
748     }
749     if(profile->checkBiDi == TRUE){
750         // satisfy 2
751         if( leftToRight == TRUE && rightToLeft == TRUE){
752             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
753             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
754             return 0;
755         }
756 
757         //satisfy 3
758         if( rightToLeft == TRUE &&
759             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
760               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
761            ){
762             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
763             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
764             return FALSE;
765         }
766     }
767     return s2.extract(dest, destCapacity, *status);
768 }
769 
770 
771 /* data swapping ------------------------------------------------------------ */
772 
773 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)774 usprep_swap(const UDataSwapper *ds,
775             const void *inData, int32_t length, void *outData,
776             UErrorCode *pErrorCode) {
777     const UDataInfo *pInfo;
778     int32_t headerSize;
779 
780     const uint8_t *inBytes;
781     uint8_t *outBytes;
782 
783     const int32_t *inIndexes;
784     int32_t indexes[16];
785 
786     int32_t i, offset, count, size;
787 
788     /* udata_swapDataHeader checks the arguments */
789     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
790     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
791         return 0;
792     }
793 
794     /* check data format and format version */
795     pInfo=(const UDataInfo *)((const char *)inData+4);
796     if(!(
797         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
798         pInfo->dataFormat[1]==0x50 &&
799         pInfo->dataFormat[2]==0x52 &&
800         pInfo->dataFormat[3]==0x50 &&
801         pInfo->formatVersion[0]==3
802     )) {
803         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
804                          pInfo->dataFormat[0], pInfo->dataFormat[1],
805                          pInfo->dataFormat[2], pInfo->dataFormat[3],
806                          pInfo->formatVersion[0]);
807         *pErrorCode=U_UNSUPPORTED_ERROR;
808         return 0;
809     }
810 
811     inBytes=(const uint8_t *)inData+headerSize;
812     outBytes=(uint8_t *)outData+headerSize;
813 
814     inIndexes=(const int32_t *)inBytes;
815 
816     if(length>=0) {
817         length-=headerSize;
818         if(length<16*4) {
819             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
820                              length);
821             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
822             return 0;
823         }
824     }
825 
826     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
827     for(i=0; i<16; ++i) {
828         indexes[i]=udata_readInt32(ds, inIndexes[i]);
829     }
830 
831     /* calculate the total length of the data */
832     size=
833         16*4+ /* size of indexes[] */
834         indexes[_SPREP_INDEX_TRIE_SIZE]+
835         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
836 
837     if(length>=0) {
838         if(length<size) {
839             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
840                              length);
841             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
842             return 0;
843         }
844 
845         /* copy the data for inaccessible bytes */
846         if(inBytes!=outBytes) {
847             uprv_memcpy(outBytes, inBytes, size);
848         }
849 
850         offset=0;
851 
852         /* swap the int32_t indexes[] */
853         count=16*4;
854         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
855         offset+=count;
856 
857         /* swap the UTrie */
858         count=indexes[_SPREP_INDEX_TRIE_SIZE];
859         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
860         offset+=count;
861 
862         /* swap the uint16_t mappingTable[] */
863         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
864         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
865         //offset+=count;
866     }
867 
868     return headerSize+size;
869 }
870 
871 #endif /* #if !UCONFIG_NO_IDNA */
872