1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA
20
21 #include "unicode/usprep.h"
22
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "unormimp.h"
36 #include "ubidi_props.h"
37
38 U_CDECL_BEGIN
39
40 /*
41 Static cache for already opened StringPrep profiles
42 */
43 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
44
45 static UMTX usprepMutex = NULL;
46
47 /* format version of spp file */
48 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
49
50 /* the Unicode version of the sprep data */
51 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
52
53 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)54 isSPrepAcceptable(void * /* context */,
55 const char * /* type */,
56 const char * /* name */,
57 const UDataInfo *pInfo) {
58 if(
59 pInfo->size>=20 &&
60 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
61 pInfo->charsetFamily==U_CHARSET_FAMILY &&
62 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
63 pInfo->dataFormat[1]==0x50 &&
64 pInfo->dataFormat[2]==0x52 &&
65 pInfo->dataFormat[3]==0x50 &&
66 pInfo->formatVersion[0]==3 &&
67 pInfo->formatVersion[2]==UTRIE_SHIFT &&
68 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
69 ) {
70 uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
71 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
72 return TRUE;
73 } else {
74 return FALSE;
75 }
76 }
77
78 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)79 getSPrepFoldingOffset(uint32_t data) {
80
81 return (int32_t)data;
82
83 }
84
85 /* hashes an entry */
86 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)87 hashEntry(const UHashTok parm) {
88 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
89 UHashTok namekey, pathkey;
90 namekey.pointer = b->name;
91 pathkey.pointer = b->path;
92 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
93 }
94
95 /* compares two entries */
96 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)97 compareEntries(const UHashTok p1, const UHashTok p2) {
98 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
99 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
100 UHashTok name1, name2, path1, path2;
101 name1.pointer = b1->name;
102 name2.pointer = b2->name;
103 path1.pointer = b1->path;
104 path2.pointer = b2->path;
105 return ((UBool)(uhash_compareChars(name1, name2) &
106 uhash_compareChars(path1, path2)));
107 }
108
109 static void
usprep_unload(UStringPrepProfile * data)110 usprep_unload(UStringPrepProfile* data){
111 udata_close(data->sprepData);
112 }
113
114 static int32_t
usprep_internal_flushCache(UBool noRefCount)115 usprep_internal_flushCache(UBool noRefCount){
116 UStringPrepProfile *profile = NULL;
117 UStringPrepKey *key = NULL;
118 int32_t pos = -1;
119 int32_t deletedNum = 0;
120 const UHashElement *e;
121
122 /*
123 * if shared data hasn't even been lazy evaluated yet
124 * return 0
125 */
126 umtx_lock(&usprepMutex);
127 if (SHARED_DATA_HASHTABLE == NULL) {
128 umtx_unlock(&usprepMutex);
129 return 0;
130 }
131
132 /*creates an enumeration to iterate through every element in the table */
133 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
134 {
135 profile = (UStringPrepProfile *) e->value.pointer;
136 key = (UStringPrepKey *) e->key.pointer;
137
138 if ((noRefCount== FALSE && profile->refCount == 0) ||
139 noRefCount== TRUE) {
140 deletedNum++;
141 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
142
143 /* unload the data */
144 usprep_unload(profile);
145
146 if(key->name != NULL) {
147 uprv_free(key->name);
148 key->name=NULL;
149 }
150 if(key->path != NULL) {
151 uprv_free(key->path);
152 key->path=NULL;
153 }
154 uprv_free(profile);
155 uprv_free(key);
156 }
157
158 }
159 umtx_unlock(&usprepMutex);
160
161 return deletedNum;
162 }
163
164 /* Works just like ucnv_flushCache()
165 static int32_t
166 usprep_flushCache(){
167 return usprep_internal_flushCache(FALSE);
168 }
169 */
170
usprep_cleanup(void)171 static UBool U_CALLCONV usprep_cleanup(void){
172 if (SHARED_DATA_HASHTABLE != NULL) {
173 usprep_internal_flushCache(TRUE);
174 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
175 uhash_close(SHARED_DATA_HASHTABLE);
176 SHARED_DATA_HASHTABLE = NULL;
177 }
178 }
179
180 umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */
181 /* if the hash table still exists. The mutex */
182 /* will lazily re-init itself if needed. */
183 return (SHARED_DATA_HASHTABLE == NULL);
184 }
185 U_CDECL_END
186
187 static void
usprep_init()188 usprep_init() {
189 umtx_init(&usprepMutex);
190 }
191
192 /** Initializes the cache for resources */
193 static void
initCache(UErrorCode * status)194 initCache(UErrorCode *status) {
195 UBool makeCache;
196 UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE == NULL), makeCache);
197 if(makeCache) {
198 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
199 if (U_SUCCESS(*status)) {
200 umtx_lock(&usprepMutex);
201 if(SHARED_DATA_HASHTABLE == NULL) {
202 SHARED_DATA_HASHTABLE = newCache;
203 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
204 newCache = NULL;
205 }
206 umtx_unlock(&usprepMutex);
207 if(newCache != NULL) {
208 uhash_close(newCache);
209 }
210 }
211 }
212 }
213
214 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)215 loadData(UStringPrepProfile* profile,
216 const char* path,
217 const char* name,
218 const char* type,
219 UErrorCode* errorCode) {
220 /* load Unicode SPREP data from file */
221 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
222 UDataMemory *dataMemory;
223 const int32_t *p=NULL;
224 const uint8_t *pb;
225 UVersionInfo normUnicodeVersion;
226 int32_t normUniVer, sprepUniVer, normCorrVer;
227
228 if(errorCode==NULL || U_FAILURE(*errorCode)) {
229 return 0;
230 }
231
232 /* open the data outside the mutex block */
233 //TODO: change the path
234 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
235 if(U_FAILURE(*errorCode)) {
236 return FALSE;
237 }
238
239 p=(const int32_t *)udata_getMemory(dataMemory);
240 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
241 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
242 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
243
244
245 if(U_FAILURE(*errorCode)) {
246 udata_close(dataMemory);
247 return FALSE;
248 }
249
250 /* in the mutex block, set the data for this process */
251 umtx_lock(&usprepMutex);
252 if(profile->sprepData==NULL) {
253 profile->sprepData=dataMemory;
254 dataMemory=NULL;
255 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
256 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
257 } else {
258 p=(const int32_t *)udata_getMemory(profile->sprepData);
259 }
260 umtx_unlock(&usprepMutex);
261 /* initialize some variables */
262 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
263
264 unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
265 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
266 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
267 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
268 (dataVersion[2] << 8 ) + (dataVersion[3]);
269 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
270
271 if(U_FAILURE(*errorCode)){
272 udata_close(dataMemory);
273 return FALSE;
274 }
275 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
276 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
277 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
278 ){
279 *errorCode = U_INVALID_FORMAT_ERROR;
280 udata_close(dataMemory);
281 return FALSE;
282 }
283 profile->isDataLoaded = TRUE;
284
285 /* if a different thread set it first, then close the extra data */
286 if(dataMemory!=NULL) {
287 udata_close(dataMemory); /* NULL if it was set correctly */
288 }
289
290
291 return profile->isDataLoaded;
292 }
293
294 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)295 usprep_getProfile(const char* path,
296 const char* name,
297 UErrorCode *status){
298
299 UStringPrepProfile* profile = NULL;
300
301 initCache(status);
302
303 if(U_FAILURE(*status)){
304 return NULL;
305 }
306
307 UStringPrepKey stackKey;
308 /*
309 * const is cast way to save malloc, strcpy and free calls
310 * we use the passed in pointers for fetching the data from the
311 * hash table which is safe
312 */
313 stackKey.name = (char*) name;
314 stackKey.path = (char*) path;
315
316 /* fetch the data from the cache */
317 umtx_lock(&usprepMutex);
318 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
319 umtx_unlock(&usprepMutex);
320
321 if(profile == NULL){
322 UStringPrepKey* key = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
323 if(key == NULL){
324 *status = U_MEMORY_ALLOCATION_ERROR;
325 return NULL;
326 }
327 /* else load the data and put the data in the cache */
328 profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
329 if(profile == NULL){
330 *status = U_MEMORY_ALLOCATION_ERROR;
331 uprv_free(key);
332 return NULL;
333 }
334
335 /* initialize the data struct members */
336 uprv_memset(profile->indexes,0,sizeof(profile->indexes));
337 profile->mappingData = NULL;
338 profile->sprepData = NULL;
339 profile->refCount = 0;
340
341 /* initialize the key memebers */
342 key->name = (char*) uprv_malloc(uprv_strlen(name)+1);
343 if(key->name == NULL){
344 *status = U_MEMORY_ALLOCATION_ERROR;
345 uprv_free(key);
346 uprv_free(profile);
347 return NULL;
348 }
349
350 uprv_strcpy(key->name, name);
351
352 key->path=NULL;
353
354 if(path != NULL){
355 key->path = (char*) uprv_malloc(uprv_strlen(path)+1);
356 if(key->path == NULL){
357 *status = U_MEMORY_ALLOCATION_ERROR;
358 uprv_free(key->name);
359 uprv_free(key);
360 uprv_free(profile);
361 return NULL;
362 }
363 uprv_strcpy(key->path, path);
364 }
365
366 /* load the data */
367 if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
368 uprv_free(key->path);
369 uprv_free(key->name);
370 uprv_free(key);
371 uprv_free(profile);
372 return NULL;
373 }
374
375 /* get the options */
376 profile->doNFKC = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
377 profile->checkBiDi = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
378
379 if(profile->checkBiDi) {
380 profile->bdp = ubidi_getSingleton(status);
381 if(U_FAILURE(*status)) {
382 usprep_unload(profile);
383 uprv_free(key->path);
384 uprv_free(key->name);
385 uprv_free(key);
386 uprv_free(profile);
387 return NULL;
388 }
389 } else {
390 profile->bdp = NULL;
391 }
392
393 umtx_lock(&usprepMutex);
394 /* add the data object to the cache */
395 uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
396 umtx_unlock(&usprepMutex);
397 }
398 umtx_lock(&usprepMutex);
399 /* increment the refcount */
400 profile->refCount++;
401 umtx_unlock(&usprepMutex);
402
403 return profile;
404 }
405
406 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)407 usprep_open(const char* path,
408 const char* name,
409 UErrorCode* status){
410
411 if(status == NULL || U_FAILURE(*status)){
412 return NULL;
413 }
414 /* initialize the mutex */
415 usprep_init();
416
417 /* initialize the profile struct members */
418 return usprep_getProfile(path,name,status);
419 }
420
421 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)422 usprep_close(UStringPrepProfile* profile){
423 if(profile==NULL){
424 return;
425 }
426
427 umtx_lock(&usprepMutex);
428 /* decrement the ref count*/
429 if(profile->refCount > 0){
430 profile->refCount--;
431 }
432 umtx_unlock(&usprepMutex);
433
434 }
435
436 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)437 uprv_syntaxError(const UChar* rules,
438 int32_t pos,
439 int32_t rulesLen,
440 UParseError* parseError){
441 if(parseError == NULL){
442 return;
443 }
444 parseError->offset = pos;
445 parseError->line = 0 ; // we are not using line numbers
446
447 // for pre-context
448 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
449 int32_t limit = pos;
450
451 u_memcpy(parseError->preContext,rules+start,limit-start);
452 //null terminate the buffer
453 parseError->preContext[limit-start] = 0;
454
455 // for post-context; include error rules[pos]
456 start = pos;
457 limit = start + (U_PARSE_CONTEXT_LEN-1);
458 if (limit > rulesLen) {
459 limit = rulesLen;
460 }
461 if (start < rulesLen) {
462 u_memcpy(parseError->postContext,rules+start,limit-start);
463 }
464 //null terminate the buffer
465 parseError->postContext[limit-start]= 0;
466 }
467
468
469 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)470 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
471
472 UStringPrepType type;
473 if(trieWord == 0){
474 /*
475 * Initial value stored in the mapping table
476 * just return USPREP_TYPE_LIMIT .. so that
477 * the source codepoint is copied to the destination
478 */
479 type = USPREP_TYPE_LIMIT;
480 isIndex =FALSE;
481 value = 0;
482 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
483 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
484 isIndex =FALSE;
485 value = 0;
486 }else{
487 /* get the type */
488 type = USPREP_MAP;
489 /* ascertain if the value is index or delta */
490 if(trieWord & 0x02){
491 isIndex = TRUE;
492 value = trieWord >> 2; //mask off the lower 2 bits and shift
493 }else{
494 isIndex = FALSE;
495 value = (int16_t)trieWord;
496 value = (value >> 2);
497 }
498
499 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
500 type = USPREP_DELETE;
501 isIndex =FALSE;
502 value = 0;
503 }
504 }
505 return type;
506 }
507
508
509
510 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)511 usprep_map( const UStringPrepProfile* profile,
512 const UChar* src, int32_t srcLength,
513 UChar* dest, int32_t destCapacity,
514 int32_t options,
515 UParseError* parseError,
516 UErrorCode* status ){
517
518 uint16_t result;
519 int32_t destIndex=0;
520 int32_t srcIndex;
521 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
522 UStringPrepType type;
523 int16_t value;
524 UBool isIndex;
525 const int32_t* indexes = profile->indexes;
526
527 // no error checking the caller check for error and arguments
528 // no string length check the caller finds out the string length
529
530 for(srcIndex=0;srcIndex<srcLength;){
531 UChar32 ch;
532
533 U16_NEXT(src,srcIndex,srcLength,ch);
534
535 result=0;
536
537 UTRIE_GET16(&profile->sprepTrie,ch,result);
538
539 type = getValues(result, value, isIndex);
540
541 // check if the source codepoint is unassigned
542 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
543
544 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
545 *status = U_STRINGPREP_UNASSIGNED_ERROR;
546 return 0;
547
548 }else if(type == USPREP_MAP){
549
550 int32_t index, length;
551
552 if(isIndex){
553 index = value;
554 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
556 length = 1;
557 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
559 length = 2;
560 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
561 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
562 length = 3;
563 }else{
564 length = profile->mappingData[index++];
565
566 }
567
568 /* copy mapping to destination */
569 for(int32_t i=0; i< length; i++){
570 if(destIndex < destCapacity ){
571 dest[destIndex] = profile->mappingData[index+i];
572 }
573 destIndex++; /* for pre-flighting */
574 }
575 continue;
576 }else{
577 // subtract the delta to arrive at the code point
578 ch -= value;
579 }
580
581 }else if(type==USPREP_DELETE){
582 // just consume the codepoint and contine
583 continue;
584 }
585 //copy the code point into destination
586 if(ch <= 0xFFFF){
587 if(destIndex < destCapacity ){
588 dest[destIndex] = (UChar)ch;
589 }
590 destIndex++;
591 }else{
592 if(destIndex+1 < destCapacity ){
593 dest[destIndex] = U16_LEAD(ch);
594 dest[destIndex+1] = U16_TRAIL(ch);
595 }
596 destIndex +=2;
597 }
598
599 }
600
601 return u_terminateUChars(dest, destCapacity, destIndex, status);
602 }
603
604
605 static int32_t
usprep_normalize(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode * status)606 usprep_normalize( const UChar* src, int32_t srcLength,
607 UChar* dest, int32_t destCapacity,
608 UErrorCode* status ){
609 /*
610 * Option UNORM_BEFORE_PRI_29:
611 *
612 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
613 * requires strict adherence to Unicode 3.2 normalization,
614 * including buggy composition from before fixing Public Review Issue #29.
615 * Note that this results in some valid but nonsensical text to be
616 * either corrupted or rejected, depending on the text.
617 * See http://www.unicode.org/review/resolved-pri.html#pri29
618 * See unorm.cpp and cnormtst.c
619 */
620 return unorm_normalize(
621 src, srcLength,
622 UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
623 dest, destCapacity,
624 status);
625 }
626
627
628 /*
629 1) Map -- For each character in the input, check if it has a mapping
630 and, if so, replace it with its mapping.
631
632 2) Normalize -- Possibly normalize the result of step 1 using Unicode
633 normalization.
634
635 3) Prohibit -- Check for any characters that are not allowed in the
636 output. If any are found, return an error.
637
638 4) Check bidi -- Possibly check for right-to-left characters, and if
639 any are found, make sure that the whole string satisfies the
640 requirements for bidirectional strings. If the string does not
641 satisfy the requirements for bidirectional strings, return an
642 error.
643 [Unicode3.2] defines several bidirectional categories; each character
644 has one bidirectional category assigned to it. For the purposes of
645 the requirements below, an "RandALCat character" is a character that
646 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
is a character that has Unicode bidirectional category "L". Note
that there are many characters which fall in neither of the above
651 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
652 this because they have bidirectional category "EN".
653
654 In any profile that specifies bidirectional character handling, all
655 three of the following requirements MUST be met:
656
657 1) The characters in section 5.8 MUST be prohibited.
658
659 2) If a string contains any RandALCat character, the string MUST NOT
660 contain any LCat character.
661
662 3) If a string contains any RandALCat character, a RandALCat
663 character MUST be the first character of the string, and a
664 RandALCat character MUST be the last character of the string.
665 */
666
667 #define MAX_STACK_BUFFER_SIZE 300
668
669
670 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)671 usprep_prepare( const UStringPrepProfile* profile,
672 const UChar* src, int32_t srcLength,
673 UChar* dest, int32_t destCapacity,
674 int32_t options,
675 UParseError* parseError,
676 UErrorCode* status ){
677
678 // check error status
679 if(status == NULL || U_FAILURE(*status)){
680 return 0;
681 }
682
683 //check arguments
684 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
685 *status=U_ILLEGAL_ARGUMENT_ERROR;
686 return 0;
687 }
688
689 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
690 UChar *b1 = b1Stack, *b2 = b2Stack;
691 int32_t b1Len, b2Len=0,
692 b1Capacity = MAX_STACK_BUFFER_SIZE ,
693 b2Capacity = MAX_STACK_BUFFER_SIZE;
694 uint16_t result;
695 int32_t b2Index = 0;
696 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
697 UBool leftToRight=FALSE, rightToLeft=FALSE;
698 int32_t rtlPos =-1, ltrPos =-1;
699
700 //get the string length
701 if(srcLength == -1){
702 srcLength = u_strlen(src);
703 }
704 // map
705 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
706
707 if(*status == U_BUFFER_OVERFLOW_ERROR){
708 // redo processing of string
709 /* we do not have enough room so grow the buffer*/
710 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
711 if(b1==NULL){
712 *status = U_MEMORY_ALLOCATION_ERROR;
713 goto CLEANUP;
714 }
715
716 *status = U_ZERO_ERROR; // reset error
717
718 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
719
720 }
721
722 // normalize
723 if(profile->doNFKC == TRUE){
724 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
725
726 if(*status == U_BUFFER_OVERFLOW_ERROR){
727 // redo processing of string
728 /* we do not have enough room so grow the buffer*/
729 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
730 if(b2==NULL){
731 *status = U_MEMORY_ALLOCATION_ERROR;
732 goto CLEANUP;
733 }
734
735 *status = U_ZERO_ERROR; // reset error
736
737 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
738
739 }
740
741 }else{
742 b2 = b1;
743 b2Len = b1Len;
744 }
745
746
747 if(U_FAILURE(*status)){
748 goto CLEANUP;
749 }
750
751 UChar32 ch;
752 UStringPrepType type;
753 int16_t value;
754 UBool isIndex;
755
756 // Prohibit and checkBiDi in one pass
757 for(b2Index=0; b2Index<b2Len;){
758
759 ch = 0;
760
761 U16_NEXT(b2, b2Index, b2Len, ch);
762
763 UTRIE_GET16(&profile->sprepTrie,ch,result);
764
765 type = getValues(result, value, isIndex);
766
767 if( type == USPREP_PROHIBITED ||
768 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
769 ){
770 *status = U_STRINGPREP_PROHIBITED_ERROR;
771 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
772 goto CLEANUP;
773 }
774
775 if(profile->checkBiDi) {
776 direction = ubidi_getClass(profile->bdp, ch);
777 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
778 firstCharDir = direction;
779 }
780 if(direction == U_LEFT_TO_RIGHT){
781 leftToRight = TRUE;
782 ltrPos = b2Index-1;
783 }
784 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
785 rightToLeft = TRUE;
786 rtlPos = b2Index-1;
787 }
788 }
789 }
790 if(profile->checkBiDi == TRUE){
791 // satisfy 2
792 if( leftToRight == TRUE && rightToLeft == TRUE){
793 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
794 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
795 goto CLEANUP;
796 }
797
798 //satisfy 3
799 if( rightToLeft == TRUE &&
800 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
801 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
802 ){
803 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
804 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
805 return FALSE;
806 }
807 }
808 if(b2Len>0 && b2Len <= destCapacity){
809 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
810 }
811
812 CLEANUP:
813 if(b1!=b1Stack){
814 uprv_free(b1);
815 b1=NULL;
816 }
817
818 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
819 uprv_free(b2);
820 b2=NULL;
821 }
822 return u_terminateUChars(dest, destCapacity, b2Len, status);
823 }
824
825
826 /* data swapping ------------------------------------------------------------ */
827
828 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)829 usprep_swap(const UDataSwapper *ds,
830 const void *inData, int32_t length, void *outData,
831 UErrorCode *pErrorCode) {
832 const UDataInfo *pInfo;
833 int32_t headerSize;
834
835 const uint8_t *inBytes;
836 uint8_t *outBytes;
837
838 const int32_t *inIndexes;
839 int32_t indexes[16];
840
841 int32_t i, offset, count, size;
842
843 /* udata_swapDataHeader checks the arguments */
844 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
845 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
846 return 0;
847 }
848
849 /* check data format and format version */
850 pInfo=(const UDataInfo *)((const char *)inData+4);
851 if(!(
852 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
853 pInfo->dataFormat[1]==0x50 &&
854 pInfo->dataFormat[2]==0x52 &&
855 pInfo->dataFormat[3]==0x50 &&
856 pInfo->formatVersion[0]==3
857 )) {
858 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
859 pInfo->dataFormat[0], pInfo->dataFormat[1],
860 pInfo->dataFormat[2], pInfo->dataFormat[3],
861 pInfo->formatVersion[0]);
862 *pErrorCode=U_UNSUPPORTED_ERROR;
863 return 0;
864 }
865
866 inBytes=(const uint8_t *)inData+headerSize;
867 outBytes=(uint8_t *)outData+headerSize;
868
869 inIndexes=(const int32_t *)inBytes;
870
871 if(length>=0) {
872 length-=headerSize;
873 if(length<16*4) {
874 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
875 length);
876 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
877 return 0;
878 }
879 }
880
881 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
882 for(i=0; i<16; ++i) {
883 indexes[i]=udata_readInt32(ds, inIndexes[i]);
884 }
885
886 /* calculate the total length of the data */
887 size=
888 16*4+ /* size of indexes[] */
889 indexes[_SPREP_INDEX_TRIE_SIZE]+
890 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
891
892 if(length>=0) {
893 if(length<size) {
894 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
895 length);
896 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
897 return 0;
898 }
899
900 /* copy the data for inaccessible bytes */
901 if(inBytes!=outBytes) {
902 uprv_memcpy(outBytes, inBytes, size);
903 }
904
905 offset=0;
906
907 /* swap the int32_t indexes[] */
908 count=16*4;
909 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
910 offset+=count;
911
912 /* swap the UTrie */
913 count=indexes[_SPREP_INDEX_TRIE_SIZE];
914 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
915 offset+=count;
916
917 /* swap the uint16_t mappingTable[] */
918 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
919 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
920 offset+=count;
921 }
922
923 return headerSize+size;
924 }
925
926 #endif /* #if !UCONFIG_NO_IDNA */
927