1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2009, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA
20
21 #include "unicode/usprep.h"
22
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "unormimp.h"
36 #include "ubidi_props.h"
37
38 U_CDECL_BEGIN
39
40 /*
41 Static cache for already opened StringPrep profiles
42 */
43 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
44
45 static UMTX usprepMutex = NULL;
46
47 /* format version of spp file */
48 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
49
50 /* the Unicode version of the sprep data */
51 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
52
/*
 * Data file base names of the predefined profiles, indexed by
 * UStringPrepProfileType (see usprep_openByType()). The entries must stay
 * in the same order as the enum constants named in the per-entry comments.
 * The table is never modified, so the pointers as well as the strings are const.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
70
71 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)72 isSPrepAcceptable(void * /* context */,
73 const char * /* type */,
74 const char * /* name */,
75 const UDataInfo *pInfo) {
76 if(
77 pInfo->size>=20 &&
78 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
79 pInfo->charsetFamily==U_CHARSET_FAMILY &&
80 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
81 pInfo->dataFormat[1]==0x50 &&
82 pInfo->dataFormat[2]==0x52 &&
83 pInfo->dataFormat[3]==0x50 &&
84 pInfo->formatVersion[0]==3 &&
85 pInfo->formatVersion[2]==UTRIE_SHIFT &&
86 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
87 ) {
88 uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
89 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
90 return TRUE;
91 } else {
92 return FALSE;
93 }
94 }
95
/*
 * Folding-offset callback installed on the profile's UTrie by loadData():
 * the trie data word is returned unchanged, converted to int32_t.
 */
static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data) {
    const int32_t foldingOffset = (int32_t)data;
    return foldingOffset;
}
102
103 /* hashes an entry */
104 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)105 hashEntry(const UHashTok parm) {
106 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
107 UHashTok namekey, pathkey;
108 namekey.pointer = b->name;
109 pathkey.pointer = b->path;
110 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
111 }
112
113 /* compares two entries */
114 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)115 compareEntries(const UHashTok p1, const UHashTok p2) {
116 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
117 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
118 UHashTok name1, name2, path1, path2;
119 name1.pointer = b1->name;
120 name2.pointer = b2->name;
121 path1.pointer = b1->path;
122 path2.pointer = b2->path;
123 return ((UBool)(uhash_compareChars(name1, name2) &
124 uhash_compareChars(path1, path2)));
125 }
126
127 static void
usprep_unload(UStringPrepProfile * data)128 usprep_unload(UStringPrepProfile* data){
129 udata_close(data->sprepData);
130 }
131
132 static int32_t
usprep_internal_flushCache(UBool noRefCount)133 usprep_internal_flushCache(UBool noRefCount){
134 UStringPrepProfile *profile = NULL;
135 UStringPrepKey *key = NULL;
136 int32_t pos = -1;
137 int32_t deletedNum = 0;
138 const UHashElement *e;
139
140 /*
141 * if shared data hasn't even been lazy evaluated yet
142 * return 0
143 */
144 umtx_lock(&usprepMutex);
145 if (SHARED_DATA_HASHTABLE == NULL) {
146 umtx_unlock(&usprepMutex);
147 return 0;
148 }
149
150 /*creates an enumeration to iterate through every element in the table */
151 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
152 {
153 profile = (UStringPrepProfile *) e->value.pointer;
154 key = (UStringPrepKey *) e->key.pointer;
155
156 if ((noRefCount== FALSE && profile->refCount == 0) ||
157 noRefCount== TRUE) {
158 deletedNum++;
159 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
160
161 /* unload the data */
162 usprep_unload(profile);
163
164 if(key->name != NULL) {
165 uprv_free(key->name);
166 key->name=NULL;
167 }
168 if(key->path != NULL) {
169 uprv_free(key->path);
170 key->path=NULL;
171 }
172 uprv_free(profile);
173 uprv_free(key);
174 }
175
176 }
177 umtx_unlock(&usprepMutex);
178
179 return deletedNum;
180 }
181
182 /* Works just like ucnv_flushCache()
183 static int32_t
184 usprep_flushCache(){
185 return usprep_internal_flushCache(FALSE);
186 }
187 */
188
usprep_cleanup(void)189 static UBool U_CALLCONV usprep_cleanup(void){
190 if (SHARED_DATA_HASHTABLE != NULL) {
191 usprep_internal_flushCache(TRUE);
192 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
193 uhash_close(SHARED_DATA_HASHTABLE);
194 SHARED_DATA_HASHTABLE = NULL;
195 }
196 }
197
198 umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */
199 /* if the hash table still exists. The mutex */
200 /* will lazily re-init itself if needed. */
201 return (SHARED_DATA_HASHTABLE == NULL);
202 }
203 U_CDECL_END
204
205 static void
usprep_init()206 usprep_init() {
207 umtx_init(&usprepMutex);
208 }
209
210 /** Initializes the cache for resources */
211 static void
initCache(UErrorCode * status)212 initCache(UErrorCode *status) {
213 UBool makeCache;
214 UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE == NULL), makeCache);
215 if(makeCache) {
216 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
217 if (U_SUCCESS(*status)) {
218 umtx_lock(&usprepMutex);
219 if(SHARED_DATA_HASHTABLE == NULL) {
220 SHARED_DATA_HASHTABLE = newCache;
221 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
222 newCache = NULL;
223 }
224 umtx_unlock(&usprepMutex);
225 }
226 if(newCache != NULL) {
227 uhash_close(newCache);
228 }
229 }
230 }
231
232 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)233 loadData(UStringPrepProfile* profile,
234 const char* path,
235 const char* name,
236 const char* type,
237 UErrorCode* errorCode) {
238 /* load Unicode SPREP data from file */
239 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
240 UDataMemory *dataMemory;
241 const int32_t *p=NULL;
242 const uint8_t *pb;
243 UVersionInfo normUnicodeVersion;
244 int32_t normUniVer, sprepUniVer, normCorrVer;
245
246 if(errorCode==NULL || U_FAILURE(*errorCode)) {
247 return 0;
248 }
249
250 /* open the data outside the mutex block */
251 //TODO: change the path
252 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
253 if(U_FAILURE(*errorCode)) {
254 return FALSE;
255 }
256
257 p=(const int32_t *)udata_getMemory(dataMemory);
258 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
259 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
260 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
261
262
263 if(U_FAILURE(*errorCode)) {
264 udata_close(dataMemory);
265 return FALSE;
266 }
267
268 /* in the mutex block, set the data for this process */
269 umtx_lock(&usprepMutex);
270 if(profile->sprepData==NULL) {
271 profile->sprepData=dataMemory;
272 dataMemory=NULL;
273 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
274 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
275 } else {
276 p=(const int32_t *)udata_getMemory(profile->sprepData);
277 }
278 umtx_unlock(&usprepMutex);
279 /* initialize some variables */
280 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
281
282 unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
283 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
284 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
285 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
286 (dataVersion[2] << 8 ) + (dataVersion[3]);
287 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
288
289 if(U_FAILURE(*errorCode)){
290 udata_close(dataMemory);
291 return FALSE;
292 }
293 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
294 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
295 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
296 ){
297 *errorCode = U_INVALID_FORMAT_ERROR;
298 udata_close(dataMemory);
299 return FALSE;
300 }
301 profile->isDataLoaded = TRUE;
302
303 /* if a different thread set it first, then close the extra data */
304 if(dataMemory!=NULL) {
305 udata_close(dataMemory); /* NULL if it was set correctly */
306 }
307
308
309 return profile->isDataLoaded;
310 }
311
312 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)313 usprep_getProfile(const char* path,
314 const char* name,
315 UErrorCode *status){
316
317 UStringPrepProfile* profile = NULL;
318
319 initCache(status);
320
321 if(U_FAILURE(*status)){
322 return NULL;
323 }
324
325 UStringPrepKey stackKey;
326 /*
327 * const is cast way to save malloc, strcpy and free calls
328 * we use the passed in pointers for fetching the data from the
329 * hash table which is safe
330 */
331 stackKey.name = (char*) name;
332 stackKey.path = (char*) path;
333
334 /* fetch the data from the cache */
335 umtx_lock(&usprepMutex);
336 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
337 umtx_unlock(&usprepMutex);
338
339 if(profile == NULL){
340 UStringPrepKey* key = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
341 if(key == NULL){
342 *status = U_MEMORY_ALLOCATION_ERROR;
343 return NULL;
344 }
345 /* else load the data and put the data in the cache */
346 profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
347 if(profile == NULL){
348 *status = U_MEMORY_ALLOCATION_ERROR;
349 uprv_free(key);
350 return NULL;
351 }
352
353 /* initialize the data struct members */
354 uprv_memset(profile->indexes,0,sizeof(profile->indexes));
355 profile->mappingData = NULL;
356 profile->sprepData = NULL;
357 profile->refCount = 0;
358
359 /* initialize the key memebers */
360 key->name = (char*) uprv_malloc(uprv_strlen(name)+1);
361 if(key->name == NULL){
362 *status = U_MEMORY_ALLOCATION_ERROR;
363 uprv_free(key);
364 uprv_free(profile);
365 return NULL;
366 }
367
368 uprv_strcpy(key->name, name);
369
370 key->path=NULL;
371
372 if(path != NULL){
373 key->path = (char*) uprv_malloc(uprv_strlen(path)+1);
374 if(key->path == NULL){
375 *status = U_MEMORY_ALLOCATION_ERROR;
376 uprv_free(key->name);
377 uprv_free(key);
378 uprv_free(profile);
379 return NULL;
380 }
381 uprv_strcpy(key->path, path);
382 }
383
384 /* load the data */
385 if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
386 uprv_free(key->path);
387 uprv_free(key->name);
388 uprv_free(key);
389 uprv_free(profile);
390 return NULL;
391 }
392
393 /* get the options */
394 profile->doNFKC = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
395 profile->checkBiDi = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
396
397 if(profile->checkBiDi) {
398 profile->bdp = ubidi_getSingleton(status);
399 if(U_FAILURE(*status)) {
400 usprep_unload(profile);
401 uprv_free(key->path);
402 uprv_free(key->name);
403 uprv_free(key);
404 uprv_free(profile);
405 return NULL;
406 }
407 } else {
408 profile->bdp = NULL;
409 }
410
411 umtx_lock(&usprepMutex);
412 /* add the data object to the cache */
413 uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
414 umtx_unlock(&usprepMutex);
415 }
416 umtx_lock(&usprepMutex);
417 /* increment the refcount */
418 profile->refCount++;
419 umtx_unlock(&usprepMutex);
420
421 return profile;
422 }
423
424 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)425 usprep_open(const char* path,
426 const char* name,
427 UErrorCode* status){
428
429 if(status == NULL || U_FAILURE(*status)){
430 return NULL;
431 }
432 /* initialize the mutex */
433 usprep_init();
434
435 /* initialize the profile struct members */
436 return usprep_getProfile(path,name,status);
437 }
438
439 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)440 usprep_openByType(UStringPrepProfileType type,
441 UErrorCode* status) {
442 if(status == NULL || U_FAILURE(*status)){
443 return NULL;
444 }
445 int32_t index = (int32_t)type;
446 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
447 *status = U_ILLEGAL_ARGUMENT_ERROR;
448 return NULL;
449 }
450 return usprep_open(NULL, PROFILE_NAMES[index], status);
451 }
452
453 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)454 usprep_close(UStringPrepProfile* profile){
455 if(profile==NULL){
456 return;
457 }
458
459 umtx_lock(&usprepMutex);
460 /* decrement the ref count*/
461 if(profile->refCount > 0){
462 profile->refCount--;
463 }
464 umtx_unlock(&usprepMutex);
465
466 }
467
468 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)469 uprv_syntaxError(const UChar* rules,
470 int32_t pos,
471 int32_t rulesLen,
472 UParseError* parseError){
473 if(parseError == NULL){
474 return;
475 }
476 parseError->offset = pos;
477 parseError->line = 0 ; // we are not using line numbers
478
479 // for pre-context
480 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
481 int32_t limit = pos;
482
483 u_memcpy(parseError->preContext,rules+start,limit-start);
484 //null terminate the buffer
485 parseError->preContext[limit-start] = 0;
486
487 // for post-context; include error rules[pos]
488 start = pos;
489 limit = start + (U_PARSE_CONTEXT_LEN-1);
490 if (limit > rulesLen) {
491 limit = rulesLen;
492 }
493 if (start < rulesLen) {
494 u_memcpy(parseError->postContext,rules+start,limit-start);
495 }
496 //null terminate the buffer
497 parseError->postContext[limit-start]= 0;
498 }
499
500
501 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)502 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
503
504 UStringPrepType type;
505 if(trieWord == 0){
506 /*
507 * Initial value stored in the mapping table
508 * just return USPREP_TYPE_LIMIT .. so that
509 * the source codepoint is copied to the destination
510 */
511 type = USPREP_TYPE_LIMIT;
512 isIndex =FALSE;
513 value = 0;
514 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
515 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
516 isIndex =FALSE;
517 value = 0;
518 }else{
519 /* get the type */
520 type = USPREP_MAP;
521 /* ascertain if the value is index or delta */
522 if(trieWord & 0x02){
523 isIndex = TRUE;
524 value = trieWord >> 2; //mask off the lower 2 bits and shift
525 }else{
526 isIndex = FALSE;
527 value = (int16_t)trieWord;
528 value = (value >> 2);
529 }
530
531 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
532 type = USPREP_DELETE;
533 isIndex =FALSE;
534 value = 0;
535 }
536 }
537 return type;
538 }
539
540
541
542 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)543 usprep_map( const UStringPrepProfile* profile,
544 const UChar* src, int32_t srcLength,
545 UChar* dest, int32_t destCapacity,
546 int32_t options,
547 UParseError* parseError,
548 UErrorCode* status ){
549
550 uint16_t result;
551 int32_t destIndex=0;
552 int32_t srcIndex;
553 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
554 UStringPrepType type;
555 int16_t value;
556 UBool isIndex;
557 const int32_t* indexes = profile->indexes;
558
559 // no error checking the caller check for error and arguments
560 // no string length check the caller finds out the string length
561
562 for(srcIndex=0;srcIndex<srcLength;){
563 UChar32 ch;
564
565 U16_NEXT(src,srcIndex,srcLength,ch);
566
567 result=0;
568
569 UTRIE_GET16(&profile->sprepTrie,ch,result);
570
571 type = getValues(result, value, isIndex);
572
573 // check if the source codepoint is unassigned
574 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
575
576 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
577 *status = U_STRINGPREP_UNASSIGNED_ERROR;
578 return 0;
579
580 }else if(type == USPREP_MAP){
581
582 int32_t index, length;
583
584 if(isIndex){
585 index = value;
586 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
587 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
588 length = 1;
589 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
590 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
591 length = 2;
592 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
593 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
594 length = 3;
595 }else{
596 length = profile->mappingData[index++];
597
598 }
599
600 /* copy mapping to destination */
601 for(int32_t i=0; i< length; i++){
602 if(destIndex < destCapacity ){
603 dest[destIndex] = profile->mappingData[index+i];
604 }
605 destIndex++; /* for pre-flighting */
606 }
607 continue;
608 }else{
609 // subtract the delta to arrive at the code point
610 ch -= value;
611 }
612
613 }else if(type==USPREP_DELETE){
614 // just consume the codepoint and contine
615 continue;
616 }
617 //copy the code point into destination
618 if(ch <= 0xFFFF){
619 if(destIndex < destCapacity ){
620 dest[destIndex] = (UChar)ch;
621 }
622 destIndex++;
623 }else{
624 if(destIndex+1 < destCapacity ){
625 dest[destIndex] = U16_LEAD(ch);
626 dest[destIndex+1] = U16_TRAIL(ch);
627 }
628 destIndex +=2;
629 }
630
631 }
632
633 return u_terminateUChars(dest, destCapacity, destIndex, status);
634 }
635
636
637 static int32_t
usprep_normalize(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode * status)638 usprep_normalize( const UChar* src, int32_t srcLength,
639 UChar* dest, int32_t destCapacity,
640 UErrorCode* status ){
641 /*
642 * Option UNORM_BEFORE_PRI_29:
643 *
644 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
645 * requires strict adherence to Unicode 3.2 normalization,
646 * including buggy composition from before fixing Public Review Issue #29.
647 * Note that this results in some valid but nonsensical text to be
648 * either corrupted or rejected, depending on the text.
649 * See http://www.unicode.org/review/resolved-pri.html#pri29
650 * See unorm.cpp and cnormtst.c
651 */
652 return unorm_normalize(
653 src, srcLength,
654 UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
655 dest, destCapacity,
656 status);
657 }
658
659
660 /*
661 1) Map -- For each character in the input, check if it has a mapping
662 and, if so, replace it with its mapping.
663
664 2) Normalize -- Possibly normalize the result of step 1 using Unicode
665 normalization.
666
667 3) Prohibit -- Check for any characters that are not allowed in the
668 output. If any are found, return an error.
669
670 4) Check bidi -- Possibly check for right-to-left characters, and if
671 any are found, make sure that the whole string satisfies the
672 requirements for bidirectional strings. If the string does not
673 satisfy the requirements for bidirectional strings, return an
674 error.
675 [Unicode3.2] defines several bidirectional categories; each character
676 has one bidirectional category assigned to it. For the purposes of
677 the requirements below, an "RandALCat character" is a character that
678 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
679 is a character that has Unicode bidirectional category "L". Note
680
681
682 that there are many characters which fall in neither of the above
683 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
684 this because they have bidirectional category "EN".
685
686 In any profile that specifies bidirectional character handling, all
687 three of the following requirements MUST be met:
688
689 1) The characters in section 5.8 MUST be prohibited.
690
691 2) If a string contains any RandALCat character, the string MUST NOT
692 contain any LCat character.
693
694 3) If a string contains any RandALCat character, a RandALCat
695 character MUST be the first character of the string, and a
696 RandALCat character MUST be the last character of the string.
697 */
698
699 #define MAX_STACK_BUFFER_SIZE 300
700
701
702 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)703 usprep_prepare( const UStringPrepProfile* profile,
704 const UChar* src, int32_t srcLength,
705 UChar* dest, int32_t destCapacity,
706 int32_t options,
707 UParseError* parseError,
708 UErrorCode* status ){
709
710 // check error status
711 if(status == NULL || U_FAILURE(*status)){
712 return 0;
713 }
714
715 //check arguments
716 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
717 *status=U_ILLEGAL_ARGUMENT_ERROR;
718 return 0;
719 }
720
721 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
722 UChar *b1 = b1Stack, *b2 = b2Stack;
723 int32_t b1Len, b2Len=0,
724 b1Capacity = MAX_STACK_BUFFER_SIZE ,
725 b2Capacity = MAX_STACK_BUFFER_SIZE;
726 uint16_t result;
727 int32_t b2Index = 0;
728 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
729 UBool leftToRight=FALSE, rightToLeft=FALSE;
730 int32_t rtlPos =-1, ltrPos =-1;
731
732 //get the string length
733 if(srcLength == -1){
734 srcLength = u_strlen(src);
735 }
736 // map
737 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
738
739 if(*status == U_BUFFER_OVERFLOW_ERROR){
740 // redo processing of string
741 /* we do not have enough room so grow the buffer*/
742 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
743 if(b1==NULL){
744 *status = U_MEMORY_ALLOCATION_ERROR;
745 goto CLEANUP;
746 }
747
748 *status = U_ZERO_ERROR; // reset error
749
750 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
751
752 }
753
754 // normalize
755 if(profile->doNFKC == TRUE){
756 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
757
758 if(*status == U_BUFFER_OVERFLOW_ERROR){
759 // redo processing of string
760 /* we do not have enough room so grow the buffer*/
761 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
762 if(b2==NULL){
763 *status = U_MEMORY_ALLOCATION_ERROR;
764 goto CLEANUP;
765 }
766
767 *status = U_ZERO_ERROR; // reset error
768
769 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
770
771 }
772
773 }else{
774 b2 = b1;
775 b2Len = b1Len;
776 }
777
778
779 if(U_FAILURE(*status)){
780 goto CLEANUP;
781 }
782
783 UChar32 ch;
784 UStringPrepType type;
785 int16_t value;
786 UBool isIndex;
787
788 // Prohibit and checkBiDi in one pass
789 for(b2Index=0; b2Index<b2Len;){
790
791 ch = 0;
792
793 U16_NEXT(b2, b2Index, b2Len, ch);
794
795 UTRIE_GET16(&profile->sprepTrie,ch,result);
796
797 type = getValues(result, value, isIndex);
798
799 if( type == USPREP_PROHIBITED ||
800 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
801 ){
802 *status = U_STRINGPREP_PROHIBITED_ERROR;
803 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
804 goto CLEANUP;
805 }
806
807 if(profile->checkBiDi) {
808 direction = ubidi_getClass(profile->bdp, ch);
809 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
810 firstCharDir = direction;
811 }
812 if(direction == U_LEFT_TO_RIGHT){
813 leftToRight = TRUE;
814 ltrPos = b2Index-1;
815 }
816 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
817 rightToLeft = TRUE;
818 rtlPos = b2Index-1;
819 }
820 }
821 }
822 if(profile->checkBiDi == TRUE){
823 // satisfy 2
824 if( leftToRight == TRUE && rightToLeft == TRUE){
825 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
826 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
827 goto CLEANUP;
828 }
829
830 //satisfy 3
831 if( rightToLeft == TRUE &&
832 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
833 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
834 ){
835 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
836 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
837 return FALSE;
838 }
839 }
840 if(b2Len>0 && b2Len <= destCapacity){
841 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
842 }
843
844 CLEANUP:
845 if(b1!=b1Stack){
846 uprv_free(b1);
847 b1=NULL;
848 }
849
850 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
851 uprv_free(b2);
852 b2=NULL;
853 }
854 return u_terminateUChars(dest, destCapacity, b2Len, status);
855 }
856
857
858 /* data swapping ------------------------------------------------------------ */
859
860 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)861 usprep_swap(const UDataSwapper *ds,
862 const void *inData, int32_t length, void *outData,
863 UErrorCode *pErrorCode) {
864 const UDataInfo *pInfo;
865 int32_t headerSize;
866
867 const uint8_t *inBytes;
868 uint8_t *outBytes;
869
870 const int32_t *inIndexes;
871 int32_t indexes[16];
872
873 int32_t i, offset, count, size;
874
875 /* udata_swapDataHeader checks the arguments */
876 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
877 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
878 return 0;
879 }
880
881 /* check data format and format version */
882 pInfo=(const UDataInfo *)((const char *)inData+4);
883 if(!(
884 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
885 pInfo->dataFormat[1]==0x50 &&
886 pInfo->dataFormat[2]==0x52 &&
887 pInfo->dataFormat[3]==0x50 &&
888 pInfo->formatVersion[0]==3
889 )) {
890 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
891 pInfo->dataFormat[0], pInfo->dataFormat[1],
892 pInfo->dataFormat[2], pInfo->dataFormat[3],
893 pInfo->formatVersion[0]);
894 *pErrorCode=U_UNSUPPORTED_ERROR;
895 return 0;
896 }
897
898 inBytes=(const uint8_t *)inData+headerSize;
899 outBytes=(uint8_t *)outData+headerSize;
900
901 inIndexes=(const int32_t *)inBytes;
902
903 if(length>=0) {
904 length-=headerSize;
905 if(length<16*4) {
906 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
907 length);
908 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
909 return 0;
910 }
911 }
912
913 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
914 for(i=0; i<16; ++i) {
915 indexes[i]=udata_readInt32(ds, inIndexes[i]);
916 }
917
918 /* calculate the total length of the data */
919 size=
920 16*4+ /* size of indexes[] */
921 indexes[_SPREP_INDEX_TRIE_SIZE]+
922 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
923
924 if(length>=0) {
925 if(length<size) {
926 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
927 length);
928 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
929 return 0;
930 }
931
932 /* copy the data for inaccessible bytes */
933 if(inBytes!=outBytes) {
934 uprv_memcpy(outBytes, inBytes, size);
935 }
936
937 offset=0;
938
939 /* swap the int32_t indexes[] */
940 count=16*4;
941 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
942 offset+=count;
943
944 /* swap the UTrie */
945 count=indexes[_SPREP_INDEX_TRIE_SIZE];
946 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
947 offset+=count;
948
949 /* swap the uint16_t mappingTable[] */
950 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
951 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
952 offset+=count;
953 }
954
955 return headerSize+size;
956 }
957
958 #endif /* #if !UCONFIG_NO_IDNA */
959