1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA
20
21 #include "unicode/usprep.h"
22
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "ubidi_props.h"
36
37 U_NAMESPACE_USE
38
39 U_CDECL_BEGIN
40
41 /*
42 Static cache for already opened StringPrep profiles
43 */
44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
45
46 static UMTX usprepMutex = NULL;
47
48 /* format version of spp file */
49 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
50
51 /* the Unicode version of the sprep data */
52 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
53
54 /* Profile names must be aligned to UStringPrepProfileType */
55 static const char *PROFILE_NAMES[] = {
56 "rfc3491", /* USPREP_RFC3491_NAMEPREP */
57 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
58 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
59 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */
60 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
61 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
62 "rfc3722", /* USPREP_RFC3722_ISCSI */
63 "rfc3920node", /* USPREP_RFC3920_NODEPREP */
64 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
65 "rfc4011", /* USPREP_RFC4011_MIB */
66 "rfc4013", /* USPREP_RFC4013_SASLPREP */
67 "rfc4505", /* USPREP_RFC4505_TRACE */
68 "rfc4518", /* USPREP_RFC4518_LDAP */
69 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
70 };
71
72 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)73 isSPrepAcceptable(void * /* context */,
74 const char * /* type */,
75 const char * /* name */,
76 const UDataInfo *pInfo) {
77 if(
78 pInfo->size>=20 &&
79 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
80 pInfo->charsetFamily==U_CHARSET_FAMILY &&
81 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
82 pInfo->dataFormat[1]==0x50 &&
83 pInfo->dataFormat[2]==0x52 &&
84 pInfo->dataFormat[3]==0x50 &&
85 pInfo->formatVersion[0]==3 &&
86 pInfo->formatVersion[2]==UTRIE_SHIFT &&
87 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
88 ) {
89 uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
90 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
91 return TRUE;
92 } else {
93 return FALSE;
94 }
95 }
96
97 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)98 getSPrepFoldingOffset(uint32_t data) {
99
100 return (int32_t)data;
101
102 }
103
104 /* hashes an entry */
105 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)106 hashEntry(const UHashTok parm) {
107 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
108 UHashTok namekey, pathkey;
109 namekey.pointer = b->name;
110 pathkey.pointer = b->path;
111 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
112 }
113
114 /* compares two entries */
115 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)116 compareEntries(const UHashTok p1, const UHashTok p2) {
117 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
118 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
119 UHashTok name1, name2, path1, path2;
120 name1.pointer = b1->name;
121 name2.pointer = b2->name;
122 path1.pointer = b1->path;
123 path2.pointer = b2->path;
124 return ((UBool)(uhash_compareChars(name1, name2) &
125 uhash_compareChars(path1, path2)));
126 }
127
128 static void
usprep_unload(UStringPrepProfile * data)129 usprep_unload(UStringPrepProfile* data){
130 udata_close(data->sprepData);
131 }
132
133 static int32_t
usprep_internal_flushCache(UBool noRefCount)134 usprep_internal_flushCache(UBool noRefCount){
135 UStringPrepProfile *profile = NULL;
136 UStringPrepKey *key = NULL;
137 int32_t pos = -1;
138 int32_t deletedNum = 0;
139 const UHashElement *e;
140
141 /*
142 * if shared data hasn't even been lazy evaluated yet
143 * return 0
144 */
145 umtx_lock(&usprepMutex);
146 if (SHARED_DATA_HASHTABLE == NULL) {
147 umtx_unlock(&usprepMutex);
148 return 0;
149 }
150
151 /*creates an enumeration to iterate through every element in the table */
152 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
153 {
154 profile = (UStringPrepProfile *) e->value.pointer;
155 key = (UStringPrepKey *) e->key.pointer;
156
157 if ((noRefCount== FALSE && profile->refCount == 0) ||
158 noRefCount== TRUE) {
159 deletedNum++;
160 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
161
162 /* unload the data */
163 usprep_unload(profile);
164
165 if(key->name != NULL) {
166 uprv_free(key->name);
167 key->name=NULL;
168 }
169 if(key->path != NULL) {
170 uprv_free(key->path);
171 key->path=NULL;
172 }
173 uprv_free(profile);
174 uprv_free(key);
175 }
176
177 }
178 umtx_unlock(&usprepMutex);
179
180 return deletedNum;
181 }
182
183 /* Works just like ucnv_flushCache()
184 static int32_t
185 usprep_flushCache(){
186 return usprep_internal_flushCache(FALSE);
187 }
188 */
189
usprep_cleanup(void)190 static UBool U_CALLCONV usprep_cleanup(void){
191 if (SHARED_DATA_HASHTABLE != NULL) {
192 usprep_internal_flushCache(TRUE);
193 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
194 uhash_close(SHARED_DATA_HASHTABLE);
195 SHARED_DATA_HASHTABLE = NULL;
196 }
197 }
198
199 umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */
200 /* if the hash table still exists. The mutex */
201 /* will lazily re-init itself if needed. */
202 return (SHARED_DATA_HASHTABLE == NULL);
203 }
204 U_CDECL_END
205
206
207 /** Initializes the cache for resources */
208 static void
initCache(UErrorCode * status)209 initCache(UErrorCode *status) {
210 UBool makeCache;
211 UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE == NULL), makeCache);
212 if(makeCache) {
213 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
214 if (U_SUCCESS(*status)) {
215 umtx_lock(&usprepMutex);
216 if(SHARED_DATA_HASHTABLE == NULL) {
217 SHARED_DATA_HASHTABLE = newCache;
218 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
219 newCache = NULL;
220 }
221 umtx_unlock(&usprepMutex);
222 }
223 if(newCache != NULL) {
224 uhash_close(newCache);
225 }
226 }
227 }
228
229 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)230 loadData(UStringPrepProfile* profile,
231 const char* path,
232 const char* name,
233 const char* type,
234 UErrorCode* errorCode) {
235 /* load Unicode SPREP data from file */
236 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
237 UDataMemory *dataMemory;
238 const int32_t *p=NULL;
239 const uint8_t *pb;
240 UVersionInfo normUnicodeVersion;
241 int32_t normUniVer, sprepUniVer, normCorrVer;
242
243 if(errorCode==NULL || U_FAILURE(*errorCode)) {
244 return 0;
245 }
246
247 /* open the data outside the mutex block */
248 //TODO: change the path
249 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
250 if(U_FAILURE(*errorCode)) {
251 return FALSE;
252 }
253
254 p=(const int32_t *)udata_getMemory(dataMemory);
255 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
256 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
257 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
258
259
260 if(U_FAILURE(*errorCode)) {
261 udata_close(dataMemory);
262 return FALSE;
263 }
264
265 /* in the mutex block, set the data for this process */
266 umtx_lock(&usprepMutex);
267 if(profile->sprepData==NULL) {
268 profile->sprepData=dataMemory;
269 dataMemory=NULL;
270 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
271 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
272 } else {
273 p=(const int32_t *)udata_getMemory(profile->sprepData);
274 }
275 umtx_unlock(&usprepMutex);
276 /* initialize some variables */
277 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
278
279 u_getUnicodeVersion(normUnicodeVersion);
280 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
281 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
282 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
283 (dataVersion[2] << 8 ) + (dataVersion[3]);
284 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
285
286 if(U_FAILURE(*errorCode)){
287 udata_close(dataMemory);
288 return FALSE;
289 }
290 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
291 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
292 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
293 ){
294 *errorCode = U_INVALID_FORMAT_ERROR;
295 udata_close(dataMemory);
296 return FALSE;
297 }
298 profile->isDataLoaded = TRUE;
299
300 /* if a different thread set it first, then close the extra data */
301 if(dataMemory!=NULL) {
302 udata_close(dataMemory); /* NULL if it was set correctly */
303 }
304
305
306 return profile->isDataLoaded;
307 }
308
309 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)310 usprep_getProfile(const char* path,
311 const char* name,
312 UErrorCode *status){
313
314 UStringPrepProfile* profile = NULL;
315
316 initCache(status);
317
318 if(U_FAILURE(*status)){
319 return NULL;
320 }
321
322 UStringPrepKey stackKey;
323 /*
324 * const is cast way to save malloc, strcpy and free calls
325 * we use the passed in pointers for fetching the data from the
326 * hash table which is safe
327 */
328 stackKey.name = (char*) name;
329 stackKey.path = (char*) path;
330
331 /* fetch the data from the cache */
332 umtx_lock(&usprepMutex);
333 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
334 if(profile != NULL) {
335 profile->refCount++;
336 }
337 umtx_unlock(&usprepMutex);
338
339 if(profile == NULL) {
340 /* else load the data and put the data in the cache */
341 LocalMemory<UStringPrepProfile> newProfile;
342 if(newProfile.allocateInsteadAndReset() == NULL) {
343 *status = U_MEMORY_ALLOCATION_ERROR;
344 return NULL;
345 }
346
347 /* load the data */
348 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
349 return NULL;
350 }
351
352 /* get the options */
353 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
354 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
355
356 if(newProfile->checkBiDi) {
357 newProfile->bdp = ubidi_getSingleton();
358 }
359
360 LocalMemory<UStringPrepKey> key;
361 LocalMemory<char> keyName;
362 LocalMemory<char> keyPath;
363 if( key.allocateInsteadAndReset() == NULL ||
364 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
365 (path != NULL &&
366 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
367 ) {
368 *status = U_MEMORY_ALLOCATION_ERROR;
369 usprep_unload(newProfile.getAlias());
370 return NULL;
371 }
372
373 umtx_lock(&usprepMutex);
374 // If another thread already inserted the same key/value, refcount and cleanup our thread data
375 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
376 if(profile != NULL) {
377 profile->refCount++;
378 usprep_unload(newProfile.getAlias());
379 }
380 else {
381 /* initialize the key members */
382 key->name = keyName.orphan();
383 uprv_strcpy(key->name, name);
384 if(path != NULL){
385 key->path = keyPath.orphan();
386 uprv_strcpy(key->path, path);
387 }
388 profile = newProfile.orphan();
389
390 /* add the data object to the cache */
391 profile->refCount = 1;
392 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
393 }
394 umtx_unlock(&usprepMutex);
395 }
396
397 return profile;
398 }
399
400 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)401 usprep_open(const char* path,
402 const char* name,
403 UErrorCode* status){
404
405 if(status == NULL || U_FAILURE(*status)){
406 return NULL;
407 }
408
409 /* initialize the profile struct members */
410 return usprep_getProfile(path,name,status);
411 }
412
413 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)414 usprep_openByType(UStringPrepProfileType type,
415 UErrorCode* status) {
416 if(status == NULL || U_FAILURE(*status)){
417 return NULL;
418 }
419 int32_t index = (int32_t)type;
420 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
421 *status = U_ILLEGAL_ARGUMENT_ERROR;
422 return NULL;
423 }
424 return usprep_open(NULL, PROFILE_NAMES[index], status);
425 }
426
427 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)428 usprep_close(UStringPrepProfile* profile){
429 if(profile==NULL){
430 return;
431 }
432
433 umtx_lock(&usprepMutex);
434 /* decrement the ref count*/
435 if(profile->refCount > 0){
436 profile->refCount--;
437 }
438 umtx_unlock(&usprepMutex);
439
440 }
441
442 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)443 uprv_syntaxError(const UChar* rules,
444 int32_t pos,
445 int32_t rulesLen,
446 UParseError* parseError){
447 if(parseError == NULL){
448 return;
449 }
450 parseError->offset = pos;
451 parseError->line = 0 ; // we are not using line numbers
452
453 // for pre-context
454 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
455 int32_t limit = pos;
456
457 u_memcpy(parseError->preContext,rules+start,limit-start);
458 //null terminate the buffer
459 parseError->preContext[limit-start] = 0;
460
461 // for post-context; include error rules[pos]
462 start = pos;
463 limit = start + (U_PARSE_CONTEXT_LEN-1);
464 if (limit > rulesLen) {
465 limit = rulesLen;
466 }
467 if (start < rulesLen) {
468 u_memcpy(parseError->postContext,rules+start,limit-start);
469 }
470 //null terminate the buffer
471 parseError->postContext[limit-start]= 0;
472 }
473
474
475 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)476 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
477
478 UStringPrepType type;
479 if(trieWord == 0){
480 /*
481 * Initial value stored in the mapping table
482 * just return USPREP_TYPE_LIMIT .. so that
483 * the source codepoint is copied to the destination
484 */
485 type = USPREP_TYPE_LIMIT;
486 isIndex =FALSE;
487 value = 0;
488 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
489 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
490 isIndex =FALSE;
491 value = 0;
492 }else{
493 /* get the type */
494 type = USPREP_MAP;
495 /* ascertain if the value is index or delta */
496 if(trieWord & 0x02){
497 isIndex = TRUE;
498 value = trieWord >> 2; //mask off the lower 2 bits and shift
499 }else{
500 isIndex = FALSE;
501 value = (int16_t)trieWord;
502 value = (value >> 2);
503 }
504
505 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
506 type = USPREP_DELETE;
507 isIndex =FALSE;
508 value = 0;
509 }
510 }
511 return type;
512 }
513
514
515
516 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)517 usprep_map( const UStringPrepProfile* profile,
518 const UChar* src, int32_t srcLength,
519 UChar* dest, int32_t destCapacity,
520 int32_t options,
521 UParseError* parseError,
522 UErrorCode* status ){
523
524 uint16_t result;
525 int32_t destIndex=0;
526 int32_t srcIndex;
527 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
528 UStringPrepType type;
529 int16_t value;
530 UBool isIndex;
531 const int32_t* indexes = profile->indexes;
532
533 // no error checking the caller check for error and arguments
534 // no string length check the caller finds out the string length
535
536 for(srcIndex=0;srcIndex<srcLength;){
537 UChar32 ch;
538
539 U16_NEXT(src,srcIndex,srcLength,ch);
540
541 result=0;
542
543 UTRIE_GET16(&profile->sprepTrie,ch,result);
544
545 type = getValues(result, value, isIndex);
546
547 // check if the source codepoint is unassigned
548 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
549
550 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
551 *status = U_STRINGPREP_UNASSIGNED_ERROR;
552 return 0;
553
554 }else if(type == USPREP_MAP){
555
556 int32_t index, length;
557
558 if(isIndex){
559 index = value;
560 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
561 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
562 length = 1;
563 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
564 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
565 length = 2;
566 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
567 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
568 length = 3;
569 }else{
570 length = profile->mappingData[index++];
571
572 }
573
574 /* copy mapping to destination */
575 for(int32_t i=0; i< length; i++){
576 if(destIndex < destCapacity ){
577 dest[destIndex] = profile->mappingData[index+i];
578 }
579 destIndex++; /* for pre-flighting */
580 }
581 continue;
582 }else{
583 // subtract the delta to arrive at the code point
584 ch -= value;
585 }
586
587 }else if(type==USPREP_DELETE){
588 // just consume the codepoint and contine
589 continue;
590 }
591 //copy the code point into destination
592 if(ch <= 0xFFFF){
593 if(destIndex < destCapacity ){
594 dest[destIndex] = (UChar)ch;
595 }
596 destIndex++;
597 }else{
598 if(destIndex+1 < destCapacity ){
599 dest[destIndex] = U16_LEAD(ch);
600 dest[destIndex+1] = U16_TRAIL(ch);
601 }
602 destIndex +=2;
603 }
604
605 }
606
607 return u_terminateUChars(dest, destCapacity, destIndex, status);
608 }
609
610
611 static int32_t
usprep_normalize(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode * status)612 usprep_normalize( const UChar* src, int32_t srcLength,
613 UChar* dest, int32_t destCapacity,
614 UErrorCode* status ){
615 return unorm_normalize(
616 src, srcLength,
617 UNORM_NFKC, UNORM_UNICODE_3_2,
618 dest, destCapacity,
619 status);
620 }
621
622
623 /*
624 1) Map -- For each character in the input, check if it has a mapping
625 and, if so, replace it with its mapping.
626
627 2) Normalize -- Possibly normalize the result of step 1 using Unicode
628 normalization.
629
630 3) Prohibit -- Check for any characters that are not allowed in the
631 output. If any are found, return an error.
632
633 4) Check bidi -- Possibly check for right-to-left characters, and if
634 any are found, make sure that the whole string satisfies the
635 requirements for bidirectional strings. If the string does not
636 satisfy the requirements for bidirectional strings, return an
637 error.
638 [Unicode3.2] defines several bidirectional categories; each character
639 has one bidirectional category assigned to it. For the purposes of
640 the requirements below, an "RandALCat character" is a character that
641 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
is a character that has Unicode bidirectional category "L". Note
that there are many characters which fall in neither of the above
646 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
647 this because they have bidirectional category "EN".
648
649 In any profile that specifies bidirectional character handling, all
650 three of the following requirements MUST be met:
651
652 1) The characters in section 5.8 MUST be prohibited.
653
654 2) If a string contains any RandALCat character, the string MUST NOT
655 contain any LCat character.
656
657 3) If a string contains any RandALCat character, a RandALCat
658 character MUST be the first character of the string, and a
659 RandALCat character MUST be the last character of the string.
660 */
661
662 #define MAX_STACK_BUFFER_SIZE 300
663
664
665 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)666 usprep_prepare( const UStringPrepProfile* profile,
667 const UChar* src, int32_t srcLength,
668 UChar* dest, int32_t destCapacity,
669 int32_t options,
670 UParseError* parseError,
671 UErrorCode* status ){
672
673 // check error status
674 if(status == NULL || U_FAILURE(*status)){
675 return 0;
676 }
677
678 //check arguments
679 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
680 *status=U_ILLEGAL_ARGUMENT_ERROR;
681 return 0;
682 }
683
684 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
685 UChar *b1 = b1Stack, *b2 = b2Stack;
686 int32_t b1Len, b2Len=0,
687 b1Capacity = MAX_STACK_BUFFER_SIZE ,
688 b2Capacity = MAX_STACK_BUFFER_SIZE;
689 uint16_t result;
690 int32_t b2Index = 0;
691 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
692 UBool leftToRight=FALSE, rightToLeft=FALSE;
693 int32_t rtlPos =-1, ltrPos =-1;
694
695 //get the string length
696 if(srcLength == -1){
697 srcLength = u_strlen(src);
698 }
699 // map
700 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
701
702 if(*status == U_BUFFER_OVERFLOW_ERROR){
703 // redo processing of string
704 /* we do not have enough room so grow the buffer*/
705 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
706 if(b1==NULL){
707 *status = U_MEMORY_ALLOCATION_ERROR;
708 goto CLEANUP;
709 }
710
711 *status = U_ZERO_ERROR; // reset error
712
713 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
714
715 }
716
717 // normalize
718 if(profile->doNFKC == TRUE){
719 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
720
721 if(*status == U_BUFFER_OVERFLOW_ERROR){
722 // redo processing of string
723 /* we do not have enough room so grow the buffer*/
724 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
725 if(b2==NULL){
726 *status = U_MEMORY_ALLOCATION_ERROR;
727 goto CLEANUP;
728 }
729
730 *status = U_ZERO_ERROR; // reset error
731
732 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
733
734 }
735
736 }else{
737 b2 = b1;
738 b2Len = b1Len;
739 }
740
741
742 if(U_FAILURE(*status)){
743 goto CLEANUP;
744 }
745
746 UChar32 ch;
747 UStringPrepType type;
748 int16_t value;
749 UBool isIndex;
750
751 // Prohibit and checkBiDi in one pass
752 for(b2Index=0; b2Index<b2Len;){
753
754 ch = 0;
755
756 U16_NEXT(b2, b2Index, b2Len, ch);
757
758 UTRIE_GET16(&profile->sprepTrie,ch,result);
759
760 type = getValues(result, value, isIndex);
761
762 if( type == USPREP_PROHIBITED ||
763 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
764 ){
765 *status = U_STRINGPREP_PROHIBITED_ERROR;
766 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
767 goto CLEANUP;
768 }
769
770 if(profile->checkBiDi) {
771 direction = ubidi_getClass(profile->bdp, ch);
772 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
773 firstCharDir = direction;
774 }
775 if(direction == U_LEFT_TO_RIGHT){
776 leftToRight = TRUE;
777 ltrPos = b2Index-1;
778 }
779 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
780 rightToLeft = TRUE;
781 rtlPos = b2Index-1;
782 }
783 }
784 }
785 if(profile->checkBiDi == TRUE){
786 // satisfy 2
787 if( leftToRight == TRUE && rightToLeft == TRUE){
788 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
789 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
790 goto CLEANUP;
791 }
792
793 //satisfy 3
794 if( rightToLeft == TRUE &&
795 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
796 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
797 ){
798 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
799 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
800 return FALSE;
801 }
802 }
803 if(b2Len>0 && b2Len <= destCapacity){
804 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
805 }
806
807 CLEANUP:
808 if(b1!=b1Stack){
809 uprv_free(b1);
810 b1=NULL;
811 }
812
813 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
814 uprv_free(b2);
815 b2=NULL;
816 }
817 return u_terminateUChars(dest, destCapacity, b2Len, status);
818 }
819
820
821 /* data swapping ------------------------------------------------------------ */
822
823 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)824 usprep_swap(const UDataSwapper *ds,
825 const void *inData, int32_t length, void *outData,
826 UErrorCode *pErrorCode) {
827 const UDataInfo *pInfo;
828 int32_t headerSize;
829
830 const uint8_t *inBytes;
831 uint8_t *outBytes;
832
833 const int32_t *inIndexes;
834 int32_t indexes[16];
835
836 int32_t i, offset, count, size;
837
838 /* udata_swapDataHeader checks the arguments */
839 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
840 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
841 return 0;
842 }
843
844 /* check data format and format version */
845 pInfo=(const UDataInfo *)((const char *)inData+4);
846 if(!(
847 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
848 pInfo->dataFormat[1]==0x50 &&
849 pInfo->dataFormat[2]==0x52 &&
850 pInfo->dataFormat[3]==0x50 &&
851 pInfo->formatVersion[0]==3
852 )) {
853 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
854 pInfo->dataFormat[0], pInfo->dataFormat[1],
855 pInfo->dataFormat[2], pInfo->dataFormat[3],
856 pInfo->formatVersion[0]);
857 *pErrorCode=U_UNSUPPORTED_ERROR;
858 return 0;
859 }
860
861 inBytes=(const uint8_t *)inData+headerSize;
862 outBytes=(uint8_t *)outData+headerSize;
863
864 inIndexes=(const int32_t *)inBytes;
865
866 if(length>=0) {
867 length-=headerSize;
868 if(length<16*4) {
869 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
870 length);
871 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
872 return 0;
873 }
874 }
875
876 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
877 for(i=0; i<16; ++i) {
878 indexes[i]=udata_readInt32(ds, inIndexes[i]);
879 }
880
881 /* calculate the total length of the data */
882 size=
883 16*4+ /* size of indexes[] */
884 indexes[_SPREP_INDEX_TRIE_SIZE]+
885 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
886
887 if(length>=0) {
888 if(length<size) {
889 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
890 length);
891 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
892 return 0;
893 }
894
895 /* copy the data for inaccessible bytes */
896 if(inBytes!=outBytes) {
897 uprv_memcpy(outBytes, inBytes, size);
898 }
899
900 offset=0;
901
902 /* swap the int32_t indexes[] */
903 count=16*4;
904 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
905 offset+=count;
906
907 /* swap the UTrie */
908 count=indexes[_SPREP_INDEX_TRIE_SIZE];
909 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
910 offset+=count;
911
912 /* swap the uint16_t mappingTable[] */
913 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
914 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
915 offset+=count;
916 }
917
918 return headerSize+size;
919 }
920
921 #endif /* #if !UCONFIG_NO_IDNA */
922