1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: usprep.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_IDNA
22
23 #include "unicode/usprep.h"
24
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39
40 U_NAMESPACE_USE
41
42 U_CDECL_BEGIN
43
44 /*
45 Static cache for already opened StringPrep profiles
46 */
47 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48 static icu::UInitOnce gSharedDataInitOnce;
49
50 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
51
52 /* format version of spp file */
53 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
54
55 /* the Unicode version of the sprep data */
56 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57
/*
 * Data file names of the predefined profiles, indexed by UStringPrepProfileType.
 * The order of the entries must stay aligned with that enum; usprep_openByType()
 * uses the enum value directly as an index into this table.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
75
76 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)77 isSPrepAcceptable(void * /* context */,
78 const char * /* type */,
79 const char * /* name */,
80 const UDataInfo *pInfo) {
81 if(
82 pInfo->size>=20 &&
83 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
84 pInfo->charsetFamily==U_CHARSET_FAMILY &&
85 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
86 pInfo->dataFormat[1]==0x50 &&
87 pInfo->dataFormat[2]==0x52 &&
88 pInfo->dataFormat[3]==0x50 &&
89 pInfo->formatVersion[0]==3 &&
90 pInfo->formatVersion[2]==UTRIE_SHIFT &&
91 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
92 ) {
93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
95 return TRUE;
96 } else {
97 return FALSE;
98 }
99 }
100
101 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)102 getSPrepFoldingOffset(uint32_t data) {
103
104 return (int32_t)data;
105
106 }
107
108 /* hashes an entry */
109 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)110 hashEntry(const UHashTok parm) {
111 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112 UHashTok namekey, pathkey;
113 namekey.pointer = b->name;
114 pathkey.pointer = b->path;
115 uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
116 37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
117 return static_cast<int32_t>(unsignedHash);
118 }
119
120 /* compares two entries */
121 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)122 compareEntries(const UHashTok p1, const UHashTok p2) {
123 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
124 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
125 UHashTok name1, name2, path1, path2;
126 name1.pointer = b1->name;
127 name2.pointer = b2->name;
128 path1.pointer = b1->path;
129 path2.pointer = b2->path;
130 return ((UBool)(uhash_compareChars(name1, name2) &
131 uhash_compareChars(path1, path2)));
132 }
133
134 static void
usprep_unload(UStringPrepProfile * data)135 usprep_unload(UStringPrepProfile* data){
136 udata_close(data->sprepData);
137 }
138
139 static int32_t
usprep_internal_flushCache(UBool noRefCount)140 usprep_internal_flushCache(UBool noRefCount){
141 UStringPrepProfile *profile = NULL;
142 UStringPrepKey *key = NULL;
143 int32_t pos = UHASH_FIRST;
144 int32_t deletedNum = 0;
145 const UHashElement *e;
146
147 /*
148 * if shared data hasn't even been lazy evaluated yet
149 * return 0
150 */
151 umtx_lock(&usprepMutex);
152 if (SHARED_DATA_HASHTABLE == NULL) {
153 umtx_unlock(&usprepMutex);
154 return 0;
155 }
156
157 /*creates an enumeration to iterate through every element in the table */
158 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
159 {
160 profile = (UStringPrepProfile *) e->value.pointer;
161 key = (UStringPrepKey *) e->key.pointer;
162
163 if ((noRefCount== FALSE && profile->refCount == 0) ||
164 noRefCount== TRUE) {
165 deletedNum++;
166 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
167
168 /* unload the data */
169 usprep_unload(profile);
170
171 if(key->name != NULL) {
172 uprv_free(key->name);
173 key->name=NULL;
174 }
175 if(key->path != NULL) {
176 uprv_free(key->path);
177 key->path=NULL;
178 }
179 uprv_free(profile);
180 uprv_free(key);
181 }
182
183 }
184 umtx_unlock(&usprepMutex);
185
186 return deletedNum;
187 }
188
189 /* Works just like ucnv_flushCache()
190 static int32_t
191 usprep_flushCache(){
192 return usprep_internal_flushCache(FALSE);
193 }
194 */
195
usprep_cleanup(void)196 static UBool U_CALLCONV usprep_cleanup(void){
197 if (SHARED_DATA_HASHTABLE != NULL) {
198 usprep_internal_flushCache(TRUE);
199 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
200 uhash_close(SHARED_DATA_HASHTABLE);
201 SHARED_DATA_HASHTABLE = NULL;
202 }
203 }
204 gSharedDataInitOnce.reset();
205 return (SHARED_DATA_HASHTABLE == NULL);
206 }
207 U_CDECL_END
208
209
210 /** Initializes the cache for resources */
211 static void U_CALLCONV
createCache(UErrorCode & status)212 createCache(UErrorCode &status) {
213 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
214 if (U_FAILURE(status)) {
215 SHARED_DATA_HASHTABLE = NULL;
216 }
217 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
218 }
219
220 static void
initCache(UErrorCode * status)221 initCache(UErrorCode *status) {
222 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
223 }
224
225 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)226 loadData(UStringPrepProfile* profile,
227 const char* path,
228 const char* name,
229 const char* type,
230 UErrorCode* errorCode) {
231 /* load Unicode SPREP data from file */
232 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
233 UDataMemory *dataMemory;
234 const int32_t *p=NULL;
235 const uint8_t *pb;
236 UVersionInfo normUnicodeVersion;
237 int32_t normUniVer, sprepUniVer, normCorrVer;
238
239 if(errorCode==NULL || U_FAILURE(*errorCode)) {
240 return 0;
241 }
242
243 /* open the data outside the mutex block */
244 //TODO: change the path
245 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
246 if(U_FAILURE(*errorCode)) {
247 return FALSE;
248 }
249
250 p=(const int32_t *)udata_getMemory(dataMemory);
251 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
252 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
253 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
254
255
256 if(U_FAILURE(*errorCode)) {
257 udata_close(dataMemory);
258 return FALSE;
259 }
260
261 /* in the mutex block, set the data for this process */
262 umtx_lock(&usprepMutex);
263 if(profile->sprepData==NULL) {
264 profile->sprepData=dataMemory;
265 dataMemory=NULL;
266 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
267 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
268 } else {
269 p=(const int32_t *)udata_getMemory(profile->sprepData);
270 }
271 umtx_unlock(&usprepMutex);
272 /* initialize some variables */
273 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
274
275 u_getUnicodeVersion(normUnicodeVersion);
276 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
277 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
278 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
279 (dataVersion[2] << 8 ) + (dataVersion[3]);
280 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
281
282 if(U_FAILURE(*errorCode)){
283 udata_close(dataMemory);
284 return FALSE;
285 }
286 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
287 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
288 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
289 ){
290 *errorCode = U_INVALID_FORMAT_ERROR;
291 udata_close(dataMemory);
292 return FALSE;
293 }
294 profile->isDataLoaded = TRUE;
295
296 /* if a different thread set it first, then close the extra data */
297 if(dataMemory!=NULL) {
298 udata_close(dataMemory); /* NULL if it was set correctly */
299 }
300
301
302 return profile->isDataLoaded;
303 }
304
305 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)306 usprep_getProfile(const char* path,
307 const char* name,
308 UErrorCode *status){
309
310 UStringPrepProfile* profile = NULL;
311
312 initCache(status);
313
314 if(U_FAILURE(*status)){
315 return NULL;
316 }
317
318 UStringPrepKey stackKey;
319 /*
320 * const is cast way to save malloc, strcpy and free calls
321 * we use the passed in pointers for fetching the data from the
322 * hash table which is safe
323 */
324 stackKey.name = (char*) name;
325 stackKey.path = (char*) path;
326
327 /* fetch the data from the cache */
328 umtx_lock(&usprepMutex);
329 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
330 if(profile != NULL) {
331 profile->refCount++;
332 }
333 umtx_unlock(&usprepMutex);
334
335 if(profile == NULL) {
336 /* else load the data and put the data in the cache */
337 LocalMemory<UStringPrepProfile> newProfile;
338 if(newProfile.allocateInsteadAndReset() == NULL) {
339 *status = U_MEMORY_ALLOCATION_ERROR;
340 return NULL;
341 }
342
343 /* load the data */
344 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
345 return NULL;
346 }
347
348 /* get the options */
349 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
350 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
351
352 LocalMemory<UStringPrepKey> key;
353 LocalMemory<char> keyName;
354 LocalMemory<char> keyPath;
355 if( key.allocateInsteadAndReset() == NULL ||
356 keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
357 (path != NULL &&
358 keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
359 ) {
360 *status = U_MEMORY_ALLOCATION_ERROR;
361 usprep_unload(newProfile.getAlias());
362 return NULL;
363 }
364
365 umtx_lock(&usprepMutex);
366 // If another thread already inserted the same key/value, refcount and cleanup our thread data
367 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
368 if(profile != NULL) {
369 profile->refCount++;
370 usprep_unload(newProfile.getAlias());
371 }
372 else {
373 /* initialize the key members */
374 key->name = keyName.orphan();
375 uprv_strcpy(key->name, name);
376 if(path != NULL){
377 key->path = keyPath.orphan();
378 uprv_strcpy(key->path, path);
379 }
380 profile = newProfile.orphan();
381
382 /* add the data object to the cache */
383 profile->refCount = 1;
384 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
385 }
386 umtx_unlock(&usprepMutex);
387 }
388
389 return profile;
390 }
391
392 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)393 usprep_open(const char* path,
394 const char* name,
395 UErrorCode* status){
396
397 if(status == NULL || U_FAILURE(*status)){
398 return NULL;
399 }
400
401 /* initialize the profile struct members */
402 return usprep_getProfile(path,name,status);
403 }
404
405 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)406 usprep_openByType(UStringPrepProfileType type,
407 UErrorCode* status) {
408 if(status == NULL || U_FAILURE(*status)){
409 return NULL;
410 }
411 int32_t index = (int32_t)type;
412 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
413 *status = U_ILLEGAL_ARGUMENT_ERROR;
414 return NULL;
415 }
416 return usprep_open(NULL, PROFILE_NAMES[index], status);
417 }
418
419 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)420 usprep_close(UStringPrepProfile* profile){
421 if(profile==NULL){
422 return;
423 }
424
425 umtx_lock(&usprepMutex);
426 /* decrement the ref count*/
427 if(profile->refCount > 0){
428 profile->refCount--;
429 }
430 umtx_unlock(&usprepMutex);
431
432 }
433
434 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)435 uprv_syntaxError(const UChar* rules,
436 int32_t pos,
437 int32_t rulesLen,
438 UParseError* parseError){
439 if(parseError == NULL){
440 return;
441 }
442 parseError->offset = pos;
443 parseError->line = 0 ; // we are not using line numbers
444
445 // for pre-context
446 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
447 int32_t limit = pos;
448
449 u_memcpy(parseError->preContext,rules+start,limit-start);
450 //null terminate the buffer
451 parseError->preContext[limit-start] = 0;
452
453 // for post-context; include error rules[pos]
454 start = pos;
455 limit = start + (U_PARSE_CONTEXT_LEN-1);
456 if (limit > rulesLen) {
457 limit = rulesLen;
458 }
459 if (start < rulesLen) {
460 u_memcpy(parseError->postContext,rules+start,limit-start);
461 }
462 //null terminate the buffer
463 parseError->postContext[limit-start]= 0;
464 }
465
466
467 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)468 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
469
470 UStringPrepType type;
471 if(trieWord == 0){
472 /*
473 * Initial value stored in the mapping table
474 * just return USPREP_TYPE_LIMIT .. so that
475 * the source codepoint is copied to the destination
476 */
477 type = USPREP_TYPE_LIMIT;
478 isIndex =FALSE;
479 value = 0;
480 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
481 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
482 isIndex =FALSE;
483 value = 0;
484 }else{
485 /* get the type */
486 type = USPREP_MAP;
487 /* ascertain if the value is index or delta */
488 if(trieWord & 0x02){
489 isIndex = TRUE;
490 value = trieWord >> 2; //mask off the lower 2 bits and shift
491 }else{
492 isIndex = FALSE;
493 value = (int16_t)trieWord;
494 value = (value >> 2);
495 }
496
497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
498 type = USPREP_DELETE;
499 isIndex =FALSE;
500 value = 0;
501 }
502 }
503 return type;
504 }
505
506 // TODO: change to writing to UnicodeString not UChar *
507 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)508 usprep_map( const UStringPrepProfile* profile,
509 const UChar* src, int32_t srcLength,
510 UChar* dest, int32_t destCapacity,
511 int32_t options,
512 UParseError* parseError,
513 UErrorCode* status ){
514
515 uint16_t result;
516 int32_t destIndex=0;
517 int32_t srcIndex;
518 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519 UStringPrepType type;
520 int16_t value;
521 UBool isIndex;
522 const int32_t* indexes = profile->indexes;
523
524 // no error checking the caller check for error and arguments
525 // no string length check the caller finds out the string length
526
527 for(srcIndex=0;srcIndex<srcLength;){
528 UChar32 ch;
529
530 U16_NEXT(src,srcIndex,srcLength,ch);
531
532 result=0;
533
534 UTRIE_GET16(&profile->sprepTrie,ch,result);
535
536 type = getValues(result, value, isIndex);
537
538 // check if the source codepoint is unassigned
539 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
540
541 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542 *status = U_STRINGPREP_UNASSIGNED_ERROR;
543 return 0;
544
545 }else if(type == USPREP_MAP){
546
547 int32_t index, length;
548
549 if(isIndex){
550 index = value;
551 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553 length = 1;
554 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556 length = 2;
557 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559 length = 3;
560 }else{
561 length = profile->mappingData[index++];
562
563 }
564
565 /* copy mapping to destination */
566 for(int32_t i=0; i< length; i++){
567 if(destIndex < destCapacity ){
568 dest[destIndex] = profile->mappingData[index+i];
569 }
570 destIndex++; /* for pre-flighting */
571 }
572 continue;
573 }else{
574 // subtract the delta to arrive at the code point
575 ch -= value;
576 }
577
578 }else if(type==USPREP_DELETE){
579 // just consume the codepoint and contine
580 continue;
581 }
582 //copy the code point into destination
583 if(ch <= 0xFFFF){
584 if(destIndex < destCapacity ){
585 dest[destIndex] = (UChar)ch;
586 }
587 destIndex++;
588 }else{
589 if(destIndex+1 < destCapacity ){
590 dest[destIndex] = U16_LEAD(ch);
591 dest[destIndex+1] = U16_TRAIL(ch);
592 }
593 destIndex +=2;
594 }
595
596 }
597
598 return u_terminateUChars(dest, destCapacity, destIndex, status);
599 }
600
601 /*
602 1) Map -- For each character in the input, check if it has a mapping
603 and, if so, replace it with its mapping.
604
605 2) Normalize -- Possibly normalize the result of step 1 using Unicode
606 normalization.
607
608 3) Prohibit -- Check for any characters that are not allowed in the
609 output. If any are found, return an error.
610
611 4) Check bidi -- Possibly check for right-to-left characters, and if
612 any are found, make sure that the whole string satisfies the
613 requirements for bidirectional strings. If the string does not
614 satisfy the requirements for bidirectional strings, return an
615 error.
616 [Unicode3.2] defines several bidirectional categories; each character
617 has one bidirectional category assigned to it. For the purposes of
618 the requirements below, an "RandALCat character" is a character that
619 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
620 is a character that has Unicode bidirectional category "L". Note
621
622
623 that there are many characters which fall in neither of the above
624 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
625 this because they have bidirectional category "EN".
626
627 In any profile that specifies bidirectional character handling, all
628 three of the following requirements MUST be met:
629
630 1) The characters in section 5.8 MUST be prohibited.
631
632 2) If a string contains any RandALCat character, the string MUST NOT
633 contain any LCat character.
634
635 3) If a string contains any RandALCat character, a RandALCat
636 character MUST be the first character of the string, and a
637 RandALCat character MUST be the last character of the string.
638 */
639 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)640 usprep_prepare( const UStringPrepProfile* profile,
641 const UChar* src, int32_t srcLength,
642 UChar* dest, int32_t destCapacity,
643 int32_t options,
644 UParseError* parseError,
645 UErrorCode* status ){
646
647 // check error status
648 if(U_FAILURE(*status)){
649 return 0;
650 }
651
652 //check arguments
653 if(profile==NULL ||
654 (src==NULL ? srcLength!=0 : srcLength<-1) ||
655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
656 *status=U_ILLEGAL_ARGUMENT_ERROR;
657 return 0;
658 }
659
660 //get the string length
661 if(srcLength < 0){
662 srcLength = u_strlen(src);
663 }
664 // map
665 UnicodeString s1;
666 UChar *b1 = s1.getBuffer(srcLength);
667 if(b1==NULL){
668 *status = U_MEMORY_ALLOCATION_ERROR;
669 return 0;
670 }
671 int32_t b1Len = usprep_map(profile, src, srcLength,
672 b1, s1.getCapacity(), options, parseError, status);
673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
674
675 if(*status == U_BUFFER_OVERFLOW_ERROR){
676 // redo processing of string
677 /* we do not have enough room so grow the buffer*/
678 b1 = s1.getBuffer(b1Len);
679 if(b1==NULL){
680 *status = U_MEMORY_ALLOCATION_ERROR;
681 return 0;
682 }
683
684 *status = U_ZERO_ERROR; // reset error
685 b1Len = usprep_map(profile, src, srcLength,
686 b1, s1.getCapacity(), options, parseError, status);
687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
688 }
689 if(U_FAILURE(*status)){
690 return 0;
691 }
692
693 // normalize
694 UnicodeString s2;
695 if(profile->doNFKC){
696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
698 if(U_FAILURE(*status)){
699 return 0;
700 }
701 fn2.normalize(s1, s2, *status);
702 }else{
703 s2.fastCopyFrom(s1);
704 }
705 if(U_FAILURE(*status)){
706 return 0;
707 }
708
709 // Prohibit and checkBiDi in one pass
710 const UChar *b2 = s2.getBuffer();
711 int32_t b2Len = s2.length();
712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
713 UBool leftToRight=FALSE, rightToLeft=FALSE;
714 int32_t rtlPos =-1, ltrPos =-1;
715
716 for(int32_t b2Index=0; b2Index<b2Len;){
717 UChar32 ch = 0;
718 U16_NEXT(b2, b2Index, b2Len, ch);
719
720 uint16_t result;
721 UTRIE_GET16(&profile->sprepTrie,ch,result);
722
723 int16_t value;
724 UBool isIndex;
725 UStringPrepType type = getValues(result, value, isIndex);
726
727 if( type == USPREP_PROHIBITED ||
728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
729 ){
730 *status = U_STRINGPREP_PROHIBITED_ERROR;
731 uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
732 return 0;
733 }
734
735 if(profile->checkBiDi) {
736 direction = ubidi_getClass(ch);
737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
738 firstCharDir = direction;
739 }
740 if(direction == U_LEFT_TO_RIGHT){
741 leftToRight = TRUE;
742 ltrPos = b2Index-1;
743 }
744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
745 rightToLeft = TRUE;
746 rtlPos = b2Index-1;
747 }
748 }
749 }
750 if(profile->checkBiDi == TRUE){
751 // satisfy 2
752 if( leftToRight == TRUE && rightToLeft == TRUE){
753 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
755 return 0;
756 }
757
758 //satisfy 3
759 if( rightToLeft == TRUE &&
760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
762 ){
763 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
764 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
765 return FALSE;
766 }
767 }
768 return s2.extract(dest, destCapacity, *status);
769 }
770
771
772 /* data swapping ------------------------------------------------------------ */
773
774 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)775 usprep_swap(const UDataSwapper *ds,
776 const void *inData, int32_t length, void *outData,
777 UErrorCode *pErrorCode) {
778 const UDataInfo *pInfo;
779 int32_t headerSize;
780
781 const uint8_t *inBytes;
782 uint8_t *outBytes;
783
784 const int32_t *inIndexes;
785 int32_t indexes[16];
786
787 int32_t i, offset, count, size;
788
789 /* udata_swapDataHeader checks the arguments */
790 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
791 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
792 return 0;
793 }
794
795 /* check data format and format version */
796 pInfo=(const UDataInfo *)((const char *)inData+4);
797 if(!(
798 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
799 pInfo->dataFormat[1]==0x50 &&
800 pInfo->dataFormat[2]==0x52 &&
801 pInfo->dataFormat[3]==0x50 &&
802 pInfo->formatVersion[0]==3
803 )) {
804 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
805 pInfo->dataFormat[0], pInfo->dataFormat[1],
806 pInfo->dataFormat[2], pInfo->dataFormat[3],
807 pInfo->formatVersion[0]);
808 *pErrorCode=U_UNSUPPORTED_ERROR;
809 return 0;
810 }
811
812 inBytes=(const uint8_t *)inData+headerSize;
813 outBytes=(uint8_t *)outData+headerSize;
814
815 inIndexes=(const int32_t *)inBytes;
816
817 if(length>=0) {
818 length-=headerSize;
819 if(length<16*4) {
820 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
821 length);
822 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
823 return 0;
824 }
825 }
826
827 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
828 for(i=0; i<16; ++i) {
829 indexes[i]=udata_readInt32(ds, inIndexes[i]);
830 }
831
832 /* calculate the total length of the data */
833 size=
834 16*4+ /* size of indexes[] */
835 indexes[_SPREP_INDEX_TRIE_SIZE]+
836 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
837
838 if(length>=0) {
839 if(length<size) {
840 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
841 length);
842 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
843 return 0;
844 }
845
846 /* copy the data for inaccessible bytes */
847 if(inBytes!=outBytes) {
848 uprv_memcpy(outBytes, inBytes, size);
849 }
850
851 offset=0;
852
853 /* swap the int32_t indexes[] */
854 count=16*4;
855 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
856 offset+=count;
857
858 /* swap the UTrie */
859 count=indexes[_SPREP_INDEX_TRIE_SIZE];
860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
861 offset+=count;
862
863 /* swap the uint16_t mappingTable[] */
864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
866 //offset+=count;
867 }
868
869 return headerSize+size;
870 }
871
872 #endif /* #if !UCONFIG_NO_IDNA */
873