1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: usprep.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_IDNA
22
23 #include "unicode/usprep.h"
24
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39
40 U_NAMESPACE_USE
41
42 U_CDECL_BEGIN
43
44 /*
45 Static cache for already opened StringPrep profiles
46 */
47 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48 static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER;
49
50 static UMutex usprepMutex;
51 /* format version of spp file */
52 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
53
54 /* the Unicode version of the sprep data */
55 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
56
/*
 * Data file base names for the predefined profiles.
 * The table is indexed by UStringPrepProfileType, so the order of the
 * entries must stay aligned with that enum.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
74
75 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)76 isSPrepAcceptable(void * /* context */,
77 const char * /* type */,
78 const char * /* name */,
79 const UDataInfo *pInfo) {
80 if(
81 pInfo->size>=20 &&
82 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
83 pInfo->charsetFamily==U_CHARSET_FAMILY &&
84 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
85 pInfo->dataFormat[1]==0x50 &&
86 pInfo->dataFormat[2]==0x52 &&
87 pInfo->dataFormat[3]==0x50 &&
88 pInfo->formatVersion[0]==3 &&
89 pInfo->formatVersion[2]==UTRIE_SHIFT &&
90 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
91 ) {
92 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
93 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
94 return TRUE;
95 } else {
96 return FALSE;
97 }
98 }
99
100 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)101 getSPrepFoldingOffset(uint32_t data) {
102
103 return (int32_t)data;
104
105 }
106
107 /* hashes an entry */
108 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)109 hashEntry(const UHashTok parm) {
110 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
111 UHashTok namekey, pathkey;
112 namekey.pointer = b->name;
113 pathkey.pointer = b->path;
114 uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
115 37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
116 return static_cast<int32_t>(unsignedHash);
117 }
118
119 /* compares two entries */
120 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)121 compareEntries(const UHashTok p1, const UHashTok p2) {
122 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
123 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
124 UHashTok name1, name2, path1, path2;
125 name1.pointer = b1->name;
126 name2.pointer = b2->name;
127 path1.pointer = b1->path;
128 path2.pointer = b2->path;
129 return ((UBool)(uhash_compareChars(name1, name2) &
130 uhash_compareChars(path1, path2)));
131 }
132
133 static void
usprep_unload(UStringPrepProfile * data)134 usprep_unload(UStringPrepProfile* data){
135 udata_close(data->sprepData);
136 }
137
138 static int32_t
usprep_internal_flushCache(UBool noRefCount)139 usprep_internal_flushCache(UBool noRefCount){
140 UStringPrepProfile *profile = NULL;
141 UStringPrepKey *key = NULL;
142 int32_t pos = UHASH_FIRST;
143 int32_t deletedNum = 0;
144 const UHashElement *e;
145
146 /*
147 * if shared data hasn't even been lazy evaluated yet
148 * return 0
149 */
150 umtx_lock(&usprepMutex);
151 if (SHARED_DATA_HASHTABLE == NULL) {
152 umtx_unlock(&usprepMutex);
153 return 0;
154 }
155
156 /*creates an enumeration to iterate through every element in the table */
157 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
158 {
159 profile = (UStringPrepProfile *) e->value.pointer;
160 key = (UStringPrepKey *) e->key.pointer;
161
162 if ((noRefCount== FALSE && profile->refCount == 0) ||
163 noRefCount== TRUE) {
164 deletedNum++;
165 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
166
167 /* unload the data */
168 usprep_unload(profile);
169
170 if(key->name != NULL) {
171 uprv_free(key->name);
172 key->name=NULL;
173 }
174 if(key->path != NULL) {
175 uprv_free(key->path);
176 key->path=NULL;
177 }
178 uprv_free(profile);
179 uprv_free(key);
180 }
181
182 }
183 umtx_unlock(&usprepMutex);
184
185 return deletedNum;
186 }
187
188 /* Works just like ucnv_flushCache()
189 static int32_t
190 usprep_flushCache(){
191 return usprep_internal_flushCache(FALSE);
192 }
193 */
194
usprep_cleanup(void)195 static UBool U_CALLCONV usprep_cleanup(void){
196 if (SHARED_DATA_HASHTABLE != NULL) {
197 usprep_internal_flushCache(TRUE);
198 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
199 uhash_close(SHARED_DATA_HASHTABLE);
200 SHARED_DATA_HASHTABLE = NULL;
201 }
202 }
203 gSharedDataInitOnce.reset();
204 return (SHARED_DATA_HASHTABLE == NULL);
205 }
206 U_CDECL_END
207
208
209 /** Initializes the cache for resources */
210 static void U_CALLCONV
createCache(UErrorCode & status)211 createCache(UErrorCode &status) {
212 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
213 if (U_FAILURE(status)) {
214 SHARED_DATA_HASHTABLE = NULL;
215 }
216 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
217 }
218
219 static void
initCache(UErrorCode * status)220 initCache(UErrorCode *status) {
221 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
222 }
223
224 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)225 loadData(UStringPrepProfile* profile,
226 const char* path,
227 const char* name,
228 const char* type,
229 UErrorCode* errorCode) {
230 /* load Unicode SPREP data from file */
231 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
232 UDataMemory *dataMemory;
233 const int32_t *p=NULL;
234 const uint8_t *pb;
235 UVersionInfo normUnicodeVersion;
236 int32_t normUniVer, sprepUniVer, normCorrVer;
237
238 if(errorCode==NULL || U_FAILURE(*errorCode)) {
239 return 0;
240 }
241
242 /* open the data outside the mutex block */
243 //TODO: change the path
244 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
245 if(U_FAILURE(*errorCode)) {
246 return FALSE;
247 }
248
249 p=(const int32_t *)udata_getMemory(dataMemory);
250 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
251 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
252 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
253
254
255 if(U_FAILURE(*errorCode)) {
256 udata_close(dataMemory);
257 return FALSE;
258 }
259
260 /* in the mutex block, set the data for this process */
261 umtx_lock(&usprepMutex);
262 if(profile->sprepData==NULL) {
263 profile->sprepData=dataMemory;
264 dataMemory=NULL;
265 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
266 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
267 } else {
268 p=(const int32_t *)udata_getMemory(profile->sprepData);
269 }
270 umtx_unlock(&usprepMutex);
271 /* initialize some variables */
272 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
273
274 u_getUnicodeVersion(normUnicodeVersion);
275 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
276 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
277 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
278 (dataVersion[2] << 8 ) + (dataVersion[3]);
279 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
280
281 if(U_FAILURE(*errorCode)){
282 udata_close(dataMemory);
283 return FALSE;
284 }
285 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
286 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
287 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
288 ){
289 *errorCode = U_INVALID_FORMAT_ERROR;
290 udata_close(dataMemory);
291 return FALSE;
292 }
293 profile->isDataLoaded = TRUE;
294
295 /* if a different thread set it first, then close the extra data */
296 if(dataMemory!=NULL) {
297 udata_close(dataMemory); /* NULL if it was set correctly */
298 }
299
300
301 return profile->isDataLoaded;
302 }
303
304 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)305 usprep_getProfile(const char* path,
306 const char* name,
307 UErrorCode *status){
308
309 UStringPrepProfile* profile = NULL;
310
311 initCache(status);
312
313 if(U_FAILURE(*status)){
314 return NULL;
315 }
316
317 UStringPrepKey stackKey;
318 /*
319 * const is cast way to save malloc, strcpy and free calls
320 * we use the passed in pointers for fetching the data from the
321 * hash table which is safe
322 */
323 stackKey.name = (char*) name;
324 stackKey.path = (char*) path;
325
326 /* fetch the data from the cache */
327 umtx_lock(&usprepMutex);
328 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
329 if(profile != NULL) {
330 profile->refCount++;
331 }
332 umtx_unlock(&usprepMutex);
333
334 if(profile == NULL) {
335 /* else load the data and put the data in the cache */
336 LocalMemory<UStringPrepProfile> newProfile;
337 if(newProfile.allocateInsteadAndReset() == NULL) {
338 *status = U_MEMORY_ALLOCATION_ERROR;
339 return NULL;
340 }
341
342 /* load the data */
343 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
344 return NULL;
345 }
346
347 /* get the options */
348 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
349 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
350
351 LocalMemory<UStringPrepKey> key;
352 LocalMemory<char> keyName;
353 LocalMemory<char> keyPath;
354 if( key.allocateInsteadAndReset() == NULL ||
355 keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
356 (path != NULL &&
357 keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
358 ) {
359 *status = U_MEMORY_ALLOCATION_ERROR;
360 usprep_unload(newProfile.getAlias());
361 return NULL;
362 }
363
364 umtx_lock(&usprepMutex);
365 // If another thread already inserted the same key/value, refcount and cleanup our thread data
366 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
367 if(profile != NULL) {
368 profile->refCount++;
369 usprep_unload(newProfile.getAlias());
370 }
371 else {
372 /* initialize the key members */
373 key->name = keyName.orphan();
374 uprv_strcpy(key->name, name);
375 if(path != NULL){
376 key->path = keyPath.orphan();
377 uprv_strcpy(key->path, path);
378 }
379 profile = newProfile.orphan();
380
381 /* add the data object to the cache */
382 profile->refCount = 1;
383 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
384 }
385 umtx_unlock(&usprepMutex);
386 }
387
388 return profile;
389 }
390
391 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)392 usprep_open(const char* path,
393 const char* name,
394 UErrorCode* status){
395
396 if(status == NULL || U_FAILURE(*status)){
397 return NULL;
398 }
399
400 /* initialize the profile struct members */
401 return usprep_getProfile(path,name,status);
402 }
403
404 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)405 usprep_openByType(UStringPrepProfileType type,
406 UErrorCode* status) {
407 if(status == NULL || U_FAILURE(*status)){
408 return NULL;
409 }
410 int32_t index = (int32_t)type;
411 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
412 *status = U_ILLEGAL_ARGUMENT_ERROR;
413 return NULL;
414 }
415 return usprep_open(NULL, PROFILE_NAMES[index], status);
416 }
417
418 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)419 usprep_close(UStringPrepProfile* profile){
420 if(profile==NULL){
421 return;
422 }
423
424 umtx_lock(&usprepMutex);
425 /* decrement the ref count*/
426 if(profile->refCount > 0){
427 profile->refCount--;
428 }
429 umtx_unlock(&usprepMutex);
430
431 }
432
433 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)434 uprv_syntaxError(const UChar* rules,
435 int32_t pos,
436 int32_t rulesLen,
437 UParseError* parseError){
438 if(parseError == NULL){
439 return;
440 }
441 parseError->offset = pos;
442 parseError->line = 0 ; // we are not using line numbers
443
444 // for pre-context
445 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
446 int32_t limit = pos;
447
448 u_memcpy(parseError->preContext,rules+start,limit-start);
449 //null terminate the buffer
450 parseError->preContext[limit-start] = 0;
451
452 // for post-context; include error rules[pos]
453 start = pos;
454 limit = start + (U_PARSE_CONTEXT_LEN-1);
455 if (limit > rulesLen) {
456 limit = rulesLen;
457 }
458 if (start < rulesLen) {
459 u_memcpy(parseError->postContext,rules+start,limit-start);
460 }
461 //null terminate the buffer
462 parseError->postContext[limit-start]= 0;
463 }
464
465
466 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)467 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
468
469 UStringPrepType type;
470 if(trieWord == 0){
471 /*
472 * Initial value stored in the mapping table
473 * just return USPREP_TYPE_LIMIT .. so that
474 * the source codepoint is copied to the destination
475 */
476 type = USPREP_TYPE_LIMIT;
477 isIndex =FALSE;
478 value = 0;
479 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
480 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
481 isIndex =FALSE;
482 value = 0;
483 }else{
484 /* get the type */
485 type = USPREP_MAP;
486 /* ascertain if the value is index or delta */
487 if(trieWord & 0x02){
488 isIndex = TRUE;
489 value = trieWord >> 2; //mask off the lower 2 bits and shift
490 }else{
491 isIndex = FALSE;
492 value = (int16_t)trieWord;
493 value = (value >> 2);
494 }
495
496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
497 type = USPREP_DELETE;
498 isIndex =FALSE;
499 value = 0;
500 }
501 }
502 return type;
503 }
504
505 // TODO: change to writing to UnicodeString not UChar *
506 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)507 usprep_map( const UStringPrepProfile* profile,
508 const UChar* src, int32_t srcLength,
509 UChar* dest, int32_t destCapacity,
510 int32_t options,
511 UParseError* parseError,
512 UErrorCode* status ){
513
514 uint16_t result;
515 int32_t destIndex=0;
516 int32_t srcIndex;
517 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
518 UStringPrepType type;
519 int16_t value;
520 UBool isIndex;
521 const int32_t* indexes = profile->indexes;
522
523 // no error checking the caller check for error and arguments
524 // no string length check the caller finds out the string length
525
526 for(srcIndex=0;srcIndex<srcLength;){
527 UChar32 ch;
528
529 U16_NEXT(src,srcIndex,srcLength,ch);
530
531 result=0;
532
533 UTRIE_GET16(&profile->sprepTrie,ch,result);
534
535 type = getValues(result, value, isIndex);
536
537 // check if the source codepoint is unassigned
538 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
539
540 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
541 *status = U_STRINGPREP_UNASSIGNED_ERROR;
542 return 0;
543
544 }else if(type == USPREP_MAP){
545
546 int32_t index, length;
547
548 if(isIndex){
549 index = value;
550 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
551 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
552 length = 1;
553 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
554 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
555 length = 2;
556 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
557 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
558 length = 3;
559 }else{
560 length = profile->mappingData[index++];
561
562 }
563
564 /* copy mapping to destination */
565 for(int32_t i=0; i< length; i++){
566 if(destIndex < destCapacity ){
567 dest[destIndex] = profile->mappingData[index+i];
568 }
569 destIndex++; /* for pre-flighting */
570 }
571 continue;
572 }else{
573 // subtract the delta to arrive at the code point
574 ch -= value;
575 }
576
577 }else if(type==USPREP_DELETE){
578 // just consume the codepoint and continue
579 continue;
580 }
581 //copy the code point into destination
582 if(ch <= 0xFFFF){
583 if(destIndex < destCapacity ){
584 dest[destIndex] = (UChar)ch;
585 }
586 destIndex++;
587 }else{
588 if(destIndex+1 < destCapacity ){
589 dest[destIndex] = U16_LEAD(ch);
590 dest[destIndex+1] = U16_TRAIL(ch);
591 }
592 destIndex +=2;
593 }
594
595 }
596
597 return u_terminateUChars(dest, destCapacity, destIndex, status);
598 }
599
/*
   1) Map -- For each character in the input, check if it has a mapping
      and, if so, replace it with its mapping.

   2) Normalize -- Possibly normalize the result of step 1 using Unicode
      normalization.

   3) Prohibit -- Check for any characters that are not allowed in the
      output.  If any are found, return an error.

   4) Check bidi -- Possibly check for right-to-left characters, and if
      any are found, make sure that the whole string satisfies the
      requirements for bidirectional strings.  If the string does not
      satisfy the requirements for bidirectional strings, return an
      error.
      [Unicode3.2] defines several bidirectional categories; each character
      has one bidirectional category assigned to it.  For the purposes of
      the requirements below, an "RandALCat character" is a character that
      has Unicode bidirectional categories "R" or "AL"; an "LCat character"
      is a character that has Unicode bidirectional category "L".  Note
      that there are many characters which fall in neither of the above
      definitions; Latin digits (<U+0030> through <U+0039>) are examples of
      this because they have bidirectional category "EN".

      In any profile that specifies bidirectional character handling, all
      three of the following requirements MUST be met:

      1) The characters in section 5.8 MUST be prohibited.

      2) If a string contains any RandALCat character, the string MUST NOT
         contain any LCat character.

      3) If a string contains any RandALCat character, a RandALCat
         character MUST be the first character of the string, and a
         RandALCat character MUST be the last character of the string.
*/
638 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)639 usprep_prepare( const UStringPrepProfile* profile,
640 const UChar* src, int32_t srcLength,
641 UChar* dest, int32_t destCapacity,
642 int32_t options,
643 UParseError* parseError,
644 UErrorCode* status ){
645
646 // check error status
647 if(U_FAILURE(*status)){
648 return 0;
649 }
650
651 //check arguments
652 if(profile==NULL ||
653 (src==NULL ? srcLength!=0 : srcLength<-1) ||
654 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
655 *status=U_ILLEGAL_ARGUMENT_ERROR;
656 return 0;
657 }
658
659 //get the string length
660 if(srcLength < 0){
661 srcLength = u_strlen(src);
662 }
663 // map
664 UnicodeString s1;
665 UChar *b1 = s1.getBuffer(srcLength);
666 if(b1==NULL){
667 *status = U_MEMORY_ALLOCATION_ERROR;
668 return 0;
669 }
670 int32_t b1Len = usprep_map(profile, src, srcLength,
671 b1, s1.getCapacity(), options, parseError, status);
672 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
673
674 if(*status == U_BUFFER_OVERFLOW_ERROR){
675 // redo processing of string
676 /* we do not have enough room so grow the buffer*/
677 b1 = s1.getBuffer(b1Len);
678 if(b1==NULL){
679 *status = U_MEMORY_ALLOCATION_ERROR;
680 return 0;
681 }
682
683 *status = U_ZERO_ERROR; // reset error
684 b1Len = usprep_map(profile, src, srcLength,
685 b1, s1.getCapacity(), options, parseError, status);
686 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
687 }
688 if(U_FAILURE(*status)){
689 return 0;
690 }
691
692 // normalize
693 UnicodeString s2;
694 if(profile->doNFKC){
695 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
696 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
697 if(U_FAILURE(*status)){
698 return 0;
699 }
700 fn2.normalize(s1, s2, *status);
701 }else{
702 s2.fastCopyFrom(s1);
703 }
704 if(U_FAILURE(*status)){
705 return 0;
706 }
707
708 // Prohibit and checkBiDi in one pass
709 const UChar *b2 = s2.getBuffer();
710 int32_t b2Len = s2.length();
711 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
712 UBool leftToRight=FALSE, rightToLeft=FALSE;
713 int32_t rtlPos =-1, ltrPos =-1;
714
715 for(int32_t b2Index=0; b2Index<b2Len;){
716 UChar32 ch = 0;
717 U16_NEXT(b2, b2Index, b2Len, ch);
718
719 uint16_t result;
720 UTRIE_GET16(&profile->sprepTrie,ch,result);
721
722 int16_t value;
723 UBool isIndex;
724 UStringPrepType type = getValues(result, value, isIndex);
725
726 if( type == USPREP_PROHIBITED ||
727 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
728 ){
729 *status = U_STRINGPREP_PROHIBITED_ERROR;
730 uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
731 return 0;
732 }
733
734 if(profile->checkBiDi) {
735 direction = ubidi_getClass(ch);
736 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
737 firstCharDir = direction;
738 }
739 if(direction == U_LEFT_TO_RIGHT){
740 leftToRight = TRUE;
741 ltrPos = b2Index-1;
742 }
743 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
744 rightToLeft = TRUE;
745 rtlPos = b2Index-1;
746 }
747 }
748 }
749 if(profile->checkBiDi == TRUE){
750 // satisfy 2
751 if( leftToRight == TRUE && rightToLeft == TRUE){
752 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
753 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
754 return 0;
755 }
756
757 //satisfy 3
758 if( rightToLeft == TRUE &&
759 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
760 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
761 ){
762 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
763 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
764 return FALSE;
765 }
766 }
767 return s2.extract(dest, destCapacity, *status);
768 }
769
770
771 /* data swapping ------------------------------------------------------------ */
772
773 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)774 usprep_swap(const UDataSwapper *ds,
775 const void *inData, int32_t length, void *outData,
776 UErrorCode *pErrorCode) {
777 const UDataInfo *pInfo;
778 int32_t headerSize;
779
780 const uint8_t *inBytes;
781 uint8_t *outBytes;
782
783 const int32_t *inIndexes;
784 int32_t indexes[16];
785
786 int32_t i, offset, count, size;
787
788 /* udata_swapDataHeader checks the arguments */
789 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
790 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
791 return 0;
792 }
793
794 /* check data format and format version */
795 pInfo=(const UDataInfo *)((const char *)inData+4);
796 if(!(
797 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
798 pInfo->dataFormat[1]==0x50 &&
799 pInfo->dataFormat[2]==0x52 &&
800 pInfo->dataFormat[3]==0x50 &&
801 pInfo->formatVersion[0]==3
802 )) {
803 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
804 pInfo->dataFormat[0], pInfo->dataFormat[1],
805 pInfo->dataFormat[2], pInfo->dataFormat[3],
806 pInfo->formatVersion[0]);
807 *pErrorCode=U_UNSUPPORTED_ERROR;
808 return 0;
809 }
810
811 inBytes=(const uint8_t *)inData+headerSize;
812 outBytes=(uint8_t *)outData+headerSize;
813
814 inIndexes=(const int32_t *)inBytes;
815
816 if(length>=0) {
817 length-=headerSize;
818 if(length<16*4) {
819 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
820 length);
821 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
822 return 0;
823 }
824 }
825
826 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
827 for(i=0; i<16; ++i) {
828 indexes[i]=udata_readInt32(ds, inIndexes[i]);
829 }
830
831 /* calculate the total length of the data */
832 size=
833 16*4+ /* size of indexes[] */
834 indexes[_SPREP_INDEX_TRIE_SIZE]+
835 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
836
837 if(length>=0) {
838 if(length<size) {
839 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
840 length);
841 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
842 return 0;
843 }
844
845 /* copy the data for inaccessible bytes */
846 if(inBytes!=outBytes) {
847 uprv_memcpy(outBytes, inBytes, size);
848 }
849
850 offset=0;
851
852 /* swap the int32_t indexes[] */
853 count=16*4;
854 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
855 offset+=count;
856
857 /* swap the UTrie */
858 count=indexes[_SPREP_INDEX_TRIE_SIZE];
859 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
860 offset+=count;
861
862 /* swap the uint16_t mappingTable[] */
863 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
864 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
865 //offset+=count;
866 }
867
868 return headerSize+size;
869 }
870
871 #endif /* #if !UCONFIG_NO_IDNA */
872