1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2011, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: idnaref.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003feb1
16 * created by: Ram Viswanadha
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
22 #include "idnaref.h"
23 #include "punyref.h"
24 #include "ustr_imp.h"
25 #include "cmemory.h"
26 #include "sprpimpl.h"
27 #include "nptrans.h"
28 #include "testidna.h"
29 #include "punycode.h"
30 #include "unicode/ustring.h"
31
/* The official IDNA ACE prefix is "xn--" */
33 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
34 #define ACE_PREFIX_LENGTH 4
35
36 #define MAX_LABEL_LENGTH 63
37 #define HYPHEN 0x002D
38 /* The Max length of the labels should not be more than 64 */
39 #define MAX_LABEL_BUFFER_SIZE 100
40 #define MAX_IDN_BUFFER_SIZE 300
41
42 #define CAPITAL_A 0x0041
43 #define CAPITAL_Z 0x005A
44 #define LOWER_CASE_DELTA 0x0020
45 #define FULL_STOP 0x002E
46
47
48 inline static UBool
startsWithPrefix(const UChar * src,int32_t srcLength)49 startsWithPrefix(const UChar* src , int32_t srcLength){
50 UBool startsWithPrefix = true;
51
52 if(srcLength < ACE_PREFIX_LENGTH){
53 return false;
54 }
55
56 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
57 if(u_tolower(src[i]) != ACE_PREFIX[i]){
58 startsWithPrefix = false;
59 }
60 }
61 return startsWithPrefix;
62 }
63
64 inline static UChar
toASCIILower(UChar ch)65 toASCIILower(UChar ch){
66 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
67 return ch + LOWER_CASE_DELTA;
68 }
69 return ch;
70 }
71
72 inline static int32_t
compareCaseInsensitiveASCII(const UChar * s1,int32_t s1Len,const UChar * s2,int32_t s2Len)73 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
74 const UChar* s2, int32_t s2Len){
75 if(s1Len != s2Len){
76 return (s1Len > s2Len) ? s1Len : s2Len;
77 }
78 UChar c1,c2;
79 int32_t rc;
80
81 for(int32_t i =0;/* no condition */;i++) {
82 /* If we reach the ends of both strings then they match */
83 if(i == s1Len) {
84 return 0;
85 }
86
87 c1 = s1[i];
88 c2 = s2[i];
89
90 /* Case-insensitive comparison */
91 if(c1!=c2) {
92 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
93 if(rc!=0) {
94 return rc;
95 }
96 }
97 }
98
99 }
100
getError(enum punycode_status status)101 static UErrorCode getError(enum punycode_status status){
102 switch(status){
103 case punycode_success:
104 return U_ZERO_ERROR;
105 case punycode_bad_input: /* Input is invalid. */
106 return U_INVALID_CHAR_FOUND;
107 case punycode_big_output: /* Output would exceed the space provided. */
108 return U_BUFFER_OVERFLOW_ERROR;
109 case punycode_overflow : /* Input requires wider integers to process. */
110 return U_INDEX_OUTOFBOUNDS_ERROR;
111 default:
112 return U_INTERNAL_PROGRAM_ERROR;
113 }
114 }
115
/**
 * Copies `length` chars from src into dest, assigning each char to one UChar.
 * No bounds checking is done; dest must have room for `length` units.
 *
 * @return the number of units copied (0 when length is not positive)
 */
static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){
    int copied = 0;
    while(copied < length){
        dest[copied] = src[copied];
        ++copied;
    }
    return copied;
}
/**
 * Copies `length` code units from src into dest, casting each UChar to char.
 * No bounds checking is done; dest must have room for `length` chars.
 *
 * @return the number of chars copied (0 when length is not positive)
 */
static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){
    int copied = 0;
    while(copied < length){
        dest[copied] = (char)src[copied];
        ++copied;
    }
    return copied;
}
130 // wrapper around the reference Punycode implementation
convertToPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)131 static int32_t convertToPuny(const UChar* src, int32_t srcLength,
132 UChar* dest, int32_t destCapacity,
133 UErrorCode& status){
134 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
135 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
136 uint32_t* b1 = b1Stack;
137 char b2Stack[MAX_LABEL_BUFFER_SIZE];
138 char* b2 = b2Stack;
139 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ;
140 punycode_status error;
141 unsigned char* caseFlags = NULL;
142
143 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status);
144 if(status == U_BUFFER_OVERFLOW_ERROR){
145 // redo processing of string
146 /* we do not have enough room so grow the buffer*/
147 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t));
148 if(b1==NULL){
149 status = U_MEMORY_ALLOCATION_ERROR;
150 goto CLEANUP;
151 }
152
153 status = U_ZERO_ERROR; // reset error
154
155 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
156 }
157 if(U_FAILURE(status)){
158 goto CLEANUP;
159 }
160
161 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char));
162
163 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
164 status = getError(error);
165
166 if(status == U_BUFFER_OVERFLOW_ERROR){
167 /* we do not have enough room so grow the buffer*/
168 b2 = (char*) uprv_malloc( b2Len * sizeof(char));
169 if(b2==NULL){
170 status = U_MEMORY_ALLOCATION_ERROR;
171 goto CLEANUP;
172 }
173
174 status = U_ZERO_ERROR; // reset error
175
176 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
177 status = getError(error);
178 }
179 if(U_FAILURE(status)){
180 goto CLEANUP;
181 }
182
183 if(b2Len < destCapacity){
184 convertASCIIToUChars(b2,dest,b2Len);
185 }else{
186 status =U_BUFFER_OVERFLOW_ERROR;
187 }
188
189 CLEANUP:
190 if(b1Stack != b1){
191 uprv_free(b1);
192 }
193 if(b2Stack != b2){
194 uprv_free(b2);
195 }
196 uprv_free(caseFlags);
197
198 return b2Len;
199 }
200
201
getInstance(UErrorCode & status)202 static NamePrepTransform* getInstance(UErrorCode& status){
203 TestIDNA *thisTest = dynamic_cast<TestIDNA *>(IntlTest::gTest);
204 if (thisTest == nullptr && U_SUCCESS(status)) {
205 status = U_INTERNAL_PROGRAM_ERROR;
206 }
207 if (U_FAILURE(status)) return nullptr;
208 return thisTest->getInstance(status);
209 }
210
211
convertFromPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)212 static int32_t convertFromPuny( const UChar* src, int32_t srcLength,
213 UChar* dest, int32_t destCapacity,
214 UErrorCode& status){
215 char b1Stack[MAX_LABEL_BUFFER_SIZE];
216 char* b1 = b1Stack;
217 int32_t destLen =0;
218
219 convertUCharsToASCII(src, b1,srcLength);
220
221 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
222 uint32_t* b2 = b2Stack;
223 int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
224 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
225 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
226 status = getError(error);
227 if(status == U_BUFFER_OVERFLOW_ERROR){
228 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
229 if(b2 == NULL){
230 status = U_MEMORY_ALLOCATION_ERROR;
231 goto CLEANUP;
232 }
233 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
234 status = getError(error);
235 }
236
237 if(U_FAILURE(status)){
238 goto CLEANUP;
239 }
240
241 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);
242
243 CLEANUP:
244 if(b1Stack != b1){
245 uprv_free(b1);
246 }
247 if(b2Stack != b2){
248 uprv_free(b2);
249 }
250 uprv_free(caseFlags);
251
252 return destLen;
253 }
254
255
256 U_CFUNC int32_t U_EXPORT2
idnaref_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)257 idnaref_toASCII(const UChar* src, int32_t srcLength,
258 UChar* dest, int32_t destCapacity,
259 int32_t options,
260 UParseError* parseError,
261 UErrorCode* status){
262
263 if(status == NULL || U_FAILURE(*status)){
264 return 0;
265 }
266 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
267 *status = U_ILLEGAL_ARGUMENT_ERROR;
268 return 0;
269 }
270 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
271 //initialize pointers to stack buffers
272 UChar *b1 = b1Stack, *b2 = b2Stack;
273 int32_t b1Len=0, b2Len=0,
274 b1Capacity = MAX_LABEL_BUFFER_SIZE,
275 b2Capacity = MAX_LABEL_BUFFER_SIZE ,
276 reqLength=0;
277
278 //get the options
279 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
280 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
281
282 UBool* caseFlags = NULL;
283
284 // assume the source contains all ascii codepoints
285 UBool srcIsASCII = true;
286 // assume the source contains all LDH codepoints
287 UBool srcIsLDH = true;
288 int32_t j=0;
289
290 if(srcLength == -1){
291 srcLength = u_strlen(src);
292 }
293
294 // step 1
295 for( j=0;j<srcLength;j++){
296 if(src[j] > 0x7F){
297 srcIsASCII = false;
298 }
299 b1[b1Len++] = src[j];
300 }
301
302 NamePrepTransform* prep = getInstance(*status);
303 if(U_FAILURE(*status)){
304 goto CLEANUP;
305 }
306
307 // step 2 is performed only if the source contains non ASCII
308 if (!srcIsASCII) {
309 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);
310
311 if(*status == U_BUFFER_OVERFLOW_ERROR){
312 // redo processing of string
313 /* we do not have enough room so grow the buffer*/
314 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
315 if(b1==NULL){
316 *status = U_MEMORY_ALLOCATION_ERROR;
317 goto CLEANUP;
318 }
319
320 *status = U_ZERO_ERROR; // reset error
321
322 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
323 }
324 // error bail out
325 if(U_FAILURE(*status)){
326 goto CLEANUP;
327 }
328 }
329
330 if(b1Len == 0){
331 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
332 goto CLEANUP;
333 }
334
335 srcIsASCII = true;
336 // step 3 & 4
337 for( j=0;j<b1Len;j++){
338 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII
339 srcIsASCII = false;
340 }else if(prep->isLDHChar(b1[j])==false){ // if the char is in ASCII range verify that it is an LDH character{
341 srcIsLDH = false;
342 }
343 }
344
345 if(useSTD3ASCIIRules == true){
346 // verify 3a and 3b
347 if( srcIsLDH == false /* source contains some non-LDH characters */
348 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
349 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
350 goto CLEANUP;
351 }
352 }
353 if(srcIsASCII){
354 if(b1Len <= destCapacity){
355 u_memmove(dest, b1, b1Len);
356 reqLength = b1Len;
357 }else{
358 reqLength = b1Len;
359 goto CLEANUP;
360 }
361 }else{
362 // step 5 : verify the sequence does not begin with ACE prefix
363 if(!startsWithPrefix(b1,b1Len)){
364
365 //step 6: encode the sequence with punycode
366 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
367
368 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status);
369 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status);
370 if(*status == U_BUFFER_OVERFLOW_ERROR){
371 // redo processing of string
372 /* we do not have enough room so grow the buffer*/
373 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
374 if(b2 == NULL){
375 *status = U_MEMORY_ALLOCATION_ERROR;
376 goto CLEANUP;
377 }
378
379 *status = U_ZERO_ERROR; // reset error
380
381 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
382 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);
383
384 }
385 //error bail out
386 if(U_FAILURE(*status)){
387 goto CLEANUP;
388 }
389 reqLength = b2Len+ACE_PREFIX_LENGTH;
390
391 if(reqLength > destCapacity){
392 *status = U_BUFFER_OVERFLOW_ERROR;
393 goto CLEANUP;
394 }
395 //Step 7: prepend the ACE prefix
396 u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH);
397 //Step 6: copy the contents in b2 into dest
398 u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
399
400 }else{
401 *status = U_IDNA_ACE_PREFIX_ERROR;
402 goto CLEANUP;
403 }
404 }
405
406 if(reqLength > MAX_LABEL_LENGTH){
407 *status = U_IDNA_LABEL_TOO_LONG_ERROR;
408 }
409
410 CLEANUP:
411 if(b1 != b1Stack){
412 uprv_free(b1);
413 }
414 if(b2 != b2Stack){
415 uprv_free(b2);
416 }
417 uprv_free(caseFlags);
418
419 // delete prep;
420
421 return u_terminateUChars(dest, destCapacity, reqLength, status);
422 }
423
424
425 U_CFUNC int32_t U_EXPORT2
idnaref_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)426 idnaref_toUnicode(const UChar* src, int32_t srcLength,
427 UChar* dest, int32_t destCapacity,
428 int32_t options,
429 UParseError* parseError,
430 UErrorCode* status){
431
432 if(status == NULL || U_FAILURE(*status)){
433 return 0;
434 }
435 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
436 *status = U_ILLEGAL_ARGUMENT_ERROR;
437 return 0;
438 }
439
440
441
442 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
443
444 //initialize pointers to stack buffers
445 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
446 int32_t b1Len, b2Len, b1PrimeLen, b3Len,
447 b1Capacity = MAX_LABEL_BUFFER_SIZE,
448 b2Capacity = MAX_LABEL_BUFFER_SIZE,
449 b3Capacity = MAX_LABEL_BUFFER_SIZE,
450 reqLength=0;
451 // UParseError parseError;
452
453 NamePrepTransform* prep = getInstance(*status);
454 b1Len = 0;
455 UBool* caseFlags = NULL;
456
457 //get the options
458 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
459 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
460
461 UBool srcIsASCII = true;
462 UBool srcIsLDH = true;
463 int32_t failPos =0;
464
465 if(U_FAILURE(*status)){
466 goto CLEANUP;
467 }
468 // step 1: find out if all the codepoints in src are ASCII
469 if(srcLength==-1){
470 srcLength = 0;
471 for(;src[srcLength]!=0;){
472 if(src[srcLength]> 0x7f){
473 srcIsASCII = false;
474 }if(prep->isLDHChar(src[srcLength])==false){
475 // here we do not assemble surrogates
476 // since we know that LDH code points
477 // are in the ASCII range only
478 srcIsLDH = false;
479 failPos = srcLength;
480 }
481 srcLength++;
482 }
483 }else{
484 for(int32_t j=0; j<srcLength; j++){
485 if(src[j]> 0x7f){
486 srcIsASCII = false;
487 }else if(prep->isLDHChar(src[j])==false){
488 // here we do not assemble surrogates
489 // since we know that LDH code points
490 // are in the ASCII range only
491 srcIsLDH = false;
492 failPos = j;
493 }
494 }
495 }
496
497 if(srcIsASCII == false){
498 // step 2: process the string
499 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
500 if(*status == U_BUFFER_OVERFLOW_ERROR){
501 // redo processing of string
502 /* we do not have enough room so grow the buffer*/
503 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
504 if(b1==NULL){
505 *status = U_MEMORY_ALLOCATION_ERROR;
506 goto CLEANUP;
507 }
508
509 *status = U_ZERO_ERROR; // reset error
510
511 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
512 }
513 //bail out on error
514 if(U_FAILURE(*status)){
515 goto CLEANUP;
516 }
517 }else{
518
519 // copy everything to b1
520 if(srcLength < b1Capacity){
521 u_memmove(b1, src, srcLength);
522 }else{
523 /* we do not have enough room so grow the buffer*/
524 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
525 if(b1==NULL){
526 *status = U_MEMORY_ALLOCATION_ERROR;
527 goto CLEANUP;
528 }
529 u_memmove(b1, src, srcLength);
530 }
531 b1Len = srcLength;
532 }
533 //step 3: verify ACE Prefix
534 if(startsWithPrefix(src,srcLength)){
535
536 //step 4: Remove the ACE Prefix
537 b1Prime = b1 + ACE_PREFIX_LENGTH;
538 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
539
540 //step 5: Decode using punycode
541 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status);
542 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status);
543
544 if(*status == U_BUFFER_OVERFLOW_ERROR){
545 // redo processing of string
546 /* we do not have enough room so grow the buffer*/
547 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
548 if(b2==NULL){
549 *status = U_MEMORY_ALLOCATION_ERROR;
550 goto CLEANUP;
551 }
552
553 *status = U_ZERO_ERROR; // reset error
554
555 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
556 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
557 }
558
559
560 //step 6:Apply toASCII
561 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);
562
563 if(*status == U_BUFFER_OVERFLOW_ERROR){
564 // redo processing of string
565 /* we do not have enough room so grow the buffer*/
566 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
567 if(b3==NULL){
568 *status = U_MEMORY_ALLOCATION_ERROR;
569 goto CLEANUP;
570 }
571
572 *status = U_ZERO_ERROR; // reset error
573
574 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
575
576 }
577 //bail out on error
578 if(U_FAILURE(*status)){
579 goto CLEANUP;
580 }
581
582 //step 7: verify
583 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
584 *status = U_IDNA_VERIFICATION_ERROR;
585 goto CLEANUP;
586 }
587
588 //step 8: return output of step 5
589 reqLength = b2Len;
590 if(b2Len <= destCapacity) {
591 u_memmove(dest, b2, b2Len);
592 }
593 }else{
594 // verify that STD3 ASCII rules are satisfied
595 if(useSTD3ASCIIRules == true){
596 if( srcIsLDH == false /* source contains some non-LDH characters */
597 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
598 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
599
600 /* populate the parseError struct */
601 if(srcIsLDH==false){
602 // failPos is always set the index of failure
603 uprv_syntaxError(src,failPos, srcLength,parseError);
604 }else if(src[0] == HYPHEN){
605 // fail position is 0
606 uprv_syntaxError(src,0,srcLength,parseError);
607 }else{
608 // the last index in the source is always length-1
609 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
610 }
611
612 goto CLEANUP;
613 }
614 }
615 //copy the source to destination
616 if(srcLength <= destCapacity){
617 u_memmove(dest, src, srcLength);
618 }
619 reqLength = srcLength;
620 }
621
622 CLEANUP:
623
624 if(b1 != b1Stack){
625 uprv_free(b1);
626 }
627 if(b2 != b2Stack){
628 uprv_free(b2);
629 }
630 uprv_free(caseFlags);
631
632 // The RFC states that
633 // <quote>
634 // ToUnicode never fails. If any step fails, then the original input
635 // is returned immediately in that step.
636 // </quote>
637 // So if any step fails lets copy source to destination
638 if(U_FAILURE(*status)){
639 //copy the source to destination
640 if(dest && srcLength <= destCapacity){
641 if(srcLength == -1) {
642 u_memmove(dest, src, u_strlen(src));
643 } else {
644 u_memmove(dest, src, srcLength);
645 }
646 }
647 reqLength = srcLength;
648 *status = U_ZERO_ERROR;
649 }
650 return u_terminateUChars(dest, destCapacity, reqLength, status);
651 }
652
653
654 static int32_t
getNextSeparator(UChar * src,int32_t srcLength,NamePrepTransform * prep,UChar ** limit,UBool * done,UErrorCode * status)655 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep,
656 UChar **limit,
657 UBool *done,
658 UErrorCode *status){
659 if(srcLength == -1){
660 int32_t i;
661 for(i=0 ; ;i++){
662 if(src[i] == 0){
663 *limit = src + i; // point to null
664 *done = true;
665 return i;
666 }
667 if(prep->isLabelSeparator(src[i],*status)){
668 *limit = src + (i+1); // go past the delimiter
669 return i;
670
671 }
672 }
673 }else{
674 int32_t i;
675 for(i=0;i<srcLength;i++){
676 if(prep->isLabelSeparator(src[i],*status)){
677 *limit = src + (i+1); // go past the delimiter
678 return i;
679 }
680 }
681 // we have not found the delimiter
682 if(i==srcLength){
683 *limit = src+srcLength;
684 *done = true;
685 }
686 return i;
687 }
688 }
689
690 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)691 idnaref_IDNToASCII( const UChar* src, int32_t srcLength,
692 UChar* dest, int32_t destCapacity,
693 int32_t options,
694 UParseError* parseError,
695 UErrorCode* status){
696
697 if(status == NULL || U_FAILURE(*status)){
698 return 0;
699 }
700 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
701 *status = U_ILLEGAL_ARGUMENT_ERROR;
702 return 0;
703 }
704
705 int32_t reqLength = 0;
706 // UParseError parseError;
707
708 NamePrepTransform* prep = getInstance(*status);
709
710 //initialize pointers to stack buffers
711 UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
712 UChar *b1 = b1Stack;
713 int32_t b1Len, labelLen;
714 UChar* delimiter = (UChar*)src;
715 UChar* labelStart = (UChar*)src;
716 int32_t remainingLen = srcLength;
717 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
718
719 //get the options
720 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
721 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
722 UBool done = false;
723
724 if(U_FAILURE(*status)){
725 goto CLEANUP;
726 }
727
728
729 if(srcLength == -1){
730 for(;;){
731
732 if(*delimiter == 0){
733 break;
734 }
735
736 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
737 b1Len = 0;
738 if(!(labelLen==0 && done)){// make sure this is not a root label separator.
739
740 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
741 options, parseError, status);
742
743 if(*status == U_BUFFER_OVERFLOW_ERROR){
744 // redo processing of string
745 /* we do not have enough room so grow the buffer*/
746 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
747 if(b1==NULL){
748 *status = U_MEMORY_ALLOCATION_ERROR;
749 goto CLEANUP;
750 }
751
752 *status = U_ZERO_ERROR; // reset error
753
754 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
755 options, parseError, status);
756
757 }
758 }
759
760 if(U_FAILURE(*status)){
761 goto CLEANUP;
762 }
763 int32_t tempLen = (reqLength + b1Len );
764 // copy to dest
765 if( tempLen< destCapacity){
766 u_memmove(dest+reqLength, b1, b1Len);
767 }
768
769 reqLength = tempLen;
770
771 // add the label separator
772 if(done == false){
773 if(reqLength < destCapacity){
774 dest[reqLength] = FULL_STOP;
775 }
776 reqLength++;
777 }
778
779 labelStart = delimiter;
780 }
781 }else{
782 for(;;){
783
784 if(delimiter == src+srcLength){
785 break;
786 }
787
788 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
789
790 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
791 options,parseError, status);
792
793 if(*status == U_BUFFER_OVERFLOW_ERROR){
794 // redo processing of string
795 /* we do not have enough room so grow the buffer*/
796 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
797 if(b1==NULL){
798 *status = U_MEMORY_ALLOCATION_ERROR;
799 goto CLEANUP;
800 }
801
802 *status = U_ZERO_ERROR; // reset error
803
804 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
805 options, parseError, status);
806
807 }
808
809 if(U_FAILURE(*status)){
810 goto CLEANUP;
811 }
812 int32_t tempLen = (reqLength + b1Len );
813 // copy to dest
814 if( tempLen< destCapacity){
815 u_memmove(dest+reqLength, b1, b1Len);
816 }
817
818 reqLength = tempLen;
819
820 // add the label separator
821 if(done == false){
822 if(reqLength < destCapacity){
823 dest[reqLength] = FULL_STOP;
824 }
825 reqLength++;
826 }
827
828 labelStart = delimiter;
829 remainingLen = static_cast<int32_t>(srcLength - (delimiter - src));
830 }
831 }
832
833
834 CLEANUP:
835
836 if(b1 != b1Stack){
837 uprv_free(b1);
838 }
839
840 // delete prep;
841
842 return u_terminateUChars(dest, destCapacity, reqLength, status);
843 }
844
845 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)846 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength,
847 UChar* dest, int32_t destCapacity,
848 int32_t options,
849 UParseError* parseError,
850 UErrorCode* status){
851
852 if(status == NULL || U_FAILURE(*status)){
853 return 0;
854 }
855 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
856 *status = U_ILLEGAL_ARGUMENT_ERROR;
857 return 0;
858 }
859
860 int32_t reqLength = 0;
861
862 UBool done = false;
863
864 NamePrepTransform* prep = getInstance(*status);
865
866 //initialize pointers to stack buffers
867 UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
868 UChar *b1 = b1Stack;
869 int32_t b1Len, labelLen;
870 UChar* delimiter = (UChar*)src;
871 UChar* labelStart = (UChar*)src;
872 int32_t remainingLen = srcLength;
873 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
874
875 //get the options
876 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
877 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
878
879 if(U_FAILURE(*status)){
880 goto CLEANUP;
881 }
882
883 if(srcLength == -1){
884 for(;;){
885
886 if(*delimiter == 0){
887 break;
888 }
889
890 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
891
892 if(labelLen==0 && done==false){
893 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
894 }
895 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
896 options, parseError, status);
897
898 if(*status == U_BUFFER_OVERFLOW_ERROR){
899 // redo processing of string
900 /* we do not have enough room so grow the buffer*/
901 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
902 if(b1==NULL){
903 *status = U_MEMORY_ALLOCATION_ERROR;
904 goto CLEANUP;
905 }
906
907 *status = U_ZERO_ERROR; // reset error
908
909 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
910 options, parseError, status);
911
912 }
913
914 if(U_FAILURE(*status)){
915 goto CLEANUP;
916 }
917 int32_t tempLen = (reqLength + b1Len );
918 // copy to dest
919 if( tempLen< destCapacity){
920 u_memmove(dest+reqLength, b1, b1Len);
921 }
922
923 reqLength = tempLen;
924 // add the label separator
925 if(done == false){
926 if(reqLength < destCapacity){
927 dest[reqLength] = FULL_STOP;
928 }
929 reqLength++;
930 }
931
932 labelStart = delimiter;
933 }
934 }else{
935 for(;;){
936
937 if(delimiter == src+srcLength){
938 break;
939 }
940
941 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
942
943 if(labelLen==0 && done==false){
944 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
945 }
946
947 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
948 options, parseError, status);
949
950 if(*status == U_BUFFER_OVERFLOW_ERROR){
951 // redo processing of string
952 /* we do not have enough room so grow the buffer*/
953 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
954 if(b1==NULL){
955 *status = U_MEMORY_ALLOCATION_ERROR;
956 goto CLEANUP;
957 }
958
959 *status = U_ZERO_ERROR; // reset error
960
961 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
962 options, parseError, status);
963
964 }
965
966 if(U_FAILURE(*status)){
967 goto CLEANUP;
968 }
969 int32_t tempLen = (reqLength + b1Len );
970 // copy to dest
971 if( tempLen< destCapacity){
972 u_memmove(dest+reqLength, b1, b1Len);
973 }
974
975 reqLength = tempLen;
976
977 // add the label separator
978 if(done == false){
979 if(reqLength < destCapacity){
980 dest[reqLength] = FULL_STOP;
981 }
982 reqLength++;
983 }
984
985 labelStart = delimiter;
986 remainingLen = static_cast<int32_t>(srcLength - (delimiter - src));
987 }
988 }
989
990 CLEANUP:
991
992 if(b1 != b1Stack){
993 uprv_free(b1);
994 }
995
996 // delete prep;
997
998 return u_terminateUChars(dest, destCapacity, reqLength, status);
999 }
1000
1001 U_CFUNC int32_t U_EXPORT2
idnaref_compare(const UChar * s1,int32_t length1,const UChar * s2,int32_t length2,int32_t options,UErrorCode * status)1002 idnaref_compare( const UChar *s1, int32_t length1,
1003 const UChar *s2, int32_t length2,
1004 int32_t options,
1005 UErrorCode* status){
1006
1007 if(status == NULL || U_FAILURE(*status)){
1008 return -1;
1009 }
1010
1011 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
1012 UChar *b1 = b1Stack, *b2 = b2Stack;
1013 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
1014 int32_t result = -1;
1015
1016 UParseError parseError;
1017
1018 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
1019 if(*status == U_BUFFER_OVERFLOW_ERROR){
1020 // redo processing of string
1021 /* we do not have enough room so grow the buffer*/
1022 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
1023 if(b1==NULL){
1024 *status = U_MEMORY_ALLOCATION_ERROR;
1025 goto CLEANUP;
1026 }
1027
1028 *status = U_ZERO_ERROR; // reset error
1029
1030 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
1031
1032 }
1033
1034 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status);
1035 if(*status == U_BUFFER_OVERFLOW_ERROR){
1036 // redo processing of string
1037 /* we do not have enough room so grow the buffer*/
1038 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
1039 if(b2==NULL){
1040 *status = U_MEMORY_ALLOCATION_ERROR;
1041 goto CLEANUP;
1042 }
1043
1044 *status = U_ZERO_ERROR; // reset error
1045
1046 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status);
1047
1048 }
1049 // when toASCII is applied all label separators are replaced with FULL_STOP
1050 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
1051
1052 CLEANUP:
1053 if(b1 != b1Stack){
1054 uprv_free(b1);
1055 }
1056
1057 if(b2 != b2Stack){
1058 uprv_free(b2);
1059 }
1060
1061 return result;
1062 }
1063 #endif /* #if !UCONFIG_NO_IDNA */
1064