/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  uidna.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003feb1
 *   created by: Ram Viswanadha
 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA
20
21 #include "unicode/uidna.h"
22 #include "unicode/ustring.h"
23 #include "unicode/usprep.h"
24 #include "punycode.h"
25 #include "ustr_imp.h"
26 #include "cmemory.h"
27 #include "uassert.h"
28 #include "sprpimpl.h"
29
30 /* it is official IDNA ACE Prefix is "xn--" */
31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
32 #define ACE_PREFIX_LENGTH 4
33
34 #define MAX_LABEL_LENGTH 63
35 /* The Max length of the labels should not be more than MAX_LABEL_LENGTH */
36 #define MAX_LABEL_BUFFER_SIZE 100
37
38 #define MAX_DOMAIN_NAME_LENGTH 255
39 /* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */
40 #define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1
41
42 #define LOWER_CASE_DELTA 0x0020
43 #define HYPHEN 0x002D
44 #define FULL_STOP 0x002E
45 #define CAPITAL_A 0x0041
46 #define CAPITAL_Z 0x005A
47
48 inline static UChar
toASCIILower(UChar ch)49 toASCIILower(UChar ch){
50 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
51 return ch + LOWER_CASE_DELTA;
52 }
53 return ch;
54 }
55
56 inline static UBool
startsWithPrefix(const UChar * src,int32_t srcLength)57 startsWithPrefix(const UChar* src , int32_t srcLength){
58 UBool startsWithPrefix = TRUE;
59
60 if(srcLength < ACE_PREFIX_LENGTH){
61 return FALSE;
62 }
63
64 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
65 if(toASCIILower(src[i]) != ACE_PREFIX[i]){
66 startsWithPrefix = FALSE;
67 }
68 }
69 return startsWithPrefix;
70 }
71
72
73 inline static int32_t
compareCaseInsensitiveASCII(const UChar * s1,int32_t s1Len,const UChar * s2,int32_t s2Len)74 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
75 const UChar* s2, int32_t s2Len){
76
77 int32_t minLength;
78 int32_t lengthResult;
79
80 // are we comparing different lengths?
81 if(s1Len != s2Len) {
82 if(s1Len < s2Len) {
83 minLength = s1Len;
84 lengthResult = -1;
85 } else {
86 minLength = s2Len;
87 lengthResult = 1;
88 }
89 } else {
90 // ok the lengths are equal
91 minLength = s1Len;
92 lengthResult = 0;
93 }
94
95 UChar c1,c2;
96 int32_t rc;
97
98 for(int32_t i =0;/* no condition */;i++) {
99
100 /* If we reach the ends of both strings then they match */
101 if(i == minLength) {
102 return lengthResult;
103 }
104
105 c1 = s1[i];
106 c2 = s2[i];
107
108 /* Case-insensitive comparison */
109 if(c1!=c2) {
110 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
111 if(rc!=0) {
112 lengthResult=rc;
113 break;
114 }
115 }
116 }
117 return lengthResult;
118 }
119
120
121 /**
122 * Ascertain if the given code point is a label separator as
123 * defined by the IDNA RFC
124 *
125 * @param ch The code point to be ascertained
126 * @return true if the char is a label separator
127 * @stable ICU 2.8
128 */
isLabelSeparator(UChar ch)129 static inline UBool isLabelSeparator(UChar ch){
130 switch(ch){
131 case 0x002e:
132 case 0x3002:
133 case 0xFF0E:
134 case 0xFF61:
135 return TRUE;
136 default:
137 return FALSE;
138 }
139 }
140
141 // returns the length of the label excluding the separator
142 // if *limit == separator then the length returned does not include
143 // the separtor.
144 static inline int32_t
getNextSeparator(UChar * src,int32_t srcLength,UChar ** limit,UBool * done)145 getNextSeparator(UChar *src, int32_t srcLength,
146 UChar **limit, UBool *done){
147 if(srcLength == -1){
148 int32_t i;
149 for(i=0 ; ;i++){
150 if(src[i] == 0){
151 *limit = src + i; // point to null
152 *done = TRUE;
153 return i;
154 }
155 if(isLabelSeparator(src[i])){
156 *limit = src + (i+1); // go past the delimiter
157 return i;
158
159 }
160 }
161 }else{
162 int32_t i;
163 for(i=0;i<srcLength;i++){
164 if(isLabelSeparator(src[i])){
165 *limit = src + (i+1); // go past the delimiter
166 return i;
167 }
168 }
169 // we have not found the delimiter
170 // if(i==srcLength)
171 *limit = src+srcLength;
172 *done = TRUE;
173
174 return i;
175 }
176 }
isLDHChar(UChar ch)177 static inline UBool isLDHChar(UChar ch){
178 // high runner case
179 if(ch>0x007A){
180 return FALSE;
181 }
182 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
183 if( (ch==0x002D) ||
184 (0x0030 <= ch && ch <= 0x0039) ||
185 (0x0041 <= ch && ch <= 0x005A) ||
186 (0x0061 <= ch && ch <= 0x007A)
187 ){
188 return TRUE;
189 }
190 return FALSE;
191 }
192
193 static int32_t
_internal_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UStringPrepProfile * nameprep,UParseError * parseError,UErrorCode * status)194 _internal_toASCII(const UChar* src, int32_t srcLength,
195 UChar* dest, int32_t destCapacity,
196 int32_t options,
197 UStringPrepProfile* nameprep,
198 UParseError* parseError,
199 UErrorCode* status)
200 {
201
202 // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
203 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
204 //initialize pointers to stack buffers
205 UChar *b1 = b1Stack, *b2 = b2Stack;
206 int32_t b1Len=0, b2Len,
207 b1Capacity = MAX_LABEL_BUFFER_SIZE,
208 b2Capacity = MAX_LABEL_BUFFER_SIZE ,
209 reqLength=0;
210
211 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
212 UBool* caseFlags = NULL;
213
214 // the source contains all ascii codepoints
215 UBool srcIsASCII = TRUE;
216 // assume the source contains all LDH codepoints
217 UBool srcIsLDH = TRUE;
218
219 int32_t j=0;
220
221 //get the options
222 UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
223
224 int32_t failPos = -1;
225
226 if(srcLength == -1){
227 srcLength = u_strlen(src);
228 }
229
230 if(srcLength > b1Capacity){
231 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
232 if(b1==NULL){
233 *status = U_MEMORY_ALLOCATION_ERROR;
234 goto CLEANUP;
235 }
236 b1Capacity = srcLength;
237 }
238
239 // step 1
240 for( j=0;j<srcLength;j++){
241 if(src[j] > 0x7F){
242 srcIsASCII = FALSE;
243 }
244 b1[b1Len++] = src[j];
245 }
246
247 // step 2 is performed only if the source contains non ASCII
248 if(srcIsASCII == FALSE){
249
250 // step 2
251 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
252
253 if(*status == U_BUFFER_OVERFLOW_ERROR){
254 // redo processing of string
255 // we do not have enough room so grow the buffer
256 if(b1 != b1Stack){
257 uprv_free(b1);
258 }
259 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
260 if(b1==NULL){
261 *status = U_MEMORY_ALLOCATION_ERROR;
262 goto CLEANUP;
263 }
264
265 *status = U_ZERO_ERROR; // reset error
266
267 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
268 }
269 }
270 // error bail out
271 if(U_FAILURE(*status)){
272 goto CLEANUP;
273 }
274 if(b1Len == 0){
275 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
276 goto CLEANUP;
277 }
278
279 // for step 3 & 4
280 srcIsASCII = TRUE;
281 for( j=0;j<b1Len;j++){
282 // check if output of usprep_prepare is all ASCII
283 if(b1[j] > 0x7F){
284 srcIsASCII = FALSE;
285 }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character
286 srcIsLDH = FALSE;
287 failPos = j;
288 }
289 }
290 if(useSTD3ASCIIRules == TRUE){
291 // verify 3a and 3b
292 // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
293 // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
294 // 3(b) Verify the absence of leading and trailing hyphen-minus; that
295 // is, the absence of U+002D at the beginning and end of the
296 // sequence.
297 if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */
298 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
299 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
300
301 /* populate the parseError struct */
302 if(srcIsLDH==FALSE){
303 // failPos is always set the index of failure
304 uprv_syntaxError(b1,failPos, b1Len,parseError);
305 }else if(b1[0] == HYPHEN){
306 // fail position is 0
307 uprv_syntaxError(b1,0,b1Len,parseError);
308 }else{
309 // the last index in the source is always length-1
310 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
311 }
312
313 goto CLEANUP;
314 }
315 }
316 // Step 4: if the source is ASCII then proceed to step 8
317 if(srcIsASCII){
318 if(b1Len <= destCapacity){
319 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
320 reqLength = b1Len;
321 }else{
322 reqLength = b1Len;
323 goto CLEANUP;
324 }
325 }else{
326 // step 5 : verify the sequence does not begin with ACE prefix
327 if(!startsWithPrefix(b1,b1Len)){
328
329 //step 6: encode the sequence with punycode
330
331 // do not preserve the case flags for now!
332 // TODO: Preserve the case while implementing the RFE
333 // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
334 // uprv_memset(caseFlags,TRUE,b1Len);
335
336 b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
337
338 if(*status == U_BUFFER_OVERFLOW_ERROR){
339 // redo processing of string
340 /* we do not have enough room so grow the buffer*/
341 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
342 if(b2 == NULL){
343 *status = U_MEMORY_ALLOCATION_ERROR;
344 goto CLEANUP;
345 }
346
347 *status = U_ZERO_ERROR; // reset error
348
349 b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
350 }
351 //error bail out
352 if(U_FAILURE(*status)){
353 goto CLEANUP;
354 }
355 // TODO : Reconsider while implementing the case preserve RFE
356 // convert all codepoints to lower case ASCII
357 // toASCIILower(b2,b2Len);
358 reqLength = b2Len+ACE_PREFIX_LENGTH;
359
360 if(reqLength > destCapacity){
361 *status = U_BUFFER_OVERFLOW_ERROR;
362 goto CLEANUP;
363 }
364 //Step 7: prepend the ACE prefix
365 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
366 //Step 6: copy the contents in b2 into dest
367 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
368
369 }else{
370 *status = U_IDNA_ACE_PREFIX_ERROR;
371 //position of failure is 0
372 uprv_syntaxError(b1,0,b1Len,parseError);
373 goto CLEANUP;
374 }
375 }
376 // step 8: verify the length of label
377 if(reqLength > MAX_LABEL_LENGTH){
378 *status = U_IDNA_LABEL_TOO_LONG_ERROR;
379 }
380
381 CLEANUP:
382 if(b1 != b1Stack){
383 uprv_free(b1);
384 }
385 if(b2 != b2Stack){
386 uprv_free(b2);
387 }
388 uprv_free(caseFlags);
389
390 return u_terminateUChars(dest, destCapacity, reqLength, status);
391 }
392
393 static int32_t
_internal_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UStringPrepProfile * nameprep,UParseError * parseError,UErrorCode * status)394 _internal_toUnicode(const UChar* src, int32_t srcLength,
395 UChar* dest, int32_t destCapacity,
396 int32_t options,
397 UStringPrepProfile* nameprep,
398 UParseError* parseError,
399 UErrorCode* status)
400 {
401
402 //get the options
403 //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
404 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
405
406 // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
407 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
408
409 //initialize pointers to stack buffers
410 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
411 int32_t b1Len, b2Len, b1PrimeLen, b3Len,
412 b1Capacity = MAX_LABEL_BUFFER_SIZE,
413 b2Capacity = MAX_LABEL_BUFFER_SIZE,
414 b3Capacity = MAX_LABEL_BUFFER_SIZE,
415 reqLength=0;
416
417 b1Len = 0;
418 UBool* caseFlags = NULL;
419
420 UBool srcIsASCII = TRUE;
421 /*UBool srcIsLDH = TRUE;
422 int32_t failPos =0;*/
423
424 // step 1: find out if all the codepoints in src are ASCII
425 if(srcLength==-1){
426 srcLength = 0;
427 for(;src[srcLength]!=0;){
428 if(src[srcLength]> 0x7f){
429 srcIsASCII = FALSE;
430 }/*else if(isLDHChar(src[srcLength])==FALSE){
431 // here we do not assemble surrogates
432 // since we know that LDH code points
433 // are in the ASCII range only
434 srcIsLDH = FALSE;
435 failPos = srcLength;
436 }*/
437 srcLength++;
438 }
439 }else if(srcLength > 0){
440 for(int32_t j=0; j<srcLength; j++){
441 if(src[j]> 0x7f){
442 srcIsASCII = FALSE;
443 }/*else if(isLDHChar(src[j])==FALSE){
444 // here we do not assemble surrogates
445 // since we know that LDH code points
446 // are in the ASCII range only
447 srcIsLDH = FALSE;
448 failPos = j;
449 }*/
450 }
451 }else{
452 return 0;
453 }
454
455 if(srcIsASCII == FALSE){
456 // step 2: process the string
457 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
458 if(*status == U_BUFFER_OVERFLOW_ERROR){
459 // redo processing of string
460 /* we do not have enough room so grow the buffer*/
461 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
462 if(b1==NULL){
463 *status = U_MEMORY_ALLOCATION_ERROR;
464 goto CLEANUP;
465 }
466
467 *status = U_ZERO_ERROR; // reset error
468
469 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
470 }
471 //bail out on error
472 if(U_FAILURE(*status)){
473 goto CLEANUP;
474 }
475 }else{
476
477 //just point src to b1
478 b1 = (UChar*) src;
479 b1Len = srcLength;
480 }
481
482 // The RFC states that
483 // <quote>
484 // ToUnicode never fails. If any step fails, then the original input
485 // is returned immediately in that step.
486 // </quote>
487
488 //step 3: verify ACE Prefix
489 if(startsWithPrefix(b1,b1Len)){
490
491 //step 4: Remove the ACE Prefix
492 b1Prime = b1 + ACE_PREFIX_LENGTH;
493 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
494
495 //step 5: Decode using punycode
496 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
497
498 if(*status == U_BUFFER_OVERFLOW_ERROR){
499 // redo processing of string
500 /* we do not have enough room so grow the buffer*/
501 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
502 if(b2==NULL){
503 *status = U_MEMORY_ALLOCATION_ERROR;
504 goto CLEANUP;
505 }
506
507 *status = U_ZERO_ERROR; // reset error
508
509 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
510 }
511
512
513 //step 6:Apply toASCII
514 b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
515
516 if(*status == U_BUFFER_OVERFLOW_ERROR){
517 // redo processing of string
518 /* we do not have enough room so grow the buffer*/
519 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
520 if(b3==NULL){
521 *status = U_MEMORY_ALLOCATION_ERROR;
522 goto CLEANUP;
523 }
524
525 *status = U_ZERO_ERROR; // reset error
526
527 b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
528
529 }
530 //bail out on error
531 if(U_FAILURE(*status)){
532 goto CLEANUP;
533 }
534
535 //step 7: verify
536 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
537 // Cause the original to be returned.
538 *status = U_IDNA_VERIFICATION_ERROR;
539 goto CLEANUP;
540 }
541
542 //step 8: return output of step 5
543 reqLength = b2Len;
544 if(b2Len <= destCapacity) {
545 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
546 }
547 }
548 else{
549 // See the start of this if statement for why this is commented out.
550 // verify that STD3 ASCII rules are satisfied
551 /*if(useSTD3ASCIIRules == TRUE){
552 if( srcIsLDH == FALSE // source contains some non-LDH characters
553 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
554 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
555
556 // populate the parseError struct
557 if(srcIsLDH==FALSE){
558 // failPos is always set the index of failure
559 uprv_syntaxError(src,failPos, srcLength,parseError);
560 }else if(src[0] == HYPHEN){
561 // fail position is 0
562 uprv_syntaxError(src,0,srcLength,parseError);
563 }else{
564 // the last index in the source is always length-1
565 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
566 }
567
568 goto CLEANUP;
569 }
570 }*/
571 // just return the source
572 //copy the source to destination
573 if(srcLength <= destCapacity){
574 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
575 }
576 reqLength = srcLength;
577 }
578
579
580 CLEANUP:
581
582 if(b1 != b1Stack && b1!=src){
583 uprv_free(b1);
584 }
585 if(b2 != b2Stack){
586 uprv_free(b2);
587 }
588 uprv_free(caseFlags);
589
590 // The RFC states that
591 // <quote>
592 // ToUnicode never fails. If any step fails, then the original input
593 // is returned immediately in that step.
594 // </quote>
595 // So if any step fails lets copy source to destination
596 if(U_FAILURE(*status)){
597 //copy the source to destination
598 if(dest && srcLength <= destCapacity){
599 // srcLength should have already been set earlier.
600 U_ASSERT(srcLength >= 0);
601 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
602 }
603 reqLength = srcLength;
604 *status = U_ZERO_ERROR;
605 }
606
607 return u_terminateUChars(dest, destCapacity, reqLength, status);
608 }
609
610 U_CAPI int32_t U_EXPORT2
uidna_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)611 uidna_toASCII(const UChar* src, int32_t srcLength,
612 UChar* dest, int32_t destCapacity,
613 int32_t options,
614 UParseError* parseError,
615 UErrorCode* status){
616
617 if(status == NULL || U_FAILURE(*status)){
618 return 0;
619 }
620 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
621 *status = U_ILLEGAL_ARGUMENT_ERROR;
622 return 0;
623 }
624
625 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
626
627 if(U_FAILURE(*status)){
628 return -1;
629 }
630
631 int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
632
633 /* close the profile*/
634 usprep_close(nameprep);
635
636 return retLen;
637 }
638
639 U_CAPI int32_t U_EXPORT2
uidna_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)640 uidna_toUnicode(const UChar* src, int32_t srcLength,
641 UChar* dest, int32_t destCapacity,
642 int32_t options,
643 UParseError* parseError,
644 UErrorCode* status){
645
646 if(status == NULL || U_FAILURE(*status)){
647 return 0;
648 }
649 if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
650 *status = U_ILLEGAL_ARGUMENT_ERROR;
651 return 0;
652 }
653
654 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
655
656 if(U_FAILURE(*status)){
657 return -1;
658 }
659
660 int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
661
662 usprep_close(nameprep);
663
664 return retLen;
665 }
666
667
668 U_CAPI int32_t U_EXPORT2
uidna_IDNToASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)669 uidna_IDNToASCII( const UChar *src, int32_t srcLength,
670 UChar* dest, int32_t destCapacity,
671 int32_t options,
672 UParseError *parseError,
673 UErrorCode *status){
674
675 if(status == NULL || U_FAILURE(*status)){
676 return 0;
677 }
678 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
679 *status = U_ILLEGAL_ARGUMENT_ERROR;
680 return 0;
681 }
682
683 int32_t reqLength = 0;
684
685 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
686
687 if(U_FAILURE(*status)){
688 return 0;
689 }
690
691 //initialize pointers
692 UChar *delimiter = (UChar*)src;
693 UChar *labelStart = (UChar*)src;
694 UChar *currentDest = (UChar*) dest;
695 int32_t remainingLen = srcLength;
696 int32_t remainingDestCapacity = destCapacity;
697 int32_t labelLen = 0, labelReqLength = 0;
698 UBool done = FALSE;
699
700
701 for(;;){
702
703 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
704 labelReqLength = 0;
705 if(!(labelLen==0 && done)){// make sure this is not a root label separator.
706
707 labelReqLength = _internal_toASCII( labelStart, labelLen,
708 currentDest, remainingDestCapacity,
709 options, nameprep,
710 parseError, status);
711
712 if(*status == U_BUFFER_OVERFLOW_ERROR){
713
714 *status = U_ZERO_ERROR; // reset error
715 remainingDestCapacity = 0;
716 }
717 }
718
719
720 if(U_FAILURE(*status)){
721 break;
722 }
723
724 reqLength +=labelReqLength;
725 // adjust the destination pointer
726 if(labelReqLength < remainingDestCapacity){
727 currentDest = currentDest + labelReqLength;
728 remainingDestCapacity -= labelReqLength;
729 }else{
730 // should never occur
731 remainingDestCapacity = 0;
732 }
733
734 if(done == TRUE){
735 break;
736 }
737
738 // add the label separator
739 if(remainingDestCapacity > 0){
740 *currentDest++ = FULL_STOP;
741 remainingDestCapacity--;
742 }
743 reqLength++;
744
745 labelStart = delimiter;
746 if(remainingLen >0 ){
747 remainingLen = (int32_t)(srcLength - (delimiter - src));
748 }
749
750 }
751
752 if(reqLength > MAX_DOMAIN_NAME_LENGTH){
753 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
754 }
755
756 usprep_close(nameprep);
757
758 return u_terminateUChars(dest, destCapacity, reqLength, status);
759 }
760
761 U_CAPI int32_t U_EXPORT2
uidna_IDNToUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)762 uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
763 UChar* dest, int32_t destCapacity,
764 int32_t options,
765 UParseError* parseError,
766 UErrorCode* status){
767
768 if(status == NULL || U_FAILURE(*status)){
769 return 0;
770 }
771 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
772 *status = U_ILLEGAL_ARGUMENT_ERROR;
773 return 0;
774 }
775
776 int32_t reqLength = 0;
777
778 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
779
780 if(U_FAILURE(*status)){
781 return 0;
782 }
783
784 //initialize pointers
785 UChar *delimiter = (UChar*)src;
786 UChar *labelStart = (UChar*)src;
787 UChar *currentDest = (UChar*) dest;
788 int32_t remainingLen = srcLength;
789 int32_t remainingDestCapacity = destCapacity;
790 int32_t labelLen = 0, labelReqLength = 0;
791 UBool done = FALSE;
792
793 for(;;){
794
795 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
796
797 // The RFC states that
798 // <quote>
799 // ToUnicode never fails. If any step fails, then the original input
800 // is returned immediately in that step.
801 // </quote>
802 // _internal_toUnicode will copy the label.
803 /*if(labelLen==0 && done==FALSE){
804 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
805 break;
806 }*/
807
808 labelReqLength = _internal_toUnicode(labelStart, labelLen,
809 currentDest, remainingDestCapacity,
810 options, nameprep,
811 parseError, status);
812
813 if(*status == U_BUFFER_OVERFLOW_ERROR){
814 *status = U_ZERO_ERROR; // reset error
815 remainingDestCapacity = 0;
816 }
817
818 if(U_FAILURE(*status)){
819 break;
820 }
821
822 reqLength +=labelReqLength;
823 // adjust the destination pointer
824 if(labelReqLength < remainingDestCapacity){
825 currentDest = currentDest + labelReqLength;
826 remainingDestCapacity -= labelReqLength;
827 }else{
828 // should never occur
829 remainingDestCapacity = 0;
830 }
831
832 if(done == TRUE){
833 break;
834 }
835
836 // add the label separator
837 // Unlike the ToASCII operation we don't normalize the label separators
838 if(remainingDestCapacity > 0){
839 *currentDest++ = *(labelStart + labelLen);
840 remainingDestCapacity--;
841 }
842 reqLength++;
843
844 labelStart = delimiter;
845 if(remainingLen >0 ){
846 remainingLen = (int32_t)(srcLength - (delimiter - src));
847 }
848
849 }
850
851 if(reqLength > MAX_DOMAIN_NAME_LENGTH){
852 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
853 }
854
855 usprep_close(nameprep);
856
857 return u_terminateUChars(dest, destCapacity, reqLength, status);
858 }
859
860 U_CAPI int32_t U_EXPORT2
uidna_compare(const UChar * s1,int32_t length1,const UChar * s2,int32_t length2,int32_t options,UErrorCode * status)861 uidna_compare( const UChar *s1, int32_t length1,
862 const UChar *s2, int32_t length2,
863 int32_t options,
864 UErrorCode* status){
865
866 if(status == NULL || U_FAILURE(*status)){
867 return -1;
868 }
869
870 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
871 UChar *b1 = b1Stack, *b2 = b2Stack;
872 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
873 int32_t result=-1;
874
875 UParseError parseError;
876
877 b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
878 if(*status == U_BUFFER_OVERFLOW_ERROR){
879 // redo processing of string
880 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
881 if(b1==NULL){
882 *status = U_MEMORY_ALLOCATION_ERROR;
883 goto CLEANUP;
884 }
885
886 *status = U_ZERO_ERROR; // reset error
887
888 b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
889
890 }
891
892 b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
893 if(*status == U_BUFFER_OVERFLOW_ERROR){
894 // redo processing of string
895 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
896 if(b2==NULL){
897 *status = U_MEMORY_ALLOCATION_ERROR;
898 goto CLEANUP;
899 }
900
901 *status = U_ZERO_ERROR; // reset error
902
903 b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
904
905 }
906 // when toASCII is applied all label separators are replaced with FULL_STOP
907 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
908
909 CLEANUP:
910 if(b1 != b1Stack){
911 uprv_free(b1);
912 }
913
914 if(b2 != b2Stack){
915 uprv_free(b2);
916 }
917
918 return result;
919 }
920
921 #endif /* #if !UCONFIG_NO_IDNA */
922