• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2011, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  idnaref.cpp
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003feb1
16  *   created by: Ram Viswanadha
17  */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
22 #include "idnaref.h"
23 #include "punyref.h"
24 #include "ustr_imp.h"
25 #include "cmemory.h"
26 #include "sprpimpl.h"
27 #include "nptrans.h"
28 #include "testidna.h"
29 #include "punycode.h"
30 #include "unicode/ustring.h"
31 
/* The official IDNA ACE prefix is "xn--". */
33 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
34 #define ACE_PREFIX_LENGTH 4
35 
36 #define MAX_LABEL_LENGTH 63
37 #define HYPHEN      0x002D
38 /* The Max length of the labels should not be more than 64 */
39 #define MAX_LABEL_BUFFER_SIZE 100
40 #define MAX_IDN_BUFFER_SIZE   300
41 
42 #define CAPITAL_A        0x0041
43 #define CAPITAL_Z        0x005A
44 #define LOWER_CASE_DELTA 0x0020
45 #define FULL_STOP        0x002E
46 
47 
48 inline static UBool
startsWithPrefix(const UChar * src,int32_t srcLength)49 startsWithPrefix(const UChar* src , int32_t srcLength){
50     UBool startsWithPrefix = true;
51 
52     if(srcLength < ACE_PREFIX_LENGTH){
53         return false;
54     }
55 
56     for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
57         if(u_tolower(src[i]) != ACE_PREFIX[i]){
58             startsWithPrefix = false;
59         }
60     }
61     return startsWithPrefix;
62 }
63 
64 inline static UChar
toASCIILower(UChar ch)65 toASCIILower(UChar ch){
66     if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
67         return ch + LOWER_CASE_DELTA;
68     }
69     return ch;
70 }
71 
72 inline static int32_t
compareCaseInsensitiveASCII(const UChar * s1,int32_t s1Len,const UChar * s2,int32_t s2Len)73 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
74                             const UChar* s2, int32_t s2Len){
75     if(s1Len != s2Len){
76         return (s1Len > s2Len) ? s1Len : s2Len;
77     }
78     UChar c1,c2;
79     int32_t rc;
80 
81     for(int32_t i =0;/* no condition */;i++) {
82         /* If we reach the ends of both strings then they match */
83         if(i == s1Len) {
84             return 0;
85         }
86 
87         c1 = s1[i];
88         c2 = s2[i];
89 
90         /* Case-insensitive comparison */
91         if(c1!=c2) {
92             rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
93             if(rc!=0) {
94                 return rc;
95             }
96         }
97     }
98 
99 }
100 
getError(enum punycode_status status)101 static UErrorCode getError(enum punycode_status status){
102     switch(status){
103     case punycode_success:
104         return U_ZERO_ERROR;
105     case punycode_bad_input:   /* Input is invalid.                         */
106         return U_INVALID_CHAR_FOUND;
107     case punycode_big_output:  /* Output would exceed the space provided.   */
108         return U_BUFFER_OVERFLOW_ERROR;
109     case punycode_overflow :    /* Input requires wider integers to process. */
110         return U_INDEX_OUTOFBOUNDS_ERROR;
111     default:
112         return U_INTERNAL_PROGRAM_ERROR;
113     }
114 }
115 
convertASCIIToUChars(const char * src,UChar * dest,int32_t length)116 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){
117     int i;
118     for(i=0;i<length;i++){
119         dest[i] = src[i];
120     }
121     return i;
122 }
convertUCharsToASCII(const UChar * src,char * dest,int32_t length)123 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){
124     int i;
125     for(i=0;i<length;i++){
126         dest[i] = (char)src[i];
127     }
128     return i;
129 }
130 // wrapper around the reference Punycode implementation
convertToPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)131 static int32_t convertToPuny(const UChar* src, int32_t srcLength,
132                              UChar* dest, int32_t destCapacity,
133                              UErrorCode& status){
134     uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
135     int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
136     uint32_t* b1 = b1Stack;
137     char b2Stack[MAX_LABEL_BUFFER_SIZE];
138     char* b2 = b2Stack;
139     int32_t b2Len =MAX_LABEL_BUFFER_SIZE ;
140     punycode_status error;
141     unsigned char* caseFlags = NULL;
142 
143     u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status);
144     if(status == U_BUFFER_OVERFLOW_ERROR){
145         // redo processing of string
146         /* we do not have enough room so grow the buffer*/
147         b1 =  (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t));
148         if(b1==NULL){
149             status = U_MEMORY_ALLOCATION_ERROR;
150             goto CLEANUP;
151         }
152 
153         status = U_ZERO_ERROR; // reset error
154 
155         u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
156     }
157     if(U_FAILURE(status)){
158         goto CLEANUP;
159     }
160 
161     //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char));
162 
163     error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
164     status = getError(error);
165 
166     if(status == U_BUFFER_OVERFLOW_ERROR){
167         /* we do not have enough room so grow the buffer*/
168         b2 = (char*) uprv_malloc( b2Len * sizeof(char));
169         if(b2==NULL){
170             status = U_MEMORY_ALLOCATION_ERROR;
171             goto CLEANUP;
172         }
173 
174         status = U_ZERO_ERROR; // reset error
175 
176         punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
177         status = getError(error);
178     }
179     if(U_FAILURE(status)){
180         goto CLEANUP;
181     }
182 
183     if(b2Len < destCapacity){
184           convertASCIIToUChars(b2,dest,b2Len);
185     }else{
186         status =U_BUFFER_OVERFLOW_ERROR;
187     }
188 
189 CLEANUP:
190     if(b1Stack != b1){
191         uprv_free(b1);
192     }
193     if(b2Stack != b2){
194         uprv_free(b2);
195     }
196     uprv_free(caseFlags);
197 
198     return b2Len;
199 }
200 
201 
getInstance(UErrorCode & status)202 static NamePrepTransform* getInstance(UErrorCode& status){
203     TestIDNA *thisTest = dynamic_cast<TestIDNA *>(IntlTest::gTest);
204     if (thisTest == nullptr && U_SUCCESS(status)) {
205         status = U_INTERNAL_PROGRAM_ERROR;
206     }
207     if (U_FAILURE(status)) return nullptr;
208     return thisTest->getInstance(status);
209 }
210 
211 
convertFromPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)212 static int32_t convertFromPuny(  const UChar* src, int32_t srcLength,
213                                  UChar* dest, int32_t destCapacity,
214                                  UErrorCode& status){
215     char b1Stack[MAX_LABEL_BUFFER_SIZE];
216     char* b1 = b1Stack;
217     int32_t destLen =0;
218 
219     convertUCharsToASCII(src, b1,srcLength);
220 
221     uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
222     uint32_t* b2 = b2Stack;
223     int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
224     unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
225     punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
226     status = getError(error);
227     if(status == U_BUFFER_OVERFLOW_ERROR){
228         b2 =  (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
229         if(b2 == NULL){
230             status = U_MEMORY_ALLOCATION_ERROR;
231             goto CLEANUP;
232         }
233         error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
234         status = getError(error);
235     }
236 
237     if(U_FAILURE(status)){
238         goto CLEANUP;
239     }
240 
241     u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);
242 
243 CLEANUP:
244     if(b1Stack != b1){
245         uprv_free(b1);
246     }
247     if(b2Stack != b2){
248         uprv_free(b2);
249     }
250     uprv_free(caseFlags);
251 
252     return destLen;
253 }
254 
255 
256 U_CFUNC int32_t U_EXPORT2
idnaref_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)257 idnaref_toASCII(const UChar* src, int32_t srcLength,
258               UChar* dest, int32_t destCapacity,
259               int32_t options,
260               UParseError* parseError,
261               UErrorCode* status){
262 
263     if(status == NULL || U_FAILURE(*status)){
264         return 0;
265     }
266     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
267         *status = U_ILLEGAL_ARGUMENT_ERROR;
268         return 0;
269     }
270     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
271     //initialize pointers to stack buffers
272     UChar  *b1 = b1Stack, *b2 = b2Stack;
273     int32_t b1Len=0, b2Len=0,
274             b1Capacity = MAX_LABEL_BUFFER_SIZE,
275             b2Capacity = MAX_LABEL_BUFFER_SIZE ,
276             reqLength=0;
277 
278     //get the options
279     UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
280     UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
281 
282     UBool* caseFlags = NULL;
283 
284     // assume the source contains all ascii codepoints
285     UBool srcIsASCII  = true;
286     // assume the source contains all LDH codepoints
287     UBool srcIsLDH = true;
288     int32_t j=0;
289 
290     if(srcLength == -1){
291         srcLength = u_strlen(src);
292     }
293 
294     // step 1
295     for( j=0;j<srcLength;j++){
296         if(src[j] > 0x7F){
297             srcIsASCII = false;
298         }
299         b1[b1Len++] = src[j];
300     }
301 
302     NamePrepTransform* prep = getInstance(*status);
303     if(U_FAILURE(*status)){
304         goto CLEANUP;
305     }
306 
307     // step 2 is performed only if the source contains non ASCII
308     if (!srcIsASCII) {
309         b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);
310 
311         if(*status == U_BUFFER_OVERFLOW_ERROR){
312             // redo processing of string
313             /* we do not have enough room so grow the buffer*/
314             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
315             if(b1==NULL){
316                 *status = U_MEMORY_ALLOCATION_ERROR;
317                 goto CLEANUP;
318             }
319 
320             *status = U_ZERO_ERROR; // reset error
321 
322             b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
323         }
324         // error bail out
325         if(U_FAILURE(*status)){
326             goto CLEANUP;
327         }
328     }
329 
330     if(b1Len == 0){
331         *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
332         goto CLEANUP;
333     }
334 
335     srcIsASCII = true;
336     // step 3 & 4
337     for( j=0;j<b1Len;j++){
338         if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII
339             srcIsASCII = false;
340         }else if(prep->isLDHChar(b1[j])==false){  // if the char is in ASCII range verify that it is an LDH character{
341             srcIsLDH = false;
342         }
343     }
344 
345     if(useSTD3ASCIIRules == true){
346         // verify 3a and 3b
347         if( srcIsLDH == false /* source contains some non-LDH characters */
348             || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
349             *status = U_IDNA_STD3_ASCII_RULES_ERROR;
350             goto CLEANUP;
351         }
352     }
353     if(srcIsASCII){
354         if(b1Len <= destCapacity){
355             u_memmove(dest, b1, b1Len);
356             reqLength = b1Len;
357         }else{
358             reqLength = b1Len;
359             goto CLEANUP;
360         }
361     }else{
362         // step 5 : verify the sequence does not begin with ACE prefix
363         if(!startsWithPrefix(b1,b1Len)){
364 
365             //step 6: encode the sequence with punycode
366             //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
367 
368             b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status);
369             //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status);
370             if(*status == U_BUFFER_OVERFLOW_ERROR){
371                 // redo processing of string
372                 /* we do not have enough room so grow the buffer*/
373                 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
374                 if(b2 == NULL){
375                     *status = U_MEMORY_ALLOCATION_ERROR;
376                     goto CLEANUP;
377                 }
378 
379                 *status = U_ZERO_ERROR; // reset error
380 
381                 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
382                 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);
383 
384             }
385             //error bail out
386             if(U_FAILURE(*status)){
387                 goto CLEANUP;
388             }
389             reqLength = b2Len+ACE_PREFIX_LENGTH;
390 
391             if(reqLength > destCapacity){
392                 *status = U_BUFFER_OVERFLOW_ERROR;
393                 goto CLEANUP;
394             }
395             //Step 7: prepend the ACE prefix
396             u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH);
397             //Step 6: copy the contents in b2 into dest
398             u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
399 
400         }else{
401             *status = U_IDNA_ACE_PREFIX_ERROR;
402             goto CLEANUP;
403         }
404     }
405 
406     if(reqLength > MAX_LABEL_LENGTH){
407         *status = U_IDNA_LABEL_TOO_LONG_ERROR;
408     }
409 
410 CLEANUP:
411     if(b1 != b1Stack){
412         uprv_free(b1);
413     }
414     if(b2 != b2Stack){
415         uprv_free(b2);
416     }
417     uprv_free(caseFlags);
418 
419 //    delete prep;
420 
421     return u_terminateUChars(dest, destCapacity, reqLength, status);
422 }
423 
424 
425 U_CFUNC int32_t U_EXPORT2
idnaref_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)426 idnaref_toUnicode(const UChar* src, int32_t srcLength,
427                 UChar* dest, int32_t destCapacity,
428                 int32_t options,
429                 UParseError* parseError,
430                 UErrorCode* status){
431 
432     if(status == NULL || U_FAILURE(*status)){
433         return 0;
434     }
435     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
436         *status = U_ILLEGAL_ARGUMENT_ERROR;
437         return 0;
438     }
439 
440 
441 
442     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
443 
444     //initialize pointers to stack buffers
445     UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
446     int32_t b1Len, b2Len, b1PrimeLen, b3Len,
447             b1Capacity = MAX_LABEL_BUFFER_SIZE,
448             b2Capacity = MAX_LABEL_BUFFER_SIZE,
449             b3Capacity = MAX_LABEL_BUFFER_SIZE,
450             reqLength=0;
451 //    UParseError parseError;
452 
453     NamePrepTransform* prep = getInstance(*status);
454     b1Len = 0;
455     UBool* caseFlags = NULL;
456 
457     //get the options
458     UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
459     UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
460 
461     UBool srcIsASCII = true;
462     UBool srcIsLDH = true;
463     int32_t failPos =0;
464 
465     if(U_FAILURE(*status)){
466         goto CLEANUP;
467     }
468     // step 1: find out if all the codepoints in src are ASCII
469     if(srcLength==-1){
470         srcLength = 0;
471         for(;src[srcLength]!=0;){
472             if(src[srcLength]> 0x7f){
473                 srcIsASCII = false;
474             }if(prep->isLDHChar(src[srcLength])==false){
475                 // here we do not assemble surrogates
476                 // since we know that LDH code points
477                 // are in the ASCII range only
478                 srcIsLDH = false;
479                 failPos = srcLength;
480             }
481             srcLength++;
482         }
483     }else{
484         for(int32_t j=0; j<srcLength; j++){
485             if(src[j]> 0x7f){
486                 srcIsASCII = false;
487             }else if(prep->isLDHChar(src[j])==false){
488                 // here we do not assemble surrogates
489                 // since we know that LDH code points
490                 // are in the ASCII range only
491                 srcIsLDH = false;
492                 failPos = j;
493             }
494         }
495     }
496 
497     if(srcIsASCII == false){
498         // step 2: process the string
499         b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
500         if(*status == U_BUFFER_OVERFLOW_ERROR){
501             // redo processing of string
502             /* we do not have enough room so grow the buffer*/
503             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
504             if(b1==NULL){
505                 *status = U_MEMORY_ALLOCATION_ERROR;
506                 goto CLEANUP;
507             }
508 
509             *status = U_ZERO_ERROR; // reset error
510 
511             b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
512         }
513         //bail out on error
514         if(U_FAILURE(*status)){
515             goto CLEANUP;
516         }
517     }else{
518 
519         // copy everything to b1
520         if(srcLength < b1Capacity){
521             u_memmove(b1, src, srcLength);
522         }else{
523             /* we do not have enough room so grow the buffer*/
524             b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
525             if(b1==NULL){
526                 *status = U_MEMORY_ALLOCATION_ERROR;
527                 goto CLEANUP;
528             }
529             u_memmove(b1, src, srcLength);
530         }
531         b1Len = srcLength;
532     }
533     //step 3: verify ACE Prefix
534     if(startsWithPrefix(src,srcLength)){
535 
536         //step 4: Remove the ACE Prefix
537         b1Prime = b1 + ACE_PREFIX_LENGTH;
538         b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
539 
540         //step 5: Decode using punycode
541         b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status);
542         //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status);
543 
544         if(*status == U_BUFFER_OVERFLOW_ERROR){
545             // redo processing of string
546             /* we do not have enough room so grow the buffer*/
547             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
548             if(b2==NULL){
549                 *status = U_MEMORY_ALLOCATION_ERROR;
550                 goto CLEANUP;
551             }
552 
553             *status = U_ZERO_ERROR; // reset error
554 
555             b2Len =  convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
556             //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
557         }
558 
559 
560         //step 6:Apply toASCII
561         b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);
562 
563         if(*status == U_BUFFER_OVERFLOW_ERROR){
564             // redo processing of string
565             /* we do not have enough room so grow the buffer*/
566             b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
567             if(b3==NULL){
568                 *status = U_MEMORY_ALLOCATION_ERROR;
569                 goto CLEANUP;
570             }
571 
572             *status = U_ZERO_ERROR; // reset error
573 
574             b3Len =  idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
575 
576         }
577         //bail out on error
578         if(U_FAILURE(*status)){
579             goto CLEANUP;
580         }
581 
582         //step 7: verify
583         if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
584             *status = U_IDNA_VERIFICATION_ERROR;
585             goto CLEANUP;
586         }
587 
588         //step 8: return output of step 5
589         reqLength = b2Len;
590         if(b2Len <= destCapacity) {
591             u_memmove(dest, b2, b2Len);
592         }
593     }else{
594         // verify that STD3 ASCII rules are satisfied
595         if(useSTD3ASCIIRules == true){
596             if( srcIsLDH == false /* source contains some non-LDH characters */
597                 || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
598                 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
599 
600                 /* populate the parseError struct */
601                 if(srcIsLDH==false){
602                     // failPos is always set the index of failure
603                     uprv_syntaxError(src,failPos, srcLength,parseError);
604                 }else if(src[0] == HYPHEN){
605                     // fail position is 0
606                     uprv_syntaxError(src,0,srcLength,parseError);
607                 }else{
608                     // the last index in the source is always length-1
609                     uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
610                 }
611 
612                 goto CLEANUP;
613             }
614         }
615         //copy the source to destination
616         if(srcLength <= destCapacity){
617             u_memmove(dest, src, srcLength);
618         }
619         reqLength = srcLength;
620     }
621 
622 CLEANUP:
623 
624     if(b1 != b1Stack){
625         uprv_free(b1);
626     }
627     if(b2 != b2Stack){
628         uprv_free(b2);
629     }
630     uprv_free(caseFlags);
631 
632     // The RFC states that
633     // <quote>
634     // ToUnicode never fails. If any step fails, then the original input
635     // is returned immediately in that step.
636     // </quote>
637     // So if any step fails lets copy source to destination
638     if(U_FAILURE(*status)){
639         //copy the source to destination
640         if(dest && srcLength <= destCapacity){
641           if(srcLength == -1) {
642             u_memmove(dest, src, u_strlen(src));
643           } else {
644             u_memmove(dest, src, srcLength);
645           }
646         }
647         reqLength = srcLength;
648         *status = U_ZERO_ERROR;
649     }
650     return u_terminateUChars(dest, destCapacity, reqLength, status);
651 }
652 
653 
654 static int32_t
getNextSeparator(UChar * src,int32_t srcLength,NamePrepTransform * prep,UChar ** limit,UBool * done,UErrorCode * status)655 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep,
656                  UChar **limit,
657                  UBool *done,
658                  UErrorCode *status){
659     if(srcLength == -1){
660         int32_t i;
661         for(i=0 ; ;i++){
662             if(src[i] == 0){
663                 *limit = src + i; // point to null
664                 *done = true;
665                 return i;
666             }
667             if(prep->isLabelSeparator(src[i],*status)){
668                 *limit = src + (i+1); // go past the delimiter
669                 return i;
670 
671             }
672         }
673     }else{
674         int32_t i;
675         for(i=0;i<srcLength;i++){
676             if(prep->isLabelSeparator(src[i],*status)){
677                 *limit = src + (i+1); // go past the delimiter
678                 return i;
679             }
680         }
681         // we have not found the delimiter
682         if(i==srcLength){
683             *limit = src+srcLength;
684             *done = true;
685         }
686         return i;
687     }
688 }
689 
690 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)691 idnaref_IDNToASCII(  const UChar* src, int32_t srcLength,
692                    UChar* dest, int32_t destCapacity,
693                    int32_t options,
694                    UParseError* parseError,
695                    UErrorCode* status){
696 
697     if(status == NULL || U_FAILURE(*status)){
698         return 0;
699     }
700     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
701         *status = U_ILLEGAL_ARGUMENT_ERROR;
702         return 0;
703     }
704 
705     int32_t reqLength = 0;
706 //    UParseError parseError;
707 
708     NamePrepTransform* prep = getInstance(*status);
709 
710     //initialize pointers to stack buffers
711     UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
712     UChar  *b1 = b1Stack;
713     int32_t b1Len, labelLen;
714     UChar* delimiter = (UChar*)src;
715     UChar* labelStart = (UChar*)src;
716     int32_t remainingLen = srcLength;
717     int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
718 
719     //get the options
720 //    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
721 //    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
722     UBool done = false;
723 
724     if(U_FAILURE(*status)){
725         goto CLEANUP;
726     }
727 
728 
729     if(srcLength == -1){
730         for(;;){
731 
732             if(*delimiter == 0){
733                 break;
734             }
735 
736             labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
737             b1Len = 0;
738             if(!(labelLen==0 && done)){// make sure this is not a root label separator.
739 
740                 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
741                                         options, parseError, status);
742 
743                 if(*status == U_BUFFER_OVERFLOW_ERROR){
744                     // redo processing of string
745                     /* we do not have enough room so grow the buffer*/
746                     b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
747                     if(b1==NULL){
748                         *status = U_MEMORY_ALLOCATION_ERROR;
749                         goto CLEANUP;
750                     }
751 
752                     *status = U_ZERO_ERROR; // reset error
753 
754                     b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
755                                             options, parseError, status);
756 
757                 }
758             }
759 
760             if(U_FAILURE(*status)){
761                 goto CLEANUP;
762             }
763             int32_t tempLen = (reqLength + b1Len );
764             // copy to dest
765             if( tempLen< destCapacity){
766                 u_memmove(dest+reqLength, b1, b1Len);
767             }
768 
769             reqLength = tempLen;
770 
771             // add the label separator
772             if(done == false){
773                 if(reqLength < destCapacity){
774                     dest[reqLength] = FULL_STOP;
775                 }
776                 reqLength++;
777             }
778 
779             labelStart = delimiter;
780         }
781     }else{
782         for(;;){
783 
784             if(delimiter == src+srcLength){
785                 break;
786             }
787 
788             labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
789 
790             b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
791                                     options,parseError, status);
792 
793             if(*status == U_BUFFER_OVERFLOW_ERROR){
794                 // redo processing of string
795                 /* we do not have enough room so grow the buffer*/
796                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
797                 if(b1==NULL){
798                     *status = U_MEMORY_ALLOCATION_ERROR;
799                     goto CLEANUP;
800                 }
801 
802                 *status = U_ZERO_ERROR; // reset error
803 
804                 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
805                                         options, parseError, status);
806 
807             }
808 
809             if(U_FAILURE(*status)){
810                 goto CLEANUP;
811             }
812             int32_t tempLen = (reqLength + b1Len );
813             // copy to dest
814             if( tempLen< destCapacity){
815                 u_memmove(dest+reqLength, b1, b1Len);
816             }
817 
818             reqLength = tempLen;
819 
820             // add the label separator
821             if(done == false){
822                 if(reqLength < destCapacity){
823                     dest[reqLength] = FULL_STOP;
824                 }
825                 reqLength++;
826             }
827 
828             labelStart = delimiter;
829             remainingLen = static_cast<int32_t>(srcLength - (delimiter - src));
830         }
831     }
832 
833 
834 CLEANUP:
835 
836     if(b1 != b1Stack){
837         uprv_free(b1);
838     }
839 
840 //   delete prep;
841 
842     return u_terminateUChars(dest, destCapacity, reqLength, status);
843 }
844 
845 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)846 idnaref_IDNToUnicode(  const UChar* src, int32_t srcLength,
847                      UChar* dest, int32_t destCapacity,
848                      int32_t options,
849                      UParseError* parseError,
850                      UErrorCode* status){
851 
852     if(status == NULL || U_FAILURE(*status)){
853         return 0;
854     }
855     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
856         *status = U_ILLEGAL_ARGUMENT_ERROR;
857         return 0;
858     }
859 
860     int32_t reqLength = 0;
861 
862     UBool done = false;
863 
864     NamePrepTransform* prep = getInstance(*status);
865 
866     //initialize pointers to stack buffers
867     UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
868     UChar  *b1 = b1Stack;
869     int32_t b1Len, labelLen;
870     UChar* delimiter = (UChar*)src;
871     UChar* labelStart = (UChar*)src;
872     int32_t remainingLen = srcLength;
873     int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
874 
875     //get the options
876 //    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
877 //    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
878 
879     if(U_FAILURE(*status)){
880         goto CLEANUP;
881     }
882 
883     if(srcLength == -1){
884         for(;;){
885 
886             if(*delimiter == 0){
887                 break;
888             }
889 
890             labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
891 
892            if(labelLen==0 && done==false){
893                 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
894             }
895             b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
896                                       options, parseError, status);
897 
898             if(*status == U_BUFFER_OVERFLOW_ERROR){
899                 // redo processing of string
900                 /* we do not have enough room so grow the buffer*/
901                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
902                 if(b1==NULL){
903                     *status = U_MEMORY_ALLOCATION_ERROR;
904                     goto CLEANUP;
905                 }
906 
907                 *status = U_ZERO_ERROR; // reset error
908 
909                 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
910                                            options, parseError, status);
911 
912             }
913 
914             if(U_FAILURE(*status)){
915                 goto CLEANUP;
916             }
917             int32_t tempLen = (reqLength + b1Len );
918             // copy to dest
919             if( tempLen< destCapacity){
920                 u_memmove(dest+reqLength, b1, b1Len);
921             }
922 
923             reqLength = tempLen;
924             // add the label separator
925             if(done == false){
926                 if(reqLength < destCapacity){
927                     dest[reqLength] = FULL_STOP;
928                 }
929                 reqLength++;
930             }
931 
932             labelStart = delimiter;
933         }
934     }else{
935         for(;;){
936 
937             if(delimiter == src+srcLength){
938                 break;
939             }
940 
941             labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
942 
943             if(labelLen==0 && done==false){
944                 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
945             }
946 
947             b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
948                                        options, parseError, status);
949 
950             if(*status == U_BUFFER_OVERFLOW_ERROR){
951                 // redo processing of string
952                 /* we do not have enough room so grow the buffer*/
953                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
954                 if(b1==NULL){
955                     *status = U_MEMORY_ALLOCATION_ERROR;
956                     goto CLEANUP;
957                 }
958 
959                 *status = U_ZERO_ERROR; // reset error
960 
961                 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
962                                            options, parseError, status);
963 
964             }
965 
966             if(U_FAILURE(*status)){
967                 goto CLEANUP;
968             }
969             int32_t tempLen = (reqLength + b1Len );
970             // copy to dest
971             if( tempLen< destCapacity){
972                 u_memmove(dest+reqLength, b1, b1Len);
973             }
974 
975             reqLength = tempLen;
976 
977             // add the label separator
978             if(done == false){
979                 if(reqLength < destCapacity){
980                     dest[reqLength] = FULL_STOP;
981                 }
982                 reqLength++;
983             }
984 
985             labelStart = delimiter;
986             remainingLen = static_cast<int32_t>(srcLength - (delimiter - src));
987         }
988     }
989 
990 CLEANUP:
991 
992     if(b1 != b1Stack){
993         uprv_free(b1);
994     }
995 
996 //    delete prep;
997 
998     return u_terminateUChars(dest, destCapacity, reqLength, status);
999 }
1000 
1001 U_CFUNC int32_t U_EXPORT2
idnaref_compare(const UChar * s1,int32_t length1,const UChar * s2,int32_t length2,int32_t options,UErrorCode * status)1002 idnaref_compare(  const UChar *s1, int32_t length1,
1003                 const UChar *s2, int32_t length2,
1004                 int32_t options,
1005                 UErrorCode* status){
1006 
1007     if(status == NULL || U_FAILURE(*status)){
1008         return -1;
1009     }
1010 
1011     UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
1012     UChar *b1 = b1Stack, *b2 = b2Stack;
1013     int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
1014     int32_t result = -1;
1015 
1016     UParseError parseError;
1017 
1018     b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
1019     if(*status == U_BUFFER_OVERFLOW_ERROR){
1020         // redo processing of string
1021         /* we do not have enough room so grow the buffer*/
1022         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
1023         if(b1==NULL){
1024             *status = U_MEMORY_ALLOCATION_ERROR;
1025             goto CLEANUP;
1026         }
1027 
1028         *status = U_ZERO_ERROR; // reset error
1029 
1030         b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
1031 
1032     }
1033 
1034     b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status);
1035     if(*status == U_BUFFER_OVERFLOW_ERROR){
1036         // redo processing of string
1037         /* we do not have enough room so grow the buffer*/
1038         b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
1039         if(b2==NULL){
1040             *status = U_MEMORY_ALLOCATION_ERROR;
1041             goto CLEANUP;
1042         }
1043 
1044         *status = U_ZERO_ERROR; // reset error
1045 
1046         b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status);
1047 
1048     }
1049     // when toASCII is applied all label separators are replaced with FULL_STOP
1050     result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
1051 
1052 CLEANUP:
1053     if(b1 != b1Stack){
1054         uprv_free(b1);
1055     }
1056 
1057     if(b2 != b2Stack){
1058         uprv_free(b2);
1059     }
1060 
1061     return result;
1062 }
#endif /* #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION */
1064