• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2007, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  idnaref.cpp
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003feb1
14  *   created by: Ram Viswanadha
15  */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
20 #include "idnaref.h"
21 #include "punyref.h"
22 #include "ustr_imp.h"
23 #include "cmemory.h"
24 #include "sprpimpl.h"
25 #include "nptrans.h"
26 #include "testidna.h"
27 #include "punycode.h"
28 #include "unicode/ustring.h"
29 
30 /* The official IDNA ACE prefix is "xn--" */
31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
32 #define ACE_PREFIX_LENGTH 4
33 
34 #define MAX_LABEL_LENGTH 63
35 #define HYPHEN      0x002D
36 /* A label may be at most MAX_LABEL_LENGTH (63) code units long; the working buffers below are sized larger than that */
37 #define MAX_LABEL_BUFFER_SIZE 100
38 #define MAX_IDN_BUFFER_SIZE   300
39 
40 #define CAPITAL_A        0x0041
41 #define CAPITAL_Z        0x005A
42 #define LOWER_CASE_DELTA 0x0020
43 #define FULL_STOP        0x002E
44 
45 
46 inline static UBool
startsWithPrefix(const UChar * src,int32_t srcLength)47 startsWithPrefix(const UChar* src , int32_t srcLength){
48     UBool startsWithPrefix = TRUE;
49 
50     if(srcLength < ACE_PREFIX_LENGTH){
51         return FALSE;
52     }
53 
54     for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
55         if(u_tolower(src[i]) != ACE_PREFIX[i]){
56             startsWithPrefix = FALSE;
57         }
58     }
59     return startsWithPrefix;
60 }
61 
62 inline static UChar
toASCIILower(UChar ch)63 toASCIILower(UChar ch){
64     if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
65         return ch + LOWER_CASE_DELTA;
66     }
67     return ch;
68 }
69 
70 inline static int32_t
compareCaseInsensitiveASCII(const UChar * s1,int32_t s1Len,const UChar * s2,int32_t s2Len)71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
72                             const UChar* s2, int32_t s2Len){
73     if(s1Len != s2Len){
74         return (s1Len > s2Len) ? s1Len : s2Len;
75     }
76     UChar c1,c2;
77     int32_t rc;
78 
79     for(int32_t i =0;/* no condition */;i++) {
80         /* If we reach the ends of both strings then they match */
81         if(i == s1Len) {
82             return 0;
83         }
84 
85         c1 = s1[i];
86         c2 = s2[i];
87 
88         /* Case-insensitive comparison */
89         if(c1!=c2) {
90             rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
91             if(rc!=0) {
92                 return rc;
93             }
94         }
95     }
96 
97 }
98 
getError(enum punycode_status status)99 static UErrorCode getError(enum punycode_status status){
100     switch(status){
101     case punycode_success:
102         return U_ZERO_ERROR;
103     case punycode_bad_input:   /* Input is invalid.                         */
104         return U_INVALID_CHAR_FOUND;
105     case punycode_big_output:  /* Output would exceed the space provided.   */
106         return U_BUFFER_OVERFLOW_ERROR;
107     case punycode_overflow :    /* Input requires wider integers to process. */
108         return U_INDEX_OUTOFBOUNDS_ERROR;
109     default:
110         return U_INTERNAL_PROGRAM_ERROR;
111     }
112 }
113 
convertASCIIToUChars(const char * src,UChar * dest,int32_t length)114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){
115     int i;
116     for(i=0;i<length;i++){
117         dest[i] = src[i];
118     }
119     return i;
120 }
convertUCharsToASCII(const UChar * src,char * dest,int32_t length)121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){
122     int i;
123     for(i=0;i<length;i++){
124         dest[i] = (char)src[i];
125     }
126     return i;
127 }
128 // wrapper around the reference Punycode implementation
convertToPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)129 static int32_t convertToPuny(const UChar* src, int32_t srcLength,
130                              UChar* dest, int32_t destCapacity,
131                              UErrorCode& status){
132     uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
133     int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
134     uint32_t* b1 = b1Stack;
135     char b2Stack[MAX_LABEL_BUFFER_SIZE];
136     char* b2 = b2Stack;
137     int32_t b2Len =MAX_LABEL_BUFFER_SIZE ;
138     punycode_status error;
139     unsigned char* caseFlags = NULL;
140 
141     u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status);
142     if(status == U_BUFFER_OVERFLOW_ERROR){
143         // redo processing of string
144         /* we do not have enough room so grow the buffer*/
145         b1 =  (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t));
146         if(b1==NULL){
147             status = U_MEMORY_ALLOCATION_ERROR;
148             goto CLEANUP;
149         }
150 
151         status = U_ZERO_ERROR; // reset error
152 
153         u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
154     }
155     if(U_FAILURE(status)){
156         goto CLEANUP;
157     }
158 
159     //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char));
160 
161     error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
162     status = getError(error);
163 
164     if(status == U_BUFFER_OVERFLOW_ERROR){
165         /* we do not have enough room so grow the buffer*/
166         b2 = (char*) uprv_malloc( b2Len * sizeof(char));
167         if(b2==NULL){
168             status = U_MEMORY_ALLOCATION_ERROR;
169             goto CLEANUP;
170         }
171 
172         status = U_ZERO_ERROR; // reset error
173 
174         punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
175         status = getError(error);
176     }
177     if(U_FAILURE(status)){
178         goto CLEANUP;
179     }
180 
181     if(b2Len < destCapacity){
182           convertASCIIToUChars(b2,dest,b2Len);
183     }else{
184         status =U_BUFFER_OVERFLOW_ERROR;
185     }
186 
187 CLEANUP:
188     if(b1Stack != b1){
189         uprv_free(b1);
190     }
191     if(b2Stack != b2){
192         uprv_free(b2);
193     }
194     uprv_free(caseFlags);
195 
196     return b2Len;
197 }
198 
convertFromPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)199 static int32_t convertFromPuny(  const UChar* src, int32_t srcLength,
200                                  UChar* dest, int32_t destCapacity,
201                                  UErrorCode& status){
202     char b1Stack[MAX_LABEL_BUFFER_SIZE];
203     char* b1 = b1Stack;
204     int32_t destLen =0;
205 
206     convertUCharsToASCII(src, b1,srcLength);
207 
208     uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
209     uint32_t* b2 = b2Stack;
210     int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
211     unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
212     punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
213     status = getError(error);
214     if(status == U_BUFFER_OVERFLOW_ERROR){
215         b2 =  (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
216         if(b2 == NULL){
217             status = U_MEMORY_ALLOCATION_ERROR;
218             goto CLEANUP;
219         }
220         error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
221         status = getError(error);
222     }
223 
224     if(U_FAILURE(status)){
225         goto CLEANUP;
226     }
227 
228     u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);
229 
230 CLEANUP:
231     if(b1Stack != b1){
232         uprv_free(b1);
233     }
234     if(b2Stack != b2){
235         uprv_free(b2);
236     }
237     uprv_free(caseFlags);
238 
239     return destLen;
240 }
241 
242 
243 U_CFUNC int32_t U_EXPORT2
idnaref_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)244 idnaref_toASCII(const UChar* src, int32_t srcLength,
245               UChar* dest, int32_t destCapacity,
246               int32_t options,
247               UParseError* parseError,
248               UErrorCode* status){
249 
250     if(status == NULL || U_FAILURE(*status)){
251         return 0;
252     }
253     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
254         *status = U_ILLEGAL_ARGUMENT_ERROR;
255         return 0;
256     }
257     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
258     //initialize pointers to stack buffers
259     UChar  *b1 = b1Stack, *b2 = b2Stack;
260     int32_t b1Len=0, b2Len=0,
261             b1Capacity = MAX_LABEL_BUFFER_SIZE,
262             b2Capacity = MAX_LABEL_BUFFER_SIZE ,
263             reqLength=0;
264 
265     //get the options
266     UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
267     UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
268 
269     UBool* caseFlags = NULL;
270 
271     // assume the source contains all ascii codepoints
272     UBool srcIsASCII  = TRUE;
273     // assume the source contains all LDH codepoints
274     UBool srcIsLDH = TRUE;
275     int32_t j=0;
276 
277     if(srcLength == -1){
278         srcLength = u_strlen(src);
279     }
280 
281     // step 1
282     for( j=0;j<srcLength;j++){
283         if(src[j] > 0x7F){
284             srcIsASCII = FALSE;
285         }
286         b1[b1Len++] = src[j];
287     }
288     // step 2
289     NamePrepTransform* prep = TestIDNA::getInstance(*status);
290 
291     if(U_FAILURE(*status)){
292         goto CLEANUP;
293     }
294 
295     b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);
296 
297     if(*status == U_BUFFER_OVERFLOW_ERROR){
298         // redo processing of string
299         /* we do not have enough room so grow the buffer*/
300         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
301         if(b1==NULL){
302             *status = U_MEMORY_ALLOCATION_ERROR;
303             goto CLEANUP;
304         }
305 
306         *status = U_ZERO_ERROR; // reset error
307 
308         b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
309     }
310     // error bail out
311     if(U_FAILURE(*status)){
312         goto CLEANUP;
313     }
314 
315     if(b1Len == 0){
316         *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
317         goto CLEANUP;
318     }
319 
320     srcIsASCII = TRUE;
321     // step 3 & 4
322     for( j=0;j<b1Len;j++){
323         if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII
324             srcIsASCII = FALSE;
325         }else if(prep->isLDHChar(b1[j])==FALSE){  // if the char is in ASCII range verify that it is an LDH character{
326             srcIsLDH = FALSE;
327         }
328     }
329 
330     if(useSTD3ASCIIRules == TRUE){
331         // verify 3a and 3b
332         if( srcIsLDH == FALSE /* source contains some non-LDH characters */
333             || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
334             *status = U_IDNA_STD3_ASCII_RULES_ERROR;
335             goto CLEANUP;
336         }
337     }
338     if(srcIsASCII){
339         if(b1Len <= destCapacity){
340             uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
341             reqLength = b1Len;
342         }else{
343             reqLength = b1Len;
344             goto CLEANUP;
345         }
346     }else{
347         // step 5 : verify the sequence does not begin with ACE prefix
348         if(!startsWithPrefix(b1,b1Len)){
349 
350             //step 6: encode the sequence with punycode
351             //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
352 
353             b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status);
354             //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status);
355             if(*status == U_BUFFER_OVERFLOW_ERROR){
356                 // redo processing of string
357                 /* we do not have enough room so grow the buffer*/
358                 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
359                 if(b2 == NULL){
360                     *status = U_MEMORY_ALLOCATION_ERROR;
361                     goto CLEANUP;
362                 }
363 
364                 *status = U_ZERO_ERROR; // reset error
365 
366                 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
367                 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);
368 
369             }
370             //error bail out
371             if(U_FAILURE(*status)){
372                 goto CLEANUP;
373             }
374             reqLength = b2Len+ACE_PREFIX_LENGTH;
375 
376             if(reqLength > destCapacity){
377                 *status = U_BUFFER_OVERFLOW_ERROR;
378                 goto CLEANUP;
379             }
380             //Step 7: prepend the ACE prefix
381             uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
382             //Step 6: copy the contents in b2 into dest
383             uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
384 
385         }else{
386             *status = U_IDNA_ACE_PREFIX_ERROR;
387             goto CLEANUP;
388         }
389     }
390 
391     if(reqLength > MAX_LABEL_LENGTH){
392         *status = U_IDNA_LABEL_TOO_LONG_ERROR;
393     }
394 
395 CLEANUP:
396     if(b1 != b1Stack){
397         uprv_free(b1);
398     }
399     if(b2 != b2Stack){
400         uprv_free(b2);
401     }
402     uprv_free(caseFlags);
403 
404 //    delete prep;
405 
406     return u_terminateUChars(dest, destCapacity, reqLength, status);
407 }
408 
409 
410 U_CFUNC int32_t U_EXPORT2
idnaref_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)411 idnaref_toUnicode(const UChar* src, int32_t srcLength,
412                 UChar* dest, int32_t destCapacity,
413                 int32_t options,
414                 UParseError* parseError,
415                 UErrorCode* status){
416 
417     if(status == NULL || U_FAILURE(*status)){
418         return 0;
419     }
420     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
421         *status = U_ILLEGAL_ARGUMENT_ERROR;
422         return 0;
423     }
424 
425 
426 
427     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
428 
429     //initialize pointers to stack buffers
430     UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
431     int32_t b1Len, b2Len, b1PrimeLen, b3Len,
432             b1Capacity = MAX_LABEL_BUFFER_SIZE,
433             b2Capacity = MAX_LABEL_BUFFER_SIZE,
434             b3Capacity = MAX_LABEL_BUFFER_SIZE,
435             reqLength=0;
436 //    UParseError parseError;
437 
438     NamePrepTransform* prep = TestIDNA::getInstance(*status);
439     b1Len = 0;
440     UBool* caseFlags = NULL;
441 
442     //get the options
443     UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
444     UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
445 
446     UBool srcIsASCII = TRUE;
447     UBool srcIsLDH = TRUE;
448     int32_t failPos =0;
449 
450     if(U_FAILURE(*status)){
451         goto CLEANUP;
452     }
453     // step 1: find out if all the codepoints in src are ASCII
454     if(srcLength==-1){
455         srcLength = 0;
456         for(;src[srcLength]!=0;){
457             if(src[srcLength]> 0x7f){
458                 srcIsASCII = FALSE;
459             }if(prep->isLDHChar(src[srcLength])==FALSE){
460                 // here we do not assemble surrogates
461                 // since we know that LDH code points
462                 // are in the ASCII range only
463                 srcIsLDH = FALSE;
464                 failPos = srcLength;
465             }
466             srcLength++;
467         }
468     }else{
469         for(int32_t j=0; j<srcLength; j++){
470             if(src[j]> 0x7f){
471                 srcIsASCII = FALSE;
472             }else if(prep->isLDHChar(src[j])==FALSE){
473                 // here we do not assemble surrogates
474                 // since we know that LDH code points
475                 // are in the ASCII range only
476                 srcIsLDH = FALSE;
477                 failPos = j;
478             }
479         }
480     }
481 
482     if(srcIsASCII == FALSE){
483         // step 2: process the string
484         b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
485         if(*status == U_BUFFER_OVERFLOW_ERROR){
486             // redo processing of string
487             /* we do not have enough room so grow the buffer*/
488             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
489             if(b1==NULL){
490                 *status = U_MEMORY_ALLOCATION_ERROR;
491                 goto CLEANUP;
492             }
493 
494             *status = U_ZERO_ERROR; // reset error
495 
496             b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
497         }
498         //bail out on error
499         if(U_FAILURE(*status)){
500             goto CLEANUP;
501         }
502     }else{
503 
504         // copy everything to b1
505         if(srcLength < b1Capacity){
506             uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
507         }else{
508             /* we do not have enough room so grow the buffer*/
509             b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
510             if(b1==NULL){
511                 *status = U_MEMORY_ALLOCATION_ERROR;
512                 goto CLEANUP;
513             }
514             uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
515         }
516         b1Len = srcLength;
517     }
518     //step 3: verify ACE Prefix
519     if(startsWithPrefix(src,srcLength)){
520 
521         //step 4: Remove the ACE Prefix
522         b1Prime = b1 + ACE_PREFIX_LENGTH;
523         b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
524 
525         //step 5: Decode using punycode
526         b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status);
527         //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status);
528 
529         if(*status == U_BUFFER_OVERFLOW_ERROR){
530             // redo processing of string
531             /* we do not have enough room so grow the buffer*/
532             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
533             if(b2==NULL){
534                 *status = U_MEMORY_ALLOCATION_ERROR;
535                 goto CLEANUP;
536             }
537 
538             *status = U_ZERO_ERROR; // reset error
539 
540             b2Len =  convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
541             //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
542         }
543 
544 
545         //step 6:Apply toASCII
546         b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);
547 
548         if(*status == U_BUFFER_OVERFLOW_ERROR){
549             // redo processing of string
550             /* we do not have enough room so grow the buffer*/
551             b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
552             if(b3==NULL){
553                 *status = U_MEMORY_ALLOCATION_ERROR;
554                 goto CLEANUP;
555             }
556 
557             *status = U_ZERO_ERROR; // reset error
558 
559             b3Len =  idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
560 
561         }
562         //bail out on error
563         if(U_FAILURE(*status)){
564             goto CLEANUP;
565         }
566 
567         //step 7: verify
568         if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
569             *status = U_IDNA_VERIFICATION_ERROR;
570             goto CLEANUP;
571         }
572 
573         //step 8: return output of step 5
574         reqLength = b2Len;
575         if(b2Len <= destCapacity) {
576             uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
577         }
578     }else{
579         // verify that STD3 ASCII rules are satisfied
580         if(useSTD3ASCIIRules == TRUE){
581             if( srcIsLDH == FALSE /* source contains some non-LDH characters */
582                 || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
583                 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
584 
585                 /* populate the parseError struct */
586                 if(srcIsLDH==FALSE){
587                     // failPos is always set the index of failure
588                     uprv_syntaxError(src,failPos, srcLength,parseError);
589                 }else if(src[0] == HYPHEN){
590                     // fail position is 0
591                     uprv_syntaxError(src,0,srcLength,parseError);
592                 }else{
593                     // the last index in the source is always length-1
594                     uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
595                 }
596 
597                 goto CLEANUP;
598             }
599         }
600         //copy the source to destination
601         if(srcLength <= destCapacity){
602             uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
603         }
604         reqLength = srcLength;
605     }
606 
607 CLEANUP:
608 
609     if(b1 != b1Stack){
610         uprv_free(b1);
611     }
612     if(b2 != b2Stack){
613         uprv_free(b2);
614     }
615     uprv_free(caseFlags);
616 
617     // The RFC states that
618     // <quote>
619     // ToUnicode never fails. If any step fails, then the original input
620     // is returned immediately in that step.
621     // </quote>
622     // So if any step fails lets copy source to destination
623     if(U_FAILURE(*status)){
624         //copy the source to destination
625         if(dest && srcLength <= destCapacity){
626           if(srcLength == -1) {
627             uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR);
628           } else {
629             uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
630           }
631         }
632         reqLength = srcLength;
633         *status = U_ZERO_ERROR;
634     }
635     return u_terminateUChars(dest, destCapacity, reqLength, status);
636 }
637 
638 
639 static int32_t
getNextSeparator(UChar * src,int32_t srcLength,NamePrepTransform * prep,UChar ** limit,UBool * done,UErrorCode * status)640 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep,
641                  UChar **limit,
642                  UBool *done,
643                  UErrorCode *status){
644     if(srcLength == -1){
645         int32_t i;
646         for(i=0 ; ;i++){
647             if(src[i] == 0){
648                 *limit = src + i; // point to null
649                 *done = TRUE;
650                 return i;
651             }
652             if(prep->isLabelSeparator(src[i],*status)){
653                 *limit = src + (i+1); // go past the delimiter
654                 return i;
655 
656             }
657         }
658     }else{
659         int32_t i;
660         for(i=0;i<srcLength;i++){
661             if(prep->isLabelSeparator(src[i],*status)){
662                 *limit = src + (i+1); // go past the delimiter
663                 return i;
664             }
665         }
666         // we have not found the delimiter
667         if(i==srcLength){
668             *limit = src+srcLength;
669             *done = TRUE;
670         }
671         return i;
672     }
673 }
674 
675 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)676 idnaref_IDNToASCII(  const UChar* src, int32_t srcLength,
677                    UChar* dest, int32_t destCapacity,
678                    int32_t options,
679                    UParseError* parseError,
680                    UErrorCode* status){
681 
682     if(status == NULL || U_FAILURE(*status)){
683         return 0;
684     }
685     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
686         *status = U_ILLEGAL_ARGUMENT_ERROR;
687         return 0;
688     }
689 
690     int32_t reqLength = 0;
691 //    UParseError parseError;
692 
693     NamePrepTransform* prep = TestIDNA::getInstance(*status);
694 
695     //initialize pointers to stack buffers
696     UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
697     UChar  *b1 = b1Stack;
698     int32_t b1Len, labelLen;
699     UChar* delimiter = (UChar*)src;
700     UChar* labelStart = (UChar*)src;
701     int32_t remainingLen = srcLength;
702     int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
703 
704     //get the options
705 //    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
706 //    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
707     UBool done = FALSE;
708 
709     if(U_FAILURE(*status)){
710         goto CLEANUP;
711     }
712 
713 
714     if(srcLength == -1){
715         for(;;){
716 
717             if(*delimiter == 0){
718                 break;
719             }
720 
721             labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
722             b1Len = 0;
723             if(!(labelLen==0 && done)){// make sure this is not a root label separator.
724 
725                 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
726                                         options, parseError, status);
727 
728                 if(*status == U_BUFFER_OVERFLOW_ERROR){
729                     // redo processing of string
730                     /* we do not have enough room so grow the buffer*/
731                     b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
732                     if(b1==NULL){
733                         *status = U_MEMORY_ALLOCATION_ERROR;
734                         goto CLEANUP;
735                     }
736 
737                     *status = U_ZERO_ERROR; // reset error
738 
739                     b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
740                                             options, parseError, status);
741 
742                 }
743             }
744 
745             if(U_FAILURE(*status)){
746                 goto CLEANUP;
747             }
748             int32_t tempLen = (reqLength + b1Len );
749             // copy to dest
750             if( tempLen< destCapacity){
751                 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
752             }
753 
754             reqLength = tempLen;
755 
756             // add the label separator
757             if(done == FALSE){
758                 if(reqLength < destCapacity){
759                     dest[reqLength] = FULL_STOP;
760                 }
761                 reqLength++;
762             }
763 
764             labelStart = delimiter;
765         }
766     }else{
767         for(;;){
768 
769             if(delimiter == src+srcLength){
770                 break;
771             }
772 
773             labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
774 
775             b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
776                                     options,parseError, status);
777 
778             if(*status == U_BUFFER_OVERFLOW_ERROR){
779                 // redo processing of string
780                 /* we do not have enough room so grow the buffer*/
781                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
782                 if(b1==NULL){
783                     *status = U_MEMORY_ALLOCATION_ERROR;
784                     goto CLEANUP;
785                 }
786 
787                 *status = U_ZERO_ERROR; // reset error
788 
789                 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
790                                         options, parseError, status);
791 
792             }
793 
794             if(U_FAILURE(*status)){
795                 goto CLEANUP;
796             }
797             int32_t tempLen = (reqLength + b1Len );
798             // copy to dest
799             if( tempLen< destCapacity){
800                 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
801             }
802 
803             reqLength = tempLen;
804 
805             // add the label separator
806             if(done == FALSE){
807                 if(reqLength < destCapacity){
808                     dest[reqLength] = FULL_STOP;
809                 }
810                 reqLength++;
811             }
812 
813             labelStart = delimiter;
814             remainingLen = srcLength - (delimiter - src);
815         }
816     }
817 
818 
819 CLEANUP:
820 
821     if(b1 != b1Stack){
822         uprv_free(b1);
823     }
824 
825 //   delete prep;
826 
827     return u_terminateUChars(dest, destCapacity, reqLength, status);
828 }
829 
830 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)831 idnaref_IDNToUnicode(  const UChar* src, int32_t srcLength,
832                      UChar* dest, int32_t destCapacity,
833                      int32_t options,
834                      UParseError* parseError,
835                      UErrorCode* status){
836 
837     if(status == NULL || U_FAILURE(*status)){
838         return 0;
839     }
840     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
841         *status = U_ILLEGAL_ARGUMENT_ERROR;
842         return 0;
843     }
844 
845     int32_t reqLength = 0;
846 
847     UBool done = FALSE;
848 
849     NamePrepTransform* prep = TestIDNA::getInstance(*status);
850 
851     //initialize pointers to stack buffers
852     UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
853     UChar  *b1 = b1Stack;
854     int32_t b1Len, labelLen;
855     UChar* delimiter = (UChar*)src;
856     UChar* labelStart = (UChar*)src;
857     int32_t remainingLen = srcLength;
858     int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
859 
860     //get the options
861 //    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
862 //    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
863 
864     if(U_FAILURE(*status)){
865         goto CLEANUP;
866     }
867 
868     if(srcLength == -1){
869         for(;;){
870 
871             if(*delimiter == 0){
872                 break;
873             }
874 
875             labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
876 
877            if(labelLen==0 && done==FALSE){
878                 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
879             }
880             b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
881                                       options, parseError, status);
882 
883             if(*status == U_BUFFER_OVERFLOW_ERROR){
884                 // redo processing of string
885                 /* we do not have enough room so grow the buffer*/
886                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
887                 if(b1==NULL){
888                     *status = U_MEMORY_ALLOCATION_ERROR;
889                     goto CLEANUP;
890                 }
891 
892                 *status = U_ZERO_ERROR; // reset error
893 
894                 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
895                                            options, parseError, status);
896 
897             }
898 
899             if(U_FAILURE(*status)){
900                 goto CLEANUP;
901             }
902             int32_t tempLen = (reqLength + b1Len );
903             // copy to dest
904             if( tempLen< destCapacity){
905                 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
906             }
907 
908             reqLength = tempLen;
909             // add the label separator
910             if(done == FALSE){
911                 if(reqLength < destCapacity){
912                     dest[reqLength] = FULL_STOP;
913                 }
914                 reqLength++;
915             }
916 
917             labelStart = delimiter;
918         }
919     }else{
920         for(;;){
921 
922             if(delimiter == src+srcLength){
923                 break;
924             }
925 
926             labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
927 
928             if(labelLen==0 && done==FALSE){
929                 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
930             }
931 
932             b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
933                                        options, parseError, status);
934 
935             if(*status == U_BUFFER_OVERFLOW_ERROR){
936                 // redo processing of string
937                 /* we do not have enough room so grow the buffer*/
938                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
939                 if(b1==NULL){
940                     *status = U_MEMORY_ALLOCATION_ERROR;
941                     goto CLEANUP;
942                 }
943 
944                 *status = U_ZERO_ERROR; // reset error
945 
946                 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
947                                            options, parseError, status);
948 
949             }
950 
951             if(U_FAILURE(*status)){
952                 goto CLEANUP;
953             }
954             int32_t tempLen = (reqLength + b1Len );
955             // copy to dest
956             if( tempLen< destCapacity){
957                 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
958             }
959 
960             reqLength = tempLen;
961 
962             // add the label separator
963             if(done == FALSE){
964                 if(reqLength < destCapacity){
965                     dest[reqLength] = FULL_STOP;
966                 }
967                 reqLength++;
968             }
969 
970             labelStart = delimiter;
971             remainingLen = srcLength - (delimiter - src);
972         }
973     }
974 
975 CLEANUP:
976 
977     if(b1 != b1Stack){
978         uprv_free(b1);
979     }
980 
981 //    delete prep;
982 
983     return u_terminateUChars(dest, destCapacity, reqLength, status);
984 }
985 
986 U_CFUNC int32_t U_EXPORT2
idnaref_compare(const UChar * s1,int32_t length1,const UChar * s2,int32_t length2,int32_t options,UErrorCode * status)987 idnaref_compare(  const UChar *s1, int32_t length1,
988                 const UChar *s2, int32_t length2,
989                 int32_t options,
990                 UErrorCode* status){
991 
992     if(status == NULL || U_FAILURE(*status)){
993         return -1;
994     }
995 
996     UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
997     UChar *b1 = b1Stack, *b2 = b2Stack;
998     int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
999     int32_t result = -1;
1000 
1001     UParseError parseError;
1002 
1003     b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
1004     if(*status == U_BUFFER_OVERFLOW_ERROR){
1005         // redo processing of string
1006         /* we do not have enough room so grow the buffer*/
1007         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
1008         if(b1==NULL){
1009             *status = U_MEMORY_ALLOCATION_ERROR;
1010             goto CLEANUP;
1011         }
1012 
1013         *status = U_ZERO_ERROR; // reset error
1014 
1015         b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
1016 
1017     }
1018 
1019     b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status);
1020     if(*status == U_BUFFER_OVERFLOW_ERROR){
1021         // redo processing of string
1022         /* we do not have enough room so grow the buffer*/
1023         b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
1024         if(b2==NULL){
1025             *status = U_MEMORY_ALLOCATION_ERROR;
1026             goto CLEANUP;
1027         }
1028 
1029         *status = U_ZERO_ERROR; // reset error
1030 
1031         b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status);
1032 
1033     }
1034     // when toASCII is applied all label separators are replaced with FULL_STOP
1035     result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
1036 
1037 CLEANUP:
1038     if(b1 != b1Stack){
1039         uprv_free(b1);
1040     }
1041 
1042     if(b2 != b2Stack){
1043         uprv_free(b2);
1044     }
1045 
1046     return result;
1047 }
1048 #endif /* #if !UCONFIG_NO_IDNA */
1049