• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2003-2010, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
*/
9 package com.ibm.icu.impl;
10 
11 import com.ibm.icu.text.IDNA;
12 import com.ibm.icu.text.StringPrep;
13 import com.ibm.icu.text.StringPrepParseException;
14 import com.ibm.icu.text.UCharacterIterator;
15 
16 /**
17  * IDNA2003 implementation code, moved out of com.ibm.icu.text.IDNA.java
18  * while extending that class to support IDNA2008/UTS #46 as well.
19  * @author Ram Viswanadha
20  */
21 public final class IDNA2003 {
22     /* IDNA ACE Prefix is "xn--" */
23     private static char[] ACE_PREFIX                = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
24     //private static final int ACE_PREFIX_LENGTH      = ACE_PREFIX.length;
25 
26     private static final int MAX_LABEL_LENGTH       = 63;
27     private static final int HYPHEN                 = 0x002D;
28     private static final int CAPITAL_A              = 0x0041;
29     private static final int CAPITAL_Z              = 0x005A;
30     private static final int LOWER_CASE_DELTA       = 0x0020;
31     private static final int FULL_STOP              = 0x002E;
32     private static final int MAX_DOMAIN_NAME_LENGTH = 255;
33 
34     // The NamePrep profile object
35     private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
36 
startsWithPrefix(StringBuffer src)37     private static boolean startsWithPrefix(StringBuffer src){
38         if(src.length() < ACE_PREFIX.length){
39             return false;
40         }
41         for(int i=0; i<ACE_PREFIX.length;i++){
42             if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
43                 return false;
44             }
45         }
46         return true;
47     }
48 
toASCIILower(char ch)49     private static char toASCIILower(char ch){
50         if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
51             return (char)(ch + LOWER_CASE_DELTA);
52         }
53         return ch;
54     }
55 
toASCIILower(CharSequence src)56     private static StringBuffer toASCIILower(CharSequence src){
57         StringBuffer dest = new StringBuffer();
58         for(int i=0; i<src.length();i++){
59             dest.append(toASCIILower(src.charAt(i)));
60         }
61         return dest;
62     }
63 
compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2)64     private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
65         char c1,c2;
66         int rc;
67         for(int i =0;/* no condition */;i++) {
68             /* If we reach the ends of both strings then they match */
69             if(i == s1.length()) {
70                 return 0;
71             }
72 
73             c1 = s1.charAt(i);
74             c2 = s2.charAt(i);
75 
76             /* Case-insensitive comparison */
77             if(c1!=c2) {
78                 rc=toASCIILower(c1)-toASCIILower(c2);
79                 if(rc!=0) {
80                     return rc;
81                 }
82             }
83         }
84     }
85 
getSeparatorIndex(char[] src,int start, int limit)86     private static int getSeparatorIndex(char[] src,int start, int limit){
87         for(; start<limit;start++){
88             if(isLabelSeparator(src[start])){
89                 return start;
90             }
91         }
92         // we have not found the separator just return length
93         return start;
94     }
95 
96     /*
97     private static int getSeparatorIndex(UCharacterIterator iter){
98         int currentIndex = iter.getIndex();
99         int separatorIndex = 0;
100         int ch;
101         while((ch=iter.next())!= UCharacterIterator.DONE){
102             if(isLabelSeparator(ch)){
103                 separatorIndex = iter.getIndex();
104                 iter.setIndex(currentIndex);
105                 return separatorIndex;
106             }
107         }
108         // reset index
109         iter.setIndex(currentIndex);
110         // we have not found the separator just return the length
111 
112     }
113     */
114 
115 
isLDHChar(int ch)116     private static boolean isLDHChar(int ch){
117         // high runner case
118         if(ch>0x007A){
119             return false;
120         }
121         //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
122         if( (ch==0x002D) ||
123             (0x0030 <= ch && ch <= 0x0039) ||
124             (0x0041 <= ch && ch <= 0x005A) ||
125             (0x0061 <= ch && ch <= 0x007A)
126           ){
127             return true;
128         }
129         return false;
130     }
131 
132     /**
133      * Ascertain if the given code point is a label separator as
134      * defined by the IDNA RFC
135      *
136      * @param ch The code point to be ascertained
137      * @return true if the char is a label separator
138      * @stable ICU 2.8
139      */
isLabelSeparator(int ch)140     private static boolean isLabelSeparator(int ch){
141         switch(ch){
142             case 0x002e:
143             case 0x3002:
144             case 0xFF0E:
145             case 0xFF61:
146                 return true;
147             default:
148                 return false;
149         }
150     }
151 
convertToASCII(UCharacterIterator src, int options)152     public static StringBuffer convertToASCII(UCharacterIterator src, int options)
153             throws StringPrepParseException{
154 
155         boolean[] caseFlags = null;
156 
157         // the source contains all ascii codepoints
158         boolean srcIsASCII  = true;
159         // assume the source contains all LDH codepoints
160         boolean srcIsLDH = true;
161 
162         //get the options
163         boolean useSTD3ASCIIRules = ((options & IDNA.USE_STD3_RULES) != 0);
164         int ch;
165         // step 1
166         while((ch = src.next())!= UCharacterIterator.DONE){
167             if(ch> 0x7f){
168                 srcIsASCII = false;
169                 break;
170             }
171         }
172         int failPos = -1;
173         src.setToStart();
174         StringBuffer processOut = null;
175         // step 2 is performed only if the source contains non ASCII
176         if(!srcIsASCII){
177             // step 2
178             processOut = namePrep.prepare(src, options);
179         }else{
180             processOut = new StringBuffer(src.getText());
181         }
182         int poLen = processOut.length();
183 
184         if(poLen==0){
185             throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
186         }
187         StringBuffer dest = new StringBuffer();
188 
189         // reset the variable to verify if output of prepare is ASCII or not
190         srcIsASCII = true;
191 
192         // step 3 & 4
193         for(int j=0;j<poLen;j++ ){
194             ch=processOut.charAt(j);
195             if(ch > 0x7F){
196                 srcIsASCII = false;
197             }else if(isLDHChar(ch)==false){
198                 // here we do not assemble surrogates
199                 // since we know that LDH code points
200                 // are in the ASCII range only
201                 srcIsLDH = false;
202                 failPos = j;
203             }
204         }
205 
206         if(useSTD3ASCIIRules == true){
207             // verify 3a and 3b
208             if( srcIsLDH == false /* source contains some non-LDH characters */
209                 || processOut.charAt(0) ==  HYPHEN
210                 || processOut.charAt(processOut.length()-1) == HYPHEN){
211 
212                 /* populate the parseError struct */
213                 if(srcIsLDH==false){
214                      throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
215                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
216                                               processOut.toString(),
217                                              (failPos>0) ? (failPos-1) : failPos);
218                 }else if(processOut.charAt(0) == HYPHEN){
219                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
220                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
221 
222                 }else{
223                      throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
224                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
225                                               processOut.toString(),
226                                               (poLen>0) ? poLen-1 : poLen);
227 
228                 }
229             }
230         }
231         if(srcIsASCII){
232             dest =  processOut;
233         }else{
234             // step 5 : verify the sequence does not begin with ACE prefix
235             if(!startsWithPrefix(processOut)){
236 
237                 //step 6: encode the sequence with punycode
238                 caseFlags = new boolean[poLen];
239 
240                 StringBuilder punyout = Punycode.encode(processOut,caseFlags);
241 
242                 // convert all codepoints to lower case ASCII
243                 StringBuffer lowerOut = toASCIILower(punyout);
244 
245                 //Step 7: prepend the ACE prefix
246                 dest.append(ACE_PREFIX,0,ACE_PREFIX.length);
247                 //Step 6: copy the contents in b2 into dest
248                 dest.append(lowerOut);
249             }else{
250 
251                 throw new StringPrepParseException("The input does not start with the ACE Prefix.",
252                                          StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
253             }
254         }
255         if(dest.length() > MAX_LABEL_LENGTH){
256             throw new StringPrepParseException("The labels in the input are too long. Length > 63.",
257                                      StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
258         }
259         return dest;
260     }
261 
convertIDNToASCII(String src,int options)262     public static StringBuffer convertIDNToASCII(String src,int options)
263             throws StringPrepParseException{
264 
265         char[] srcArr = src.toCharArray();
266         StringBuffer result = new StringBuffer();
267         int sepIndex=0;
268         int oldSepIndex=0;
269         for(;;){
270             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
271             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
272             //make sure this is not a root label separator.
273             if(!(label.length()==0 && sepIndex==srcArr.length)){
274                 UCharacterIterator iter = UCharacterIterator.getInstance(label);
275                 result.append(convertToASCII(iter,options));
276             }
277             if(sepIndex==srcArr.length){
278                 break;
279             }
280 
281             // increment the sepIndex to skip past the separator
282             sepIndex++;
283             oldSepIndex = sepIndex;
284             result.append((char)FULL_STOP);
285         }
286         if(result.length() > MAX_DOMAIN_NAME_LENGTH){
287             throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
288         }
289         return result;
290     }
291 
convertToUnicode(UCharacterIterator src, int options)292     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
293             throws StringPrepParseException{
294 
295         boolean[] caseFlags = null;
296 
297         // the source contains all ascii codepoints
298         boolean srcIsASCII  = true;
299         // assume the source contains all LDH codepoints
300         //boolean srcIsLDH = true;
301 
302         //get the options
303         //boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
304 
305         //int failPos = -1;
306         int ch;
307         int saveIndex = src.getIndex();
308         // step 1: find out if all the codepoints in src are ASCII
309         while((ch=src.next())!= UCharacterIterator.DONE){
310             if(ch>0x7F){
311                 srcIsASCII = false;
312             }/*else if((srcIsLDH = isLDHChar(ch))==false){
313                 failPos = src.getIndex();
314             }*/
315         }
316         StringBuffer processOut;
317 
318         if(srcIsASCII == false){
319             try {
320                 // step 2: process the string
321                 src.setIndex(saveIndex);
322                 processOut = namePrep.prepare(src,options);
323             } catch (StringPrepParseException ex) {
324                 return new StringBuffer(src.getText());
325             }
326 
327         }else{
328             //just point to source
329             processOut = new StringBuffer(src.getText());
330         }
331         // TODO:
332         // The RFC states that
333         // <quote>
334         // ToUnicode never fails. If any step fails, then the original input
335         // is returned immediately in that step.
336         // </quote>
337 
338         //step 3: verify ACE Prefix
339         if(startsWithPrefix(processOut)){
340             StringBuffer decodeOut = null;
341 
342             //step 4: Remove the ACE Prefix
343             String temp = processOut.substring(ACE_PREFIX.length,processOut.length());
344 
345             //step 5: Decode using punycode
346             try {
347                 decodeOut = new StringBuffer(Punycode.decode(temp,caseFlags));
348             } catch (StringPrepParseException e) {
349                 decodeOut = null;
350             }
351 
352             //step 6:Apply toASCII
353             if (decodeOut != null) {
354                 StringBuffer toASCIIOut = convertToASCII(UCharacterIterator.getInstance(decodeOut), options);
355 
356                 //step 7: verify
357                 if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){
358 //                    throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed",
359 //                                             StringPrepParseException.VERIFICATION_ERROR);
360                     decodeOut = null;
361                 }
362             }
363 
364             //step 8: return output of step 5
365              if (decodeOut != null) {
366                  return decodeOut;
367              }
368         }
369 
370 //        }else{
371 //            // verify that STD3 ASCII rules are satisfied
372 //            if(useSTD3ASCIIRules == true){
373 //                if( srcIsLDH == false /* source contains some non-LDH characters */
374 //                    || processOut.charAt(0) ==  HYPHEN
375 //                    || processOut.charAt(processOut.length()-1) == HYPHEN){
376 //
377 //                    if(srcIsLDH==false){
378 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
379 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
380 //                                                 (failPos>0) ? (failPos-1) : failPos);
381 //                    }else if(processOut.charAt(0) == HYPHEN){
382 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
383 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
384 //                                                 processOut.toString(),0);
385 //
386 //                    }else{
387 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
388 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
389 //                                                 processOut.toString(),
390 //                                                 processOut.length());
391 //
392 //                    }
393 //                }
394 //            }
395 //            // just return the source
396 //            return new StringBuffer(src.getText());
397 //        }
398 
399         return new StringBuffer(src.getText());
400     }
401 
convertIDNToUnicode(String src, int options)402     public static StringBuffer convertIDNToUnicode(String src, int options)
403             throws StringPrepParseException{
404 
405         char[] srcArr = src.toCharArray();
406         StringBuffer result = new StringBuffer();
407         int sepIndex=0;
408         int oldSepIndex=0;
409         for(;;){
410             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
411             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
412             if(label.length()==0 && sepIndex!=srcArr.length ){
413                 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
414             }
415             UCharacterIterator iter = UCharacterIterator.getInstance(label);
416             result.append(convertToUnicode(iter,options));
417             if(sepIndex==srcArr.length){
418                 break;
419             }
420             // Unlike the ToASCII operation we don't normalize the label separators
421             result.append(srcArr[sepIndex]);
422             // increment the sepIndex to skip past the separator
423             sepIndex++;
424             oldSepIndex =sepIndex;
425         }
426         if(result.length() > MAX_DOMAIN_NAME_LENGTH){
427             throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
428         }
429         return result;
430     }
431 
compare(String s1, String s2, int options)432     public static int compare(String s1, String s2, int options) throws StringPrepParseException{
433         StringBuffer s1Out = convertIDNToASCII(s1, options);
434         StringBuffer s2Out = convertIDNToASCII(s2, options);
435         return compareCaseInsensitiveASCII(s1Out,s2Out);
436     }
437 }
438