• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 * Copyright (C) 2010-2014, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 */
10 package ohos.global.icu.dev.test.normalizer;
11 
12 import java.io.BufferedReader;
13 import java.io.IOException;
14 import java.util.Collections;
15 import java.util.EnumSet;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.TreeMap;
19 import java.util.regex.Pattern;
20 
21 import org.junit.Test;
22 import org.junit.runner.RunWith;
23 import org.junit.runners.JUnit4;
24 
25 import ohos.global.icu.dev.test.TestFmwk;
26 import ohos.global.icu.dev.test.TestUtil;
27 import ohos.global.icu.impl.Normalizer2Impl.UTF16Plus;
28 import ohos.global.icu.impl.Utility;
29 import ohos.global.icu.text.IDNA;
30 
31 
32 /**
33  * UTS #46 (IDNA2008) test.
34  * @author Markus Scherer
35  * @since 2010jul10
36  */
37 
38 @RunWith(JUnit4.class)
39 public class UTS46Test extends TestFmwk {
UTS46Test()40     public UTS46Test() {
41         int commonOptions=
42             IDNA.USE_STD3_RULES|IDNA.CHECK_BIDI|
43             IDNA.CHECK_CONTEXTJ|IDNA.CHECK_CONTEXTO;
44         trans=IDNA.getUTS46Instance(commonOptions);
45         nontrans=IDNA.getUTS46Instance(commonOptions|
46                                        IDNA.NONTRANSITIONAL_TO_ASCII|IDNA.NONTRANSITIONAL_TO_UNICODE);
47     }
48 
49     @Test
TestAPI()50     public void TestAPI() {
51         StringBuilder result=new StringBuilder();
52         IDNA.Info info=new IDNA.Info();
53         String input="www.eXample.cOm";
54         String expected="www.example.com";
55         trans.nameToASCII(input, result, info);
56         if(info.hasErrors() || !UTF16Plus.equal(result, expected)) {
57             errln(String.format("T.nameToASCII(www.example.com) info.errors=%s result matches=%b",
58                                 info.getErrors(), UTF16Plus.equal(result, expected)));
59         }
60         input="xn--bcher.de-65a";
61         expected="xn--bcher\uFFFDde-65a";
62         nontrans.labelToASCII(input, result, info);
63         if( !info.getErrors().equals(EnumSet.of(IDNA.Error.LABEL_HAS_DOT, IDNA.Error.INVALID_ACE_LABEL)) ||
64             !UTF16Plus.equal(result, expected)
65         ) {
66             errln(String.format("N.labelToASCII(label-with-dot) failed with errors %s",
67                                 info.getErrors()));
68         }
69         // Java API tests that are not parallel to C++ tests
70         // because the C++ specifics (error codes etc.) do not apply here.
71         String resultString=trans.nameToUnicode("fA\u00DF.de", result, info).toString();
72         if(info.hasErrors() || !resultString.equals("fass.de")) {
73             errln(String.format("T.nameToUnicode(fA\u00DF.de) info.errors=%s result matches=%b",
74                                 info.getErrors(), resultString.equals("fass.de")));
75         }
76         try {
77             nontrans.labelToUnicode(result, result, info);
78             errln("N.labelToUnicode(result, result) did not throw an Exception");
79         } catch(Exception e) {
80             // as expected (should be an IllegalArgumentException, or an ICU version of it)
81         }
82     }
83 
84     @Test
TestNotSTD3()85     public void TestNotSTD3() {
86         IDNA not3=IDNA.getUTS46Instance(IDNA.CHECK_BIDI);
87         String input="\u0000A_2+2=4\n.e\u00DFen.net";
88         StringBuilder result=new StringBuilder();
89         IDNA.Info info=new IDNA.Info();
90         if( !not3.nameToUnicode(input, result, info).toString().equals("\u0000a_2+2=4\n.essen.net") ||
91             info.hasErrors()
92         ) {
93             errln(String.format("notSTD3.nameToUnicode(non-LDH ASCII) unexpected errors %s string %s",
94                                 info.getErrors(), prettify(result.toString())));
95         }
96         // A space (BiDi class WS) is not allowed in a BiDi domain name.
97         input="a z.xn--4db.edu";
98         not3.nameToASCII(input, result, info);
99         if(!UTF16Plus.equal(result, input) || !info.getErrors().equals(EnumSet.of(IDNA.Error.BIDI))) {
100             errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed");
101         }
102         // Characters that are canonically equivalent to sequences with non-LDH ASCII.
103         input="a\u2260b\u226Ec\u226Fd";
104         not3.nameToUnicode(input, result, info);
105         if(!UTF16Plus.equal(result, input) || info.hasErrors()) {
106             errln(String.format("notSTD3.nameToUnicode(equiv to non-LDH ASCII) unexpected errors %s string %s",
107                                 info.getErrors().toString(), prettify(result.toString())));
108         }
109     }
110 
111     private static final Map<String, IDNA.Error> errorNamesToErrors;
112     static {
113         errorNamesToErrors=new TreeMap<String, IDNA.Error>();
114         errorNamesToErrors.put("UIDNA_ERROR_EMPTY_LABEL", IDNA.Error.EMPTY_LABEL);
115         errorNamesToErrors.put("UIDNA_ERROR_LABEL_TOO_LONG", IDNA.Error.LABEL_TOO_LONG);
116         errorNamesToErrors.put("UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", IDNA.Error.DOMAIN_NAME_TOO_LONG);
117         errorNamesToErrors.put("UIDNA_ERROR_LEADING_HYPHEN", IDNA.Error.LEADING_HYPHEN);
118         errorNamesToErrors.put("UIDNA_ERROR_TRAILING_HYPHEN", IDNA.Error.TRAILING_HYPHEN);
119         errorNamesToErrors.put("UIDNA_ERROR_HYPHEN_3_4", IDNA.Error.HYPHEN_3_4);
120         errorNamesToErrors.put("UIDNA_ERROR_LEADING_COMBINING_MARK", IDNA.Error.LEADING_COMBINING_MARK);
121         errorNamesToErrors.put("UIDNA_ERROR_DISALLOWED", IDNA.Error.DISALLOWED);
122         errorNamesToErrors.put("UIDNA_ERROR_PUNYCODE", IDNA.Error.PUNYCODE);
123         errorNamesToErrors.put("UIDNA_ERROR_LABEL_HAS_DOT", IDNA.Error.LABEL_HAS_DOT);
124         errorNamesToErrors.put("UIDNA_ERROR_INVALID_ACE_LABEL", IDNA.Error.INVALID_ACE_LABEL);
125         errorNamesToErrors.put("UIDNA_ERROR_BIDI", IDNA.Error.BIDI);
126         errorNamesToErrors.put("UIDNA_ERROR_CONTEXTJ", IDNA.Error.CONTEXTJ);
127         errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_PUNCTUATION", IDNA.Error.CONTEXTO_PUNCTUATION);
128         errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_DIGITS", IDNA.Error.CONTEXTO_DIGITS);
129     }
130 
131     private static final class TestCase {
TestCase()132         private TestCase() {
133             errors=EnumSet.noneOf(IDNA.Error.class);
134         }
set(String[] data)135         private void set(String[] data) {
136             s=data[0];
137             o=data[1];
138             u=data[2];
139             errors.clear();
140             if(data[3].length()!=0) {
141                 for(String e: data[3].split("\\|")) {
142                     errors.add(errorNamesToErrors.get(e));
143                 }
144             }
145         }
146         // Input string and options string (Nontransitional/Transitional/Both).
147         private String s, o;
148         // Expected Unicode result string.
149         private String u;
150         private EnumSet<IDNA.Error> errors;
151     };
152 
153     private static final String testCases[][]={
        // Row layout, as read by TestCase.set():
        //   [0] input string
        //   [1] options string: "N" (nontransitional), "T" (transitional) or "B" (both)
        //   [2] expected Unicode result string
        //   [3] expected error names (keys of errorNamesToErrors) joined with '|'; "" for none
154         { "www.eXample.cOm", "B",  // all ASCII
155           "www.example.com", "" },
156         { "B\u00FCcher.de", "B",  // u-umlaut
157           "b\u00FCcher.de", "" },
158         { "\u00D6BB", "B",  // O-umlaut
159           "\u00F6bb", "" },
160         { "fa\u00DF.de", "N",  // sharp s
161           "fa\u00DF.de", "" },
162         { "fa\u00DF.de", "T",  // sharp s
163           "fass.de", "" },
164         { "XN--fA-hia.dE", "B",  // sharp s in Punycode
165           "fa\u00DF.de", "" },
166         { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "N",  // Greek with final sigma
167           "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "" },
168         { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "T",  // Greek with final sigma
169           "\u03B2\u03CC\u03BB\u03BF\u03C3.com", "" },
170         { "xn--nxasmm1c", "B",  // Greek with final sigma in Punycode
171           "\u03B2\u03CC\u03BB\u03BF\u03C2", "" },
172         { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "N",  // "Sri" in "Sri Lanka" has a ZWJ
173           "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
174         { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "T",  // "Sri" in "Sri Lanka" has a ZWJ
175           "www.\u0DC1\u0DCA\u0DBB\u0DD3.com", "" },
176         { "www.xn--10cl1a0b660p.com", "B",  // "Sri" in Punycode
177           "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" },
178         { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "N",  // ZWNJ
179           "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "" },
180         { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "T",  // ZWNJ
181           "\u0646\u0627\u0645\u0647\u0627\u06CC", "" },
182         { "xn--mgba3gch31f060k.com", "B",  // ZWNJ in Punycode
183           "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC.com", "" },
184         { "a.b\uFF0Ec\u3002d\uFF61", "B",
185           "a.b.c.d.", "" },
186         { "U\u0308.xn--tda", "B",  // U+umlaut.u-umlaut
187           "\u00FC.\u00FC", "" },
188         { "xn--u-ccb", "B",  // u+umlaut in Punycode
189           "xn--u-ccb\uFFFD", "UIDNA_ERROR_INVALID_ACE_LABEL" },
190         { "a\u2488com", "B",  // contains 1-dot
191           "a\uFFFDcom", "UIDNA_ERROR_DISALLOWED" },
192         { "xn--a-ecp.ru", "B",  // contains 1-dot in Punycode
193           "xn--a-ecp\uFFFD.ru", "UIDNA_ERROR_INVALID_ACE_LABEL" },
194         { "xn--0.pt", "B",  // invalid Punycode
195           "xn--0\uFFFD.pt", "UIDNA_ERROR_PUNYCODE" },
196         { "xn--a.pt", "B",  // U+0080
197           "xn--a\uFFFD.pt", "UIDNA_ERROR_INVALID_ACE_LABEL" },
198         { "xn--a-\u00C4.pt", "B",  // invalid Punycode
199           "xn--a-\u00E4.pt", "UIDNA_ERROR_PUNYCODE" },
200         { "\u65E5\u672C\u8A9E\u3002\uFF2A\uFF30", "B",  // Japanese with fullwidth ".jp"
201           "\u65E5\u672C\u8A9E.jp", "" },
202         { "\u2615", "B", "\u2615", "" },  // Unicode 4.0 HOT BEVERAGE
203         // some characters are disallowed because they are canonically equivalent
204         // to sequences with non-LDH ASCII
205         { "a\u2260b\u226Ec\u226Fd", "B",
206           "a\uFFFDb\uFFFDc\uFFFDd", "UIDNA_ERROR_DISALLOWED" },
207         // many deviation characters, test the special mapping code
208         { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
209           "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
210           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
211           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
212           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "N",
213           "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
214           "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
215           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
216           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
217           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz",
218           "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_CONTEXTJ" },
219         { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+
220           "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+
221           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+
222           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+
223           "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "T",
224           "1.assbcssssssssd"+
225           "\u03C3\u03C3sssssssssssssssse"+
226           "ssssssssssssssssssssx"+
227           "ssssssssssssssssssssy"+
228           "sssssssssssssss\u015Dssz", "UIDNA_ERROR_LABEL_TOO_LONG" },
229         // "xn--bss" with deviation characters
230         { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "N",
231           "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "UIDNA_ERROR_CONTEXTJ" },
232         { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "T",
233           "\u5919", "" },
234         // "xn--bssffl" written as:
235         // 02E3 MODIFIER LETTER SMALL X
236         // 034F COMBINING GRAPHEME JOINER (ignored)
237         // 2115 DOUBLE-STRUCK CAPITAL N
238         // 200B ZERO WIDTH SPACE (ignored)
239         // FE63 SMALL HYPHEN-MINUS
240         // 00AD SOFT HYPHEN (ignored)
241         // FF0D FULLWIDTH HYPHEN-MINUS
242         // 180C MONGOLIAN FREE VARIATION SELECTOR TWO (ignored)
243         // 212C SCRIPT CAPITAL B
244         // FE00 VARIATION SELECTOR-1 (ignored)
245         // 017F LATIN SMALL LETTER LONG S
246         // 2064 INVISIBLE PLUS (ignored)
247         // 1D530 MATHEMATICAL FRAKTUR SMALL S
248         // E01EF VARIATION SELECTOR-256 (ignored)
249         // FB04 LATIN SMALL LIGATURE FFL
250         { "\u02E3\u034F\u2115\u200B\uFE63\u00AD\uFF0D\u180C"+
251           "\u212C\uFE00\u017F\u2064"+"\uD835\uDD30\uDB40\uDDEF"/*1D530 E01EF*/+"\uFB04", "B",
252           "\u5921\u591E\u591C\u5919", "" },
253         { "123456789012345678901234567890123456789012345678901234567890123."+
254           "123456789012345678901234567890123456789012345678901234567890123."+
255           "123456789012345678901234567890123456789012345678901234567890123."+
256           "1234567890123456789012345678901234567890123456789012345678901", "B",
257           "123456789012345678901234567890123456789012345678901234567890123."+
258           "123456789012345678901234567890123456789012345678901234567890123."+
259           "123456789012345678901234567890123456789012345678901234567890123."+
260           "1234567890123456789012345678901234567890123456789012345678901", "" },
261         { "123456789012345678901234567890123456789012345678901234567890123."+
262           "123456789012345678901234567890123456789012345678901234567890123."+
263           "123456789012345678901234567890123456789012345678901234567890123."+
264           "1234567890123456789012345678901234567890123456789012345678901.", "B",
265           "123456789012345678901234567890123456789012345678901234567890123."+
266           "123456789012345678901234567890123456789012345678901234567890123."+
267           "123456789012345678901234567890123456789012345678901234567890123."+
268           "1234567890123456789012345678901234567890123456789012345678901.", "" },
269         // Domain name >256 characters, forces slow path in UTF-8 processing.
270         { "123456789012345678901234567890123456789012345678901234567890123."+
271           "123456789012345678901234567890123456789012345678901234567890123."+
272           "123456789012345678901234567890123456789012345678901234567890123."+
273           "123456789012345678901234567890123456789012345678901234567890123."+
274           "12345678901234567890123456789012345678901234567890123456789012", "B",
275           "123456789012345678901234567890123456789012345678901234567890123."+
276           "123456789012345678901234567890123456789012345678901234567890123."+
277           "123456789012345678901234567890123456789012345678901234567890123."+
278           "123456789012345678901234567890123456789012345678901234567890123."+
279           "12345678901234567890123456789012345678901234567890123456789012",
280           "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
281         { "123456789012345678901234567890123456789012345678901234567890123."+
282           "123456789012345678901234567890123456789012345678901234567890123."+
283           "123456789012345678901234567890123456789012345678901234567890123."+
284           "123456789012345678901234567890123456789012345678901234567890123."+
285           "1234567890123456789012345678901234567890123456789\u05D0", "B",
286           "123456789012345678901234567890123456789012345678901234567890123."+
287           "123456789012345678901234567890123456789012345678901234567890123."+
288           "123456789012345678901234567890123456789012345678901234567890123."+
289           "123456789012345678901234567890123456789012345678901234567890123."+
290           "1234567890123456789012345678901234567890123456789\u05D0",
291           "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG|UIDNA_ERROR_BIDI" },
292         { "123456789012345678901234567890123456789012345678901234567890123."+
293           "1234567890123456789012345678901234567890123456789012345678901234."+
294           "123456789012345678901234567890123456789012345678901234567890123."+
295           "123456789012345678901234567890123456789012345678901234567890", "B",
296           "123456789012345678901234567890123456789012345678901234567890123."+
297           "1234567890123456789012345678901234567890123456789012345678901234."+
298           "123456789012345678901234567890123456789012345678901234567890123."+
299           "123456789012345678901234567890123456789012345678901234567890",
300           "UIDNA_ERROR_LABEL_TOO_LONG" },
301         { "123456789012345678901234567890123456789012345678901234567890123."+
302           "1234567890123456789012345678901234567890123456789012345678901234."+
303           "123456789012345678901234567890123456789012345678901234567890123."+
304           "123456789012345678901234567890123456789012345678901234567890.", "B",
305           "123456789012345678901234567890123456789012345678901234567890123."+
306           "1234567890123456789012345678901234567890123456789012345678901234."+
307           "123456789012345678901234567890123456789012345678901234567890123."+
308           "123456789012345678901234567890123456789012345678901234567890.",
309           "UIDNA_ERROR_LABEL_TOO_LONG" },
310         { "123456789012345678901234567890123456789012345678901234567890123."+
311           "1234567890123456789012345678901234567890123456789012345678901234."+
312           "123456789012345678901234567890123456789012345678901234567890123."+
313           "1234567890123456789012345678901234567890123456789012345678901", "B",
314           "123456789012345678901234567890123456789012345678901234567890123."+
315           "1234567890123456789012345678901234567890123456789012345678901234."+
316           "123456789012345678901234567890123456789012345678901234567890123."+
317           "1234567890123456789012345678901234567890123456789012345678901",
318           "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
319         // label length 63: xn--1234567890123456789012345678901234567890123456789012345-9te
320         { "\u00E41234567890123456789012345678901234567890123456789012345", "B",
321           "\u00E41234567890123456789012345678901234567890123456789012345", "" },
322         { "1234567890\u00E41234567890123456789012345678901234567890123456", "B",
323           "1234567890\u00E41234567890123456789012345678901234567890123456", "UIDNA_ERROR_LABEL_TOO_LONG" },
324         { "123456789012345678901234567890123456789012345678901234567890123."+
325           "1234567890\u00E4123456789012345678901234567890123456789012345."+
326           "123456789012345678901234567890123456789012345678901234567890123."+
327           "1234567890123456789012345678901234567890123456789012345678901", "B",
328           "123456789012345678901234567890123456789012345678901234567890123."+
329           "1234567890\u00E4123456789012345678901234567890123456789012345."+
330           "123456789012345678901234567890123456789012345678901234567890123."+
331           "1234567890123456789012345678901234567890123456789012345678901", "" },
332         { "123456789012345678901234567890123456789012345678901234567890123."+
333           "1234567890\u00E4123456789012345678901234567890123456789012345."+
334           "123456789012345678901234567890123456789012345678901234567890123."+
335           "1234567890123456789012345678901234567890123456789012345678901.", "B",
336           "123456789012345678901234567890123456789012345678901234567890123."+
337           "1234567890\u00E4123456789012345678901234567890123456789012345."+
338           "123456789012345678901234567890123456789012345678901234567890123."+
339           "1234567890123456789012345678901234567890123456789012345678901.", "" },
340         { "123456789012345678901234567890123456789012345678901234567890123."+
341           "1234567890\u00E4123456789012345678901234567890123456789012345."+
342           "123456789012345678901234567890123456789012345678901234567890123."+
343           "12345678901234567890123456789012345678901234567890123456789012", "B",
344           "123456789012345678901234567890123456789012345678901234567890123."+
345           "1234567890\u00E4123456789012345678901234567890123456789012345."+
346           "123456789012345678901234567890123456789012345678901234567890123."+
347           "12345678901234567890123456789012345678901234567890123456789012",
348           "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
349         { "123456789012345678901234567890123456789012345678901234567890123."+
350           "1234567890\u00E41234567890123456789012345678901234567890123456."+
351           "123456789012345678901234567890123456789012345678901234567890123."+
352           "123456789012345678901234567890123456789012345678901234567890", "B",
353           "123456789012345678901234567890123456789012345678901234567890123."+
354           "1234567890\u00E41234567890123456789012345678901234567890123456."+
355           "123456789012345678901234567890123456789012345678901234567890123."+
356           "123456789012345678901234567890123456789012345678901234567890",
357           "UIDNA_ERROR_LABEL_TOO_LONG" },
358         { "123456789012345678901234567890123456789012345678901234567890123."+
359           "1234567890\u00E41234567890123456789012345678901234567890123456."+
360           "123456789012345678901234567890123456789012345678901234567890123."+
361           "123456789012345678901234567890123456789012345678901234567890.", "B",
362           "123456789012345678901234567890123456789012345678901234567890123."+
363           "1234567890\u00E41234567890123456789012345678901234567890123456."+
364           "123456789012345678901234567890123456789012345678901234567890123."+
365           "123456789012345678901234567890123456789012345678901234567890.",
366           "UIDNA_ERROR_LABEL_TOO_LONG" },
367         { "123456789012345678901234567890123456789012345678901234567890123."+
368           "1234567890\u00E41234567890123456789012345678901234567890123456."+
369           "123456789012345678901234567890123456789012345678901234567890123."+
370           "1234567890123456789012345678901234567890123456789012345678901", "B",
371           "123456789012345678901234567890123456789012345678901234567890123."+
372           "1234567890\u00E41234567890123456789012345678901234567890123456."+
373           "123456789012345678901234567890123456789012345678901234567890123."+
374           "1234567890123456789012345678901234567890123456789012345678901",
375           "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" },
376         // hyphen errors and empty-label errors
377         // Ticket #10883: ToUnicode also checks for empty labels.
378         { ".", "B", ".", "UIDNA_ERROR_EMPTY_LABEL" },
379         { "\uFF0E", "B", ".", "UIDNA_ERROR_EMPTY_LABEL" },
380         // "xn---q----jra"=="-q--a-umlaut-"
381         { "a.b..-q--a-.e", "B", "a.b..-q--a-.e",
382           "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
383           "UIDNA_ERROR_HYPHEN_3_4" },
384         { "a.b..-q--\u00E4-.e", "B", "a.b..-q--\u00E4-.e",
385           "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
386           "UIDNA_ERROR_HYPHEN_3_4" },
387         { "a.b..xn---q----jra.e", "B", "a.b..-q--\u00E4-.e",
388           "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+
389           "UIDNA_ERROR_HYPHEN_3_4" },
390         { "a..c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
391         { "a.xn--.c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" },
392         { "a.-b.", "B", "a.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
393         { "a.b-.c", "B", "a.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
394         { "a.-.c", "B", "a.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
395         { "a.bc--de.f", "B", "a.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
396         { "\u00E4.\u00AD.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
397         { "\u00E4.xn--.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" },
398         { "\u00E4.-b.", "B", "\u00E4.-b.", "UIDNA_ERROR_LEADING_HYPHEN" },
399         { "\u00E4.b-.c", "B", "\u00E4.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" },
400         { "\u00E4.-.c", "B", "\u00E4.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" },
401         { "\u00E4.bc--de.f", "B", "\u00E4.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" },
402         { "a.b.\u0308c.d", "B", "a.b.\uFFFDc.d", "UIDNA_ERROR_LEADING_COMBINING_MARK" },
403         { "a.b.xn--c-bcb.d", "B",
404           "a.b.xn--c-bcb\uFFFD.d", "UIDNA_ERROR_LEADING_COMBINING_MARK|UIDNA_ERROR_INVALID_ACE_LABEL" },
405         // BiDi
406         { "A0", "B", "a0", "" },
407         { "0A", "B", "0a", "" },  // all-LTR is ok to start with a digit (EN)
408         { "0A.\u05D0", "B",  // ASCII label does not start with L/R/AL
409           "0a.\u05D0", "UIDNA_ERROR_BIDI" },
410         { "c.xn--0-eha.xn--4db", "B",  // 2nd label does not start with L/R/AL
411           "c.0\u00FC.\u05D0", "UIDNA_ERROR_BIDI" },
412         { "b-.\u05D0", "B",  // label does not end with L/EN
413           "b-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
414         { "d.xn----dha.xn--4db", "B",  // 2nd label does not end with L/EN
415           "d.\u00FC-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" },
416         { "a\u05D0", "B", "a\u05D0", "UIDNA_ERROR_BIDI" },  // first dir != last dir
417         { "\u05D0\u05C7", "B", "\u05D0\u05C7", "" },
418         { "\u05D09\u05C7", "B", "\u05D09\u05C7", "" },
419         { "\u05D0a\u05C7", "B", "\u05D0a\u05C7", "UIDNA_ERROR_BIDI" },  // first dir != last dir
420         { "\u05D0\u05EA", "B", "\u05D0\u05EA", "" },
421         { "\u05D0\u05F3\u05EA", "B", "\u05D0\u05F3\u05EA", "" },
422         { "a\u05D0Tz", "B", "a\u05D0tz", "UIDNA_ERROR_BIDI" },  // mixed dir
423         { "\u05D0T\u05EA", "B", "\u05D0t\u05EA", "UIDNA_ERROR_BIDI" },  // mixed dir
424         { "\u05D07\u05EA", "B", "\u05D07\u05EA", "" },
425         { "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" },  // Arabic 7 in the middle
426         { "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" },  // AN digit in LTR
427         { "a7\u0667", "B", "a7\u0667", "UIDNA_ERROR_BIDI" },  // AN digit in LTR
428         { "\u05D07\u0667\u05EA", "B",  // mixed EN/AN digits in RTL
429           "\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" },
430         { "\u05D07\u0667", "B",  // mixed EN/AN digits in RTL
431           "\u05D07\u0667", "UIDNA_ERROR_BIDI" },
432         // ZWJ
433         { "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" },  // Virama+ZWJ
434         { "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
435         { "\u200D", "N", "\u200D", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
436         // ZWNJ
437         { "\u0BB9\u0BCD\u200C", "N", "\u0BB9\u0BCD\u200C", "" },  // Virama+ZWNJ
438         { "\u0BB9\u200C", "N", "\u0BB9\u200C", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
439         { "\u200C", "N", "\u200C", "UIDNA_ERROR_CONTEXTJ" },  // no Virama
440         { "\u0644\u0670\u200C\u06ED\u06EF", "N",  // Joining types D T ZWNJ T R
441           "\u0644\u0670\u200C\u06ED\u06EF", "" },
442         { "\u0644\u0670\u200C\u06EF", "N",  // D T ZWNJ R
443           "\u0644\u0670\u200C\u06EF", "" },
444         { "\u0644\u200C\u06ED\u06EF", "N",  // D ZWNJ T R
445           "\u0644\u200C\u06ED\u06EF", "" },
446         { "\u0644\u200C\u06EF", "N",  // D ZWNJ R
447           "\u0644\u200C\u06EF", "" },
448         { "\u0644\u0670\u200C\u06ED", "N",  // D T ZWNJ T
449           "\u0644\u0670\u200C\u06ED", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
450         { "\u06EF\u200C\u06EF", "N",  // R ZWNJ R
451           "\u06EF\u200C\u06EF", "UIDNA_ERROR_CONTEXTJ" },
452         { "\u0644\u200C", "N",  // D ZWNJ
453           "\u0644\u200C", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" },
454         { "\u0660\u0661", "B",  // Arabic-Indic Digits alone
455           "\u0660\u0661", "UIDNA_ERROR_BIDI" },
456         { "\u06F0\u06F1", "B",  // Extended Arabic-Indic Digits alone
457           "\u06F0\u06F1", "" },
458         { "\u0660\u06F1", "B",  // Mixed Arabic-Indic Digits
459           "\u0660\u06F1", "UIDNA_ERROR_CONTEXTO_DIGITS|UIDNA_ERROR_BIDI" },
460         // All of the CONTEXTO "Would otherwise have been DISALLOWED" characters
461         // in their correct contexts,
462         // then each in incorrect context.
463         { "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "B",
464           "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "UIDNA_ERROR_BIDI" },
465         { "l\u00B7", "B",
466           "l\u00B7", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
467         { "\u00B7l", "B",
468           "\u00B7l", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
469         { "\u0375", "B",
470           "\u0375", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
471         { "\u03B1\u05F3", "B",
472           "\u03B1\u05F3", "UIDNA_ERROR_CONTEXTO_PUNCTUATION|UIDNA_ERROR_BIDI" },
473         { "\u05F4", "B",
474           "\u05F4", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
475         { "l\u30FB", "B",
476           "l\u30FB", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" },
477         // { "", "B",
478         //   "", "" },
479     };
480 
481     @Test
TestSomeCases()482     public void TestSomeCases() {
483         StringBuilder aT=new StringBuilder(), uT=new StringBuilder();
484         StringBuilder aN=new StringBuilder(), uN=new StringBuilder();
485         IDNA.Info aTInfo=new IDNA.Info(), uTInfo=new IDNA.Info();
486         IDNA.Info aNInfo=new IDNA.Info(), uNInfo=new IDNA.Info();
487 
488         StringBuilder aTuN=new StringBuilder(), uTaN=new StringBuilder();
489         StringBuilder aNuN=new StringBuilder(), uNaN=new StringBuilder();
490         IDNA.Info aTuNInfo=new IDNA.Info(), uTaNInfo=new IDNA.Info();
491         IDNA.Info aNuNInfo=new IDNA.Info(), uNaNInfo=new IDNA.Info();
492 
493         StringBuilder aTL=new StringBuilder(), uTL=new StringBuilder();
494         StringBuilder aNL=new StringBuilder(), uNL=new StringBuilder();
495         IDNA.Info aTLInfo=new IDNA.Info(), uTLInfo=new IDNA.Info();
496         IDNA.Info aNLInfo=new IDNA.Info(), uNLInfo=new IDNA.Info();
497 
498         EnumSet<IDNA.Error> uniErrors=EnumSet.noneOf(IDNA.Error.class);
499 
500         TestCase testCase=new TestCase();
501         int i;
502         for(i=0; i<testCases.length; ++i) {
503             testCase.set(testCases[i]);
504             String input=testCase.s;
505             String expected=testCase.u;
506             // ToASCII/ToUnicode, transitional/nontransitional
507             try {
508                 trans.nameToASCII(input, aT, aTInfo);
509                 trans.nameToUnicode(input, uT, uTInfo);
510                 nontrans.nameToASCII(input, aN, aNInfo);
511                 nontrans.nameToUnicode(input, uN, uNInfo);
512             } catch(Exception e) {
513                 errln(String.format("first-level processing [%d/%s] %s - %s",
514                                     i, testCase.o, testCase.s, e));
515                 continue;
516             }
517             // ToUnicode does not set length-overflow errors.
518             uniErrors.clear();
519             uniErrors.addAll(testCase.errors);
520             uniErrors.removeAll(lengthOverflowErrors);
521             char mode=testCase.o.charAt(0);
522             if(mode=='B' || mode=='N') {
523                 if(!sameErrors(uNInfo, uniErrors)) {
524                     errln(String.format("N.nameToUnicode([%d] %s) unexpected errors %s",
525                                         i, testCase.s, uNInfo.getErrors()));
526                     continue;
527                 }
528                 if(!UTF16Plus.equal(uN, expected)) {
529                     errln(String.format("N.nameToUnicode([%d] %s) unexpected string %s",
530                                         i, testCase.s, prettify(uN.toString())));
531                     continue;
532                 }
533                 if(!sameErrors(aNInfo, testCase.errors)) {
534                     errln(String.format("N.nameToASCII([%d] %s) unexpected errors %s",
535                                         i, testCase.s, aNInfo.getErrors()));
536                     continue;
537                 }
538             }
539             if(mode=='B' || mode=='T') {
540                 if(!sameErrors(uTInfo, uniErrors)) {
541                     errln(String.format("T.nameToUnicode([%d] %s) unexpected errors %s",
542                                         i, testCase.s, uTInfo.getErrors()));
543                     continue;
544                 }
545                 if(!UTF16Plus.equal(uT, expected)) {
546                     errln(String.format("T.nameToUnicode([%d] %s) unexpected string %s",
547                                         i, testCase.s, prettify(uT.toString())));
548                     continue;
549                 }
550                 if(!sameErrors(aTInfo, testCase.errors)) {
551                     errln(String.format("T.nameToASCII([%d] %s) unexpected errors %s",
552                                         i, testCase.s, aTInfo.getErrors()));
553                     continue;
554                 }
555             }
556             // ToASCII is all-ASCII if no severe errors
557             if(!hasCertainErrors(aNInfo, severeErrors) && !isASCII(aN)) {
558                 errln(String.format("N.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
559                                     i, testCase.s, aNInfo.getErrors(), prettify(aN.toString())));
560                 continue;
561             }
562             if(!hasCertainErrors(aTInfo, severeErrors) && !isASCII(aT)) {
563                 errln(String.format("T.nameToASCII([%d] %s) (errors %s) result is not ASCII %s",
564                                     i, testCase.s, aTInfo.getErrors(), prettify(aT.toString())));
565                 continue;
566             }
567             if(isVerbose()) {
568                 char m= mode=='B' ? mode : 'N';
569                 logln(String.format("%c.nameToASCII([%d] %s) (errors %s) result string: %s",
570                                     m, i, testCase.s, aNInfo.getErrors(), prettify(aN.toString())));
571                 if(mode!='B') {
572                     logln(String.format("T.nameToASCII([%d] %s) (errors %s) result string: %s",
573                                         i, testCase.s, aTInfo.getErrors(), prettify(aT.toString())));
574                 }
575             }
576             // second-level processing
577             try {
578                 nontrans.nameToUnicode(aT, aTuN, aTuNInfo);
579                 nontrans.nameToASCII(uT, uTaN, uTaNInfo);
580                 nontrans.nameToUnicode(aN, aNuN, aNuNInfo);
581                 nontrans.nameToASCII(uN, uNaN, uNaNInfo);
582             } catch(Exception e) {
583                 errln(String.format("second-level processing [%d/%s] %s - %s",
584                                     i, testCase.o, testCase.s, e));
585                 continue;
586             }
587             if(!UTF16Plus.equal(aN, uNaN)) {
588                 errln(String.format("N.nameToASCII([%d] %s)!=N.nameToUnicode().N.nameToASCII() "+
589                                     "(errors %s) %s vs. %s",
590                                     i, testCase.s, aNInfo.getErrors(),
591                                     prettify(aN.toString()), prettify(uNaN.toString())));
592                 continue;
593             }
594             if(!UTF16Plus.equal(aT, uTaN)) {
595                 errln(String.format("T.nameToASCII([%d] %s)!=T.nameToUnicode().N.nameToASCII() "+
596                                     "(errors %s) %s vs. %s",
597                                     i, testCase.s, aNInfo.getErrors(),
598                                     prettify(aT.toString()), prettify(uTaN.toString())));
599                 continue;
600             }
601             if(!UTF16Plus.equal(uN, aNuN)) {
602                 errln(String.format("N.nameToUnicode([%d] %s)!=N.nameToASCII().N.nameToUnicode() "+
603                                     "(errors %s) %s vs. %s",
604                                     i, testCase.s, uNInfo.getErrors(), prettify(uN.toString()), prettify(aNuN.toString())));
605                 continue;
606             }
607             if(!UTF16Plus.equal(uT, aTuN)) {
608                 errln(String.format("T.nameToUnicode([%d] %s)!=T.nameToASCII().N.nameToUnicode() "+
609                                     "(errors %s) %s vs. %s",
610                                     i, testCase.s, uNInfo.getErrors(),
611                                     prettify(uT.toString()), prettify(aTuN.toString())));
612                 continue;
613             }
614             // labelToUnicode
615             try {
616                 trans.labelToASCII(input, aTL, aTLInfo);
617                 trans.labelToUnicode(input, uTL, uTLInfo);
618                 nontrans.labelToASCII(input, aNL, aNLInfo);
619                 nontrans.labelToUnicode(input, uNL, uNLInfo);
620             } catch(Exception e) {
621                 errln(String.format("labelToXYZ processing [%d/%s] %s - %s",
622                                     i, testCase.o, testCase.s, e));
623                 continue;
624             }
625             if(aN.indexOf(".")<0) {
626                 if(!UTF16Plus.equal(aN, aNL) || !sameErrors(aNInfo, aNLInfo)) {
627                     errln(String.format("N.nameToASCII([%d] %s)!=N.labelToASCII() "+
628                                         "(errors %s vs %s) %s vs. %s",
629                                         i, testCase.s, aNInfo.getErrors().toString(), aNLInfo.getErrors().toString(),
630                                         prettify(aN.toString()), prettify(aNL.toString())));
631                     continue;
632                 }
633             } else {
634                 if(!hasError(aNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
635                     errln(String.format("N.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
636                                         i, testCase.s, aNLInfo.getErrors()));
637                     continue;
638                 }
639             }
640             if(aT.indexOf(".")<0) {
641                 if(!UTF16Plus.equal(aT, aTL) || !sameErrors(aTInfo, aTLInfo)) {
642                     errln(String.format("T.nameToASCII([%d] %s)!=T.labelToASCII() "+
643                                         "(errors %s vs %s) %s vs. %s",
644                                         i, testCase.s, aTInfo.getErrors().toString(), aTLInfo.getErrors().toString(),
645                                         prettify(aT.toString()), prettify(aTL.toString())));
646                     continue;
647                 }
648             } else {
649                 if(!hasError(aTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
650                     errln(String.format("T.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
651                                         i, testCase.s, aTLInfo.getErrors()));
652                     continue;
653                 }
654             }
655             if(uN.indexOf(".")<0) {
656                 if(!UTF16Plus.equal(uN, uNL) || !sameErrors(uNInfo, uNLInfo)) {
657                     errln(String.format("N.nameToUnicode([%d] %s)!=N.labelToUnicode() "+
658                                         "(errors %s vs %s) %s vs. %s",
659                                         i, testCase.s, uNInfo.getErrors().toString(), uNLInfo.getErrors().toString(),
660                                         prettify(uN.toString()), prettify(uNL.toString())));
661                     continue;
662                 }
663             } else {
664                 if(!hasError(uNLInfo, IDNA.Error.LABEL_HAS_DOT)) {
665                     errln(String.format("N.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
666                                         i, testCase.s, uNLInfo.getErrors()));
667                     continue;
668                 }
669             }
670             if(uT.indexOf(".")<0) {
671                 if(!UTF16Plus.equal(uT, uTL) || !sameErrors(uTInfo, uTLInfo)) {
672                     errln(String.format("T.nameToUnicode([%d] %s)!=T.labelToUnicode() "+
673                                         "(errors %s vs %s) %s vs. %s",
674                                         i, testCase.s, uTInfo.getErrors().toString(), uTLInfo.getErrors().toString(),
675                                         prettify(uT.toString()), prettify(uTL.toString())));
676                     continue;
677                 }
678             } else {
679                 if(!hasError(uTLInfo, IDNA.Error.LABEL_HAS_DOT)) {
680                     errln(String.format("T.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT",
681                                         i, testCase.s, uTLInfo.getErrors()));
682                     continue;
683                 }
684             }
685             // Differences between transitional and nontransitional processing
686             if(mode=='B') {
687                 if( aNInfo.isTransitionalDifferent() ||
688                     aTInfo.isTransitionalDifferent() ||
689                     uNInfo.isTransitionalDifferent() ||
690                     uTInfo.isTransitionalDifferent() ||
691                     aNLInfo.isTransitionalDifferent() ||
692                     aTLInfo.isTransitionalDifferent() ||
693                     uNLInfo.isTransitionalDifferent() ||
694                     uTLInfo.isTransitionalDifferent()
695                 ) {
696                     errln(String.format("B.process([%d] %s) isTransitionalDifferent()", i, testCase.s));
697                     continue;
698                 }
699                 if( !UTF16Plus.equal(aN, aT) || !UTF16Plus.equal(uN, uT) ||
700                     !UTF16Plus.equal(aNL, aTL) || !UTF16Plus.equal(uNL, uTL) ||
701                     !sameErrors(aNInfo, aTInfo) || !sameErrors(uNInfo, uTInfo) ||
702                     !sameErrors(aNLInfo, aTLInfo) || !sameErrors(uNLInfo, uTLInfo)
703                 ) {
704                     errln(String.format("N.process([%d] %s) vs. T.process() different errors or result strings",
705                                         i, testCase.s));
706                     continue;
707                 }
708             } else {
709                 if( !aNInfo.isTransitionalDifferent() ||
710                     !aTInfo.isTransitionalDifferent() ||
711                     !uNInfo.isTransitionalDifferent() ||
712                     !uTInfo.isTransitionalDifferent() ||
713                     !aNLInfo.isTransitionalDifferent() ||
714                     !aTLInfo.isTransitionalDifferent() ||
715                     !uNLInfo.isTransitionalDifferent() ||
716                     !uTLInfo.isTransitionalDifferent()
717                 ) {
718                     errln(String.format("%s.process([%d] %s) !isTransitionalDifferent()",
719                                         testCase.o, i, testCase.s));
720                     continue;
721                 }
722                 if( UTF16Plus.equal(aN, aT) || UTF16Plus.equal(uN, uT) ||
723                     UTF16Plus.equal(aNL, aTL) || UTF16Plus.equal(uNL, uTL)
724                 ) {
725                     errln(String.format("N.process([%d] %s) vs. T.process() same result strings",
726                                         i, testCase.s));
727                     continue;
728                 }
729             }
730         }
731     }
732 
checkIdnaTestResult(String line, String type, String expected, CharSequence result, String status, IDNA.Info info)733     private void checkIdnaTestResult(String line, String type,
734             String expected, CharSequence result, String status, IDNA.Info info) {
735         // An error in toUnicode or toASCII is indicated by a value in square brackets,
736         // such as "[B5 B6]".
737         boolean expectedHasErrors = false;
738         if (!status.isEmpty()) {
739             if (status.charAt(0) != '[') {
740                 errln(String.format("%s  status field does not start with '[': %s\n    %s",
741                         type, status, line));
742             }
743             if (!status.equals("[]")) {
744                 expectedHasErrors = true;
745             }
746         }
747         if (expectedHasErrors != info.hasErrors()) {
748             errln(String.format(
749                     "%s  expected errors %s %b != %b = actual has errors: %s\n    %s",
750                     type, status, expectedHasErrors, info.hasErrors(), info.getErrors(), line));
751         }
752         if (!expectedHasErrors && !UTF16Plus.equal(expected, result)) {
753             errln(String.format("%s  expected != actual\n    %s", type, line));
754             errln("    " + expected);
755             errln("    " + result);
756         }
757     }
758 
759     @Test
IdnaTest()760     public void IdnaTest() throws IOException {
761         BufferedReader idnaTestFile = TestUtil.getDataReader("unicode/IdnaTestV2.txt", "UTF-8");
762         Pattern semi = Pattern.compile(";");
763         try {
764             String line;
765             while ((line = idnaTestFile.readLine()) != null) {
766                 // Remove trailing comments and whitespace.
767                 int commentStart = line.indexOf('#');
768                 if (commentStart >= 0) {
769                     line = line.substring(0, commentStart);
770                 }
771                 String[] fields = semi.split(line, -1);
772                 if (fields.length <= 1) {
773                     continue;  // Skip empty and comment-only lines.
774                 }
775 
776                 // IdnaTestV2.txt (since Unicode 11)
777                 // Column 1: source
778                 // The source string to be tested
779                 String source = Utility.unescape(fields[0].trim());
780 
781                 // Column 2: toUnicode
782                 // The result of applying toUnicode to the source, with Transitional_Processing=false.
783                 // A blank value means the same as the source value.
784                 String toUnicode = Utility.unescape(fields[1].trim());
785                 if (toUnicode.isEmpty()) {
786                     toUnicode = source;
787                 }
788 
789                 // Column 3: toUnicodeStatus
790                 // A set of status codes, each corresponding to a particular test.
791                 // A blank value means [].
792                 String toUnicodeStatus = fields[2].trim();
793 
794                 // Column 4: toAsciiN
795                 // The result of applying toASCII to the source, with Transitional_Processing=false.
796                 // A blank value means the same as the toUnicode value.
797                 String toAsciiN = Utility.unescape(fields[3].trim());
798                 if (toAsciiN.isEmpty()) {
799                     toAsciiN = toUnicode;
800                 }
801 
802                 // Column 5: toAsciiNStatus
803                 // A set of status codes, each corresponding to a particular test.
804                 // A blank value means the same as the toUnicodeStatus value.
805                 String toAsciiNStatus = fields[4].trim();
806                 if (toAsciiNStatus.isEmpty()) {
807                     toAsciiNStatus = toUnicodeStatus;
808                 }
809 
810                 // Column 6: toAsciiT
811                 // The result of applying toASCII to the source, with Transitional_Processing=true.
812                 // A blank value means the same as the toAsciiN value.
813                 String toAsciiT = Utility.unescape(fields[5].trim());
814                 if (toAsciiT.isEmpty()) {
815                     toAsciiT = toAsciiN;
816                 }
817 
818                 // Column 7: toAsciiTStatus
819                 // A set of status codes, each corresponding to a particular test.
820                 // A blank value means the same as the toAsciiNStatus value.
821                 String toAsciiTStatus = fields[6].trim();
822                 if (toAsciiTStatus.isEmpty()) {
823                     toAsciiTStatus = toAsciiNStatus;
824                 }
825 
826                 // ToASCII/ToUnicode, transitional/nontransitional
827                 StringBuilder uN, aN, aT;
828                 IDNA.Info uNInfo, aNInfo, aTInfo;
829                 nontrans.nameToUnicode(source, uN = new StringBuilder(), uNInfo = new IDNA.Info());
830                 checkIdnaTestResult(line, "toUnicodeNontrans", toUnicode, uN, toUnicodeStatus, uNInfo);
831                 nontrans.nameToASCII(source, aN = new StringBuilder(), aNInfo = new IDNA.Info());
832                 checkIdnaTestResult(line, "toASCIINontrans", toAsciiN, aN, toAsciiNStatus, aNInfo);
833                 trans.nameToASCII(source, aT = new StringBuilder(), aTInfo = new IDNA.Info());
834                 checkIdnaTestResult(line, "toASCIITrans", toAsciiT, aT, toAsciiTStatus, aTInfo);
835             }
836         } finally {
837             idnaTestFile.close();
838         }
839     }
840 
    // UTS #46 instances used by the tests; both are assigned in the constructor,
    // nontrans with the additional NONTRANSITIONAL_TO_ASCII/NONTRANSITIONAL_TO_UNICODE options.
    private final IDNA trans, nontrans;

    // Errors after which TestSomeCases() does not require a ToASCII result to be all-ASCII.
    private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of(
        IDNA.Error.LEADING_COMBINING_MARK,
        IDNA.Error.DISALLOWED,
        IDNA.Error.PUNYCODE,
        IDNA.Error.LABEL_HAS_DOT,
        IDNA.Error.INVALID_ACE_LABEL);
    // Length-overflow errors; TestSomeCases() removes these from the expected errors
    // before comparing with ToUnicode results.
    private static final EnumSet<IDNA.Error> lengthOverflowErrors=EnumSet.of(
            IDNA.Error.LABEL_TOO_LONG,
            IDNA.Error.DOMAIN_NAME_TOO_LONG);
852 
hasError(IDNA.Info info, IDNA.Error error)853     private boolean hasError(IDNA.Info info, IDNA.Error error) {
854         return info.getErrors().contains(error);
855     }
856     // assumes that certainErrors is not empty
hasCertainErrors(Set<IDNA.Error> errors, Set<IDNA.Error> certainErrors)857     private boolean hasCertainErrors(Set<IDNA.Error> errors, Set<IDNA.Error> certainErrors) {
858         return !errors.isEmpty() && !Collections.disjoint(errors, certainErrors);
859     }
hasCertainErrors(IDNA.Info info, Set<IDNA.Error> certainErrors)860     private boolean hasCertainErrors(IDNA.Info info, Set<IDNA.Error> certainErrors) {
861         return hasCertainErrors(info.getErrors(), certainErrors);
862     }
sameErrors(Set<IDNA.Error> a, Set<IDNA.Error> b)863     private boolean sameErrors(Set<IDNA.Error> a, Set<IDNA.Error> b) {
864         return a.equals(b);
865     }
sameErrors(IDNA.Info a, IDNA.Info b)866     private boolean sameErrors(IDNA.Info a, IDNA.Info b) {
867         return sameErrors(a.getErrors(), b.getErrors());
868     }
sameErrors(IDNA.Info a, Set<IDNA.Error> b)869     private boolean sameErrors(IDNA.Info a, Set<IDNA.Error> b) {
870         return sameErrors(a.getErrors(), b);
871     }
872 
873     private static boolean
isASCII(CharSequence str)874     isASCII(CharSequence str) {
875         int length=str.length();
876         for(int i=0; i<length; ++i) {
877             if(str.charAt(i)>=0x80) {
878                 return false;
879             }
880         }
881         return true;
882     }
883 }
884