1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2010-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package ohos.global.icu.dev.test.normalizer; 11 12 import java.io.BufferedReader; 13 import java.io.IOException; 14 import java.util.Collections; 15 import java.util.EnumSet; 16 import java.util.Map; 17 import java.util.Set; 18 import java.util.TreeMap; 19 import java.util.regex.Pattern; 20 21 import org.junit.Test; 22 import org.junit.runner.RunWith; 23 import org.junit.runners.JUnit4; 24 25 import ohos.global.icu.dev.test.TestFmwk; 26 import ohos.global.icu.dev.test.TestUtil; 27 import ohos.global.icu.impl.Normalizer2Impl.UTF16Plus; 28 import ohos.global.icu.impl.Utility; 29 import ohos.global.icu.text.IDNA; 30 31 32 /** 33 * UTS #46 (IDNA2008) test. 34 * @author Markus Scherer 35 * @since 2010jul10 36 */ 37 38 @RunWith(JUnit4.class) 39 public class UTS46Test extends TestFmwk { UTS46Test()40 public UTS46Test() { 41 int commonOptions= 42 IDNA.USE_STD3_RULES|IDNA.CHECK_BIDI| 43 IDNA.CHECK_CONTEXTJ|IDNA.CHECK_CONTEXTO; 44 trans=IDNA.getUTS46Instance(commonOptions); 45 nontrans=IDNA.getUTS46Instance(commonOptions| 46 IDNA.NONTRANSITIONAL_TO_ASCII|IDNA.NONTRANSITIONAL_TO_UNICODE); 47 } 48 49 @Test TestAPI()50 public void TestAPI() { 51 StringBuilder result=new StringBuilder(); 52 IDNA.Info info=new IDNA.Info(); 53 String input="www.eXample.cOm"; 54 String expected="www.example.com"; 55 trans.nameToASCII(input, result, info); 56 if(info.hasErrors() || !UTF16Plus.equal(result, expected)) { 57 errln(String.format("T.nameToASCII(www.example.com) info.errors=%s result matches=%b", 58 info.getErrors(), UTF16Plus.equal(result, expected))); 59 } 60 input="xn--bcher.de-65a"; 61 expected="xn--bcher\uFFFDde-65a"; 62 nontrans.labelToASCII(input, result, info); 63 if( !info.getErrors().equals(EnumSet.of(IDNA.Error.LABEL_HAS_DOT, IDNA.Error.INVALID_ACE_LABEL)) || 64 !UTF16Plus.equal(result, expected) 65 ) { 66 errln(String.format("N.labelToASCII(label-with-dot) failed with errors %s", 67 info.getErrors())); 68 } 69 // Java API tests that are not parallel to C++ tests 70 // because the C++ specifics (error codes etc.) do not apply here. 71 String resultString=trans.nameToUnicode("fA\u00DF.de", result, info).toString(); 72 if(info.hasErrors() || !resultString.equals("fass.de")) { 73 errln(String.format("T.nameToUnicode(fA\u00DF.de) info.errors=%s result matches=%b", 74 info.getErrors(), resultString.equals("fass.de"))); 75 } 76 try { 77 nontrans.labelToUnicode(result, result, info); 78 errln("N.labelToUnicode(result, result) did not throw an Exception"); 79 } catch(Exception e) { 80 // as expected (should be an IllegalArgumentException, or an ICU version of it) 81 } 82 } 83 84 @Test TestNotSTD3()85 public void TestNotSTD3() { 86 IDNA not3=IDNA.getUTS46Instance(IDNA.CHECK_BIDI); 87 String input="\u0000A_2+2=4\n.e\u00DFen.net"; 88 StringBuilder result=new StringBuilder(); 89 IDNA.Info info=new IDNA.Info(); 90 if( !not3.nameToUnicode(input, result, info).toString().equals("\u0000a_2+2=4\n.essen.net") || 91 info.hasErrors() 92 ) { 93 errln(String.format("notSTD3.nameToUnicode(non-LDH ASCII) unexpected errors %s string %s", 94 info.getErrors(), prettify(result.toString()))); 95 } 96 // A space (BiDi class WS) is not allowed in a BiDi domain name. 97 input="a z.xn--4db.edu"; 98 not3.nameToASCII(input, result, info); 99 if(!UTF16Plus.equal(result, input) || !info.getErrors().equals(EnumSet.of(IDNA.Error.BIDI))) { 100 errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed"); 101 } 102 // Characters that are canonically equivalent to sequences with non-LDH ASCII. 103 input="a\u2260b\u226Ec\u226Fd"; 104 not3.nameToUnicode(input, result, info); 105 if(!UTF16Plus.equal(result, input) || info.hasErrors()) { 106 errln(String.format("notSTD3.nameToUnicode(equiv to non-LDH ASCII) unexpected errors %s string %s", 107 info.getErrors().toString(), prettify(result.toString()))); 108 } 109 } 110 111 private static final Map<String, IDNA.Error> errorNamesToErrors; 112 static { 113 errorNamesToErrors=new TreeMap<String, IDNA.Error>(); 114 errorNamesToErrors.put("UIDNA_ERROR_EMPTY_LABEL", IDNA.Error.EMPTY_LABEL); 115 errorNamesToErrors.put("UIDNA_ERROR_LABEL_TOO_LONG", IDNA.Error.LABEL_TOO_LONG); 116 errorNamesToErrors.put("UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", IDNA.Error.DOMAIN_NAME_TOO_LONG); 117 errorNamesToErrors.put("UIDNA_ERROR_LEADING_HYPHEN", IDNA.Error.LEADING_HYPHEN); 118 errorNamesToErrors.put("UIDNA_ERROR_TRAILING_HYPHEN", IDNA.Error.TRAILING_HYPHEN); 119 errorNamesToErrors.put("UIDNA_ERROR_HYPHEN_3_4", IDNA.Error.HYPHEN_3_4); 120 errorNamesToErrors.put("UIDNA_ERROR_LEADING_COMBINING_MARK", IDNA.Error.LEADING_COMBINING_MARK); 121 errorNamesToErrors.put("UIDNA_ERROR_DISALLOWED", IDNA.Error.DISALLOWED); 122 errorNamesToErrors.put("UIDNA_ERROR_PUNYCODE", IDNA.Error.PUNYCODE); 123 errorNamesToErrors.put("UIDNA_ERROR_LABEL_HAS_DOT", IDNA.Error.LABEL_HAS_DOT); 124 errorNamesToErrors.put("UIDNA_ERROR_INVALID_ACE_LABEL", IDNA.Error.INVALID_ACE_LABEL); 125 errorNamesToErrors.put("UIDNA_ERROR_BIDI", IDNA.Error.BIDI); 126 errorNamesToErrors.put("UIDNA_ERROR_CONTEXTJ", IDNA.Error.CONTEXTJ); 127 errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_PUNCTUATION", IDNA.Error.CONTEXTO_PUNCTUATION); 128 errorNamesToErrors.put("UIDNA_ERROR_CONTEXTO_DIGITS", IDNA.Error.CONTEXTO_DIGITS); 129 } 130 131 private static final class TestCase { TestCase()132 private TestCase() { 133 errors=EnumSet.noneOf(IDNA.Error.class); 134 } set(String[] data)135 private void set(String[] data) { 136 s=data[0]; 137 o=data[1]; 138 u=data[2]; 139 errors.clear(); 140 if(data[3].length()!=0) { 141 for(String e: data[3].split("\\|")) { 142 errors.add(errorNamesToErrors.get(e)); 143 } 144 } 145 } 146 // Input string and options string (Nontransitional/Transitional/Both). 147 private String s, o; 148 // Expected Unicode result string. 149 private String u; 150 private EnumSet<IDNA.Error> errors; 151 }; 152 153 private static final String testCases[][]={ 154 { "www.eXample.cOm", "B", // all ASCII 155 "www.example.com", "" }, 156 { "B\u00FCcher.de", "B", // u-umlaut 157 "b\u00FCcher.de", "" }, 158 { "\u00D6BB", "B", // O-umlaut 159 "\u00F6bb", "" }, 160 { "fa\u00DF.de", "N", // sharp s 161 "fa\u00DF.de", "" }, 162 { "fa\u00DF.de", "T", // sharp s 163 "fass.de", "" }, 164 { "XN--fA-hia.dE", "B", // sharp s in Punycode 165 "fa\u00DF.de", "" }, 166 { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "N", // Greek with final sigma 167 "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "" }, 168 { "\u03B2\u03CC\u03BB\u03BF\u03C2.com", "T", // Greek with final sigma 169 "\u03B2\u03CC\u03BB\u03BF\u03C3.com", "" }, 170 { "xn--nxasmm1c", "B", // Greek with final sigma in Punycode 171 "\u03B2\u03CC\u03BB\u03BF\u03C2", "" }, 172 { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "N", // "Sri" in "Sri Lanka" has a ZWJ 173 "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" }, 174 { "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "T", // "Sri" in "Sri Lanka" has a ZWJ 175 "www.\u0DC1\u0DCA\u0DBB\u0DD3.com", "" }, 176 { "www.xn--10cl1a0b660p.com", "B", // "Sri" in Punycode 177 "www.\u0DC1\u0DCA\u200D\u0DBB\u0DD3.com", "" }, 178 { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "N", // ZWNJ 179 "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "" }, 180 { "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC", "T", // ZWNJ 181 "\u0646\u0627\u0645\u0647\u0627\u06CC", "" }, 182 { "xn--mgba3gch31f060k.com", "B", // ZWNJ in Punycode 183 "\u0646\u0627\u0645\u0647\u200C\u0627\u06CC.com", "" }, 184 { "a.b\uFF0Ec\u3002d\uFF61", "B", 185 "a.b.c.d.", "" }, 186 { "U\u0308.xn--tda", "B", // U+umlaut.u-umlaut 187 "\u00FC.\u00FC", "" }, 188 { "xn--u-ccb", "B", // u+umlaut in Punycode 189 "xn--u-ccb\uFFFD", "UIDNA_ERROR_INVALID_ACE_LABEL" }, 190 { "a\u2488com", "B", // contains 1-dot 191 "a\uFFFDcom", "UIDNA_ERROR_DISALLOWED" }, 192 { "xn--a-ecp.ru", "B", // contains 1-dot in Punycode 193 "xn--a-ecp\uFFFD.ru", "UIDNA_ERROR_INVALID_ACE_LABEL" }, 194 { "xn--0.pt", "B", // invalid Punycode 195 "xn--0\uFFFD.pt", "UIDNA_ERROR_PUNYCODE" }, 196 { "xn--a.pt", "B", // U+0080 197 "xn--a\uFFFD.pt", "UIDNA_ERROR_INVALID_ACE_LABEL" }, 198 { "xn--a-\u00C4.pt", "B", // invalid Punycode 199 "xn--a-\u00E4.pt", "UIDNA_ERROR_PUNYCODE" }, 200 { "\u65E5\u672C\u8A9E\u3002\uFF2A\uFF30", "B", // Japanese with fullwidth ".jp" 201 "\u65E5\u672C\u8A9E.jp", "" }, 202 { "\u2615", "B", "\u2615", "" }, // Unicode 4.0 HOT BEVERAGE 203 // some characters are disallowed because they are canonically equivalent 204 // to sequences with non-LDH ASCII 205 { "a\u2260b\u226Ec\u226Fd", "B", 206 "a\uFFFDb\uFFFDc\uFFFDd", "UIDNA_ERROR_DISALLOWED" }, 207 // many deviation characters, test the special mapping code 208 { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+ 209 "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+ 210 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+ 211 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+ 212 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "N", 213 "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+ 214 "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+ 215 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+ 216 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+ 217 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", 218 "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_CONTEXTJ" }, 219 { "1.a\u00DF\u200C\u200Db\u200C\u200Dc\u00DF\u00DF\u00DF\u00DFd"+ 220 "\u03C2\u03C3\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFe"+ 221 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFx"+ 222 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DFy"+ 223 "\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u00DF\u0302\u00DFz", "T", 224 "1.assbcssssssssd"+ 225 "\u03C3\u03C3sssssssssssssssse"+ 226 "ssssssssssssssssssssx"+ 227 "ssssssssssssssssssssy"+ 228 "sssssssssssssss\u015Dssz", "UIDNA_ERROR_LABEL_TOO_LONG" }, 229 // "xn--bss" with deviation characters 230 { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "N", 231 "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "UIDNA_ERROR_CONTEXTJ" }, 232 { "\u200Cx\u200Dn\u200C-\u200D-b\u00DF", "T", 233 "\u5919", "" }, 234 // "xn--bssffl" written as: 235 // 02E3 MODIFIER LETTER SMALL X 236 // 034F COMBINING GRAPHEME JOINER (ignored) 237 // 2115 DOUBLE-STRUCK CAPITAL N 238 // 200B ZERO WIDTH SPACE (ignored) 239 // FE63 SMALL HYPHEN-MINUS 240 // 00AD SOFT HYPHEN (ignored) 241 // FF0D FULLWIDTH HYPHEN-MINUS 242 // 180C MONGOLIAN FREE VARIATION SELECTOR TWO (ignored) 243 // 212C SCRIPT CAPITAL B 244 // FE00 VARIATION SELECTOR-1 (ignored) 245 // 017F LATIN SMALL LETTER LONG S 246 // 2064 INVISIBLE PLUS (ignored) 247 // 1D530 MATHEMATICAL FRAKTUR SMALL S 248 // E01EF VARIATION SELECTOR-256 (ignored) 249 // FB04 LATIN SMALL LIGATURE FFL 250 { "\u02E3\u034F\u2115\u200B\uFE63\u00AD\uFF0D\u180C"+ 251 "\u212C\uFE00\u017F\u2064"+"\uD835\uDD30\uDB40\uDDEF"/*1D530 E01EF*/+"\uFB04", "B", 252 "\u5921\u591E\u591C\u5919", "" }, 253 { "123456789012345678901234567890123456789012345678901234567890123."+ 254 "123456789012345678901234567890123456789012345678901234567890123."+ 255 "123456789012345678901234567890123456789012345678901234567890123."+ 256 "1234567890123456789012345678901234567890123456789012345678901", "B", 257 "123456789012345678901234567890123456789012345678901234567890123."+ 258 "123456789012345678901234567890123456789012345678901234567890123."+ 259 "123456789012345678901234567890123456789012345678901234567890123."+ 260 "1234567890123456789012345678901234567890123456789012345678901", "" }, 261 { "123456789012345678901234567890123456789012345678901234567890123."+ 262 "123456789012345678901234567890123456789012345678901234567890123."+ 263 "123456789012345678901234567890123456789012345678901234567890123."+ 264 "1234567890123456789012345678901234567890123456789012345678901.", "B", 265 "123456789012345678901234567890123456789012345678901234567890123."+ 266 "123456789012345678901234567890123456789012345678901234567890123."+ 267 "123456789012345678901234567890123456789012345678901234567890123."+ 268 "1234567890123456789012345678901234567890123456789012345678901.", "" }, 269 // Domain name >256 characters, forces slow path in UTF-8 processing. 270 { "123456789012345678901234567890123456789012345678901234567890123."+ 271 "123456789012345678901234567890123456789012345678901234567890123."+ 272 "123456789012345678901234567890123456789012345678901234567890123."+ 273 "123456789012345678901234567890123456789012345678901234567890123."+ 274 "12345678901234567890123456789012345678901234567890123456789012", "B", 275 "123456789012345678901234567890123456789012345678901234567890123."+ 276 "123456789012345678901234567890123456789012345678901234567890123."+ 277 "123456789012345678901234567890123456789012345678901234567890123."+ 278 "123456789012345678901234567890123456789012345678901234567890123."+ 279 "12345678901234567890123456789012345678901234567890123456789012", 280 "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" }, 281 { "123456789012345678901234567890123456789012345678901234567890123."+ 282 "123456789012345678901234567890123456789012345678901234567890123."+ 283 "123456789012345678901234567890123456789012345678901234567890123."+ 284 "123456789012345678901234567890123456789012345678901234567890123."+ 285 "1234567890123456789012345678901234567890123456789\u05D0", "B", 286 "123456789012345678901234567890123456789012345678901234567890123."+ 287 "123456789012345678901234567890123456789012345678901234567890123."+ 288 "123456789012345678901234567890123456789012345678901234567890123."+ 289 "123456789012345678901234567890123456789012345678901234567890123."+ 290 "1234567890123456789012345678901234567890123456789\u05D0", 291 "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG|UIDNA_ERROR_BIDI" }, 292 { "123456789012345678901234567890123456789012345678901234567890123."+ 293 "1234567890123456789012345678901234567890123456789012345678901234."+ 294 "123456789012345678901234567890123456789012345678901234567890123."+ 295 "123456789012345678901234567890123456789012345678901234567890", "B", 296 "123456789012345678901234567890123456789012345678901234567890123."+ 297 "1234567890123456789012345678901234567890123456789012345678901234."+ 298 "123456789012345678901234567890123456789012345678901234567890123."+ 299 "123456789012345678901234567890123456789012345678901234567890", 300 "UIDNA_ERROR_LABEL_TOO_LONG" }, 301 { "123456789012345678901234567890123456789012345678901234567890123."+ 302 "1234567890123456789012345678901234567890123456789012345678901234."+ 303 "123456789012345678901234567890123456789012345678901234567890123."+ 304 "123456789012345678901234567890123456789012345678901234567890.", "B", 305 "123456789012345678901234567890123456789012345678901234567890123."+ 306 "1234567890123456789012345678901234567890123456789012345678901234."+ 307 "123456789012345678901234567890123456789012345678901234567890123."+ 308 "123456789012345678901234567890123456789012345678901234567890.", 309 "UIDNA_ERROR_LABEL_TOO_LONG" }, 310 { "123456789012345678901234567890123456789012345678901234567890123."+ 311 "1234567890123456789012345678901234567890123456789012345678901234."+ 312 "123456789012345678901234567890123456789012345678901234567890123."+ 313 "1234567890123456789012345678901234567890123456789012345678901", "B", 314 "123456789012345678901234567890123456789012345678901234567890123."+ 315 "1234567890123456789012345678901234567890123456789012345678901234."+ 316 "123456789012345678901234567890123456789012345678901234567890123."+ 317 "1234567890123456789012345678901234567890123456789012345678901", 318 "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" }, 319 // label length 63: xn--1234567890123456789012345678901234567890123456789012345-9te 320 { "\u00E41234567890123456789012345678901234567890123456789012345", "B", 321 "\u00E41234567890123456789012345678901234567890123456789012345", "" }, 322 { "1234567890\u00E41234567890123456789012345678901234567890123456", "B", 323 "1234567890\u00E41234567890123456789012345678901234567890123456", "UIDNA_ERROR_LABEL_TOO_LONG" }, 324 { "123456789012345678901234567890123456789012345678901234567890123."+ 325 "1234567890\u00E4123456789012345678901234567890123456789012345."+ 326 "123456789012345678901234567890123456789012345678901234567890123."+ 327 "1234567890123456789012345678901234567890123456789012345678901", "B", 328 "123456789012345678901234567890123456789012345678901234567890123."+ 329 "1234567890\u00E4123456789012345678901234567890123456789012345."+ 330 "123456789012345678901234567890123456789012345678901234567890123."+ 331 "1234567890123456789012345678901234567890123456789012345678901", "" }, 332 { "123456789012345678901234567890123456789012345678901234567890123."+ 333 "1234567890\u00E4123456789012345678901234567890123456789012345."+ 334 "123456789012345678901234567890123456789012345678901234567890123."+ 335 "1234567890123456789012345678901234567890123456789012345678901.", "B", 336 "123456789012345678901234567890123456789012345678901234567890123."+ 337 "1234567890\u00E4123456789012345678901234567890123456789012345."+ 338 "123456789012345678901234567890123456789012345678901234567890123."+ 339 "1234567890123456789012345678901234567890123456789012345678901.", "" }, 340 { "123456789012345678901234567890123456789012345678901234567890123."+ 341 "1234567890\u00E4123456789012345678901234567890123456789012345."+ 342 "123456789012345678901234567890123456789012345678901234567890123."+ 343 "12345678901234567890123456789012345678901234567890123456789012", "B", 344 "123456789012345678901234567890123456789012345678901234567890123."+ 345 "1234567890\u00E4123456789012345678901234567890123456789012345."+ 346 "123456789012345678901234567890123456789012345678901234567890123."+ 347 "12345678901234567890123456789012345678901234567890123456789012", 348 "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" }, 349 { "123456789012345678901234567890123456789012345678901234567890123."+ 350 "1234567890\u00E41234567890123456789012345678901234567890123456."+ 351 "123456789012345678901234567890123456789012345678901234567890123."+ 352 "123456789012345678901234567890123456789012345678901234567890", "B", 353 "123456789012345678901234567890123456789012345678901234567890123."+ 354 "1234567890\u00E41234567890123456789012345678901234567890123456."+ 355 "123456789012345678901234567890123456789012345678901234567890123."+ 356 "123456789012345678901234567890123456789012345678901234567890", 357 "UIDNA_ERROR_LABEL_TOO_LONG" }, 358 { "123456789012345678901234567890123456789012345678901234567890123."+ 359 "1234567890\u00E41234567890123456789012345678901234567890123456."+ 360 "123456789012345678901234567890123456789012345678901234567890123."+ 361 "123456789012345678901234567890123456789012345678901234567890.", "B", 362 "123456789012345678901234567890123456789012345678901234567890123."+ 363 "1234567890\u00E41234567890123456789012345678901234567890123456."+ 364 "123456789012345678901234567890123456789012345678901234567890123."+ 365 "123456789012345678901234567890123456789012345678901234567890.", 366 "UIDNA_ERROR_LABEL_TOO_LONG" }, 367 { "123456789012345678901234567890123456789012345678901234567890123."+ 368 "1234567890\u00E41234567890123456789012345678901234567890123456."+ 369 "123456789012345678901234567890123456789012345678901234567890123."+ 370 "1234567890123456789012345678901234567890123456789012345678901", "B", 371 "123456789012345678901234567890123456789012345678901234567890123."+ 372 "1234567890\u00E41234567890123456789012345678901234567890123456."+ 373 "123456789012345678901234567890123456789012345678901234567890123."+ 374 "1234567890123456789012345678901234567890123456789012345678901", 375 "UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG" }, 376 // hyphen errors and empty-label errors 377 // Ticket #10883: ToUnicode also checks for empty labels. 378 { ".", "B", ".", "UIDNA_ERROR_EMPTY_LABEL" }, 379 { "\uFF0E", "B", ".", "UIDNA_ERROR_EMPTY_LABEL" }, 380 // "xn---q----jra"=="-q--a-umlaut-" 381 { "a.b..-q--a-.e", "B", "a.b..-q--a-.e", 382 "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+ 383 "UIDNA_ERROR_HYPHEN_3_4" }, 384 { "a.b..-q--\u00E4-.e", "B", "a.b..-q--\u00E4-.e", 385 "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+ 386 "UIDNA_ERROR_HYPHEN_3_4" }, 387 { "a.b..xn---q----jra.e", "B", "a.b..-q--\u00E4-.e", 388 "UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|"+ 389 "UIDNA_ERROR_HYPHEN_3_4" }, 390 { "a..c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" }, 391 { "a.xn--.c", "B", "a..c", "UIDNA_ERROR_EMPTY_LABEL" }, 392 { "a.-b.", "B", "a.-b.", "UIDNA_ERROR_LEADING_HYPHEN" }, 393 { "a.b-.c", "B", "a.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" }, 394 { "a.-.c", "B", "a.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" }, 395 { "a.bc--de.f", "B", "a.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" }, 396 { "\u00E4.\u00AD.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" }, 397 { "\u00E4.xn--.c", "B", "\u00E4..c", "UIDNA_ERROR_EMPTY_LABEL" }, 398 { "\u00E4.-b.", "B", "\u00E4.-b.", "UIDNA_ERROR_LEADING_HYPHEN" }, 399 { "\u00E4.b-.c", "B", "\u00E4.b-.c", "UIDNA_ERROR_TRAILING_HYPHEN" }, 400 { "\u00E4.-.c", "B", "\u00E4.-.c", "UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN" }, 401 { "\u00E4.bc--de.f", "B", "\u00E4.bc--de.f", "UIDNA_ERROR_HYPHEN_3_4" }, 402 { "a.b.\u0308c.d", "B", "a.b.\uFFFDc.d", "UIDNA_ERROR_LEADING_COMBINING_MARK" }, 403 { "a.b.xn--c-bcb.d", "B", 404 "a.b.xn--c-bcb\uFFFD.d", "UIDNA_ERROR_LEADING_COMBINING_MARK|UIDNA_ERROR_INVALID_ACE_LABEL" }, 405 // BiDi 406 { "A0", "B", "a0", "" }, 407 { "0A", "B", "0a", "" }, // all-LTR is ok to start with a digit (EN) 408 { "0A.\u05D0", "B", // ASCII label does not start with L/R/AL 409 "0a.\u05D0", "UIDNA_ERROR_BIDI" }, 410 { "c.xn--0-eha.xn--4db", "B", // 2nd label does not start with L/R/AL 411 "c.0\u00FC.\u05D0", "UIDNA_ERROR_BIDI" }, 412 { "b-.\u05D0", "B", // label does not end with L/EN 413 "b-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" }, 414 { "d.xn----dha.xn--4db", "B", // 2nd label does not end with L/EN 415 "d.\u00FC-.\u05D0", "UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI" }, 416 { "a\u05D0", "B", "a\u05D0", "UIDNA_ERROR_BIDI" }, // first dir != last dir 417 { "\u05D0\u05C7", "B", "\u05D0\u05C7", "" }, 418 { "\u05D09\u05C7", "B", "\u05D09\u05C7", "" }, 419 { "\u05D0a\u05C7", "B", "\u05D0a\u05C7", "UIDNA_ERROR_BIDI" }, // first dir != last dir 420 { "\u05D0\u05EA", "B", "\u05D0\u05EA", "" }, 421 { "\u05D0\u05F3\u05EA", "B", "\u05D0\u05F3\u05EA", "" }, 422 { "a\u05D0Tz", "B", "a\u05D0tz", "UIDNA_ERROR_BIDI" }, // mixed dir 423 { "\u05D0T\u05EA", "B", "\u05D0t\u05EA", "UIDNA_ERROR_BIDI" }, // mixed dir 424 { "\u05D07\u05EA", "B", "\u05D07\u05EA", "" }, 425 { "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" }, // Arabic 7 in the middle 426 { "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" }, // AN digit in LTR 427 { "a7\u0667", "B", "a7\u0667", "UIDNA_ERROR_BIDI" }, // AN digit in LTR 428 { "\u05D07\u0667\u05EA", "B", // mixed EN/AN digits in RTL 429 "\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" }, 430 { "\u05D07\u0667", "B", // mixed EN/AN digits in RTL 431 "\u05D07\u0667", "UIDNA_ERROR_BIDI" }, 432 // ZWJ 433 { "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" }, // Virama+ZWJ 434 { "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" }, // no Virama 435 { "\u200D", "N", "\u200D", "UIDNA_ERROR_CONTEXTJ" }, // no Virama 436 // ZWNJ 437 { "\u0BB9\u0BCD\u200C", "N", "\u0BB9\u0BCD\u200C", "" }, // Virama+ZWNJ 438 { "\u0BB9\u200C", "N", "\u0BB9\u200C", "UIDNA_ERROR_CONTEXTJ" }, // no Virama 439 { "\u200C", "N", "\u200C", "UIDNA_ERROR_CONTEXTJ" }, // no Virama 440 { "\u0644\u0670\u200C\u06ED\u06EF", "N", // Joining types D T ZWNJ T R 441 "\u0644\u0670\u200C\u06ED\u06EF", "" }, 442 { "\u0644\u0670\u200C\u06EF", "N", // D T ZWNJ R 443 "\u0644\u0670\u200C\u06EF", "" }, 444 { "\u0644\u200C\u06ED\u06EF", "N", // D ZWNJ T R 445 "\u0644\u200C\u06ED\u06EF", "" }, 446 { "\u0644\u200C\u06EF", "N", // D ZWNJ R 447 "\u0644\u200C\u06EF", "" }, 448 { "\u0644\u0670\u200C\u06ED", "N", // D T ZWNJ T 449 "\u0644\u0670\u200C\u06ED", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" }, 450 { "\u06EF\u200C\u06EF", "N", // R ZWNJ R 451 "\u06EF\u200C\u06EF", "UIDNA_ERROR_CONTEXTJ" }, 452 { "\u0644\u200C", "N", // D ZWNJ 453 "\u0644\u200C", "UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ" }, 454 { "\u0660\u0661", "B", // Arabic-Indic Digits alone 455 "\u0660\u0661", "UIDNA_ERROR_BIDI" }, 456 { "\u06F0\u06F1", "B", // Extended Arabic-Indic Digits alone 457 "\u06F0\u06F1", "" }, 458 { "\u0660\u06F1", "B", // Mixed Arabic-Indic Digits 459 "\u0660\u06F1", "UIDNA_ERROR_CONTEXTO_DIGITS|UIDNA_ERROR_BIDI" }, 460 // All of the CONTEXTO "Would otherwise have been DISALLOWED" characters 461 // in their correct contexts, 462 // then each in incorrect context. 463 { "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "B", 464 "l\u00B7l\u4E00\u0375\u03B1\u05D0\u05F3\u05F4\u30FB", "UIDNA_ERROR_BIDI" }, 465 { "l\u00B7", "B", 466 "l\u00B7", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" }, 467 { "\u00B7l", "B", 468 "\u00B7l", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" }, 469 { "\u0375", "B", 470 "\u0375", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" }, 471 { "\u03B1\u05F3", "B", 472 "\u03B1\u05F3", "UIDNA_ERROR_CONTEXTO_PUNCTUATION|UIDNA_ERROR_BIDI" }, 473 { "\u05F4", "B", 474 "\u05F4", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" }, 475 { "l\u30FB", "B", 476 "l\u30FB", "UIDNA_ERROR_CONTEXTO_PUNCTUATION" }, 477 // { "", "B", 478 // "", "" }, 479 }; 480 481 @Test TestSomeCases()482 public void TestSomeCases() { 483 StringBuilder aT=new StringBuilder(), uT=new StringBuilder(); 484 StringBuilder aN=new StringBuilder(), uN=new StringBuilder(); 485 IDNA.Info aTInfo=new IDNA.Info(), uTInfo=new IDNA.Info(); 486 IDNA.Info aNInfo=new IDNA.Info(), uNInfo=new IDNA.Info(); 487 488 StringBuilder aTuN=new StringBuilder(), uTaN=new StringBuilder(); 489 StringBuilder aNuN=new StringBuilder(), uNaN=new StringBuilder(); 490 IDNA.Info aTuNInfo=new IDNA.Info(), uTaNInfo=new IDNA.Info(); 491 IDNA.Info aNuNInfo=new IDNA.Info(), uNaNInfo=new IDNA.Info(); 492 493 StringBuilder aTL=new StringBuilder(), uTL=new StringBuilder(); 494 StringBuilder aNL=new StringBuilder(), uNL=new StringBuilder(); 495 IDNA.Info aTLInfo=new IDNA.Info(), uTLInfo=new IDNA.Info(); 496 IDNA.Info aNLInfo=new IDNA.Info(), uNLInfo=new IDNA.Info(); 497 498 EnumSet<IDNA.Error> uniErrors=EnumSet.noneOf(IDNA.Error.class); 499 500 TestCase testCase=new TestCase(); 501 int i; 502 for(i=0; i<testCases.length; ++i) { 503 testCase.set(testCases[i]); 504 String input=testCase.s; 505 String expected=testCase.u; 506 // ToASCII/ToUnicode, transitional/nontransitional 507 try { 508 trans.nameToASCII(input, aT, aTInfo); 509 trans.nameToUnicode(input, uT, uTInfo); 510 nontrans.nameToASCII(input, aN, aNInfo); 511 nontrans.nameToUnicode(input, uN, uNInfo); 512 } catch(Exception e) { 513 errln(String.format("first-level processing [%d/%s] %s - %s", 514 i, testCase.o, testCase.s, e)); 515 continue; 516 } 517 // ToUnicode does not set length-overflow errors. 518 uniErrors.clear(); 519 uniErrors.addAll(testCase.errors); 520 uniErrors.removeAll(lengthOverflowErrors); 521 char mode=testCase.o.charAt(0); 522 if(mode=='B' || mode=='N') { 523 if(!sameErrors(uNInfo, uniErrors)) { 524 errln(String.format("N.nameToUnicode([%d] %s) unexpected errors %s", 525 i, testCase.s, uNInfo.getErrors())); 526 continue; 527 } 528 if(!UTF16Plus.equal(uN, expected)) { 529 errln(String.format("N.nameToUnicode([%d] %s) unexpected string %s", 530 i, testCase.s, prettify(uN.toString()))); 531 continue; 532 } 533 if(!sameErrors(aNInfo, testCase.errors)) { 534 errln(String.format("N.nameToASCII([%d] %s) unexpected errors %s", 535 i, testCase.s, aNInfo.getErrors())); 536 continue; 537 } 538 } 539 if(mode=='B' || mode=='T') { 540 if(!sameErrors(uTInfo, uniErrors)) { 541 errln(String.format("T.nameToUnicode([%d] %s) unexpected errors %s", 542 i, testCase.s, uTInfo.getErrors())); 543 continue; 544 } 545 if(!UTF16Plus.equal(uT, expected)) { 546 errln(String.format("T.nameToUnicode([%d] %s) unexpected string %s", 547 i, testCase.s, prettify(uT.toString()))); 548 continue; 549 } 550 if(!sameErrors(aTInfo, testCase.errors)) { 551 errln(String.format("T.nameToASCII([%d] %s) unexpected errors %s", 552 i, testCase.s, aTInfo.getErrors())); 553 continue; 554 } 555 } 556 // ToASCII is all-ASCII if no severe errors 557 if(!hasCertainErrors(aNInfo, severeErrors) && !isASCII(aN)) { 558 errln(String.format("N.nameToASCII([%d] %s) (errors %s) result is not ASCII %s", 559 i, testCase.s, aNInfo.getErrors(), prettify(aN.toString()))); 560 continue; 561 } 562 if(!hasCertainErrors(aTInfo, severeErrors) && !isASCII(aT)) { 563 errln(String.format("T.nameToASCII([%d] %s) (errors %s) result is not ASCII %s", 564 i, testCase.s, aTInfo.getErrors(), prettify(aT.toString()))); 565 continue; 566 } 567 if(isVerbose()) { 568 char m= mode=='B' ? mode : 'N'; 569 logln(String.format("%c.nameToASCII([%d] %s) (errors %s) result string: %s", 570 m, i, testCase.s, aNInfo.getErrors(), prettify(aN.toString()))); 571 if(mode!='B') { 572 logln(String.format("T.nameToASCII([%d] %s) (errors %s) result string: %s", 573 i, testCase.s, aTInfo.getErrors(), prettify(aT.toString()))); 574 } 575 } 576 // second-level processing 577 try { 578 nontrans.nameToUnicode(aT, aTuN, aTuNInfo); 579 nontrans.nameToASCII(uT, uTaN, uTaNInfo); 580 nontrans.nameToUnicode(aN, aNuN, aNuNInfo); 581 nontrans.nameToASCII(uN, uNaN, uNaNInfo); 582 } catch(Exception e) { 583 errln(String.format("second-level processing [%d/%s] %s - %s", 584 i, testCase.o, testCase.s, e)); 585 continue; 586 } 587 if(!UTF16Plus.equal(aN, uNaN)) { 588 errln(String.format("N.nameToASCII([%d] %s)!=N.nameToUnicode().N.nameToASCII() "+ 589 "(errors %s) %s vs. %s", 590 i, testCase.s, aNInfo.getErrors(), 591 prettify(aN.toString()), prettify(uNaN.toString()))); 592 continue; 593 } 594 if(!UTF16Plus.equal(aT, uTaN)) { 595 errln(String.format("T.nameToASCII([%d] %s)!=T.nameToUnicode().N.nameToASCII() "+ 596 "(errors %s) %s vs. %s", 597 i, testCase.s, aNInfo.getErrors(), 598 prettify(aT.toString()), prettify(uTaN.toString()))); 599 continue; 600 } 601 if(!UTF16Plus.equal(uN, aNuN)) { 602 errln(String.format("N.nameToUnicode([%d] %s)!=N.nameToASCII().N.nameToUnicode() "+ 603 "(errors %s) %s vs. %s", 604 i, testCase.s, uNInfo.getErrors(), prettify(uN.toString()), prettify(aNuN.toString()))); 605 continue; 606 } 607 if(!UTF16Plus.equal(uT, aTuN)) { 608 errln(String.format("T.nameToUnicode([%d] %s)!=T.nameToASCII().N.nameToUnicode() "+ 609 "(errors %s) %s vs. %s", 610 i, testCase.s, uNInfo.getErrors(), 611 prettify(uT.toString()), prettify(aTuN.toString()))); 612 continue; 613 } 614 // labelToUnicode 615 try { 616 trans.labelToASCII(input, aTL, aTLInfo); 617 trans.labelToUnicode(input, uTL, uTLInfo); 618 nontrans.labelToASCII(input, aNL, aNLInfo); 619 nontrans.labelToUnicode(input, uNL, uNLInfo); 620 } catch(Exception e) { 621 errln(String.format("labelToXYZ processing [%d/%s] %s - %s", 622 i, testCase.o, testCase.s, e)); 623 continue; 624 } 625 if(aN.indexOf(".")<0) { 626 if(!UTF16Plus.equal(aN, aNL) || !sameErrors(aNInfo, aNLInfo)) { 627 errln(String.format("N.nameToASCII([%d] %s)!=N.labelToASCII() "+ 628 "(errors %s vs %s) %s vs. %s", 629 i, testCase.s, aNInfo.getErrors().toString(), aNLInfo.getErrors().toString(), 630 prettify(aN.toString()), prettify(aNL.toString()))); 631 continue; 632 } 633 } else { 634 if(!hasError(aNLInfo, IDNA.Error.LABEL_HAS_DOT)) { 635 errln(String.format("N.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT", 636 i, testCase.s, aNLInfo.getErrors())); 637 continue; 638 } 639 } 640 if(aT.indexOf(".")<0) { 641 if(!UTF16Plus.equal(aT, aTL) || !sameErrors(aTInfo, aTLInfo)) { 642 errln(String.format("T.nameToASCII([%d] %s)!=T.labelToASCII() "+ 643 "(errors %s vs %s) %s vs. %s", 644 i, testCase.s, aTInfo.getErrors().toString(), aTLInfo.getErrors().toString(), 645 prettify(aT.toString()), prettify(aTL.toString()))); 646 continue; 647 } 648 } else { 649 if(!hasError(aTLInfo, IDNA.Error.LABEL_HAS_DOT)) { 650 errln(String.format("T.labelToASCII([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT", 651 i, testCase.s, aTLInfo.getErrors())); 652 continue; 653 } 654 } 655 if(uN.indexOf(".")<0) { 656 if(!UTF16Plus.equal(uN, uNL) || !sameErrors(uNInfo, uNLInfo)) { 657 errln(String.format("N.nameToUnicode([%d] %s)!=N.labelToUnicode() "+ 658 "(errors %s vs %s) %s vs. %s", 659 i, testCase.s, uNInfo.getErrors().toString(), uNLInfo.getErrors().toString(), 660 prettify(uN.toString()), prettify(uNL.toString()))); 661 continue; 662 } 663 } else { 664 if(!hasError(uNLInfo, IDNA.Error.LABEL_HAS_DOT)) { 665 errln(String.format("N.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT", 666 i, testCase.s, uNLInfo.getErrors())); 667 continue; 668 } 669 } 670 if(uT.indexOf(".")<0) { 671 if(!UTF16Plus.equal(uT, uTL) || !sameErrors(uTInfo, uTLInfo)) { 672 errln(String.format("T.nameToUnicode([%d] %s)!=T.labelToUnicode() "+ 673 "(errors %s vs %s) %s vs. %s", 674 i, testCase.s, uTInfo.getErrors().toString(), uTLInfo.getErrors().toString(), 675 prettify(uT.toString()), prettify(uTL.toString()))); 676 continue; 677 } 678 } else { 679 if(!hasError(uTLInfo, IDNA.Error.LABEL_HAS_DOT)) { 680 errln(String.format("T.labelToUnicode([%d] %s) errors %s missing UIDNA_ERROR_LABEL_HAS_DOT", 681 i, testCase.s, uTLInfo.getErrors())); 682 continue; 683 } 684 } 685 // Differences between transitional and nontransitional processing 686 if(mode=='B') { 687 if( aNInfo.isTransitionalDifferent() || 688 aTInfo.isTransitionalDifferent() || 689 uNInfo.isTransitionalDifferent() || 690 uTInfo.isTransitionalDifferent() || 691 aNLInfo.isTransitionalDifferent() || 692 aTLInfo.isTransitionalDifferent() || 693 uNLInfo.isTransitionalDifferent() || 694 uTLInfo.isTransitionalDifferent() 695 ) { 696 errln(String.format("B.process([%d] %s) isTransitionalDifferent()", i, testCase.s)); 697 continue; 698 } 699 if( !UTF16Plus.equal(aN, aT) || !UTF16Plus.equal(uN, uT) || 700 !UTF16Plus.equal(aNL, aTL) || !UTF16Plus.equal(uNL, uTL) || 701 !sameErrors(aNInfo, aTInfo) || !sameErrors(uNInfo, uTInfo) || 702 !sameErrors(aNLInfo, aTLInfo) || !sameErrors(uNLInfo, uTLInfo) 703 ) { 704 errln(String.format("N.process([%d] %s) vs. T.process() different errors or result strings", 705 i, testCase.s)); 706 continue; 707 } 708 } else { 709 if( !aNInfo.isTransitionalDifferent() || 710 !aTInfo.isTransitionalDifferent() || 711 !uNInfo.isTransitionalDifferent() || 712 !uTInfo.isTransitionalDifferent() || 713 !aNLInfo.isTransitionalDifferent() || 714 !aTLInfo.isTransitionalDifferent() || 715 !uNLInfo.isTransitionalDifferent() || 716 !uTLInfo.isTransitionalDifferent() 717 ) { 718 errln(String.format("%s.process([%d] %s) !isTransitionalDifferent()", 719 testCase.o, i, testCase.s)); 720 continue; 721 } 722 if( UTF16Plus.equal(aN, aT) || UTF16Plus.equal(uN, uT) || 723 UTF16Plus.equal(aNL, aTL) || UTF16Plus.equal(uNL, uTL) 724 ) { 725 errln(String.format("N.process([%d] %s) vs. T.process() same result strings", 726 i, testCase.s)); 727 continue; 728 } 729 } 730 } 731 } 732 checkIdnaTestResult(String line, String type, String expected, CharSequence result, String status, IDNA.Info info)733 private void checkIdnaTestResult(String line, String type, 734 String expected, CharSequence result, String status, IDNA.Info info) { 735 // An error in toUnicode or toASCII is indicated by a value in square brackets, 736 // such as "[B5 B6]". 737 boolean expectedHasErrors = false; 738 if (!status.isEmpty()) { 739 if (status.charAt(0) != '[') { 740 errln(String.format("%s status field does not start with '[': %s\n %s", 741 type, status, line)); 742 } 743 if (!status.equals("[]")) { 744 expectedHasErrors = true; 745 } 746 } 747 if (expectedHasErrors != info.hasErrors()) { 748 errln(String.format( 749 "%s expected errors %s %b != %b = actual has errors: %s\n %s", 750 type, status, expectedHasErrors, info.hasErrors(), info.getErrors(), line)); 751 } 752 if (!expectedHasErrors && !UTF16Plus.equal(expected, result)) { 753 errln(String.format("%s expected != actual\n %s", type, line)); 754 errln(" " + expected); 755 errln(" " + result); 756 } 757 } 758 759 @Test IdnaTest()760 public void IdnaTest() throws IOException { 761 BufferedReader idnaTestFile = TestUtil.getDataReader("unicode/IdnaTestV2.txt", "UTF-8"); 762 Pattern semi = Pattern.compile(";"); 763 try { 764 String line; 765 while ((line = idnaTestFile.readLine()) != null) { 766 // Remove trailing comments and whitespace. 767 int commentStart = line.indexOf('#'); 768 if (commentStart >= 0) { 769 line = line.substring(0, commentStart); 770 } 771 String[] fields = semi.split(line, -1); 772 if (fields.length <= 1) { 773 continue; // Skip empty and comment-only lines. 774 } 775 776 // IdnaTestV2.txt (since Unicode 11) 777 // Column 1: source 778 // The source string to be tested 779 String source = Utility.unescape(fields[0].trim()); 780 781 // Column 2: toUnicode 782 // The result of applying toUnicode to the source, with Transitional_Processing=false. 783 // A blank value means the same as the source value. 784 String toUnicode = Utility.unescape(fields[1].trim()); 785 if (toUnicode.isEmpty()) { 786 toUnicode = source; 787 } 788 789 // Column 3: toUnicodeStatus 790 // A set of status codes, each corresponding to a particular test. 791 // A blank value means []. 792 String toUnicodeStatus = fields[2].trim(); 793 794 // Column 4: toAsciiN 795 // The result of applying toASCII to the source, with Transitional_Processing=false. 796 // A blank value means the same as the toUnicode value. 797 String toAsciiN = Utility.unescape(fields[3].trim()); 798 if (toAsciiN.isEmpty()) { 799 toAsciiN = toUnicode; 800 } 801 802 // Column 5: toAsciiNStatus 803 // A set of status codes, each corresponding to a particular test. 804 // A blank value means the same as the toUnicodeStatus value. 805 String toAsciiNStatus = fields[4].trim(); 806 if (toAsciiNStatus.isEmpty()) { 807 toAsciiNStatus = toUnicodeStatus; 808 } 809 810 // Column 6: toAsciiT 811 // The result of applying toASCII to the source, with Transitional_Processing=true. 812 // A blank value means the same as the toAsciiN value. 813 String toAsciiT = Utility.unescape(fields[5].trim()); 814 if (toAsciiT.isEmpty()) { 815 toAsciiT = toAsciiN; 816 } 817 818 // Column 7: toAsciiTStatus 819 // A set of status codes, each corresponding to a particular test. 820 // A blank value means the same as the toAsciiNStatus value. 821 String toAsciiTStatus = fields[6].trim(); 822 if (toAsciiTStatus.isEmpty()) { 823 toAsciiTStatus = toAsciiNStatus; 824 } 825 826 // ToASCII/ToUnicode, transitional/nontransitional 827 StringBuilder uN, aN, aT; 828 IDNA.Info uNInfo, aNInfo, aTInfo; 829 nontrans.nameToUnicode(source, uN = new StringBuilder(), uNInfo = new IDNA.Info()); 830 checkIdnaTestResult(line, "toUnicodeNontrans", toUnicode, uN, toUnicodeStatus, uNInfo); 831 nontrans.nameToASCII(source, aN = new StringBuilder(), aNInfo = new IDNA.Info()); 832 checkIdnaTestResult(line, "toASCIINontrans", toAsciiN, aN, toAsciiNStatus, aNInfo); 833 trans.nameToASCII(source, aT = new StringBuilder(), aTInfo = new IDNA.Info()); 834 checkIdnaTestResult(line, "toASCIITrans", toAsciiT, aT, toAsciiTStatus, aTInfo); 835 } 836 } finally { 837 idnaTestFile.close(); 838 } 839 } 840 841 private final IDNA trans, nontrans; 842 843 private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of( 844 IDNA.Error.LEADING_COMBINING_MARK, 845 IDNA.Error.DISALLOWED, 846 IDNA.Error.PUNYCODE, 847 IDNA.Error.LABEL_HAS_DOT, 848 IDNA.Error.INVALID_ACE_LABEL); 849 private static final EnumSet<IDNA.Error> lengthOverflowErrors=EnumSet.of( 850 IDNA.Error.LABEL_TOO_LONG, 851 IDNA.Error.DOMAIN_NAME_TOO_LONG); 852 hasError(IDNA.Info info, IDNA.Error error)853 private boolean hasError(IDNA.Info info, IDNA.Error error) { 854 return info.getErrors().contains(error); 855 } 856 // assumes that certainErrors is not empty hasCertainErrors(Set<IDNA.Error> errors, Set<IDNA.Error> certainErrors)857 private boolean hasCertainErrors(Set<IDNA.Error> errors, Set<IDNA.Error> certainErrors) { 858 return !errors.isEmpty() && !Collections.disjoint(errors, certainErrors); 859 } hasCertainErrors(IDNA.Info info, Set<IDNA.Error> certainErrors)860 private boolean hasCertainErrors(IDNA.Info info, Set<IDNA.Error> certainErrors) { 861 return hasCertainErrors(info.getErrors(), certainErrors); 862 } sameErrors(Set<IDNA.Error> a, Set<IDNA.Error> b)863 private boolean sameErrors(Set<IDNA.Error> a, Set<IDNA.Error> b) { 864 return a.equals(b); 865 } sameErrors(IDNA.Info a, IDNA.Info b)866 private boolean sameErrors(IDNA.Info a, IDNA.Info b) { 867 return sameErrors(a.getErrors(), b.getErrors()); 868 } sameErrors(IDNA.Info a, Set<IDNA.Error> b)869 private boolean sameErrors(IDNA.Info a, Set<IDNA.Error> b) { 870 return sameErrors(a.getErrors(), b); 871 } 872 873 private static boolean isASCII(CharSequence str)874 isASCII(CharSequence str) { 875 int length=str.length(); 876 for(int i=0; i<length; ++i) { 877 if(str.charAt(i)>=0x80) { 878 return false; 879 } 880 } 881 return true; 882 } 883 } 884