• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  uts46test.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010may05
14 *   created by: Markus W. Scherer
15 */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_IDNA
20 
21 #include <string.h>
22 #include "unicode/bytestream.h"
23 #include "unicode/idna.h"
24 #include "unicode/localpointer.h"
25 #include "unicode/std_string.h"
26 #include "unicode/stringpiece.h"
27 #include "unicode/uidna.h"
28 #include "unicode/unistr.h"
29 #include "charstr.h"
30 #include "cmemory.h"
31 #include "intltest.h"
32 #include "punycode.h"
33 #include "uparse.h"
34 
35 class UTS46Test : public IntlTest {
36 public:
UTS46Test()37     UTS46Test() : trans(nullptr), nontrans(nullptr) {}
38     virtual ~UTS46Test();
39 
40     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=nullptr) override;
41     void TestAPI();
42     void TestNotSTD3();
43     void TestInvalidPunycodeDigits();
44     void TestACELabelEdgeCases();
45     void TestDefaultNontransitional();
46     void TestTooLong();
47     void TestSomeCases();
48     void IdnaTest();
49 
50     void checkIdnaTestResult(const char *line, const char *type,
51                              const UnicodeString &expected, const UnicodeString &result,
52                              const char *status, const IDNAInfo &info);
53     void idnaTestOneLine(char *fields[][2], UErrorCode &errorCode);
54 
55 private:
56     IDNA *trans, *nontrans;
57 };
58 
createUTS46Test()59 extern IntlTest *createUTS46Test() {
60     return new UTS46Test();
61 }
62 
~UTS46Test()63 UTS46Test::~UTS46Test() {
64     delete trans;
65     delete nontrans;
66 }
67 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)68 void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
69     if(exec) {
70         logln("TestSuite UTS46Test: ");
71         if(trans==nullptr) {
72             IcuTestErrorCode errorCode(*this, "init/createUTS46Instance()");
73             uint32_t commonOptions=
74                 UIDNA_USE_STD3_RULES|UIDNA_CHECK_BIDI|
75                 UIDNA_CHECK_CONTEXTJ|UIDNA_CHECK_CONTEXTO;
76             trans=IDNA::createUTS46Instance(commonOptions, errorCode);
77             nontrans=IDNA::createUTS46Instance(
78                 commonOptions|
79                 UIDNA_NONTRANSITIONAL_TO_ASCII|UIDNA_NONTRANSITIONAL_TO_UNICODE,
80                 errorCode);
81             if(errorCode.errDataIfFailureAndReset("createUTS46Instance()")) {
82                 name="";
83                 return;
84             }
85         }
86     }
87     TESTCASE_AUTO_BEGIN;
88     TESTCASE_AUTO(TestAPI);
89     TESTCASE_AUTO(TestNotSTD3);
90     TESTCASE_AUTO(TestInvalidPunycodeDigits);
91     TESTCASE_AUTO(TestACELabelEdgeCases);
92     TESTCASE_AUTO(TestDefaultNontransitional);
93     TESTCASE_AUTO(TestTooLong);
94     TESTCASE_AUTO(TestSomeCases);
95     TESTCASE_AUTO(IdnaTest);
96     TESTCASE_AUTO_END;
97 }
98 
99 namespace {
100 
101 const uint32_t severeErrors=
102     UIDNA_ERROR_LEADING_COMBINING_MARK|
103     UIDNA_ERROR_DISALLOWED|
104     UIDNA_ERROR_PUNYCODE|
105     UIDNA_ERROR_LABEL_HAS_DOT|
106     UIDNA_ERROR_INVALID_ACE_LABEL;
107 
isASCII(const UnicodeString & str)108 UBool isASCII(const UnicodeString &str) {
109     const char16_t *s=str.getBuffer();
110     int32_t length=str.length();
111     for(int32_t i=0; i<length; ++i) {
112         if(s[i]>=0x80) {
113             return false;
114         }
115     }
116     return true;
117 }
118 
119 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
120 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)121     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
122             : CheckedArrayByteSink(outbuf, capacity), calledFlush(false) {}
Reset()123     virtual CheckedArrayByteSink& Reset() override {
124         CheckedArrayByteSink::Reset();
125         calledFlush = false;
126         return *this;
127     }
Flush()128     virtual void Flush() override { calledFlush = true; }
129     UBool calledFlush;
130 };
131 
132 }  // namespace
133 
TestAPI()134 void UTS46Test::TestAPI() {
135     UErrorCode errorCode=U_ZERO_ERROR;
136     UnicodeString result;
137     IDNAInfo info;
138     UnicodeString input=UNICODE_STRING_SIMPLE("www.eXample.cOm");
139     UnicodeString expected=UNICODE_STRING_SIMPLE("www.example.com");
140     trans->nameToASCII(input, result, info, errorCode);
141     if(U_FAILURE(errorCode) || info.hasErrors() || result!=expected) {
142         errln("T.nameToASCII(www.example.com) info.errors=%04lx result matches=%d %s",
143               static_cast<long>(info.getErrors()), result == expected, u_errorName(errorCode));
144     }
145     errorCode=U_USELESS_COLLATOR_ERROR;
146     trans->nameToUnicode(input, result, info, errorCode);
147     if(errorCode!=U_USELESS_COLLATOR_ERROR || !result.isBogus()) {
148         errln("T.nameToUnicode(U_FAILURE) did not preserve the errorCode "
149               "or not result.setToBogus() - %s",
150               u_errorName(errorCode));
151     }
152     errorCode=U_ZERO_ERROR;
153     input.setToBogus();
154     result=UNICODE_STRING_SIMPLE("quatsch");
155     nontrans->labelToASCII(input, result, info, errorCode);
156     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || !result.isBogus()) {
157         errln("N.labelToASCII(bogus) did not set illegal-argument-error "
158               "or not result.setToBogus() - %s",
159               u_errorName(errorCode));
160     }
161     errorCode=U_ZERO_ERROR;
162     input=UNICODE_STRING_SIMPLE("xn--bcher.de-65a");
163     expected=UNICODE_STRING_SIMPLE("xn--bcher\\uFFFDde-65a").unescape();
164     nontrans->labelToASCII(input, result, info, errorCode);
165     if( U_FAILURE(errorCode) ||
166         info.getErrors()!=(UIDNA_ERROR_LABEL_HAS_DOT|UIDNA_ERROR_INVALID_ACE_LABEL) ||
167         result!=expected
168     ) {
169         errln("N.labelToASCII(label-with-dot) failed with errors %04lx - %s",
170               info.getErrors(), u_errorName(errorCode));
171     }
172     // UTF-8
173     char buffer[100];
174     TestCheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
175     errorCode=U_ZERO_ERROR;
176     nontrans->labelToUnicodeUTF8(StringPiece((const char *)nullptr, 5), sink, info, errorCode);
177     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || sink.NumberOfBytesWritten()!=0) {
178         errln("N.labelToUnicodeUTF8(StringPiece(nullptr, 5)) did not set illegal-argument-error ",
179               "or did output something - %s",
180               u_errorName(errorCode));
181     }
182 
183     sink.Reset();
184     errorCode=U_ZERO_ERROR;
185     nontrans->nameToASCII_UTF8(StringPiece(), sink, info, errorCode);
186     if(U_FAILURE(errorCode) || sink.NumberOfBytesWritten()!=0 || !sink.calledFlush) {
187         errln("N.nameToASCII_UTF8(empty) failed - %s",
188               u_errorName(errorCode));
189     }
190 
191     static const char s[] = { 0x61, static_cast<char>(0xc3), static_cast<char>(0x9f) };
192     sink.Reset();
193     errorCode=U_USELESS_COLLATOR_ERROR;
194     nontrans->nameToUnicodeUTF8(StringPiece(s, 3), sink, info, errorCode);
195     if(errorCode!=U_USELESS_COLLATOR_ERROR || sink.NumberOfBytesWritten()!=0) {
196         errln("N.nameToUnicode_UTF8(U_FAILURE) did not preserve the errorCode "
197               "or did output something - %s",
198               u_errorName(errorCode));
199     }
200 
201     sink.Reset();
202     errorCode=U_ZERO_ERROR;
203     trans->labelToUnicodeUTF8(StringPiece(s, 3), sink, info, errorCode);
204     if( U_FAILURE(errorCode) || sink.NumberOfBytesWritten()!=3 ||
205         buffer[0]!=0x61 || buffer[1]!=0x73 || buffer[2]!=0x73 ||
206         !sink.calledFlush
207     ) {
208         errln("T.labelToUnicodeUTF8(a sharp-s) failed - %s",
209               u_errorName(errorCode));
210     }
211 
212     sink.Reset();
213     errorCode=U_ZERO_ERROR;
214     // "eXampLe.cOm"
215     static const char eX[]={ 0x65, 0x58, 0x61, 0x6d, 0x70, 0x4c, 0x65, 0x2e, 0x63, 0x4f, 0x6d, 0 };
216     // "example.com"
217     static const char ex[]={ 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d };
218     trans->nameToUnicodeUTF8(eX, sink, info, errorCode);
219     if( U_FAILURE(errorCode) || sink.NumberOfBytesWritten()!=11 ||
220         0!=memcmp(ex, buffer, 11) || !sink.calledFlush
221     ) {
222         errln("T.nameToUnicodeUTF8(eXampLe.cOm) failed - %s",
223               u_errorName(errorCode));
224     }
225 }
226 
TestNotSTD3()227 void UTS46Test::TestNotSTD3() {
228     IcuTestErrorCode errorCode(*this, "TestNotSTD3()");
229     char buffer[400];
230     LocalPointer<IDNA> not3(IDNA::createUTS46Instance(UIDNA_CHECK_BIDI, errorCode));
231     if(errorCode.isFailure()) {
232         return;
233     }
234     UnicodeString input=UNICODE_STRING_SIMPLE("\\u0000A_2+2=4\\u000A.e\\u00DFen.net").unescape();
235     UnicodeString result;
236     IDNAInfo info;
237     if( not3->nameToUnicode(input, result, info, errorCode)!=
238             UNICODE_STRING_SIMPLE("\\u0000a_2+2=4\\u000A.essen.net").unescape() ||
239         info.hasErrors()
240     ) {
241         prettify(result).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
242         errln("notSTD3.nameToUnicode(non-LDH ASCII) unexpected errors %04lx string %s",
243               static_cast<long>(info.getErrors()), buffer);
244     }
245     // A space (BiDi class WS) is not allowed in a BiDi domain name.
246     input=UNICODE_STRING_SIMPLE("a z.xn--4db.edu");
247     not3->nameToASCII(input, result, info, errorCode);
248     if(result!=input || info.getErrors()!=UIDNA_ERROR_BIDI) {
249         errln("notSTD3.nameToASCII(ASCII-with-space.alef.edu) failed");
250     }
251 }
252 
TestInvalidPunycodeDigits()253 void UTS46Test::TestInvalidPunycodeDigits() {
254     IcuTestErrorCode errorCode(*this, "TestInvalidPunycodeDigits()");
255     LocalPointer<IDNA> idna(IDNA::createUTS46Instance(0, errorCode));
256     if(errorCode.isFailure()) {
257         return;
258     }
259     UnicodeString result;
260     {
261         IDNAInfo info;
262         idna->nameToUnicode(u"xn--pleP", result, info, errorCode);  // P=U+0050
263         assertFalse("nameToUnicode() should succeed",
264                     (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
265         assertEquals("normal result", u"ᔼᔴ", result);
266     }
267     {
268         IDNAInfo info;
269         idna->nameToUnicode(u"xn--pleѐ", result, info, errorCode);  // ends with non-ASCII U+0450
270         assertTrue("nameToUnicode() should detect non-ASCII",
271                    (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
272     }
273 
274     // Test with ASCII characters adjacent to LDH.
275     {
276         IDNAInfo info;
277         idna->nameToUnicode(u"xn--ple/", result, info, errorCode);
278         assertTrue("nameToUnicode() should detect '/'",
279                    (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
280     }
281 
282     {
283         IDNAInfo info;
284         idna->nameToUnicode(u"xn--ple:", result, info, errorCode);
285         assertTrue("nameToUnicode() should detect ':'",
286                    (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
287     }
288 
289     {
290         IDNAInfo info;
291         idna->nameToUnicode(u"xn--ple@", result, info, errorCode);
292         assertTrue("nameToUnicode() should detect '@'",
293                    (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
294     }
295 
296     {
297         IDNAInfo info;
298         idna->nameToUnicode(u"xn--ple[", result, info, errorCode);
299         assertTrue("nameToUnicode() should detect '['",
300                    (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
301     }
302 
303     {
304         IDNAInfo info;
305         idna->nameToUnicode(u"xn--ple`", result, info, errorCode);
306         assertTrue("nameToUnicode() should detect '`'",
307                    (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
308     }
309 
310     {
311         IDNAInfo info;
312         idna->nameToUnicode(u"xn--ple{", result, info, errorCode);
313         assertTrue("nameToUnicode() should detect '{'",
314                    (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
315     }
316 }
317 
TestACELabelEdgeCases()318 void UTS46Test::TestACELabelEdgeCases() {
319     // In IDNA2008, these labels fail the round-trip validation from comparing
320     // the ToUnicode input with the back-to-ToASCII output.
321     IcuTestErrorCode errorCode(*this, "TestACELabelEdgeCases()");
322     LocalPointer<IDNA> idna(IDNA::createUTS46Instance(0, errorCode));
323     if(errorCode.isFailure()) {
324         return;
325     }
326     UnicodeString result;
327     {
328         IDNAInfo info;
329         idna->labelToUnicode(u"xn--", result, info, errorCode);
330         assertTrue("empty xn--", (info.getErrors()&UIDNA_ERROR_INVALID_ACE_LABEL)!=0);
331     }
332     {
333         IDNAInfo info;
334         idna->labelToUnicode(u"xN--ASCII-", result, info, errorCode);
335         assertTrue("nothing but ASCII", (info.getErrors()&UIDNA_ERROR_INVALID_ACE_LABEL)!=0);
336     }
337     {
338         // Different error: The Punycode decoding procedure does not consume the last delimiter
339         // if it is right after the xn-- so the main decoding loop fails because the hyphen
340         // is not a valid Punycode digit.
341         IDNAInfo info;
342         idna->labelToUnicode(u"Xn---", result, info, errorCode);
343         assertTrue("empty Xn---", (info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
344     }
345 
346     {
347         // Unicode 15.1 UTS #46:
348         // Added an additional condition in 4.1 Validity Criteria to
349         // disallow labels such as xn--xn---epa., which do not round-trip.
350         // --> Validity Criteria new criterion 4:
351         // If not CheckHyphens, the label must not begin with “xn--”.
352         IDNAInfo info;
353         idna->labelToUnicode("xn--xn---epa", result, info, errorCode);
354         assertTrue("error for xn--xn---epa",
355                 (info.getErrors()&UIDNA_ERROR_INVALID_ACE_LABEL)!=0);
356     }
357 }
358 
TestDefaultNontransitional()359 void UTS46Test::TestDefaultNontransitional() {
360     IcuTestErrorCode errorCode(*this, "TestDefaultNontransitional()");
361     // Unicode 15.1 UTS #46 deprecated transitional processing.
362     // ICU 76 changed UIDNA_DEFAULT to set the nontransitional options.
363     LocalPointer<IDNA> forZero(IDNA::createUTS46Instance(0, errorCode));
364     LocalPointer<IDNA> forDefault(IDNA::createUTS46Instance(UIDNA_DEFAULT, errorCode));
365     if(errorCode.isFailure()) {
366         return;
367     }
368     UnicodeString result;
369     IDNAInfo info;
370     forZero->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
371     assertEquals("forZero.toUnicode(Fⓤßẞ)", u"fussss", result);
372     forZero->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
373     assertEquals("forZero.toASCII(Fⓤßẞ)", u"fussss", result);
374     forDefault->labelToUnicode(u"Fⓤßẞ", result, info, errorCode);
375     assertEquals("forDefault.toUnicode(Fⓤßẞ)", u"fußß", result);
376     forDefault->labelToASCII(u"Fⓤßẞ", result, info, errorCode);
377     assertEquals("forDefault.toASCII(Fⓤßẞ)", u"xn--fu-hiaa", result);
378 }
379 
TestTooLong()380 void UTS46Test::TestTooLong() {
381     // ICU-13727: Limit input length for n^2 algorithm
382     // where well-formed strings are at most 59 characters long.
383     int32_t count = 50000;
384     UnicodeString s(count, u'a', count);  // capacity, code point, count
385     char16_t dest[60000];
386     UErrorCode errorCode = U_ZERO_ERROR;
387     u_strToPunycode(s.getBuffer(), s.length(), dest, UPRV_LENGTHOF(dest), nullptr, &errorCode);
388     assertEquals("encode: expected an error for too-long input", U_INPUT_TOO_LONG_ERROR, errorCode);
389     errorCode = U_ZERO_ERROR;
390     u_strFromPunycode(s.getBuffer(), s.length(), dest, UPRV_LENGTHOF(dest), nullptr, &errorCode);
391     assertEquals("decode: expected an error for too-long input", U_INPUT_TOO_LONG_ERROR, errorCode);
392 }
393 
394 namespace {
395 
// One row of the testCases[] table.
// The member order is part of the contract: rows are written as
// aggregate initializers { s, o, u, errors }.
struct TestCase {
    // Input string.
    const char *s;
    // Options string (Nontransitional/Transitional/Both).
    const char *o;
    // Expected Unicode result string.
    const char *u;
    // Expected error bits; the table rows use 0 or a combination of UIDNA_ERROR_* flags.
    uint32_t errors;
};
403 
404 const TestCase testCases[] = {
405     { "www.eXample.cOm", "B",  // all ASCII
406       "www.example.com", 0 },
407     { "B\\u00FCcher.de", "B",  // u-umlaut
408       "b\\u00FCcher.de", 0 },
409     { "\\u00D6BB", "B",  // O-umlaut
410       "\\u00F6bb", 0 },
411     { "fa\\u00DF.de", "N",  // sharp s
412       "fa\\u00DF.de", 0 },
413     { "fa\\u00DF.de", "T",  // sharp s
414       "fass.de", 0 },
415     { "XN--fA-hia.dE", "B",  // sharp s in Punycode
416       "fa\\u00DF.de", 0 },
417     { "\\u03B2\\u03CC\\u03BB\\u03BF\\u03C2.com", "N",  // Greek with final sigma
418       "\\u03B2\\u03CC\\u03BB\\u03BF\\u03C2.com", 0 },
419     { "\\u03B2\\u03CC\\u03BB\\u03BF\\u03C2.com", "T",  // Greek with final sigma
420       "\\u03B2\\u03CC\\u03BB\\u03BF\\u03C3.com", 0 },
421     { "xn--nxasmm1c", "B",  // Greek with final sigma in Punycode
422       "\\u03B2\\u03CC\\u03BB\\u03BF\\u03C2", 0 },
423     { "www.\\u0DC1\\u0DCA\\u200D\\u0DBB\\u0DD3.com", "N",  // "Sri" in "Sri Lanka" has a ZWJ
424       "www.\\u0DC1\\u0DCA\\u200D\\u0DBB\\u0DD3.com", 0 },
425     { "www.\\u0DC1\\u0DCA\\u200D\\u0DBB\\u0DD3.com", "T",  // "Sri" in "Sri Lanka" has a ZWJ
426       "www.\\u0DC1\\u0DCA\\u0DBB\\u0DD3.com", 0 },
427     { "www.xn--10cl1a0b660p.com", "B",  // "Sri" in Punycode
428       "www.\\u0DC1\\u0DCA\\u200D\\u0DBB\\u0DD3.com", 0 },
429     { "\\u0646\\u0627\\u0645\\u0647\\u200C\\u0627\\u06CC", "N",  // ZWNJ
430       "\\u0646\\u0627\\u0645\\u0647\\u200C\\u0627\\u06CC", 0 },
431     { "\\u0646\\u0627\\u0645\\u0647\\u200C\\u0627\\u06CC", "T",  // ZWNJ
432       "\\u0646\\u0627\\u0645\\u0647\\u0627\\u06CC", 0 },
433     { "xn--mgba3gch31f060k.com", "B",  // ZWNJ in Punycode
434       "\\u0646\\u0627\\u0645\\u0647\\u200C\\u0627\\u06CC.com", 0 },
435     { "a.b\\uFF0Ec\\u3002d\\uFF61", "B",
436       "a.b.c.d.", 0 },
437     { "U\\u0308.xn--tda", "B",  // U+umlaut.u-umlaut
438       "\\u00FC.\\u00FC", 0 },
439     { "xn--u-ccb", "B",  // u+umlaut in Punycode
440       "xn--u-ccb\\uFFFD", UIDNA_ERROR_INVALID_ACE_LABEL },
441     { "a\\u2488com", "B",  // contains 1-dot
442       "a\\uFFFDcom", UIDNA_ERROR_DISALLOWED },
443     { "xn--a-ecp.ru", "B",  // contains 1-dot in Punycode
444       "xn--a-ecp\\uFFFD.ru", UIDNA_ERROR_INVALID_ACE_LABEL },
445     { "xn--0.pt", "B",  // invalid Punycode
446       "xn--0\\uFFFD.pt", UIDNA_ERROR_PUNYCODE },
447     { "xn--a.pt", "B",  // U+0080
448       "xn--a\\uFFFD.pt", UIDNA_ERROR_INVALID_ACE_LABEL },
449     { "xn--a-\\u00C4.pt", "B",  // invalid Punycode
450       "xn--a-\\u00E4.pt", UIDNA_ERROR_PUNYCODE },
451     { "\\u65E5\\u672C\\u8A9E\\u3002\\uFF2A\\uFF30", "B",  // Japanese with fullwidth ".jp"
452       "\\u65E5\\u672C\\u8A9E.jp", 0 },
453     { "\\u2615", "B", "\\u2615", 0 },  // Unicode 4.0 HOT BEVERAGE
454     // many deviation characters, test the special mapping code
455     { "1.a\\u00DF\\u200C\\u200Db\\u200C\\u200Dc\\u00DF\\u00DF\\u00DF\\u00DFd"
456       "\\u03C2\\u03C3\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFe"
457       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFx"
458       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFy"
459       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u0302\\u00DFz", "N",
460       "1.a\\u00DF\\u200C\\u200Db\\u200C\\u200Dc\\u00DF\\u00DF\\u00DF\\u00DFd"
461       "\\u03C2\\u03C3\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFe"
462       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFx"
463       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFy"
464       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u0302\\u00DFz",
465       UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_CONTEXTJ },
466     { "1.a\\u00DF\\u200C\\u200Db\\u200C\\u200Dc\\u00DF\\u00DF\\u00DF\\u00DFd"
467       "\\u03C2\\u03C3\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFe"
468       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFx"
469       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DFy"
470       "\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u00DF\\u0302\\u00DFz", "T",
471       "1.assbcssssssssd"
472       "\\u03C3\\u03C3sssssssssssssssse"
473       "ssssssssssssssssssssx"
474       "ssssssssssssssssssssy"
475       "sssssssssssssss\\u015Dssz", UIDNA_ERROR_LABEL_TOO_LONG },
476     // "xn--bss" with deviation characters
477     { "\\u200Cx\\u200Dn\\u200C-\\u200D-b\\u00DF", "N",
478       "\\u200Cx\\u200Dn\\u200C-\\u200D-b\\u00DF", UIDNA_ERROR_CONTEXTJ },
479     { "\\u200Cx\\u200Dn\\u200C-\\u200D-b\\u00DF", "T",
480       "\\u5919", 0 },
481     // "xn--bssffl" written as:
482     // 02E3 MODIFIER LETTER SMALL X
483     // 034F COMBINING GRAPHEME JOINER (ignored)
484     // 2115 DOUBLE-STRUCK CAPITAL N
485     // 200B ZERO WIDTH SPACE (ignored)
486     // FE63 SMALL HYPHEN-MINUS
487     // 00AD SOFT HYPHEN (ignored)
488     // FF0D FULLWIDTH HYPHEN-MINUS
489     // 180C MONGOLIAN FREE VARIATION SELECTOR TWO (ignored)
490     // 212C SCRIPT CAPITAL B
491     // FE00 VARIATION SELECTOR-1 (ignored)
492     // 017F LATIN SMALL LETTER LONG S
493     // 2064 INVISIBLE PLUS (ignored)
494     // 1D530 MATHEMATICAL FRAKTUR SMALL S
495     // E01EF VARIATION SELECTOR-256 (ignored)
496     // FB04 LATIN SMALL LIGATURE FFL
497     { "\\u02E3\\u034F\\u2115\\u200B\\uFE63\\u00AD\\uFF0D\\u180C"
498       "\\u212C\\uFE00\\u017F\\u2064\\U0001D530\\U000E01EF\\uFB04", "B",
499       "\\u5921\\u591E\\u591C\\u5919", 0 },
500     { "123456789012345678901234567890123456789012345678901234567890123."
501       "123456789012345678901234567890123456789012345678901234567890123."
502       "123456789012345678901234567890123456789012345678901234567890123."
503       "1234567890123456789012345678901234567890123456789012345678901", "B",
504       "123456789012345678901234567890123456789012345678901234567890123."
505       "123456789012345678901234567890123456789012345678901234567890123."
506       "123456789012345678901234567890123456789012345678901234567890123."
507       "1234567890123456789012345678901234567890123456789012345678901", 0 },
508     { "123456789012345678901234567890123456789012345678901234567890123."
509       "123456789012345678901234567890123456789012345678901234567890123."
510       "123456789012345678901234567890123456789012345678901234567890123."
511       "1234567890123456789012345678901234567890123456789012345678901.", "B",
512       "123456789012345678901234567890123456789012345678901234567890123."
513       "123456789012345678901234567890123456789012345678901234567890123."
514       "123456789012345678901234567890123456789012345678901234567890123."
515       "1234567890123456789012345678901234567890123456789012345678901.", 0 },
516     // Domain name >256 characters, forces slow path in UTF-8 processing.
517     { "123456789012345678901234567890123456789012345678901234567890123."
518       "123456789012345678901234567890123456789012345678901234567890123."
519       "123456789012345678901234567890123456789012345678901234567890123."
520       "123456789012345678901234567890123456789012345678901234567890123."
521       "12345678901234567890123456789012345678901234567890123456789012", "B",
522       "123456789012345678901234567890123456789012345678901234567890123."
523       "123456789012345678901234567890123456789012345678901234567890123."
524       "123456789012345678901234567890123456789012345678901234567890123."
525       "123456789012345678901234567890123456789012345678901234567890123."
526       "12345678901234567890123456789012345678901234567890123456789012",
527       UIDNA_ERROR_DOMAIN_NAME_TOO_LONG },
528     { "123456789012345678901234567890123456789012345678901234567890123."
529       "123456789012345678901234567890123456789012345678901234567890123."
530       "123456789012345678901234567890123456789012345678901234567890123."
531       "123456789012345678901234567890123456789012345678901234567890123."
532       "1234567890123456789012345678901234567890123456789\\u05D0", "B",
533       "123456789012345678901234567890123456789012345678901234567890123."
534       "123456789012345678901234567890123456789012345678901234567890123."
535       "123456789012345678901234567890123456789012345678901234567890123."
536       "123456789012345678901234567890123456789012345678901234567890123."
537       "1234567890123456789012345678901234567890123456789\\u05D0",
538       UIDNA_ERROR_DOMAIN_NAME_TOO_LONG|UIDNA_ERROR_BIDI },
539     { "123456789012345678901234567890123456789012345678901234567890123."
540       "1234567890123456789012345678901234567890123456789012345678901234."
541       "123456789012345678901234567890123456789012345678901234567890123."
542       "123456789012345678901234567890123456789012345678901234567890", "B",
543       "123456789012345678901234567890123456789012345678901234567890123."
544       "1234567890123456789012345678901234567890123456789012345678901234."
545       "123456789012345678901234567890123456789012345678901234567890123."
546       "123456789012345678901234567890123456789012345678901234567890",
547       UIDNA_ERROR_LABEL_TOO_LONG },
548     { "123456789012345678901234567890123456789012345678901234567890123."
549       "1234567890123456789012345678901234567890123456789012345678901234."
550       "123456789012345678901234567890123456789012345678901234567890123."
551       "123456789012345678901234567890123456789012345678901234567890.", "B",
552       "123456789012345678901234567890123456789012345678901234567890123."
553       "1234567890123456789012345678901234567890123456789012345678901234."
554       "123456789012345678901234567890123456789012345678901234567890123."
555       "123456789012345678901234567890123456789012345678901234567890.",
556       UIDNA_ERROR_LABEL_TOO_LONG },
557     { "123456789012345678901234567890123456789012345678901234567890123."
558       "1234567890123456789012345678901234567890123456789012345678901234."
559       "123456789012345678901234567890123456789012345678901234567890123."
560       "1234567890123456789012345678901234567890123456789012345678901", "B",
561       "123456789012345678901234567890123456789012345678901234567890123."
562       "1234567890123456789012345678901234567890123456789012345678901234."
563       "123456789012345678901234567890123456789012345678901234567890123."
564       "1234567890123456789012345678901234567890123456789012345678901",
565       UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG },
566     // label length 63: xn--1234567890123456789012345678901234567890123456789012345-9te
567     { "\\u00E41234567890123456789012345678901234567890123456789012345", "B",
568       "\\u00E41234567890123456789012345678901234567890123456789012345", 0 },
569     { "1234567890\\u00E41234567890123456789012345678901234567890123456", "B",
570       "1234567890\\u00E41234567890123456789012345678901234567890123456", UIDNA_ERROR_LABEL_TOO_LONG },
571     { "123456789012345678901234567890123456789012345678901234567890123."
572       "1234567890\\u00E4123456789012345678901234567890123456789012345."
573       "123456789012345678901234567890123456789012345678901234567890123."
574       "1234567890123456789012345678901234567890123456789012345678901", "B",
575       "123456789012345678901234567890123456789012345678901234567890123."
576       "1234567890\\u00E4123456789012345678901234567890123456789012345."
577       "123456789012345678901234567890123456789012345678901234567890123."
578       "1234567890123456789012345678901234567890123456789012345678901", 0 },
579     { "123456789012345678901234567890123456789012345678901234567890123."
580       "1234567890\\u00E4123456789012345678901234567890123456789012345."
581       "123456789012345678901234567890123456789012345678901234567890123."
582       "1234567890123456789012345678901234567890123456789012345678901.", "B",
583       "123456789012345678901234567890123456789012345678901234567890123."
584       "1234567890\\u00E4123456789012345678901234567890123456789012345."
585       "123456789012345678901234567890123456789012345678901234567890123."
586       "1234567890123456789012345678901234567890123456789012345678901.", 0 },
587     { "123456789012345678901234567890123456789012345678901234567890123."
588       "1234567890\\u00E4123456789012345678901234567890123456789012345."
589       "123456789012345678901234567890123456789012345678901234567890123."
590       "12345678901234567890123456789012345678901234567890123456789012", "B",
591       "123456789012345678901234567890123456789012345678901234567890123."
592       "1234567890\\u00E4123456789012345678901234567890123456789012345."
593       "123456789012345678901234567890123456789012345678901234567890123."
594       "12345678901234567890123456789012345678901234567890123456789012",
595       UIDNA_ERROR_DOMAIN_NAME_TOO_LONG },
596     { "123456789012345678901234567890123456789012345678901234567890123."
597       "1234567890\\u00E41234567890123456789012345678901234567890123456."
598       "123456789012345678901234567890123456789012345678901234567890123."
599       "123456789012345678901234567890123456789012345678901234567890", "B",
600       "123456789012345678901234567890123456789012345678901234567890123."
601       "1234567890\\u00E41234567890123456789012345678901234567890123456."
602       "123456789012345678901234567890123456789012345678901234567890123."
603       "123456789012345678901234567890123456789012345678901234567890",
604       UIDNA_ERROR_LABEL_TOO_LONG },
605     { "123456789012345678901234567890123456789012345678901234567890123."
606       "1234567890\\u00E41234567890123456789012345678901234567890123456."
607       "123456789012345678901234567890123456789012345678901234567890123."
608       "123456789012345678901234567890123456789012345678901234567890.", "B",
609       "123456789012345678901234567890123456789012345678901234567890123."
610       "1234567890\\u00E41234567890123456789012345678901234567890123456."
611       "123456789012345678901234567890123456789012345678901234567890123."
612       "123456789012345678901234567890123456789012345678901234567890.",
613       UIDNA_ERROR_LABEL_TOO_LONG },
614     { "123456789012345678901234567890123456789012345678901234567890123."
615       "1234567890\\u00E41234567890123456789012345678901234567890123456."
616       "123456789012345678901234567890123456789012345678901234567890123."
617       "1234567890123456789012345678901234567890123456789012345678901", "B",
618       "123456789012345678901234567890123456789012345678901234567890123."
619       "1234567890\\u00E41234567890123456789012345678901234567890123456."
620       "123456789012345678901234567890123456789012345678901234567890123."
621       "1234567890123456789012345678901234567890123456789012345678901",
622       UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG },
623     // hyphen errors and empty-label errors
624     // Ticket #10883: ToUnicode also checks for empty labels.
625     { ".", "B", ".", UIDNA_ERROR_EMPTY_LABEL },
626     { "\\uFF0E", "B", ".", UIDNA_ERROR_EMPTY_LABEL },
627     // "xn---q----jra"=="-q--a-umlaut-"
628     { "a.b..-q--a-.e", "B", "a.b..-q--a-.e",
629       UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|
630       UIDNA_ERROR_HYPHEN_3_4 },
631     { "a.b..-q--\\u00E4-.e", "B", "a.b..-q--\\u00E4-.e",
632       UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|
633       UIDNA_ERROR_HYPHEN_3_4 },
634     { "a.b..xn---q----jra.e", "B", "a.b..-q--\\u00E4-.e",
635       UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN|
636       UIDNA_ERROR_HYPHEN_3_4 },
637     { "a..c", "B", "a..c", UIDNA_ERROR_EMPTY_LABEL },
638     { "a.xn--.c", "B", "a.xn--\\uFFFD.c", UIDNA_ERROR_INVALID_ACE_LABEL },
639     { "a.-b.", "B", "a.-b.", UIDNA_ERROR_LEADING_HYPHEN },
640     { "a.b-.c", "B", "a.b-.c", UIDNA_ERROR_TRAILING_HYPHEN },
641     { "a.-.c", "B", "a.-.c", UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN },
642     { "a.bc--de.f", "B", "a.bc--de.f", UIDNA_ERROR_HYPHEN_3_4 },
643     { "\\u00E4.\\u00AD.c", "B", "\\u00E4..c", UIDNA_ERROR_EMPTY_LABEL },
644     { "\\u00E4.xn--.c", "B", "\\u00E4.xn--\\uFFFD.c", UIDNA_ERROR_INVALID_ACE_LABEL },
645     { "\\u00E4.-b.", "B", "\\u00E4.-b.", UIDNA_ERROR_LEADING_HYPHEN },
646     { "\\u00E4.b-.c", "B", "\\u00E4.b-.c", UIDNA_ERROR_TRAILING_HYPHEN },
647     { "\\u00E4.-.c", "B", "\\u00E4.-.c", UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN },
648     { "\\u00E4.bc--de.f", "B", "\\u00E4.bc--de.f", UIDNA_ERROR_HYPHEN_3_4 },
649     { "a.b.\\u0308c.d", "B", "a.b.\\uFFFDc.d", UIDNA_ERROR_LEADING_COMBINING_MARK },
650     { "a.b.xn--c-bcb.d", "B",
651       "a.b.xn--c-bcb\\uFFFD.d", UIDNA_ERROR_LEADING_COMBINING_MARK|UIDNA_ERROR_INVALID_ACE_LABEL },
652     // BiDi
653     { "A0", "B", "a0", 0 },
654     { "0A", "B", "0a", 0 },  // all-LTR is ok to start with a digit (EN)
655     { "0A.\\u05D0", "B",  // ASCII label does not start with L/R/AL
656       "0a.\\u05D0", UIDNA_ERROR_BIDI },
657     { "c.xn--0-eha.xn--4db", "B",  // 2nd label does not start with L/R/AL
658       "c.0\\u00FC.\\u05D0", UIDNA_ERROR_BIDI },
659     { "b-.\\u05D0", "B",  // label does not end with L/EN
660       "b-.\\u05D0", UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI },
661     { "d.xn----dha.xn--4db", "B",  // 2nd label does not end with L/EN
662       "d.\\u00FC-.\\u05D0", UIDNA_ERROR_TRAILING_HYPHEN|UIDNA_ERROR_BIDI },
663     { "a\\u05D0", "B", "a\\u05D0", UIDNA_ERROR_BIDI },  // first dir != last dir
664     { "\\u05D0\\u05C7", "B", "\\u05D0\\u05C7", 0 },
665     { "\\u05D09\\u05C7", "B", "\\u05D09\\u05C7", 0 },
666     { "\\u05D0a\\u05C7", "B", "\\u05D0a\\u05C7", UIDNA_ERROR_BIDI },  // first dir != last dir
667     { "\\u05D0\\u05EA", "B", "\\u05D0\\u05EA", 0 },
668     { "\\u05D0\\u05F3\\u05EA", "B", "\\u05D0\\u05F3\\u05EA", 0 },
669     { "a\\u05D0Tz", "B", "a\\u05D0tz", UIDNA_ERROR_BIDI },  // mixed dir
670     { "\\u05D0T\\u05EA", "B", "\\u05D0t\\u05EA", UIDNA_ERROR_BIDI },  // mixed dir
671     { "\\u05D07\\u05EA", "B", "\\u05D07\\u05EA", 0 },
672     { "\\u05D0\\u0667\\u05EA", "B", "\\u05D0\\u0667\\u05EA", 0 },  // Arabic 7 in the middle
673     { "a7\\u0667z", "B", "a7\\u0667z", UIDNA_ERROR_BIDI },  // AN digit in LTR
674     { "a7\\u0667", "B", "a7\\u0667", UIDNA_ERROR_BIDI },  // AN digit in LTR
675     { "\\u05D07\\u0667\\u05EA", "B",  // mixed EN/AN digits in RTL
676       "\\u05D07\\u0667\\u05EA", UIDNA_ERROR_BIDI },
677     { "\\u05D07\\u0667", "B",  // mixed EN/AN digits in RTL
678       "\\u05D07\\u0667", UIDNA_ERROR_BIDI },
679     // ZWJ
680     { "\\u0BB9\\u0BCD\\u200D", "N", "\\u0BB9\\u0BCD\\u200D", 0 },  // Virama+ZWJ
681     { "\\u0BB9\\u200D", "N", "\\u0BB9\\u200D", UIDNA_ERROR_CONTEXTJ },  // no Virama
682     { "\\u200D", "N", "\\u200D", UIDNA_ERROR_CONTEXTJ },  // no Virama
683     // ZWNJ
684     { "\\u0BB9\\u0BCD\\u200C", "N", "\\u0BB9\\u0BCD\\u200C", 0 },  // Virama+ZWNJ
685     { "\\u0BB9\\u200C", "N", "\\u0BB9\\u200C", UIDNA_ERROR_CONTEXTJ },  // no Virama
686     { "\\u200C", "N", "\\u200C", UIDNA_ERROR_CONTEXTJ },  // no Virama
687     { "\\u0644\\u0670\\u200C\\u06ED\\u06EF", "N",  // Joining types D T ZWNJ T R
688       "\\u0644\\u0670\\u200C\\u06ED\\u06EF", 0 },
689     { "\\u0644\\u0670\\u200C\\u06EF", "N",  // D T ZWNJ R
690       "\\u0644\\u0670\\u200C\\u06EF", 0 },
691     { "\\u0644\\u200C\\u06ED\\u06EF", "N",  // D ZWNJ T R
692       "\\u0644\\u200C\\u06ED\\u06EF", 0 },
693     { "\\u0644\\u200C\\u06EF", "N",  // D ZWNJ R
694       "\\u0644\\u200C\\u06EF", 0 },
695     { "\\u0644\\u0670\\u200C\\u06ED", "N",  // D T ZWNJ T
696       "\\u0644\\u0670\\u200C\\u06ED", UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ },
697     { "\\u06EF\\u200C\\u06EF", "N",  // R ZWNJ R
698       "\\u06EF\\u200C\\u06EF", UIDNA_ERROR_CONTEXTJ },
699     { "\\u0644\\u200C", "N",  // D ZWNJ
700       "\\u0644\\u200C", UIDNA_ERROR_BIDI|UIDNA_ERROR_CONTEXTJ },
701     { "\\u0660\\u0661", "B",  // Arabic-Indic Digits alone
702       "\\u0660\\u0661", UIDNA_ERROR_BIDI },
703     { "\\u06F0\\u06F1", "B",  // Extended Arabic-Indic Digits alone
704       "\\u06F0\\u06F1", 0 },
705     { "\\u0660\\u06F1", "B",  // Mixed Arabic-Indic Digits
706       "\\u0660\\u06F1", UIDNA_ERROR_CONTEXTO_DIGITS|UIDNA_ERROR_BIDI },
707     // All of the CONTEXTO "Would otherwise have been DISALLOWED" characters
708     // in their correct contexts,
709     // then each in incorrect context.
710     { "l\\u00B7l\\u4E00\\u0375\\u03B1\\u05D0\\u05F3\\u05F4\\u30FB", "B",
711       "l\\u00B7l\\u4E00\\u0375\\u03B1\\u05D0\\u05F3\\u05F4\\u30FB", UIDNA_ERROR_BIDI },
712     { "l\\u00B7", "B",
713       "l\\u00B7", UIDNA_ERROR_CONTEXTO_PUNCTUATION },
714     { "\\u00B7l", "B",
715       "\\u00B7l", UIDNA_ERROR_CONTEXTO_PUNCTUATION },
716     { "\\u0375", "B",
717       "\\u0375", UIDNA_ERROR_CONTEXTO_PUNCTUATION },
718     { "\\u03B1\\u05F3", "B",
719       "\\u03B1\\u05F3", UIDNA_ERROR_CONTEXTO_PUNCTUATION|UIDNA_ERROR_BIDI },
720     { "\\u05F4", "B",
721       "\\u05F4", UIDNA_ERROR_CONTEXTO_PUNCTUATION },
722     { "l\\u30FB", "B",
723       "l\\u30FB", UIDNA_ERROR_CONTEXTO_PUNCTUATION },
724     // Ticket #8137: UTS #46 toUnicode() fails with non-ASCII labels that turn
725     // into 15 characters (UChars).
726     // The bug was in u_strFromPunycode() which did not write the last character
727     // if it just so fit into the end of the destination buffer.
728     // The UTS #46 code gives a default-capacity UnicodeString as the destination buffer,
729     // and the internal UnicodeString capacity is currently 15 UChars on 64-bit machines
730     // but 13 on 32-bit machines.
731     // Label with 15 UChars, for 64-bit-machine testing:
732     { "aaaaaaaaaaaaa\\u00FCa.de", "B", "aaaaaaaaaaaaa\\u00FCa.de", 0 },
733     { "xn--aaaaaaaaaaaaaa-ssb.de", "B", "aaaaaaaaaaaaa\\u00FCa.de", 0 },
734     { "abschlu\\u00DFpr\\u00FCfung.de", "N", "abschlu\\u00DFpr\\u00FCfung.de", 0 },
735     { "xn--abschluprfung-hdb15b.de", "B", "abschlu\\u00DFpr\\u00FCfung.de", 0 },
736     // Label with 13 UChars, for 32-bit-machine testing:
737     { "xn--aaaaaaaaaaaa-nlb.de", "B", "aaaaaaaaaaa\\u00FCa.de", 0 },
738     { "xn--schluprfung-z6a39a.de", "B", "schlu\\u00DFpr\\u00FCfung.de", 0 },
739     // { "", "B",
740     //   "", 0 },
741 };
742 
743 }  // namespace
744 
TestSomeCases()745 void UTS46Test::TestSomeCases() {
746     IcuTestErrorCode errorCode(*this, "TestSomeCases");
747     char buffer[400], buffer2[400];
748     int32_t i;
749     for(i=0; i<UPRV_LENGTHOF(testCases); ++i) {
750         const TestCase &testCase=testCases[i];
751         UnicodeString input(ctou(testCase.s));
752         UnicodeString expected(ctou(testCase.u));
753         // ToASCII/ToUnicode, transitional/nontransitional
754         UnicodeString aT, uT, aN, uN;
755         IDNAInfo aTInfo, uTInfo, aNInfo, uNInfo;
756         trans->nameToASCII(input, aT, aTInfo, errorCode);
757         trans->nameToUnicode(input, uT, uTInfo, errorCode);
758         nontrans->nameToASCII(input, aN, aNInfo, errorCode);
759         nontrans->nameToUnicode(input, uN, uNInfo, errorCode);
760         if(errorCode.errIfFailureAndReset("first-level processing [%d/%s] %s",
761                                           static_cast<int>(i), testCase.o, testCase.s)
762         ) {
763             continue;
764         }
765         // ToUnicode does not set length-overflow errors.
766         uint32_t uniErrors=testCase.errors&~
767             (UIDNA_ERROR_LABEL_TOO_LONG|
768              UIDNA_ERROR_DOMAIN_NAME_TOO_LONG);
769         char mode=testCase.o[0];
770         if(mode=='B' || mode=='N') {
771             if(uNInfo.getErrors()!=uniErrors) {
772                 errln("N.nameToUnicode([%d] %s) unexpected errors %04lx",
773                       static_cast<int>(i), testCase.s, static_cast<long>(uNInfo.getErrors()));
774                 continue;
775             }
776             if(uN!=expected) {
777                 prettify(uN).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
778                 errln("N.nameToUnicode([%d] %s) unexpected string %s",
779                       static_cast<int>(i), testCase.s, buffer);
780                 continue;
781             }
782             if(aNInfo.getErrors()!=testCase.errors) {
783                 errln("N.nameToASCII([%d] %s) unexpected errors %04lx",
784                       static_cast<int>(i), testCase.s, static_cast<long>(aNInfo.getErrors()));
785                 continue;
786             }
787         }
788         if(mode=='B' || mode=='T') {
789             if(uTInfo.getErrors()!=uniErrors) {
790                 errln("T.nameToUnicode([%d] %s) unexpected errors %04lx",
791                       static_cast<int>(i), testCase.s, static_cast<long>(uTInfo.getErrors()));
792                 continue;
793             }
794             if(uT!=expected) {
795                 prettify(uT).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
796                 errln("T.nameToUnicode([%d] %s) unexpected string %s",
797                       static_cast<int>(i), testCase.s, buffer);
798                 continue;
799             }
800             if(aTInfo.getErrors()!=testCase.errors) {
801                 errln("T.nameToASCII([%d] %s) unexpected errors %04lx",
802                       static_cast<int>(i), testCase.s, static_cast<long>(aTInfo.getErrors()));
803                 continue;
804             }
805         }
806         // ToASCII is all-ASCII if no severe errors
807         if((aNInfo.getErrors()&severeErrors)==0 && !isASCII(aN)) {
808             prettify(aN).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
809             errln("N.nameToASCII([%d] %s) (errors %04lx) result is not ASCII %s",
810                   static_cast<int>(i), testCase.s, aNInfo.getErrors(), buffer);
811             continue;
812         }
813         if((aTInfo.getErrors()&severeErrors)==0 && !isASCII(aT)) {
814             prettify(aT).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
815             errln("T.nameToASCII([%d] %s) (errors %04lx) result is not ASCII %s",
816                   static_cast<int>(i), testCase.s, aTInfo.getErrors(), buffer);
817             continue;
818         }
819         if(verbose) {
820             char m= mode=='B' ? mode : 'N';
821             prettify(aN).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
822             logln("%c.nameToASCII([%d] %s) (errors %04lx) result string: %s",
823                   m, static_cast<int>(i), testCase.s, aNInfo.getErrors(), buffer);
824             if(mode!='B') {
825                 prettify(aT).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
826                 logln("T.nameToASCII([%d] %s) (errors %04lx) result string: %s",
827                       static_cast<int>(i), testCase.s, aTInfo.getErrors(), buffer);
828             }
829         }
830         // second-level processing
831         UnicodeString aTuN, uTaN, aNuN, uNaN;
832         IDNAInfo aTuNInfo, uTaNInfo, aNuNInfo, uNaNInfo;
833         nontrans->nameToUnicode(aT, aTuN, aTuNInfo, errorCode);
834         nontrans->nameToASCII(uT, uTaN, uTaNInfo, errorCode);
835         nontrans->nameToUnicode(aN, aNuN, aNuNInfo, errorCode);
836         nontrans->nameToASCII(uN, uNaN, uNaNInfo, errorCode);
837         if(errorCode.errIfFailureAndReset("second-level processing [%d/%s] %s",
838                                           static_cast<int>(i), testCase.o, testCase.s)
839         ) {
840             continue;
841         }
842         if(aN!=uNaN) {
843             prettify(aN).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
844             prettify(uNaN).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
845             errln("N.nameToASCII([%d] %s)!=N.nameToUnicode().N.nameToASCII() "
846                   "(errors %04lx) %s vs. %s",
847                   static_cast<int>(i), testCase.s, aNInfo.getErrors(), buffer, buffer2);
848             continue;
849         }
850         if(aT!=uTaN) {
851             prettify(aT).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
852             prettify(uTaN).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
853             errln("T.nameToASCII([%d] %s)!=T.nameToUnicode().N.nameToASCII() "
854                   "(errors %04lx) %s vs. %s",
855                   static_cast<int>(i), testCase.s, aNInfo.getErrors(), buffer, buffer2);
856             continue;
857         }
858         if(uN!=aNuN) {
859             prettify(uN).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
860             prettify(aNuN).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
861             errln("N.nameToUnicode([%d] %s)!=N.nameToASCII().N.nameToUnicode() "
862                   "(errors %04lx) %s vs. %s",
863                   static_cast<int>(i), testCase.s, uNInfo.getErrors(), buffer, buffer2);
864             continue;
865         }
866         if(uT!=aTuN) {
867             prettify(uT).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
868             prettify(aTuN).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
869             errln("T.nameToUnicode([%d] %s)!=T.nameToASCII().N.nameToUnicode() "
870                   "(errors %04lx) %s vs. %s",
871                   static_cast<int>(i), testCase.s, uNInfo.getErrors(), buffer, buffer2);
872             continue;
873         }
874         // labelToUnicode
875         UnicodeString aTL, uTL, aNL, uNL;
876         IDNAInfo aTLInfo, uTLInfo, aNLInfo, uNLInfo;
877         trans->labelToASCII(input, aTL, aTLInfo, errorCode);
878         trans->labelToUnicode(input, uTL, uTLInfo, errorCode);
879         nontrans->labelToASCII(input, aNL, aNLInfo, errorCode);
880         nontrans->labelToUnicode(input, uNL, uNLInfo, errorCode);
881         if(errorCode.errIfFailureAndReset("labelToXYZ processing [%d/%s] %s",
882                                           static_cast<int>(i), testCase.o, testCase.s)
883         ) {
884             continue;
885         }
886         if (aN.indexOf(static_cast<char16_t>(0x2e)) < 0) {
887             if(aN!=aNL || aNInfo.getErrors()!=aNLInfo.getErrors()) {
888                 prettify(aN).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
889                 prettify(aNL).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
890                 errln("N.nameToASCII([%d] %s)!=N.labelToASCII() "
891                       "(errors %04lx vs %04lx) %s vs. %s",
892                       static_cast<int>(i), testCase.s, aNInfo.getErrors(), aNLInfo.getErrors(), buffer, buffer2);
893                 continue;
894             }
895         } else {
896             if((aNLInfo.getErrors()&UIDNA_ERROR_LABEL_HAS_DOT)==0) {
897                 errln("N.labelToASCII([%d] %s) errors %04lx missing UIDNA_ERROR_LABEL_HAS_DOT",
898                       static_cast<int>(i), testCase.s, static_cast<long>(aNLInfo.getErrors()));
899                 continue;
900             }
901         }
902         if (aT.indexOf(static_cast<char16_t>(0x2e)) < 0) {
903             if(aT!=aTL || aTInfo.getErrors()!=aTLInfo.getErrors()) {
904                 prettify(aT).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
905                 prettify(aTL).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
906                 errln("T.nameToASCII([%d] %s)!=T.labelToASCII() "
907                       "(errors %04lx vs %04lx) %s vs. %s",
908                       static_cast<int>(i), testCase.s, aTInfo.getErrors(), aTLInfo.getErrors(), buffer, buffer2);
909                 continue;
910             }
911         } else {
912             if((aTLInfo.getErrors()&UIDNA_ERROR_LABEL_HAS_DOT)==0) {
913                 errln("T.labelToASCII([%d] %s) errors %04lx missing UIDNA_ERROR_LABEL_HAS_DOT",
914                       static_cast<int>(i), testCase.s, static_cast<long>(aTLInfo.getErrors()));
915                 continue;
916             }
917         }
918         if (uN.indexOf(static_cast<char16_t>(0x2e)) < 0) {
919             if(uN!=uNL || uNInfo.getErrors()!=uNLInfo.getErrors()) {
920                 prettify(uN).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
921                 prettify(uNL).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
922                 errln("N.nameToUnicode([%d] %s)!=N.labelToUnicode() "
923                       "(errors %04lx vs %04lx) %s vs. %s",
924                       static_cast<int>(i), testCase.s, uNInfo.getErrors(), uNLInfo.getErrors(), buffer, buffer2);
925                 continue;
926             }
927         } else {
928             if((uNLInfo.getErrors()&UIDNA_ERROR_LABEL_HAS_DOT)==0) {
929                 errln("N.labelToUnicode([%d] %s) errors %04lx missing UIDNA_ERROR_LABEL_HAS_DOT",
930                       static_cast<int>(i), testCase.s, static_cast<long>(uNLInfo.getErrors()));
931                 continue;
932             }
933         }
934         if (uT.indexOf(static_cast<char16_t>(0x2e)) < 0) {
935             if(uT!=uTL || uTInfo.getErrors()!=uTLInfo.getErrors()) {
936                 prettify(uT).extract(0, 0x7fffffff, buffer, UPRV_LENGTHOF(buffer));
937                 prettify(uTL).extract(0, 0x7fffffff, buffer2, UPRV_LENGTHOF(buffer2));
938                 errln("T.nameToUnicode([%d] %s)!=T.labelToUnicode() "
939                       "(errors %04lx vs %04lx) %s vs. %s",
940                       static_cast<int>(i), testCase.s, uTInfo.getErrors(), uTLInfo.getErrors(), buffer, buffer2);
941                 continue;
942             }
943         } else {
944             if((uTLInfo.getErrors()&UIDNA_ERROR_LABEL_HAS_DOT)==0) {
945                 errln("T.labelToUnicode([%d] %s) errors %04lx missing UIDNA_ERROR_LABEL_HAS_DOT",
946                       static_cast<int>(i), testCase.s, static_cast<long>(uTLInfo.getErrors()));
947                 continue;
948             }
949         }
950         // Differences between transitional and nontransitional processing
951         if(mode=='B') {
952             if( aNInfo.isTransitionalDifferent() ||
953                 aTInfo.isTransitionalDifferent() ||
954                 uNInfo.isTransitionalDifferent() ||
955                 uTInfo.isTransitionalDifferent() ||
956                 aNLInfo.isTransitionalDifferent() ||
957                 aTLInfo.isTransitionalDifferent() ||
958                 uNLInfo.isTransitionalDifferent() ||
959                 uTLInfo.isTransitionalDifferent()
960             ) {
961                 errln("B.process([%d] %s) isTransitionalDifferent()", static_cast<int>(i), testCase.s);
962                 continue;
963             }
964             if( aN!=aT || uN!=uT || aNL!=aTL || uNL!=uTL ||
965                 aNInfo.getErrors()!=aTInfo.getErrors() || uNInfo.getErrors()!=uTInfo.getErrors() ||
966                 aNLInfo.getErrors()!=aTLInfo.getErrors() || uNLInfo.getErrors()!=uTLInfo.getErrors()
967             ) {
968                 errln("N.process([%d] %s) vs. T.process() different errors or result strings",
969                       static_cast<int>(i), testCase.s);
970                 continue;
971             }
972         } else {
973             if( !aNInfo.isTransitionalDifferent() ||
974                 !aTInfo.isTransitionalDifferent() ||
975                 !uNInfo.isTransitionalDifferent() ||
976                 !uTInfo.isTransitionalDifferent() ||
977                 !aNLInfo.isTransitionalDifferent() ||
978                 !aTLInfo.isTransitionalDifferent() ||
979                 !uNLInfo.isTransitionalDifferent() ||
980                 !uTLInfo.isTransitionalDifferent()
981             ) {
982                 errln("%s.process([%d] %s) !isTransitionalDifferent()",
983                       testCase.o, static_cast<int>(i), testCase.s);
984                 continue;
985             }
986             if(aN==aT || uN==uT || aNL==aTL || uNL==uTL) {
987                 errln("N.process([%d] %s) vs. T.process() same result strings",
988                       static_cast<int>(i), testCase.s);
989                 continue;
990             }
991         }
992         // UTF-8
993         std::string input8, aT8, uT8, aN8, uN8;
994         StringByteSink<std::string> aT8Sink(&aT8), uT8Sink(&uT8), aN8Sink(&aN8), uN8Sink(&uN8);
995         IDNAInfo aT8Info, uT8Info, aN8Info, uN8Info;
996         input.toUTF8String(input8);
997         trans->nameToASCII_UTF8(input8, aT8Sink, aT8Info, errorCode);
998         trans->nameToUnicodeUTF8(input8, uT8Sink, uT8Info, errorCode);
999         nontrans->nameToASCII_UTF8(input8, aN8Sink, aN8Info, errorCode);
1000         nontrans->nameToUnicodeUTF8(input8, uN8Sink, uN8Info, errorCode);
1001         if(errorCode.errIfFailureAndReset("UTF-8 processing [%d/%s] %s",
1002                                           static_cast<int>(i), testCase.o, testCase.s)
1003         ) {
1004             continue;
1005         }
1006         UnicodeString aT16(UnicodeString::fromUTF8(aT8));
1007         UnicodeString uT16(UnicodeString::fromUTF8(uT8));
1008         UnicodeString aN16(UnicodeString::fromUTF8(aN8));
1009         UnicodeString uN16(UnicodeString::fromUTF8(uN8));
1010         if( aN8Info.getErrors()!=aNInfo.getErrors() ||
1011             uN8Info.getErrors()!=uNInfo.getErrors()
1012         ) {
1013             errln("N.xyzUTF8([%d] %s) vs. UTF-16 processing different errors %04lx vs. %04lx",
1014                   static_cast<int>(i), testCase.s,
1015                   static_cast<long>(aN8Info.getErrors()), static_cast<long>(aNInfo.getErrors()));
1016             continue;
1017         }
1018         if( aT8Info.getErrors()!=aTInfo.getErrors() ||
1019             uT8Info.getErrors()!=uTInfo.getErrors()
1020         ) {
1021             errln("T.xyzUTF8([%d] %s) vs. UTF-16 processing different errors %04lx vs. %04lx",
1022                   static_cast<int>(i), testCase.s,
1023                   static_cast<long>(aT8Info.getErrors()), static_cast<long>(aTInfo.getErrors()));
1024             continue;
1025         }
1026         if(aT16!=aT || uT16!=uT || aN16!=aN || uN16!=uN) {
1027             errln("%s.xyzUTF8([%d] %s) vs. UTF-16 processing different string results",
1028                   testCase.o, static_cast<int>(i), testCase.s, static_cast<long>(aTInfo.getErrors()));
1029             continue;
1030         }
1031         if( aT8Info.isTransitionalDifferent()!=aTInfo.isTransitionalDifferent() ||
1032             uT8Info.isTransitionalDifferent()!=uTInfo.isTransitionalDifferent() ||
1033             aN8Info.isTransitionalDifferent()!=aNInfo.isTransitionalDifferent() ||
1034             uN8Info.isTransitionalDifferent()!=uNInfo.isTransitionalDifferent()
1035         ) {
1036             errln("%s.xyzUTF8([%d] %s) vs. UTF-16 processing different isTransitionalDifferent()",
1037                   testCase.o, static_cast<int>(i), testCase.s);
1038             continue;
1039         }
1040     }
1041 }
1042 
1043 namespace {
1044 
// Number of semicolon-separated columns parsed from each IdnaTestV2.txt line.
constexpr int32_t kNumFields = 7;
1046 
1047 void U_CALLCONV
idnaTestLineFn(void * context,char * fields[][2],int32_t,UErrorCode * pErrorCode)1048 idnaTestLineFn(void *context,
1049                char *fields[][2], int32_t /* fieldCount */,
1050                UErrorCode *pErrorCode) {
1051     reinterpret_cast<UTS46Test *>(context)->idnaTestOneLine(fields, *pErrorCode);
1052 }
1053 
// Converts one UTF-8 field (field[0] = start, field[1] = limit) into a
// trimmed, unescaped UnicodeString.
// A blank field yields a copy of sameAs; a field consisting of exactly two
// double quotes ("") yields the empty string.
UnicodeString s16FromField(char *(&field)[2], const UnicodeString &sameAs) {
    const int32_t numBytes = static_cast<int32_t>(field[1] - field[0]);
    UnicodeString text =
        UnicodeString::fromUTF8(StringPiece(field[0], numBytes)).trim().unescape();
    if (text.isEmpty()) {
        // blank means same as another string
        return sameAs;
    }
    if (text == u"\"\"") {
        // explicit empty string (new in Unicode 16)
        return UnicodeString();
    }
    return text;
}
1064 
statusFromField(char * (& field)[2])1065 std::string statusFromField(char *(&field)[2]) {
1066     const char *start = u_skipWhitespace(field[0]);
1067     std::string status;
1068     if (start != field[1]) {
1069         int32_t length = static_cast<int32_t>(field[1] - start);
1070         while (length > 0 && (start[length - 1] == u' ' || start[length - 1] == u'\t')) {
1071             --length;
1072         }
1073         status.assign(start, length);
1074     }
1075     return status;
1076 }
1077 
1078 }  // namespace
1079 
checkIdnaTestResult(const char * line,const char * type,const UnicodeString & expected,const UnicodeString & result,const char * status,const IDNAInfo & info)1080 void UTS46Test::checkIdnaTestResult(const char *line, const char *type,
1081                                     const UnicodeString &expected, const UnicodeString &result,
1082                                     const char *status, const IDNAInfo &info) {
1083     // An error in toUnicode or toASCII is indicated by a value in square brackets,
1084     // such as "[B5 B6]".
1085     UBool expectedHasErrors = false;
1086     if (*status != 0) {
1087         if (*status != u'[') {
1088             errln("%s  status field does not start with '[': %s\n    %s", type, status, line);
1089         }
1090         if (strcmp(status, reinterpret_cast<const char*>(u8"[]")) != 0) {
1091             expectedHasErrors = true;
1092         }
1093         // ICU workaround:
1094         // We do effectively VerifyDnsLength (we always check for lengths), except,
1095         // based on past bug reports, we do not do the following in UTS #46 ToASCII:
1096         // When VerifyDnsLength is true, the empty root label is disallowed.
1097         // Ignore the expected error if it is the only one.
1098         // TODO: ICU-22882 - Report the empty root label separately from empty non-root labels.
1099         if (strncmp(type, "toASCII", 7) == 0 &&  // startsWith
1100                 strcmp(status, "[A4_2]") == 0 && !info.hasErrors()) {
1101             if (result.endsWith(UnicodeString::readOnlyAlias(u".")) &&
1102                     // !contains
1103                     result.indexOf(UnicodeString::readOnlyAlias(u"..")) < 0) {
1104                 expectedHasErrors = false;
1105             }
1106         }
1107     }
1108     if (expectedHasErrors != info.hasErrors()) {
1109         errln("%s  expected errors %s %d != %d = actual has errors: %04lx\n    %s",
1110               type, status, expectedHasErrors, info.hasErrors(), static_cast<long>(info.getErrors()), line);
1111     }
1112     if (!expectedHasErrors && expected != result) {
1113         errln("%s  expected != actual\n    %s", type, line);
1114         errln(UnicodeString(u"    ") + expected);
1115         errln(UnicodeString(u"    ") + result);
1116     }
1117 }
1118 
idnaTestOneLine(char * fields[][2],UErrorCode & errorCode)1119 void UTS46Test::idnaTestOneLine(char *fields[][2], UErrorCode &errorCode) {
1120     // IdnaTestV2.txt (since Unicode 11)
1121     // Column 1: source
1122     // The source string to be tested.
1123     // "" means the empty string.
1124     UnicodeString source = s16FromField(fields[0], UnicodeString());
1125 
1126     // Column 2: toUnicode
1127     // The result of applying toUnicode to the source, with Transitional_Processing=false.
1128     // A blank value means the same as the source value.
1129     // "" means the empty string.
1130     UnicodeString toUnicode = s16FromField(fields[1], source);
1131 
1132     // Column 3: toUnicodeStatus
1133     // A set of status codes, each corresponding to a particular test.
1134     // A blank value means [].
1135     std::string toUnicodeStatus = statusFromField(fields[2]);
1136 
1137     // Column 4: toAsciiN
1138     // The result of applying toASCII to the source, with Transitional_Processing=false.
1139     // A blank value means the same as the toUnicode value.
1140     // "" means the empty string.
1141     UnicodeString toAsciiN = s16FromField(fields[3], toUnicode);
1142 
1143     // Column 5: toAsciiNStatus
1144     // A set of status codes, each corresponding to a particular test.
1145     // A blank value means the same as the toUnicodeStatus value.
1146     std::string toAsciiNStatus = statusFromField(fields[4]);
1147     if (toAsciiNStatus.empty()) {
1148         toAsciiNStatus = toUnicodeStatus;
1149     }
1150 
1151     // Column 6: toAsciiT
1152     // The result of applying toASCII to the source, with Transitional_Processing=true.
1153     // A blank value means the same as the toAsciiN value.
1154     // "" means the empty string.
1155     UnicodeString toAsciiT = s16FromField(fields[5], toAsciiN);
1156 
1157     // Column 7: toAsciiTStatus
1158     // A set of status codes, each corresponding to a particular test.
1159     // A blank value means the same as the toAsciiNStatus value.
1160     std::string toAsciiTStatus = statusFromField(fields[6]);
1161     if (toAsciiTStatus.empty()) {
1162         toAsciiTStatus = toAsciiNStatus;
1163     }
1164 
1165     // ToASCII/ToUnicode, transitional/nontransitional
1166     UnicodeString uN, aN, aT;
1167     IDNAInfo uNInfo, aNInfo, aTInfo;
1168     nontrans->nameToUnicode(source, uN, uNInfo, errorCode);
1169     checkIdnaTestResult(fields[0][0], "toUnicodeNontrans", toUnicode, uN,
1170                         toUnicodeStatus.c_str(), uNInfo);
1171     nontrans->nameToASCII(source, aN, aNInfo, errorCode);
1172     checkIdnaTestResult(fields[0][0], "toASCIINontrans", toAsciiN, aN,
1173                         toAsciiNStatus.c_str(), aNInfo);
1174     trans->nameToASCII(source, aT, aTInfo, errorCode);
1175     checkIdnaTestResult(fields[0][0], "toASCIITrans", toAsciiT, aT,
1176                         toAsciiTStatus.c_str(), aTInfo);
1177 }
1178 
1179 namespace {
1180 
1181 // TODO: de-duplicate. Declares LocalStdioFilePointer, a local owner type for a FILE * (the macro is given fclose as the closing function).
1182 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
1183 
1184 }  // namespace
1185 
IdnaTest()1186 void UTS46Test::IdnaTest() {
1187     IcuTestErrorCode errorCode(*this, "IdnaTest");
1188     const char *sourceTestDataPath = getSourceTestData(errorCode);
1189     if (errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
1190                                        "folder (getSourceTestData())")) {
1191         return;
1192     }
1193     CharString path(sourceTestDataPath, errorCode);
1194     path.appendPathPart("IdnaTestV2.txt", errorCode);
1195     LocalStdioFilePointer idnaTestFile(fopen(path.data(), "r"));
1196     if (idnaTestFile.isNull()) {
1197         errln("unable to open %s", path.data());
1198         return;
1199     }
1200 
1201     // Columns (c1, c2,...) are separated by semicolons.
1202     // Leading and trailing spaces and tabs in each column are ignored.
1203     // Comments are indicated with hash marks.
1204     char *fields[kNumFields][2];
1205     u_parseDelimitedFile(path.data(), ';', fields, kNumFields, idnaTestLineFn, this, errorCode);
1206     if (errorCode.errIfFailureAndReset("error parsing IdnaTestV2.txt")) {
1207         return;
1208     }
1209 }
1210 
1211 #endif  // UCONFIG_NO_IDNA
1212