1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2005-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * created on: 2005jun15
12 * created by: Raymond Yang
13 */
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_IDNA
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "unicode/ucnv.h"
23 #include "unicode/ustring.h"
24 #include "unicode/uidna.h"
25 #include "unicode/utf16.h"
26 #include "idnaconf.h"
27
28 static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
29 static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
30 static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase
31
32 static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type
33 static const UChar C_TOASCII[] = {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0}; // toascii
34 static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode
35
36 static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail
37 static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass
38 static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail
39
40 static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc
41 static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44,
42 0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules
43
IdnaConfTest()44 IdnaConfTest::IdnaConfTest(){
45 base = NULL;
46 len = 0;
47 curOffset = 0;
48
49 type = option = passfail = -1;
50 namebase.setToBogus();
51 namezone.setToBogus();
52 }
~IdnaConfTest()53 IdnaConfTest::~IdnaConfTest(){
54 delete [] base;
55 }
56
57 #if !UCONFIG_NO_IDNA
58 /* this function is modified from RBBITest::ReadAndConvertFile()
59 *
60 */
ReadAndConvertFile()61 UBool IdnaConfTest::ReadAndConvertFile(){
62
63 char * source = NULL;
64 size_t source_len;
65
66 // read the test data file to memory
67 FILE* f = NULL;
68 UErrorCode status = U_ZERO_ERROR;
69
70 const char *path = IntlTest::getSourceTestData(status);
71 if (U_FAILURE(status)) {
72 errln("%s", u_errorName(status));
73 return FALSE;
74 }
75
76 const char* name = "idna_conf.txt"; // test data file
77 int t = static_cast<int>(strlen(path) + strlen(name) + 1);
78 char* absolute_name = new char[t];
79 strcpy(absolute_name, path);
80 strcat(absolute_name, name);
81 f = fopen(absolute_name, "rb");
82 delete [] absolute_name;
83
84 if (f == NULL){
85 dataerrln("fopen error on %s", name);
86 return FALSE;
87 }
88
89 fseek( f, 0, SEEK_END);
90 if ((source_len = ftell(f)) <= 0){
91 errln("Error reading test data file.");
92 fclose(f);
93 return FALSE;
94 }
95
96 source = new char[source_len];
97 fseek(f, 0, SEEK_SET);
98 if (fread(source, 1, source_len, f) != source_len) {
99 errln("Error reading test data file.");
100 delete [] source;
101 fclose(f);
102 return FALSE;
103 }
104 fclose(f);
105
106 // convert the UTF-8 encoded stream to UTF-16 stream
107 UConverter* conv = ucnv_open("utf-8", &status);
108 int dest_len = ucnv_toUChars(conv,
109 NULL, // dest,
110 0, // destCapacity,
111 source,
112 static_cast<int32_t>(source_len),
113 &status);
114 if (status == U_BUFFER_OVERFLOW_ERROR) {
115 // Buffer Overflow is expected from the preflight operation.
116 status = U_ZERO_ERROR;
117 UChar * dest = NULL;
118 dest = new UChar[ dest_len + 1];
119 ucnv_toUChars(conv, dest, dest_len + 1, source, static_cast<int32_t>(source_len), &status);
120 // Do not know the "if possible" behavior of ucnv_toUChars()
121 // Do it by ourself.
122 dest[dest_len] = 0;
123 len = dest_len;
124 base = dest;
125 delete [] source;
126 ucnv_close(conv);
127 return TRUE; // The buffer will owned by caller.
128 }
129 errln("UConverter error: %s", u_errorName(status));
130 delete [] source;
131 ucnv_close(conv);
132 return FALSE;
133 }
134
isNewlineMark()135 int IdnaConfTest::isNewlineMark(){
136 static const UChar LF = 0x0a;
137 static const UChar CR = 0x0d;
138 UChar c = base[curOffset];
139 // CR LF
140 if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){
141 return 2;
142 }
143
144 // CR or LF
145 if ( c == CR || c == LF) {
146 return 1;
147 }
148
149 return 0;
150 }
151
152 /* Read a logical line.
153 *
154 * All lines ending in a backslash (\) and immediately followed by a newline
155 * character are joined with the next line in the source file forming logical
156 * lines from the physical lines.
157 *
158 */
ReadOneLine(UnicodeString & buf)159 UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){
160 if ( !(curOffset < len) ) return FALSE; // stream end
161
162 static const UChar BACKSLASH = 0x5c;
163 buf.remove();
164 int t = 0;
165 while (curOffset < len){
166 if ((t = isNewlineMark())) { // end of line
167 curOffset += t;
168 break;
169 }
170 UChar c = base[curOffset];
171 if (c == BACKSLASH && curOffset < len -1){ // escaped new line mark
172 if ((t = isNewlineMark())){
173 curOffset += 1 + t; // BACKSLAH and NewlineMark
174 continue;
175 }
176 }
177 buf.append(c);
178 curOffset++;
179 }
180 return TRUE;
181 }
182
183 //
184 //===============================================================
185 //
186
187 /* Explain <xxxxx> tag to a native value
188 *
189 * Since <xxxxx> is always larger than the native value,
190 * the operation will replace the tag directly in the buffer,
191 * and, of course, will shift tail elements.
192 */
ExplainCodePointTag(UnicodeString & buf)193 void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
194 buf.append((UChar)0); // add a terminal NULL
195 UChar* bufBase = buf.getBuffer(buf.length());
196 UChar* p = bufBase;
197 while (*p != 0){
198 if ( *p != 0x3C){ // <
199 *bufBase++ = *p++;
200 } else {
201 p++; // skip <
202 UChar32 cp = 0;
203 for ( ;*p != 0x3E; p++){ // >
204 if (0x30 <= *p && *p <= 0x39){ // 0-9
205 cp = (cp * 16) + (*p - 0x30);
206 } else if (0x61 <= *p && *p <= 0x66){ // a-f
207 cp = (cp * 16) + (*p - 0x61) + 10;
208 } else if (0x41 <= *p && *p <= 0x46) {// A-F
209 cp = (cp * 16) + (*p - 0x41) + 10;
210 }
211 // no else. hope everything is good.
212 }
213 p++; // skip >
214 if (U_IS_BMP(cp)){
215 *bufBase++ = cp;
216 } else {
217 *bufBase++ = U16_LEAD(cp);
218 *bufBase++ = U16_TRAIL(cp);
219 }
220 }
221 }
222 *bufBase = 0; // close our buffer
223 buf.releaseBuffer();
224 }
225
Call()226 void IdnaConfTest::Call(){
227 if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){
228 errln("Incomplete record");
229 } else {
230 UErrorCode status = U_ZERO_ERROR;
231 UChar result[200] = {0,}; // simple life
232 const UChar *p = namebase.getTerminatedBuffer();
233 const int p_len = namebase.length();
234
235 if (type == 0 && option == 0){
236 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
237 } else if (type == 0 && option == 1){
238 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
239 } else if (type == 1 && option == 0){
240 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
241 } else if (type == 1 && option == 1){
242 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
243 }
244 if (passfail == 0){
245 if (U_FAILURE(status)){
246 id.append(" should pass, but failed. - ");
247 id.append(u_errorName(status));
248 errcheckln(status, id);
249 } else{
250 if (namezone.compare(result, -1) == 0){
251 // expected
252 logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
253 } else {
254 id.append(" no error, but result is not as expected.");
255 errln(id);
256 }
257 }
258 } else if (passfail == 1){
259 if (U_FAILURE(status)){
260 // expected
261 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName
262 //logln("Got the expected error: " + UnicodeString(u_errorName(status)));
263 } else{
264 if (namebase.compare(result, -1) == 0){
265 // garbage in -> garbage out
266 logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
267 } else {
268 id.append(" should fail, but not failed. ");
269 id.append(u_errorName(status));
270 errln(id);
271 }
272 }
273 }
274 }
275 type = option = passfail = -1;
276 namebase.setToBogus();
277 namezone.setToBogus();
278 id.remove();
279 return;
280 }
281
Test(void)282 void IdnaConfTest::Test(void){
283 if (!ReadAndConvertFile())return;
284
285 UnicodeString s;
286 UnicodeString key;
287 UnicodeString value;
288
289 // skip everything before the first "=====" and "=====" itself
290 do {
291 if (!ReadOneLine(s)) {
292 errln("End of file prematurely found");
293 break;
294 }
295 }
296 while (s.compare(C_TAG, -1) != 0); //"====="
297
298 while(ReadOneLine(s)){
299 s.trim();
300 key.remove();
301 value.remove();
302 if (s.compare(C_TAG, -1) == 0){ //"====="
303 Call();
304 } else {
305 // explain key:value
306 int p = s.indexOf((UChar)0x3A); // :
307 key.setTo(s,0,p).trim();
308 value.setTo(s,p+1).trim();
309 if (key.compare(C_TYPE, -1) == 0){
310 if (value.compare(C_TOASCII, -1) == 0) {
311 type = 0;
312 } else if (value.compare(C_TOUNICODE, -1) == 0){
313 type = 1;
314 }
315 } else if (key.compare(C_PASSFAIL, -1) == 0){
316 if (value.compare(C_PASS, -1) == 0){
317 passfail = 0;
318 } else if (value.compare(C_FAIL, -1) == 0){
319 passfail = 1;
320 }
321 } else if (key.compare(C_DESC, -1) == 0){
322 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
323 option = 1; // not found
324 } else {
325 option = 0;
326 }
327 id.setTo(value, 0, value.indexOf((UChar)0x20)); // space
328 } else if (key.compare(C_NAMEZONE, -1) == 0){
329 ExplainCodePointTag(value);
330 namezone.setTo(value);
331 } else if (key.compare(C_NAMEBASE, -1) == 0){
332 ExplainCodePointTag(value);
333 namebase.setTo(value);
334 }
335 // just skip other lines
336 }
337 }
338
339 Call(); // for last record
340 }
341 #else
Test(void)342 void IdnaConfTest::Test(void)
343 {
344 // test nothing...
345 }
346 #endif
347
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)348 void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){
349 switch (index) {
350 TESTCASE(0,Test);
351 default: name = ""; break;
352 }
353 }
354
355 #endif
356