1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2005-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * created on: 2005jun15
12 * created by: Raymond Yang
13 */
14
15 #if !UCONFIG_NO_IDNA
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "unicode/utypes.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uidna.h"
24 #include "unicode/utf16.h"
25 #include "idnaconf.h"
26
// Keywords recognised while parsing the test data file, spelled out as
// NUL-terminated arrays of UTF-16 code units so that they can be compared
// directly against the UnicodeStrings produced by the parser.

// A line consisting of this tag separates two records.
static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
// Keys of the two name fields: namezone is compared against the conversion
// result, namebase is the string handed to the conversion.
static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase

// Key "type" and its two recognised values (stored as type 0 and type 1).
static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type
static const UChar C_TOASCII[] = {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0}; // toascii
static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode

// Key "passfail" and its two recognised values (stored as passfail 0 and 1).
static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail
static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass
static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail

// Key "desc"; its value is searched for the UseSTD3ASCIIRules marker to pick
// the conversion option.
static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc
static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44,
    0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules
42
IdnaConfTest()43 IdnaConfTest::IdnaConfTest(){
44 base = NULL;
45 len = 0;
46 curOffset = 0;
47
48 type = option = passfail = -1;
49 namebase.setToBogus();
50 namezone.setToBogus();
51 }
~IdnaConfTest()52 IdnaConfTest::~IdnaConfTest(){
53 delete [] base;
54 }
55
56 #if !UCONFIG_NO_IDNA
57 /* this function is modified from RBBITest::ReadAndConvertFile()
58 *
59 */
ReadAndConvertFile()60 UBool IdnaConfTest::ReadAndConvertFile(){
61
62 char * source = NULL;
63 size_t source_len;
64
65 // read the test data file to memory
66 FILE* f = NULL;
67 UErrorCode status = U_ZERO_ERROR;
68
69 const char *path = IntlTest::getSourceTestData(status);
70 if (U_FAILURE(status)) {
71 errln("%s", u_errorName(status));
72 return FALSE;
73 }
74
75 const char* name = "idna_conf.txt"; // test data file
76 int t = strlen(path) + strlen(name) + 1;
77 char* absolute_name = new char[t];
78 strcpy(absolute_name, path);
79 strcat(absolute_name, name);
80 f = fopen(absolute_name, "rb");
81 delete [] absolute_name;
82
83 if (f == NULL){
84 dataerrln("fopen error on %s", name);
85 return FALSE;
86 }
87
88 fseek( f, 0, SEEK_END);
89 if ((source_len = ftell(f)) <= 0){
90 errln("Error reading test data file.");
91 fclose(f);
92 return FALSE;
93 }
94
95 source = new char[source_len];
96 fseek(f, 0, SEEK_SET);
97 if (fread(source, 1, source_len, f) != source_len) {
98 errln("Error reading test data file.");
99 delete [] source;
100 fclose(f);
101 return FALSE;
102 }
103 fclose(f);
104
105 // convert the UTF-8 encoded stream to UTF-16 stream
106 UConverter* conv = ucnv_open("utf-8", &status);
107 int dest_len = ucnv_toUChars(conv,
108 NULL, // dest,
109 0, // destCapacity,
110 source,
111 source_len,
112 &status);
113 if (status == U_BUFFER_OVERFLOW_ERROR) {
114 // Buffer Overflow is expected from the preflight operation.
115 status = U_ZERO_ERROR;
116 UChar * dest = NULL;
117 dest = new UChar[ dest_len + 1];
118 ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status);
119 // Do not know the "if possible" behavior of ucnv_toUChars()
120 // Do it by ourself.
121 dest[dest_len] = 0;
122 len = dest_len;
123 base = dest;
124 delete [] source;
125 ucnv_close(conv);
126 return TRUE; // The buffer will owned by caller.
127 }
128 errln("UConverter error: %s", u_errorName(status));
129 delete [] source;
130 ucnv_close(conv);
131 return FALSE;
132 }
133
isNewlineMark()134 int IdnaConfTest::isNewlineMark(){
135 static const UChar LF = 0x0a;
136 static const UChar CR = 0x0d;
137 UChar c = base[curOffset];
138 // CR LF
139 if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){
140 return 2;
141 }
142
143 // CR or LF
144 if ( c == CR || c == LF) {
145 return 1;
146 }
147
148 return 0;
149 }
150
151 /* Read a logical line.
152 *
153 * All lines ending in a backslash (\) and immediately followed by a newline
154 * character are joined with the next line in the source file forming logical
155 * lines from the physical lines.
156 *
157 */
ReadOneLine(UnicodeString & buf)158 UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){
159 if ( !(curOffset < len) ) return FALSE; // stream end
160
161 static const UChar BACKSLASH = 0x5c;
162 buf.remove();
163 int t = 0;
164 while (curOffset < len){
165 if ((t = isNewlineMark())) { // end of line
166 curOffset += t;
167 break;
168 }
169 UChar c = base[curOffset];
170 if (c == BACKSLASH && curOffset < len -1){ // escaped new line mark
171 if ((t = isNewlineMark())){
172 curOffset += 1 + t; // BACKSLAH and NewlineMark
173 continue;
174 }
175 };
176 buf.append(c);
177 curOffset++;
178 }
179 return TRUE;
180 }
181
182 //
183 //===============================================================
184 //
185
186 /* Explain <xxxxx> tag to a native value
187 *
188 * Since <xxxxx> is always larger than the native value,
189 * the operation will replace the tag directly in the buffer,
190 * and, of course, will shift tail elements.
191 */
ExplainCodePointTag(UnicodeString & buf)192 void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
193 buf.append((UChar)0); // add a terminal NULL
194 UChar* bufBase = buf.getBuffer(buf.length());
195 UChar* p = bufBase;
196 while (*p != 0){
197 if ( *p != 0x3C){ // <
198 *bufBase++ = *p++;
199 } else {
200 p++; // skip <
201 UChar32 cp = 0;
202 for ( ;*p != 0x3E; p++){ // >
203 if (0x30 <= *p && *p <= 0x39){ // 0-9
204 cp = (cp * 16) + (*p - 0x30);
205 } else if (0x61 <= *p && *p <= 0x66){ // a-f
206 cp = (cp * 16) + (*p - 0x61) + 10;
207 } else if (0x41 <= *p && *p <= 0x46) {// A-F
208 cp = (cp * 16) + (*p - 0x41) + 10;
209 }
210 // no else. hope everything is good.
211 }
212 p++; // skip >
213 if (U_IS_BMP(cp)){
214 *bufBase++ = cp;
215 } else {
216 *bufBase++ = U16_LEAD(cp);
217 *bufBase++ = U16_TRAIL(cp);
218 }
219 }
220 }
221 *bufBase = 0; // close our buffer
222 buf.releaseBuffer();
223 }
224
Call()225 void IdnaConfTest::Call(){
226 if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){
227 errln("Incomplete record");
228 } else {
229 UErrorCode status = U_ZERO_ERROR;
230 UChar result[200] = {0,}; // simple life
231 const UChar *p = namebase.getTerminatedBuffer();
232 const int p_len = namebase.length();
233
234 if (type == 0 && option == 0){
235 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
236 } else if (type == 0 && option == 1){
237 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
238 } else if (type == 1 && option == 0){
239 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
240 } else if (type == 1 && option == 1){
241 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
242 }
243 if (passfail == 0){
244 if (U_FAILURE(status)){
245 id.append(" should pass, but failed. - ");
246 id.append(u_errorName(status));
247 errcheckln(status, id);
248 } else{
249 if (namezone.compare(result, -1) == 0){
250 // expected
251 logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
252 } else {
253 id.append(" no error, but result is not as expected.");
254 errln(id);
255 }
256 }
257 } else if (passfail == 1){
258 if (U_FAILURE(status)){
259 // expected
260 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName
261 //logln("Got the expected error: " + UnicodeString(u_errorName(status)));
262 } else{
263 if (namebase.compare(result, -1) == 0){
264 // garbage in -> garbage out
265 logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
266 } else {
267 id.append(" should fail, but not failed. ");
268 id.append(u_errorName(status));
269 errln(id);
270 }
271 }
272 }
273 }
274 type = option = passfail = -1;
275 namebase.setToBogus();
276 namezone.setToBogus();
277 id.remove();
278 return;
279 }
280
Test(void)281 void IdnaConfTest::Test(void){
282 if (!ReadAndConvertFile())return;
283
284 UnicodeString s;
285 UnicodeString key;
286 UnicodeString value;
287
288 // skip everything before the first "=====" and "=====" itself
289 do {
290 if (!ReadOneLine(s)) {
291 errln("End of file prematurely found");
292 break;
293 }
294 }
295 while (s.compare(C_TAG, -1) != 0); //"====="
296
297 while(ReadOneLine(s)){
298 s.trim();
299 key.remove();
300 value.remove();
301 if (s.compare(C_TAG, -1) == 0){ //"====="
302 Call();
303 } else {
304 // explain key:value
305 int p = s.indexOf((UChar)0x3A); // :
306 key.setTo(s,0,p).trim();
307 value.setTo(s,p+1).trim();
308 if (key.compare(C_TYPE, -1) == 0){
309 if (value.compare(C_TOASCII, -1) == 0) {
310 type = 0;
311 } else if (value.compare(C_TOUNICODE, -1) == 0){
312 type = 1;
313 }
314 } else if (key.compare(C_PASSFAIL, -1) == 0){
315 if (value.compare(C_PASS, -1) == 0){
316 passfail = 0;
317 } else if (value.compare(C_FAIL, -1) == 0){
318 passfail = 1;
319 }
320 } else if (key.compare(C_DESC, -1) == 0){
321 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
322 option = 1; // not found
323 } else {
324 option = 0;
325 }
326 id.setTo(value, 0, value.indexOf((UChar)0x20)); // space
327 } else if (key.compare(C_NAMEZONE, -1) == 0){
328 ExplainCodePointTag(value);
329 namezone.setTo(value);
330 } else if (key.compare(C_NAMEBASE, -1) == 0){
331 ExplainCodePointTag(value);
332 namebase.setTo(value);
333 }
334 // just skip other lines
335 }
336 }
337
338 Call(); // for last record
339 }
340 #else
Test(void)341 void IdnaConfTest::Test(void)
342 {
343 // test nothing...
344 }
345 #endif
346
/* Maps a test index to a test method of this class.
 *
 * index - which test is being asked for; only index 0 is defined here
 * exec  - not referenced directly in this function; presumably consumed by
 *         the TESTCASE macro - confirm against its definition
 * name  - output; set to "" for every index other than 0
 *
 * NOTE(review): TESTCASE is defined outside this file; it presumably sets
 * name for index 0 and runs Test() when exec is true - confirm.
 */
void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){
    switch (index) {
        // index 0 is bound to Test()
        TESTCASE(0,Test);
        // any other index: report an empty name
        default: name = ""; break;
    }
}
353
354 #endif
355