1 /* Copyright The libuv project and contributors. All rights reserved.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE.
20 */
21
22 #include "task.h"
23 #define uv__malloc malloc
24 #ifdef USE_OHOS_DFX
25 #include "../src/idna.h"
26 #else
27 #include "../src/idna.c"
28 #endif
29 #include <string.h>
30
/* Drives uv__utf8_decode1() over well-formed sequences of one to four bytes
 * and over several kinds of malformed input. Each step checks the value the
 * decoder returns and where it leaves the read cursor.
 */
TEST_IMPL(utf8_decode1) {
  char buf[32];
  const char* const end = buf + sizeof(buf);
  const char* cur;
  int n;

  /* One-byte sequences: a NUL byte (written via %c) followed by 0x7F. */
  snprintf(buf, sizeof(buf), "%c\x7F", 0x00);
  cur = buf;
  ASSERT_OK(uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 1);
  ASSERT_EQ(127, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 2);

  /* Two-byte sequences: expected to decode to 128 and 0x7FF. */
  snprintf(buf, sizeof(buf), "\xC2\x80\xDF\xBF");
  cur = buf;
  ASSERT_EQ(128, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 2);
  ASSERT_EQ(0x7FF, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);

  /* Three-byte sequences: expected to decode to 0x800 and 0xFFFF. */
  snprintf(buf, sizeof(buf), "\xE0\xA0\x80\xEF\xBF\xBF");
  cur = buf;
  ASSERT_EQ(0x800, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 3);
  ASSERT_EQ(0xFFFF, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 6);

  /* Four-byte sequences: expected to decode to 0x10000 and 0x10FFFF. */
  snprintf(buf, sizeof(buf), "\xF0\x90\x80\x80\xF4\x8F\xBF\xBF");
  cur = buf;
  ASSERT_EQ(0x10000, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);
  ASSERT_EQ(0x10FFFF, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 8);

  /* Four-byte sequences above U+10FFFF must be rejected; the cursor is still
   * expected to move past all four bytes.
   */
  snprintf(buf, sizeof(buf), "\xF4\x90\xC0\xC0\xF7\xBF\xBF\xBF");
  cur = buf;
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 8);

  /* Overlong two-byte encodings must be rejected. */
  snprintf(buf, sizeof(buf), "\xC0\x80\xC1\x80");
  cur = buf;
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 2);
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);

  /* Encoded surrogates must be rejected. */
  snprintf(buf, sizeof(buf), "\xED\xA0\x80\xED\xA3\xBF");
  cur = buf;
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 3);
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 6);

  /* Bytes 0xF8..0xFF: each one is expected to fail on its own, with the
   * cursor advancing a single byte per call.
   */
  snprintf(buf, sizeof(buf), "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
  cur = buf;

  for (n = 0; n < 8; n++) {
    ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
    ASSERT_PTR_EQ(cur, buf + n + 1);
  }

  return 0;
}
103
TEST_IMPL(utf8_decode1_overrun)104 TEST_IMPL(utf8_decode1_overrun) {
105 const char* p;
106 char b[1];
107 char c[1];
108
109 /* Single byte. */
110 p = b;
111 b[0] = 0x7F;
112 ASSERT_EQ(0x7F, uv__utf8_decode1(&p, b + 1));
113 ASSERT_PTR_EQ(p, b + 1);
114
115 /* Multi-byte. */
116 p = b;
117 b[0] = 0xC0;
118 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + 1));
119 ASSERT_PTR_EQ(p, b + 1);
120
121 b[0] = 0x7F;
122 ASSERT_EQ(UV_EINVAL, uv__idna_toascii(b, b + 0, c, c + 1));
123 ASSERT_EQ(UV_EINVAL, uv__idna_toascii(b, b + 1, c, c + 1));
124
125 return 0;
126 }
127
128 /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
129 #ifndef __MVS__
130
/* F(input, err): run uv__idna_toascii() on the string literal `input`
 * (terminating NUL excluded) with a zero-filled 256-byte output buffer and
 * assert that the call returns `err`.
 */
#define F(input, err)                                                         \
  do {                                                                        \
    static const char src[] = "" input "";                                    \
    char dst[256] = {0};                                                      \
    ASSERT_EQ(err,                                                            \
              uv__idna_toascii(src, src + sizeof(src) - 1,                    \
                               dst, dst + sizeof(dst)));                      \
  } while (0)
137
/* T(input, expected): run uv__idna_toascii() on the string literal `input`
 * and assert that the return value equals sizeof(expected) and that the
 * output bytes match `expected`. The output is then converted a second time
 * and must come out unchanged.
 */
#define T(input, expected)                                                    \
  do {                                                                        \
    static const char src[] = "" input "";                                    \
    char pass1[256] = {0};                                                    \
    char pass2[256] = {0};                                                    \
    long len;                                                                 \
    len = uv__idna_toascii(src, src + sizeof(src) - 1,                        \
                           pass1, pass1 + sizeof(pass1));                     \
    ASSERT_EQ(len, sizeof(expected));                                         \
    ASSERT_OK(memcmp(pass1, expected, len));                                  \
    /* Feeding the result back in must reproduce it byte for byte. */         \
    len = uv__idna_toascii(pass1, pass1 + strlen(pass1),                      \
                           pass2, pass2 + sizeof(pass2));                     \
    ASSERT_EQ(len, sizeof(expected));                                         \
    ASSERT_OK(memcmp(pass2, expected, len));                                  \
    /* Both buffers start zeroed, so compare them in full. */                 \
    ASSERT_OK(memcmp(pass1, pass2, sizeof(pass2)));                           \
  } while (0)
153
TEST_IMPL(idna_toascii)154 TEST_IMPL(idna_toascii) {
155 /* Illegal inputs. */
156 F("\xC0\x80\xC1\x80", UV_EINVAL); /* Overlong UTF-8 sequence. */
157 F("\xC0\x80\xC1\x80.com", UV_EINVAL); /* Overlong UTF-8 sequence. */
158 F("", UV_EINVAL);
159 /* No conversion. */
160 T(".", ".");
161 T(".com", ".com");
162 T("example", "example");
163 T("example-", "example-");
164 T("straße.de", "xn--strae-oqa.de");
165 /* Test cases adapted from punycode.js. Most are from RFC 3492. */
166 T("foo.bar", "foo.bar");
167 T("mañana.com", "xn--maana-pta.com");
168 T("example.com.", "example.com.");
169 T("bücher.com", "xn--bcher-kva.com");
170 T("café.com", "xn--caf-dma.com");
171 T("café.café.com", "xn--caf-dma.xn--caf-dma.com");
172 T("☃-⌘.com", "xn----dqo34k.com");
173 T("퐀☃-⌘.com", "xn----dqo34kn65z.com");
174 T(".la", "xn--ls8h.la");
175 T("mañana.com", "xn--maana-pta.com");
176 T("mañana。com", "xn--maana-pta.com");
177 T("mañana.com", "xn--maana-pta.com");
178 T("mañana。com", "xn--maana-pta.com");
179 T("ü", "xn--tda");
180 T(".ü", ".xn--tda");
181 T("ü.ü", "xn--tda.xn--tda");
182 T("ü.ü.", "xn--tda.xn--tda.");
183 T("üëäö♥", "xn--4can8av2009b");
184 T("Willst du die Blüthe des frühen, die Früchte des späteren Jahres",
185 "xn--Willst du die Blthe des frhen, "
186 "die Frchte des spteren Jahres-x9e96lkal");
187 T("ليهمابتكلموشعربي؟", "xn--egbpdaj6bu4bxfgehfvwxn");
188 T("他们为什么不说中文", "xn--ihqwcrb4cv8a8dqg056pqjye");
189 T("他們爲什麽不說中文", "xn--ihqwctvzc91f659drss3x8bo0yb");
190 T("Pročprostěnemluvíčesky", "xn--Proprostnemluvesky-uyb24dma41a");
191 T("למההםפשוטלאמדבריםעברית", "xn--4dbcagdahymbxekheh6e0a7fei0b");
192 T("यहलोगहिन्दीक्योंनहींबोलसकतेहैं",
193 "xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd");
194 T("なぜみんな日本語を話してくれないのか",
195 "xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa");
196 T("세계의모든사람들이한국어를이해한다면얼마나좋을까",
197 "xn--989aomsvi5e83db1d2a355cv1e0vak1d"
198 "wrv93d5xbh15a0dt30a5jpsd879ccm6fea98c");
199 T("почемужеонинеговорятпорусски", "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l");
200 T("PorquénopuedensimplementehablarenEspañol",
201 "xn--PorqunopuedensimplementehablarenEspaol-fmd56a");
202 T("TạisaohọkhôngthểchỉnóitiếngViệt",
203 "xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g");
204 T("3年B組金八先生", "xn--3B-ww4c5e180e575a65lsy2b");
205 T("安室奈美恵-with-SUPER-MONKEYS",
206 "xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n");
207 T("Hello-Another-Way-それぞれの場所",
208 "xn--Hello-Another-Way--fc4qua05auwb3674vfr0b");
209 T("ひとつ屋根の下2", "xn--2-u9tlzr9756bt3uc0v");
210 T("MajiでKoiする5秒前", "xn--MajiKoi5-783gue6qz075azm5e");
211 T("パフィーdeルンバ", "xn--de-jg4avhby1noc0d");
212 T("そのスピードで", "xn--d9juau41awczczp");
213 T("-> $1.00 <-", "-> $1.00 <-");
214 /* Test cases from https://unicode.org/reports/tr46/ */
215 T("faß.de", "xn--fa-hia.de");
216 T("βόλος.com", "xn--nxasmm1c.com");
217 T("ශ්රී.com", "xn--10cl1a0b660p.com");
218 T("نامهای.com", "xn--mgba3gch31f060k.com");
219 return 0;
220 }
221
/* Both helper macros are local to the tests above; drop them together. */
#undef F
#undef T
223
224 #endif /* __MVS__ */
225