1 #include "node_url.h"
2 #include "base_object-inl.h"
3 #include "node_errors.h"
4 #include "node_i18n.h"
5 #include "util-inl.h"
6
7 #include <cmath>
8 #include <cstdio>
9 #include <string>
10 #include <vector>
11
12 namespace node {
13
14 using errors::TryCatchScope;
15
16 using v8::Array;
17 using v8::Context;
18 using v8::Function;
19 using v8::FunctionCallbackInfo;
20 using v8::HandleScope;
21 using v8::Int32;
22 using v8::Integer;
23 using v8::Isolate;
24 using v8::Local;
25 using v8::MaybeLocal;
26 using v8::NewStringType;
27 using v8::Null;
28 using v8::Object;
29 using v8::String;
30 using v8::Undefined;
31 using v8::Value;
32
Utf8String(Isolate * isolate,const std::string & str)33 Local<String> Utf8String(Isolate* isolate, const std::string& str) {
34 return String::NewFromUtf8(isolate,
35 str.data(),
36 NewStringType::kNormal,
37 str.length()).ToLocalChecked();
38 }
39
40 namespace url {
41
42 namespace {
43
// https://url.spec.whatwg.org/#eof-code-point
// Stand-in "character" the parsers read once they have run past the end of
// their input.
constexpr char kEOL = static_cast<char>(-1);

// U+FFFD REPLACEMENT CHARACTER. Used in ToUSVString().
constexpr char16_t kUnicodeReplacementCharacter = u'\uFFFD';
49
// https://url.spec.whatwg.org/#concept-host
//
// Result of parsing a host. A freshly constructed object is in the "failed"
// state; a successful Parse*Host() call is expected to move it to one of the
// other HostType states.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host has been parsed successfully.
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host kinds; `type_` says which member is live.
  union Value {
    std::string domain_or_opaque;  // Live for H_DOMAIN and H_OPAQUE.
    uint32_t ipv4;
    uint16_t ipv6[8];              // Live for H_IPV6.

    // The string member is constructed and destroyed by URLHost by hand, so
    // the union itself only ever starts out as a zeroed ipv4 value and has
    // nothing to clean up.
    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Whether `value_.domain_or_opaque` currently holds a constructed string.
  bool HoldsString() const {
    return type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
  }

  // Destroys the string member if it is live and returns to the failed state.
  void Reset() {
    if (HoldsString())
      value_.domain_or_opaque.~basic_string();
    type_ = HostType::H_FAILED;
  }

  // Plain assignment to the string member of the union would only be valid if
  // a string had already been constructed there. That used to work by
  // accident because ParseIPv6Host() zero-fills `value_`, which leans on
  // standard library internals far too much. Instead, the setters below
  // always destroy whatever is live and placement-new a fresh string.
  // Not keeping strings in a union at all might be the nicer long-term fix.
  void EmplaceString(HostType kind, std::string&& str) {
    Reset();
    type_ = kind;
    new(&value_.domain_or_opaque) std::string(std::move(str));
  }

  void SetOpaque(std::string&& str) {
    EmplaceString(HostType::H_OPAQUE, std::move(str));
  }

  void SetDomain(std::string&& str) {
    EmplaceString(HostType::H_DOMAIN, std::move(str));
  }
};

// Releases the string member if one is live.
URLHost::~URLHost() {
  Reset();
}
126
// X-macro listing the enumerators of url_cb_args in order. ARG_COUNT is not a
// real slot: being last, its value equals the number of entries before it.
// NOTE(review): presumably these index the argument list handed to the JS
// parse callback -- confirm at the call site.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// X-macro listing the enumerators of url_error_cb_args in order.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

// Both enums are generated from the lists above, one enumerator per entry,
// numbered from 0.
#define XX(name) name,
enum url_cb_args {
  ARGS(XX)
};

enum url_error_cb_args {
  ERR_ARGS(XX)
};
#undef XX
154
// Defines `bool name(ch)`: a predicate over one character `ch` of any
// character type that is at least `bits` bits wide (checked at compile time).
// `expr` is the test itself and refers to its argument as `ch`.
#define CHAR_TEST(bits, name, expr)                                           \
  template <typename T>                                                       \
  bool name(const T ch) {                                                     \
    static_assert(sizeof(T) * 8 >= (bits),                                    \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }

// Defines two overloads of `name`:
//  - `bool name(ch1, ch2)` evaluates `expr` on a pair of characters;
//  - `bool name(str)` applies the same test to the first two characters of a
//    std::basic_string and is false for strings shorter than two characters.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(T) * 8 >= (bits),                                    \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(T) * 8 >= (bits),                                    \
                  "Character must be wider than " #bits " bits");             \
    if (str.size() < 2)                                                       \
      return false;                                                           \
    return name(str[0], str[1]);                                              \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'a' && ch <= 'f') ||
                               (ch >= 'A' && ch <= 'F')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'a' && ch <= 'z') ||
                            (ch >= 'A' && ch <= 'Z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIAlpha(ch) || IsASCIIDigit(ch)))

// https://infra.spec.whatwg.org/#ascii-lowercase
// Only ASCII letters are changed (by setting bit 0x20); every other value is
// returned as is.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '\\' || ch == ']')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)

// If a UTF-16 surrogate is a low/trailing one. Only meaningful for values
// already known to be surrogates: just bit 0x400 is inspected.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
230
// Percent-encoded spelling of every byte value: hex[b] is "%XX", where XX is
// `b` as two uppercase hexadecimal digits.
// HEX_ROW(h) expands to the sixteen entries whose high nibble is `h`; adjacent
// string literals are merged by the compiler, so each entry is one literal.
#define HEX_ROW(hi)                                                           \
  "%" #hi "0", "%" #hi "1", "%" #hi "2", "%" #hi "3",                         \
  "%" #hi "4", "%" #hi "5", "%" #hi "6", "%" #hi "7",                         \
  "%" #hi "8", "%" #hi "9", "%" #hi "A", "%" #hi "B",                         \
  "%" #hi "C", "%" #hi "D", "%" #hi "E", "%" #hi "F"

const char* hex[256] = {
  HEX_ROW(0), HEX_ROW(1), HEX_ROW(2), HEX_ROW(3),
  HEX_ROW(4), HEX_ROW(5), HEX_ROW(6), HEX_ROW(7),
  HEX_ROW(8), HEX_ROW(9), HEX_ROW(A), HEX_ROW(B),
  HEX_ROW(C), HEX_ROW(D), HEX_ROW(E), HEX_ROW(F)
};

#undef HEX_ROW
265
// 256-bit membership bitmap, one bit per byte value: bit (c & 7) of element
// (c >> 3) is set when byte value c belongs to the set.
// Members: 0x00-0x1F, 0x7F and 0x80-0xFF.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  // 0x00-0x1F: all set.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x77: none set.
  0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: all set.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
332
// 256-bit membership bitmap, one bit per byte value: bit (c & 7) of element
// (c >> 3) is set when byte value c belongs to the set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '<', '>' and '`'.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  // 0x00-0x1F: all set.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: ' ' (0x20), '"' (0x22).
  0x05,
  // 0x28-0x37: none set.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C), '>' (0x3E).
  0x50,
  // 0x40-0x5F: none set.
  0x00, 0x00, 0x00, 0x00,
  // 0x60-0x67: '`' (0x60).
  0x01,
  // 0x68-0x77: none set.
  0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: all set.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
399
400
// 256-bit membership bitmap, one bit per byte value: bit (c & 7) of element
// (c >> 3) is set when byte value c belongs to the set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '#', '<', '>', '?',
// '`', '{' and '}'.
const uint8_t PATH_ENCODE_SET[32] = {
  // 0x00-0x1F: all set.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: ' ' (0x20), '"' (0x22), '#' (0x23).
  0x0D,
  // 0x28-0x37: none set.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C), '>' (0x3E), '?' (0x3F).
  0xD0,
  // 0x40-0x5F: none set.
  0x00, 0x00, 0x00, 0x00,
  // 0x60-0x67: '`' (0x60).
  0x01,
  // 0x68-0x77: none set.
  0x00, 0x00,
  // 0x78-0x7F: '{' (0x7B), '}' (0x7D), 0x7F (DEL).
  0xA8,
  // 0x80-0xFF: all set.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
467
// 256-bit membership bitmap, one bit per byte value: bit (c & 7) of element
// (c >> 3) is set when byte value c belongs to the set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '#', '/', ':', ';',
// '<', '=', '>', '?', '@', '[', '\', ']', '^', '`', '{', '|' and '}'.
const uint8_t USERINFO_ENCODE_SET[32] = {
  // 0x00-0x1F: all set.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: ' ' (0x20), '"' (0x22), '#' (0x23).
  0x0D,
  // 0x28-0x2F: '/' (0x2F).
  0x80,
  // 0x30-0x37: none set.
  0x00,
  // 0x38-0x3F: ':' ';' '<' '=' '>' '?' (0x3A-0x3F).
  0xFC,
  // 0x40-0x47: '@' (0x40).
  0x01,
  // 0x48-0x57: none set.
  0x00, 0x00,
  // 0x58-0x5F: '[' '\' ']' '^' (0x5B-0x5E).
  0x78,
  // 0x60-0x67: '`' (0x60).
  0x01,
  // 0x68-0x77: none set.
  0x00, 0x00,
  // 0x78-0x7F: '{' '|' '}' (0x7B-0x7D), 0x7F (DEL).
  0xB8,
  // 0x80-0xFF: all set.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
534
// 256-bit membership bitmap, one bit per byte value: bit (c & 7) of element
// (c >> 3) is set when byte value c belongs to the set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '#', '<' and '>'.
const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = {
  // 0x00-0x1F: all set.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: ' ' (0x20), '"' (0x22), '#' (0x23).
  0x0D,
  // 0x28-0x37: none set.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C), '>' (0x3E).
  0x50,
  // 0x40-0x77: none set.
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: all set.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
601
// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded.
// 256-bit membership bitmap, one bit per byte value: bit (c & 7) of element
// (c >> 3) is set when byte value c belongs to the set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '#', ''', '<' and '>'.
const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = {
  // 0x00-0x1F: all set.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: ' ' (0x20), '"' (0x22), '#' (0x23), ''' (0x27).
  0x8D,
  // 0x28-0x37: none set.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C), '>' (0x3E).
  0x50,
  // 0x40-0x77: none set.
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: all set.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
669
// Reads bit `i` of the bitmap `a`, where bit 0 is the least significant bit
// of a[0]. `a` must have at least (i / 8) + 1 elements.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (byte & mask) != 0;
}
673
674 // Appends ch to str. If ch position in encode_set is set, the ch will
675 // be percent-encoded then appended.
AppendOrEscape(std::string * str,const unsigned char ch,const uint8_t encode_set[])676 void AppendOrEscape(std::string* str,
677 const unsigned char ch,
678 const uint8_t encode_set[]) {
679 if (BitAt(encode_set, ch))
680 *str += hex[ch];
681 else
682 *str += ch;
683 }
684
// Numeric value (0-15) of the ASCII hexadecimal digit `ch`.
// Any other character yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  unsigned nibble = static_cast<unsigned>(-1);
  if (ch >= '0' && ch <= '9')
    nibble = ch - '0';
  else if (ch >= 'A' && ch <= 'F')
    nibble = 10 + (ch - 'A');
  else if (ch >= 'a' && ch <= 'f')
    nibble = 10 + (ch - 'a');
  return nibble;
}

// Returns a copy of the `len` bytes at `input` with every "%XX" escape (two
// hexadecimal digits, either case) replaced by the byte it encodes.
// A '%' that is not followed by two hex digits inside the input is copied
// through unchanged.
std::string PercentDecode(const char* input, size_t len) {
  std::string decoded;
  if (len == 0)
    return decoded;
  decoded.reserve(len);

  const char* const end = input + len;
  for (const char* cursor = input; cursor < end;) {
    // An escape needs three bytes: '%' plus both digits.
    if (*cursor == '%' && end - cursor >= 3) {
      const unsigned hi = hex2bin(cursor[1]);
      const unsigned lo = hex2bin(cursor[2]);
      // hex2bin() only returns values above 0xF for non-hex characters.
      if (hi <= 0xF && lo <= 0xF) {
        decoded += static_cast<char>(hi * 16 + lo);
        cursor += 3;
        continue;
      }
    }
    decoded += *cursor;
    cursor++;
  }
  return decoded;
}
724
// X-macro listing the special schemes. Each entry is
//   XX(key, port, name)
// where `name` is the scheme string including its trailing ':', `port` is the
// port number NormalizePort() drops for that scheme, and `key` is the token
// used to build per-scheme identifiers.
// NOTE(review): `file` carries -1 here, presumably meaning "no default port".
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(gopher, 70, "gopher:")                                                   \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")

// Whether `scheme` (including the trailing ':') is one of the SPECIALS names.
// The comparison is exact and case-sensitive.
bool IsSpecial(const std::string& scheme) {
  static const char* const kSpecialSchemes[] = {
#define V(_, __, name) name,
    SPECIALS(V)
#undef V
  };
  for (const char* special : kSpecialSchemes) {
    if (scheme == special)
      return true;
  }
  return false;
}
740
GetSpecial(Environment * env,const std::string & scheme)741 Local<String> GetSpecial(Environment* env, const std::string& scheme) {
742 #define V(key, _, name) if (scheme == name) \
743 return env->url_special_##key##_string();
744 SPECIALS(V)
745 #undef V
746 UNREACHABLE();
747 }
748
NormalizePort(const std::string & scheme,int p)749 int NormalizePort(const std::string& scheme, int p) {
750 #define V(_, port, name) if (scheme == name && p == port) return -1;
751 SPECIALS(V);
752 #undef V
753 return p;
754 }
755
756 // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
StartsWithWindowsDriveLetter(const char * p,const char * end)757 bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
758 size_t length = end - p;
759 return length >= 2 &&
760 IsWindowsDriveLetter(p[0], p[1]) &&
761 (length == 2 ||
762 p[2] == '/' ||
763 p[2] == '\\' ||
764 p[2] == '?' ||
765 p[2] == '#');
766 }
767
768 #if defined(NODE_HAVE_I18N_SUPPORT)
ToUnicode(const std::string & input,std::string * output)769 bool ToUnicode(const std::string& input, std::string* output) {
770 MaybeStackBuffer<char> buf;
771 if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
772 return false;
773 output->assign(*buf, buf.length());
774 return true;
775 }
776
ToASCII(const std::string & input,std::string * output)777 bool ToASCII(const std::string& input, std::string* output) {
778 MaybeStackBuffer<char> buf;
779 if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
780 return false;
781 output->assign(*buf, buf.length());
782 return true;
783 }
784 #else
// Intentional pass-throughs if ICU is not present: the input is copied to the
// output unchanged.
// Fallback without ICU: copies `input` to `*output` unchanged and always
// reports success.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
790
// Fallback without ICU: copies `input` to `*output` unchanged and always
// reports success.
bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
795 #endif
796
// Parses the textual IPv6 address in [input, input + length) into the eight
// 16-bit pieces of value_.ipv6. The caller is expected to have removed the
// surrounding '[' and ']' already.
//
// On success type_ becomes HostType::H_IPV6. Every early `return` below is a
// parse failure: type_ is left at HostType::H_FAILED (the state asserted on
// entry), although value_.ipv6 may have been partially written by then.
void URLHost::ParseIPv6Host(const char* input, size_t length) {
  CHECK_EQ(type_, HostType::H_FAILED);
  // Start from an all-zero address.
  unsigned size = arraysize(value_.ipv6);
  for (unsigned n = 0; n < size; n++)
    value_.ipv6[n] = 0;
  // piece_pointer: the piece currently being filled.
  // compress_pointer: the piece that follows a "::", or nullptr if no "::"
  // has been seen yet.
  uint16_t* piece_pointer = &value_.ipv6[0];
  uint16_t* const buffer_end = piece_pointer + size;
  uint16_t* compress_pointer = nullptr;
  const char* pointer = input;
  const char* end = pointer + length;
  unsigned value, len, numbers_seen;
  // `ch` always mirrors *pointer, or kEOL once pointer has reached `end`.
  char ch = pointer < end ? pointer[0] : kEOL;
  // A leading ':' is only valid as the first half of a leading "::".
  if (ch == ':') {
    if (length < 2 || pointer[1] != ':')
      return;
    pointer += 2;
    ch = pointer < end ? pointer[0] : kEOL;
    piece_pointer++;
    compress_pointer = piece_pointer;
  }
  while (ch != kEOL) {
    // More input remains but all eight pieces are already used.
    if (piece_pointer >= buffer_end)
      return;
    // A ':' at the start of a piece is the second colon of a "::"; only one
    // "::" is allowed per address.
    if (ch == ':') {
      if (compress_pointer != nullptr)
        return;
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      piece_pointer++;
      compress_pointer = piece_pointer;
      continue;
    }
    // Accumulate up to four hexadecimal digits for this piece.
    value = 0;
    len = 0;
    while (len < 4 && IsASCIIHexDigit(ch)) {
      value = value * 0x10 + hex2bin(ch);
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      len++;
    }
    switch (ch) {
      case '.':
        // The digits just consumed were the first number of an embedded
        // dotted-decimal IPv4 address: rewind and re-read them as decimal.
        if (len == 0)
          return;
        pointer -= len;
        ch = pointer < end ? pointer[0] : kEOL;
        // The IPv4 part occupies two pieces, so at least two must be free.
        if (piece_pointer > buffer_end - 2)
          return;
        numbers_seen = 0;
        while (ch != kEOL) {
          // 0xffffffff marks "no digit read yet" for the current number.
          value = 0xffffffff;
          if (numbers_seen > 0) {
            // Numbers after the first must be preceded by '.', and at most
            // four numbers are accepted.
            if (ch == '.' && numbers_seen < 4) {
              pointer++;
              ch = pointer < end ? pointer[0] : kEOL;
            } else {
              return;
            }
          }
          if (!IsASCIIDigit(ch))
            return;
          while (IsASCIIDigit(ch)) {
            unsigned number = ch - '0';
            if (value == 0xffffffff) {
              value = number;
            } else if (value == 0) {
              // A further digit after a leading zero is rejected.
              return;
            } else {
              value = value * 10 + number;
            }
            if (value > 255)
              return;
            pointer++;
            ch = pointer < end ? pointer[0] : kEOL;
          }
          // Two decimal numbers are packed into each 16-bit piece.
          *piece_pointer = *piece_pointer * 0x100 + value;
          numbers_seen++;
          if (numbers_seen == 2 || numbers_seen == 4)
            piece_pointer++;
        }
        if (numbers_seen != 4)
          return;
        // ch is kEOL here, so this `continue` ends the outer loop.
        continue;
      case ':':
        // Piece separator; it must be followed by more input.
        pointer++;
        ch = pointer < end ? pointer[0] : kEOL;
        if (ch == kEOL)
          return;
        break;
      case kEOL:
        break;
      default:
        return;
    }
    *piece_pointer = value;
    piece_pointer++;
  }

  if (compress_pointer != nullptr) {
    // Move the pieces that were written after the "::" to the end of the
    // address, leaving the zero run in the middle.
    unsigned swaps = piece_pointer - compress_pointer;
    piece_pointer = buffer_end - 1;
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
      uint16_t temp = *piece_pointer;
      uint16_t* swap_piece = compress_pointer + swaps - 1;
      *piece_pointer = *swap_piece;
      *swap_piece = temp;
      piece_pointer--;
      swaps--;
    }
  } else if (compress_pointer == nullptr &&
             piece_pointer != buffer_end) {
    // Without a "::" all eight pieces must have been supplied.
    return;
  }
  type_ = HostType::H_IPV6;
}
912
// Parses the text in [start, end) as one number of an IPv4 host.
//
// Radix selection:
//   - a "0x" / "0X" prefix selects hexadecimal (the prefix is skipped);
//   - otherwise a leading '0' followed by at least one more character
//     selects octal (the leading '0' is skipped);
//   - anything else is decimal.
// An empty range, or a bare "0x" prefix, yields 0.
//
// Returns the parsed value, or -1 if any character in the range is not a
// digit of the selected radix. Values that do not fit in int64_t saturate to
// INT64_MAX, which is what strtoll() reports on overflow.
//
// Only bytes inside [start, end) are read: the value is accumulated by hand
// instead of passing `start` to strtoll(), which would keep consuming digits
// located past `end`.
int64_t ParseNumber(const char* start, const char* end) {
  int64_t radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  }
  if (start == end)
    return 0;
  if (radix == 10 && end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  int64_t result = 0;
  bool saturated = false;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    int64_t digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = ch - 'a' + 10;
    else if (ch >= 'A' && ch <= 'F')
      digit = ch - 'A' + 10;
    else
      return -1;
    if (digit >= radix)
      return -1;

    // Once saturated, keep scanning so that an invalid character later in
    // the range is still reported as -1.
    if (saturated)
      continue;
    if (result > (INT64_MAX - digit) / radix) {
      result = INT64_MAX;
      saturated = true;
    } else {
      result = result * radix + digit;
    }
  }
  return result;
}
947
ParseIPv4Host(const char * input,size_t length,bool * is_ipv4)948 void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
949 CHECK_EQ(type_, HostType::H_FAILED);
950 *is_ipv4 = false;
951 const char* pointer = input;
952 const char* mark = input;
953 const char* end = pointer + length;
954 int parts = 0;
955 uint32_t val = 0;
956 uint64_t numbers[4];
957 int tooBigNumbers = 0;
958 if (length == 0)
959 return;
960
961 while (pointer <= end) {
962 const char ch = pointer < end ? pointer[0] : kEOL;
963 int remaining = end - pointer - 1;
964 if (ch == '.' || ch == kEOL) {
965 if (++parts > static_cast<int>(arraysize(numbers)))
966 return;
967 if (pointer == mark)
968 return;
969 int64_t n = ParseNumber(mark, pointer);
970 if (n < 0)
971 return;
972
973 if (n > 255) {
974 tooBigNumbers++;
975 }
976 numbers[parts - 1] = n;
977 mark = pointer + 1;
978 if (ch == '.' && remaining == 0)
979 break;
980 }
981 pointer++;
982 }
983 CHECK_GT(parts, 0);
984 *is_ipv4 = true;
985
986 // If any but the last item in numbers is greater than 255, return failure.
987 // If the last item in numbers is greater than or equal to
988 // 256^(5 - the number of items in numbers), return failure.
989 if (tooBigNumbers > 1 ||
990 (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
991 numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
992 return;
993 }
994
995 type_ = HostType::H_IPV4;
996 val = numbers[parts - 1];
997 for (int n = 0; n < parts - 1; n++) {
998 double b = 3 - n;
999 val += numbers[n] * pow(256, b);
1000 }
1001
1002 value_.ipv4 = val;
1003 }
1004
ParseOpaqueHost(const char * input,size_t length)1005 void URLHost::ParseOpaqueHost(const char* input, size_t length) {
1006 CHECK_EQ(type_, HostType::H_FAILED);
1007 std::string output;
1008 output.reserve(length);
1009 for (size_t i = 0; i < length; i++) {
1010 const char ch = input[i];
1011 if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
1012 return;
1013 } else {
1014 AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
1015 }
1016 }
1017
1018 SetOpaque(std::move(output));
1019 }
1020
ParseHost(const char * input,size_t length,bool is_special,bool unicode)1021 void URLHost::ParseHost(const char* input,
1022 size_t length,
1023 bool is_special,
1024 bool unicode) {
1025 CHECK_EQ(type_, HostType::H_FAILED);
1026 const char* pointer = input;
1027
1028 if (length == 0)
1029 return;
1030
1031 if (pointer[0] == '[') {
1032 if (pointer[length - 1] != ']')
1033 return;
1034 return ParseIPv6Host(++pointer, length - 2);
1035 }
1036
1037 if (!is_special)
1038 return ParseOpaqueHost(input, length);
1039
1040 // First, we have to percent decode
1041 std::string decoded = PercentDecode(input, length);
1042
1043 // Then we have to punycode toASCII
1044 if (!ToASCII(decoded, &decoded))
1045 return;
1046
1047 // If any of the following characters are still present, we have to fail
1048 for (size_t n = 0; n < decoded.size(); n++) {
1049 const char ch = decoded[n];
1050 if (IsForbiddenHostCodePoint(ch)) {
1051 return;
1052 }
1053 }
1054
1055 // Check to see if it's an IPv4 IP address
1056 bool is_ipv4;
1057 ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
1058 if (is_ipv4)
1059 return;
1060
1061 // If the unicode flag is set, run the result through punycode ToUnicode
1062 if (unicode && !ToUnicode(decoded, &decoded))
1063 return;
1064
1065 // It's not an IPv4 or IPv6 address, it must be a domain
1066 SetDomain(std::move(decoded));
1067 }
1068
// Returns a pointer to the first element of the longest run of zero-valued
// elements in values[0, len), for use as the "::" compression point when
// serializing an IPv6 address. Only runs of at least two zeros qualify; when
// several runs share the maximum length the earliest one wins. Returns
// nullptr if there is no qualifying run.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  size_t best_length = 1;  // A run has to be strictly longer than this.

  size_t index = 0;
  while (index < len) {
    if (values[index] != 0) {
      ++index;
      continue;
    }
    // Consume the whole run of zeros starting here.
    const size_t run_begin = index;
    while (index < len && values[index] == 0)
      ++index;
    const size_t run_length = index - run_begin;
    if (run_length > best_length) {
      best_length = run_length;
      best = values + run_begin;
    }
  }
  return best;
}
1099
ToStringMove()1100 std::string URLHost::ToStringMove() {
1101 std::string return_value;
1102 switch (type_) {
1103 case HostType::H_DOMAIN:
1104 case HostType::H_OPAQUE:
1105 return_value = std::move(value_.domain_or_opaque);
1106 break;
1107 default:
1108 return_value = ToString();
1109 break;
1110 }
1111 Reset();
1112 return return_value;
1113 }
1114
ToString() const1115 std::string URLHost::ToString() const {
1116 std::string dest;
1117 switch (type_) {
1118 case HostType::H_DOMAIN:
1119 case HostType::H_OPAQUE:
1120 return value_.domain_or_opaque;
1121 break;
1122 case HostType::H_IPV4: {
1123 dest.reserve(15);
1124 uint32_t value = value_.ipv4;
1125 for (int n = 0; n < 4; n++) {
1126 char buf[4];
1127 snprintf(buf, sizeof(buf), "%d", value % 256);
1128 dest.insert(0, buf);
1129 if (n < 3)
1130 dest.insert(0, 1, '.');
1131 value /= 256;
1132 }
1133 break;
1134 }
1135 case HostType::H_IPV6: {
1136 dest.reserve(41);
1137 dest += '[';
1138 const uint16_t* start = &value_.ipv6[0];
1139 const uint16_t* compress_pointer =
1140 FindLongestZeroSequence(start, 8);
1141 bool ignore0 = false;
1142 for (int n = 0; n <= 7; n++) {
1143 const uint16_t* piece = &value_.ipv6[n];
1144 if (ignore0 && *piece == 0)
1145 continue;
1146 else if (ignore0)
1147 ignore0 = false;
1148 if (compress_pointer == piece) {
1149 dest += n == 0 ? "::" : ":";
1150 ignore0 = true;
1151 continue;
1152 }
1153 char buf[5];
1154 snprintf(buf, sizeof(buf), "%x", *piece);
1155 dest += buf;
1156 if (n < 7)
1157 dest += ':';
1158 }
1159 dest += ']';
1160 break;
1161 }
1162 case HostType::H_FAILED:
1163 break;
1164 }
1165 return dest;
1166 }
1167
ParseHost(const std::string & input,std::string * output,bool is_special,bool unicode=false)1168 bool ParseHost(const std::string& input,
1169 std::string* output,
1170 bool is_special,
1171 bool unicode = false) {
1172 if (input.empty()) {
1173 output->clear();
1174 return true;
1175 }
1176 URLHost host;
1177 host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1178 if (host.ParsingFailed())
1179 return false;
1180 *output = host.ToStringMove();
1181 return true;
1182 }
1183
FromJSStringArray(Environment * env,Local<Array> array)1184 std::vector<std::string> FromJSStringArray(Environment* env,
1185 Local<Array> array) {
1186 std::vector<std::string> vec;
1187 if (array->Length() > 0)
1188 vec.reserve(array->Length());
1189 for (size_t n = 0; n < array->Length(); n++) {
1190 Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
1191 if (val->IsString()) {
1192 Utf8Value value(env->isolate(), val.As<String>());
1193 vec.emplace_back(*value, value.length());
1194 }
1195 }
1196 return vec;
1197 }
1198
HarvestBase(Environment * env,Local<Object> base_obj)1199 url_data HarvestBase(Environment* env, Local<Object> base_obj) {
1200 url_data base;
1201 Local<Context> context = env->context();
1202
1203 Local<Value> flags =
1204 base_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1205 if (flags->IsInt32())
1206 base.flags = flags->Int32Value(context).FromJust();
1207
1208 Local<Value> port =
1209 base_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1210 if (port->IsInt32())
1211 base.port = port->Int32Value(context).FromJust();
1212
1213 Local<Value> scheme =
1214 base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1215 base.scheme = Utf8Value(env->isolate(), scheme).out();
1216
1217 auto GetStr = [&](std::string url_data::*member,
1218 int flag,
1219 Local<String> name,
1220 bool empty_as_present) {
1221 Local<Value> value = base_obj->Get(env->context(), name).ToLocalChecked();
1222 if (value->IsString()) {
1223 Utf8Value utf8value(env->isolate(), value.As<String>());
1224 (base.*member).assign(*utf8value, utf8value.length());
1225 if (empty_as_present || value.As<String>()->Length() != 0) {
1226 base.flags |= flag;
1227 }
1228 }
1229 };
1230 GetStr(&url_data::username,
1231 URL_FLAGS_HAS_USERNAME,
1232 env->username_string(),
1233 false);
1234 GetStr(&url_data::password,
1235 URL_FLAGS_HAS_PASSWORD,
1236 env->password_string(),
1237 false);
1238 GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
1239 GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
1240 GetStr(&url_data::fragment,
1241 URL_FLAGS_HAS_FRAGMENT,
1242 env->fragment_string(),
1243 true);
1244
1245 Local<Value>
1246 path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked();
1247 if (path->IsArray()) {
1248 base.flags |= URL_FLAGS_HAS_PATH;
1249 base.path = FromJSStringArray(env, path.As<Array>());
1250 }
1251 return base;
1252 }
1253
HarvestContext(Environment * env,Local<Object> context_obj)1254 url_data HarvestContext(Environment* env, Local<Object> context_obj) {
1255 url_data context;
1256 Local<Value> flags =
1257 context_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1258 if (flags->IsInt32()) {
1259 static constexpr int32_t kCopyFlagsMask =
1260 URL_FLAGS_SPECIAL |
1261 URL_FLAGS_CANNOT_BE_BASE |
1262 URL_FLAGS_HAS_USERNAME |
1263 URL_FLAGS_HAS_PASSWORD |
1264 URL_FLAGS_HAS_HOST;
1265 context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
1266 }
1267 Local<Value> scheme =
1268 context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1269 if (scheme->IsString()) {
1270 Utf8Value value(env->isolate(), scheme);
1271 context.scheme.assign(*value, value.length());
1272 }
1273 Local<Value> port =
1274 context_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1275 if (port->IsInt32())
1276 context.port = port.As<Int32>()->Value();
1277 if (context.flags & URL_FLAGS_HAS_USERNAME) {
1278 Local<Value> username =
1279 context_obj->Get(env->context(),
1280 env->username_string()).ToLocalChecked();
1281 CHECK(username->IsString());
1282 Utf8Value value(env->isolate(), username);
1283 context.username.assign(*value, value.length());
1284 }
1285 if (context.flags & URL_FLAGS_HAS_PASSWORD) {
1286 Local<Value> password =
1287 context_obj->Get(env->context(),
1288 env->password_string()).ToLocalChecked();
1289 CHECK(password->IsString());
1290 Utf8Value value(env->isolate(), password);
1291 context.password.assign(*value, value.length());
1292 }
1293 Local<Value> host =
1294 context_obj->Get(env->context(),
1295 env->host_string()).ToLocalChecked();
1296 if (host->IsString()) {
1297 Utf8Value value(env->isolate(), host);
1298 context.host.assign(*value, value.length());
1299 }
1300 return context;
1301 }
1302
1303 // Single dot segment can be ".", "%2e", or "%2E"
IsSingleDotSegment(const std::string & str)1304 bool IsSingleDotSegment(const std::string& str) {
1305 switch (str.size()) {
1306 case 1:
1307 return str == ".";
1308 case 3:
1309 return str[0] == '%' &&
1310 str[1] == '2' &&
1311 ASCIILowercase(str[2]) == 'e';
1312 default:
1313 return false;
1314 }
1315 }
1316
1317 // Double dot segment can be:
1318 // "..", ".%2e", ".%2E", "%2e.", "%2E.",
1319 // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
IsDoubleDotSegment(const std::string & str)1320 bool IsDoubleDotSegment(const std::string& str) {
1321 switch (str.size()) {
1322 case 2:
1323 return str == "..";
1324 case 4:
1325 if (str[0] != '.' && str[0] != '%')
1326 return false;
1327 return ((str[0] == '.' &&
1328 str[1] == '%' &&
1329 str[2] == '2' &&
1330 ASCIILowercase(str[3]) == 'e') ||
1331 (str[0] == '%' &&
1332 str[1] == '2' &&
1333 ASCIILowercase(str[2]) == 'e' &&
1334 str[3] == '.'));
1335 case 6:
1336 return (str[0] == '%' &&
1337 str[1] == '2' &&
1338 ASCIILowercase(str[2]) == 'e' &&
1339 str[3] == '%' &&
1340 str[4] == '2' &&
1341 ASCIILowercase(str[5]) == 'e');
1342 default:
1343 return false;
1344 }
1345 }
1346
ShortenUrlPath(struct url_data * url)1347 void ShortenUrlPath(struct url_data* url) {
1348 if (url->path.empty()) return;
1349 if (url->path.size() == 1 && url->scheme == "file:" &&
1350 IsNormalizedWindowsDriveLetter(url->path[0])) return;
1351 url->path.pop_back();
1352 }
1353
1354 } // anonymous namespace
1355
Parse(const char * input,size_t len,enum url_parse_state state_override,struct url_data * url,bool has_url,const struct url_data * base,bool has_base)1356 void URL::Parse(const char* input,
1357 size_t len,
1358 enum url_parse_state state_override,
1359 struct url_data* url,
1360 bool has_url,
1361 const struct url_data* base,
1362 bool has_base) {
1363 const char* p = input;
1364 const char* end = input + len;
1365
1366 if (!has_url) {
1367 for (const char* ptr = p; ptr < end; ptr++) {
1368 if (IsC0ControlOrSpace(*ptr))
1369 p++;
1370 else
1371 break;
1372 }
1373 for (const char* ptr = end - 1; ptr >= p; ptr--) {
1374 if (IsC0ControlOrSpace(*ptr))
1375 end--;
1376 else
1377 break;
1378 }
1379 input = p;
1380 len = end - p;
1381 }
1382
1383 // The spec says we should strip out any ASCII tabs or newlines.
1384 // In those cases, we create another std::string instance with the filtered
1385 // contents, but in the general case we avoid the overhead.
1386 std::string whitespace_stripped;
1387 for (const char* ptr = p; ptr < end; ptr++) {
1388 if (!IsASCIITabOrNewline(*ptr))
1389 continue;
1390 // Hit tab or newline. Allocate storage, copy what we have until now,
1391 // and then iterate and filter all similar characters out.
1392 whitespace_stripped.reserve(len - 1);
1393 whitespace_stripped.assign(p, ptr - p);
1394 // 'ptr + 1' skips the current char, which we know to be tab or newline.
1395 for (ptr = ptr + 1; ptr < end; ptr++) {
1396 if (!IsASCIITabOrNewline(*ptr))
1397 whitespace_stripped += *ptr;
1398 }
1399
1400 // Update variables like they should have looked like if the string
1401 // had been stripped of whitespace to begin with.
1402 input = whitespace_stripped.c_str();
1403 len = whitespace_stripped.size();
1404 p = input;
1405 end = input + len;
1406 break;
1407 }
1408
1409 bool atflag = false; // Set when @ has been seen.
1410 bool square_bracket_flag = false; // Set inside of [...]
1411 bool password_token_seen_flag = false; // Set after a : after an username.
1412
1413 std::string buffer;
1414
1415 // Set the initial parse state.
1416 const bool has_state_override = state_override != kUnknownState;
1417 enum url_parse_state state = has_state_override ? state_override :
1418 kSchemeStart;
1419
1420 if (state < kSchemeStart || state > kFragment) {
1421 url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1422 return;
1423 }
1424
1425 while (p <= end) {
1426 const char ch = p < end ? p[0] : kEOL;
1427 bool special = (url->flags & URL_FLAGS_SPECIAL);
1428 bool cannot_be_base;
1429 const bool special_back_slash = (special && ch == '\\');
1430
1431 switch (state) {
1432 case kSchemeStart:
1433 if (IsASCIIAlpha(ch)) {
1434 buffer += ASCIILowercase(ch);
1435 state = kScheme;
1436 } else if (!has_state_override) {
1437 state = kNoScheme;
1438 continue;
1439 } else {
1440 url->flags |= URL_FLAGS_FAILED;
1441 return;
1442 }
1443 break;
1444 case kScheme:
1445 if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1446 buffer += ASCIILowercase(ch);
1447 } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1448 if (has_state_override && buffer.size() == 0) {
1449 url->flags |= URL_FLAGS_TERMINATED;
1450 return;
1451 }
1452 buffer += ':';
1453
1454 bool new_is_special = IsSpecial(buffer);
1455
1456 if (has_state_override) {
1457 if ((special != new_is_special) ||
1458 ((buffer == "file:") &&
1459 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1460 (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1461 (url->port != -1)))) {
1462 url->flags |= URL_FLAGS_TERMINATED;
1463 return;
1464 }
1465
1466 // File scheme && (host == empty or null) check left to JS-land
1467 // as it can be done before even entering C++ binding.
1468 }
1469
1470 url->scheme = std::move(buffer);
1471 url->port = NormalizePort(url->scheme, url->port);
1472 if (new_is_special) {
1473 url->flags |= URL_FLAGS_SPECIAL;
1474 special = true;
1475 } else {
1476 url->flags &= ~URL_FLAGS_SPECIAL;
1477 special = false;
1478 }
1479 buffer.clear();
1480 if (has_state_override)
1481 return;
1482 if (url->scheme == "file:") {
1483 state = kFile;
1484 } else if (special &&
1485 has_base &&
1486 url->scheme == base->scheme) {
1487 state = kSpecialRelativeOrAuthority;
1488 } else if (special) {
1489 state = kSpecialAuthoritySlashes;
1490 } else if (p + 1 < end && p[1] == '/') {
1491 state = kPathOrAuthority;
1492 p++;
1493 } else {
1494 url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1495 url->flags |= URL_FLAGS_HAS_PATH;
1496 url->path.emplace_back("");
1497 state = kCannotBeBase;
1498 }
1499 } else if (!has_state_override) {
1500 buffer.clear();
1501 state = kNoScheme;
1502 p = input;
1503 continue;
1504 } else {
1505 url->flags |= URL_FLAGS_FAILED;
1506 return;
1507 }
1508 break;
1509 case kNoScheme:
1510 cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1511 if (!has_base || (cannot_be_base && ch != '#')) {
1512 url->flags |= URL_FLAGS_FAILED;
1513 return;
1514 } else if (cannot_be_base && ch == '#') {
1515 url->scheme = base->scheme;
1516 if (IsSpecial(url->scheme)) {
1517 url->flags |= URL_FLAGS_SPECIAL;
1518 special = true;
1519 } else {
1520 url->flags &= ~URL_FLAGS_SPECIAL;
1521 special = false;
1522 }
1523 if (base->flags & URL_FLAGS_HAS_PATH) {
1524 url->flags |= URL_FLAGS_HAS_PATH;
1525 url->path = base->path;
1526 }
1527 if (base->flags & URL_FLAGS_HAS_QUERY) {
1528 url->flags |= URL_FLAGS_HAS_QUERY;
1529 url->query = base->query;
1530 }
1531 if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1532 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1533 url->fragment = base->fragment;
1534 }
1535 url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1536 state = kFragment;
1537 } else if (has_base &&
1538 base->scheme != "file:") {
1539 state = kRelative;
1540 continue;
1541 } else {
1542 url->scheme = "file:";
1543 url->flags |= URL_FLAGS_SPECIAL;
1544 special = true;
1545 state = kFile;
1546 continue;
1547 }
1548 break;
1549 case kSpecialRelativeOrAuthority:
1550 if (ch == '/' && p + 1 < end && p[1] == '/') {
1551 state = kSpecialAuthorityIgnoreSlashes;
1552 p++;
1553 } else {
1554 state = kRelative;
1555 continue;
1556 }
1557 break;
1558 case kPathOrAuthority:
1559 if (ch == '/') {
1560 state = kAuthority;
1561 } else {
1562 state = kPath;
1563 continue;
1564 }
1565 break;
1566 case kRelative:
1567 url->scheme = base->scheme;
1568 if (IsSpecial(url->scheme)) {
1569 url->flags |= URL_FLAGS_SPECIAL;
1570 special = true;
1571 } else {
1572 url->flags &= ~URL_FLAGS_SPECIAL;
1573 special = false;
1574 }
1575 switch (ch) {
1576 case kEOL:
1577 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1578 url->flags |= URL_FLAGS_HAS_USERNAME;
1579 url->username = base->username;
1580 }
1581 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1582 url->flags |= URL_FLAGS_HAS_PASSWORD;
1583 url->password = base->password;
1584 }
1585 if (base->flags & URL_FLAGS_HAS_HOST) {
1586 url->flags |= URL_FLAGS_HAS_HOST;
1587 url->host = base->host;
1588 }
1589 if (base->flags & URL_FLAGS_HAS_QUERY) {
1590 url->flags |= URL_FLAGS_HAS_QUERY;
1591 url->query = base->query;
1592 }
1593 if (base->flags & URL_FLAGS_HAS_PATH) {
1594 url->flags |= URL_FLAGS_HAS_PATH;
1595 url->path = base->path;
1596 }
1597 url->port = base->port;
1598 break;
1599 case '/':
1600 state = kRelativeSlash;
1601 break;
1602 case '?':
1603 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1604 url->flags |= URL_FLAGS_HAS_USERNAME;
1605 url->username = base->username;
1606 }
1607 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1608 url->flags |= URL_FLAGS_HAS_PASSWORD;
1609 url->password = base->password;
1610 }
1611 if (base->flags & URL_FLAGS_HAS_HOST) {
1612 url->flags |= URL_FLAGS_HAS_HOST;
1613 url->host = base->host;
1614 }
1615 if (base->flags & URL_FLAGS_HAS_PATH) {
1616 url->flags |= URL_FLAGS_HAS_PATH;
1617 url->path = base->path;
1618 }
1619 url->port = base->port;
1620 state = kQuery;
1621 break;
1622 case '#':
1623 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1624 url->flags |= URL_FLAGS_HAS_USERNAME;
1625 url->username = base->username;
1626 }
1627 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1628 url->flags |= URL_FLAGS_HAS_PASSWORD;
1629 url->password = base->password;
1630 }
1631 if (base->flags & URL_FLAGS_HAS_HOST) {
1632 url->flags |= URL_FLAGS_HAS_HOST;
1633 url->host = base->host;
1634 }
1635 if (base->flags & URL_FLAGS_HAS_QUERY) {
1636 url->flags |= URL_FLAGS_HAS_QUERY;
1637 url->query = base->query;
1638 }
1639 if (base->flags & URL_FLAGS_HAS_PATH) {
1640 url->flags |= URL_FLAGS_HAS_PATH;
1641 url->path = base->path;
1642 }
1643 url->port = base->port;
1644 state = kFragment;
1645 break;
1646 default:
1647 if (special_back_slash) {
1648 state = kRelativeSlash;
1649 } else {
1650 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1651 url->flags |= URL_FLAGS_HAS_USERNAME;
1652 url->username = base->username;
1653 }
1654 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1655 url->flags |= URL_FLAGS_HAS_PASSWORD;
1656 url->password = base->password;
1657 }
1658 if (base->flags & URL_FLAGS_HAS_HOST) {
1659 url->flags |= URL_FLAGS_HAS_HOST;
1660 url->host = base->host;
1661 }
1662 if (base->flags & URL_FLAGS_HAS_PATH) {
1663 url->flags |= URL_FLAGS_HAS_PATH;
1664 url->path = base->path;
1665 ShortenUrlPath(url);
1666 }
1667 url->port = base->port;
1668 state = kPath;
1669 continue;
1670 }
1671 }
1672 break;
1673 case kRelativeSlash:
1674 if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1675 state = kSpecialAuthorityIgnoreSlashes;
1676 } else if (ch == '/') {
1677 state = kAuthority;
1678 } else {
1679 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1680 url->flags |= URL_FLAGS_HAS_USERNAME;
1681 url->username = base->username;
1682 }
1683 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1684 url->flags |= URL_FLAGS_HAS_PASSWORD;
1685 url->password = base->password;
1686 }
1687 if (base->flags & URL_FLAGS_HAS_HOST) {
1688 url->flags |= URL_FLAGS_HAS_HOST;
1689 url->host = base->host;
1690 }
1691 url->port = base->port;
1692 state = kPath;
1693 continue;
1694 }
1695 break;
1696 case kSpecialAuthoritySlashes:
1697 state = kSpecialAuthorityIgnoreSlashes;
1698 if (ch == '/' && p + 1 < end && p[1] == '/') {
1699 p++;
1700 } else {
1701 continue;
1702 }
1703 break;
1704 case kSpecialAuthorityIgnoreSlashes:
1705 if (ch != '/' && ch != '\\') {
1706 state = kAuthority;
1707 continue;
1708 }
1709 break;
1710 case kAuthority:
1711 if (ch == '@') {
1712 if (atflag) {
1713 buffer.reserve(buffer.size() + 3);
1714 buffer.insert(0, "%40");
1715 }
1716 atflag = true;
1717 size_t blen = buffer.size();
1718 if (blen > 0 && buffer[0] != ':') {
1719 url->flags |= URL_FLAGS_HAS_USERNAME;
1720 }
1721 for (size_t n = 0; n < blen; n++) {
1722 const char bch = buffer[n];
1723 if (bch == ':') {
1724 url->flags |= URL_FLAGS_HAS_PASSWORD;
1725 if (!password_token_seen_flag) {
1726 password_token_seen_flag = true;
1727 continue;
1728 }
1729 }
1730 if (password_token_seen_flag) {
1731 AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1732 } else {
1733 AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1734 }
1735 }
1736 buffer.clear();
1737 } else if (ch == kEOL ||
1738 ch == '/' ||
1739 ch == '?' ||
1740 ch == '#' ||
1741 special_back_slash) {
1742 if (atflag && buffer.size() == 0) {
1743 url->flags |= URL_FLAGS_FAILED;
1744 return;
1745 }
1746 p -= buffer.size() + 1;
1747 buffer.clear();
1748 state = kHost;
1749 } else {
1750 buffer += ch;
1751 }
1752 break;
1753 case kHost:
1754 case kHostname:
1755 if (has_state_override && url->scheme == "file:") {
1756 state = kFileHost;
1757 continue;
1758 } else if (ch == ':' && !square_bracket_flag) {
1759 if (buffer.size() == 0) {
1760 url->flags |= URL_FLAGS_FAILED;
1761 return;
1762 }
1763 url->flags |= URL_FLAGS_HAS_HOST;
1764 if (!ParseHost(buffer, &url->host, special)) {
1765 url->flags |= URL_FLAGS_FAILED;
1766 return;
1767 }
1768 buffer.clear();
1769 state = kPort;
1770 if (state_override == kHostname) {
1771 return;
1772 }
1773 } else if (ch == kEOL ||
1774 ch == '/' ||
1775 ch == '?' ||
1776 ch == '#' ||
1777 special_back_slash) {
1778 p--;
1779 if (special && buffer.size() == 0) {
1780 url->flags |= URL_FLAGS_FAILED;
1781 return;
1782 }
1783 if (has_state_override &&
1784 buffer.size() == 0 &&
1785 ((url->username.size() > 0 || url->password.size() > 0) ||
1786 url->port != -1)) {
1787 url->flags |= URL_FLAGS_TERMINATED;
1788 return;
1789 }
1790 url->flags |= URL_FLAGS_HAS_HOST;
1791 if (!ParseHost(buffer, &url->host, special)) {
1792 url->flags |= URL_FLAGS_FAILED;
1793 return;
1794 }
1795 buffer.clear();
1796 state = kPathStart;
1797 if (has_state_override) {
1798 return;
1799 }
1800 } else {
1801 if (ch == '[')
1802 square_bracket_flag = true;
1803 if (ch == ']')
1804 square_bracket_flag = false;
1805 buffer += ch;
1806 }
1807 break;
1808 case kPort:
1809 if (IsASCIIDigit(ch)) {
1810 buffer += ch;
1811 } else if (has_state_override ||
1812 ch == kEOL ||
1813 ch == '/' ||
1814 ch == '?' ||
1815 ch == '#' ||
1816 special_back_slash) {
1817 if (buffer.size() > 0) {
1818 unsigned port = 0;
1819 // the condition port <= 0xffff prevents integer overflow
1820 for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1821 port = port * 10 + buffer[i] - '0';
1822 if (port > 0xffff) {
1823 // TODO(TimothyGu): This hack is currently needed for the host
1824 // setter since it needs access to hostname if it is valid, and
1825 // if the FAILED flag is set the entire response to JS layer
1826 // will be empty.
1827 if (state_override == kHost)
1828 url->port = -1;
1829 else
1830 url->flags |= URL_FLAGS_FAILED;
1831 return;
1832 }
1833 // the port is valid
1834 url->port = NormalizePort(url->scheme, static_cast<int>(port));
1835 if (url->port == -1)
1836 url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1837 buffer.clear();
1838 } else if (has_state_override) {
1839 // TODO(TimothyGu): Similar case as above.
1840 if (state_override == kHost)
1841 url->port = -1;
1842 else
1843 url->flags |= URL_FLAGS_TERMINATED;
1844 return;
1845 }
1846 state = kPathStart;
1847 continue;
1848 } else {
1849 url->flags |= URL_FLAGS_FAILED;
1850 return;
1851 }
1852 break;
1853 case kFile:
1854 url->scheme = "file:";
1855 if (ch == '/' || ch == '\\') {
1856 state = kFileSlash;
1857 } else if (has_base && base->scheme == "file:") {
1858 switch (ch) {
1859 case kEOL:
1860 if (base->flags & URL_FLAGS_HAS_HOST) {
1861 url->flags |= URL_FLAGS_HAS_HOST;
1862 url->host = base->host;
1863 }
1864 if (base->flags & URL_FLAGS_HAS_PATH) {
1865 url->flags |= URL_FLAGS_HAS_PATH;
1866 url->path = base->path;
1867 }
1868 if (base->flags & URL_FLAGS_HAS_QUERY) {
1869 url->flags |= URL_FLAGS_HAS_QUERY;
1870 url->query = base->query;
1871 }
1872 break;
1873 case '?':
1874 if (base->flags & URL_FLAGS_HAS_HOST) {
1875 url->flags |= URL_FLAGS_HAS_HOST;
1876 url->host = base->host;
1877 }
1878 if (base->flags & URL_FLAGS_HAS_PATH) {
1879 url->flags |= URL_FLAGS_HAS_PATH;
1880 url->path = base->path;
1881 }
1882 url->flags |= URL_FLAGS_HAS_QUERY;
1883 url->query.clear();
1884 state = kQuery;
1885 break;
1886 case '#':
1887 if (base->flags & URL_FLAGS_HAS_HOST) {
1888 url->flags |= URL_FLAGS_HAS_HOST;
1889 url->host = base->host;
1890 }
1891 if (base->flags & URL_FLAGS_HAS_PATH) {
1892 url->flags |= URL_FLAGS_HAS_PATH;
1893 url->path = base->path;
1894 }
1895 if (base->flags & URL_FLAGS_HAS_QUERY) {
1896 url->flags |= URL_FLAGS_HAS_QUERY;
1897 url->query = base->query;
1898 }
1899 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1900 url->fragment.clear();
1901 state = kFragment;
1902 break;
1903 default:
1904 if (!StartsWithWindowsDriveLetter(p, end)) {
1905 if (base->flags & URL_FLAGS_HAS_HOST) {
1906 url->flags |= URL_FLAGS_HAS_HOST;
1907 url->host = base->host;
1908 }
1909 if (base->flags & URL_FLAGS_HAS_PATH) {
1910 url->flags |= URL_FLAGS_HAS_PATH;
1911 url->path = base->path;
1912 }
1913 ShortenUrlPath(url);
1914 }
1915 state = kPath;
1916 continue;
1917 }
1918 } else {
1919 state = kPath;
1920 continue;
1921 }
1922 break;
1923 case kFileSlash:
1924 if (ch == '/' || ch == '\\') {
1925 state = kFileHost;
1926 } else {
1927 if (has_base &&
1928 base->scheme == "file:" &&
1929 !StartsWithWindowsDriveLetter(p, end)) {
1930 if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1931 url->flags |= URL_FLAGS_HAS_PATH;
1932 url->path.push_back(base->path[0]);
1933 } else {
1934 if (base->flags & URL_FLAGS_HAS_HOST) {
1935 url->flags |= URL_FLAGS_HAS_HOST;
1936 url->host = base->host;
1937 } else {
1938 url->flags &= ~URL_FLAGS_HAS_HOST;
1939 url->host.clear();
1940 }
1941 }
1942 }
1943 state = kPath;
1944 continue;
1945 }
1946 break;
1947 case kFileHost:
1948 if (ch == kEOL ||
1949 ch == '/' ||
1950 ch == '\\' ||
1951 ch == '?' ||
1952 ch == '#') {
1953 if (!has_state_override &&
1954 buffer.size() == 2 &&
1955 IsWindowsDriveLetter(buffer)) {
1956 state = kPath;
1957 } else if (buffer.size() == 0) {
1958 url->flags |= URL_FLAGS_HAS_HOST;
1959 url->host.clear();
1960 if (has_state_override)
1961 return;
1962 state = kPathStart;
1963 } else {
1964 std::string host;
1965 if (!ParseHost(buffer, &host, special)) {
1966 url->flags |= URL_FLAGS_FAILED;
1967 return;
1968 }
1969 if (host == "localhost")
1970 host.clear();
1971 url->flags |= URL_FLAGS_HAS_HOST;
1972 url->host = host;
1973 if (has_state_override)
1974 return;
1975 buffer.clear();
1976 state = kPathStart;
1977 }
1978 continue;
1979 } else {
1980 buffer += ch;
1981 }
1982 break;
1983 case kPathStart:
1984 if (IsSpecial(url->scheme)) {
1985 state = kPath;
1986 if (ch != '/' && ch != '\\') {
1987 continue;
1988 }
1989 } else if (!has_state_override && ch == '?') {
1990 url->flags |= URL_FLAGS_HAS_QUERY;
1991 url->query.clear();
1992 state = kQuery;
1993 } else if (!has_state_override && ch == '#') {
1994 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1995 url->fragment.clear();
1996 state = kFragment;
1997 } else if (ch != kEOL) {
1998 state = kPath;
1999 if (ch != '/') {
2000 continue;
2001 }
2002 }
2003 break;
2004 case kPath:
2005 if (ch == kEOL ||
2006 ch == '/' ||
2007 special_back_slash ||
2008 (!has_state_override && (ch == '?' || ch == '#'))) {
2009 if (IsDoubleDotSegment(buffer)) {
2010 ShortenUrlPath(url);
2011 if (ch != '/' && !special_back_slash) {
2012 url->flags |= URL_FLAGS_HAS_PATH;
2013 url->path.emplace_back("");
2014 }
2015 } else if (IsSingleDotSegment(buffer) &&
2016 ch != '/' && !special_back_slash) {
2017 url->flags |= URL_FLAGS_HAS_PATH;
2018 url->path.emplace_back("");
2019 } else if (!IsSingleDotSegment(buffer)) {
2020 if (url->scheme == "file:" &&
2021 url->path.empty() &&
2022 buffer.size() == 2 &&
2023 IsWindowsDriveLetter(buffer)) {
2024 if ((url->flags & URL_FLAGS_HAS_HOST) &&
2025 !url->host.empty()) {
2026 url->host.clear();
2027 url->flags |= URL_FLAGS_HAS_HOST;
2028 }
2029 buffer[1] = ':';
2030 }
2031 url->flags |= URL_FLAGS_HAS_PATH;
2032 url->path.emplace_back(std::move(buffer));
2033 }
2034 buffer.clear();
2035 if (url->scheme == "file:" &&
2036 (ch == kEOL ||
2037 ch == '?' ||
2038 ch == '#')) {
2039 while (url->path.size() > 1 && url->path[0].empty()) {
2040 url->path.erase(url->path.begin());
2041 }
2042 }
2043 if (ch == '?') {
2044 url->flags |= URL_FLAGS_HAS_QUERY;
2045 state = kQuery;
2046 } else if (ch == '#') {
2047 state = kFragment;
2048 }
2049 } else {
2050 AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
2051 }
2052 break;
2053 case kCannotBeBase:
2054 switch (ch) {
2055 case '?':
2056 state = kQuery;
2057 break;
2058 case '#':
2059 state = kFragment;
2060 break;
2061 default:
2062 if (url->path.empty())
2063 url->path.emplace_back("");
2064 else if (ch != kEOL)
2065 AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
2066 }
2067 break;
2068 case kQuery:
2069 if (ch == kEOL || (!has_state_override && ch == '#')) {
2070 url->flags |= URL_FLAGS_HAS_QUERY;
2071 url->query = std::move(buffer);
2072 buffer.clear();
2073 if (ch == '#')
2074 state = kFragment;
2075 } else {
2076 AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
2077 QUERY_ENCODE_SET_NONSPECIAL);
2078 }
2079 break;
2080 case kFragment:
2081 switch (ch) {
2082 case kEOL:
2083 url->flags |= URL_FLAGS_HAS_FRAGMENT;
2084 url->fragment = std::move(buffer);
2085 break;
2086 case 0:
2087 break;
2088 default:
2089 AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
2090 }
2091 break;
2092 default:
2093 url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
2094 return;
2095 }
2096
2097 p++;
2098 }
2099 } // NOLINT(readability/fn_size)
2100
2101 namespace {
SetArgs(Environment * env,Local<Value> argv[ARG_COUNT],const struct url_data & url)2102 void SetArgs(Environment* env,
2103 Local<Value> argv[ARG_COUNT],
2104 const struct url_data& url) {
2105 Isolate* isolate = env->isolate();
2106 argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2107 argv[ARG_PROTOCOL] =
2108 url.flags & URL_FLAGS_SPECIAL ?
2109 GetSpecial(env, url.scheme) :
2110 OneByteString(isolate, url.scheme.c_str());
2111 if (url.flags & URL_FLAGS_HAS_USERNAME)
2112 argv[ARG_USERNAME] = Utf8String(isolate, url.username);
2113 if (url.flags & URL_FLAGS_HAS_PASSWORD)
2114 argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
2115 if (url.flags & URL_FLAGS_HAS_HOST)
2116 argv[ARG_HOST] = Utf8String(isolate, url.host);
2117 if (url.flags & URL_FLAGS_HAS_QUERY)
2118 argv[ARG_QUERY] = Utf8String(isolate, url.query);
2119 if (url.flags & URL_FLAGS_HAS_FRAGMENT)
2120 argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
2121 if (url.port > -1)
2122 argv[ARG_PORT] = Integer::New(isolate, url.port);
2123 if (url.flags & URL_FLAGS_HAS_PATH)
2124 argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
2125 }
2126
Parse(Environment * env,Local<Value> recv,const char * input,size_t len,enum url_parse_state state_override,Local<Value> base_obj,Local<Value> context_obj,Local<Function> cb,Local<Value> error_cb)2127 void Parse(Environment* env,
2128 Local<Value> recv,
2129 const char* input,
2130 size_t len,
2131 enum url_parse_state state_override,
2132 Local<Value> base_obj,
2133 Local<Value> context_obj,
2134 Local<Function> cb,
2135 Local<Value> error_cb) {
2136 Isolate* isolate = env->isolate();
2137 Local<Context> context = env->context();
2138 HandleScope handle_scope(isolate);
2139 Context::Scope context_scope(context);
2140
2141 const bool has_context = context_obj->IsObject();
2142 const bool has_base = base_obj->IsObject();
2143
2144 url_data base;
2145 url_data url;
2146 if (has_context)
2147 url = HarvestContext(env, context_obj.As<Object>());
2148 if (has_base)
2149 base = HarvestBase(env, base_obj.As<Object>());
2150
2151 URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
2152 if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2153 ((state_override != kUnknownState) &&
2154 (url.flags & URL_FLAGS_TERMINATED)))
2155 return;
2156
2157 // Define the return value placeholders
2158 const Local<Value> undef = Undefined(isolate);
2159 const Local<Value> null = Null(isolate);
2160 if (!(url.flags & URL_FLAGS_FAILED)) {
2161 Local<Value> argv[] = {
2162 undef,
2163 undef,
2164 undef,
2165 undef,
2166 null, // host defaults to null
2167 null, // port defaults to null
2168 undef,
2169 null, // query defaults to null
2170 null, // fragment defaults to null
2171 };
2172 SetArgs(env, argv, url);
2173 cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2174 } else if (error_cb->IsFunction()) {
2175 Local<Value> argv[2] = { undef, undef };
2176 argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2177 argv[ERR_ARG_INPUT] =
2178 String::NewFromUtf8(env->isolate(),
2179 input,
2180 NewStringType::kNormal).ToLocalChecked();
2181 error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2182 .FromMaybe(Local<Value>());
2183 }
2184 }
2185
Parse(const FunctionCallbackInfo<Value> & args)2186 void Parse(const FunctionCallbackInfo<Value>& args) {
2187 Environment* env = Environment::GetCurrent(args);
2188 CHECK_GE(args.Length(), 5);
2189 CHECK(args[0]->IsString()); // input
2190 CHECK(args[2]->IsUndefined() || // base context
2191 args[2]->IsNull() ||
2192 args[2]->IsObject());
2193 CHECK(args[3]->IsUndefined() || // context
2194 args[3]->IsNull() ||
2195 args[3]->IsObject());
2196 CHECK(args[4]->IsFunction()); // complete callback
2197 CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback
2198
2199 Utf8Value input(env->isolate(), args[0]);
2200 enum url_parse_state state_override = kUnknownState;
2201 if (args[1]->IsNumber()) {
2202 state_override = static_cast<enum url_parse_state>(
2203 args[1]->Uint32Value(env->context()).FromJust());
2204 }
2205
2206 Parse(env, args.This(),
2207 *input, input.length(),
2208 state_override,
2209 args[2],
2210 args[3],
2211 args[4].As<Function>(),
2212 args[5]);
2213 }
2214
EncodeAuthSet(const FunctionCallbackInfo<Value> & args)2215 void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2216 Environment* env = Environment::GetCurrent(args);
2217 CHECK_GE(args.Length(), 1);
2218 CHECK(args[0]->IsString());
2219 Utf8Value value(env->isolate(), args[0]);
2220 std::string output;
2221 size_t len = value.length();
2222 output.reserve(len);
2223 for (size_t n = 0; n < len; n++) {
2224 const char ch = (*value)[n];
2225 AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2226 }
2227 args.GetReturnValue().Set(
2228 String::NewFromUtf8(env->isolate(),
2229 output.c_str(),
2230 NewStringType::kNormal).ToLocalChecked());
2231 }
2232
ToUSVString(const FunctionCallbackInfo<Value> & args)2233 void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2234 Environment* env = Environment::GetCurrent(args);
2235 CHECK_GE(args.Length(), 2);
2236 CHECK(args[0]->IsString());
2237 CHECK(args[1]->IsNumber());
2238
2239 TwoByteValue value(env->isolate(), args[0]);
2240
2241 int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2242 CHECK_GE(start, 0);
2243
2244 for (size_t i = start; i < value.length(); i++) {
2245 char16_t c = value[i];
2246 if (!IsUnicodeSurrogate(c)) {
2247 continue;
2248 } else if (IsUnicodeSurrogateTrail(c) || i == value.length() - 1) {
2249 value[i] = kUnicodeReplacementCharacter;
2250 } else {
2251 char16_t d = value[i + 1];
2252 if (IsUnicodeTrail(d)) {
2253 i++;
2254 } else {
2255 value[i] = kUnicodeReplacementCharacter;
2256 }
2257 }
2258 }
2259
2260 args.GetReturnValue().Set(
2261 String::NewFromTwoByte(env->isolate(),
2262 *value,
2263 NewStringType::kNormal,
2264 value.length()).ToLocalChecked());
2265 }
2266
DomainToASCII(const FunctionCallbackInfo<Value> & args)2267 void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2268 Environment* env = Environment::GetCurrent(args);
2269 CHECK_GE(args.Length(), 1);
2270 CHECK(args[0]->IsString());
2271 Utf8Value value(env->isolate(), args[0]);
2272
2273 URLHost host;
2274 // Assuming the host is used for a special scheme.
2275 host.ParseHost(*value, value.length(), true);
2276 if (host.ParsingFailed()) {
2277 args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2278 return;
2279 }
2280 std::string out = host.ToStringMove();
2281 args.GetReturnValue().Set(
2282 String::NewFromUtf8(env->isolate(),
2283 out.c_str(),
2284 NewStringType::kNormal).ToLocalChecked());
2285 }
2286
DomainToUnicode(const FunctionCallbackInfo<Value> & args)2287 void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2288 Environment* env = Environment::GetCurrent(args);
2289 CHECK_GE(args.Length(), 1);
2290 CHECK(args[0]->IsString());
2291 Utf8Value value(env->isolate(), args[0]);
2292
2293 URLHost host;
2294 // Assuming the host is used for a special scheme.
2295 host.ParseHost(*value, value.length(), true, true);
2296 if (host.ParsingFailed()) {
2297 args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2298 return;
2299 }
2300 std::string out = host.ToStringMove();
2301 args.GetReturnValue().Set(
2302 String::NewFromUtf8(env->isolate(),
2303 out.c_str(),
2304 NewStringType::kNormal).ToLocalChecked());
2305 }
2306
SetURLConstructor(const FunctionCallbackInfo<Value> & args)2307 void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2308 Environment* env = Environment::GetCurrent(args);
2309 CHECK_EQ(args.Length(), 1);
2310 CHECK(args[0]->IsFunction());
2311 env->set_url_constructor_function(args[0].As<Function>());
2312 }
2313
Initialize(Local<Object> target,Local<Value> unused,Local<Context> context,void * priv)2314 void Initialize(Local<Object> target,
2315 Local<Value> unused,
2316 Local<Context> context,
2317 void* priv) {
2318 Environment* env = Environment::GetCurrent(context);
2319 env->SetMethod(target, "parse", Parse);
2320 env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
2321 env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
2322 env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
2323 env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
2324 env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2325
2326 #define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2327 FLAGS(XX)
2328 #undef XX
2329
2330 #define XX(name) NODE_DEFINE_CONSTANT(target, name);
2331 PARSESTATES(XX)
2332 #undef XX
2333 }
2334 } // namespace
2335
ToFilePath() const2336 std::string URL::ToFilePath() const {
2337 if (context_.scheme != "file:") {
2338 return "";
2339 }
2340
2341 #ifdef _WIN32
2342 const char* slash = "\\";
2343 auto is_slash = [] (char ch) {
2344 return ch == '/' || ch == '\\';
2345 };
2346 #else
2347 const char* slash = "/";
2348 auto is_slash = [] (char ch) {
2349 return ch == '/';
2350 };
2351 if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2352 context_.host.length() > 0) {
2353 return "";
2354 }
2355 #endif
2356 std::string decoded_path;
2357 for (const std::string& part : context_.path) {
2358 std::string decoded = PercentDecode(part.c_str(), part.length());
2359 for (char& ch : decoded) {
2360 if (is_slash(ch)) {
2361 return "";
2362 }
2363 }
2364 decoded_path += slash + decoded;
2365 }
2366
2367 #ifdef _WIN32
2368 // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2369
2370 // If hostname is set, then we have a UNC path. Pass the hostname through
2371 // ToUnicode just in case it is an IDN using punycode encoding. We do not
2372 // need to worry about percent encoding because the URL parser will have
2373 // already taken care of that for us. Note that this only causes IDNs with an
2374 // appropriate `xn--` prefix to be decoded.
2375 if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2376 context_.host.length() > 0) {
2377 std::string unicode_host;
2378 if (!ToUnicode(context_.host, &unicode_host)) {
2379 return "";
2380 }
2381 return "\\\\" + unicode_host + decoded_path;
2382 }
2383 // Otherwise, it's a local path that requires a drive letter.
2384 if (decoded_path.length() < 3) {
2385 return "";
2386 }
2387 if (decoded_path[2] != ':' ||
2388 !IsASCIIAlpha(decoded_path[1])) {
2389 return "";
2390 }
2391 // Strip out the leading '\'.
2392 return decoded_path.substr(1);
2393 #else
2394 return decoded_path;
2395 #endif
2396 }
2397
FromFilePath(const std::string & file_path)2398 URL URL::FromFilePath(const std::string& file_path) {
2399 URL url("file://");
2400 std::string escaped_file_path;
2401 for (size_t i = 0; i < file_path.length(); ++i) {
2402 escaped_file_path += file_path[i];
2403 if (file_path[i] == '%')
2404 escaped_file_path += "25";
2405 }
2406 URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart,
2407 &url.context_, true, nullptr, false);
2408 return url;
2409 }
2410
2411 // This function works by calling out to a JS function that creates and
2412 // returns the JS URL object. Be mindful of the JS<->Native boundary
2413 // crossing that is required.
ToObject(Environment * env) const2414 MaybeLocal<Value> URL::ToObject(Environment* env) const {
2415 Isolate* isolate = env->isolate();
2416 Local<Context> context = env->context();
2417 Context::Scope context_scope(context);
2418
2419 const Local<Value> undef = Undefined(isolate);
2420 const Local<Value> null = Null(isolate);
2421
2422 if (context_.flags & URL_FLAGS_FAILED)
2423 return Local<Value>();
2424
2425 Local<Value> argv[] = {
2426 undef,
2427 undef,
2428 undef,
2429 undef,
2430 null, // host defaults to null
2431 null, // port defaults to null
2432 undef,
2433 null, // query defaults to null
2434 null, // fragment defaults to null
2435 };
2436 SetArgs(env, argv, context_);
2437
2438 MaybeLocal<Value> ret;
2439 {
2440 TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
2441
2442 // The SetURLConstructor method must have been called already to
2443 // set the constructor function used below. SetURLConstructor is
2444 // called automatically when the internal/url.js module is loaded
2445 // during the internal/bootstrap/node.js processing.
2446 ret = env->url_constructor_function()
2447 ->Call(env->context(), undef, arraysize(argv), argv);
2448 }
2449
2450 return ret;
2451 }
2452
2453 } // namespace url
2454 } // namespace node
2455
2456 NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
2457