• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "node_url.h"
2 #include "base_object-inl.h"
3 #include "node_errors.h"
4 #include "node_i18n.h"
5 #include "util-inl.h"
6 
7 #include <cmath>
8 #include <cstdio>
9 #include <string>
10 #include <vector>
11 
12 namespace node {
13 
14 using errors::TryCatchScope;
15 
16 using v8::Array;
17 using v8::Context;
18 using v8::Function;
19 using v8::FunctionCallbackInfo;
20 using v8::HandleScope;
21 using v8::Int32;
22 using v8::Integer;
23 using v8::Isolate;
24 using v8::Local;
25 using v8::MaybeLocal;
26 using v8::NewStringType;
27 using v8::Null;
28 using v8::Object;
29 using v8::String;
30 using v8::Undefined;
31 using v8::Value;
32 
Utf8String(Isolate * isolate,const std::string & str)33 Local<String> Utf8String(Isolate* isolate, const std::string& str) {
34   return String::NewFromUtf8(isolate,
35                              str.data(),
36                              NewStringType::kNormal,
37                              str.length()).ToLocalChecked();
38 }
39 
40 namespace url {
41 
42 namespace {
43 
// https://url.spec.whatwg.org/#eof-code-point
// Sentinel value standing in for the spec's EOF code point. It is stored in a
// plain `char`, so compare it against other `char` values only.
constexpr char kEOL = -1;

// Used in ToUSVString().
// U+FFFD REPLACEMENT CHARACTER as a single UTF-16 code unit.
constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
49 
// https://url.spec.whatwg.org/#concept-host
//
// Result of parsing a host. The object is a hand-rolled tagged union: `type_`
// records which member of `value_` is currently alive, and H_FAILED doubles
// as the "nothing stored" state a fresh object starts in.
class URLHost {
 public:
  URLHost() = default;
  ~URLHost();

  // `value_` may hold a live std::string that only this class knows how to
  // destroy, so member-wise copying can never be correct. The compiler already
  // deletes these implicitly because of the union; state it explicitly.
  URLHost(const URLHost&) = delete;
  URLHost& operator=(const URLHost&) = delete;

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host has been stored, i.e. the state is H_FAILED.
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  union Value {
    std::string domain_or_opaque;  // Alive for H_DOMAIN and H_OPAQUE.
    uint32_t ipv4;
    uint16_t ipv6[8];

    // Deliberately empty: the union cannot know which member is alive, so
    // URLHost::Reset() is responsible for destroying the string.
    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the live one and returns the object
  // to the H_FAILED state. Safe to call in any state.
  void Reset() {
    using string = std::string;
    switch (type_) {
      case HostType::H_DOMAIN:
      case HostType::H_OPAQUE:
        value_.domain_or_opaque.~string();
        break;
      default:
        break;
    }
    type_ = HostType::H_FAILED;
  }

  // Shared implementation of SetOpaque()/SetDomain(): ends the lifetime of
  // whatever is stored, then constructs the string in place and records `type`
  // as the live state. `type` must be H_DOMAIN or H_OPAQUE.
  void SetString(HostType type, std::string&& string) {
    Reset();
    type_ = type;
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }

  // Setting the string members of the union with = is brittle because
  // it relies on them being initialized to a state that requires no
  // destruction of old data.
  // For a long time, that worked well enough because ParseIPv6Host() happens
  // to zero-fill `value_`, but that really is relying on standard library
  // internals too much.
  // These helpers are the easiest solution but we might want to consider
  // just not forcing strings into an union.
  void SetOpaque(std::string&& string) {
    SetString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    SetString(HostType::H_DOMAIN, std::move(string));
  }
};

// Releases the stored string, if any.
URLHost::~URLHost() {
  Reset();
}
126 
// X-macro listing the enumerators of `url_cb_args` in order. Because plain
// enums count from zero, each name is also its position in the list, and
// ARG_COUNT (which must stay last) equals the number of entries before it.
// NOTE(review): presumably these index the argument array handed to the
// parse callback - confirm against the call sites.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// X-macro listing the enumerators of `url_error_cb_args` in order.
// The final entry intentionally has no line continuation, so the macro ends
// on that line no matter what follows it.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

// ARG_FLAGS == 0 ... ARG_FRAGMENT == 8, ARG_COUNT == 9.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};

// ERR_ARG_FLAGS == 0, ERR_ARG_INPUT == 1.
enum url_error_cb_args {
#define XX(name) name,
  ERR_ARGS(XX)
#undef XX
};
154 
// Generates `bool name(T ch)`, a predicate over a single code unit.
//   bits: minimum width, in bits, that the code unit type must have; checked
//         at compile time for every instantiation.
//   name: name of the generated function template.
//   expr: predicate body; refers to the code unit as `ch`.
#define CHAR_TEST(bits, name, expr)                                           \
  template <typename T>                                                       \
  bool name(const T ch) {                                                     \
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }

// Generates two overloads of `name`:
//   bool name(T ch1, T ch2)          - predicate over two code units; `expr`
//                                      refers to them as `ch1` and `ch2`.
//   bool name(const basic_string<T>&) - applies the predicate to the first
//                                      two code units; false for strings
//                                      shorter than two.
// `bits` has the same meaning as in CHAR_TEST.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be at least " #bits " bits wide");          \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'A' && ch <= 'F') ||
                               (ch >= 'a' && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))

// https://infra.spec.whatwg.org/#ascii-lowercase
// Returns the lowercase form of an ASCII letter and every other value
// unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  // `ch | 0x20` is computed as an int; convert back to T explicitly.
  return IsASCIIAlpha(ch) ? static_cast<T>(ch | 0x20) : ch;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '\\' || ch == ']')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)

// If a UTF-16 surrogate is a low/trailing one.
// Only meaningful for values that already satisfy IsUnicodeSurrogate(): it
// inspects nothing but bit 0x400.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
230 
// Percent-encoding lookup table: hex[b] is the three-character string "%XX"
// for byte value b, with XX in uppercase hexadecimal.
// Both the strings and the array of pointers are immutable.
const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
265 
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
// 256-bit membership bitmap over byte values. Byte (c >> 3) of the array
// covers values 8 * (c >> 3) .. 8 * (c >> 3) + 7, and the bit for value c is
// (1 << (c & 7)); the comment above each row names the eight values it
// covers. Bits are set for 0x00-0x1F and 0x7F-0xFF.
// NOTE(review): a set bit presumably means "percent-encode this byte" -
// confirm against the code that reads the table.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  // 00     01     02     03     04     05     06     07
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 08     09     0A     0B     0C     0D     0E     0F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 10     11     12     13     14     15     16     17
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 18     19     1A     1B     1C     1D     1E     1F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 20     21     22     23     24     25     26     27
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 28     29     2A     2B     2C     2D     2E     2F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 30     31     32     33     34     35     36     37
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 40     41     42     43     44     45     46     47
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 48     49     4A     4B     4C     4D     4E     4F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 50     51     52     53     54     55     56     57
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 58     59     5A     5B     5C     5D     5E     5F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 60     61     62     63     64     65     66     67
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 68     69     6A     6B     6C     6D     6E     6F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 70     71     72     73     74     75     76     77
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
  // 80     81     82     83     84     85     86     87
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 88     89     8A     8B     8C     8D     8E     8F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 90     91     92     93     94     95     96     97
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 98     99     9A     9B     9C     9D     9E     9F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A0     A1     A2     A3     A4     A5     A6     A7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A8     A9     AA     AB     AC     AD     AE     AF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B0     B1     B2     B3     B4     B5     B6     B7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B8     B9     BA     BB     BC     BD     BE     BF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C0     C1     C2     C3     C4     C5     C6     C7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C8     C9     CA     CB     CC     CD     CE     CF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D0     D1     D2     D3     D4     D5     D6     D7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D8     D9     DA     DB     DC     DD     DE     DF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E0     E1     E2     E3     E4     E5     E6     E7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E8     E9     EA     EB     EC     ED     EE     EF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F0     F1     F2     F3     F4     F5     F6     F7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F8     F9     FA     FB     FC     FD     FE     FF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
332 
// https://url.spec.whatwg.org/#fragment-percent-encode-set
// 256-bit membership bitmap over byte values. Byte (c >> 3) of the array
// covers values 8 * (c >> 3) .. 8 * (c >> 3) + 7, and the bit for value c is
// (1 << (c & 7)); the comment above each row names the eight values it
// covers. Bits are set for 0x00-0x1F, 0x7F-0xFF and for
// 0x20 (space), 0x22 ("), 0x3C (<), 0x3E (>) and 0x60 (`).
// NOTE(review): a set bit presumably means "percent-encode this byte" -
// confirm against the code that reads the table.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  // 00     01     02     03     04     05     06     07
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 08     09     0A     0B     0C     0D     0E     0F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 10     11     12     13     14     15     16     17
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 18     19     1A     1B     1C     1D     1E     1F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 20     21     22     23     24     25     26     27
    0x01 | 0x00 | 0x04 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 28     29     2A     2B     2C     2D     2E     2F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 30     31     32     33     34     35     36     37
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,
  // 40     41     42     43     44     45     46     47
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 48     49     4A     4B     4C     4D     4E     4F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 50     51     52     53     54     55     56     57
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 58     59     5A     5B     5C     5D     5E     5F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 60     61     62     63     64     65     66     67
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 68     69     6A     6B     6C     6D     6E     6F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 70     71     72     73     74     75     76     77
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
  // 80     81     82     83     84     85     86     87
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 88     89     8A     8B     8C     8D     8E     8F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 90     91     92     93     94     95     96     97
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 98     99     9A     9B     9C     9D     9E     9F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A0     A1     A2     A3     A4     A5     A6     A7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A8     A9     AA     AB     AC     AD     AE     AF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B0     B1     B2     B3     B4     B5     B6     B7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B8     B9     BA     BB     BC     BD     BE     BF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C0     C1     C2     C3     C4     C5     C6     C7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C8     C9     CA     CB     CC     CD     CE     CF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D0     D1     D2     D3     D4     D5     D6     D7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D8     D9     DA     DB     DC     DD     DE     DF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E0     E1     E2     E3     E4     E5     E6     E7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E8     E9     EA     EB     EC     ED     EE     EF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F0     F1     F2     F3     F4     F5     F6     F7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F8     F9     FA     FB     FC     FD     FE     FF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
399 
400 
// https://url.spec.whatwg.org/#path-percent-encode-set
// 256-bit membership bitmap over byte values. Byte (c >> 3) of the array
// covers values 8 * (c >> 3) .. 8 * (c >> 3) + 7, and the bit for value c is
// (1 << (c & 7)); the comment above each row names the eight values it
// covers. Bits are set for 0x00-0x1F, 0x7F-0xFF and for
// 0x20 (space), 0x22 ("), 0x23 (#), 0x3C (<), 0x3E (>), 0x3F (?), 0x60 (`),
// 0x7B ({) and 0x7D (}).
// NOTE(review): a set bit presumably means "percent-encode this byte" -
// confirm against the code that reads the table.
const uint8_t PATH_ENCODE_SET[32] = {
  // 00     01     02     03     04     05     06     07
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 08     09     0A     0B     0C     0D     0E     0F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 10     11     12     13     14     15     16     17
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 18     19     1A     1B     1C     1D     1E     1F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 20     21     22     23     24     25     26     27
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
  // 28     29     2A     2B     2C     2D     2E     2F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 30     31     32     33     34     35     36     37
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80,
  // 40     41     42     43     44     45     46     47
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 48     49     4A     4B     4C     4D     4E     4F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 50     51     52     53     54     55     56     57
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 58     59     5A     5B     5C     5D     5E     5F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 60     61     62     63     64     65     66     67
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 68     69     6A     6B     6C     6D     6E     6F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 70     71     72     73     74     75     76     77
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80,
  // 80     81     82     83     84     85     86     87
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 88     89     8A     8B     8C     8D     8E     8F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 90     91     92     93     94     95     96     97
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 98     99     9A     9B     9C     9D     9E     9F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A0     A1     A2     A3     A4     A5     A6     A7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A8     A9     AA     AB     AC     AD     AE     AF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B0     B1     B2     B3     B4     B5     B6     B7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B8     B9     BA     BB     BC     BD     BE     BF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C0     C1     C2     C3     C4     C5     C6     C7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C8     C9     CA     CB     CC     CD     CE     CF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D0     D1     D2     D3     D4     D5     D6     D7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D8     D9     DA     DB     DC     DD     DE     DF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E0     E1     E2     E3     E4     E5     E6     E7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E8     E9     EA     EB     EC     ED     EE     EF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F0     F1     F2     F3     F4     F5     F6     F7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F8     F9     FA     FB     FC     FD     FE     FF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
467 
// https://url.spec.whatwg.org/#userinfo-percent-encode-set
// 256-bit membership bitmap over byte values. Byte (c >> 3) of the array
// covers values 8 * (c >> 3) .. 8 * (c >> 3) + 7, and the bit for value c is
// (1 << (c & 7)); the comment above each row names the eight values it
// covers. Bits are set for 0x00-0x1F, 0x7F-0xFF and for
// 0x20 (space), 0x22 ("), 0x23 (#), 0x2F (/), 0x3A (:), 0x3B (;), 0x3C (<),
// 0x3D (=), 0x3E (>), 0x3F (?), 0x40 (@), 0x5B ([), 0x5C (\), 0x5D (]),
// 0x5E (^), 0x60 (`), 0x7B ({), 0x7C (|) and 0x7D (}).
// NOTE(review): a set bit presumably means "percent-encode this byte" -
// confirm against the code that reads the table.
const uint8_t USERINFO_ENCODE_SET[32] = {
  // 00     01     02     03     04     05     06     07
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 08     09     0A     0B     0C     0D     0E     0F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 10     11     12     13     14     15     16     17
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 18     19     1A     1B     1C     1D     1E     1F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 20     21     22     23     24     25     26     27
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
  // 28     29     2A     2B     2C     2D     2E     2F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
  // 30     31     32     33     34     35     36     37
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 40     41     42     43     44     45     46     47
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 48     49     4A     4B     4C     4D     4E     4F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 50     51     52     53     54     55     56     57
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 58     59     5A     5B     5C     5D     5E     5F
    0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00,
  // 60     61     62     63     64     65     66     67
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 68     69     6A     6B     6C     6D     6E     6F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 70     71     72     73     74     75     76     77
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80,
  // 80     81     82     83     84     85     86     87
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 88     89     8A     8B     8C     8D     8E     8F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 90     91     92     93     94     95     96     97
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 98     99     9A     9B     9C     9D     9E     9F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A0     A1     A2     A3     A4     A5     A6     A7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A8     A9     AA     AB     AC     AD     AE     AF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B0     B1     B2     B3     B4     B5     B6     B7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B8     B9     BA     BB     BC     BD     BE     BF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C0     C1     C2     C3     C4     C5     C6     C7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C8     C9     CA     CB     CC     CD     CE     CF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D0     D1     D2     D3     D4     D5     D6     D7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D8     D9     DA     DB     DC     DD     DE     DF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E0     E1     E2     E3     E4     E5     E6     E7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E8     E9     EA     EB     EC     ED     EE     EF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F0     F1     F2     F3     F4     F5     F6     F7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F8     F9     FA     FB     FC     FD     FE     FF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
534 
// https://url.spec.whatwg.org/#query-percent-encode-set
// 256-bit membership bitmap over byte values. Byte (c >> 3) of the array
// covers values 8 * (c >> 3) .. 8 * (c >> 3) + 7, and the bit for value c is
// (1 << (c & 7)); the comment above each row names the eight values it
// covers. Bits are set for 0x00-0x1F, 0x7F-0xFF and for
// 0x20 (space), 0x22 ("), 0x23 (#), 0x3C (<) and 0x3E (>).
// NOTE(review): a set bit presumably means "percent-encode this byte" -
// confirm against the code that reads the table.
const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = {
  // 00     01     02     03     04     05     06     07
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 08     09     0A     0B     0C     0D     0E     0F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 10     11     12     13     14     15     16     17
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 18     19     1A     1B     1C     1D     1E     1F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 20     21     22     23     24     25     26     27
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
  // 28     29     2A     2B     2C     2D     2E     2F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 30     31     32     33     34     35     36     37
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,
  // 40     41     42     43     44     45     46     47
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 48     49     4A     4B     4C     4D     4E     4F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 50     51     52     53     54     55     56     57
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 58     59     5A     5B     5C     5D     5E     5F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 60     61     62     63     64     65     66     67
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 68     69     6A     6B     6C     6D     6E     6F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 70     71     72     73     74     75     76     77
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
  // 80     81     82     83     84     85     86     87
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 88     89     8A     8B     8C     8D     8E     8F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 90     91     92     93     94     95     96     97
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 98     99     9A     9B     9C     9D     9E     9F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A0     A1     A2     A3     A4     A5     A6     A7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A8     A9     AA     AB     AC     AD     AE     AF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B0     B1     B2     B3     B4     B5     B6     B7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B8     B9     BA     BB     BC     BD     BE     BF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C0     C1     C2     C3     C4     C5     C6     C7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C8     C9     CA     CB     CC     CD     CE     CF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D0     D1     D2     D3     D4     D5     D6     D7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D8     D9     DA     DB     DC     DD     DE     DF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E0     E1     E2     E3     E4     E5     E6     E7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E8     E9     EA     EB     EC     ED     EE     EF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F0     F1     F2     F3     F4     F5     F6     F7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F8     F9     FA     FB     FC     FD     FE     FF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
601 
// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded.
// One bit per byte value: byte N lives in element N / 8, bit N % 8 (LSB
// first). Rows where every bit is equal are written as 0xFF / 0x00; the three
// mixed rows keep the one-column-per-byte layout.
const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = {
  // 00-1F: every byte is encoded.
    0xFF, 0xFF, 0xFF, 0xFF,
  // 20     21     22     23     24     25     26     27
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x80,
  // 28-37: nothing is encoded.
    0x00, 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,
  // 40-77: nothing is encoded.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
  // 80-FF: every byte is encoded.
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
669 
// Tests bit `i` of the bit set `a`: the bit is stored in byte i / 8 at
// position i % 8, counting from the least significant bit.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t containing_byte = a[i / 8];
  const int mask = 1 << (i % 8);
  return (containing_byte & mask) != 0;
}
673 
674 // Appends ch to str. If ch position in encode_set is set, the ch will
675 // be percent-encoded then appended.
AppendOrEscape(std::string * str,const unsigned char ch,const uint8_t encode_set[])676 void AppendOrEscape(std::string* str,
677                            const unsigned char ch,
678                            const uint8_t encode_set[]) {
679   if (BitAt(encode_set, ch))
680     *str += hex[ch];
681   else
682     *str += ch;
683 }
684 
// Converts one ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its numeric
// value. Any other input yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  unsigned digit = static_cast<unsigned>(-1);
  if (ch >= '0' && ch <= '9') {
    digit = ch - '0';
  } else if (ch >= 'A' && ch <= 'F') {
    digit = 10 + (ch - 'A');
  } else if (ch >= 'a' && ch <= 'f') {
    digit = 10 + (ch - 'a');
  }
  return digit;
}
695 
PercentDecode(const char * input,size_t len)696 std::string PercentDecode(const char* input, size_t len) {
697   std::string dest;
698   if (len == 0)
699     return dest;
700   dest.reserve(len);
701   const char* pointer = input;
702   const char* end = input + len;
703 
704   while (pointer < end) {
705     const char ch = pointer[0];
706     size_t remaining = end - pointer - 1;
707     if (ch != '%' || remaining < 2 ||
708         (ch == '%' &&
709          (!IsASCIIHexDigit(pointer[1]) ||
710           !IsASCIIHexDigit(pointer[2])))) {
711       dest += ch;
712       pointer++;
713       continue;
714     } else {
715       unsigned a = hex2bin(pointer[1]);
716       unsigned b = hex2bin(pointer[2]);
717       char c = static_cast<char>(a * 16 + b);
718       dest += c;
719       pointer += 3;
720     }
721   }
722   return dest;
723 }
724 
// X-macro table of the special schemes. Each row is
// XX(key, default_port, "scheme:"):
//   - key: pasted into env->url_special_<key>_string() by GetSpecial().
//   - default_port: NormalizePort() maps this port to -1 for the scheme.
//   - "scheme:": the scheme text, including the trailing colon, that
//     IsSpecial(), GetSpecial() and NormalizePort() compare against.
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(gopher, 70, "gopher:")                                                   \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")
733 
IsSpecial(const std::string & scheme)734 bool IsSpecial(const std::string& scheme) {
735 #define V(_, __, name) if (scheme == name) return true;
736   SPECIALS(V);
737 #undef V
738   return false;
739 }
740 
GetSpecial(Environment * env,const std::string & scheme)741 Local<String> GetSpecial(Environment* env, const std::string& scheme) {
742 #define V(key, _, name) if (scheme == name)                                  \
743     return env->url_special_##key##_string();
744   SPECIALS(V)
745 #undef V
746   UNREACHABLE();
747 }
748 
NormalizePort(const std::string & scheme,int p)749 int NormalizePort(const std::string& scheme, int p) {
750 #define V(_, port, name) if (scheme == name && p == port) return -1;
751   SPECIALS(V);
752 #undef V
753   return p;
754 }
755 
756 // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
StartsWithWindowsDriveLetter(const char * p,const char * end)757 bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
758   size_t length = end - p;
759   return length >= 2 &&
760     IsWindowsDriveLetter(p[0], p[1]) &&
761     (length == 2 ||
762       p[2] == '/' ||
763       p[2] == '\\' ||
764       p[2] == '?' ||
765       p[2] == '#');
766 }
767 
768 #if defined(NODE_HAVE_I18N_SUPPORT)
ToUnicode(const std::string & input,std::string * output)769 bool ToUnicode(const std::string& input, std::string* output) {
770   MaybeStackBuffer<char> buf;
771   if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
772     return false;
773   output->assign(*buf, buf.length());
774   return true;
775 }
776 
ToASCII(const std::string & input,std::string * output)777 bool ToASCII(const std::string& input, std::string* output) {
778   MaybeStackBuffer<char> buf;
779   if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
780     return false;
781   output->assign(*buf, buf.length());
782   return true;
783 }
784 #else
// Intentional no-ops if ICU is not present.
bool ToUnicode(const std::string& input, std::string* output) {
  // No conversion available: hand the input back unchanged.
  output->assign(input);
  return true;
}
790 
bool ToASCII(const std::string& input, std::string* output) {
  // No conversion available: hand the input back unchanged.
  output->assign(input);
  return true;
}
795 #endif
796 
// Parses the `length` bytes at `input` as the textual form of an IPv6
// address (URLHost::ParseHost passes the text between '[' and ']') and stores
// the result as 16-bit pieces in value_.ipv6.
//
// On success type_ becomes HostType::H_IPV6. Every early `return` below is a
// parse failure: type_ is left as H_FAILED (asserted on entry), which is what
// ParsingFailed() reports.
// NOTE(review): the steps appear to follow the IPv6 parser of the WHATWG URL
// Standard - confirm against the spec before changing the control flow.
void URLHost::ParseIPv6Host(const char* input, size_t length) {
  CHECK_EQ(type_, HostType::H_FAILED);
  // Start from an all-zero address; pieces that are never written stay 0.
  unsigned size = arraysize(value_.ipv6);
  for (unsigned n = 0; n < size; n++)
    value_.ipv6[n] = 0;
  // piece_pointer: next piece to fill. compress_pointer: piece at which a
  // "::" was seen (nullptr while none has been seen).
  uint16_t* piece_pointer = &value_.ipv6[0];
  uint16_t* const buffer_end = piece_pointer + size;
  uint16_t* compress_pointer = nullptr;
  const char* pointer = input;
  const char* end = pointer + length;
  unsigned value, len, numbers_seen;
  // `ch` always mirrors *pointer, or kEOL once pointer reaches `end`.
  char ch = pointer < end ? pointer[0] : kEOL;
  // A leading ':' is only valid as the first half of a leading "::".
  if (ch == ':') {
    if (length < 2 || pointer[1] != ':')
      return;
    pointer += 2;
    ch = pointer < end ? pointer[0] : kEOL;
    piece_pointer++;
    compress_pointer = piece_pointer;
  }
  while (ch != kEOL) {
    // More input but no room for another piece.
    if (piece_pointer >= buffer_end)
      return;
    // A ':' at the start of a piece is the second colon of a "::"; only one
    // compression is allowed per address.
    if (ch == ':') {
      if (compress_pointer != nullptr)
        return;
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      piece_pointer++;
      compress_pointer = piece_pointer;
      continue;
    }
    // Accumulate up to four hex digits into `value`.
    value = 0;
    len = 0;
    while (len < 4 && IsASCIIHexDigit(ch)) {
      value = value * 0x10 + hex2bin(ch);
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      len++;
    }
    switch (ch) {
      case '.':
        // The digits just read were the first number of an embedded
        // dotted-decimal IPv4 address: rewind and re-read them as decimal.
        if (len == 0)
          return;
        pointer -= len;
        ch = pointer < end ? pointer[0] : kEOL;
        // The IPv4 part fills two pieces, so two must still be free.
        if (piece_pointer > buffer_end - 2)
          return;
        numbers_seen = 0;
        while (ch != kEOL) {
          // 0xffffffff marks "no digit read yet" for the current number.
          value = 0xffffffff;
          // Every number after the first must be preceded by a '.', and at
          // most four numbers are accepted.
          if (numbers_seen > 0) {
            if (ch == '.' && numbers_seen < 4) {
              pointer++;
              ch = pointer < end ? pointer[0] : kEOL;
            } else {
              return;
            }
          }
          if (!IsASCIIDigit(ch))
            return;
          while (IsASCIIDigit(ch)) {
            unsigned number = ch - '0';
            if (value == 0xffffffff) {
              value = number;
            } else if (value == 0) {
              // A further digit after a leading 0 is rejected.
              return;
            } else {
              value = value * 10 + number;
            }
            if (value > 255)
              return;
            pointer++;
            ch = pointer < end ? pointer[0] : kEOL;
          }
          // Two numbers are packed into each 16-bit piece, high byte first.
          *piece_pointer = *piece_pointer * 0x100 + value;
          numbers_seen++;
          if (numbers_seen == 2 || numbers_seen == 4)
            piece_pointer++;
        }
        if (numbers_seen != 4)
          return;
        // ch is kEOL here, so this `continue` ends the outer loop.
        continue;
      case ':':
        // Piece separator; a single trailing ':' at end of input is invalid.
        pointer++;
        ch = pointer < end ? pointer[0] : kEOL;
        if (ch == kEOL)
          return;
        break;
      case kEOL:
        break;
      default:
        return;
    }
    *piece_pointer = value;
    piece_pointer++;
  }

  if (compress_pointer != nullptr) {
    // Move the pieces that were parsed after the "::" to the end of the
    // array by swapping them with the (zero) pieces found there.
    unsigned swaps = piece_pointer - compress_pointer;
    piece_pointer = buffer_end - 1;
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
      uint16_t temp = *piece_pointer;
      uint16_t* swap_piece = compress_pointer + swaps - 1;
      *piece_pointer = *swap_piece;
      *swap_piece = temp;
       piece_pointer--;
       swaps--;
    }
  } else if (compress_pointer == nullptr &&
             piece_pointer != buffer_end) {
    // Without a "::" every piece must have been supplied explicitly.
    return;
  }
  type_ = HostType::H_IPV6;
}
912 
ParseNumber(const char * start,const char * end)913 int64_t ParseNumber(const char* start, const char* end) {
914   unsigned R = 10;
915   if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
916     start += 2;
917     R = 16;
918   }
919   if (end - start == 0) {
920     return 0;
921   } else if (R == 10 && end - start > 1 && start[0] == '0') {
922     start++;
923     R = 8;
924   }
925   const char* p = start;
926 
927   while (p < end) {
928     const char ch = p[0];
929     switch (R) {
930       case 8:
931         if (ch < '0' || ch > '7')
932           return -1;
933         break;
934       case 10:
935         if (!IsASCIIDigit(ch))
936           return -1;
937         break;
938       case 16:
939         if (!IsASCIIHexDigit(ch))
940           return -1;
941         break;
942     }
943     p++;
944   }
945   return strtoll(start, nullptr, R);
946 }
947 
ParseIPv4Host(const char * input,size_t length,bool * is_ipv4)948 void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
949   CHECK_EQ(type_, HostType::H_FAILED);
950   *is_ipv4 = false;
951   const char* pointer = input;
952   const char* mark = input;
953   const char* end = pointer + length;
954   int parts = 0;
955   uint32_t val = 0;
956   uint64_t numbers[4];
957   int tooBigNumbers = 0;
958   if (length == 0)
959     return;
960 
961   while (pointer <= end) {
962     const char ch = pointer < end ? pointer[0] : kEOL;
963     int remaining = end - pointer - 1;
964     if (ch == '.' || ch == kEOL) {
965       if (++parts > static_cast<int>(arraysize(numbers)))
966         return;
967       if (pointer == mark)
968         return;
969       int64_t n = ParseNumber(mark, pointer);
970       if (n < 0)
971         return;
972 
973       if (n > 255) {
974         tooBigNumbers++;
975       }
976       numbers[parts - 1] = n;
977       mark = pointer + 1;
978       if (ch == '.' && remaining == 0)
979         break;
980     }
981     pointer++;
982   }
983   CHECK_GT(parts, 0);
984   *is_ipv4 = true;
985 
986   // If any but the last item in numbers is greater than 255, return failure.
987   // If the last item in numbers is greater than or equal to
988   // 256^(5 - the number of items in numbers), return failure.
989   if (tooBigNumbers > 1 ||
990       (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
991       numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
992     return;
993   }
994 
995   type_ = HostType::H_IPV4;
996   val = numbers[parts - 1];
997   for (int n = 0; n < parts - 1; n++) {
998     double b = 3 - n;
999     val += numbers[n] * pow(256, b);
1000   }
1001 
1002   value_.ipv4 = val;
1003 }
1004 
ParseOpaqueHost(const char * input,size_t length)1005 void URLHost::ParseOpaqueHost(const char* input, size_t length) {
1006   CHECK_EQ(type_, HostType::H_FAILED);
1007   std::string output;
1008   output.reserve(length);
1009   for (size_t i = 0; i < length; i++) {
1010     const char ch = input[i];
1011     if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
1012       return;
1013     } else {
1014       AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
1015     }
1016   }
1017 
1018   SetOpaque(std::move(output));
1019 }
1020 
ParseHost(const char * input,size_t length,bool is_special,bool unicode)1021 void URLHost::ParseHost(const char* input,
1022                         size_t length,
1023                         bool is_special,
1024                         bool unicode) {
1025   CHECK_EQ(type_, HostType::H_FAILED);
1026   const char* pointer = input;
1027 
1028   if (length == 0)
1029     return;
1030 
1031   if (pointer[0] == '[') {
1032     if (pointer[length - 1] != ']')
1033       return;
1034     return ParseIPv6Host(++pointer, length - 2);
1035   }
1036 
1037   if (!is_special)
1038     return ParseOpaqueHost(input, length);
1039 
1040   // First, we have to percent decode
1041   std::string decoded = PercentDecode(input, length);
1042 
1043   // Then we have to punycode toASCII
1044   if (!ToASCII(decoded, &decoded))
1045     return;
1046 
1047   // If any of the following characters are still present, we have to fail
1048   for (size_t n = 0; n < decoded.size(); n++) {
1049     const char ch = decoded[n];
1050     if (IsForbiddenHostCodePoint(ch)) {
1051       return;
1052     }
1053   }
1054 
1055   // Check to see if it's an IPv4 IP address
1056   bool is_ipv4;
1057   ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
1058   if (is_ipv4)
1059     return;
1060 
1061   // If the unicode flag is set, run the result through punycode ToUnicode
1062   if (unicode && !ToUnicode(decoded, &decoded))
1063     return;
1064 
1065   // It's not an IPv4 or IPv6 address, it must be a domain
1066   SetDomain(std::move(decoded));
1067 }
1068 
// Locates the longest sequence of 0 segments in an IPv6 address
// in order to use the :: compression when serializing.
// Returns a pointer to the first element of the longest run of zeros in
// values[0..len), or nullptr when no run is longer than one element. When
// several runs share the maximum length, the earliest one is returned.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_start = nullptr;
  unsigned best_length = 1;  // A run must be strictly longer than this.
  T* run_start = nullptr;
  unsigned run_length = 0;

  for (size_t index = 0; index < len; ++index) {
    if (values[index] == 0) {
      if (run_length == 0)
        run_start = values + index;
      ++run_length;
      continue;
    }
    // A non-zero element closes the current run.
    if (run_length > best_length) {
      best_length = run_length;
      best_start = run_start;
    }
    run_length = 0;
  }
  // The array may end in the middle of a run.
  if (run_length > best_length)
    best_start = run_start;
  return best_start;
}
1099 
ToStringMove()1100 std::string URLHost::ToStringMove() {
1101   std::string return_value;
1102   switch (type_) {
1103     case HostType::H_DOMAIN:
1104     case HostType::H_OPAQUE:
1105       return_value = std::move(value_.domain_or_opaque);
1106       break;
1107     default:
1108       return_value = ToString();
1109       break;
1110   }
1111   Reset();
1112   return return_value;
1113 }
1114 
ToString() const1115 std::string URLHost::ToString() const {
1116   std::string dest;
1117   switch (type_) {
1118     case HostType::H_DOMAIN:
1119     case HostType::H_OPAQUE:
1120       return value_.domain_or_opaque;
1121       break;
1122     case HostType::H_IPV4: {
1123       dest.reserve(15);
1124       uint32_t value = value_.ipv4;
1125       for (int n = 0; n < 4; n++) {
1126         char buf[4];
1127         snprintf(buf, sizeof(buf), "%d", value % 256);
1128         dest.insert(0, buf);
1129         if (n < 3)
1130           dest.insert(0, 1, '.');
1131         value /= 256;
1132       }
1133       break;
1134     }
1135     case HostType::H_IPV6: {
1136       dest.reserve(41);
1137       dest += '[';
1138       const uint16_t* start = &value_.ipv6[0];
1139       const uint16_t* compress_pointer =
1140           FindLongestZeroSequence(start, 8);
1141       bool ignore0 = false;
1142       for (int n = 0; n <= 7; n++) {
1143         const uint16_t* piece = &value_.ipv6[n];
1144         if (ignore0 && *piece == 0)
1145           continue;
1146         else if (ignore0)
1147           ignore0 = false;
1148         if (compress_pointer == piece) {
1149           dest += n == 0 ? "::" : ":";
1150           ignore0 = true;
1151           continue;
1152         }
1153         char buf[5];
1154         snprintf(buf, sizeof(buf), "%x", *piece);
1155         dest += buf;
1156         if (n < 7)
1157           dest += ':';
1158       }
1159       dest += ']';
1160       break;
1161     }
1162     case HostType::H_FAILED:
1163       break;
1164   }
1165   return dest;
1166 }
1167 
ParseHost(const std::string & input,std::string * output,bool is_special,bool unicode=false)1168 bool ParseHost(const std::string& input,
1169                std::string* output,
1170                bool is_special,
1171                bool unicode = false) {
1172   if (input.empty()) {
1173     output->clear();
1174     return true;
1175   }
1176   URLHost host;
1177   host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1178   if (host.ParsingFailed())
1179     return false;
1180   *output = host.ToStringMove();
1181   return true;
1182 }
1183 
FromJSStringArray(Environment * env,Local<Array> array)1184 std::vector<std::string> FromJSStringArray(Environment* env,
1185                                            Local<Array> array) {
1186   std::vector<std::string> vec;
1187   if (array->Length() > 0)
1188     vec.reserve(array->Length());
1189   for (size_t n = 0; n < array->Length(); n++) {
1190     Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
1191     if (val->IsString()) {
1192       Utf8Value value(env->isolate(), val.As<String>());
1193       vec.emplace_back(*value, value.length());
1194     }
1195   }
1196   return vec;
1197 }
1198 
HarvestBase(Environment * env,Local<Object> base_obj)1199 url_data HarvestBase(Environment* env, Local<Object> base_obj) {
1200   url_data base;
1201   Local<Context> context = env->context();
1202 
1203   Local<Value> flags =
1204       base_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1205   if (flags->IsInt32())
1206     base.flags = flags->Int32Value(context).FromJust();
1207 
1208   Local<Value> port =
1209       base_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1210   if (port->IsInt32())
1211     base.port = port->Int32Value(context).FromJust();
1212 
1213   Local<Value> scheme =
1214       base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1215   base.scheme = Utf8Value(env->isolate(), scheme).out();
1216 
1217   auto GetStr = [&](std::string url_data::*member,
1218                     int flag,
1219                     Local<String> name,
1220                     bool empty_as_present) {
1221     Local<Value> value = base_obj->Get(env->context(), name).ToLocalChecked();
1222     if (value->IsString()) {
1223       Utf8Value utf8value(env->isolate(), value.As<String>());
1224       (base.*member).assign(*utf8value, utf8value.length());
1225       if (empty_as_present || value.As<String>()->Length() != 0) {
1226         base.flags |= flag;
1227       }
1228     }
1229   };
1230   GetStr(&url_data::username,
1231          URL_FLAGS_HAS_USERNAME,
1232          env->username_string(),
1233          false);
1234   GetStr(&url_data::password,
1235          URL_FLAGS_HAS_PASSWORD,
1236          env->password_string(),
1237          false);
1238   GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
1239   GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
1240   GetStr(&url_data::fragment,
1241          URL_FLAGS_HAS_FRAGMENT,
1242          env->fragment_string(),
1243          true);
1244 
1245   Local<Value>
1246       path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked();
1247   if (path->IsArray()) {
1248     base.flags |= URL_FLAGS_HAS_PATH;
1249     base.path = FromJSStringArray(env, path.As<Array>());
1250   }
1251   return base;
1252 }
1253 
HarvestContext(Environment * env,Local<Object> context_obj)1254 url_data HarvestContext(Environment* env, Local<Object> context_obj) {
1255   url_data context;
1256   Local<Value> flags =
1257       context_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1258   if (flags->IsInt32()) {
1259     static constexpr int32_t kCopyFlagsMask =
1260         URL_FLAGS_SPECIAL |
1261         URL_FLAGS_CANNOT_BE_BASE |
1262         URL_FLAGS_HAS_USERNAME |
1263         URL_FLAGS_HAS_PASSWORD |
1264         URL_FLAGS_HAS_HOST;
1265     context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
1266   }
1267   Local<Value> scheme =
1268       context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1269   if (scheme->IsString()) {
1270     Utf8Value value(env->isolate(), scheme);
1271     context.scheme.assign(*value, value.length());
1272   }
1273   Local<Value> port =
1274       context_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1275   if (port->IsInt32())
1276     context.port = port.As<Int32>()->Value();
1277   if (context.flags & URL_FLAGS_HAS_USERNAME) {
1278     Local<Value> username =
1279         context_obj->Get(env->context(),
1280                          env->username_string()).ToLocalChecked();
1281     CHECK(username->IsString());
1282     Utf8Value value(env->isolate(), username);
1283     context.username.assign(*value, value.length());
1284   }
1285   if (context.flags & URL_FLAGS_HAS_PASSWORD) {
1286     Local<Value> password =
1287         context_obj->Get(env->context(),
1288                          env->password_string()).ToLocalChecked();
1289     CHECK(password->IsString());
1290     Utf8Value value(env->isolate(), password);
1291     context.password.assign(*value, value.length());
1292   }
1293   Local<Value> host =
1294       context_obj->Get(env->context(),
1295                        env->host_string()).ToLocalChecked();
1296   if (host->IsString()) {
1297     Utf8Value value(env->isolate(), host);
1298     context.host.assign(*value, value.length());
1299   }
1300   return context;
1301 }
1302 
1303 // Single dot segment can be ".", "%2e", or "%2E"
IsSingleDotSegment(const std::string & str)1304 bool IsSingleDotSegment(const std::string& str) {
1305   switch (str.size()) {
1306     case 1:
1307       return str == ".";
1308     case 3:
1309       return str[0] == '%' &&
1310              str[1] == '2' &&
1311              ASCIILowercase(str[2]) == 'e';
1312     default:
1313       return false;
1314   }
1315 }
1316 
1317 // Double dot segment can be:
1318 //   "..", ".%2e", ".%2E", "%2e.", "%2E.",
1319 //   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
IsDoubleDotSegment(const std::string & str)1320 bool IsDoubleDotSegment(const std::string& str) {
1321   switch (str.size()) {
1322     case 2:
1323       return str == "..";
1324     case 4:
1325       if (str[0] != '.' && str[0] != '%')
1326         return false;
1327       return ((str[0] == '.' &&
1328                str[1] == '%' &&
1329                str[2] == '2' &&
1330                ASCIILowercase(str[3]) == 'e') ||
1331               (str[0] == '%' &&
1332                str[1] == '2' &&
1333                ASCIILowercase(str[2]) == 'e' &&
1334                str[3] == '.'));
1335     case 6:
1336       return (str[0] == '%' &&
1337               str[1] == '2' &&
1338               ASCIILowercase(str[2]) == 'e' &&
1339               str[3] == '%' &&
1340               str[4] == '2' &&
1341               ASCIILowercase(str[5]) == 'e');
1342     default:
1343       return false;
1344   }
1345 }
1346 
ShortenUrlPath(struct url_data * url)1347 void ShortenUrlPath(struct url_data* url) {
1348   if (url->path.empty()) return;
1349   if (url->path.size() == 1 && url->scheme == "file:" &&
1350       IsNormalizedWindowsDriveLetter(url->path[0])) return;
1351   url->path.pop_back();
1352 }
1353 
1354 }  // anonymous namespace
1355 
Parse(const char * input,size_t len,enum url_parse_state state_override,struct url_data * url,bool has_url,const struct url_data * base,bool has_base)1356 void URL::Parse(const char* input,
1357                 size_t len,
1358                 enum url_parse_state state_override,
1359                 struct url_data* url,
1360                 bool has_url,
1361                 const struct url_data* base,
1362                 bool has_base) {
1363   const char* p = input;
1364   const char* end = input + len;
1365 
1366   if (!has_url) {
1367     for (const char* ptr = p; ptr < end; ptr++) {
1368       if (IsC0ControlOrSpace(*ptr))
1369         p++;
1370       else
1371         break;
1372     }
1373     for (const char* ptr = end - 1; ptr >= p; ptr--) {
1374       if (IsC0ControlOrSpace(*ptr))
1375         end--;
1376       else
1377         break;
1378     }
1379     input = p;
1380     len = end - p;
1381   }
1382 
1383   // The spec says we should strip out any ASCII tabs or newlines.
1384   // In those cases, we create another std::string instance with the filtered
1385   // contents, but in the general case we avoid the overhead.
1386   std::string whitespace_stripped;
1387   for (const char* ptr = p; ptr < end; ptr++) {
1388     if (!IsASCIITabOrNewline(*ptr))
1389       continue;
1390     // Hit tab or newline. Allocate storage, copy what we have until now,
1391     // and then iterate and filter all similar characters out.
1392     whitespace_stripped.reserve(len - 1);
1393     whitespace_stripped.assign(p, ptr - p);
1394     // 'ptr + 1' skips the current char, which we know to be tab or newline.
1395     for (ptr = ptr + 1; ptr < end; ptr++) {
1396       if (!IsASCIITabOrNewline(*ptr))
1397         whitespace_stripped += *ptr;
1398     }
1399 
1400     // Update variables like they should have looked like if the string
1401     // had been stripped of whitespace to begin with.
1402     input = whitespace_stripped.c_str();
1403     len = whitespace_stripped.size();
1404     p = input;
1405     end = input + len;
1406     break;
1407   }
1408 
1409   bool atflag = false;  // Set when @ has been seen.
1410   bool square_bracket_flag = false;  // Set inside of [...]
1411   bool password_token_seen_flag = false;  // Set after a : after an username.
1412 
1413   std::string buffer;
1414 
1415   // Set the initial parse state.
1416   const bool has_state_override = state_override != kUnknownState;
1417   enum url_parse_state state = has_state_override ? state_override :
1418                                                     kSchemeStart;
1419 
1420   if (state < kSchemeStart || state > kFragment) {
1421     url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1422     return;
1423   }
1424 
1425   while (p <= end) {
1426     const char ch = p < end ? p[0] : kEOL;
1427     bool special = (url->flags & URL_FLAGS_SPECIAL);
1428     bool cannot_be_base;
1429     const bool special_back_slash = (special && ch == '\\');
1430 
1431     switch (state) {
1432       case kSchemeStart:
1433         if (IsASCIIAlpha(ch)) {
1434           buffer += ASCIILowercase(ch);
1435           state = kScheme;
1436         } else if (!has_state_override) {
1437           state = kNoScheme;
1438           continue;
1439         } else {
1440           url->flags |= URL_FLAGS_FAILED;
1441           return;
1442         }
1443         break;
1444       case kScheme:
1445         if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1446           buffer += ASCIILowercase(ch);
1447         } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1448           if (has_state_override && buffer.size() == 0) {
1449             url->flags |= URL_FLAGS_TERMINATED;
1450             return;
1451           }
1452           buffer += ':';
1453 
1454           bool new_is_special = IsSpecial(buffer);
1455 
1456           if (has_state_override) {
1457             if ((special != new_is_special) ||
1458                 ((buffer == "file:") &&
1459                  ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1460                   (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1461                   (url->port != -1)))) {
1462               url->flags |= URL_FLAGS_TERMINATED;
1463               return;
1464             }
1465 
1466             // File scheme && (host == empty or null) check left to JS-land
1467             // as it can be done before even entering C++ binding.
1468           }
1469 
1470           url->scheme = std::move(buffer);
1471           url->port = NormalizePort(url->scheme, url->port);
1472           if (new_is_special) {
1473             url->flags |= URL_FLAGS_SPECIAL;
1474             special = true;
1475           } else {
1476             url->flags &= ~URL_FLAGS_SPECIAL;
1477             special = false;
1478           }
1479           buffer.clear();
1480           if (has_state_override)
1481             return;
1482           if (url->scheme == "file:") {
1483             state = kFile;
1484           } else if (special &&
1485                      has_base &&
1486                      url->scheme == base->scheme) {
1487             state = kSpecialRelativeOrAuthority;
1488           } else if (special) {
1489             state = kSpecialAuthoritySlashes;
1490           } else if (p + 1 < end && p[1] == '/') {
1491             state = kPathOrAuthority;
1492             p++;
1493           } else {
1494             url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1495             url->flags |= URL_FLAGS_HAS_PATH;
1496             url->path.emplace_back("");
1497             state = kCannotBeBase;
1498           }
1499         } else if (!has_state_override) {
1500           buffer.clear();
1501           state = kNoScheme;
1502           p = input;
1503           continue;
1504         } else {
1505           url->flags |= URL_FLAGS_FAILED;
1506           return;
1507         }
1508         break;
1509       case kNoScheme:
1510         cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1511         if (!has_base || (cannot_be_base && ch != '#')) {
1512           url->flags |= URL_FLAGS_FAILED;
1513           return;
1514         } else if (cannot_be_base && ch == '#') {
1515           url->scheme = base->scheme;
1516           if (IsSpecial(url->scheme)) {
1517             url->flags |= URL_FLAGS_SPECIAL;
1518             special = true;
1519           } else {
1520             url->flags &= ~URL_FLAGS_SPECIAL;
1521             special = false;
1522           }
1523           if (base->flags & URL_FLAGS_HAS_PATH) {
1524             url->flags |= URL_FLAGS_HAS_PATH;
1525             url->path = base->path;
1526           }
1527           if (base->flags & URL_FLAGS_HAS_QUERY) {
1528             url->flags |= URL_FLAGS_HAS_QUERY;
1529             url->query = base->query;
1530           }
1531           if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1532             url->flags |= URL_FLAGS_HAS_FRAGMENT;
1533             url->fragment = base->fragment;
1534           }
1535           url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1536           state = kFragment;
1537         } else if (has_base &&
1538                    base->scheme != "file:") {
1539           state = kRelative;
1540           continue;
1541         } else {
1542           url->scheme = "file:";
1543           url->flags |= URL_FLAGS_SPECIAL;
1544           special = true;
1545           state = kFile;
1546           continue;
1547         }
1548         break;
1549       case kSpecialRelativeOrAuthority:
1550         if (ch == '/' && p + 1 < end && p[1] == '/') {
1551           state = kSpecialAuthorityIgnoreSlashes;
1552           p++;
1553         } else {
1554           state = kRelative;
1555           continue;
1556         }
1557         break;
1558       case kPathOrAuthority:
1559         if (ch == '/') {
1560           state = kAuthority;
1561         } else {
1562           state = kPath;
1563           continue;
1564         }
1565         break;
1566       case kRelative:
1567         url->scheme = base->scheme;
1568         if (IsSpecial(url->scheme)) {
1569           url->flags |= URL_FLAGS_SPECIAL;
1570           special = true;
1571         } else {
1572           url->flags &= ~URL_FLAGS_SPECIAL;
1573           special = false;
1574         }
1575         switch (ch) {
1576           case kEOL:
1577             if (base->flags & URL_FLAGS_HAS_USERNAME) {
1578               url->flags |= URL_FLAGS_HAS_USERNAME;
1579               url->username = base->username;
1580             }
1581             if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1582               url->flags |= URL_FLAGS_HAS_PASSWORD;
1583               url->password = base->password;
1584             }
1585             if (base->flags & URL_FLAGS_HAS_HOST) {
1586               url->flags |= URL_FLAGS_HAS_HOST;
1587               url->host = base->host;
1588             }
1589             if (base->flags & URL_FLAGS_HAS_QUERY) {
1590               url->flags |= URL_FLAGS_HAS_QUERY;
1591               url->query = base->query;
1592             }
1593             if (base->flags & URL_FLAGS_HAS_PATH) {
1594               url->flags |= URL_FLAGS_HAS_PATH;
1595               url->path = base->path;
1596             }
1597             url->port = base->port;
1598             break;
1599           case '/':
1600             state = kRelativeSlash;
1601             break;
1602           case '?':
1603             if (base->flags & URL_FLAGS_HAS_USERNAME) {
1604               url->flags |= URL_FLAGS_HAS_USERNAME;
1605               url->username = base->username;
1606             }
1607             if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1608               url->flags |= URL_FLAGS_HAS_PASSWORD;
1609               url->password = base->password;
1610             }
1611             if (base->flags & URL_FLAGS_HAS_HOST) {
1612               url->flags |= URL_FLAGS_HAS_HOST;
1613               url->host = base->host;
1614             }
1615             if (base->flags & URL_FLAGS_HAS_PATH) {
1616               url->flags |= URL_FLAGS_HAS_PATH;
1617               url->path = base->path;
1618             }
1619             url->port = base->port;
1620             state = kQuery;
1621             break;
1622           case '#':
1623             if (base->flags & URL_FLAGS_HAS_USERNAME) {
1624               url->flags |= URL_FLAGS_HAS_USERNAME;
1625               url->username = base->username;
1626             }
1627             if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1628               url->flags |= URL_FLAGS_HAS_PASSWORD;
1629               url->password = base->password;
1630             }
1631             if (base->flags & URL_FLAGS_HAS_HOST) {
1632               url->flags |= URL_FLAGS_HAS_HOST;
1633               url->host = base->host;
1634             }
1635             if (base->flags & URL_FLAGS_HAS_QUERY) {
1636               url->flags |= URL_FLAGS_HAS_QUERY;
1637               url->query = base->query;
1638             }
1639             if (base->flags & URL_FLAGS_HAS_PATH) {
1640               url->flags |= URL_FLAGS_HAS_PATH;
1641               url->path = base->path;
1642             }
1643             url->port = base->port;
1644             state = kFragment;
1645             break;
1646           default:
1647             if (special_back_slash) {
1648               state = kRelativeSlash;
1649             } else {
1650               if (base->flags & URL_FLAGS_HAS_USERNAME) {
1651                 url->flags |= URL_FLAGS_HAS_USERNAME;
1652                 url->username = base->username;
1653               }
1654               if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1655                 url->flags |= URL_FLAGS_HAS_PASSWORD;
1656                 url->password = base->password;
1657               }
1658               if (base->flags & URL_FLAGS_HAS_HOST) {
1659                 url->flags |= URL_FLAGS_HAS_HOST;
1660                 url->host = base->host;
1661               }
1662               if (base->flags & URL_FLAGS_HAS_PATH) {
1663                 url->flags |= URL_FLAGS_HAS_PATH;
1664                 url->path = base->path;
1665                 ShortenUrlPath(url);
1666               }
1667               url->port = base->port;
1668               state = kPath;
1669               continue;
1670             }
1671         }
1672         break;
1673       case kRelativeSlash:
1674         if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1675           state = kSpecialAuthorityIgnoreSlashes;
1676         } else if (ch == '/') {
1677           state = kAuthority;
1678         } else {
1679           if (base->flags & URL_FLAGS_HAS_USERNAME) {
1680             url->flags |= URL_FLAGS_HAS_USERNAME;
1681             url->username = base->username;
1682           }
1683           if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1684             url->flags |= URL_FLAGS_HAS_PASSWORD;
1685             url->password = base->password;
1686           }
1687           if (base->flags & URL_FLAGS_HAS_HOST) {
1688             url->flags |= URL_FLAGS_HAS_HOST;
1689             url->host = base->host;
1690           }
1691           url->port = base->port;
1692           state = kPath;
1693           continue;
1694         }
1695         break;
1696       case kSpecialAuthoritySlashes:
1697         state = kSpecialAuthorityIgnoreSlashes;
1698         if (ch == '/' && p + 1 < end && p[1] == '/') {
1699           p++;
1700         } else {
1701           continue;
1702         }
1703         break;
1704       case kSpecialAuthorityIgnoreSlashes:
1705         if (ch != '/' && ch != '\\') {
1706           state = kAuthority;
1707           continue;
1708         }
1709         break;
1710       case kAuthority:
1711         if (ch == '@') {
1712           if (atflag) {
1713             buffer.reserve(buffer.size() + 3);
1714             buffer.insert(0, "%40");
1715           }
1716           atflag = true;
1717           size_t blen = buffer.size();
1718           if (blen > 0 && buffer[0] != ':') {
1719             url->flags |= URL_FLAGS_HAS_USERNAME;
1720           }
1721           for (size_t n = 0; n < blen; n++) {
1722             const char bch = buffer[n];
1723             if (bch == ':') {
1724               url->flags |= URL_FLAGS_HAS_PASSWORD;
1725               if (!password_token_seen_flag) {
1726                 password_token_seen_flag = true;
1727                 continue;
1728               }
1729             }
1730             if (password_token_seen_flag) {
1731               AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1732             } else {
1733               AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1734             }
1735           }
1736           buffer.clear();
1737         } else if (ch == kEOL ||
1738                    ch == '/' ||
1739                    ch == '?' ||
1740                    ch == '#' ||
1741                    special_back_slash) {
1742           if (atflag && buffer.size() == 0) {
1743             url->flags |= URL_FLAGS_FAILED;
1744             return;
1745           }
1746           p -= buffer.size() + 1;
1747           buffer.clear();
1748           state = kHost;
1749         } else {
1750           buffer += ch;
1751         }
1752         break;
1753       case kHost:
1754       case kHostname:
1755         if (has_state_override && url->scheme == "file:") {
1756           state = kFileHost;
1757           continue;
1758         } else if (ch == ':' && !square_bracket_flag) {
1759           if (buffer.size() == 0) {
1760             url->flags |= URL_FLAGS_FAILED;
1761             return;
1762           }
1763           url->flags |= URL_FLAGS_HAS_HOST;
1764           if (!ParseHost(buffer, &url->host, special)) {
1765             url->flags |= URL_FLAGS_FAILED;
1766             return;
1767           }
1768           buffer.clear();
1769           state = kPort;
1770           if (state_override == kHostname) {
1771             return;
1772           }
1773         } else if (ch == kEOL ||
1774                    ch == '/' ||
1775                    ch == '?' ||
1776                    ch == '#' ||
1777                    special_back_slash) {
1778           p--;
1779           if (special && buffer.size() == 0) {
1780             url->flags |= URL_FLAGS_FAILED;
1781             return;
1782           }
1783           if (has_state_override &&
1784               buffer.size() == 0 &&
1785               ((url->username.size() > 0 || url->password.size() > 0) ||
1786                url->port != -1)) {
1787             url->flags |= URL_FLAGS_TERMINATED;
1788             return;
1789           }
1790           url->flags |= URL_FLAGS_HAS_HOST;
1791           if (!ParseHost(buffer, &url->host, special)) {
1792             url->flags |= URL_FLAGS_FAILED;
1793             return;
1794           }
1795           buffer.clear();
1796           state = kPathStart;
1797           if (has_state_override) {
1798             return;
1799           }
1800         } else {
1801           if (ch == '[')
1802             square_bracket_flag = true;
1803           if (ch == ']')
1804             square_bracket_flag = false;
1805           buffer += ch;
1806         }
1807         break;
1808       case kPort:
1809         if (IsASCIIDigit(ch)) {
1810           buffer += ch;
1811         } else if (has_state_override ||
1812                    ch == kEOL ||
1813                    ch == '/' ||
1814                    ch == '?' ||
1815                    ch == '#' ||
1816                    special_back_slash) {
1817           if (buffer.size() > 0) {
1818             unsigned port = 0;
1819             // the condition port <= 0xffff prevents integer overflow
1820             for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1821               port = port * 10 + buffer[i] - '0';
1822             if (port > 0xffff) {
1823               // TODO(TimothyGu): This hack is currently needed for the host
1824               // setter since it needs access to hostname if it is valid, and
1825               // if the FAILED flag is set the entire response to JS layer
1826               // will be empty.
1827               if (state_override == kHost)
1828                 url->port = -1;
1829               else
1830                 url->flags |= URL_FLAGS_FAILED;
1831               return;
1832             }
1833             // the port is valid
1834             url->port = NormalizePort(url->scheme, static_cast<int>(port));
1835             if (url->port == -1)
1836               url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1837             buffer.clear();
1838           } else if (has_state_override) {
1839             // TODO(TimothyGu): Similar case as above.
1840             if (state_override == kHost)
1841               url->port = -1;
1842             else
1843               url->flags |= URL_FLAGS_TERMINATED;
1844             return;
1845           }
1846           state = kPathStart;
1847           continue;
1848         } else {
1849           url->flags |= URL_FLAGS_FAILED;
1850           return;
1851         }
1852         break;
1853       case kFile:
1854         url->scheme = "file:";
1855         if (ch == '/' || ch == '\\') {
1856           state = kFileSlash;
1857         } else if (has_base && base->scheme == "file:") {
1858           switch (ch) {
1859             case kEOL:
1860               if (base->flags & URL_FLAGS_HAS_HOST) {
1861                 url->flags |= URL_FLAGS_HAS_HOST;
1862                 url->host = base->host;
1863               }
1864               if (base->flags & URL_FLAGS_HAS_PATH) {
1865                 url->flags |= URL_FLAGS_HAS_PATH;
1866                 url->path = base->path;
1867               }
1868               if (base->flags & URL_FLAGS_HAS_QUERY) {
1869                 url->flags |= URL_FLAGS_HAS_QUERY;
1870                 url->query = base->query;
1871               }
1872               break;
1873             case '?':
1874               if (base->flags & URL_FLAGS_HAS_HOST) {
1875                 url->flags |= URL_FLAGS_HAS_HOST;
1876                 url->host = base->host;
1877               }
1878               if (base->flags & URL_FLAGS_HAS_PATH) {
1879                 url->flags |= URL_FLAGS_HAS_PATH;
1880                 url->path = base->path;
1881               }
1882               url->flags |= URL_FLAGS_HAS_QUERY;
1883               url->query.clear();
1884               state = kQuery;
1885               break;
1886             case '#':
1887               if (base->flags & URL_FLAGS_HAS_HOST) {
1888                 url->flags |= URL_FLAGS_HAS_HOST;
1889                 url->host = base->host;
1890               }
1891               if (base->flags & URL_FLAGS_HAS_PATH) {
1892                 url->flags |= URL_FLAGS_HAS_PATH;
1893                 url->path = base->path;
1894               }
1895               if (base->flags & URL_FLAGS_HAS_QUERY) {
1896                 url->flags |= URL_FLAGS_HAS_QUERY;
1897                 url->query = base->query;
1898               }
1899               url->flags |= URL_FLAGS_HAS_FRAGMENT;
1900               url->fragment.clear();
1901               state = kFragment;
1902               break;
1903             default:
1904               if (!StartsWithWindowsDriveLetter(p, end)) {
1905                 if (base->flags & URL_FLAGS_HAS_HOST) {
1906                   url->flags |= URL_FLAGS_HAS_HOST;
1907                   url->host = base->host;
1908                 }
1909                 if (base->flags & URL_FLAGS_HAS_PATH) {
1910                   url->flags |= URL_FLAGS_HAS_PATH;
1911                   url->path = base->path;
1912                 }
1913                 ShortenUrlPath(url);
1914               }
1915               state = kPath;
1916               continue;
1917           }
1918         } else {
1919           state = kPath;
1920           continue;
1921         }
1922         break;
1923       case kFileSlash:
1924         if (ch == '/' || ch == '\\') {
1925           state = kFileHost;
1926         } else {
1927           if (has_base &&
1928               base->scheme == "file:" &&
1929               !StartsWithWindowsDriveLetter(p, end)) {
1930             if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1931               url->flags |= URL_FLAGS_HAS_PATH;
1932               url->path.push_back(base->path[0]);
1933             } else {
1934               if (base->flags & URL_FLAGS_HAS_HOST) {
1935                 url->flags |= URL_FLAGS_HAS_HOST;
1936                 url->host = base->host;
1937               } else {
1938                 url->flags &= ~URL_FLAGS_HAS_HOST;
1939                 url->host.clear();
1940               }
1941             }
1942           }
1943           state = kPath;
1944           continue;
1945         }
1946         break;
1947       case kFileHost:
1948         if (ch == kEOL ||
1949             ch == '/' ||
1950             ch == '\\' ||
1951             ch == '?' ||
1952             ch == '#') {
1953           if (!has_state_override &&
1954               buffer.size() == 2 &&
1955               IsWindowsDriveLetter(buffer)) {
1956             state = kPath;
1957           } else if (buffer.size() == 0) {
1958             url->flags |= URL_FLAGS_HAS_HOST;
1959             url->host.clear();
1960             if (has_state_override)
1961               return;
1962             state = kPathStart;
1963           } else {
1964             std::string host;
1965             if (!ParseHost(buffer, &host, special)) {
1966               url->flags |= URL_FLAGS_FAILED;
1967               return;
1968             }
1969             if (host == "localhost")
1970               host.clear();
1971             url->flags |= URL_FLAGS_HAS_HOST;
1972             url->host = host;
1973             if (has_state_override)
1974               return;
1975             buffer.clear();
1976             state = kPathStart;
1977           }
1978           continue;
1979         } else {
1980           buffer += ch;
1981         }
1982         break;
1983       case kPathStart:
1984         if (IsSpecial(url->scheme)) {
1985           state = kPath;
1986           if (ch != '/' && ch != '\\') {
1987             continue;
1988           }
1989         } else if (!has_state_override && ch == '?') {
1990           url->flags |= URL_FLAGS_HAS_QUERY;
1991           url->query.clear();
1992           state = kQuery;
1993         } else if (!has_state_override && ch == '#') {
1994           url->flags |= URL_FLAGS_HAS_FRAGMENT;
1995           url->fragment.clear();
1996           state = kFragment;
1997         } else if (ch != kEOL) {
1998           state = kPath;
1999           if (ch != '/') {
2000             continue;
2001           }
2002         }
2003         break;
2004       case kPath:
2005         if (ch == kEOL ||
2006             ch == '/' ||
2007             special_back_slash ||
2008             (!has_state_override && (ch == '?' || ch == '#'))) {
2009           if (IsDoubleDotSegment(buffer)) {
2010             ShortenUrlPath(url);
2011             if (ch != '/' && !special_back_slash) {
2012               url->flags |= URL_FLAGS_HAS_PATH;
2013               url->path.emplace_back("");
2014             }
2015           } else if (IsSingleDotSegment(buffer) &&
2016                      ch != '/' && !special_back_slash) {
2017             url->flags |= URL_FLAGS_HAS_PATH;
2018             url->path.emplace_back("");
2019           } else if (!IsSingleDotSegment(buffer)) {
2020             if (url->scheme == "file:" &&
2021                 url->path.empty() &&
2022                 buffer.size() == 2 &&
2023                 IsWindowsDriveLetter(buffer)) {
2024               if ((url->flags & URL_FLAGS_HAS_HOST) &&
2025                   !url->host.empty()) {
2026                 url->host.clear();
2027                 url->flags |= URL_FLAGS_HAS_HOST;
2028               }
2029               buffer[1] = ':';
2030             }
2031             url->flags |= URL_FLAGS_HAS_PATH;
2032             url->path.emplace_back(std::move(buffer));
2033           }
2034           buffer.clear();
2035           if (url->scheme == "file:" &&
2036               (ch == kEOL ||
2037                ch == '?' ||
2038                ch == '#')) {
2039             while (url->path.size() > 1 && url->path[0].empty()) {
2040               url->path.erase(url->path.begin());
2041             }
2042           }
2043           if (ch == '?') {
2044             url->flags |= URL_FLAGS_HAS_QUERY;
2045             state = kQuery;
2046           } else if (ch == '#') {
2047             state = kFragment;
2048           }
2049         } else {
2050           AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
2051         }
2052         break;
2053       case kCannotBeBase:
2054         switch (ch) {
2055           case '?':
2056             state = kQuery;
2057             break;
2058           case '#':
2059             state = kFragment;
2060             break;
2061           default:
2062             if (url->path.empty())
2063               url->path.emplace_back("");
2064             else if (ch != kEOL)
2065               AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
2066         }
2067         break;
2068       case kQuery:
2069         if (ch == kEOL || (!has_state_override && ch == '#')) {
2070           url->flags |= URL_FLAGS_HAS_QUERY;
2071           url->query = std::move(buffer);
2072           buffer.clear();
2073           if (ch == '#')
2074             state = kFragment;
2075         } else {
2076           AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
2077                                                 QUERY_ENCODE_SET_NONSPECIAL);
2078         }
2079         break;
2080       case kFragment:
2081         switch (ch) {
2082           case kEOL:
2083             url->flags |= URL_FLAGS_HAS_FRAGMENT;
2084             url->fragment = std::move(buffer);
2085             break;
2086           case 0:
2087             break;
2088           default:
2089             AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
2090         }
2091         break;
2092       default:
2093         url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
2094         return;
2095     }
2096 
2097     p++;
2098   }
2099 }  // NOLINT(readability/fn_size)
2100 
2101 namespace {
SetArgs(Environment * env,Local<Value> argv[ARG_COUNT],const struct url_data & url)2102 void SetArgs(Environment* env,
2103              Local<Value> argv[ARG_COUNT],
2104              const struct url_data& url) {
2105   Isolate* isolate = env->isolate();
2106   argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2107   argv[ARG_PROTOCOL] =
2108       url.flags & URL_FLAGS_SPECIAL ?
2109           GetSpecial(env, url.scheme) :
2110           OneByteString(isolate, url.scheme.c_str());
2111   if (url.flags & URL_FLAGS_HAS_USERNAME)
2112     argv[ARG_USERNAME] = Utf8String(isolate, url.username);
2113   if (url.flags & URL_FLAGS_HAS_PASSWORD)
2114     argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
2115   if (url.flags & URL_FLAGS_HAS_HOST)
2116     argv[ARG_HOST] = Utf8String(isolate, url.host);
2117   if (url.flags & URL_FLAGS_HAS_QUERY)
2118     argv[ARG_QUERY] = Utf8String(isolate, url.query);
2119   if (url.flags & URL_FLAGS_HAS_FRAGMENT)
2120     argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
2121   if (url.port > -1)
2122     argv[ARG_PORT] = Integer::New(isolate, url.port);
2123   if (url.flags & URL_FLAGS_HAS_PATH)
2124     argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
2125 }
2126 
Parse(Environment * env,Local<Value> recv,const char * input,size_t len,enum url_parse_state state_override,Local<Value> base_obj,Local<Value> context_obj,Local<Function> cb,Local<Value> error_cb)2127 void Parse(Environment* env,
2128            Local<Value> recv,
2129            const char* input,
2130            size_t len,
2131            enum url_parse_state state_override,
2132            Local<Value> base_obj,
2133            Local<Value> context_obj,
2134            Local<Function> cb,
2135            Local<Value> error_cb) {
2136   Isolate* isolate = env->isolate();
2137   Local<Context> context = env->context();
2138   HandleScope handle_scope(isolate);
2139   Context::Scope context_scope(context);
2140 
2141   const bool has_context = context_obj->IsObject();
2142   const bool has_base = base_obj->IsObject();
2143 
2144   url_data base;
2145   url_data url;
2146   if (has_context)
2147     url = HarvestContext(env, context_obj.As<Object>());
2148   if (has_base)
2149     base = HarvestBase(env, base_obj.As<Object>());
2150 
2151   URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
2152   if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2153       ((state_override != kUnknownState) &&
2154        (url.flags & URL_FLAGS_TERMINATED)))
2155     return;
2156 
2157   // Define the return value placeholders
2158   const Local<Value> undef = Undefined(isolate);
2159   const Local<Value> null = Null(isolate);
2160   if (!(url.flags & URL_FLAGS_FAILED)) {
2161     Local<Value> argv[] = {
2162       undef,
2163       undef,
2164       undef,
2165       undef,
2166       null,  // host defaults to null
2167       null,  // port defaults to null
2168       undef,
2169       null,  // query defaults to null
2170       null,  // fragment defaults to null
2171     };
2172     SetArgs(env, argv, url);
2173     cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2174   } else if (error_cb->IsFunction()) {
2175     Local<Value> argv[2] = { undef, undef };
2176     argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2177     argv[ERR_ARG_INPUT] =
2178       String::NewFromUtf8(env->isolate(),
2179                           input,
2180                           NewStringType::kNormal).ToLocalChecked();
2181     error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2182         .FromMaybe(Local<Value>());
2183   }
2184 }
2185 
Parse(const FunctionCallbackInfo<Value> & args)2186 void Parse(const FunctionCallbackInfo<Value>& args) {
2187   Environment* env = Environment::GetCurrent(args);
2188   CHECK_GE(args.Length(), 5);
2189   CHECK(args[0]->IsString());  // input
2190   CHECK(args[2]->IsUndefined() ||  // base context
2191         args[2]->IsNull() ||
2192         args[2]->IsObject());
2193   CHECK(args[3]->IsUndefined() ||  // context
2194         args[3]->IsNull() ||
2195         args[3]->IsObject());
2196   CHECK(args[4]->IsFunction());  // complete callback
2197   CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
2198 
2199   Utf8Value input(env->isolate(), args[0]);
2200   enum url_parse_state state_override = kUnknownState;
2201   if (args[1]->IsNumber()) {
2202     state_override = static_cast<enum url_parse_state>(
2203         args[1]->Uint32Value(env->context()).FromJust());
2204   }
2205 
2206   Parse(env, args.This(),
2207         *input, input.length(),
2208         state_override,
2209         args[2],
2210         args[3],
2211         args[4].As<Function>(),
2212         args[5]);
2213 }
2214 
EncodeAuthSet(const FunctionCallbackInfo<Value> & args)2215 void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2216   Environment* env = Environment::GetCurrent(args);
2217   CHECK_GE(args.Length(), 1);
2218   CHECK(args[0]->IsString());
2219   Utf8Value value(env->isolate(), args[0]);
2220   std::string output;
2221   size_t len = value.length();
2222   output.reserve(len);
2223   for (size_t n = 0; n < len; n++) {
2224     const char ch = (*value)[n];
2225     AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2226   }
2227   args.GetReturnValue().Set(
2228       String::NewFromUtf8(env->isolate(),
2229                           output.c_str(),
2230                           NewStringType::kNormal).ToLocalChecked());
2231 }
2232 
ToUSVString(const FunctionCallbackInfo<Value> & args)2233 void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2234   Environment* env = Environment::GetCurrent(args);
2235   CHECK_GE(args.Length(), 2);
2236   CHECK(args[0]->IsString());
2237   CHECK(args[1]->IsNumber());
2238 
2239   TwoByteValue value(env->isolate(), args[0]);
2240 
2241   int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2242   CHECK_GE(start, 0);
2243 
2244   for (size_t i = start; i < value.length(); i++) {
2245     char16_t c = value[i];
2246     if (!IsUnicodeSurrogate(c)) {
2247       continue;
2248     } else if (IsUnicodeSurrogateTrail(c) || i == value.length() - 1) {
2249       value[i] = kUnicodeReplacementCharacter;
2250     } else {
2251       char16_t d = value[i + 1];
2252       if (IsUnicodeTrail(d)) {
2253         i++;
2254       } else {
2255         value[i] = kUnicodeReplacementCharacter;
2256       }
2257     }
2258   }
2259 
2260   args.GetReturnValue().Set(
2261       String::NewFromTwoByte(env->isolate(),
2262                              *value,
2263                              NewStringType::kNormal,
2264                              value.length()).ToLocalChecked());
2265 }
2266 
DomainToASCII(const FunctionCallbackInfo<Value> & args)2267 void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2268   Environment* env = Environment::GetCurrent(args);
2269   CHECK_GE(args.Length(), 1);
2270   CHECK(args[0]->IsString());
2271   Utf8Value value(env->isolate(), args[0]);
2272 
2273   URLHost host;
2274   // Assuming the host is used for a special scheme.
2275   host.ParseHost(*value, value.length(), true);
2276   if (host.ParsingFailed()) {
2277     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2278     return;
2279   }
2280   std::string out = host.ToStringMove();
2281   args.GetReturnValue().Set(
2282       String::NewFromUtf8(env->isolate(),
2283                           out.c_str(),
2284                           NewStringType::kNormal).ToLocalChecked());
2285 }
2286 
DomainToUnicode(const FunctionCallbackInfo<Value> & args)2287 void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2288   Environment* env = Environment::GetCurrent(args);
2289   CHECK_GE(args.Length(), 1);
2290   CHECK(args[0]->IsString());
2291   Utf8Value value(env->isolate(), args[0]);
2292 
2293   URLHost host;
2294   // Assuming the host is used for a special scheme.
2295   host.ParseHost(*value, value.length(), true, true);
2296   if (host.ParsingFailed()) {
2297     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2298     return;
2299   }
2300   std::string out = host.ToStringMove();
2301   args.GetReturnValue().Set(
2302       String::NewFromUtf8(env->isolate(),
2303                           out.c_str(),
2304                           NewStringType::kNormal).ToLocalChecked());
2305 }
2306 
SetURLConstructor(const FunctionCallbackInfo<Value> & args)2307 void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2308   Environment* env = Environment::GetCurrent(args);
2309   CHECK_EQ(args.Length(), 1);
2310   CHECK(args[0]->IsFunction());
2311   env->set_url_constructor_function(args[0].As<Function>());
2312 }
2313 
Initialize(Local<Object> target,Local<Value> unused,Local<Context> context,void * priv)2314 void Initialize(Local<Object> target,
2315                 Local<Value> unused,
2316                 Local<Context> context,
2317                 void* priv) {
2318   Environment* env = Environment::GetCurrent(context);
2319   env->SetMethod(target, "parse", Parse);
2320   env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
2321   env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
2322   env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
2323   env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
2324   env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2325 
2326 #define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2327   FLAGS(XX)
2328 #undef XX
2329 
2330 #define XX(name) NODE_DEFINE_CONSTANT(target, name);
2331   PARSESTATES(XX)
2332 #undef XX
2333 }
2334 }  // namespace
2335 
ToFilePath() const2336 std::string URL::ToFilePath() const {
2337   if (context_.scheme != "file:") {
2338     return "";
2339   }
2340 
2341 #ifdef _WIN32
2342   const char* slash = "\\";
2343   auto is_slash = [] (char ch) {
2344     return ch == '/' || ch == '\\';
2345   };
2346 #else
2347   const char* slash = "/";
2348   auto is_slash = [] (char ch) {
2349     return ch == '/';
2350   };
2351   if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2352       context_.host.length() > 0) {
2353     return "";
2354   }
2355 #endif
2356   std::string decoded_path;
2357   for (const std::string& part : context_.path) {
2358     std::string decoded = PercentDecode(part.c_str(), part.length());
2359     for (char& ch : decoded) {
2360       if (is_slash(ch)) {
2361         return "";
2362       }
2363     }
2364     decoded_path += slash + decoded;
2365   }
2366 
2367 #ifdef _WIN32
2368   // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2369 
2370   // If hostname is set, then we have a UNC path. Pass the hostname through
2371   // ToUnicode just in case it is an IDN using punycode encoding. We do not
2372   // need to worry about percent encoding because the URL parser will have
2373   // already taken care of that for us. Note that this only causes IDNs with an
2374   // appropriate `xn--` prefix to be decoded.
2375   if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2376       context_.host.length() > 0) {
2377     std::string unicode_host;
2378     if (!ToUnicode(context_.host, &unicode_host)) {
2379       return "";
2380     }
2381     return "\\\\" + unicode_host + decoded_path;
2382   }
2383   // Otherwise, it's a local path that requires a drive letter.
2384   if (decoded_path.length() < 3) {
2385     return "";
2386   }
2387   if (decoded_path[2] != ':' ||
2388       !IsASCIIAlpha(decoded_path[1])) {
2389     return "";
2390   }
2391   // Strip out the leading '\'.
2392   return decoded_path.substr(1);
2393 #else
2394   return decoded_path;
2395 #endif
2396 }
2397 
FromFilePath(const std::string & file_path)2398 URL URL::FromFilePath(const std::string& file_path) {
2399   URL url("file://");
2400   std::string escaped_file_path;
2401   for (size_t i = 0; i < file_path.length(); ++i) {
2402     escaped_file_path += file_path[i];
2403     if (file_path[i] == '%')
2404       escaped_file_path += "25";
2405   }
2406   URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart,
2407              &url.context_, true, nullptr, false);
2408   return url;
2409 }
2410 
2411 // This function works by calling out to a JS function that creates and
2412 // returns the JS URL object. Be mindful of the JS<->Native boundary
2413 // crossing that is required.
ToObject(Environment * env) const2414 MaybeLocal<Value> URL::ToObject(Environment* env) const {
2415   Isolate* isolate = env->isolate();
2416   Local<Context> context = env->context();
2417   Context::Scope context_scope(context);
2418 
2419   const Local<Value> undef = Undefined(isolate);
2420   const Local<Value> null = Null(isolate);
2421 
2422   if (context_.flags & URL_FLAGS_FAILED)
2423     return Local<Value>();
2424 
2425   Local<Value> argv[] = {
2426     undef,
2427     undef,
2428     undef,
2429     undef,
2430     null,  // host defaults to null
2431     null,  // port defaults to null
2432     undef,
2433     null,  // query defaults to null
2434     null,  // fragment defaults to null
2435   };
2436   SetArgs(env, argv, context_);
2437 
2438   MaybeLocal<Value> ret;
2439   {
2440     TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
2441 
2442     // The SetURLConstructor method must have been called already to
2443     // set the constructor function used below. SetURLConstructor is
2444     // called automatically when the internal/url.js module is loaded
2445     // during the internal/bootstrap/node.js processing.
2446     ret = env->url_constructor_function()
2447         ->Call(env->context(), undef, arraysize(argv), argv);
2448   }
2449 
2450   return ret;
2451 }
2452 
2453 }  // namespace url
2454 }  // namespace node
2455 
2456 NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
2457