• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "base/check.h"
11 #include "base/cpu_reduction_experiment.h"
12 #include "url/url_canon.h"
13 #include "url/url_canon_internal.h"
14 #include "url/url_features.h"
15 
16 namespace url {
17 
18 namespace {
19 
// Canonical form of every ASCII character accepted in a host, indexed by the
// character's code. An entry of 0 means the character is not allowed at all;
// the sentinel kEsc means the character is accepted but must be written
// percent-escaped. Upper-case letters map to their lower-case form. At
// present, ' ' (SPACE) and '*' (asterisk) are still non-compliant to the URL
// Standard. See https://crbug.com/1416013 for details.
constexpr unsigned char kEsc = 0xff;
// clang-format off
constexpr unsigned char kHostCharLookup[0x80] = {
// 0x00-0x0f: all are invalid
     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
// 0x10-0x1f: all are invalid
     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
// 0x20-0x2f
//  ' '   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
    kEsc,'!', '"',  0,  '$',  0,  '&', '\'','(', ')', kEsc, '+', ',', '-', '.',  0,
// 0x30-0x3f
//   0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';' , 0,  '=',  0,   0,
// 0x40-0x4f
//   @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
     0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// 0x50-0x5f
//   P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[',  0,  ']',  0,  '_',
// 0x60-0x6f
//   `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
    '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// 0x70-0x7f
//   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~   DEL
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{',  0, '}',  '~',  0 };
// clang-format on
44 
// Flag bit marking a forbidden host code point:
// https://url.spec.whatwg.org/#forbidden-host-code-point
const uint8_t kForbiddenHost = 0x1;

// TODO(crbug.com/40063064): Merge other lookup tables into this table. That can
// probably be done after https://crbug.com/1416013 is resolved.
//
// Per-character flags for the ASCII range, indexed by code point. This table
// is currently only used for an opaque-host in non-special URLs.
// clang-format off
const uint8_t kHostCharacterTable[128] = {
    // 0x00-0x0F: NUL, TAB, LF and CR are forbidden.
    kForbiddenHost, 0, 0, 0, 0, 0, 0, 0,
    0, kForbiddenHost, kForbiddenHost, 0, 0, kForbiddenHost, 0, 0,
    // 0x10-0x1F: nothing forbidden.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20-0x2F: ' ', '#' and '/' are forbidden.
    kForbiddenHost, 0, 0, kForbiddenHost, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, kForbiddenHost,
    // 0x30-0x3F: ':', '<', '>' and '?' are forbidden.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, kForbiddenHost, 0, kForbiddenHost, 0, kForbiddenHost, kForbiddenHost,
    // 0x40-0x4F: '@' is forbidden.
    kForbiddenHost, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50-0x5F: '[', backslash, ']' and '^' are forbidden.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, kForbiddenHost, kForbiddenHost, kForbiddenHost, kForbiddenHost, 0,
    // 0x60-0x6F: nothing forbidden.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70-0x7F: '|' is forbidden.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, kForbiddenHost, 0, 0, 0,
};
// clang-format on

// Returns true if |ch| is a forbidden host code point.
//
// The parameter is a full UTF-16 code unit rather than a byte: callers pass
// char16_t values, and a byte-sized parameter would silently truncate them
// (e.g. U+0123 would be looked up as '#'). Anything above 0x7F is outside the
// table and is never forbidden.
bool IsForbiddenHostCodePoint(char16_t ch) {
  return ch <= 0x7F && (kHostCharacterTable[ch] & kForbiddenHost);
}
187 
188 // RFC1034 maximum FQDN length.
189 constexpr size_t kMaxHostLength = 253;
190 
191 // Generous padding to account for the fact that UTS#46 normalization can cause
192 // a long string to actually shrink and fit within the 253 character RFC1034
193 // FQDN length limit. Note that this can still be too short for pathological
194 // cases: An arbitrary number of characters (e.g. U+00AD SOFT HYPHEN) can be
195 // removed from the input by UTS#46 processing. However, this should be
196 // sufficient for all normally-encountered, non-abusive hostname strings.
197 constexpr size_t kMaxHostBufferLength = kMaxHostLength * 5;
198 
199 constexpr size_t kTempHostBufferLen = 1024;
200 using StackBuffer = RawCanonOutputT<char, kTempHostBufferLen>;
201 using StackBufferW = RawCanonOutputT<char16_t, kTempHostBufferLen>;
202 
203 // Scans a host name and fills in the output flags according to what we find.
204 // |has_non_ascii| will be true if there are any non-7-bit characters, and
205 // |has_escaped| will be true if there is a percent sign.
206 template<typename CHAR, typename UCHAR>
ScanHostname(const CHAR * spec,const Component & host,bool * has_non_ascii,bool * has_escaped)207 void ScanHostname(const CHAR* spec,
208                   const Component& host,
209                   bool* has_non_ascii,
210                   bool* has_escaped) {
211   int end = host.end();
212   *has_non_ascii = false;
213   *has_escaped = false;
214   for (int i = host.begin; i < end; i++) {
215     if (static_cast<UCHAR>(spec[i]) >= 0x80)
216       *has_non_ascii = true;
217     else if (spec[i] == '%')
218       *has_escaped = true;
219   }
220 }
221 
222 // Canonicalizes a host name that is entirely 8-bit characters (even though
223 // the type holding them may be 16 bits. Escaped characters will be unescaped.
224 // Non-7-bit characters (for example, UTF-8) will be passed unchanged.
225 //
226 // The |*has_non_ascii| flag will be true if there are non-7-bit characters in
227 // the output.
228 //
229 // This function is used in two situations:
230 //
231 //  * When the caller knows there is no non-ASCII or percent escaped
232 //    characters. This is what DoHost does. The result will be a completely
233 //    canonicalized host since we know nothing weird can happen (escaped
234 //    characters could be unescaped to non-7-bit, so they have to be treated
235 //    with suspicion at this point). It does not use the |has_non_ascii| flag.
236 //
237 //  * When the caller has an 8-bit string that may need unescaping.
238 //    DoComplexHost calls us this situation to do unescaping and validation.
239 //    After this, it may do other IDN operations depending on the value of the
240 //    |*has_non_ascii| flag.
241 //
242 // The return value indicates if the output is a potentially valid host name.
243 template <CanonMode canon_mode, typename INCHAR, typename OUTCHAR>
DoSimpleHost(const INCHAR * host,size_t host_len,CanonOutputT<OUTCHAR> * output,bool * has_non_ascii)244 bool DoSimpleHost(const INCHAR* host,
245                   size_t host_len,
246                   CanonOutputT<OUTCHAR>* output,
247                   bool* has_non_ascii) {
248   *has_non_ascii = false;
249 
250   bool success = true;
251   for (size_t i = 0; i < host_len; ++i) {
252     unsigned int source = host[i];
253     if (source == '%') {
254       // Unescape first, if possible.
255       // Source will be used only if decode operation was successful.
256       if (!DecodeEscaped(host, &i, host_len,
257                          reinterpret_cast<unsigned char*>(&source))) {
258         // Invalid escaped character. There is nothing that can make this
259         // host valid. We append an escaped percent so the URL looks reasonable
260         // and mark as failed.
261         AppendEscapedChar('%', output);
262         success = false;
263         continue;
264       }
265     }
266 
267     if (source < 0x80) {
268       // We have ASCII input, we can use our lookup table.
269       unsigned char replacement = kHostCharLookup[source];
270       if (!replacement) {
271         // Invalid character, add it as percent-escaped and mark as failed.
272         AppendEscapedChar(source, output);
273         success = false;
274       } else if (replacement == kEsc) {
275         // This character is valid but should be escaped.
276         AppendEscapedChar(source, output);
277         if (source == ' ' &&
278             url::IsDisallowingSpaceCharacterInURLHostParsing() &&
279             canon_mode != CanonMode::kFileURL) {
280           success = false;
281         }
282       } else {
283         // Common case, the given character is valid in a hostname, the lookup
284         // table tells us the canonical representation of that character (lower
285         // cased).
286         output->push_back(replacement);
287       }
288     } else {
289       // It's a non-ascii char. Just push it to the output.
290       // In case where we have char16 input, and char output it's safe to
291       // cast char16->char only if input string was converted to ASCII.
292       output->push_back(static_cast<OUTCHAR>(source));
293       *has_non_ascii = true;
294     }
295   }
296   return success;
297 }
298 
299 // Canonicalizes a host that requires IDN conversion. Returns true on success
300 template <CanonMode canon_mode>
DoIDNHost(const char16_t * src,size_t src_len,CanonOutput * output)301 bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
302   int original_output_len = output->length();  // So we can rewind below.
303 
304   // We need to escape URL before doing IDN conversion, since punicode strings
305   // cannot be escaped after they are created.
306   RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
307   bool has_non_ascii;
308   DoSimpleHost<canon_mode>(src, src_len, &url_escaped_host, &has_non_ascii);
309   if (url_escaped_host.length() > kMaxHostBufferLength) {
310     AppendInvalidNarrowString(src, 0, src_len, output);
311     return false;
312   }
313 
314   StackBufferW wide_output;
315   if (!IDNToASCII(url_escaped_host.view(), &wide_output)) {
316     // Some error, give up. This will write some reasonable looking
317     // representation of the string to the output.
318     AppendInvalidNarrowString(src, 0, src_len, output);
319     return false;
320   }
321 
322   // Now we check the ASCII output like a normal host. It will also handle
323   // unescaping. Although we unescaped everything before this function call, if
324   // somebody does %00 as fullwidth, ICU will convert this to ASCII.
325   bool success = DoSimpleHost<canon_mode>(
326       wide_output.data(), wide_output.length(), output, &has_non_ascii);
327   if (has_non_ascii) {
328     // ICU generated something that DoSimpleHost didn't think looked like
329     // ASCII. This is quite rare, but ICU might convert some characters to
330     // percent signs which might generate new escape sequences which might in
331     // turn be invalid. An example is U+FE6A "small percent" which ICU will
332     // name prep into an ASCII percent and then we can interpret the following
333     // characters as escaped characters.
334     //
335     // If DoSimpleHost didn't think the output was ASCII, just escape the
336     // thing we gave ICU and give up. DoSimpleHost will have handled a further
337     // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates
338     // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't
339     // do more (like handle escaped non-ASCII sequences). Handling the escaped
340     // ASCII isn't strictly necessary, but DoSimpleHost handles this case
341     // anyway so we handle it/
342     output->set_length(original_output_len);
343     AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
344                               output);
345     return false;
346   }
347   return success;
348 }
349 
350 // 8-bit convert host to its ASCII version: this converts the UTF-8 input to
351 // UTF-16. The has_escaped flag should be set if the input string requires
352 // unescaping.
353 template <CanonMode canon_mode>
DoComplexHost(const char * host,size_t host_len,bool has_non_ascii,bool has_escaped,CanonOutput * output)354 bool DoComplexHost(const char* host,
355                    size_t host_len,
356                    bool has_non_ascii,
357                    bool has_escaped,
358                    CanonOutput* output) {
359   // Save the current position in the output. We may write stuff and rewind it
360   // below, so we need to know where to rewind to.
361   size_t begin_length = output->length();
362 
363   // Points to the UTF-8 data we want to convert. This will either be the
364   // input or the unescaped version written to |*output| if necessary.
365   const char* utf8_source;
366   size_t utf8_source_len;
367   bool are_all_escaped_valid = true;
368   if (has_escaped) {
369     // Unescape before converting to UTF-16 for IDN. We write this into the
370     // output because it most likely does not require IDNization, and we can
371     // save another huge stack buffer. It will be replaced below if it requires
372     // IDN. This will also update our non-ASCII flag so we know whether the
373     // unescaped input requires IDN.
374     if (!DoSimpleHost<canon_mode>(host, host_len, output, &has_non_ascii)) {
375       // Error with some escape sequence. We'll call the current output
376       // complete. DoSimpleHost will have written some "reasonable" output
377       // for the invalid escapes, but the output could be non-ASCII and
378       // needs to go through re-encoding below.
379       are_all_escaped_valid = false;
380     }
381 
382     // Unescaping may have left us with ASCII input, in which case the
383     // unescaped version we wrote to output is complete.
384     if (!has_non_ascii) {
385       return are_all_escaped_valid;
386     }
387 
388     // Save the pointer into the data was just converted (it may be appended to
389     // other data in the output buffer).
390     utf8_source = &output->data()[begin_length];
391     utf8_source_len = output->length() - begin_length;
392   } else {
393     // We don't need to unescape, use input for IDNization later. (We know the
394     // input has non-ASCII, or the simple version would have been called
395     // instead of us.)
396     utf8_source = host;
397     utf8_source_len = host_len;
398   }
399 
400   // Non-ASCII input requires IDN, convert to UTF-16 and do the IDN conversion.
401   // Above, we may have used the output to write the unescaped values to, so
402   // we have to rewind it to where we started after we convert it to UTF-16.
403   StackBufferW utf16;
404   if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
405     // In this error case, the input may or may not be the output.
406     StackBuffer utf8;
407     for (size_t i = 0; i < utf8_source_len; i++)
408       utf8.push_back(utf8_source[i]);
409     output->set_length(begin_length);
410     AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
411     return false;
412   }
413   output->set_length(begin_length);
414 
415   // This will call DoSimpleHost which will do normal ASCII canonicalization
416   // and also check for IP addresses in the outpt.
417   return DoIDNHost<canon_mode>(utf16.data(), utf16.length(), output) &&
418          are_all_escaped_valid;
419 }
420 
421 // UTF-16 convert host to its ASCII version. The set up is already ready for
422 // the backend, so we just pass through. The has_escaped flag should be set if
423 // the input string requires unescaping.
424 template <CanonMode canon_mode>
DoComplexHost(const char16_t * host,size_t host_len,bool has_non_ascii,bool has_escaped,CanonOutput * output)425 bool DoComplexHost(const char16_t* host,
426                    size_t host_len,
427                    bool has_non_ascii,
428                    bool has_escaped,
429                    CanonOutput* output) {
430   if (has_escaped) {
431     // Yikes, we have escaped characters with wide input. The escaped
432     // characters should be interpreted as UTF-8. To solve this problem,
433     // we convert to UTF-8, unescape, then convert back to UTF-16 for IDN.
434     //
435     // We don't bother to optimize the conversion in the ASCII case (which
436     // *could* just be a copy) and use the UTF-8 path, because it should be
437     // very rare that host names have escaped characters, and it is relatively
438     // fast to do the conversion anyway.
439     StackBuffer utf8;
440     if (!ConvertUTF16ToUTF8(host, host_len, &utf8)) {
441       AppendInvalidNarrowString(host, 0, host_len, output);
442       return false;
443     }
444 
445     // Once we convert to UTF-8, we can use the 8-bit version of the complex
446     // host handling code above.
447     return DoComplexHost<canon_mode>(utf8.data(), utf8.length(), has_non_ascii,
448                                      has_escaped, output);
449   }
450 
451   // No unescaping necessary, we can safely pass the input to ICU. This
452   // function will only get called if we either have escaped or non-ascii
453   // input, so it's safe to just use ICU now. Even if the input is ASCII,
454   // this function will do the right thing (just slower than we could).
455   return DoIDNHost<canon_mode>(host, host_len, output);
456 }
457 
458 template <typename CHAR, typename UCHAR, CanonMode canon_mode>
DoHostSubstring(const CHAR * spec,const Component & host,CanonOutput * output)459 bool DoHostSubstring(const CHAR* spec,
460                      const Component& host,
461                      CanonOutput* output) {
462   DCHECK(host.is_valid());
463 
464   bool has_non_ascii, has_escaped;
465   ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
466 
467   if (has_non_ascii || has_escaped) {
468     return DoComplexHost<canon_mode>(&spec[host.begin],
469                                      static_cast<size_t>(host.len),
470                                      has_non_ascii, has_escaped, output);
471   }
472 
473   const bool success = DoSimpleHost<canon_mode>(
474       &spec[host.begin], static_cast<size_t>(host.len), output, &has_non_ascii);
475   DCHECK(!has_non_ascii);
476   return success;
477 }
478 
479 template <typename CharT>
DoOpaqueHost(const std::basic_string_view<CharT> host,CanonOutput & output)480 bool DoOpaqueHost(const std::basic_string_view<CharT> host,
481                   CanonOutput& output) {
482   // URL Standard: https://url.spec.whatwg.org/#concept-opaque-host-parser
483 
484   size_t host_len = host.size();
485 
486   for (size_t i = 0; i < host_len; ++i) {
487     char16_t ch = host[i];
488     // The characters '[', ':', and ']', are checked later in
489     // `CanonicalizeIPv6Address` function.
490     if (ch != '[' && ch != ']' && ch != ':' && IsForbiddenHostCodePoint(ch)) {
491       return false;
492     }
493 
494     // Implementation note:
495     //
496     // URL Standard: Step 3 in
497     // https://url.spec.whatwg.org/#concept-opaque-host-parser
498     //
499     // > 3. If input contains a U+0025 (%) and the two code points following
500     // > it are not ASCII hex digits, invalid-URL-unit validation error.
501     //
502     // `invalid-URL-unit` is NOT marked as failure. We don't need to consider
503     // step 3 here.
504 
505     // URL Standard: Step 4 in
506     // https://url.spec.whatwg.org/#concept-opaque-host-parser
507     //
508     // > 4. Return the result of running UTF-8 percent-encode on input using
509     // > the C0 control percent-encode set.
510     if (IsInC0ControlPercentEncodeSet(ch)) {
511       AppendUTF8EscapedChar(host.data(), &i, host_len, &output);
512     } else {
513       output.push_back(ch);
514     }
515   }
516   return true;
517 }
518 
519 template <typename CHAR, typename UCHAR, CanonMode canon_mode>
DoHost(const CHAR * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)520 void DoHost(const CHAR* spec,
521             const Component& host,
522             CanonOutput& output,
523             CanonHostInfo& host_info) {
524   // URL Standard: https://url.spec.whatwg.org/#host-parsing
525 
526   // Keep track of output's initial length, so we can rewind later.
527   const int output_begin = output.length();
528 
529   if (host.is_empty()) {
530     // Empty hosts don't need anything.
531     host_info.family = CanonHostInfo::NEUTRAL;
532     // Carry over the valid empty host for non-special URLs.
533     //
534     // Component(0, 0) should be considered invalid here for historical reasons.
535     //
536     // TODO(crbug.com/40063064): Update the callers so that they don't pass
537     // Component(0, 0) as an invalid `host`.
538     if (host.begin != 0 && host.len == 0) {
539       host_info.out_host = Component(output_begin, 0);
540     } else {
541       host_info.out_host = Component();
542     }
543     return;
544   }
545 
546   bool success;
547   if constexpr (canon_mode == CanonMode::kSpecialURL ||
548                 canon_mode == CanonMode::kFileURL) {
549     success = DoHostSubstring<CHAR, UCHAR, canon_mode>(spec, host, &output);
550   } else {
551     // URL Standard: https://url.spec.whatwg.org/#concept-opaque-host-parser
552     success = DoOpaqueHost(host.as_string_view_on(spec), output);
553   }
554 
555   if (success) {
556     // After all the other canonicalization, check if we ended up with an IP
557     // address. IP addresses are small, so writing into this temporary buffer
558     // should not cause an allocation.
559     RawCanonOutput<64> canon_ip;
560 
561     if constexpr (canon_mode == CanonMode::kSpecialURL ||
562                   canon_mode == CanonMode::kFileURL) {
563       CanonicalizeIPAddress(output.data(),
564                             MakeRange(output_begin, output.length()), &canon_ip,
565                             &host_info);
566     } else {
567       // Non-special URLs support only IPv6.
568       CanonicalizeIPv6Address(output.data(),
569                               MakeRange(output_begin, output.length()),
570                               canon_ip, host_info);
571     }
572 
573     // If we got an IPv4/IPv6 address, copy the canonical form back to the
574     // real buffer. Otherwise, it's a hostname or broken IP, in which case
575     // we just leave it in place.
576     if (host_info.IsIPAddress()) {
577       output.set_length(output_begin);
578       output.Append(canon_ip.view());
579     }
580   } else {
581     // Canonicalization failed. Set BROKEN to notify the caller.
582     host_info.family = CanonHostInfo::BROKEN;
583   }
584   host_info.out_host = MakeRange(output_begin, output.length());
585 }
586 
587 }  // namespace
588 
CanonicalizeHost(const char * spec,const Component & host,CanonOutput * output,Component * out_host)589 bool CanonicalizeHost(const char* spec,
590                       const Component& host,
591                       CanonOutput* output,
592                       Component* out_host) {
593   DCHECK(output);
594   DCHECK(out_host);
595   return CanonicalizeSpecialHost(spec, host, *output, *out_host);
596 }
597 
CanonicalizeHost(const char16_t * spec,const Component & host,CanonOutput * output,Component * out_host)598 bool CanonicalizeHost(const char16_t* spec,
599                       const Component& host,
600                       CanonOutput* output,
601                       Component* out_host) {
602   DCHECK(output);
603   DCHECK(out_host);
604   return CanonicalizeSpecialHost(spec, host, *output, *out_host);
605 }
606 
CanonicalizeSpecialHost(const char * spec,const Component & host,CanonOutput & output,Component & out_host)607 bool CanonicalizeSpecialHost(const char* spec,
608                              const Component& host,
609                              CanonOutput& output,
610                              Component& out_host) {
611   CanonHostInfo host_info;
612   DoHost<char, unsigned char, CanonMode::kSpecialURL>(spec, host, output,
613                                                       host_info);
614   out_host = host_info.out_host;
615   return (host_info.family != CanonHostInfo::BROKEN);
616 }
617 
CanonicalizeSpecialHost(const char16_t * spec,const Component & host,CanonOutput & output,Component & out_host)618 bool CanonicalizeSpecialHost(const char16_t* spec,
619                              const Component& host,
620                              CanonOutput& output,
621                              Component& out_host) {
622   CanonHostInfo host_info;
623   DoHost<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host, output,
624                                                      host_info);
625   out_host = host_info.out_host;
626   return (host_info.family != CanonHostInfo::BROKEN);
627 }
628 
CanonicalizeFileHost(const char * spec,const Component & host,CanonOutput & output,Component & out_host)629 bool CanonicalizeFileHost(const char* spec,
630                           const Component& host,
631                           CanonOutput& output,
632                           Component& out_host) {
633   CanonHostInfo host_info;
634   DoHost<char, unsigned char, CanonMode::kFileURL>(spec, host, output,
635                                                    host_info);
636   out_host = host_info.out_host;
637   return (host_info.family != CanonHostInfo::BROKEN);
638 }
639 
CanonicalizeFileHost(const char16_t * spec,const Component & host,CanonOutput & output,Component & out_host)640 bool CanonicalizeFileHost(const char16_t* spec,
641                           const Component& host,
642                           CanonOutput& output,
643                           Component& out_host) {
644   CanonHostInfo host_info;
645   DoHost<char16_t, char16_t, CanonMode::kFileURL>(spec, host, output,
646                                                   host_info);
647   out_host = host_info.out_host;
648   return (host_info.family != CanonHostInfo::BROKEN);
649 }
650 
CanonicalizeNonSpecialHost(const char * spec,const Component & host,CanonOutput & output,Component & out_host)651 bool CanonicalizeNonSpecialHost(const char* spec,
652                                 const Component& host,
653                                 CanonOutput& output,
654                                 Component& out_host) {
655   CanonHostInfo host_info;
656   DoHost<char, unsigned char, CanonMode::kNonSpecialURL>(spec, host, output,
657                                                          host_info);
658   out_host = host_info.out_host;
659   return (host_info.family != CanonHostInfo::BROKEN);
660 }
661 
CanonicalizeNonSpecialHost(const char16_t * spec,const Component & host,CanonOutput & output,Component & out_host)662 bool CanonicalizeNonSpecialHost(const char16_t* spec,
663                                 const Component& host,
664                                 CanonOutput& output,
665                                 Component& out_host) {
666   CanonHostInfo host_info;
667   DoHost<char16_t, char16_t, CanonMode::kNonSpecialURL>(spec, host, output,
668                                                         host_info);
669   out_host = host_info.out_host;
670   return (host_info.family != CanonHostInfo::BROKEN);
671 }
672 
CanonicalizeHostVerbose(const char * spec,const Component & host,CanonOutput * output,CanonHostInfo * host_info)673 void CanonicalizeHostVerbose(const char* spec,
674                              const Component& host,
675                              CanonOutput* output,
676                              CanonHostInfo* host_info) {
677   DCHECK(output);
678   DCHECK(host_info);
679   CanonicalizeSpecialHostVerbose(spec, host, *output, *host_info);
680 }
681 
CanonicalizeHostVerbose(const char16_t * spec,const Component & host,CanonOutput * output,CanonHostInfo * host_info)682 void CanonicalizeHostVerbose(const char16_t* spec,
683                              const Component& host,
684                              CanonOutput* output,
685                              CanonHostInfo* host_info) {
686   DCHECK(output);
687   DCHECK(host_info);
688   CanonicalizeSpecialHostVerbose(spec, host, *output, *host_info);
689 }
690 
CanonicalizeSpecialHostVerbose(const char * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)691 void CanonicalizeSpecialHostVerbose(const char* spec,
692                                     const Component& host,
693                                     CanonOutput& output,
694                                     CanonHostInfo& host_info) {
695   DoHost<char, unsigned char, CanonMode::kSpecialURL>(spec, host, output,
696                                                       host_info);
697 }
698 
CanonicalizeSpecialHostVerbose(const char16_t * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)699 void CanonicalizeSpecialHostVerbose(const char16_t* spec,
700                                     const Component& host,
701                                     CanonOutput& output,
702                                     CanonHostInfo& host_info) {
703   DoHost<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host, output,
704                                                      host_info);
705 }
706 
CanonicalizeFileHostVerbose(const char * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)707 void CanonicalizeFileHostVerbose(const char* spec,
708                                  const Component& host,
709                                  CanonOutput& output,
710                                  CanonHostInfo& host_info) {
711   DoHost<char, unsigned char, CanonMode::kFileURL>(spec, host, output,
712                                                    host_info);
713 }
714 
CanonicalizeFileHostVerbose(const char16_t * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)715 void CanonicalizeFileHostVerbose(const char16_t* spec,
716                                  const Component& host,
717                                  CanonOutput& output,
718                                  CanonHostInfo& host_info) {
719   DoHost<char16_t, char16_t, CanonMode::kFileURL>(spec, host, output,
720                                                   host_info);
721 }
722 
CanonicalizeHostSubstring(const char * spec,const Component & host,CanonOutput * output)723 bool CanonicalizeHostSubstring(const char* spec,
724                                const Component& host,
725                                CanonOutput* output) {
726   return DoHostSubstring<char, unsigned char, CanonMode::kSpecialURL>(
727       spec, host, output);
728 }
729 
CanonicalizeHostSubstring(const char16_t * spec,const Component & host,CanonOutput * output)730 bool CanonicalizeHostSubstring(const char16_t* spec,
731                                const Component& host,
732                                CanonOutput* output) {
733   return DoHostSubstring<char16_t, char16_t, CanonMode::kSpecialURL>(spec, host,
734                                                                      output);
735 }
736 
CanonicalizeNonSpecialHostVerbose(const char * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)737 void CanonicalizeNonSpecialHostVerbose(const char* spec,
738                                        const Component& host,
739                                        CanonOutput& output,
740                                        CanonHostInfo& host_info) {
741   DoHost<char, unsigned char, CanonMode::kNonSpecialURL>(spec, host, output,
742                                                          host_info);
743 }
744 
CanonicalizeNonSpecialHostVerbose(const char16_t * spec,const Component & host,CanonOutput & output,CanonHostInfo & host_info)745 void CanonicalizeNonSpecialHostVerbose(const char16_t* spec,
746                                        const Component& host,
747                                        CanonOutput& output,
748                                        CanonHostInfo& host_info) {
749   DoHost<char16_t, char16_t, CanonMode::kNonSpecialURL>(spec, host, output,
750                                                         host_info);
751 }
752 
753 }  // namespace url
754