• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "url/url_canon_internal.h"
6 
7 #include <errno.h>
8 #include <stddef.h>
9 #include <stdlib.h>
10 #ifdef __SSE2__
11 #include <immintrin.h>
12 #elif defined(__aarch64__)
13 #include <arm_neon.h>
14 #endif
15 
16 #include <cstdio>
17 #include <string>
18 
19 #include "base/bits.h"
20 #include "base/numerics/safe_conversions.h"
21 #include "base/strings/utf_string_conversion_utils.h"
22 
23 namespace url {
24 
25 namespace {
26 
27 // Find the initial segment of the given string that consists solely
28 // of characters valid for CHAR_QUERY. (We can have false negatives in
29 // one specific case, namely the exclamation mark 0x21, but false negatives
30 // are fine, and it's not worth adding a separate test for.) This is
31 // a fast path to speed up checking of very long query strings that are
32 // already valid, which happen on some web pages.
33 //
34 // This has some startup cost to load the constants and such, so it's
35 // usually not worth it for short strings.
FindInitialQuerySafeString(const char * source,size_t length)36 size_t FindInitialQuerySafeString(const char* source, size_t length) {
37 #if defined(__SSE2__) || defined(__aarch64__)
38   constexpr size_t kChunkSize = 16;
39   size_t i;
40   for (i = 0; i < base::bits::AlignDown(length, kChunkSize); i += kChunkSize) {
41     char b __attribute__((vector_size(16)));
42     memcpy(&b, source + i, sizeof(b));
43 
44     // Compare each element with the ranges for CHAR_QUERY
45     // (see kSharedCharTypeTable), vectorized so that it creates
46     // a mask of which elements match. For completeness, we could
47     // have had (...) | b == 0x21 here, but exclamation marks are
48     // rare and the extra test costs us some time.
49     auto mask = b >= 0x24 && b <= 0x7e && b != 0x27 && b != 0x3c && b != 0x3e;
50 
51 #ifdef __SSE2__
52     if (_mm_movemask_epi8(reinterpret_cast<__m128i>(mask)) != 0xffff) {
53       return i;
54     }
55 #else
56     if (vminvq_u8(reinterpret_cast<uint8x16_t>(mask)) == 0) {
57       return i;
58     }
59 #endif
60   }
61   return i;
62 #else
63   // Need SIMD support (with fast reductions) for this to be efficient.
64   return 0;
65 #endif
66 }
67 
68 template <typename CHAR, typename UCHAR>
DoAppendStringOfType(const CHAR * source,size_t length,SharedCharTypes type,CanonOutput * output)69 void DoAppendStringOfType(const CHAR* source,
70                           size_t length,
71                           SharedCharTypes type,
72                           CanonOutput* output) {
73   size_t i = 0;
74   // We only instantiate this for char, to avoid a Clang crash
75   // (and because Append() does not support converting).
76   if constexpr (sizeof(CHAR) == 1) {
77     if (type == CHAR_QUERY && length >= kMinimumLengthForSIMD) {
78       i = FindInitialQuerySafeString(source, length);
79       output->Append(source, i);
80     }
81   }
82   for (; i < length; i++) {
83     if (static_cast<UCHAR>(source[i]) >= 0x80) {
84       // ReadChar will fill the code point with kUnicodeReplacementCharacter
85       // when the input is invalid, which is what we want.
86       base_icu::UChar32 code_point;
87       ReadUTFChar(source, &i, length, &code_point);
88       AppendUTF8EscapedValue(code_point, output);
89     } else {
90       // Just append the 7-bit character, possibly escaping it.
91       unsigned char uch = static_cast<unsigned char>(source[i]);
92       if (!IsCharOfType(uch, type))
93         AppendEscapedChar(uch, output);
94       else
95         output->push_back(uch);
96     }
97   }
98 }
99 
100 // This function assumes the input values are all contained in 8-bit,
101 // although it allows any type. Returns true if input is valid, false if not.
102 template <typename CHAR, typename UCHAR>
DoAppendInvalidNarrowString(const CHAR * spec,size_t begin,size_t end,CanonOutput * output)103 void DoAppendInvalidNarrowString(const CHAR* spec,
104                                  size_t begin,
105                                  size_t end,
106                                  CanonOutput* output) {
107   for (size_t i = begin; i < end; i++) {
108     UCHAR uch = static_cast<UCHAR>(spec[i]);
109     if (uch >= 0x80) {
110       // Handle UTF-8/16 encodings. This call will correctly handle the error
111       // case by appending the invalid character.
112       AppendUTF8EscapedChar(spec, &i, end, output);
113     } else if (uch <= ' ' || uch == 0x7f) {
114       // This function is for error handling, so we escape all control
115       // characters and spaces, but not anything else since we lack
116       // context to do something more specific.
117       AppendEscapedChar(static_cast<unsigned char>(uch), output);
118     } else {
119       output->push_back(static_cast<char>(uch));
120     }
121   }
122 }
123 
124 // Overrides one component, see the Replacements structure for
125 // what the various combionations of source pointer and component mean.
DoOverrideComponent(const char * override_source,const Component & override_component,const char ** dest,Component * dest_component)126 void DoOverrideComponent(const char* override_source,
127                          const Component& override_component,
128                          const char** dest,
129                          Component* dest_component) {
130   if (override_source) {
131     *dest = override_source;
132     *dest_component = override_component;
133   }
134 }
135 
136 // Similar to DoOverrideComponent except that it takes a UTF-16 input and does
137 // not actually set the output character pointer.
138 //
139 // The input is converted to UTF-8 at the end of the given buffer as a temporary
140 // holding place. The component identifying the portion of the buffer used in
141 // the |utf8_buffer| will be specified in |*dest_component|.
142 //
143 // This will not actually set any |dest| pointer like DoOverrideComponent
144 // does because all of the pointers will point into the |utf8_buffer|, which
145 // may get resized while we're overriding a subsequent component. Instead, the
146 // caller should use the beginning of the |utf8_buffer| as the string pointer
147 // for all components once all overrides have been prepared.
PrepareUTF16OverrideComponent(const char16_t * override_source,const Component & override_component,CanonOutput * utf8_buffer,Component * dest_component)148 bool PrepareUTF16OverrideComponent(const char16_t* override_source,
149                                    const Component& override_component,
150                                    CanonOutput* utf8_buffer,
151                                    Component* dest_component) {
152   bool success = true;
153   if (override_source) {
154     if (!override_component.is_valid()) {
155       // Non-"valid" component (means delete), so we need to preserve that.
156       *dest_component = Component();
157     } else {
158       // Convert to UTF-8.
159       dest_component->begin = utf8_buffer->length();
160       success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
161                                    static_cast<size_t>(override_component.len),
162                                    utf8_buffer);
163       dest_component->len = utf8_buffer->length() - dest_component->begin;
164     }
165   }
166   return success;
167 }
168 
169 }  // namespace
170 
171 // See the header file for this array's declaration.
172 // clang-format off
173 const unsigned char kSharedCharTypeTable[0x100] = {
174     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
175     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
176     0,                           // 0x20  ' ' (escape spaces in queries)
177     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x21  !
178     0,                           // 0x22  "
179     0,                           // 0x23  #  (invalid in query since it marks the ref)
180     CHAR_QUERY | CHAR_USERINFO,  // 0x24  $
181     CHAR_QUERY | CHAR_USERINFO,  // 0x25  %
182     CHAR_QUERY | CHAR_USERINFO,  // 0x26  &
183     0,                           // 0x27  '  (Try to prevent XSS.)
184     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x28  (
185     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x29  )
186     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2a  *
187     CHAR_QUERY | CHAR_USERINFO,  // 0x2b  +
188     CHAR_QUERY | CHAR_USERINFO,  // 0x2c  ,
189     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2d  -
190     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x2e  .
191     CHAR_QUERY,                  // 0x2f  /
192     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x30  0
193     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x31  1
194     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x32  2
195     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x33  3
196     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x34  4
197     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x35  5
198     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x36  6
199     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x37  7
200     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x38  8
201     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x39  9
202     CHAR_QUERY,  // 0x3a  :
203     CHAR_QUERY,  // 0x3b  ;
204     0,           // 0x3c  <  (Try to prevent certain types of XSS.)
205     CHAR_QUERY,  // 0x3d  =
206     0,           // 0x3e  >  (Try to prevent certain types of XSS.)
207     CHAR_QUERY,  // 0x3f  ?
208     CHAR_QUERY,  // 0x40  @
209     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x41  A
210     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x42  B
211     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x43  C
212     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x44  D
213     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x45  E
214     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x46  F
215     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x47  G
216     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x48  H
217     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x49  I
218     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4a  J
219     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4b  K
220     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4c  L
221     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4d  M
222     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4e  N
223     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4f  O
224     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x50  P
225     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x51  Q
226     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x52  R
227     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x53  S
228     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x54  T
229     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x55  U
230     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x56  V
231     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x57  W
232     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58  X
233     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x59  Y
234     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5a  Z
235     CHAR_QUERY,  // 0x5b  [
236     CHAR_QUERY,  // 0x5c  '\'
237     CHAR_QUERY,  // 0x5d  ]
238     CHAR_QUERY,  // 0x5e  ^
239     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5f  _
240     CHAR_QUERY,  // 0x60  `
241     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x61  a
242     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x62  b
243     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x63  c
244     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x64  d
245     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x65  e
246     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x66  f
247     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x67  g
248     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x68  h
249     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x69  i
250     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6a  j
251     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6b  k
252     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6c  l
253     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6d  m
254     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6e  n
255     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6f  o
256     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x70  p
257     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x71  q
258     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x72  r
259     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x73  s
260     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x74  t
261     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x75  u
262     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x76  v
263     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x77  w
264     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x78  x
265     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x79  y
266     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7a  z
267     CHAR_QUERY,  // 0x7b  {
268     CHAR_QUERY,  // 0x7c  |
269     CHAR_QUERY,  // 0x7d  }
270     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7e  ~
271     0,           // 0x7f
272     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
273     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
274     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
275     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
276     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
277     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
278     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
279     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
280 };
281 // clang-format on
282 
// Uppercase hexadecimal digit for each 4-bit value, indexed by that value.
const char kHexCharLookup[0x10] = {
    '0', '1', '2', '3',
    '4', '5', '6', '7',
    '8', '9', 'A', 'B',
    'C', 'D', 'E', 'F',
};
287 
// One entry per 32-character band of the 8-bit range. For the bands that
// contain hex digits, the entry is the amount to subtract from a digit in
// that band to get its numeric value (e.g. 'A' - ('A' - 10) == 10); all other
// bands hold 0.
const char kCharToHexLookup[8] = {
    0,         // 0x00 - 0x1f
    '0',       // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
    'A' - 10,  // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
    'a' - 10,  // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
    0,         // 0x80 - 0x9f
    0,         // 0xa0 - 0xbf
    0,         // 0xc0 - 0xdf
    0,         // 0xe0 - 0xff
};
298 
299 const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
300 
AppendStringOfType(const char * source,size_t length,SharedCharTypes type,CanonOutput * output)301 void AppendStringOfType(const char* source,
302                         size_t length,
303                         SharedCharTypes type,
304                         CanonOutput* output) {
305   DoAppendStringOfType<char, unsigned char>(source, length, type, output);
306 }
307 
AppendStringOfType(const char16_t * source,size_t length,SharedCharTypes type,CanonOutput * output)308 void AppendStringOfType(const char16_t* source,
309                         size_t length,
310                         SharedCharTypes type,
311                         CanonOutput* output) {
312   DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
313 }
314 
ReadUTFChar(const char * str,size_t * begin,size_t length,base_icu::UChar32 * code_point_out)315 bool ReadUTFChar(const char* str,
316                  size_t* begin,
317                  size_t length,
318                  base_icu::UChar32* code_point_out) {
319   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
320       !base::IsValidCharacter(*code_point_out)) {
321     *code_point_out = kUnicodeReplacementCharacter;
322     return false;
323   }
324   return true;
325 }
326 
ReadUTFChar(const char16_t * str,size_t * begin,size_t length,base_icu::UChar32 * code_point_out)327 bool ReadUTFChar(const char16_t* str,
328                  size_t* begin,
329                  size_t length,
330                  base_icu::UChar32* code_point_out) {
331   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
332       !base::IsValidCharacter(*code_point_out)) {
333     *code_point_out = kUnicodeReplacementCharacter;
334     return false;
335   }
336   return true;
337 }
338 
AppendInvalidNarrowString(const char * spec,size_t begin,size_t end,CanonOutput * output)339 void AppendInvalidNarrowString(const char* spec,
340                                size_t begin,
341                                size_t end,
342                                CanonOutput* output) {
343   DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
344 }
345 
AppendInvalidNarrowString(const char16_t * spec,size_t begin,size_t end,CanonOutput * output)346 void AppendInvalidNarrowString(const char16_t* spec,
347                                size_t begin,
348                                size_t end,
349                                CanonOutput* output) {
350   DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
351 }
352 
ConvertUTF16ToUTF8(const char16_t * input,size_t input_len,CanonOutput * output)353 bool ConvertUTF16ToUTF8(const char16_t* input,
354                         size_t input_len,
355                         CanonOutput* output) {
356   bool success = true;
357   for (size_t i = 0; i < input_len; i++) {
358     base_icu::UChar32 code_point;
359     success &= ReadUTFChar(input, &i, input_len, &code_point);
360     AppendUTF8Value(code_point, output);
361   }
362   return success;
363 }
364 
ConvertUTF8ToUTF16(const char * input,size_t input_len,CanonOutputT<char16_t> * output)365 bool ConvertUTF8ToUTF16(const char* input,
366                         size_t input_len,
367                         CanonOutputT<char16_t>* output) {
368   bool success = true;
369   for (size_t i = 0; i < input_len; i++) {
370     base_icu::UChar32 code_point;
371     success &= ReadUTFChar(input, &i, input_len, &code_point);
372     AppendUTF16Value(code_point, output);
373   }
374   return success;
375 }
376 
SetupOverrideComponents(const char * base,const Replacements<char> & repl,URLComponentSource<char> * source,Parsed * parsed)377 void SetupOverrideComponents(const char* base,
378                              const Replacements<char>& repl,
379                              URLComponentSource<char>* source,
380                              Parsed* parsed) {
381   // Get the source and parsed structures of the things we are replacing.
382   const URLComponentSource<char>& repl_source = repl.sources();
383   const Parsed& repl_parsed = repl.components();
384 
385   DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, &source->scheme,
386                       &parsed->scheme);
387   DoOverrideComponent(repl_source.username, repl_parsed.username,
388                       &source->username, &parsed->username);
389   DoOverrideComponent(repl_source.password, repl_parsed.password,
390                       &source->password, &parsed->password);
391 
392   // Our host should be empty if not present, so override the default setup.
393   DoOverrideComponent(repl_source.host, repl_parsed.host, &source->host,
394                       &parsed->host);
395   if (parsed->host.len == -1)
396     parsed->host.len = 0;
397 
398   DoOverrideComponent(repl_source.port, repl_parsed.port, &source->port,
399                       &parsed->port);
400   DoOverrideComponent(repl_source.path, repl_parsed.path, &source->path,
401                       &parsed->path);
402   DoOverrideComponent(repl_source.query, repl_parsed.query, &source->query,
403                       &parsed->query);
404   DoOverrideComponent(repl_source.ref, repl_parsed.ref, &source->ref,
405                       &parsed->ref);
406 }
407 
SetupUTF16OverrideComponents(const char * base,const Replacements<char16_t> & repl,CanonOutput * utf8_buffer,URLComponentSource<char> * source,Parsed * parsed)408 bool SetupUTF16OverrideComponents(const char* base,
409                                   const Replacements<char16_t>& repl,
410                                   CanonOutput* utf8_buffer,
411                                   URLComponentSource<char>* source,
412                                   Parsed* parsed) {
413   bool success = true;
414 
415   // Get the source and parsed structures of the things we are replacing.
416   const URLComponentSource<char16_t>& repl_source = repl.sources();
417   const Parsed& repl_parsed = repl.components();
418 
419   success &= PrepareUTF16OverrideComponent(
420       repl_source.scheme, repl_parsed.scheme, utf8_buffer, &parsed->scheme);
421   success &=
422       PrepareUTF16OverrideComponent(repl_source.username, repl_parsed.username,
423                                     utf8_buffer, &parsed->username);
424   success &=
425       PrepareUTF16OverrideComponent(repl_source.password, repl_parsed.password,
426                                     utf8_buffer, &parsed->password);
427   success &= PrepareUTF16OverrideComponent(repl_source.host, repl_parsed.host,
428                                            utf8_buffer, &parsed->host);
429   success &= PrepareUTF16OverrideComponent(repl_source.port, repl_parsed.port,
430                                            utf8_buffer, &parsed->port);
431   success &= PrepareUTF16OverrideComponent(repl_source.path, repl_parsed.path,
432                                            utf8_buffer, &parsed->path);
433   success &= PrepareUTF16OverrideComponent(repl_source.query, repl_parsed.query,
434                                            utf8_buffer, &parsed->query);
435   success &= PrepareUTF16OverrideComponent(repl_source.ref, repl_parsed.ref,
436                                            utf8_buffer, &parsed->ref);
437 
438   // PrepareUTF16OverrideComponent will not have set the data pointer since the
439   // buffer could be resized, invalidating the pointers. We set the data
440   // pointers for affected components now that the buffer is finalized.
441   if (repl_source.scheme)
442     source->scheme = utf8_buffer->data();
443   if (repl_source.username)
444     source->username = utf8_buffer->data();
445   if (repl_source.password)
446     source->password = utf8_buffer->data();
447   if (repl_source.host)
448     source->host = utf8_buffer->data();
449   if (repl_source.port)
450     source->port = utf8_buffer->data();
451   if (repl_source.path)
452     source->path = utf8_buffer->data();
453   if (repl_source.query)
454     source->query = utf8_buffer->data();
455   if (repl_source.ref)
456     source->ref = utf8_buffer->data();
457 
458   return success;
459 }
460 
461 #ifndef WIN32
462 
// Portable stand-in for the Windows _itoa_s(): formats |value| into |buffer|
// as a NUL-terminated string in base |radix|.
//
// Only radix 10 and 16 are supported. Radix 16 output is lowercase and
// renders the value's unsigned bit pattern, so negative values come out in
// two's-complement form.
//
// Returns 0 on success. Returns EINVAL if |radix| is unsupported, if
// formatting fails, or if the result (including the terminating NUL) does
// not fit in |size_in_chars| characters.
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
  int written;
  switch (radix) {
    case 10:
      written = snprintf(buffer, size_in_chars, "%d", value);
      break;
    case 16:
      // %x expects an unsigned argument; convert explicitly rather than
      // passing a (possibly negative) int through varargs.
      written = snprintf(buffer, size_in_chars, "%x",
                         static_cast<unsigned int>(value));
      break;
    default:
      return EINVAL;
  }

  // snprintf returns a negative value on error, and otherwise the length the
  // full output would have had; anything >= the buffer size was truncated.
  if (written < 0 || static_cast<size_t>(written) >= size_in_chars) {
    return EINVAL;
  }
  return 0;
}
479 
// Portable stand-in for the Windows _itow_s(): formats |value| in decimal
// into |buffer| as a NUL-terminated char16_t string.
//
// Only radix 10 is supported. Returns 0 on success. Returns EINVAL if |radix|
// is not 10, if formatting fails, or if the result (including the
// terminating NUL) does not fit in |size_in_chars| characters.
int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
  if (radix != 10)
    return EINVAL;

  // No more than 12 characters will be required for a 32-bit integer.
  // Add an extra byte for the terminating null.
  char temp[13];
  const int written = snprintf(temp, sizeof(temp), "%d", value);
  if (written < 0) {
    // Formatting failed.
    return EINVAL;
  }

  const size_t digits = static_cast<size_t>(written);
  if (digits >= sizeof(temp) || digits >= size_in_chars) {
    // Either |temp| was too small (so it holds fewer than |written|
    // characters and must not be read that far), or the caller's buffer
    // cannot hold the digits plus the terminating null.
    return EINVAL;
  }

  // Widen each ASCII character; the digits and '-' map 1:1 to UTF-16.
  for (size_t i = 0; i < digits; ++i) {
    buffer[i] = static_cast<char16_t>(temp[i]);
  }
  buffer[digits] = u'\0';
  return 0;
}
499 
500 #endif  // !WIN32
501 
502 }  // namespace url
503