• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "url/url_canon_internal.h"
11 
12 #include <errno.h>
13 #include <stddef.h>
14 #include <stdlib.h>
15 #ifdef __SSE2__
16 #include <immintrin.h>
17 #elif defined(__aarch64__)
18 #include <arm_neon.h>
19 #endif
20 
21 #include <cstdio>
22 #include <string>
23 
24 #include "base/bits.h"
25 #include "base/numerics/safe_conversions.h"
26 #include "base/strings/utf_string_conversion_utils.h"
27 #include "url/url_features.h"
28 
29 namespace url {
30 
31 namespace {
32 
33 // Find the initial segment of the given string that consists solely
34 // of characters valid for CHAR_QUERY. (We can have false negatives in
35 // one specific case, namely the exclamation mark 0x21, but false negatives
36 // are fine, and it's not worth adding a separate test for.) This is
37 // a fast path to speed up checking of very long query strings that are
38 // already valid, which happen on some web pages.
39 //
40 // This has some startup cost to load the constants and such, so it's
41 // usually not worth it for short strings.
FindInitialQuerySafeString(const char * source,size_t length)42 size_t FindInitialQuerySafeString(const char* source, size_t length) {
43 #if defined(__SSE2__) || defined(__aarch64__)
44   constexpr size_t kChunkSize = 16;
45   size_t i;
46   for (i = 0; i < base::bits::AlignDown(length, kChunkSize); i += kChunkSize) {
47     char b __attribute__((vector_size(16)));
48     memcpy(&b, source + i, sizeof(b));
49 
50     // Compare each element with the ranges for CHAR_QUERY
51     // (see kSharedCharTypeTable), vectorized so that it creates
52     // a mask of which elements match. For completeness, we could
53     // have had (...) | b == 0x21 here, but exclamation marks are
54     // rare and the extra test costs us some time.
55     auto mask = b >= 0x24 && b <= 0x7e && b != 0x27 && b != 0x3c && b != 0x3e;
56 
57 #ifdef __SSE2__
58     if (_mm_movemask_epi8(reinterpret_cast<__m128i>(mask)) != 0xffff) {
59       return i;
60     }
61 #else
62     if (vminvq_u8(reinterpret_cast<uint8x16_t>(mask)) == 0) {
63       return i;
64     }
65 #endif
66   }
67   return i;
68 #else
69   // Need SIMD support (with fast reductions) for this to be efficient.
70   return 0;
71 #endif
72 }
73 
74 template <typename CHAR, typename UCHAR>
DoAppendStringOfType(const CHAR * source,size_t length,SharedCharTypes type,CanonOutput * output)75 void DoAppendStringOfType(const CHAR* source,
76                           size_t length,
77                           SharedCharTypes type,
78                           CanonOutput* output) {
79   size_t i = 0;
80   // We only instantiate this for char, to avoid a Clang crash
81   // (and because Append() does not support converting).
82   if constexpr (sizeof(CHAR) == 1) {
83     if (type == CHAR_QUERY && length >= kMinimumLengthForSIMD) {
84       i = FindInitialQuerySafeString(source, length);
85       output->Append(source, i);
86     }
87   }
88   for (; i < length; i++) {
89     if (static_cast<UCHAR>(source[i]) >= 0x80) {
90       // ReadUTFCharLossy will fill the code point with
91       // kUnicodeReplacementCharacter when the input is invalid, which is what
92       // we want.
93       base_icu::UChar32 code_point;
94       ReadUTFCharLossy(source, &i, length, &code_point);
95       AppendUTF8EscapedValue(code_point, output);
96     } else {
97       // Just append the 7-bit character, possibly escaping it.
98       unsigned char uch = static_cast<unsigned char>(source[i]);
99       if (!IsCharOfType(uch, type))
100         AppendEscapedChar(uch, output);
101       else
102         output->push_back(uch);
103     }
104   }
105 }
106 
107 // This function assumes the input values are all contained in 8-bit,
108 // although it allows any type. Returns true if input is valid, false if not.
109 template <typename CHAR, typename UCHAR>
DoAppendInvalidNarrowString(const CHAR * spec,size_t begin,size_t end,CanonOutput * output)110 void DoAppendInvalidNarrowString(const CHAR* spec,
111                                  size_t begin,
112                                  size_t end,
113                                  CanonOutput* output) {
114   for (size_t i = begin; i < end; i++) {
115     UCHAR uch = static_cast<UCHAR>(spec[i]);
116     if (uch >= 0x80) {
117       // Handle UTF-8/16 encodings. This call will correctly handle the error
118       // case by appending the invalid character.
119       AppendUTF8EscapedChar(spec, &i, end, output);
120     } else if (uch <= ' ' || uch == 0x7f) {
121       // This function is for error handling, so we escape all control
122       // characters and spaces, but not anything else since we lack
123       // context to do something more specific.
124       AppendEscapedChar(static_cast<unsigned char>(uch), output);
125     } else {
126       output->push_back(static_cast<char>(uch));
127     }
128   }
129 }
130 
131 // Overrides one component, see the Replacements structure for
132 // what the various combionations of source pointer and component mean.
DoOverrideComponent(const char * override_source,const Component & override_component,const char ** dest,Component * dest_component)133 void DoOverrideComponent(const char* override_source,
134                          const Component& override_component,
135                          const char** dest,
136                          Component* dest_component) {
137   if (override_source) {
138     *dest = override_source;
139     *dest_component = override_component;
140   }
141 }
142 
143 // Similar to DoOverrideComponent except that it takes a UTF-16 input and does
144 // not actually set the output character pointer.
145 //
146 // The input is converted to UTF-8 at the end of the given buffer as a temporary
147 // holding place. The component identifying the portion of the buffer used in
148 // the |utf8_buffer| will be specified in |*dest_component|.
149 //
150 // This will not actually set any |dest| pointer like DoOverrideComponent
151 // does because all of the pointers will point into the |utf8_buffer|, which
152 // may get resized while we're overriding a subsequent component. Instead, the
153 // caller should use the beginning of the |utf8_buffer| as the string pointer
154 // for all components once all overrides have been prepared.
PrepareUTF16OverrideComponent(const char16_t * override_source,const Component & override_component,CanonOutput * utf8_buffer,Component * dest_component)155 bool PrepareUTF16OverrideComponent(const char16_t* override_source,
156                                    const Component& override_component,
157                                    CanonOutput* utf8_buffer,
158                                    Component* dest_component) {
159   bool success = true;
160   if (override_source) {
161     if (!override_component.is_valid()) {
162       // Non-"valid" component (means delete), so we need to preserve that.
163       *dest_component = Component();
164     } else {
165       // Convert to UTF-8.
166       dest_component->begin = utf8_buffer->length();
167       success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
168                                    static_cast<size_t>(override_component.len),
169                                    utf8_buffer);
170       dest_component->len = utf8_buffer->length() - dest_component->begin;
171     }
172   }
173   return success;
174 }
175 
176 }  // namespace
177 
// See the header file for this array's declaration.
//
// One entry per 8-bit character value (0x00 - 0xff). Each entry is the
// bitwise OR of the CHAR_* flags that apply to that character; 0 means the
// character belongs to none of the classes. Every value from 0x80 upward is 0.
// Presumably this is what IsCharOfType() consults -- confirm in the header.
//
// NOTE: FindInitialQuerySafeString() in this file hard-codes the CHAR_QUERY
// ranges from this table in its vectorized comparison, so any change to which
// entries carry CHAR_QUERY must be mirrored there.
// clang-format off
const unsigned char kSharedCharTypeTable[0x100] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
    0,                           // 0x20  ' ' (escape spaces in queries)
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x21  !
    0,                           // 0x22  "
    0,                           // 0x23  #  (invalid in query since it marks the ref)
    CHAR_QUERY | CHAR_USERINFO,  // 0x24  $
    CHAR_QUERY | CHAR_USERINFO,  // 0x25  %
    CHAR_QUERY | CHAR_USERINFO,  // 0x26  &
    0,                           // 0x27  '  (Try to prevent XSS.)
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x28  (
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x29  )
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2a  *
    CHAR_QUERY | CHAR_USERINFO,  // 0x2b  +
    CHAR_QUERY | CHAR_USERINFO,  // 0x2c  ,
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2d  -
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x2e  .
    CHAR_QUERY,                  // 0x2f  /
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x30  0
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x31  1
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x32  2
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x33  3
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x34  4
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x35  5
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x36  6
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x37  7
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x38  8
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x39  9
    CHAR_QUERY,  // 0x3a  :
    CHAR_QUERY,  // 0x3b  ;
    0,           // 0x3c  <  (Try to prevent certain types of XSS.)
    CHAR_QUERY,  // 0x3d  =
    0,           // 0x3e  >  (Try to prevent certain types of XSS.)
    CHAR_QUERY,  // 0x3f  ?
    CHAR_QUERY,  // 0x40  @
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x41  A
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x42  B
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x43  C
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x44  D
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x45  E
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x46  F
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x47  G
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x48  H
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x49  I
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4a  J
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4b  K
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4c  L
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4d  M
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4e  N
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4f  O
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x50  P
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x51  Q
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x52  R
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x53  S
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x54  T
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x55  U
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x56  V
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x57  W
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58  X
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x59  Y
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5a  Z
    CHAR_QUERY,  // 0x5b  [
    CHAR_QUERY,  // 0x5c  '\'
    CHAR_QUERY,  // 0x5d  ]
    CHAR_QUERY,  // 0x5e  ^
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5f  _
    CHAR_QUERY,  // 0x60  `
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x61  a
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x62  b
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x63  c
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x64  d
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x65  e
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x66  f
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x67  g
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x68  h
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x69  i
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6a  j
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6b  k
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6c  l
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6d  m
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6e  n
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6f  o
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x70  p
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x71  q
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x72  r
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x73  s
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x74  t
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x75  u
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x76  v
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x77  w
    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x78  x
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x79  y
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7a  z
    CHAR_QUERY,  // 0x7b  {
    CHAR_QUERY,  // 0x7c  |
    CHAR_QUERY,  // 0x7d  }
    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7e  ~
    0,           // 0x7f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
};
// clang-format on
289 
// Per-range offsets for turning a hex digit character into its numeric value.
// Each of the eight entries covers a 0x20-wide block of character codes (see
// the range noted on each row). Subtracting the entry from a hex digit in that
// block yields the digit's value: '7' - '0' == 7, 'C' - ('A' - 10) == 12,
// 'e' - ('a' - 10) == 14. Blocks that contain no hex digits hold 0.
// NOTE(review): presumably indexed by (character / 0x20) by a helper in the
// header, which is expected to validate the character first -- confirm there.
const char kCharToHexLookup[8] = {
    0,         // 0x00 - 0x1f
    '0',       // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
    'A' - 10,  // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
    'a' - 10,  // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
    0,         // 0x80 - 0x9F
    0,         // 0xA0 - 0xBF
    0,         // 0xC0 - 0xDF
    0,         // 0xE0 - 0xFF
};
300 
// U+FFFD, the code point ReadUTFCharLossy() reports in place of input that
// cannot be decoded.
const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
302 
// 8-bit overload: forwards to the shared DoAppendStringOfType template with
// unsigned char as the unsigned view of each input character. This is the
// instantiation for which the template's sizeof(CHAR) == 1 branch applies.
void AppendStringOfType(const char* source,
                        size_t length,
                        SharedCharTypes type,
                        CanonOutput* output) {
  DoAppendStringOfType<char, unsigned char>(source, length, type, output);
}
309 
// UTF-16 overload: forwards to the shared DoAppendStringOfType template.
// char16_t serves as both the character type and its unsigned view.
void AppendStringOfType(const char16_t* source,
                        size_t length,
                        SharedCharTypes type,
                        CanonOutput* output) {
  DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
316 
ReadUTFCharLossy(const char * str,size_t * begin,size_t length,base_icu::UChar32 * code_point_out)317 bool ReadUTFCharLossy(const char* str,
318                       size_t* begin,
319                       size_t length,
320                       base_icu::UChar32* code_point_out) {
321   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
322     *code_point_out = kUnicodeReplacementCharacter;
323     return false;
324   }
325   return true;
326 }
327 
ReadUTFCharLossy(const char16_t * str,size_t * begin,size_t length,base_icu::UChar32 * code_point_out)328 bool ReadUTFCharLossy(const char16_t* str,
329                       size_t* begin,
330                       size_t length,
331                       base_icu::UChar32* code_point_out) {
332   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
333     *code_point_out = kUnicodeReplacementCharacter;
334     return false;
335   }
336   return true;
337 }
338 
// 8-bit overload: forwards spec[begin, end) to the shared
// DoAppendInvalidNarrowString template, with unsigned char as the unsigned
// view of each input character.
void AppendInvalidNarrowString(const char* spec,
                               size_t begin,
                               size_t end,
                               CanonOutput* output) {
  DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
}
345 
// UTF-16 overload: forwards spec[begin, end) to the shared
// DoAppendInvalidNarrowString template. char16_t serves as both the
// character type and its unsigned view.
void AppendInvalidNarrowString(const char16_t* spec,
                               size_t begin,
                               size_t end,
                               CanonOutput* output) {
  DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
}
352 
ConvertUTF16ToUTF8(const char16_t * input,size_t input_len,CanonOutput * output)353 bool ConvertUTF16ToUTF8(const char16_t* input,
354                         size_t input_len,
355                         CanonOutput* output) {
356   bool success = true;
357   for (size_t i = 0; i < input_len; i++) {
358     base_icu::UChar32 code_point;
359     success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
360     AppendUTF8Value(code_point, output);
361   }
362   return success;
363 }
364 
ConvertUTF8ToUTF16(const char * input,size_t input_len,CanonOutputT<char16_t> * output)365 bool ConvertUTF8ToUTF16(const char* input,
366                         size_t input_len,
367                         CanonOutputT<char16_t>* output) {
368   bool success = true;
369   for (size_t i = 0; i < input_len; i++) {
370     base_icu::UChar32 code_point;
371     success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
372     AppendUTF16Value(code_point, output);
373   }
374   return success;
375 }
376 
SetupOverrideComponents(const char * base,const Replacements<char> & repl,URLComponentSource<char> * source,Parsed * parsed)377 void SetupOverrideComponents(const char* base,
378                              const Replacements<char>& repl,
379                              URLComponentSource<char>* source,
380                              Parsed* parsed) {
381   // Get the source and parsed structures of the things we are replacing.
382   const URLComponentSource<char>& repl_source = repl.sources();
383   const Parsed& repl_parsed = repl.components();
384 
385   DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, &source->scheme,
386                       &parsed->scheme);
387   DoOverrideComponent(repl_source.username, repl_parsed.username,
388                       &source->username, &parsed->username);
389   DoOverrideComponent(repl_source.password, repl_parsed.password,
390                       &source->password, &parsed->password);
391 
392   DoOverrideComponent(repl_source.host, repl_parsed.host, &source->host,
393                       &parsed->host);
394   if (!url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
395     // For backward compatibility, the following is probably required while the
396     // flag is disabled by default.
397     if (parsed->host.len == -1) {
398       parsed->host.len = 0;
399     }
400   }
401 
402   DoOverrideComponent(repl_source.port, repl_parsed.port, &source->port,
403                       &parsed->port);
404   DoOverrideComponent(repl_source.path, repl_parsed.path, &source->path,
405                       &parsed->path);
406   DoOverrideComponent(repl_source.query, repl_parsed.query, &source->query,
407                       &parsed->query);
408   DoOverrideComponent(repl_source.ref, repl_parsed.ref, &source->ref,
409                       &parsed->ref);
410 }
411 
SetupUTF16OverrideComponents(const char * base,const Replacements<char16_t> & repl,CanonOutput * utf8_buffer,URLComponentSource<char> * source,Parsed * parsed)412 bool SetupUTF16OverrideComponents(const char* base,
413                                   const Replacements<char16_t>& repl,
414                                   CanonOutput* utf8_buffer,
415                                   URLComponentSource<char>* source,
416                                   Parsed* parsed) {
417   bool success = true;
418 
419   // Get the source and parsed structures of the things we are replacing.
420   const URLComponentSource<char16_t>& repl_source = repl.sources();
421   const Parsed& repl_parsed = repl.components();
422 
423   success &= PrepareUTF16OverrideComponent(
424       repl_source.scheme, repl_parsed.scheme, utf8_buffer, &parsed->scheme);
425   success &=
426       PrepareUTF16OverrideComponent(repl_source.username, repl_parsed.username,
427                                     utf8_buffer, &parsed->username);
428   success &=
429       PrepareUTF16OverrideComponent(repl_source.password, repl_parsed.password,
430                                     utf8_buffer, &parsed->password);
431   success &= PrepareUTF16OverrideComponent(repl_source.host, repl_parsed.host,
432                                            utf8_buffer, &parsed->host);
433   success &= PrepareUTF16OverrideComponent(repl_source.port, repl_parsed.port,
434                                            utf8_buffer, &parsed->port);
435   success &= PrepareUTF16OverrideComponent(repl_source.path, repl_parsed.path,
436                                            utf8_buffer, &parsed->path);
437   success &= PrepareUTF16OverrideComponent(repl_source.query, repl_parsed.query,
438                                            utf8_buffer, &parsed->query);
439   success &= PrepareUTF16OverrideComponent(repl_source.ref, repl_parsed.ref,
440                                            utf8_buffer, &parsed->ref);
441 
442   // PrepareUTF16OverrideComponent will not have set the data pointer since the
443   // buffer could be resized, invalidating the pointers. We set the data
444   // pointers for affected components now that the buffer is finalized.
445   if (repl_source.scheme)
446     source->scheme = utf8_buffer->data();
447   if (repl_source.username)
448     source->username = utf8_buffer->data();
449   if (repl_source.password)
450     source->password = utf8_buffer->data();
451   if (repl_source.host)
452     source->host = utf8_buffer->data();
453   if (repl_source.port)
454     source->port = utf8_buffer->data();
455   if (repl_source.path)
456     source->path = utf8_buffer->data();
457   if (repl_source.query)
458     source->query = utf8_buffer->data();
459   if (repl_source.ref)
460     source->ref = utf8_buffer->data();
461 
462   return success;
463 }
464 
465 #ifndef WIN32
466 
// Replacement for the Windows _itoa_s() on other platforms. Writes |value| to
// |buffer| as a null-terminated string in the given |radix|.
//
//   value          the integer to format
//   buffer         destination, with room for |size_in_chars| characters
//   size_in_chars  capacity of |buffer|, including the terminating null
//   radix          10 (signed decimal) or 16 (lowercase hex of the value's
//                  unsigned representation)
//
// Returns 0 on success. Returns EINVAL if |radix| is unsupported, if the
// result (plus terminator) does not fit in |size_in_chars|, or if formatting
// fails.
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
  int written;
  switch (radix) {
    case 10:
      written = snprintf(buffer, size_in_chars, "%d", value);
      break;
    case 16:
      // %x consumes an unsigned int, so convert explicitly rather than
      // passing a (possibly negative) int through the varargs.
      written = snprintf(buffer, size_in_chars, "%x",
                         static_cast<unsigned int>(value));
      break;
    default:
      return EINVAL;
  }

  // snprintf reports the length it wanted to write; anything that does not
  // leave room for the terminator means the output was truncated.
  if (written < 0 || static_cast<size_t>(written) >= size_in_chars) {
    return EINVAL;
  }
  return 0;
}
483 
// Replacement for the Windows _itow_s() on other platforms. Writes |value| to
// |buffer| as a null-terminated char16_t string. Only radix 10 is supported.
//
// Returns 0 on success. Returns EINVAL if |radix| is not 10 or if the digits
// plus the terminating null do not fit in |size_in_chars|.
int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
  if (radix != 10) {
    return EINVAL;
  }

  // A 32-bit integer needs at most 11 characters ("-2147483648"), so 13 bytes
  // comfortably holds the digits and the terminating null.
  char narrow[13];
  const int digit_count = snprintf(narrow, sizeof(narrow), "%d", value);
  if (static_cast<size_t>(digit_count) >= size_in_chars) {
    // Output would be truncated, or digit_count was negative.
    return EINVAL;
  }

  // Widen each character into the caller's buffer, then terminate it.
  char16_t* out = buffer;
  for (const char* in = narrow; in != narrow + digit_count; ++in) {
    *out++ = static_cast<char16_t>(*in);
  }
  *out = '\0';
  return 0;
}
503 
504 #endif  // !WIN32
505 
506 }  // namespace url
507