• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
// Functions for canonicalizing "mailto:" URLs.
11 
12 #include "url/url_canon.h"
13 #include "url/url_canon_internal.h"
14 #include "url/url_file.h"
15 #include "url/url_parse_internal.h"
16 
17 namespace url {
18 
19 namespace {
20 
// Returns true when |uch| must be percent-encoded if it appears in the path
// component of a mailto: URL. Escaping these characters improves
// compatibility and mitigates command-injection attacks on mailto handlers.
// See https://crbug.com/711020.
template <typename UCHAR>
bool ShouldEncodeMailboxCharacter(UCHAR uch) {
  // Anything outside 0x21..0x7e is always escaped: space and control
  // characters below, DEL and every non-ASCII code unit above.
  if (uch < 0x21 || uch > 0x7e) {
    return true;
  }

  // Within the printable range only a handful of characters are escaped.
  switch (uch) {
    case 0x22:  // Double quote.
    case 0x3c:  // Left angle bracket.
    case 0x3e:  // Right angle bracket.
    case 0x60:  // Backtick.
    case 0x7b:  // Left brace.
    case 0x7c:  // Pipe.
    case 0x7d:  // Right brace.
      return true;
    default:
      return false;
  }
}
37 
38 template <typename CHAR, typename UCHAR>
DoCanonicalizeMailtoURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)39 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
40                              const Parsed& parsed,
41                              CanonOutput* output,
42                              Parsed* new_parsed) {
43   // mailto: only uses {scheme, path, query} -- clear the rest.
44   new_parsed->username = Component();
45   new_parsed->password = Component();
46   new_parsed->host = Component();
47   new_parsed->port = Component();
48   new_parsed->ref = Component();
49 
50   // Scheme (known, so we don't bother running it through the more
51   // complicated scheme canonicalizer).
52   new_parsed->scheme.begin = output->length();
53   output->Append("mailto:");
54   new_parsed->scheme.len = 6;
55 
56   bool success = true;
57 
58   // Path
59   if (parsed.path.is_valid()) {
60     new_parsed->path.begin = output->length();
61 
62     // Copy the path using path URL's more lax escaping rules.
63     // We convert to UTF-8 and escape non-ASCII, but leave most
64     // ASCII characters alone.
65     size_t end = static_cast<size_t>(parsed.path.end());
66     for (size_t i = static_cast<size_t>(parsed.path.begin); i < end; ++i) {
67       UCHAR uch = static_cast<UCHAR>(source.path[i]);
68       if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
69         success &= AppendUTF8EscapedChar(source.path, &i, end, output);
70       else
71         output->push_back(static_cast<char>(uch));
72     }
73 
74     new_parsed->path.len = output->length() - new_parsed->path.begin;
75   } else {
76     // No path at all
77     new_parsed->path.reset();
78   }
79 
80   // Query -- always use the default UTF8 charset converter.
81   CanonicalizeQuery(source.query, parsed.query, NULL,
82                     output, &new_parsed->query);
83 
84   return success;
85 }
86 
87 } // namespace
88 
CanonicalizeMailtoURL(const char * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)89 bool CanonicalizeMailtoURL(const char* spec,
90                            int spec_len,
91                            const Parsed& parsed,
92                            CanonOutput* output,
93                            Parsed* new_parsed) {
94   return DoCanonicalizeMailtoURL<char, unsigned char>(
95       URLComponentSource<char>(spec), parsed, output, new_parsed);
96 }
97 
CanonicalizeMailtoURL(const char16_t * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)98 bool CanonicalizeMailtoURL(const char16_t* spec,
99                            int spec_len,
100                            const Parsed& parsed,
101                            CanonOutput* output,
102                            Parsed* new_parsed) {
103   return DoCanonicalizeMailtoURL<char16_t, char16_t>(
104       URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
105 }
106 
ReplaceMailtoURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CanonOutput * output,Parsed * new_parsed)107 bool ReplaceMailtoURL(const char* base,
108                       const Parsed& base_parsed,
109                       const Replacements<char>& replacements,
110                       CanonOutput* output,
111                       Parsed* new_parsed) {
112   URLComponentSource<char> source(base);
113   Parsed parsed(base_parsed);
114   SetupOverrideComponents(base, replacements, &source, &parsed);
115   return DoCanonicalizeMailtoURL<char, unsigned char>(
116       source, parsed, output, new_parsed);
117 }
118 
ReplaceMailtoURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CanonOutput * output,Parsed * new_parsed)119 bool ReplaceMailtoURL(const char* base,
120                       const Parsed& base_parsed,
121                       const Replacements<char16_t>& replacements,
122                       CanonOutput* output,
123                       Parsed* new_parsed) {
124   RawCanonOutput<1024> utf8;
125   URLComponentSource<char> source(base);
126   Parsed parsed(base_parsed);
127   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
128   return DoCanonicalizeMailtoURL<char, unsigned char>(
129       source, parsed, output, new_parsed);
130 }
131 
132 }  // namespace url
133