1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 // Functions for canonicalizing "mailto:" URLs.
11
12 #include "url/url_canon.h"
13 #include "url/url_canon_internal.h"
14 #include "url/url_file.h"
15 #include "url/url_parse_internal.h"
16
17 namespace url {
18
19 namespace {
20
// Returns true when |uch| has to be percent-encoded if it shows up in the
// path component of a mailto URL. Escaping these characters improves
// compatibility and mitigates command-injection attacks on mailto handlers.
// See https://crbug.com/711020.
template <typename UCHAR>
bool ShouldEncodeMailboxCharacter(UCHAR uch) {
  // Everything outside 0x21..0x7e is escaped: space and control characters
  // below the range, DEL and non-ASCII values above it.
  if (uch < 0x21 || uch > 0x7e) {
    return true;
  }

  // Inside that range only a handful of punctuation characters are escaped.
  switch (uch) {
    case 0x22:  // quote.
    case 0x3c:  // left angle bracket.
    case 0x3e:  // right angle bracket.
    case 0x60:  // backtick.
    case 0x7b:  // left brace.
    case 0x7c:  // pipe.
    case 0x7d:  // right brace.
      return true;
    default:
      return false;
  }
}
37
38 template <typename CHAR, typename UCHAR>
DoCanonicalizeMailtoURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)39 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
40 const Parsed& parsed,
41 CanonOutput* output,
42 Parsed* new_parsed) {
43 // mailto: only uses {scheme, path, query} -- clear the rest.
44 new_parsed->username = Component();
45 new_parsed->password = Component();
46 new_parsed->host = Component();
47 new_parsed->port = Component();
48 new_parsed->ref = Component();
49
50 // Scheme (known, so we don't bother running it through the more
51 // complicated scheme canonicalizer).
52 new_parsed->scheme.begin = output->length();
53 output->Append("mailto:");
54 new_parsed->scheme.len = 6;
55
56 bool success = true;
57
58 // Path
59 if (parsed.path.is_valid()) {
60 new_parsed->path.begin = output->length();
61
62 // Copy the path using path URL's more lax escaping rules.
63 // We convert to UTF-8 and escape non-ASCII, but leave most
64 // ASCII characters alone.
65 size_t end = static_cast<size_t>(parsed.path.end());
66 for (size_t i = static_cast<size_t>(parsed.path.begin); i < end; ++i) {
67 UCHAR uch = static_cast<UCHAR>(source.path[i]);
68 if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
69 success &= AppendUTF8EscapedChar(source.path, &i, end, output);
70 else
71 output->push_back(static_cast<char>(uch));
72 }
73
74 new_parsed->path.len = output->length() - new_parsed->path.begin;
75 } else {
76 // No path at all
77 new_parsed->path.reset();
78 }
79
80 // Query -- always use the default UTF8 charset converter.
81 CanonicalizeQuery(source.query, parsed.query, NULL,
82 output, &new_parsed->query);
83
84 return success;
85 }
86
87 } // namespace
88
CanonicalizeMailtoURL(const char * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)89 bool CanonicalizeMailtoURL(const char* spec,
90 int spec_len,
91 const Parsed& parsed,
92 CanonOutput* output,
93 Parsed* new_parsed) {
94 return DoCanonicalizeMailtoURL<char, unsigned char>(
95 URLComponentSource<char>(spec), parsed, output, new_parsed);
96 }
97
CanonicalizeMailtoURL(const char16_t * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)98 bool CanonicalizeMailtoURL(const char16_t* spec,
99 int spec_len,
100 const Parsed& parsed,
101 CanonOutput* output,
102 Parsed* new_parsed) {
103 return DoCanonicalizeMailtoURL<char16_t, char16_t>(
104 URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
105 }
106
ReplaceMailtoURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CanonOutput * output,Parsed * new_parsed)107 bool ReplaceMailtoURL(const char* base,
108 const Parsed& base_parsed,
109 const Replacements<char>& replacements,
110 CanonOutput* output,
111 Parsed* new_parsed) {
112 URLComponentSource<char> source(base);
113 Parsed parsed(base_parsed);
114 SetupOverrideComponents(base, replacements, &source, &parsed);
115 return DoCanonicalizeMailtoURL<char, unsigned char>(
116 source, parsed, output, new_parsed);
117 }
118
ReplaceMailtoURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CanonOutput * output,Parsed * new_parsed)119 bool ReplaceMailtoURL(const char* base,
120 const Parsed& base_parsed,
121 const Replacements<char16_t>& replacements,
122 CanonOutput* output,
123 Parsed* new_parsed) {
124 RawCanonOutput<1024> utf8;
125 URLComponentSource<char> source(base);
126 Parsed parsed(base_parsed);
127 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
128 return DoCanonicalizeMailtoURL<char, unsigned char>(
129 source, parsed, output, new_parsed);
130 }
131
132 } // namespace url
133