1 // Copyright 2008, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // Functions for canonicalizing "mailto:" URLs.
31
32 #include "googleurl/src/url_canon.h"
33 #include "googleurl/src/url_canon_internal.h"
34 #include "googleurl/src/url_file.h"
35 #include "googleurl/src/url_parse_internal.h"
36
37 namespace url_canon {
38
39 namespace {
40
41
42 template<typename CHAR, typename UCHAR>
DoCanonicalizeMailtoURL(const URLComponentSource<CHAR> & source,const url_parse::Parsed & parsed,CanonOutput * output,url_parse::Parsed * new_parsed)43 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
44 const url_parse::Parsed& parsed,
45 CanonOutput* output,
46 url_parse::Parsed* new_parsed) {
47
48 // mailto: only uses {scheme, path, query} -- clear the rest.
49 new_parsed->username = url_parse::Component();
50 new_parsed->password = url_parse::Component();
51 new_parsed->host = url_parse::Component();
52 new_parsed->port = url_parse::Component();
53 new_parsed->ref = url_parse::Component();
54
55 // Scheme (known, so we don't bother running it through the more
56 // complicated scheme canonicalizer).
57 new_parsed->scheme.begin = output->length();
58 output->Append("mailto:", 7);
59 new_parsed->scheme.len = 6;
60
61 bool success = true;
62
63 // Path
64 if (parsed.path.is_valid()) {
65 new_parsed->path.begin = output->length();
66
67 // Copy the path using path URL's more lax escaping rules.
68 // We convert to UTF-8 and escape non-ASCII, but leave all
69 // ASCII characters alone.
70 int end = parsed.path.end();
71 for (int i = parsed.path.begin; i < end; ++i) {
72 UCHAR uch = static_cast<UCHAR>(source.path[i]);
73 if (uch < 0x20 || uch >= 0x80)
74 success &= AppendUTF8EscapedChar(source.path, &i, end, output);
75 else
76 output->push_back(static_cast<char>(uch));
77 }
78
79 new_parsed->path.len = output->length() - new_parsed->path.begin;
80 } else {
81 // No path at all
82 new_parsed->path.reset();
83 }
84
85 // Query -- always use the default utf8 charset converter.
86 CanonicalizeQuery(source.query, parsed.query, NULL,
87 output, &new_parsed->query);
88
89 return success;
90 }
91
92 } // namespace
93
CanonicalizeMailtoURL(const char * spec,int spec_len,const url_parse::Parsed & parsed,CanonOutput * output,url_parse::Parsed * new_parsed)94 bool CanonicalizeMailtoURL(const char* spec,
95 int spec_len,
96 const url_parse::Parsed& parsed,
97 CanonOutput* output,
98 url_parse::Parsed* new_parsed) {
99 return DoCanonicalizeMailtoURL<char, unsigned char>(
100 URLComponentSource<char>(spec), parsed, output, new_parsed);
101 }
102
CanonicalizeMailtoURL(const char16 * spec,int spec_len,const url_parse::Parsed & parsed,CanonOutput * output,url_parse::Parsed * new_parsed)103 bool CanonicalizeMailtoURL(const char16* spec,
104 int spec_len,
105 const url_parse::Parsed& parsed,
106 CanonOutput* output,
107 url_parse::Parsed* new_parsed) {
108 return DoCanonicalizeMailtoURL<char16, char16>(
109 URLComponentSource<char16>(spec), parsed, output, new_parsed);
110 }
111
ReplaceMailtoURL(const char * base,const url_parse::Parsed & base_parsed,const Replacements<char> & replacements,CanonOutput * output,url_parse::Parsed * new_parsed)112 bool ReplaceMailtoURL(const char* base,
113 const url_parse::Parsed& base_parsed,
114 const Replacements<char>& replacements,
115 CanonOutput* output,
116 url_parse::Parsed* new_parsed) {
117 URLComponentSource<char> source(base);
118 url_parse::Parsed parsed(base_parsed);
119 SetupOverrideComponents(base, replacements, &source, &parsed);
120 return DoCanonicalizeMailtoURL<char, unsigned char>(
121 source, parsed, output, new_parsed);
122 }
123
ReplaceMailtoURL(const char * base,const url_parse::Parsed & base_parsed,const Replacements<char16> & replacements,CanonOutput * output,url_parse::Parsed * new_parsed)124 bool ReplaceMailtoURL(const char* base,
125 const url_parse::Parsed& base_parsed,
126 const Replacements<char16>& replacements,
127 CanonOutput* output,
128 url_parse::Parsed* new_parsed) {
129 RawCanonOutput<1024> utf8;
130 URLComponentSource<char> source(base);
131 url_parse::Parsed parsed(base_parsed);
132 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
133 return DoCanonicalizeMailtoURL<char, unsigned char>(
134 source, parsed, output, new_parsed);
135 }
136
137 } // namespace url_canon
138