1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_BASE_ESCAPE_H_ 6 #define NET_BASE_ESCAPE_H_ 7 8 #include <string> 9 #include <vector> 10 11 #include "base/basictypes.h" 12 #include "base/strings/string16.h" 13 #include "base/strings/utf_offset_string_conversions.h" 14 #include "net/base/net_export.h" 15 16 namespace net { 17 18 // Escaping -------------------------------------------------------------------- 19 20 // Escapes characters in text suitable for use as a query parameter value. 21 // We %XX everything except alphanumerics and -_.!~*'() 22 // Spaces change to "+" unless you pass usePlus=false. 23 // This is basically the same as encodeURIComponent in javascript. 24 NET_EXPORT std::string EscapeQueryParamValue(const std::string& text, 25 bool use_plus); 26 27 // Escapes a partial or complete file/pathname. This includes: 28 // non-printable, non-7bit, and (including space) "#%:<>?[\]^`{|} 29 // For the base::string16 version, we attempt a conversion to |codepage| before 30 // encoding the string. If this conversion fails, we return false. 31 NET_EXPORT std::string EscapePath(const std::string& path); 32 33 // Escapes application/x-www-form-urlencoded content. This includes: 34 // non-printable, non-7bit, and (including space) ?>=<;+'&%$#"![\]^`{|} 35 // Space is escaped as + (if use_plus is true) and other special characters 36 // as %XX (hex). 37 NET_EXPORT std::string EscapeUrlEncodedData(const std::string& path, 38 bool use_plus); 39 40 // Escapes all non-ASCII input. 41 NET_EXPORT std::string EscapeNonASCII(const std::string& input); 42 43 // Escapes characters in text suitable for use as an external protocol handler 44 // command. 45 // We %XX everything except alphanumerics and %-_.!~*'() and the restricted 46 // chracters (;/?:@&=+$,). 47 NET_EXPORT std::string EscapeExternalHandlerValue(const std::string& text); 48 49 // Appends the given character to the output string, escaping the character if 50 // the character would be interpretted as an HTML delimiter. 51 NET_EXPORT void AppendEscapedCharForHTML(char c, std::string* output); 52 53 // Escapes chars that might cause this text to be interpretted as HTML tags. 54 NET_EXPORT std::string EscapeForHTML(const std::string& text); 55 NET_EXPORT base::string16 EscapeForHTML(const base::string16& text); 56 57 // Unescaping ------------------------------------------------------------------ 58 59 class UnescapeRule { 60 public: 61 // A combination of the following flags that is passed to the unescaping 62 // functions. 63 typedef uint32 Type; 64 65 enum { 66 // Don't unescape anything at all. 67 NONE = 0, 68 69 // Don't unescape anything special, but all normal unescaping will happen. 70 // This is a placeholder and can't be combined with other flags (since it's 71 // just the absence of them). All other unescape rules imply "normal" in 72 // addition to their special meaning. Things like escaped letters, digits, 73 // and most symbols will get unescaped with this mode. 74 NORMAL = 1, 75 76 // Convert %20 to spaces. In some places where we're showing URLs, we may 77 // want this. In places where the URL may be copied and pasted out, then 78 // you wouldn't want this since it might not be interpreted in one piece 79 // by other applications. 80 SPACES = 2, 81 82 // Unescapes various characters that will change the meaning of URLs, 83 // including '%', '+', '&', '/', '#'. If we unescaped these characters, the 84 // resulting URL won't be the same as the source one. This flag is used when 85 // generating final output like filenames for URLs where we won't be 86 // interpreting as a URL and want to do as much unescaping as possible. 87 URL_SPECIAL_CHARS = 4, 88 89 // Unescapes control characters such as %01. This INCLUDES NULLs. This is 90 // used for rare cases such as data: URL decoding where the result is binary 91 // data. You should not use this for normal URLs! 92 CONTROL_CHARS = 8, 93 94 // URL queries use "+" for space. This flag controls that replacement. 95 REPLACE_PLUS_WITH_SPACE = 16, 96 }; 97 }; 98 99 // Unescapes |escaped_text| and returns the result. 100 // Unescaping consists of looking for the exact pattern "%XX", where each X is 101 // a hex digit, and converting to the character with the numerical value of 102 // those digits. Thus "i%20=%203%3b" unescapes to "i = 3;". 103 // 104 // Watch out: this doesn't necessarily result in the correct final result, 105 // because the encoding may be unknown. For example, the input might be ASCII, 106 // which, after unescaping, is supposed to be interpreted as UTF-8, and then 107 // converted into full UTF-16 chars. This function won't tell you if any 108 // conversions need to take place, it only unescapes. 109 NET_EXPORT std::string UnescapeURLComponent(const std::string& escaped_text, 110 UnescapeRule::Type rules); 111 NET_EXPORT base::string16 UnescapeURLComponent( 112 const base::string16& escaped_text, 113 UnescapeRule::Type rules); 114 115 // Unescapes the given substring as a URL, and then tries to interpret the 116 // result as being encoded as UTF-8. If the result is convertable into UTF-8, it 117 // will be returned as converted. If it is not, the original escaped string will 118 // be converted into a base::string16 and returned. |adjustments| provides 119 // information on how the original string was adjusted to get the string 120 // returned. 121 NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponent( 122 const std::string& text, 123 UnescapeRule::Type rules); 124 NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( 125 const std::string& text, 126 UnescapeRule::Type rules, 127 base::OffsetAdjuster::Adjustments* adjustments); 128 129 // Unescapes the following ampersand character codes from |text|: 130 // < > & " ' 131 NET_EXPORT base::string16 UnescapeForHTML(const base::string16& text); 132 133 } // namespace net 134 135 #endif // NET_BASE_ESCAPE_H_ 136