1 /*
2 * Copyright 2004 The WebRTC Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef WEBRTC_BASE_STRINGENCODE_H_
12 #define WEBRTC_BASE_STRINGENCODE_H_
13
14 #include <sstream>
15 #include <string>
16 #include <vector>
17
18 #include "webrtc/base/checks.h"
19
20 namespace rtc {
21
22 //////////////////////////////////////////////////////////////////////
23 // String Encoding Utilities
24 //////////////////////////////////////////////////////////////////////
25
// Convert an unsigned value to its utf8 representation. Returns the length
27 // of the encoded string, or 0 if the encoding is longer than buflen - 1.
28 size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
29 // Decode the utf8 encoded value pointed to by source. Returns the number of
30 // bytes used by the encoding, or 0 if the encoding is invalid.
31 size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
32
33 // Escaping prefixes illegal characters with the escape character. Compact, but
34 // illegal characters still appear in the string.
35 size_t escape(char * buffer, size_t buflen,
36 const char * source, size_t srclen,
37 const char * illegal, char escape);
38 // Note: in-place unescaping (buffer == source) is allowed.
39 size_t unescape(char * buffer, size_t buflen,
40 const char * source, size_t srclen,
41 char escape);
42
43 // Encoding replaces illegal characters with the escape character and 2 hex
44 // chars, so it's a little less compact than escape, but completely removes
// illegal characters. Note that hex digits should not be used as illegal
46 // characters.
47 size_t encode(char * buffer, size_t buflen,
48 const char * source, size_t srclen,
49 const char * illegal, char escape);
50 // Note: in-place decoding (buffer == source) is allowed.
51 size_t decode(char * buffer, size_t buflen,
52 const char * source, size_t srclen,
53 char escape);
54
55 // Returns a list of characters that may be unsafe for use in the name of a
56 // file, suitable for passing to the 'illegal' member of escape or encode.
57 const char* unsafe_filename_characters();
58
59 // url_encode is an encode operation with a predefined set of illegal characters
60 // and escape character (for use in URLs, obviously).
61 size_t url_encode(char * buffer, size_t buflen,
62 const char * source, size_t srclen);
63 // Note: in-place decoding (buffer == source) is allowed.
64 size_t url_decode(char * buffer, size_t buflen,
65 const char * source, size_t srclen);
66
67 // html_encode prevents data embedded in html from containing markup.
68 size_t html_encode(char * buffer, size_t buflen,
69 const char * source, size_t srclen);
70 // Note: in-place decoding (buffer == source) is allowed.
71 size_t html_decode(char * buffer, size_t buflen,
72 const char * source, size_t srclen);
73
74 // xml_encode makes data suitable for inside xml attributes and values.
75 size_t xml_encode(char * buffer, size_t buflen,
76 const char * source, size_t srclen);
77 // Note: in-place decoding (buffer == source) is allowed.
78 size_t xml_decode(char * buffer, size_t buflen,
79 const char * source, size_t srclen);
80
81 // Convert an unsigned value from 0 to 15 to the hex character equivalent...
82 char hex_encode(unsigned char val);
83 // ...and vice-versa.
84 bool hex_decode(char ch, unsigned char* val);
85
86 // hex_encode shows the hex representation of binary data in ascii.
87 size_t hex_encode(char* buffer, size_t buflen,
88 const char* source, size_t srclen);
89
90 // hex_encode, but separate each byte representation with a delimiter.
91 // |delimiter| == 0 means no delimiter
92 // If the buffer is too short, we return 0
93 size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
94 const char* source, size_t srclen,
95 char delimiter);
96
97 // Helper functions for hex_encode.
98 std::string hex_encode(const std::string& str);
99 std::string hex_encode(const char* source, size_t srclen);
100 std::string hex_encode_with_delimiter(const char* source, size_t srclen,
101 char delimiter);
102
103 // hex_decode converts ascii hex to binary.
104 size_t hex_decode(char* buffer, size_t buflen,
105 const char* source, size_t srclen);
106
107 // hex_decode, assuming that there is a delimiter between every byte
108 // pair.
109 // |delimiter| == 0 means no delimiter
110 // If the buffer is too short or the data is invalid, we return 0.
111 size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
112 const char* source, size_t srclen,
113 char delimiter);
114
115 // Helper functions for hex_decode.
116 size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
117 size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
118 const std::string& source, char delimiter);
119
120 // Apply any suitable string transform (including the ones above) to an STL
121 // string. Stack-allocated temporary space is used for the transformation,
122 // so value and source may refer to the same string.
123 typedef size_t (*Transform)(char * buffer, size_t buflen,
124 const char * source, size_t srclen);
125 size_t transform(std::string& value, size_t maxlen, const std::string& source,
126 Transform t);
127
128 // Return the result of applying transform t to source.
129 std::string s_transform(const std::string& source, Transform t);
130
131 // Convenience wrappers.
s_url_encode(const std::string & source)132 inline std::string s_url_encode(const std::string& source) {
133 return s_transform(source, url_encode);
134 }
s_url_decode(const std::string & source)135 inline std::string s_url_decode(const std::string& source) {
136 return s_transform(source, url_decode);
137 }
138
139 // Splits the source string into multiple fields separated by delimiter,
140 // with duplicates of delimiter creating empty fields.
141 size_t split(const std::string& source, char delimiter,
142 std::vector<std::string>* fields);
143
144 // Splits the source string into multiple fields separated by delimiter,
145 // with duplicates of delimiter ignored. Trailing delimiter ignored.
146 size_t tokenize(const std::string& source, char delimiter,
147 std::vector<std::string>* fields);
148
149 // Tokenize, including the empty tokens.
150 size_t tokenize_with_empty_tokens(const std::string& source,
151 char delimiter,
152 std::vector<std::string>* fields);
153
154 // Tokenize and append the tokens to fields. Return the new size of fields.
155 size_t tokenize_append(const std::string& source, char delimiter,
156 std::vector<std::string>* fields);
157
158 // Splits the source string into multiple fields separated by delimiter, with
159 // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in
160 // between the start_mark and the end_mark is treated as a single field. Return
161 // the size of fields. For example, if source is "filename
162 // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and
163 // the start_mark and end_mark are '"', this method returns two fields:
164 // "filename" and "/Library/Application Support/media content.txt".
165 size_t tokenize(const std::string& source, char delimiter, char start_mark,
166 char end_mark, std::vector<std::string>* fields);
167
168 // Extract the first token from source as separated by delimiter, with
169 // duplicates of delimiter ignored. Return false if the delimiter could not be
170 // found, otherwise return true.
171 bool tokenize_first(const std::string& source,
172 const char delimiter,
173 std::string* token,
174 std::string* rest);
175
176 // Safe sprintf to std::string
177 //void sprintf(std::string& value, size_t maxlen, const char * format, ...)
178 // PRINTF_FORMAT(3);
179
180 // Convert arbitrary values to/from a string.
181
182 template <class T>
ToString(const T & t,std::string * s)183 static bool ToString(const T &t, std::string* s) {
184 RTC_DCHECK(s);
185 std::ostringstream oss;
186 oss << std::boolalpha << t;
187 *s = oss.str();
188 return !oss.fail();
189 }
190
// Extracts a T from the text in |s| via an std::istringstream and writes it
// to |*t|. Boolean values are read textually (std::boolalpha). |t| must not
// be null. Returns true unless the extraction left the stream in a failed
// state.
template <class T>
static bool FromString(const std::string& s, T* t) {
  RTC_DCHECK(t);
  std::istringstream stream(s);
  stream >> std::boolalpha;
  stream >> *t;
  if (stream.fail()) {
    return false;
  }
  return true;
}
198
199 // Inline versions of the string conversion routines.
200
// Value-returning form of ToString: converts |val| with the two-argument
// overload and returns the text. The success flag of that overload is not
// reported to the caller.
template<typename T>
static inline std::string ToString(const T& val) {
  std::string text;
  ToString(val, &text);
  return text;
}
205
// Value-returning form of FromString: parses |str| as a T with the
// two-argument overload and returns the result. The success flag of that
// overload is not reported to the caller; use the two-argument overload, or
// the overload taking a default value, when failure must be detected.
template<typename T>
static inline T FromString(const std::string& str) {
  // Value-initialize the result. If extraction never starts (for example
  // |str| is empty) the stream leaves the target untouched, and returning a
  // default-initialized scalar would read an indeterminate value.
  T val = T();
  FromString(str, &val);
  return val;
}
210
// Parses |str| as a T with the two-argument overload and returns the result,
// or |defaultValue| if the conversion fails.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  // A failed extraction is allowed to overwrite its target (since C++11 a
  // failed numeric extraction stores 0), so |val| cannot be relied on to
  // still hold the default. Return the caller's default explicitly.
  if (!FromString(str, &val)) {
    return defaultValue;
  }
  return val;
}
215
216 // simple function to strip out characters which shouldn't be
217 // used in filenames
218 char make_char_safe_for_filename(char c);
219
220 //////////////////////////////////////////////////////////////////////
221
222 } // namespace rtc
223
#endif  // WEBRTC_BASE_STRINGENCODE_H_
225