1 /*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef FLATBUFFERS_UTIL_H_
18 #define FLATBUFFERS_UTIL_H_
19
20 #include <fstream>
21 #include <iomanip>
22 #include <string>
23 #include <sstream>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <assert.h>
27 #ifdef _WIN32
28 #ifndef WIN32_LEAN_AND_MEAN
29 #define WIN32_LEAN_AND_MEAN
30 #endif
31 #ifndef NOMINMAX
32 #define NOMINMAX
33 #endif
34 #include <windows.h>
35 #include <winbase.h>
36 #include <direct.h>
37 #else
38 #include <limits.h>
39 #endif
40 #include <sys/types.h>
41 #include <sys/stat.h>
42
43 #include "flatbuffers/base.h"
44
45
46 namespace flatbuffers {
47
// Converts a numeric value to its textual form by streaming it into a
// string stream and returning the accumulated text.
// The signed char / unsigned char specializations below widen the value to
// int first, so those types are printed as digits rather than as a single
// character.
template<typename T> std::string NumToString(T t) {
  std::ostringstream formatter;
  formatter << t;
  return formatter.str();
}
// signed char: print the numeric value, not the character it encodes.
template<> inline std::string NumToString<signed char>(signed char t) {
  const int widened = t;
  return NumToString(widened);
}
// unsigned char: print the numeric value, not the character it encodes.
template<> inline std::string NumToString<unsigned char>(unsigned char t) {
  const int widened = t;
  return NumToString(widened);
}
#if defined(FLATBUFFERS_CPP98_STL)
// When FLATBUFFERS_CPP98_STL is defined, 64-bit integers are formatted with
// snprintf() instead of going through a stream.
template <> inline std::string NumToString<long long>(long long t) {
  // Longest output is "-9223372036854775808": 20 characters plus the
  // terminating NUL.
  char digits[21];
  snprintf(digits, sizeof(digits), "%lld", t);
  return std::string(digits);
}

template <> inline std::string NumToString<unsigned long long>(
    unsigned long long t) {
  // Longest output is "18446744073709551615": 20 characters plus the
  // terminating NUL, so this buffer has one byte to spare.
  char digits[22];
  snprintf(digits, sizeof(digits), "%llu", t);
  return std::string(digits);
}
#endif  // defined(FLATBUFFERS_CPP98_STL)
77
78 // Special versions for floats/doubles.
79 template<> inline std::string NumToString<double>(double t) {
80 // to_string() prints different numbers of digits for floats depending on
81 // platform and isn't available on Android, so we use stringstream
82 std::stringstream ss;
83 // Use std::fixed to surpress scientific notation.
84 ss << std::fixed << t;
85 auto s = ss.str();
86 // Sadly, std::fixed turns "1" into "1.00000", so here we undo that.
87 auto p = s.find_last_not_of('0');
88 if (p != std::string::npos) {
89 // Strip trailing zeroes. If it is a whole number, keep one zero.
90 s.resize(p + (s[p] == '.' ? 2 : 1));
91 }
92 return s;
93 }
94 template<> inline std::string NumToString<float>(float t) {
95 return NumToString(static_cast<double>(t));
96 }
97
// Formats an integer as upper-case hexadecimal, left-padded with '0' digits
// to a width of at least xdigits characters. The width is a minimum: values
// that need more digits are not truncated.
// For example, IntToStringHex(0x23, 8) returns the string "00000023".
inline std::string IntToStringHex(int i, int xdigits) {
  std::stringstream hex_stream;
  // Persistent formatting state first...
  hex_stream << std::hex << std::uppercase << std::setfill('0');
  // ...then the field width, which only applies to the next insertion.
  hex_stream << std::setw(xdigits) << i;
  return hex_stream.str();
}
110
// Portable strtoll(): parses a signed 64-bit integer from "str" in the given
// "base". "endptr" is forwarded unchanged to the underlying C library call
// (_strtoi64 when _MSC_VER is defined, strtoll otherwise).
inline int64_t StringToInt(const char *str, char **endptr = nullptr,
                           int base = 10) {
  #ifdef _MSC_VER
    const int64_t parsed = _strtoi64(str, endptr, base);
  #else
    const int64_t parsed = strtoll(str, endptr, base);
  #endif
  return parsed;
}
120
// Portable strtoull(): parses an unsigned 64-bit integer from "str" in the
// given "base". "endptr" is forwarded unchanged to the underlying C library
// call (_strtoui64 when _MSC_VER is defined, strtoull otherwise).
inline uint64_t StringToUInt(const char *str, char **endptr = nullptr,
                             int base = 10) {
  #ifdef _MSC_VER
    const uint64_t parsed = _strtoui64(str, endptr, base);
  #else
    const uint64_t parsed = strtoull(str, endptr, base);
  #endif
  return parsed;
}
130
// Pointer to a file-loading callback: receives a file name, a binary/text
// flag and a destination string, and returns a bool.
// NOTE(review): presumably "true" means success and "dest" receives the file
// contents, mirroring LoadFile() -- confirm against the implementation.
typedef bool (*LoadFileFunction)(const char *filename, bool binary,
                                 std::string *dest);
// Pointer to a file-existence callback: receives a file name and returns a
// bool.
typedef bool (*FileExistsFunction)(const char *filename);

// Takes a LoadFileFunction and returns a LoadFileFunction. Only declared
// here; the definition is not part of this header.
// NOTE(review): presumably installs the callback used by LoadFile() and
// returns the previously installed one -- confirm against the definition.
LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function);

// Takes a FileExistsFunction and returns a FileExistsFunction. Only declared
// here; the definition is not part of this header.
// NOTE(review): presumably installs the callback used by FileExists() and
// returns the previously installed one -- confirm against the definition.
FileExistsFunction SetFileExistsFunction(FileExistsFunction
                                         file_exists_function);
139
140
// Check if file "name" exists.
// (Declaration only; the definition is not part of this header.)
bool FileExists(const char *name);

// Check if "name" exists and it is also a directory.
// (Declaration only; the definition is not part of this header.)
bool DirExists(const char *name);

// Load file "name" into "buf", returning true if successful and
// false otherwise. If "binary" is false, data is read
// using ifstream's text mode; otherwise data is read with
// no transcoding.
// (Declaration only; the definition is not part of this header.)
bool LoadFile(const char *name, bool binary, std::string *buf);
152
// Save data "buf" of length "len" bytes into a file "name".
// If "binary" is false, data is written using ofstream's text mode;
// otherwise data is written with no transcoding.
// Returns true only if the file could be opened and the data was written and
// flushed without error, false otherwise.
inline bool SaveFile(const char *name, const char *buf, size_t len,
                     bool binary) {
  std::ofstream ofs(name, binary ? std::ofstream::binary : std::ofstream::out);
  if (!ofs.is_open()) return false;
  ofs.write(buf, static_cast<std::streamsize>(len));
  // write() may only fill the stream buffer. Close explicitly so that errors
  // which surface when the buffer is flushed (e.g. disk full) are reported
  // here instead of being silently dropped by the destructor.
  ofs.close();
  return !ofs.fail();
}
165
166 // Save data "buf" into file "name" returning true if
167 // successful, false otherwise. If "binary" is false
168 // data is written using ifstream's text mode, otherwise
169 // data is written with no transcoding.
SaveFile(const char * name,const std::string & buf,bool binary)170 inline bool SaveFile(const char *name, const std::string &buf, bool binary) {
171 return SaveFile(name, buf.c_str(), buf.size(), binary);
172 }
173
174 // Functionality for minimalistic portable path handling.
175
176 // The functions below behave correctly regardless of whether posix ('/') or
177 // Windows ('/' or '\\') separators are used.
178
179 // Any new separators inserted are always posix.
180
181 // We internally store paths in posix format ('/'). Paths supplied
182 // by the user should go through PosixPath to ensure correct behavior
183 // on Windows when paths are string-compared.
184
// Posix separator; the one used for paths stored internally and for any
// separator newly inserted by the helpers in this file.
static const char kPathSeparator = '/';
// Windows separator, accepted on input in addition to the posix one.
static const char kPathSeparatorWindows = '\\';
// Every character that is treated as a path separator when a path is
// searched for its last component.
static const char *PathSeparatorSet = "\\/";  // Intentionally no ':'
188
// Returns the path with the extension, if any, removed.
// Only the last path component is examined, so a '.' that belongs to a
// directory name (e.g. "../dir/file" or "out.d/file") is not mistaken for
// the start of an extension. Both '/' and '\\' end the last component.
// A path without an extension is returned unchanged.
inline std::string StripExtension(const std::string &filepath) {
  // Scan backwards: stop at the first '.', or give up at the first separator.
  for (size_t i = filepath.length(); i-- > 0;) {
    const char c = filepath[i];
    if (c == '.') return filepath.substr(0, i);
    if (c == '/' || c == '\\') break;
  }
  return filepath;
}
194
// Returns the extension (the text after the last '.'), if any, or an empty
// string when there is none.
// Only the last path component is examined, so a '.' that belongs to a
// directory name (e.g. "out.d/file") does not produce a bogus extension.
// Both '/' and '\\' end the last component.
inline std::string GetExtension(const std::string &filepath) {
  // Scan backwards: stop at the first '.', or give up at the first separator.
  for (size_t i = filepath.length(); i-- > 0;) {
    const char c = filepath[i];
    if (c == '.') return filepath.substr(i + 1);
    if (c == '/' || c == '\\') break;
  }
  return "";
}
200
201 // Return the last component of the path, after the last separator.
StripPath(const std::string & filepath)202 inline std::string StripPath(const std::string &filepath) {
203 size_t i = filepath.find_last_of(PathSeparatorSet);
204 return i != std::string::npos ? filepath.substr(i + 1) : filepath;
205 }
206
207 // Strip the last component of the path + separator.
StripFileName(const std::string & filepath)208 inline std::string StripFileName(const std::string &filepath) {
209 size_t i = filepath.find_last_of(PathSeparatorSet);
210 return i != std::string::npos ? filepath.substr(0, i) : "";
211 }
212
213 // Concatenates a path with a filename, regardless of wether the path
214 // ends in a separator or not.
ConCatPathFileName(const std::string & path,const std::string & filename)215 inline std::string ConCatPathFileName(const std::string &path,
216 const std::string &filename) {
217 std::string filepath = path;
218 if (filepath.length()) {
219 char filepath_last_character = string_back(filepath);
220 if (filepath_last_character == kPathSeparatorWindows) {
221 filepath_last_character = kPathSeparator;
222 } else if (filepath_last_character != kPathSeparator) {
223 filepath += kPathSeparator;
224 }
225 }
226 filepath += filename;
227 return filepath;
228 }
229
// Returns a copy of "path" in which every '\\' separator has been replaced
// with '/'.
inline std::string PosixPath(const char *path) {
  const char windows_separator = '\\';
  const char posix_separator = '/';
  std::string converted(path);
  std::replace(converted.begin(), converted.end(), windows_separator,
               posix_separator);
  return converted;
}
236
237 // This function ensure a directory exists, by recursively
238 // creating dirs for any parts of the path that don't exist yet.
EnsureDirExists(const std::string & filepath)239 inline void EnsureDirExists(const std::string &filepath) {
240 auto parent = StripFileName(filepath);
241 if (parent.length()) EnsureDirExists(parent);
242 #ifdef _WIN32
243 (void)_mkdir(filepath.c_str());
244 #else
245 mkdir(filepath.c_str(), S_IRWXU|S_IRGRP|S_IXGRP);
246 #endif
247 }
248
// Obtains the absolute path from any other path.
// Returns the input path if the absolute path couldn't be resolved, or
// unconditionally when FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION is defined.
inline std::string AbsolutePath(const std::string &filepath) {
  #ifdef FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION
    return filepath;
  #elif defined(_WIN32)
    char abs_path[MAX_PATH];
    const DWORD written =
        GetFullPathNameA(filepath.c_str(), MAX_PATH, abs_path, nullptr);
    // 0 signals failure. A value >= MAX_PATH is the buffer size that would
    // have been required: in that case abs_path was not filled in and must
    // not be returned.
    return (written != 0 && written < MAX_PATH) ? std::string(abs_path)
                                                : filepath;
  #else
    char abs_path[PATH_MAX];
    return realpath(filepath.c_str(), abs_path) ? std::string(abs_path)
                                                : filepath;
  #endif  // FLATBUFFERS_NO_ABSOLUTE_PATH_RESOLUTION
}
266
267 // To and from UTF-8 unicode conversion functions
268
// Appends the UTF-8 representation of the code point "ucc" to "*out" and
// returns the number of bytes appended. Six encodings (1 to 6 bytes) are
// supported, see http://en.wikipedia.org/wiki/UTF-8. "ucc" must not have its
// top bit set.
inline int ToUTF8(uint32_t ucc, std::string *out) {
  assert(!(ucc & 0x80000000));  // Top bit can't be set.
  // Marker bits of the first byte, indexed by the number of continuation
  // bytes that follow it.
  static const unsigned char kLeadMarkers[6] = { 0x00, 0xC0, 0xE0,
                                                 0xF0, 0xF8, 0xFC };
  for (int trailing = 0; trailing < 6; trailing++) {
    // Payload bits available with this many continuation bytes: 7 for plain
    // ASCII, otherwise 11, 16, 21, 26, 31.
    const uint32_t payload_bits =
        trailing == 0 ? 7u : static_cast<uint32_t>(6 + trailing * 5);
    if (ucc >= (1u << payload_bits)) continue;  // Doesn't fit, try longer.
    // Bits not stored in the first byte go out 6 at a time.
    const uint32_t lead_shift = static_cast<uint32_t>(trailing) * 6;
    (*out) += static_cast<char>(kLeadMarkers[trailing] | (ucc >> lead_shift));
    for (uint32_t shift = lead_shift; shift > 0;) {
      shift -= 6;
      (*out) += static_cast<char>(((ucc >> shift) & 0x3F) | 0x80);
    }
    return trailing + 1;  // Number of bytes added.
  }
  assert(0);  // Impossible to arrive here.
  return -1;
}
293
// Decodes the UTF-8 sequence at the start of "*in" into a unicode code point
// and returns it. The incoming pointer is advanced past all bytes parsed.
// Returns -1 upon corrupt UTF-8 encoding: a malformed lead byte, a sequence
// longer than 4 bytes, a missing continuation byte, a UTF-16 surrogate, an
// overlong encoding or a value above U+10FFFF. Ignore the incoming pointer in
// that case.
inline int FromUTF8(const char **in) {
  // Read bytes as unsigned values. Where char is signed, bytes >= 0x80 would
  // otherwise promote to negative ints, and left-shifting a negative int is
  // undefined behaviour before C++20.
  const unsigned int lead = static_cast<unsigned char>(**in);
  int len = 0;
  // Count leading 1 bits.
  for (unsigned int mask = 0x80; mask >= 0x04; mask >>= 1) {
    if (!(lead & mask)) break;
    len++;
  }
  if ((lead << len) & 0x80) return -1;  // Bit after leading 1's must be 0.
  if (!len) {
    // Plain 7-bit character.
    (*in)++;
    return static_cast<int>(lead);
  }
  // UTF-8 encoded values with a length are between 2 and 4 bytes.
  if (len < 2 || len > 4) return -1;
  // Grab initial bits of the code.
  int ucc = static_cast<int>(lead & ((1u << (7 - len)) - 1));
  (*in)++;
  for (int i = 0; i < len - 1; i++) {
    const unsigned int next = static_cast<unsigned char>(**in);
    if ((next & 0xC0) != 0x80) return -1;  // Upper bits must be 1 0.
    // Grab 6 more bits of the code.
    ucc = (ucc << 6) | static_cast<int>(next & 0x3F);
    (*in)++;
  }
  // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for
  // UTF-16 surrogate pairs).
  if (ucc >= 0xD800 && ucc <= 0xDFFF) return -1;
  // UTF-8 must represent code points in their shortest possible encoding.
  switch (len) {
    case 2:
      // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF.
      if (ucc < 0x0080 || ucc > 0x07FF) return -1;
      break;
    case 3:
      // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF.
      if (ucc < 0x0800 || ucc > 0xFFFF) return -1;
      break;
    case 4:
      // Four bytes of UTF-8 can represent code points from U+10000 to
      // U+10FFFF.
      if (ucc < 0x10000 || ucc > 0x10FFFF) return -1;
      break;
  }
  return ucc;
}
350
// Wraps "in" to lines shorter than "max_length", inserting new lines where
// necessary. The input is split into whitespace-separated words, so any run
// of existing whitespace collapses to a single space. When a line is
// wrapped, "wrapped_line_suffix" is appended to the line being closed and
// "wrapped_line_prefix" is placed at the start of the line being opened.
inline std::string WordWrap(const std::string in, size_t max_length,
                            const std::string wrapped_line_prefix,
                            const std::string wrapped_line_suffix) {
  std::istringstream words(in);
  std::string output, current_line, next_word;

  // The first word always opens the first line, without a prefix.
  words >> next_word;
  current_line = next_word;

  while (words >> next_word) {
    // Length of the current line if this word (plus a space) and the suffix
    // were added to it.
    const size_t extended_length = current_line.length() + 1 +
                                   next_word.length() +
                                   wrapped_line_suffix.length();
    if (extended_length >= max_length) {
      // Doesn't fit: close the current line and start a new one.
      output += current_line + wrapped_line_suffix + "\n";
      current_line = wrapped_line_prefix + next_word;
      continue;
    }
    current_line += " " + next_word;
  }
  output += current_line;

  return output;
}
377
EscapeString(const char * s,size_t length,std::string * _text,bool allow_non_utf8)378 inline bool EscapeString(const char *s, size_t length, std::string *_text,
379 bool allow_non_utf8) {
380 std::string &text = *_text;
381 text += "\"";
382 for (uoffset_t i = 0; i < length; i++) {
383 char c = s[i];
384 switch (c) {
385 case '\n': text += "\\n"; break;
386 case '\t': text += "\\t"; break;
387 case '\r': text += "\\r"; break;
388 case '\b': text += "\\b"; break;
389 case '\f': text += "\\f"; break;
390 case '\"': text += "\\\""; break;
391 case '\\': text += "\\\\"; break;
392 default:
393 if (c >= ' ' && c <= '~') {
394 text += c;
395 } else {
396 // Not printable ASCII data. Let's see if it's valid UTF-8 first:
397 const char *utf8 = s + i;
398 int ucc = FromUTF8(&utf8);
399 if (ucc < 0) {
400 if (allow_non_utf8) {
401 text += "\\x";
402 text += IntToStringHex(static_cast<uint8_t>(c), 2);
403 } else {
404 // There are two cases here:
405 //
406 // 1) We reached here by parsing an IDL file. In that case,
407 // we previously checked for non-UTF-8, so we shouldn't reach
408 // here.
409 //
410 // 2) We reached here by someone calling GenerateText()
411 // on a previously-serialized flatbuffer. The data might have
412 // non-UTF-8 Strings, or might be corrupt.
413 //
414 // In both cases, we have to give up and inform the caller
415 // they have no JSON.
416 return false;
417 }
418 } else {
419 if (ucc <= 0xFFFF) {
420 // Parses as Unicode within JSON's \uXXXX range, so use that.
421 text += "\\u";
422 text += IntToStringHex(ucc, 4);
423 } else if (ucc <= 0x10FFFF) {
424 // Encode Unicode SMP values to a surrogate pair using two \u escapes.
425 uint32_t base = ucc - 0x10000;
426 auto high_surrogate = (base >> 10) + 0xD800;
427 auto low_surrogate = (base & 0x03FF) + 0xDC00;
428 text += "\\u";
429 text += IntToStringHex(high_surrogate, 4);
430 text += "\\u";
431 text += IntToStringHex(low_surrogate, 4);
432 }
433 // Skip past characters recognized.
434 i = static_cast<uoffset_t>(utf8 - s - 1);
435 }
436 }
437 break;
438 }
439 }
440 text += "\"";
441 return true;
442 }
443
444 } // namespace flatbuffers
445
446 #endif // FLATBUFFERS_UTIL_H_
447