1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/fx_string.h"
8
9 #include <stdint.h>
10
11 #include <array>
12 #include <string>
13 #include <vector>
14
15 #include "build/build_config.h"
16 #include "core/fxcrt/bytestring.h"
17 #include "core/fxcrt/code_point_view.h"
18 #include "core/fxcrt/fx_extension.h"
19 #include "core/fxcrt/span.h"
20 #include "core/fxcrt/utf16.h"
21 #include "core/fxcrt/widestring.h"
22 #include "third_party/fast_float/include/fast_float.h"
23
24 //#if !defined(WCHAR_T_IS_16_BIT) && !defined(WCHAR_T_IS_32_BIT)
25 //#error "Unknown wchar_t size"
26 //#endif
27 //#if defined(WCHAR_T_IS_16_BIT) && defined(WCHAR_T_IS_32_BIT)
28 //#error "Conflicting wchar_t sizes"
29 //#endif
30
31 namespace {
32
33 // Appends a Unicode code point to a `ByteString` using UTF-8.
34 //
35 // TODO(crbug.com/pdfium/2041): Migrate to `ByteString`.
AppendCodePointToByteString(char32_t code_point,ByteString & buffer)36 void AppendCodePointToByteString(char32_t code_point, ByteString& buffer) {
37 if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
38 // Invalid code point above U+10FFFF.
39 return;
40 }
41
42 if (code_point < 0x80) {
43 // 7-bit code points are unchanged in UTF-8.
44 buffer += code_point;
45 return;
46 }
47
48 int byte_size;
49 if (code_point < 0x800) {
50 byte_size = 2;
51 } else if (code_point < 0x10000) {
52 byte_size = 3;
53 } else {
54 byte_size = 4;
55 }
56
57 static constexpr std::array<uint8_t, 3> kPrefix = {{0xc0, 0xe0, 0xf0}};
58 int order = 1 << ((byte_size - 1) * 6);
59 buffer += kPrefix[byte_size - 2] | (code_point / order);
60 for (int i = 0; i < byte_size - 1; i++) {
61 code_point = code_point % order;
62 order >>= 6;
63 buffer += 0x80 | (code_point / order);
64 }
65 }
66
67 } // namespace
68
FX_UTF8Encode(WideStringView wsStr)69 ByteString FX_UTF8Encode(WideStringView wsStr) {
70 ByteString buffer;
71 for (char32_t code_point : pdfium::CodePointView(wsStr)) {
72 AppendCodePointToByteString(code_point, buffer);
73 }
74 return buffer;
75 }
76
FX_UTF16Encode(WideStringView wsStr)77 std::u16string FX_UTF16Encode(WideStringView wsStr) {
78 if (wsStr.IsEmpty()) {
79 return {};
80 }
81
82 std::u16string result;
83 result.reserve(wsStr.GetLength());
84
85 for (wchar_t c : wsStr) {
86 #if defined(WCHAR_T_IS_32_BIT)
87 if (pdfium::IsSupplementary(c)) {
88 pdfium::SurrogatePair pair(c);
89 result.push_back(pair.high());
90 result.push_back(pair.low());
91 continue;
92 }
93 #endif // defined(WCHAR_T_IS_32_BIT)
94 result.push_back(c);
95 }
96
97 return result;
98 }
99
100 namespace {
101
102 template <class T>
StringTo(ByteStringView strc)103 T StringTo(ByteStringView strc) {
104 // Skip leading whitespaces.
105 size_t start = 0;
106 size_t len = strc.GetLength();
107 while (start < len && strc[start] == ' ') {
108 ++start;
109 }
110
111 // Skip a leading '+' sign.
112 if (start < len && strc[start] == '+') {
113 ++start;
114 }
115
116 ByteStringView sub_strc = strc.Substr(start, len - start);
117
118 T value;
119 auto result = fast_float::from_chars(sub_strc.begin(), sub_strc.end(), value);
120
121 // Return 0 for parsing errors. Some examples of errors are an empty string
122 // and a string that cannot be converted to T.
123 return result.ec == std::errc() || result.ec == std::errc::result_out_of_range
124 ? value
125 : 0;
126 }
127
128 } // namespace
129
StringToFloat(ByteStringView strc)130 float StringToFloat(ByteStringView strc) {
131 return StringTo<float>(strc);
132 }
133
StringToFloat(WideStringView wsStr)134 float StringToFloat(WideStringView wsStr) {
135 return StringToFloat(FX_UTF8Encode(wsStr).AsStringView());
136 }
137
StringToDouble(ByteStringView strc)138 double StringToDouble(ByteStringView strc) {
139 return StringTo<double>(strc);
140 }
141
StringToDouble(WideStringView wsStr)142 double StringToDouble(WideStringView wsStr) {
143 return StringToDouble(FX_UTF8Encode(wsStr).AsStringView());
144 }
145
146 namespace fxcrt {
147
// Explicit instantiation definitions of the `Split` function template for the
// two string classes, `ByteString` and `WideString`. Each instantiation takes
// the string to split and a separator of that string's own `CharType`, and
// returns a `std::vector` of the same string type.
// NOTE(review): the `Split` template itself is not visible in this file;
// presumably it comes from "core/fxcrt/fx_string.h" — confirm.
template std::vector<ByteString> Split<ByteString>(const ByteString& that,
                                                   ByteString::CharType ch);
template std::vector<WideString> Split<WideString>(const WideString& that,
                                                   WideString::CharType ch);
152
153 } // namespace fxcrt
154