• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2017 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/fx_string.h"
8 
9 #include <stdint.h>
10 
11 #include <array>
12 #include <string>
13 #include <vector>
14 
15 #include "build/build_config.h"
16 #include "core/fxcrt/bytestring.h"
17 #include "core/fxcrt/code_point_view.h"
18 #include "core/fxcrt/fx_extension.h"
19 #include "core/fxcrt/span.h"
20 #include "core/fxcrt/utf16.h"
21 #include "core/fxcrt/widestring.h"
22 #include "third_party/fast_float/include/fast_float.h"
23 
24 //#if !defined(WCHAR_T_IS_16_BIT) && !defined(WCHAR_T_IS_32_BIT)
25 //#error "Unknown wchar_t size"
26 //#endif
27 //#if defined(WCHAR_T_IS_16_BIT) && defined(WCHAR_T_IS_32_BIT)
28 //#error "Conflicting wchar_t sizes"
29 //#endif
30 
31 namespace {
32 
// Appends a Unicode code point to a `ByteString` using UTF-8.
//
// TODO(crbug.com/pdfium/2041): Migrate to `ByteString`.
AppendCodePointToByteString(char32_t code_point,ByteString & buffer)36 void AppendCodePointToByteString(char32_t code_point, ByteString& buffer) {
37   if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
38     // Invalid code point above U+10FFFF.
39     return;
40   }
41 
42   if (code_point < 0x80) {
43     // 7-bit code points are unchanged in UTF-8.
44     buffer += code_point;
45     return;
46   }
47 
48   int byte_size;
49   if (code_point < 0x800) {
50     byte_size = 2;
51   } else if (code_point < 0x10000) {
52     byte_size = 3;
53   } else {
54     byte_size = 4;
55   }
56 
57   static constexpr std::array<uint8_t, 3> kPrefix = {{0xc0, 0xe0, 0xf0}};
58   int order = 1 << ((byte_size - 1) * 6);
59   buffer += kPrefix[byte_size - 2] | (code_point / order);
60   for (int i = 0; i < byte_size - 1; i++) {
61     code_point = code_point % order;
62     order >>= 6;
63     buffer += 0x80 | (code_point / order);
64   }
65 }
66 
67 }  // namespace
68 
FX_UTF8Encode(WideStringView wsStr)69 ByteString FX_UTF8Encode(WideStringView wsStr) {
70   ByteString buffer;
71   for (char32_t code_point : pdfium::CodePointView(wsStr)) {
72     AppendCodePointToByteString(code_point, buffer);
73   }
74   return buffer;
75 }
76 
FX_UTF16Encode(WideStringView wsStr)77 std::u16string FX_UTF16Encode(WideStringView wsStr) {
78   if (wsStr.IsEmpty()) {
79     return {};
80   }
81 
82   std::u16string result;
83   result.reserve(wsStr.GetLength());
84 
85   for (wchar_t c : wsStr) {
86 #if defined(WCHAR_T_IS_32_BIT)
87     if (pdfium::IsSupplementary(c)) {
88       pdfium::SurrogatePair pair(c);
89       result.push_back(pair.high());
90       result.push_back(pair.low());
91       continue;
92     }
93 #endif  // defined(WCHAR_T_IS_32_BIT)
94     result.push_back(c);
95   }
96 
97   return result;
98 }
99 
100 namespace {
101 
102 template <class T>
StringTo(ByteStringView strc)103 T StringTo(ByteStringView strc) {
104   // Skip leading whitespaces.
105   size_t start = 0;
106   size_t len = strc.GetLength();
107   while (start < len && strc[start] == ' ') {
108     ++start;
109   }
110 
111   // Skip a leading '+' sign.
112   if (start < len && strc[start] == '+') {
113     ++start;
114   }
115 
116   ByteStringView sub_strc = strc.Substr(start, len - start);
117 
118   T value;
119   auto result = fast_float::from_chars(sub_strc.begin(), sub_strc.end(), value);
120 
121   // Return 0 for parsing errors. Some examples of errors are an empty string
122   // and a string that cannot be converted to T.
123   return result.ec == std::errc() || result.ec == std::errc::result_out_of_range
124              ? value
125              : 0;
126 }
127 
128 }  // namespace
129 
StringToFloat(ByteStringView strc)130 float StringToFloat(ByteStringView strc) {
131   return StringTo<float>(strc);
132 }
133 
StringToFloat(WideStringView wsStr)134 float StringToFloat(WideStringView wsStr) {
135   return StringToFloat(FX_UTF8Encode(wsStr).AsStringView());
136 }
137 
StringToDouble(ByteStringView strc)138 double StringToDouble(ByteStringView strc) {
139   return StringTo<double>(strc);
140 }
141 
StringToDouble(WideStringView wsStr)142 double StringToDouble(WideStringView wsStr) {
143   return StringToDouble(FX_UTF8Encode(wsStr).AsStringView());
144 }
145 
146 namespace fxcrt {
147 
148 template std::vector<ByteString> Split<ByteString>(const ByteString& that,
149                                                    ByteString::CharType ch);
150 template std::vector<WideString> Split<WideString>(const WideString& that,
151                                                    WideString::CharType ch);
152 
153 }  // namespace fxcrt
154