• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
#include "google/protobuf/io/strtod.h"

#include <float.h>  // FLT_DIG and DBL_DIG

#include <cerrno>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <string>
#include <system_error>  // NOLINT(build/c++11)

#include "absl/log/absl_check.h"
#include "absl/strings/charconv.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_format.h"
24 
25 namespace google {
26 namespace protobuf {
27 namespace io {
28 
// This is approximately 0x1.ffffffp127. The hexadecimal literal is not used
// directly because it won't compile in MSVC.
constexpr double MAX_FLOAT_AS_DOUBLE_ROUNDED = 3.4028235677973366e+38;

// Narrows `value` to float without ever applying static_cast<float> to a
// double whose magnitude exceeds the largest finite float (such a cast can
// result in an illegal instruction error).
//
// Values whose magnitude lies in (FLT_MAX, MAX_FLOAT_AS_DOUBLE_ROUNDED] are
// clamped to +/-FLT_MAX; anything larger becomes +/-infinity.  Everything
// else, including NaN, goes through a plain cast.
float SafeDoubleToFloat(double value) {
  constexpr float kFloatMax = std::numeric_limits<float>::max();
  constexpr float kFloatInf = std::numeric_limits<float>::infinity();

  const bool above_range = value > kFloatMax;
  const bool below_range = value < -kFloatMax;

  // Representable range (NaN also lands here because both comparisons above
  // are false for it): the ordinary cast is safe.
  if (!above_range && !below_range) {
    return static_cast<float>(value);
  }

  // The max float is about 3.4028234664E38 as a double, but float text output
  // rounds it to 3.4028235e+38.  Parsing that text back yields a double that
  // is slightly larger than the true max, yet every number in that small gap
  // can only be represented as the max float, so it is treated as max rather
  // than as overflow.
  const bool within_rounding_gap = above_range
                                       ? value <= MAX_FLOAT_AS_DOUBLE_ROUNDED
                                       : value >= -MAX_FLOAT_AS_DOUBLE_ROUNDED;
  const float magnitude = within_rounding_gap ? kFloatMax : kFloatInf;
  return above_range ? magnitude : -magnitude;
}
57 
NoLocaleStrtod(const char * str,char ** endptr)58 double NoLocaleStrtod(const char *str, char **endptr) {
59   double ret = 0.0;
60   // This isn't ideal, but the existing function interface does not provide any
61   // bounds.
62   const char *end = strchr(str, 0);
63   auto result = absl::from_chars(str, end, ret);
64   // from_chars() with DR 3081's current wording will return max() on
65   // overflow.  SimpleAtod returns infinity instead.
66   if (result.ec == std::errc::result_out_of_range) {
67     if (ret > 1.0) {
68       ret = std::numeric_limits<double>::infinity();
69     } else if (ret < -1.0) {
70       ret = -std::numeric_limits<double>::infinity();
71     }
72   }
73   if (endptr) {
74     *endptr = const_cast<char *>(result.ptr);
75   }
76   return ret;
77 }
78 
79 // ----------------------------------------------------------------------
80 // SimpleDtoa()
81 // SimpleFtoa()
82 //    We want to print the value without losing precision, but we also do
83 //    not want to print more digits than necessary.  This turns out to be
84 //    trickier than it sounds.  Numbers like 0.2 cannot be represented
85 //    exactly in binary.  If we print 0.2 with a very large precision,
86 //    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
87 //    On the other hand, if we set the precision too low, we lose
88 //    significant digits when printing numbers that actually need them.
89 //    It turns out there is no precision value that does the right thing
90 //    for all numbers.
91 //
92 //    Our strategy is to first try printing with a precision that is never
93 //    over-precise, then parse the result with strtod() to see if it
94 //    matches.  If not, we print again with a precision that will always
95 //    give a precise result, but may use more digits than necessary.
96 //
97 //    An arguably better strategy would be to use the algorithm described
98 //    in "How to Print Floating-Point Numbers Accurately" by Steele &
99 //    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
100 //    however, that the following implementation is about as fast as
101 //    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
102 //    will not scale well on multi-core machines.  DMG's code is slightly
103 //    more accurate (in that it will never use more digits than
104 //    necessary), but this is probably irrelevant for most users.
105 //
106 //    Rob Pike and Ken Thompson also have an implementation of dtoa() in
107 //    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
108 //    one in that it makes guesses and then uses strtod() to check them.
109 //    Their implementation is faster because they use their own code to
110 //    generate the digits in the first place rather than use snprintf(),
111 //    thus avoiding format string parsing overhead.  However, this makes
112 //    it considerably more complicated than the following implementation,
113 //    and it is embedded in a larger library.  If speed turns out to be
114 //    an issue, we could re-implement this in terms of their
115 //    implementation.
116 // ----------------------------------------------------------------------
117 
118 namespace {
// Scratch-buffer sizes for the float/double formatters below.  With IEEE-754
// types the longest output is 24 characters plus the NUL terminator for a
// double printed with DBL_DIG + 2 significant digits, and 15 characters plus
// the NUL terminator for a float printed with FLT_DIG + 3 significant digits.
// Both sizes are deliberately overestimated to be safe.
constexpr int kDoubleToBufferSize{32};
constexpr int kFloatToBufferSize{24};
124 
// Returns true if `c` is a decimal digit, an exponent marker ('e' / 'E') or a
// sign ('+' / '-').  The radix character is deliberately not in this set.
inline bool IsValidFloatChar(char c) {
  switch (c) {
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case 'e':
    case 'E':
    case '+':
    case '-':
      return true;
    default:
      return false;
  }
}
128 
DelocalizeRadix(char * buffer)129 void DelocalizeRadix(char *buffer) {
130   // Fast check:  if the buffer has a normal decimal point, assume no
131   // translation is needed.
132   if (strchr(buffer, '.') != nullptr) return;
133 
134   // Find the first unknown character.
135   while (IsValidFloatChar(*buffer)) ++buffer;
136 
137   if (*buffer == '\0') {
138     // No radix character found.
139     return;
140   }
141 
142   // We are now pointing at the locale-specific radix character.  Replace it
143   // with '.'.
144   *buffer = '.';
145   ++buffer;
146 
147   if (!IsValidFloatChar(*buffer) && *buffer != '\0') {
148     // It appears the radix was a multi-byte character.  We need to remove the
149     // extra bytes.
150     char *target = buffer;
151     do {
152       ++buffer;
153     } while (!IsValidFloatChar(*buffer) && *buffer != '\0');
154     memmove(target, buffer, strlen(buffer) + 1);
155   }
156 }
157 
// Parses the whole of the NUL-terminated string `str` as a float with
// strtof() and stores the result in `*value`.
//
// Returns true only if `str` is non-empty, strtof() consumed every character
// and strtof() did not set errno.  `*value` is written in every case.
//
// The caller's errno is left exactly as it was on entry: errno has to be
// cleared to detect strtof() errors, so it is saved first and restored
// afterwards.
//
// NOTE(review): strtof() honours the current C locale's radix character,
// unlike NoLocaleStrtod() used on the double path -- confirm whether that is
// intended.
bool safe_strtof(const char *str, float *value) {
  const int saved_errno = errno;
  errno = 0;  // errno only gets set on errors

  char *endptr = nullptr;
  *value = strtof(str, &endptr);
  const bool no_error = (errno == 0);

  errno = saved_errno;
  return *str != '\0' && *endptr == '\0' && no_error;
}
164 
FloatToBuffer(float value,char * buffer)165 char *FloatToBuffer(float value, char *buffer) {
166   // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
167   // platforms these days.  Just in case some system exists where FLT_DIG
168   // is significantly larger -- and risks overflowing our buffer -- we have
169   // this assert.
170   static_assert(FLT_DIG < 10, "FLT_DIG_is_too_big");
171 
172   if (value == std::numeric_limits<double>::infinity()) {
173     absl::SNPrintF(buffer, kFloatToBufferSize, "inf");
174     return buffer;
175   } else if (value == -std::numeric_limits<double>::infinity()) {
176     absl::SNPrintF(buffer, kFloatToBufferSize, "-inf");
177     return buffer;
178   } else if (std::isnan(value)) {
179     absl::SNPrintF(buffer, kFloatToBufferSize, "nan");
180     return buffer;
181   }
182 
183   int snprintf_result =
184       absl::SNPrintF(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
185 
186   // The snprintf should never overflow because the buffer is significantly
187   // larger than the precision we asked for.
188   ABSL_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
189 
190   float parsed_value;
191   if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
192     snprintf_result =
193         absl::SNPrintF(buffer, kFloatToBufferSize, "%.*g", FLT_DIG + 3, value);
194 
195     // Should never overflow; see above.
196     ABSL_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
197   }
198 
199   DelocalizeRadix(buffer);
200   return buffer;
201 }
202 
DoubleToBuffer(double value,char * buffer)203 char *DoubleToBuffer(double value, char *buffer) {
204   // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
205   // platforms these days.  Just in case some system exists where DBL_DIG
206   // is significantly larger -- and risks overflowing our buffer -- we have
207   // this assert.
208   static_assert(DBL_DIG < 20, "DBL_DIG_is_too_big");
209 
210   if (value == std::numeric_limits<double>::infinity()) {
211     absl::SNPrintF(buffer, kDoubleToBufferSize, "inf");
212     return buffer;
213   } else if (value == -std::numeric_limits<double>::infinity()) {
214     absl::SNPrintF(buffer, kDoubleToBufferSize, "-inf");
215     return buffer;
216   } else if (std::isnan(value)) {
217     absl::SNPrintF(buffer, kDoubleToBufferSize, "nan");
218     return buffer;
219   }
220 
221   int snprintf_result =
222       absl::SNPrintF(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
223 
224   // The snprintf should never overflow because the buffer is significantly
225   // larger than the precision we asked for.
226   ABSL_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
227 
228   // We need to make parsed_value volatile in order to force the compiler to
229   // write it out to the stack.  Otherwise, it may keep the value in a
230   // register, and if it does that, it may keep it as a long double instead
231   // of a double.  This long double may have extra bits that make it compare
232   // unequal to "value" even though it would be exactly equal if it were
233   // truncated to a double.
234   volatile double parsed_value = NoLocaleStrtod(buffer, nullptr);
235   if (parsed_value != value) {
236     snprintf_result =
237         absl::SNPrintF(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG + 2, value);
238 
239     // Should never overflow; see above.
240     ABSL_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
241   }
242 
243   DelocalizeRadix(buffer);
244   return buffer;
245 }
246 }  // namespace
247 
SimpleDtoa(double value)248 std::string SimpleDtoa(double value) {
249   char buffer[kDoubleToBufferSize];
250   return DoubleToBuffer(value, buffer);
251 }
252 
SimpleFtoa(float value)253 std::string SimpleFtoa(float value) {
254   char buffer[kFloatToBufferSize];
255   return FloatToBuffer(value, buffer);
256 }
257 
258 }  // namespace io
259 }  // namespace protobuf
260 }  // namespace google
261