• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- String to integer conversion utils ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
10 #define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
11 
12 #include "src/__support/CPP/limits.h"
13 #include "src/__support/CPP/type_traits.h"
14 #include "src/__support/common.h"
15 #include "src/__support/ctype_utils.h"
16 #include "src/__support/str_to_num_result.h"
17 #include "src/__support/uint128.h"
18 #include "src/errno/libc_errno.h" // For ERANGE
19 
20 namespace LIBC_NAMESPACE {
21 namespace internal {
22 
23 // Returns a pointer to the first character in src that is not a whitespace
24 // character (as determined by isspace())
25 // TODO: Change from returning a pointer to returning a length.
26 LIBC_INLINE const char *
27 first_non_whitespace(const char *__restrict src,
28                      size_t src_len = cpp::numeric_limits<size_t>::max()) {
29   size_t src_cur = 0;
30   while (src_cur < src_len && internal::isspace(src[src_cur])) {
31     ++src_cur;
32   }
33   return src + src_cur;
34 }
35 
b36_char_to_int(char input)36 LIBC_INLINE int b36_char_to_int(char input) {
37   if (isdigit(input))
38     return input - '0';
39   if (isalpha(input))
40     return (input | 32) + 10 - 'a';
41   return 0;
42 }
43 
44 // checks if the next 3 characters of the string pointer are the start of a
45 // hexadecimal number. Does not advance the string pointer.
46 LIBC_INLINE bool
47 is_hex_start(const char *__restrict src,
48              size_t src_len = cpp::numeric_limits<size_t>::max()) {
49   if (src_len < 3)
50     return false;
51   return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
52          b36_char_to_int(*(src + 2)) < 16;
53 }
54 
55 // Takes the address of the string pointer and parses the base from the start of
56 // it.
infer_base(const char * __restrict src,size_t src_len)57 LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
58   // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
59   // sequence of the decimal digits and the letters a (or A) through f (or F)
60   // with values 10 through 15 respectively." (C standard 6.4.4.1)
61   if (is_hex_start(src, src_len))
62     return 16;
63   // An octal number is defined as "the prefix 0 optionally followed by a
64   // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
65   // number that starts with 0, including just 0, is an octal number.
66   if (src_len > 0 && src[0] == '0')
67     return 8;
68   // A decimal number is defined as beginning "with a nonzero digit and
69   // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
70   return 10;
71 }
72 
73 // Takes a pointer to a string and the base to convert to. This function is used
74 // as the backend for all of the string to int functions.
75 template <class T>
76 LIBC_INLINE StrToNumResult<T>
77 strtointeger(const char *__restrict src, int base,
78              const size_t src_len = cpp::numeric_limits<size_t>::max()) {
79   using ResultType = typename cpp::conditional_t<(cpp::is_same_v<T, UInt128> ||
80                                                   cpp::is_same_v<T, Int128>),
81                                                  UInt128, unsigned long long>;
82 
83   ResultType result = 0;
84 
85   bool is_number = false;
86   size_t src_cur = 0;
87   int error_val = 0;
88 
89   if (src_len == 0)
90     return {0, 0, 0};
91 
92   if (base < 0 || base == 1 || base > 36)
93     return {0, 0, EINVAL};
94 
95   src_cur = first_non_whitespace(src, src_len) - src;
96 
97   char result_sign = '+';
98   if (src[src_cur] == '+' || src[src_cur] == '-') {
99     result_sign = src[src_cur];
100     ++src_cur;
101   }
102 
103   if (base == 0)
104     base = infer_base(src + src_cur, src_len - src_cur);
105 
106   if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
107     src_cur = src_cur + 2;
108 
109   constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
110   const bool is_positive = (result_sign == '+');
111 
112   ResultType constexpr NEGATIVE_MAX =
113       !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
114                    : cpp::numeric_limits<T>::max();
115   ResultType const abs_max =
116       (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
117   ResultType const abs_max_div_by_base = abs_max / base;
118 
119   while (src_cur < src_len && isalnum(src[src_cur])) {
120     int cur_digit = b36_char_to_int(src[src_cur]);
121     if (cur_digit >= base)
122       break;
123 
124     is_number = true;
125     ++src_cur;
126 
127     // If the number has already hit the maximum value for the current type then
128     // the result cannot change, but we still need to advance src to the end of
129     // the number.
130     if (result == abs_max) {
131       error_val = ERANGE;
132       continue;
133     }
134 
135     if (result > abs_max_div_by_base) {
136       result = abs_max;
137       error_val = ERANGE;
138     } else {
139       result = result * base;
140     }
141     if (result > abs_max - cur_digit) {
142       result = abs_max;
143       error_val = ERANGE;
144     } else {
145       result = result + cur_digit;
146     }
147   }
148 
149   ptrdiff_t str_len = is_number ? (src_cur) : 0;
150 
151   if (error_val == ERANGE) {
152     if (is_positive || IS_UNSIGNED)
153       return {cpp::numeric_limits<T>::max(), str_len, error_val};
154     else // T is signed and there is a negative overflow
155       return {cpp::numeric_limits<T>::min(), str_len, error_val};
156   }
157 
158   return {
159       is_positive
160           ? static_cast<T>(result)
161           : static_cast<T>(
162                 -static_cast<make_integral_or_big_int_unsigned_t<T>>(result)),
163       str_len, error_val};
164 }
165 
166 } // namespace internal
167 } // namespace LIBC_NAMESPACE
168 
169 #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
170