1 //
2 // Copyright 2017 The Abseil Authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // https://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // -----------------------------------------------------------------------------
17 // File: ascii.h
18 // -----------------------------------------------------------------------------
19 //
20 // This package contains functions operating on characters and strings
21 // restricted to standard ASCII. These include character classification
22 // functions analogous to those found in the ANSI C Standard Library <ctype.h>
23 // header file.
24 //
25 // C++ implementations provide <ctype.h> functionality based on their
26 // C environment locale. In general, reliance on such a locale is not ideal, as
27 // the locale standard is problematic (and may not return invariant information
28 // for the same character set, for example). These `ascii_*()` functions are
29 // hard-wired for standard ASCII, much faster, and guaranteed to behave
30 // consistently. They will never be overloaded, nor will their function
31 // signature change.
32 //
33 // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
34 // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
35 // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
36 // `ascii_isxdigit()`
37 // Analogous to the <ctype.h> functions with similar names, these
38 // functions take an unsigned char and return a bool, based on whether the
39 // character matches the condition specified.
40 //
41 // If the input character has a numerical value greater than 127, these
42 // functions return `false`.
43 //
44 // `ascii_tolower()`, `ascii_toupper()`
45 // Analogous to the <ctype.h> functions with similar names, these functions
46 // take an unsigned char and return a char.
47 //
48 // If the input character is not an ASCII {lower,upper}-case letter (including
49 // numerical values greater than 127) then the functions return the same value
50 // as the input character.
51
52 #ifndef ABSL_STRINGS_ASCII_H_
53 #define ABSL_STRINGS_ASCII_H_
54
55 #include <algorithm>
56 #include <cstddef>
57 #include <string>
58
59 #include "absl/base/attributes.h"
60 #include "absl/base/config.h"
61 #include "absl/strings/string_view.h"
62
63 namespace absl {
64 ABSL_NAMESPACE_BEGIN
65 namespace ascii_internal {
66
67 // Declaration for an array of bitfields holding character information.
68 ABSL_DLL extern const unsigned char kPropertyBits[256];
69
70 // Declaration for the array of characters to upper-case characters.
71 ABSL_DLL extern const char kToUpper[256];
72
73 // Declaration for the array of characters to lower-case characters.
74 ABSL_DLL extern const char kToLower[256];
75
76 } // namespace ascii_internal
77
78 // ascii_isalpha()
79 //
80 // Determines whether the given character is an alphabetic character.
ascii_isalpha(unsigned char c)81 inline bool ascii_isalpha(unsigned char c) {
82 return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
83 }
84
85 // ascii_isalnum()
86 //
87 // Determines whether the given character is an alphanumeric character.
ascii_isalnum(unsigned char c)88 inline bool ascii_isalnum(unsigned char c) {
89 return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
90 }
91
92 // ascii_isspace()
93 //
94 // Determines whether the given character is a whitespace character (space,
95 // tab, vertical tab, formfeed, linefeed, or carriage return).
ascii_isspace(unsigned char c)96 inline bool ascii_isspace(unsigned char c) {
97 return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
98 }
99
100 // ascii_ispunct()
101 //
102 // Determines whether the given character is a punctuation character.
ascii_ispunct(unsigned char c)103 inline bool ascii_ispunct(unsigned char c) {
104 return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
105 }
106
107 // ascii_isblank()
108 //
109 // Determines whether the given character is a blank character (tab or space).
ascii_isblank(unsigned char c)110 inline bool ascii_isblank(unsigned char c) {
111 return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
112 }
113
114 // ascii_iscntrl()
115 //
116 // Determines whether the given character is a control character.
ascii_iscntrl(unsigned char c)117 inline bool ascii_iscntrl(unsigned char c) {
118 return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
119 }
120
121 // ascii_isxdigit()
122 //
123 // Determines whether the given character can be represented as a hexadecimal
124 // digit character (i.e. {0-9} or {A-F}).
ascii_isxdigit(unsigned char c)125 inline bool ascii_isxdigit(unsigned char c) {
126 return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
127 }
128
// ascii_isdigit()
//
// Determines whether the given character can be represented as a decimal
// digit character (i.e. {0-9}).
//
// Implemented as a direct range comparison; no lookup table is consulted.
inline bool ascii_isdigit(unsigned char c) {
  return '0' <= c && c <= '9';
}
134
// ascii_isprint()
//
// Determines whether the given character is printable, including spaces.
//
// Printable means a value in the closed range [32, 126]: the space character
// (32) is included, DEL (127) and everything above it is excluded.
inline bool ascii_isprint(unsigned char c) {
  return 32 <= c && c < 127;
}
139
// ascii_isgraph()
//
// Determines whether the given character has a graphical representation.
//
// Graphical means a value in the closed range [33, 126]; unlike
// `ascii_isprint()`, the space character (32) does not qualify.
inline bool ascii_isgraph(unsigned char c) {
  return 32 < c && c < 127;
}
144
// ascii_isupper()
//
// Determines whether the given character is uppercase.
//
// Only the letters 'A' through 'Z' qualify; the check is a direct range
// comparison.
inline bool ascii_isupper(unsigned char c) {
  return 'A' <= c && c <= 'Z';
}
149
// ascii_islower()
//
// Determines whether the given character is lowercase.
//
// Only the letters 'a' through 'z' qualify; the check is a direct range
// comparison.
inline bool ascii_islower(unsigned char c) {
  return 'a' <= c && c <= 'z';
}
154
// ascii_isascii()
//
// Determines whether the given character is ASCII, i.e. whether its value is
// below 128.
inline bool ascii_isascii(unsigned char c) {
  return c < 128;
}
159
160 // ascii_tolower()
161 //
162 // Returns an ASCII character, converting to lowercase if uppercase is
163 // passed. Note that character values > 127 are simply returned.
ascii_tolower(unsigned char c)164 inline char ascii_tolower(unsigned char c) {
165 return ascii_internal::kToLower[c];
166 }
167
168 // Converts the characters in `s` to lowercase, changing the contents of `s`.
169 void AsciiStrToLower(std::string* s);
170
171 // Creates a lowercase string from a given absl::string_view.
AsciiStrToLower(absl::string_view s)172 ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
173 std::string result(s);
174 absl::AsciiStrToLower(&result);
175 return result;
176 }
177
178 // ascii_toupper()
179 //
180 // Returns the ASCII character, converting to upper-case if lower-case is
181 // passed. Note that characters values > 127 are simply returned.
ascii_toupper(unsigned char c)182 inline char ascii_toupper(unsigned char c) {
183 return ascii_internal::kToUpper[c];
184 }
185
186 // Converts the characters in `s` to uppercase, changing the contents of `s`.
187 void AsciiStrToUpper(std::string* s);
188
189 // Creates an uppercase string from a given absl::string_view.
AsciiStrToUpper(absl::string_view s)190 ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
191 std::string result(s);
192 absl::AsciiStrToUpper(&result);
193 return result;
194 }
195
196 // Returns absl::string_view with whitespace stripped from the beginning of the
197 // given string_view.
StripLeadingAsciiWhitespace(absl::string_view str)198 ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
199 absl::string_view str) {
200 auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
201 return str.substr(static_cast<size_t>(it - str.begin()));
202 }
203
204 // Strips in place whitespace from the beginning of the given string.
StripLeadingAsciiWhitespace(std::string * str)205 inline void StripLeadingAsciiWhitespace(std::string* str) {
206 auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
207 str->erase(str->begin(), it);
208 }
209
210 // Returns absl::string_view with whitespace stripped from the end of the given
211 // string_view.
StripTrailingAsciiWhitespace(absl::string_view str)212 ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
213 absl::string_view str) {
214 auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
215 return str.substr(0, static_cast<size_t>(str.rend() - it));
216 }
217
218 // Strips in place whitespace from the end of the given string
StripTrailingAsciiWhitespace(std::string * str)219 inline void StripTrailingAsciiWhitespace(std::string* str) {
220 auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
221 str->erase(static_cast<size_t>(str->rend() - it));
222 }
223
224 // Returns absl::string_view with whitespace stripped from both ends of the
225 // given string_view.
StripAsciiWhitespace(absl::string_view str)226 ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
227 absl::string_view str) {
228 return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
229 }
230
231 // Strips in place whitespace from both ends of the given string
StripAsciiWhitespace(std::string * str)232 inline void StripAsciiWhitespace(std::string* str) {
233 StripTrailingAsciiWhitespace(str);
234 StripLeadingAsciiWhitespace(str);
235 }
236
// Removes leading, trailing, and consecutive internal whitespace.
// Only declared in this header.
void RemoveExtraAsciiWhitespace(std::string*);
239
240 ABSL_NAMESPACE_END
241 } // namespace absl
242
243 #endif // ABSL_STRINGS_ASCII_H_
244