1 /*
2 * Copyright (C) 2007, 2008, 2009, 2011 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #ifndef WTF_ASCIICType_h
30 #define WTF_ASCIICType_h
31
32 #include "wtf/Assertions.h"
33
34 // The behavior of many of the functions in the <ctype.h> header is dependent
35 // on the current locale. But in the WebKit project, all uses of those functions
36 // are in code processing something that's not locale-specific. These equivalents
37 // for some of the <ctype.h> functions are named more explicitly, not dependent
38 // on the C library locale, and we should also optimize them as needed.
39
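// The isASCII* predicates return false, and toASCIILower/toASCIIUpper leave the
// character unchanged, when passed a character outside the range 0x00-0x7F, so
// they can be used on Unicode strings or characters when the intent is to process
// only ASCII. The exceptions are toASCIILowerUnchecked, which sets bit 0x20
// unconditionally, and toASCIIHexValue, which requires a valid ASCII hex digit.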
43
44 namespace WTF {
45
// Returns true when |c| has no bits set outside the low seven, i.e. its value
// lies in the range 0x00-0x7F.
template<typename CharType> inline bool isASCII(CharType c)
{
    return (c & ~0x7F) == 0;
}
50
// Returns true for 'A'-'Z' and 'a'-'z'.
template<typename CharType> inline bool isASCIIAlpha(CharType c)
{
    // Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z', so a single range check
    // against the lowercase letters covers both cases.
    if ((c | 0x20) < 'a')
        return false;
    return (c | 0x20) <= 'z';
}
55
// Returns true for the decimal digits '0'-'9'.
template<typename CharType> inline bool isASCIIDigit(CharType c)
{
    if (c < '0')
        return false;
    return c <= '9';
}
60
// Returns true when |c| is an ASCII decimal digit or an ASCII letter.
template<typename CharType> inline bool isASCIIAlphanumeric(CharType c)
{
    if (isASCIIDigit(c))
        return true;
    return isASCIIAlpha(c);
}
65
// Returns true for '0'-'9', 'A'-'F' and 'a'-'f'.
template<typename CharType> inline bool isASCIIHexDigit(CharType c)
{
    if (isASCIIDigit(c))
        return true;
    // Setting bit 0x20 folds 'A'-'F' onto 'a'-'f', so one range check handles
    // both cases.
    return (c | 0x20) >= 'a' && (c | 0x20) <= 'f';
}
70
// Returns true for the lowercase letters 'a'-'z'.
template<typename CharType> inline bool isASCIILower(CharType c)
{
    return !(c < 'a' || c > 'z');
}
75
// Returns true for the octal digits '0'-'7'.
template<typename CharType> inline bool isASCIIOctalDigit(CharType c)
{
    // Both comparisons are side-effect free, so the logical form yields the
    // same result as combining them bitwise.
    return c >= '0' && c <= '7';
}
80
// Returns true for the characters from ' ' (0x20) through '~' (0x7E) inclusive.
template<typename CharType> inline bool isASCIIPrintable(CharType c)
{
    return ' ' <= c && c <= '~';
}
85
86 /*
87 Statistics from a run of Apple's page load test for callers of isASCIISpace:
88
89 character count
90 --------- -----
91 non-spaces 689383
92 20 space 294720
93 0A \n 89059
94 09 \t 28320
95 0D \r 0
96 0C \f 0
97 0B \v 0
98 */
// Returns true for ' ' and for the control characters 0x9-0xD
// (\t, \n, \v, \f, \r).
template<typename CharType> inline bool isASCIISpace(CharType c)
{
    // Per the statistics above, non-spaces are the most common input, so
    // anything greater than ' ' is rejected with a single comparison.
    if (c > ' ')
        return false;
    if (c == ' ')
        return true;
    return c >= 0x9 && c <= 0xD;
}
103
// Returns true for the uppercase letters 'A'-'Z'.
template<typename CharType> inline bool isASCIIUpper(CharType c)
{
    return 'A' <= c && c <= 'Z';
}
108
// Converts 'A'-'Z' to the corresponding lowercase letter and returns every
// other value unchanged.
template<typename CharType> inline CharType toASCIILower(CharType c)
{
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 170060610
    // Workaround for a VS2012 Update 3 optimizer bug; remove once VS2012 fixes it.
    return (c >= 'A' && c <= 'Z') ? c + 0x20 : c;
#else
    // Uppercase and lowercase ASCII letters differ only in bit 0x20, so OR that
    // bit in for uppercase input and OR in zero for everything else.
    const int caseBit = (c >= 'A' && c <= 'Z') ? 0x20 : 0;
    return c | caseBit;
#endif
}
118
// Sets bit 0x20 of |character| without checking that it is a letter.
template<typename CharType> inline CharType toASCIILowerUnchecked(CharType character)
{
    // The result is only meaningful when compared against a lowercase English
    // letter. Regular caseless comparison of ASCII alpha characters should go
    // through isASCIIAlphaCaselessEqual below; this function exists for direct
    // use in the switch statements of the CSS tokenizer.
    const CharType withCaseBit = character | 0x20;
    return withCaseBit;
}
128
// Converts 'a'-'z' to the corresponding uppercase letter and returns every
// other value unchanged.
template<typename CharType> inline CharType toASCIIUpper(CharType c)
{
    // Lowercase and uppercase ASCII letters differ only in bit 0x20, so clear
    // that bit for lowercase input and clear nothing for everything else.
    const int caseBit = (c >= 'a' && c <= 'z') ? 0x20 : 0;
    return c & ~caseBit;
}
133
// Returns the numeric value (0-15) of the ASCII hex digit |c|.
// |c| must satisfy isASCIIHexDigit(); this is asserted.
template<typename CharType> inline int toASCIIHexValue(CharType c)
{
    ASSERT(isASCIIHexDigit(c));
    // Among hex digits, only '0'-'9' sort below 'A'.
    if (c < 'A')
        return c - '0';
    // 'A'-'F' yield 10-15 directly; 'a'-'f' yield 42-47, whose low four bits
    // are also 10-15, so the mask makes both cases agree.
    return (c - 'A' + 10) & 0xF;
}
139
// Combines two ASCII hex digits into one byte value (0-255): |upperValue|
// supplies the high four bits and |lowerValue| the low four bits.
// Both arguments must satisfy isASCIIHexDigit(); this is asserted.
template<typename CharType> inline int toASCIIHexValue(CharType upperValue, CharType lowerValue)
{
    ASSERT(isASCIIHexDigit(upperValue) && isASCIIHexDigit(lowerValue));
    const int highNibble = (toASCIIHexValue(upperValue) << 4) & 0xF0;
    const int lowNibble = toASCIIHexValue(lowerValue);
    return highNibble | lowNibble;
}
145
// Returns the uppercase ASCII hex digit ('0'-'9', 'A'-'F') for the low four
// bits of |c|.
inline char lowerNibbleToASCIIHexDigit(char c)
{
    // The mask leaves a value in 0-15, which indexes the digit string directly.
    return "0123456789ABCDEF"[c & 0xF];
}
151
// Returns the uppercase ASCII hex digit ('0'-'9', 'A'-'F') for bits 4-7 of |c|.
inline char upperNibbleToASCIIHexDigit(char c)
{
    // Shift the high nibble down and mask it; the mask also discards any sign
    // bits the shift brings in when |c| is negative. The resulting 0-15 value
    // indexes the digit string directly.
    return "0123456789ABCDEF"[(c >> 4) & 0xF];
}
157
isASCIIAlphaCaselessEqual(CharType cssCharacter,char character)158 template<typename CharType> inline bool isASCIIAlphaCaselessEqual(CharType cssCharacter, char character)
159 {
160 // This function compares a (preferrably) constant ASCII
161 // lowercase letter to any input character.
162 ASSERT(character >= 'a' && character <= 'z');
163 return LIKELY(toASCIILowerUnchecked(cssCharacter) == character);
164 }
165
166 }
167
168 using WTF::isASCII;
169 using WTF::isASCIIAlpha;
170 using WTF::isASCIIAlphanumeric;
171 using WTF::isASCIIDigit;
172 using WTF::isASCIIHexDigit;
173 using WTF::isASCIILower;
174 using WTF::isASCIIOctalDigit;
175 using WTF::isASCIIPrintable;
176 using WTF::isASCIISpace;
177 using WTF::isASCIIUpper;
178 using WTF::toASCIIHexValue;
179 using WTF::toASCIILower;
180 using WTF::toASCIILowerUnchecked;
181 using WTF::toASCIIUpper;
182 using WTF::lowerNibbleToASCIIHexDigit;
183 using WTF::upperNibbleToASCIIHexDigit;
184 using WTF::isASCIIAlphaCaselessEqual;
185
186 #endif
187