// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_
6 #define V8_STRINGS_CHAR_PREDICATES_INL_H_
7
8 #include "src/base/bounds.h"
9 #include "src/strings/char-predicates.h"
10 #include "src/utils/utils.h"
11
12 namespace v8 {
13 namespace internal {
14
15 // If c is in 'A'-'Z' or 'a'-'z', return its lower-case.
16 // Else, return something outside of 'A'-'Z' and 'a'-'z'.
17 // Note: it ignores LOCALE.
AsciiAlphaToLower(base::uc32 c)18 inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; }
19
IsCarriageReturn(base::uc32 c)20 inline constexpr bool IsCarriageReturn(base::uc32 c) { return c == 0x000D; }
21
IsLineFeed(base::uc32 c)22 inline constexpr bool IsLineFeed(base::uc32 c) { return c == 0x000A; }
23
IsAsciiIdentifier(base::uc32 c)24 inline constexpr bool IsAsciiIdentifier(base::uc32 c) {
25 return IsAlphaNumeric(c) || c == '$' || c == '_';
26 }
27
IsAlphaNumeric(base::uc32 c)28 inline constexpr bool IsAlphaNumeric(base::uc32 c) {
29 return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c);
30 }
31
IsDecimalDigit(base::uc32 c)32 inline constexpr bool IsDecimalDigit(base::uc32 c) {
33 // ECMA-262, 3rd, 7.8.3 (p 16)
34 return base::IsInRange(c, '0', '9');
35 }
36
IsHexDigit(base::uc32 c)37 inline constexpr bool IsHexDigit(base::uc32 c) {
38 // ECMA-262, 3rd, 7.6 (p 15)
39 return IsDecimalDigit(c) || base::IsInRange(AsciiAlphaToLower(c), 'a', 'f');
40 }
41
IsOctalDigit(base::uc32 c)42 inline constexpr bool IsOctalDigit(base::uc32 c) {
43 // ECMA-262, 6th, 7.8.3
44 return base::IsInRange(c, '0', '7');
45 }
46
IsNonOctalDecimalDigit(base::uc32 c)47 inline constexpr bool IsNonOctalDecimalDigit(base::uc32 c) {
48 return base::IsInRange(c, '8', '9');
49 }
50
IsBinaryDigit(base::uc32 c)51 inline constexpr bool IsBinaryDigit(base::uc32 c) {
52 // ECMA-262, 6th, 7.8.3
53 return c == '0' || c == '1';
54 }
55
IsAsciiLower(base::uc32 c)56 inline constexpr bool IsAsciiLower(base::uc32 c) {
57 return base::IsInRange(c, 'a', 'z');
58 }
59
IsAsciiUpper(base::uc32 c)60 inline constexpr bool IsAsciiUpper(base::uc32 c) {
61 return base::IsInRange(c, 'A', 'Z');
62 }
63
ToAsciiUpper(base::uc32 c)64 inline constexpr base::uc32 ToAsciiUpper(base::uc32 c) {
65 return c & ~(IsAsciiLower(c) << 5);
66 }
67
ToAsciiLower(base::uc32 c)68 inline constexpr base::uc32 ToAsciiLower(base::uc32 c) {
69 return c | (IsAsciiUpper(c) << 5);
70 }
71
IsRegExpWord(base::uc32 c)72 inline constexpr bool IsRegExpWord(base::uc32 c) {
73 return IsAlphaNumeric(c) || c == '_';
74 }
75
// Bit flags for the constexpr per-character cache table. Each flag occupies
// its own bit so that several can be combined in a single uint8_t entry.
enum OneByteCharFlags {
  // The character may start an identifier.
  kIsIdentifierStart = 0x01,
  // The character may appear inside an identifier.
  kIsIdentifierPart = 0x02,
  // The character is white space.
  kIsWhiteSpace = 0x04,
  // The character is white space or a line terminator.
  kIsWhiteSpaceOrLineTerminator = 0x08,
  // The character might end a line; callers must still inspect the full code
  // point.
  kMaybeLineEnd = 0x10
};
84
85 // See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
86 // ID_Start. Additionally includes '_' and '$'.
IsOneByteIDStart(base::uc32 c)87 constexpr bool IsOneByteIDStart(base::uc32 c) {
88 return c == 0x0024 || (c >= 0x0041 && c <= 0x005A) || c == 0x005F ||
89 (c >= 0x0061 && c <= 0x007A) || c == 0x00AA || c == 0x00B5 ||
90 c == 0x00BA || (c >= 0x00C0 && c <= 0x00D6) ||
91 (c >= 0x00D8 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FF);
92 }
93
94 // See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
95 // ID_Continue. Additionally includes '_' and '$'.
IsOneByteIDContinue(base::uc32 c)96 constexpr bool IsOneByteIDContinue(base::uc32 c) {
97 return c == 0x0024 || (c >= 0x0030 && c <= 0x0039) || c == 0x005F ||
98 (c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) ||
99 c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA ||
100 (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) ||
101 (c >= 0x00F8 && c <= 0x00FF);
102 }
103
IsOneByteWhitespace(base::uc32 c)104 constexpr bool IsOneByteWhitespace(base::uc32 c) {
105 return c == '\t' || c == '\v' || c == '\f' || c == ' ' || c == u'\xa0';
106 }
107
BuildOneByteCharFlags(base::uc32 c)108 constexpr uint8_t BuildOneByteCharFlags(base::uc32 c) {
109 uint8_t result = 0;
110 if (IsOneByteIDStart(c) || c == '\\') result |= kIsIdentifierStart;
111 if (IsOneByteIDContinue(c) || c == '\\') result |= kIsIdentifierPart;
112 if (IsOneByteWhitespace(c)) {
113 result |= kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator;
114 }
115 if (c == '\r' || c == '\n') {
116 result |= kIsWhiteSpaceOrLineTerminator | kMaybeLineEnd;
117 }
118 // Add markers to identify 0x2028 and 0x2029.
119 if (c == static_cast<uint8_t>(0x2028) || c == static_cast<uint8_t>(0x2029)) {
120 result |= kMaybeLineEnd;
121 }
122 return result;
123 }
124 const constexpr uint8_t kOneByteCharFlags[256] = {
125 #define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N),
126 INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
127 #undef BUILD_CHAR_FLAGS
128 #define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128),
129 INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
130 #undef BUILD_CHAR_FLAGS
131 };
132
IsIdentifierStart(base::uc32 c)133 bool IsIdentifierStart(base::uc32 c) {
134 if (!base::IsInRange(c, 0, 255)) return IsIdentifierStartSlow(c);
135 DCHECK_EQ(IsIdentifierStartSlow(c),
136 static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierStart));
137 return kOneByteCharFlags[c] & kIsIdentifierStart;
138 }
139
IsIdentifierPart(base::uc32 c)140 bool IsIdentifierPart(base::uc32 c) {
141 if (!base::IsInRange(c, 0, 255)) return IsIdentifierPartSlow(c);
142 DCHECK_EQ(IsIdentifierPartSlow(c),
143 static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierPart));
144 return kOneByteCharFlags[c] & kIsIdentifierPart;
145 }
146
IsWhiteSpace(base::uc32 c)147 bool IsWhiteSpace(base::uc32 c) {
148 if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceSlow(c);
149 DCHECK_EQ(IsWhiteSpaceSlow(c),
150 static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpace));
151 return kOneByteCharFlags[c] & kIsWhiteSpace;
152 }
153
IsWhiteSpaceOrLineTerminator(base::uc32 c)154 bool IsWhiteSpaceOrLineTerminator(base::uc32 c) {
155 if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceOrLineTerminatorSlow(c);
156 DCHECK_EQ(
157 IsWhiteSpaceOrLineTerminatorSlow(c),
158 static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator));
159 return kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator;
160 }
161
IsLineTerminatorSequence(base::uc32 c,base::uc32 next)162 bool IsLineTerminatorSequence(base::uc32 c, base::uc32 next) {
163 if (kOneByteCharFlags[static_cast<uint8_t>(c)] & kMaybeLineEnd) {
164 if (c == '\n') return true;
165 if (c == '\r') return next != '\n';
166 return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u);
167 }
168 return false;
169 }
170
171 } // namespace internal
172
173 } // namespace v8
174
175 #endif // V8_STRINGS_CHAR_PREDICATES_INL_H_
176