1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_STRINGS_CHAR_PREDICATES_H_
6 #define V8_STRINGS_CHAR_PREDICATES_H_
7
8 #include "src/common/globals.h"
9 #include "src/strings/unicode.h"
10
11 namespace v8 {
12 namespace internal {
13
// Unicode character predicates as defined by ECMA-262, 3rd edition,
// used for lexical analysis.
16
17 inline constexpr int AsciiAlphaToLower(uc32 c);
18 inline constexpr bool IsCarriageReturn(uc32 c);
19 inline constexpr bool IsLineFeed(uc32 c);
20 inline constexpr bool IsAsciiIdentifier(uc32 c);
21 inline constexpr bool IsAlphaNumeric(uc32 c);
22 inline constexpr bool IsDecimalDigit(uc32 c);
23 inline constexpr bool IsHexDigit(uc32 c);
24 inline constexpr bool IsOctalDigit(uc32 c);
25 inline constexpr bool IsBinaryDigit(uc32 c);
26 inline constexpr bool IsRegExpWord(uc32 c);
27
28 inline constexpr bool IsAsciiLower(uc32 ch);
29 inline constexpr bool IsAsciiUpper(uc32 ch);
30
31 inline constexpr uc32 ToAsciiUpper(uc32 ch);
32 inline constexpr uc32 ToAsciiLower(uc32 ch);
33
34 // ES#sec-names-and-keywords
35 // This includes '_', '$' and '\', and ID_Start according to
36 // http://www.unicode.org/reports/tr31/, which consists of categories
37 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
38 // 'Pattern_Syntax' or 'Pattern_White_Space'.
39 inline bool IsIdentifierStart(uc32 c);
40 #ifdef V8_INTL_SUPPORT
41 V8_EXPORT_PRIVATE bool IsIdentifierStartSlow(uc32 c);
42 #else
IsIdentifierStartSlow(uc32 c)43 inline bool IsIdentifierStartSlow(uc32 c) {
44 // Non-BMP characters are not supported without I18N.
45 return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false;
46 }
47 #endif
48
49 // ES#sec-names-and-keywords
50 // This includes \u200c and \u200d, and ID_Continue according to
51 // http://www.unicode.org/reports/tr31/, which consists of ID_Start,
52 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
53 // 'Pattern_Syntax' or 'Pattern_White_Space'.
54 inline bool IsIdentifierPart(uc32 c);
55 #ifdef V8_INTL_SUPPORT
56 V8_EXPORT_PRIVATE bool IsIdentifierPartSlow(uc32 c);
57 #else
IsIdentifierPartSlow(uc32 c)58 inline bool IsIdentifierPartSlow(uc32 c) {
59 // Non-BMP charaacters are not supported without I18N.
60 if (c <= 0xFFFF) {
61 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
62 }
63 return false;
64 }
65 #endif
66
67 // ES6 draft section 11.2
68 // This includes all code points of Unicode category 'Zs'.
69 // Further included are \u0009, \u000b, \u000c, and \ufeff.
70 inline bool IsWhiteSpace(uc32 c);
71 #ifdef V8_INTL_SUPPORT
72 V8_EXPORT_PRIVATE bool IsWhiteSpaceSlow(uc32 c);
73 #else
IsWhiteSpaceSlow(uc32 c)74 inline bool IsWhiteSpaceSlow(uc32 c) { return unibrow::WhiteSpace::Is(c); }
75 #endif
76
77 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
78 // This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp)
79 // as well as \u0009 - \u000d and \ufeff.
80 inline bool IsWhiteSpaceOrLineTerminator(uc32 c);
IsWhiteSpaceOrLineTerminatorSlow(uc32 c)81 inline bool IsWhiteSpaceOrLineTerminatorSlow(uc32 c) {
82 return IsWhiteSpaceSlow(c) || unibrow::IsLineTerminator(c);
83 }
84
85 inline bool IsLineTerminatorSequence(uc32 c, uc32 next);
86
87 } // namespace internal
88 } // namespace v8
89
90 #endif // V8_STRINGS_CHAR_PREDICATES_H_
91