• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef LIT_CHAR_HELPERS_H
17 #define LIT_CHAR_HELPERS_H
18 
19 #include "lit-globals.h"
20 
21 /*
22  * Format control characters (ECMA-262 v5, Table 1)
    *
    * Each macro expands to the character's code unit value cast to ecma_char_t.
    * LIT_CHAR_BOM is referenced again by the whitespace section of this header.
23  */
24 #define LIT_CHAR_ZWNJ ((ecma_char_t) 0x200C) /* U+200C zero width non-joiner */
25 #define LIT_CHAR_ZWJ  ((ecma_char_t) 0x200D) /* U+200D zero width joiner */
26 #define LIT_CHAR_BOM  ((ecma_char_t) 0xFEFF) /* U+FEFF byte order mark */
27 
28 /*
29  * Whitespace characters (ECMA-262 v5, Table 2)
    *
    * Values are code units cast to ecma_char_t. The byte order mark entry of the
    * table reuses LIT_CHAR_BOM instead of introducing a second macro.
30  */
31 #define LIT_CHAR_TAB  ((ecma_char_t) 0x0009) /* tab */
32 #define LIT_CHAR_VTAB ((ecma_char_t) 0x000B) /* vertical tab */
33 #define LIT_CHAR_FF   ((ecma_char_t) 0x000C) /* form feed */
34 #define LIT_CHAR_SP   ((ecma_char_t) 0x0020) /* space; also the bit OR-ed in by LEXER_TO_ASCII_LOWERCASE */
35 #define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
36 /* LIT_CHAR_BOM (0xFEFF) is defined above, with the format control characters */
37 
   /*
    * Declaration only; the definition is not part of this header.
    * NOTE(review): presumably reports whether code point c is ECMA-262 whitespace --
    * confirm the exact set (e.g. whether other Unicode space separators count) in the definition.
    * The parameter is a lit_code_point_t, unlike the ecma_char_t taken by
    * lit_char_is_line_terminator.
    */
38 bool lit_char_is_white_space (lit_code_point_t c);
39 
40 /*
41  * Line terminator characters (ECMA-262 v5, Table 3)
    *
    * Values are code units cast to ecma_char_t. LF and CR are reused by the
    * single character escape sequence section of this header.
42  */
43 #define LIT_CHAR_LF ((ecma_char_t) 0x000A) /* U+000A line feed */
44 #define LIT_CHAR_CR ((ecma_char_t) 0x000D) /* U+000D carriage return */
45 #define LIT_CHAR_LS ((ecma_char_t) 0x2028) /* U+2028 line separator */
46 #define LIT_CHAR_PS ((ecma_char_t) 0x2029) /* U+2029 paragraph separator */
47 
   /*
    * Declaration only; the definition is not part of this header.
    * NOTE(review): presumably true exactly for the four characters defined above -- confirm.
    * Takes a single ecma_char_t rather than a lit_code_point_t.
    */
48 bool lit_char_is_line_terminator (ecma_char_t c);
49 
50 /*
51  * String Single Character Escape Sequences (ECMA-262 v5, Table 4)
    *
    * The macros name the code unit that an escape stands for (for example the
    * backspace escape yields LIT_CHAR_BS, 0x0008), not the letter that follows
    * the backslash in source text. Entries already defined by earlier sections
    * are listed as comments only, so each value has a single definition.
52  */
53 #define LIT_CHAR_BS           ((ecma_char_t) 0x0008) /* backspace */
54 /* LIT_CHAR_TAB is defined above (whitespace characters) */
55 /* LIT_CHAR_LF is defined above (line terminator characters) */
56 /* LIT_CHAR_VTAB is defined above (whitespace characters) */
57 /* LIT_CHAR_FF is defined above (whitespace characters) */
58 /* LIT_CHAR_CR is defined above (line terminator characters) */
59 #define LIT_CHAR_DOUBLE_QUOTE ((ecma_char_t) '"') /* double quote */
60 #define LIT_CHAR_SINGLE_QUOTE ((ecma_char_t) '\'') /* single quote */
61 #define LIT_CHAR_BACKSLASH    ((ecma_char_t) '\\') /* reverse solidus (backslash) */
62 
63 /*
64  * Comment characters (ECMA-262 v5, 7.4)
    *
    * The two characters from which the comment delimiters are built.
    * LIT_CHAR_ASTERISK is reused by the punctuator section of this header.
65  */
66 #define LIT_CHAR_SLASH    ((ecma_char_t) '/') /* solidus */
67 #define LIT_CHAR_ASTERISK ((ecma_char_t) '*') /* asterisk */
68 
69 /*
70  * Identifier name characters (ECMA-262 v5, 7.6)
    *
    * Only the non-letter ASCII characters get macros here; letters and digits
    * have their own sections further down in this header.
71  */
72 #define LIT_CHAR_DOLLAR_SIGN ((ecma_char_t) '$')  /* dollar sign */
73 #define LIT_CHAR_UNDERSCORE  ((ecma_char_t) '_')  /* low line (underscore) */
74 /* LIT_CHAR_BACKSLASH is defined above (single character escape sequences) */
75 
   /*
    * Declarations only; the definitions are not part of this header.
    * Both take a full lit_code_point_t rather than a single ecma_char_t.
    * NOTE(review): presumably implement the IdentifierStart / IdentifierPart
    * classification of the section cited above -- confirm which Unicode
    * categories are accepted in the definitions.
    */
76 bool lit_code_point_is_identifier_start (lit_code_point_t code_point);
77 bool lit_code_point_is_identifier_part (lit_code_point_t code_point);
78 
79 /*
80  * Punctuator characters (ECMA-262 v5, 7.7)
    *
    * One macro per single character that occurs in a punctuator; multi-character
    * punctuators are not represented here. Each value is a character literal
    * cast to ecma_char_t.
81  */
82 #define LIT_CHAR_LEFT_BRACE   ((ecma_char_t) '{') /* left curly bracket */
83 #define LIT_CHAR_RIGHT_BRACE  ((ecma_char_t) '}') /* right curly bracket */
84 #define LIT_CHAR_LEFT_PAREN   ((ecma_char_t) '(') /* left parenthesis */
85 #define LIT_CHAR_RIGHT_PAREN  ((ecma_char_t) ')') /* right parenthesis */
86 #define LIT_CHAR_LEFT_SQUARE  ((ecma_char_t) '[') /* left square bracket */
87 #define LIT_CHAR_RIGHT_SQUARE ((ecma_char_t) ']') /* right square bracket */
88 #define LIT_CHAR_DOT          ((ecma_char_t) '.') /* dot */
89 #define LIT_CHAR_SEMICOLON    ((ecma_char_t) ';') /* semicolon */
90 #define LIT_CHAR_COMMA        ((ecma_char_t) ',') /* comma */
91 #define LIT_CHAR_LESS_THAN    ((ecma_char_t) '<') /* less-than sign */
92 #define LIT_CHAR_GREATER_THAN ((ecma_char_t) '>') /* greater-than sign */
93 #define LIT_CHAR_EQUALS       ((ecma_char_t) '=') /* equals sign */
94 #define LIT_CHAR_PLUS         ((ecma_char_t) '+') /* plus sign */
95 #define LIT_CHAR_MINUS        ((ecma_char_t) '-') /* hyphen-minus */
96 /* LIT_CHAR_ASTERISK is defined above (comment characters) */
97 #define LIT_CHAR_PERCENT      ((ecma_char_t) '%') /* percent sign */
98 #define LIT_CHAR_AMPERSAND    ((ecma_char_t) '&') /* ampersand */
99 #define LIT_CHAR_VLINE        ((ecma_char_t) '|') /* vertical line */
100 #define LIT_CHAR_CIRCUMFLEX   ((ecma_char_t) '^') /* circumflex accent */
101 #define LIT_CHAR_EXCLAMATION  ((ecma_char_t) '!') /* exclamation mark */
102 #define LIT_CHAR_TILDE        ((ecma_char_t) '~') /* tilde */
103 #define LIT_CHAR_QUESTION     ((ecma_char_t) '?') /* question mark */
104 #define LIT_CHAR_COLON        ((ecma_char_t) ':') /* colon */
105 
106 /*
107  * Special characters for String.prototype.replace.
     *
     * NOTE(review): presumably needed to recognise the dollar-plus-grave-accent
     * replacement pattern (the text preceding the match) -- confirm at the use site.
108  */
109 #define LIT_CHAR_GRAVE_ACCENT ((ecma_char_t) '`') /* grave accent */
110 
111 /**
112  * Uppercase ASCII letters
     *
     * One macro per letter, each a character literal cast to ecma_char_t.
     * LIT_CHAR_UPPERCASE_A, _F and _Z also serve as the end points of the
     * LIT_CHAR_ASCII_UPPERCASE_LETTERS_* range macros in this header.
113  */
114 #define LIT_CHAR_UPPERCASE_A ((ecma_char_t) 'A')
115 #define LIT_CHAR_UPPERCASE_B ((ecma_char_t) 'B')
116 #define LIT_CHAR_UPPERCASE_C ((ecma_char_t) 'C')
117 #define LIT_CHAR_UPPERCASE_D ((ecma_char_t) 'D')
118 #define LIT_CHAR_UPPERCASE_E ((ecma_char_t) 'E')
119 #define LIT_CHAR_UPPERCASE_F ((ecma_char_t) 'F')
120 #define LIT_CHAR_UPPERCASE_G ((ecma_char_t) 'G')
121 #define LIT_CHAR_UPPERCASE_H ((ecma_char_t) 'H')
122 #define LIT_CHAR_UPPERCASE_I ((ecma_char_t) 'I')
123 #define LIT_CHAR_UPPERCASE_J ((ecma_char_t) 'J')
124 #define LIT_CHAR_UPPERCASE_K ((ecma_char_t) 'K')
125 #define LIT_CHAR_UPPERCASE_L ((ecma_char_t) 'L')
126 #define LIT_CHAR_UPPERCASE_M ((ecma_char_t) 'M')
127 #define LIT_CHAR_UPPERCASE_N ((ecma_char_t) 'N')
128 #define LIT_CHAR_UPPERCASE_O ((ecma_char_t) 'O')
129 #define LIT_CHAR_UPPERCASE_P ((ecma_char_t) 'P')
130 #define LIT_CHAR_UPPERCASE_Q ((ecma_char_t) 'Q')
131 #define LIT_CHAR_UPPERCASE_R ((ecma_char_t) 'R')
132 #define LIT_CHAR_UPPERCASE_S ((ecma_char_t) 'S')
133 #define LIT_CHAR_UPPERCASE_T ((ecma_char_t) 'T')
134 #define LIT_CHAR_UPPERCASE_U ((ecma_char_t) 'U')
135 #define LIT_CHAR_UPPERCASE_V ((ecma_char_t) 'V')
136 #define LIT_CHAR_UPPERCASE_W ((ecma_char_t) 'W')
137 #define LIT_CHAR_UPPERCASE_X ((ecma_char_t) 'X')
138 #define LIT_CHAR_UPPERCASE_Y ((ecma_char_t) 'Y')
139 #define LIT_CHAR_UPPERCASE_Z ((ecma_char_t) 'Z')
140 
141 /**
142  * Lowercase ASCII letters
     *
     * One macro per letter, each a character literal cast to ecma_char_t.
     * LIT_CHAR_LOWERCASE_A, _F and _Z also serve as the end points of the
     * LIT_CHAR_ASCII_LOWERCASE_LETTERS_* range macros in this header.
143  */
144 #define LIT_CHAR_LOWERCASE_A ((ecma_char_t) 'a')
145 #define LIT_CHAR_LOWERCASE_B ((ecma_char_t) 'b')
146 #define LIT_CHAR_LOWERCASE_C ((ecma_char_t) 'c')
147 #define LIT_CHAR_LOWERCASE_D ((ecma_char_t) 'd')
148 #define LIT_CHAR_LOWERCASE_E ((ecma_char_t) 'e')
149 #define LIT_CHAR_LOWERCASE_F ((ecma_char_t) 'f')
150 #define LIT_CHAR_LOWERCASE_G ((ecma_char_t) 'g')
151 #define LIT_CHAR_LOWERCASE_H ((ecma_char_t) 'h')
152 #define LIT_CHAR_LOWERCASE_I ((ecma_char_t) 'i')
153 #define LIT_CHAR_LOWERCASE_J ((ecma_char_t) 'j')
154 #define LIT_CHAR_LOWERCASE_K ((ecma_char_t) 'k')
155 #define LIT_CHAR_LOWERCASE_L ((ecma_char_t) 'l')
156 #define LIT_CHAR_LOWERCASE_M ((ecma_char_t) 'm')
157 #define LIT_CHAR_LOWERCASE_N ((ecma_char_t) 'n')
158 #define LIT_CHAR_LOWERCASE_O ((ecma_char_t) 'o')
159 #define LIT_CHAR_LOWERCASE_P ((ecma_char_t) 'p')
160 #define LIT_CHAR_LOWERCASE_Q ((ecma_char_t) 'q')
161 #define LIT_CHAR_LOWERCASE_R ((ecma_char_t) 'r')
162 #define LIT_CHAR_LOWERCASE_S ((ecma_char_t) 's')
163 #define LIT_CHAR_LOWERCASE_T ((ecma_char_t) 't')
164 #define LIT_CHAR_LOWERCASE_U ((ecma_char_t) 'u')
165 #define LIT_CHAR_LOWERCASE_V ((ecma_char_t) 'v')
166 #define LIT_CHAR_LOWERCASE_W ((ecma_char_t) 'w')
167 #define LIT_CHAR_LOWERCASE_X ((ecma_char_t) 'x')
168 #define LIT_CHAR_LOWERCASE_Y ((ecma_char_t) 'y')
169 #define LIT_CHAR_LOWERCASE_Z ((ecma_char_t) 'z')
170 
171 /**
172  * ASCII decimal digits
     *
     * These are the digit characters ('0' .. '9'), not the numeric values 0 .. 9.
     * LIT_CHAR_0, _7 and _9 also serve as the end points of the octal and decimal
     * digit range macros in this header.
173  */
174 #define LIT_CHAR_0    ((ecma_char_t) '0')
175 #define LIT_CHAR_1    ((ecma_char_t) '1')
176 #define LIT_CHAR_2    ((ecma_char_t) '2')
177 #define LIT_CHAR_3    ((ecma_char_t) '3')
178 #define LIT_CHAR_4    ((ecma_char_t) '4')
179 #define LIT_CHAR_5    ((ecma_char_t) '5')
180 #define LIT_CHAR_6    ((ecma_char_t) '6')
181 #define LIT_CHAR_7    ((ecma_char_t) '7')
182 #define LIT_CHAR_8    ((ecma_char_t) '8')
183 #define LIT_CHAR_9    ((ecma_char_t) '9')
184 
185 /**
186  * ASCII character ranges
     *
     * Each BEGIN / END pair aliases the first and the last character of a range.
     * END is itself a member of the range (for example LIT_CHAR_9 for decimal
     * digits), so both bounds are inclusive and a membership test needs
     * BEGIN <= c && c <= END.
187  */
188 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN      LIT_CHAR_UPPERCASE_A /* uppercase letters range: 'A' .. 'Z' */
189 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_END        LIT_CHAR_UPPERCASE_Z
190 
191 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN      LIT_CHAR_LOWERCASE_A /* lowercase letters range: 'a' .. 'z' */
192 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_END        LIT_CHAR_LOWERCASE_Z
193 
194 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN  LIT_CHAR_UPPERCASE_A /* uppercase letters for
195                                                                           * hexadecimal digits range: 'A' .. 'F' */
196 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END    LIT_CHAR_UPPERCASE_F
197 
198 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN  LIT_CHAR_LOWERCASE_A /* lowercase letters for
199                                                                           * hexadecimal digits range: 'a' .. 'f' */
200 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END    LIT_CHAR_LOWERCASE_F
201 
202 #define LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN           LIT_CHAR_0           /* octal digits range: '0' .. '7' */
203 #define LIT_CHAR_ASCII_OCTAL_DIGITS_END             LIT_CHAR_7
204 
205 #define LIT_CHAR_ASCII_DIGITS_BEGIN                 LIT_CHAR_0           /* decimal digits range: '0' .. '9' */
206 #define LIT_CHAR_ASCII_DIGITS_END                   LIT_CHAR_9
207 
/*
 * Sets bit 0x20 (the value of LIT_CHAR_SP) in the argument.
 *
 * For the ASCII letters 'A' .. 'Z' this produces 'a' .. 'z'; a character that
 * already has the bit set (lowercase letters, digits) comes back unchanged.
 * The macro performs no letter check: any other input without the bit set is
 * altered as well (for example '@' becomes the grave accent), so it is only
 * meaningful where the caller accepts or has excluded such inputs.
 *
 * The argument is evaluated exactly once. The result has the type of the
 * bitwise OR expression after the usual integer conversions, which is not
 * necessarily ecma_char_t.
 */
208 #define LEXER_TO_ASCII_LOWERCASE(character) ((character) | LIT_CHAR_SP)
209 
/*
 * Digit classification, hexadecimal / decimal parsing and CESU-8 encoding helpers.
 *
 * Only the prototypes are visible in this header. Every behavioral note below is
 * inferred from names and signatures and is marked for confirmation against the
 * definitions.
 */
    /* NOTE(review): presumably true for LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN .. _END -- confirm. */
210 bool lit_char_is_octal_digit (ecma_char_t c);
    /* NOTE(review): presumably true for LIT_CHAR_ASCII_DIGITS_BEGIN .. _END -- confirm. */
211 bool lit_char_is_decimal_digit (ecma_char_t c);
    /* NOTE(review): presumably true for decimal digits plus both _HEX_ letter ranges -- confirm. */
212 bool lit_char_is_hex_digit (ecma_char_t c);
213 #if ENABLED (JERRY_ES2015)
    /* Declared only when the JERRY_ES2015 feature check above succeeds. */
214 bool lit_char_is_binary_digit (ecma_char_t c);
215 #endif /* ENABLED (JERRY_ES2015) */
    /* NOTE(review): presumably maps one hexadecimal digit to its numeric value; the
     * behavior for a non-hexadecimal argument is not visible here -- confirm. */
216 uint32_t lit_char_hex_to_int (ecma_char_t c);
    /* Takes a writable destination and a code point and returns a size_t.
     * NOTE(review): presumably the number of bytes written; the capacity required
     * of dst_p is not stated here -- confirm. */
217 size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
    /* NOTE(review): presumably the byte count the function above would produce for
     * the same code point -- confirm. */
218 size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
    /* Reads from source_p (const) and writes to dst_p; returns nothing.
     * NOTE(review): input and output lengths are implied by the name only -- confirm. */
219 void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p);
    /* Works on the byte range that starts at buf_p and is bounded by buf_end_p.
     * NOTE(review): the role of `lookup` (possibly a digit count) and the value
     * returned on failure are not visible here -- confirm. */
220 uint32_t lit_char_hex_lookup (const lit_utf8_byte_t *buf_p, const lit_utf8_byte_t *const buf_end_p, uint32_t lookup);
    /* buffer_p points at the caller's cursor, so the function is able to update it.
     * NOTE(review): presumably advances the cursor past the digits it consumes;
     * overflow handling is not visible here -- confirm. */
221 uint32_t lit_parse_decimal (const lit_utf8_byte_t **buffer_p, const lit_utf8_byte_t *const buffer_end_p);
222 
223 /**
224  * Null character
     *
     * The zero-valued character as an ecma_char_t. It is a character constant,
     * not a null pointer, and must not be used where a pointer is expected.
225  */
226 #define LIT_CHAR_NULL  ((ecma_char_t) '\0')
227 
228 /*
229  * Part of IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3)
     *
     * Declaration only; the definition is not part of this header.
     * NOTE(review): presumably true when c is an ASCII letter, a decimal digit or
     * LIT_CHAR_UNDERSCORE, as the cited step describes -- confirm in the definition.
     * The remaining steps of IsWordChar are not covered by this function.
230  */
231 bool lit_char_is_word_char (lit_code_point_t c);
232 
233 /*
234  * Utility functions for uppercasing / lowercasing
235  */
236 
237 /**
238  * Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
     *
     * NOTE(review): the unit is presumably ecma_char_t elements, since the output
     * buffer of both functions is an ecma_char_t array -- confirm. The name suggests
     * that one character can map to as many as three characters in the other case.
239  */
240 #define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)
241 
    /*
     * Declarations only; the definitions are not part of this header.
     * Both take the character to convert, a writable output buffer and that
     * buffer's size, and return an ecma_length_t.
     * NOTE(review): presumably the return value is the number of characters stored
     * in output_buffer_p; what happens when buffer_size is smaller than
     * LIT_MAXIMUM_OTHER_CASE_LENGTH is not visible here -- confirm.
     */
242 ecma_length_t lit_char_to_lower_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
243 ecma_length_t lit_char_to_upper_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
244 
245 #endif /* !LIT_CHAR_HELPERS_H */
246