1 /* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef LIT_CHAR_HELPERS_H 17 #define LIT_CHAR_HELPERS_H 18 19 #include "lit-globals.h" 20 21 /* 22 * Format control characters (ECMA-262 v5, Table 1) 23 */ 24 #define LIT_CHAR_ZWNJ ((ecma_char_t) 0x200C) /* zero width non-joiner */ 25 #define LIT_CHAR_ZWJ ((ecma_char_t) 0x200D) /* zero width joiner */ 26 #define LIT_CHAR_BOM ((ecma_char_t) 0xFEFF) /* byte order mark */ 27 28 /* 29 * Whitespace characters (ECMA-262 v5, Table 2) 30 */ 31 #define LIT_CHAR_TAB ((ecma_char_t) 0x0009) /* tab */ 32 #define LIT_CHAR_VTAB ((ecma_char_t) 0x000B) /* vertical tab */ 33 #define LIT_CHAR_FF ((ecma_char_t) 0x000C) /* form feed */ 34 #define LIT_CHAR_SP ((ecma_char_t) 0x0020) /* space */ 35 #define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */ 36 /* LIT_CHAR_BOM is defined above */ 37 38 bool lit_char_is_white_space (lit_code_point_t c); 39 40 /* 41 * Line terminator characters (ECMA-262 v5, Table 3) 42 */ 43 #define LIT_CHAR_LF ((ecma_char_t) 0x000A) /* line feed */ 44 #define LIT_CHAR_CR ((ecma_char_t) 0x000D) /* carriage return */ 45 #define LIT_CHAR_LS ((ecma_char_t) 0x2028) /* line separator */ 46 #define LIT_CHAR_PS ((ecma_char_t) 0x2029) /* paragraph separator */ 47 48 bool lit_char_is_line_terminator (ecma_char_t c); 49 50 /* 51 * String Single Character Escape Sequences (ECMA-262 v5, Table 4) 52 */ 53 #define LIT_CHAR_BS ((ecma_char_t) 0x0008) /* backspace */ 54 /* LIT_CHAR_TAB is defined above */ 55 /* LIT_CHAR_LF is defined above */ 56 /* LIT_CHAR_VTAB is defined above */ 57 /* LIT_CHAR_FF is defined above */ 58 /* LIT_CHAR_CR is defined above */ 59 #define LIT_CHAR_DOUBLE_QUOTE ((ecma_char_t) '"') /* double quote */ 60 #define LIT_CHAR_SINGLE_QUOTE ((ecma_char_t) '\'') /* single quote */ 61 #define LIT_CHAR_BACKSLASH ((ecma_char_t) '\\') /* reverse solidus (backslash) */ 62 63 /* 64 * Comment characters (ECMA-262 v5, 7.4) 65 */ 66 #define LIT_CHAR_SLASH ((ecma_char_t) '/') /* solidus */ 67 #define LIT_CHAR_ASTERISK ((ecma_char_t) '*') /* asterisk */ 68 69 /* 70 * Identifier name characters (ECMA-262 v5, 7.6) 71 */ 72 #define LIT_CHAR_DOLLAR_SIGN ((ecma_char_t) '$') /* dollar sign */ 73 #define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */ 74 /* LIT_CHAR_BACKSLASH defined above */ 75 76 bool lit_code_point_is_identifier_start (lit_code_point_t code_point); 77 bool lit_code_point_is_identifier_part (lit_code_point_t code_point); 78 79 /* 80 * Punctuator characters (ECMA-262 v5, 7.7) 81 */ 82 #define LIT_CHAR_LEFT_BRACE ((ecma_char_t) '{') /* left curly bracket */ 83 #define LIT_CHAR_RIGHT_BRACE ((ecma_char_t) '}') /* right curly bracket */ 84 #define LIT_CHAR_LEFT_PAREN ((ecma_char_t) '(') /* left parenthesis */ 85 #define LIT_CHAR_RIGHT_PAREN ((ecma_char_t) ')') /* right parenthesis */ 86 #define LIT_CHAR_LEFT_SQUARE ((ecma_char_t) '[') /* left square bracket */ 87 #define LIT_CHAR_RIGHT_SQUARE ((ecma_char_t) ']') /* right square bracket */ 88 #define LIT_CHAR_DOT ((ecma_char_t) '.') /* dot */ 89 #define LIT_CHAR_SEMICOLON ((ecma_char_t) ';') /* semicolon */ 90 #define LIT_CHAR_COMMA ((ecma_char_t) ',') /* comma */ 91 #define LIT_CHAR_LESS_THAN ((ecma_char_t) '<') /* less-than sign */ 92 #define LIT_CHAR_GREATER_THAN ((ecma_char_t) '>') /* greater-than sign */ 93 #define LIT_CHAR_EQUALS ((ecma_char_t) '=') /* equals sign */ 94 #define LIT_CHAR_PLUS ((ecma_char_t) '+') /* plus sign */ 95 #define LIT_CHAR_MINUS ((ecma_char_t) '-') /* hyphen-minus */ 96 /* LIT_CHAR_ASTERISK is defined above */ 97 #define LIT_CHAR_PERCENT ((ecma_char_t) '%') /* percent sign */ 98 #define LIT_CHAR_AMPERSAND ((ecma_char_t) '&') /* ampersand */ 99 #define LIT_CHAR_VLINE ((ecma_char_t) '|') /* vertical line */ 100 #define LIT_CHAR_CIRCUMFLEX ((ecma_char_t) '^') /* circumflex accent */ 101 #define LIT_CHAR_EXCLAMATION ((ecma_char_t) '!') /* exclamation mark */ 102 #define LIT_CHAR_TILDE ((ecma_char_t) '~') /* tilde */ 103 #define LIT_CHAR_QUESTION ((ecma_char_t) '?') /* question mark */ 104 #define LIT_CHAR_COLON ((ecma_char_t) ':') /* colon */ 105 106 /* 107 * Special characters for String.prototype.replace. 108 */ 109 #define LIT_CHAR_GRAVE_ACCENT ((ecma_char_t) '`') /* grave accent */ 110 111 /** 112 * Uppercase ASCII letters 113 */ 114 #define LIT_CHAR_UPPERCASE_A ((ecma_char_t) 'A') 115 #define LIT_CHAR_UPPERCASE_B ((ecma_char_t) 'B') 116 #define LIT_CHAR_UPPERCASE_C ((ecma_char_t) 'C') 117 #define LIT_CHAR_UPPERCASE_D ((ecma_char_t) 'D') 118 #define LIT_CHAR_UPPERCASE_E ((ecma_char_t) 'E') 119 #define LIT_CHAR_UPPERCASE_F ((ecma_char_t) 'F') 120 #define LIT_CHAR_UPPERCASE_G ((ecma_char_t) 'G') 121 #define LIT_CHAR_UPPERCASE_H ((ecma_char_t) 'H') 122 #define LIT_CHAR_UPPERCASE_I ((ecma_char_t) 'I') 123 #define LIT_CHAR_UPPERCASE_J ((ecma_char_t) 'J') 124 #define LIT_CHAR_UPPERCASE_K ((ecma_char_t) 'K') 125 #define LIT_CHAR_UPPERCASE_L ((ecma_char_t) 'L') 126 #define LIT_CHAR_UPPERCASE_M ((ecma_char_t) 'M') 127 #define LIT_CHAR_UPPERCASE_N ((ecma_char_t) 'N') 128 #define LIT_CHAR_UPPERCASE_O ((ecma_char_t) 'O') 129 #define LIT_CHAR_UPPERCASE_P ((ecma_char_t) 'P') 130 #define LIT_CHAR_UPPERCASE_Q ((ecma_char_t) 'Q') 131 #define LIT_CHAR_UPPERCASE_R ((ecma_char_t) 'R') 132 #define LIT_CHAR_UPPERCASE_S ((ecma_char_t) 'S') 133 #define LIT_CHAR_UPPERCASE_T ((ecma_char_t) 'T') 134 #define LIT_CHAR_UPPERCASE_U ((ecma_char_t) 'U') 135 #define LIT_CHAR_UPPERCASE_V ((ecma_char_t) 'V') 136 #define LIT_CHAR_UPPERCASE_W ((ecma_char_t) 'W') 137 #define LIT_CHAR_UPPERCASE_X ((ecma_char_t) 'X') 138 #define LIT_CHAR_UPPERCASE_Y ((ecma_char_t) 'Y') 139 #define LIT_CHAR_UPPERCASE_Z ((ecma_char_t) 'Z') 140 141 /** 142 * Lowercase ASCII letters 143 */ 144 #define LIT_CHAR_LOWERCASE_A ((ecma_char_t) 'a') 145 #define LIT_CHAR_LOWERCASE_B ((ecma_char_t) 'b') 146 #define LIT_CHAR_LOWERCASE_C ((ecma_char_t) 'c') 147 #define LIT_CHAR_LOWERCASE_D ((ecma_char_t) 'd') 148 #define LIT_CHAR_LOWERCASE_E ((ecma_char_t) 'e') 149 #define LIT_CHAR_LOWERCASE_F ((ecma_char_t) 'f') 150 #define LIT_CHAR_LOWERCASE_G ((ecma_char_t) 'g') 151 #define LIT_CHAR_LOWERCASE_H ((ecma_char_t) 'h') 152 #define LIT_CHAR_LOWERCASE_I ((ecma_char_t) 'i') 153 #define LIT_CHAR_LOWERCASE_J ((ecma_char_t) 'j') 154 #define LIT_CHAR_LOWERCASE_K ((ecma_char_t) 'k') 155 #define LIT_CHAR_LOWERCASE_L ((ecma_char_t) 'l') 156 #define LIT_CHAR_LOWERCASE_M ((ecma_char_t) 'm') 157 #define LIT_CHAR_LOWERCASE_N ((ecma_char_t) 'n') 158 #define LIT_CHAR_LOWERCASE_O ((ecma_char_t) 'o') 159 #define LIT_CHAR_LOWERCASE_P ((ecma_char_t) 'p') 160 #define LIT_CHAR_LOWERCASE_Q ((ecma_char_t) 'q') 161 #define LIT_CHAR_LOWERCASE_R ((ecma_char_t) 'r') 162 #define LIT_CHAR_LOWERCASE_S ((ecma_char_t) 's') 163 #define LIT_CHAR_LOWERCASE_T ((ecma_char_t) 't') 164 #define LIT_CHAR_LOWERCASE_U ((ecma_char_t) 'u') 165 #define LIT_CHAR_LOWERCASE_V ((ecma_char_t) 'v') 166 #define LIT_CHAR_LOWERCASE_W ((ecma_char_t) 'w') 167 #define LIT_CHAR_LOWERCASE_X ((ecma_char_t) 'x') 168 #define LIT_CHAR_LOWERCASE_Y ((ecma_char_t) 'y') 169 #define LIT_CHAR_LOWERCASE_Z ((ecma_char_t) 'z') 170 171 /** 172 * ASCII decimal digits 173 */ 174 #define LIT_CHAR_0 ((ecma_char_t) '0') 175 #define LIT_CHAR_1 ((ecma_char_t) '1') 176 #define LIT_CHAR_2 ((ecma_char_t) '2') 177 #define LIT_CHAR_3 ((ecma_char_t) '3') 178 #define LIT_CHAR_4 ((ecma_char_t) '4') 179 #define LIT_CHAR_5 ((ecma_char_t) '5') 180 #define LIT_CHAR_6 ((ecma_char_t) '6') 181 #define LIT_CHAR_7 ((ecma_char_t) '7') 182 #define LIT_CHAR_8 ((ecma_char_t) '8') 183 #define LIT_CHAR_9 ((ecma_char_t) '9') 184 185 /** 186 * ASCII character ranges 187 */ 188 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters range */ 189 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_END LIT_CHAR_UPPERCASE_Z 190 191 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters range */ 192 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_END LIT_CHAR_LOWERCASE_Z 193 194 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters for 195 * hexadecimal digits range */ 196 #define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END LIT_CHAR_UPPERCASE_F 197 198 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters for 199 * hexadecimal digits range */ 200 #define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END LIT_CHAR_LOWERCASE_F 201 202 #define LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN LIT_CHAR_0 /* octal digits range */ 203 #define LIT_CHAR_ASCII_OCTAL_DIGITS_END LIT_CHAR_7 204 205 #define LIT_CHAR_ASCII_DIGITS_BEGIN LIT_CHAR_0 /* decimal digits range */ 206 #define LIT_CHAR_ASCII_DIGITS_END LIT_CHAR_9 207 208 #define LEXER_TO_ASCII_LOWERCASE(character) ((character) | LIT_CHAR_SP) 209 210 bool lit_char_is_octal_digit (ecma_char_t c); 211 bool lit_char_is_decimal_digit (ecma_char_t c); 212 bool lit_char_is_hex_digit (ecma_char_t c); 213 #if ENABLED (JERRY_ES2015) 214 bool lit_char_is_binary_digit (ecma_char_t c); 215 #endif /* ENABLED (JERRY_ES2015) */ 216 uint32_t lit_char_hex_to_int (ecma_char_t c); 217 size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point); 218 size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point); 219 void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p); 220 uint32_t lit_char_hex_lookup (const lit_utf8_byte_t *buf_p, const lit_utf8_byte_t *const buf_end_p, uint32_t lookup); 221 uint32_t lit_parse_decimal (const lit_utf8_byte_t **buffer_p, const lit_utf8_byte_t *const buffer_end_p); 222 223 /** 224 * Null character 225 */ 226 #define LIT_CHAR_NULL ((ecma_char_t) '\0') 227 228 /* 229 * Part of IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3) 230 */ 231 bool lit_char_is_word_char (lit_code_point_t c); 232 233 /* 234 * Utility functions for uppercasing / lowercasing 235 */ 236 237 /** 238 * Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions. 239 */ 240 #define LIT_MAXIMUM_OTHER_CASE_LENGTH (3) 241 242 ecma_length_t lit_char_to_lower_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size); 243 ecma_length_t lit_char_to_upper_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size); 244 245 #endif /* !LIT_CHAR_HELPERS_H */ 246