/**
 * The last two code points (`xFFFE` and `xFFFF`) of each of the 17 planes,
 * U+FFFE through U+10FFFF. Together with the U+FDD0..U+FDEF range these are
 * what {@link isUndefinedCodePoint} reports.
 */
const UNDEFINED_CODE_POINTS = new Set([
    0xff_fe, 0xff_ff, 0x1_ff_fe, 0x1_ff_ff, 0x2_ff_fe, 0x2_ff_ff, 0x3_ff_fe, 0x3_ff_ff, 0x4_ff_fe, 0x4_ff_ff, 0x5_ff_fe,
    0x5_ff_ff, 0x6_ff_fe, 0x6_ff_ff, 0x7_ff_fe, 0x7_ff_ff, 0x8_ff_fe, 0x8_ff_ff, 0x9_ff_fe, 0x9_ff_ff, 0xa_ff_fe,
    0xa_ff_ff, 0xb_ff_fe, 0xb_ff_ff, 0xc_ff_fe, 0xc_ff_ff, 0xd_ff_fe, 0xd_ff_ff, 0xe_ff_fe, 0xe_ff_ff, 0xf_ff_fe,
    0xf_ff_ff, 0x10_ff_fe, 0x10_ff_ff,
]);

/** U+FFFD as a one-character string. */
export const REPLACEMENT_CHARACTER = '\uFFFD';

/**
 * Named numeric code points.
 *
 * `EOF` is the only negative member, so it cannot collide with a real code point.
 * The DIGIT_* and LATIN_* members are the end points (plus `F`/`X`) of the
 * ASCII digit and letter ranges.
 */
export enum CODE_POINTS {
    EOF = -1,
    NULL = 0x00,
    TABULATION = 0x09,
    CARRIAGE_RETURN = 0x0d,
    LINE_FEED = 0x0a,
    FORM_FEED = 0x0c,
    SPACE = 0x20,
    EXCLAMATION_MARK = 0x21,
    QUOTATION_MARK = 0x22,
    NUMBER_SIGN = 0x23,
    AMPERSAND = 0x26,
    APOSTROPHE = 0x27,
    HYPHEN_MINUS = 0x2d,
    SOLIDUS = 0x2f,
    DIGIT_0 = 0x30,
    DIGIT_9 = 0x39,
    SEMICOLON = 0x3b,
    LESS_THAN_SIGN = 0x3c,
    EQUALS_SIGN = 0x3d,
    GREATER_THAN_SIGN = 0x3e,
    QUESTION_MARK = 0x3f,
    LATIN_CAPITAL_A = 0x41,
    LATIN_CAPITAL_F = 0x46,
    LATIN_CAPITAL_X = 0x58,
    LATIN_CAPITAL_Z = 0x5a,
    RIGHT_SQUARE_BRACKET = 0x5d,
    GRAVE_ACCENT = 0x60,
    LATIN_SMALL_A = 0x61,
    LATIN_SMALL_F = 0x66,
    LATIN_SMALL_X = 0x78,
    LATIN_SMALL_Z = 0x7a,
    REPLACEMENT_CHARACTER = 0xff_fd,
}

/**
 * Fixed character sequences, keyed by name.
 * NOTE(review): the keyword entries are stored lower-case; presumably callers
 * compare case-insensitively — confirm against the tokenizer.
 */
export const SEQUENCES = {
    DASH_DASH: '--',
    CDATA_START: '[CDATA[',
    DOCTYPE: 'doctype',
    SCRIPT: 'script',
    PUBLIC: 'public',
    SYSTEM: 'system',
};

// Surrogates

/**
 * Tells whether `cp` lies anywhere in the UTF-16 surrogate range.
 *
 * @param cp Code point (or UTF-16 code unit) to test.
 * @returns `true` for U+D800..U+DFFF inclusive, `false` otherwise.
 */
export function isSurrogate(cp: number): boolean {
    return cp >= 0xd8_00 && cp <= 0xdf_ff;
}

/**
 * Tells whether `cp` lies in the second half of the surrogate range.
 *
 * Despite the name this inspects a single value, not a pair.
 * NOTE(review): presumably called on the code unit that follows a leading
 * surrogate to decide whether the two form a pair — confirm against caller.
 *
 * @param cp Code unit to test.
 * @returns `true` for U+DC00..U+DFFF inclusive, `false` otherwise.
 */
export function isSurrogatePair(cp: number): boolean {
    return cp >= 0xdc_00 && cp <= 0xdf_ff;
}

/**
 * Combines two surrogate code units into the code point they encode.
 *
 * No range validation is performed; the result is only meaningful when `cp1`
 * is in U+D800..U+DBFF and `cp2` is in U+DC00..U+DFFF.
 *
 * @param cp1 Leading (high) surrogate.
 * @param cp2 Trailing (low) surrogate.
 * @returns The combined code point.
 */
export function getSurrogatePairCodePoint(cp1: number, cp2: number): number {
    // 0x2400 === 0x10000 - 0xDC00: adds the supplementary-plane base and removes
    // the low-surrogate offset from cp2 in a single constant.
    return (cp1 - 0xd8_00) * 0x4_00 + 0x24_00 + cp2;
}

/**
 * Tells whether `cp` is a control code point, excluding NULL and ASCII whitespace.
 *
 * @param cp Code point to test.
 * @returns `true` for U+0001..U+001F other than TAB (0x09), LF (0x0A),
 *   FF (0x0C) and CR (0x0D), and for U+007F..U+009F; `false` otherwise.
 */
export function isControlCodePoint(cp: number): boolean {
    // SPACE (0x20) needs no explicit exclusion: it is already above the 0x1F bound.
    return (
        (cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) ||
        (cp >= 0x7f && cp <= 0x9f)
    );
}

/**
 * Tells whether `cp` is one of the code points this module treats as undefined.
 *
 * @param cp Code point to test.
 * @returns `true` for U+FDD0..U+FDEF and for the `xFFFE`/`xFFFF` code points
 *   of every plane up to U+10FFFF; `false` otherwise.
 */
export function isUndefinedCodePoint(cp: number): boolean {
    return (cp >= 0xfd_d0 && cp <= 0xfd_ef) || UNDEFINED_CODE_POINTS.has(cp);
}