1/** 2 * @author Toru Nagashima <https://github.com/mysticatea> 3 */ 4"use strict"; 5 6const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils"); 7const { RegExpParser, visitRegExpAST } = require("regexpp"); 8const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); 9 10//------------------------------------------------------------------------------ 11// Helpers 12//------------------------------------------------------------------------------ 13 14/** 15 * Iterate character sequences of a given nodes. 16 * 17 * CharacterClassRange syntax can steal a part of character sequence, 18 * so this function reverts CharacterClassRange syntax and restore the sequence. 19 * @param {regexpp.AST.CharacterClassElement[]} nodes The node list to iterate character sequences. 20 * @returns {IterableIterator<number[]>} The list of character sequences. 21 */ 22function *iterateCharacterSequence(nodes) { 23 let seq = []; 24 25 for (const node of nodes) { 26 switch (node.type) { 27 case "Character": 28 seq.push(node.value); 29 break; 30 31 case "CharacterClassRange": 32 seq.push(node.min.value); 33 yield seq; 34 seq = [node.max.value]; 35 break; 36 37 case "CharacterSet": 38 if (seq.length > 0) { 39 yield seq; 40 seq = []; 41 } 42 break; 43 44 // no default 45 } 46 } 47 48 if (seq.length > 0) { 49 yield seq; 50 } 51} 52 53const hasCharacterSequence = { 54 surrogatePairWithoutUFlag(chars) { 55 return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c)); 56 }, 57 58 combiningClass(chars) { 59 return chars.some((c, i) => ( 60 i !== 0 && 61 isCombiningCharacter(c) && 62 !isCombiningCharacter(chars[i - 1]) 63 )); 64 }, 65 66 emojiModifier(chars) { 67 return chars.some((c, i) => ( 68 i !== 0 && 69 isEmojiModifier(c) && 70 !isEmojiModifier(chars[i - 1]) 71 )); 72 }, 73 74 regionalIndicatorSymbol(chars) { 75 return chars.some((c, i) => ( 76 i !== 0 && 77 isRegionalIndicatorSymbol(c) && 78 isRegionalIndicatorSymbol(chars[i - 1]) 79 )); 80 }, 81 82 zwj(chars) { 83 const lastIndex = chars.length - 1; 84 85 return chars.some((c, i) => ( 86 i !== 0 && 87 i !== lastIndex && 88 c === 0x200d && 89 chars[i - 1] !== 0x200d && 90 chars[i + 1] !== 0x200d 91 )); 92 } 93}; 94 95const kinds = Object.keys(hasCharacterSequence); 96 97//------------------------------------------------------------------------------ 98// Rule Definition 99//------------------------------------------------------------------------------ 100 101module.exports = { 102 meta: { 103 type: "problem", 104 105 docs: { 106 description: "disallow characters which are made with multiple code points in character class syntax", 107 category: "Possible Errors", 108 recommended: true, 109 url: "https://eslint.org/docs/rules/no-misleading-character-class" 110 }, 111 112 schema: [], 113 114 messages: { 115 surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.", 116 combiningClass: "Unexpected combined character in character class.", 117 emojiModifier: "Unexpected modified Emoji in character class.", 118 regionalIndicatorSymbol: "Unexpected national flag in character class.", 119 zwj: "Unexpected joined character sequence in character class." 120 } 121 }, 122 create(context) { 123 const parser = new RegExpParser(); 124 125 /** 126 * Verify a given regular expression. 127 * @param {Node} node The node to report. 128 * @param {string} pattern The regular expression pattern to verify. 129 * @param {string} flags The flags of the regular expression. 130 * @returns {void} 131 */ 132 function verify(node, pattern, flags) { 133 const has = { 134 surrogatePairWithoutUFlag: false, 135 combiningClass: false, 136 variationSelector: false, 137 emojiModifier: false, 138 regionalIndicatorSymbol: false, 139 zwj: false 140 }; 141 let patternNode; 142 143 try { 144 patternNode = parser.parsePattern( 145 pattern, 146 0, 147 pattern.length, 148 flags.includes("u") 149 ); 150 } catch { 151 152 // Ignore regular expressions with syntax errors 153 return; 154 } 155 156 visitRegExpAST(patternNode, { 157 onCharacterClassEnter(ccNode) { 158 for (const chars of iterateCharacterSequence(ccNode.elements)) { 159 for (const kind of kinds) { 160 has[kind] = has[kind] || hasCharacterSequence[kind](chars); 161 } 162 } 163 } 164 }); 165 166 for (const kind of kinds) { 167 if (has[kind]) { 168 context.report({ node, messageId: kind }); 169 } 170 } 171 } 172 173 return { 174 "Literal[regex]"(node) { 175 verify(node, node.regex.pattern, node.regex.flags); 176 }, 177 "Program"() { 178 const scope = context.getScope(); 179 const tracker = new ReferenceTracker(scope); 180 181 /* 182 * Iterate calls of RegExp. 183 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`, 184 * `const {RegExp: a} = window; new a()`, etc... 185 */ 186 for (const { node } of tracker.iterateGlobalReferences({ 187 RegExp: { [CALL]: true, [CONSTRUCT]: true } 188 })) { 189 const [patternNode, flagsNode] = node.arguments; 190 const pattern = getStringIfConstant(patternNode, scope); 191 const flags = getStringIfConstant(flagsNode, scope); 192 193 if (typeof pattern === "string") { 194 verify(node, pattern, flags || ""); 195 } 196 } 197 } 198 }; 199 } 200}; 201