• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * @author Toru Nagashima <https://github.com/mysticatea>
 */
4"use strict";
5
6const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
7const { RegExpParser, visitRegExpAST } = require("regexpp");
8const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
9
//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------
13
/**
 * Iterate over the character sequences found in the elements of a character class.
 *
 * A `CharacterClassRange` element (e.g. `a-z`) can steal a part of a character
 * sequence: its `min` may be the tail of the sequence that precedes it and its
 * `max` may be the head of the sequence that follows it. This generator undoes
 * that split by appending `min.value` to the current sequence, yielding it, and
 * starting the next sequence with `max.value`.
 *
 * A `CharacterSet` element ends the current sequence (which is yielded only if
 * it is non-empty). Elements of any other type are ignored.
 * @param {regexpp.AST.CharacterClassElement[]} nodes The elements of a character class.
 * @returns {IterableIterator<number[]>} The character sequences, each an array of the `value`s of consecutive elements.
 */
function *iterateCharacterSequence(nodes) {
    let seq = [];

    for (const node of nodes) {
        switch (node.type) {
            case "Character":
                seq.push(node.value);
                break;

            case "CharacterClassRange":

                // The lower bound closes the current sequence...
                seq.push(node.min.value);
                yield seq;

                // ...and the upper bound opens the next one.
                seq = [node.max.value];
                break;

            case "CharacterSet":

                // A set is a sequence boundary; never yield an empty sequence.
                if (seq.length > 0) {
                    yield seq;
                    seq = [];
                }
                break;

            // no default
        }
    }

    // Flush whatever is left after the last element.
    if (seq.length > 0) {
        yield seq;
    }
}
52
/**
 * Detectors for misleading character sequences, keyed by kind.
 *
 * Each detector receives one sequence of character values (as produced by
 * `iterateCharacterSequence`) and returns `true` if the sequence contains the
 * pattern it looks for. The keys double as the rule's message ids.
 */
const hasCharacterSequence = {

    /**
     * Checks for two adjacent values that `isSurrogatePair` accepts as a pair.
     * @param {number[]} chars The character sequence to check.
     * @returns {boolean} `true` if such a pair exists.
     */
    surrogatePairWithoutUFlag(chars) {
        return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c));
    },

    /**
     * Checks for a combining character that directly follows a
     * non-combining character.
     * @param {number[]} chars The character sequence to check.
     * @returns {boolean} `true` if such a combination exists.
     */
    combiningClass(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isCombiningCharacter(c) &&
            !isCombiningCharacter(chars[i - 1])
        ));
    },

    /**
     * Checks for an emoji modifier that directly follows a value which is
     * not itself an emoji modifier.
     * @param {number[]} chars The character sequence to check.
     * @returns {boolean} `true` if such a combination exists.
     */
    emojiModifier(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isEmojiModifier(c) &&
            !isEmojiModifier(chars[i - 1])
        ));
    },

    /**
     * Checks for two adjacent regional indicator symbols.
     * @param {number[]} chars The character sequence to check.
     * @returns {boolean} `true` if such a pair exists.
     */
    regionalIndicatorSymbol(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isRegionalIndicatorSymbol(c) &&
            isRegionalIndicatorSymbol(chars[i - 1])
        ));
    },

    /**
     * Checks for a U+200D (ZERO WIDTH JOINER) that has a neighbor on both
     * sides, neither of which is itself U+200D.
     * @param {number[]} chars The character sequence to check.
     * @returns {boolean} `true` if such a joined sequence exists.
     */
    zwj(chars) {
        const lastIndex = chars.length - 1;

        return chars.some((c, i) => (

            // A joiner at either end of the sequence has nothing to join.
            i !== 0 &&
            i !== lastIndex &&
            c === 0x200d &&
            chars[i - 1] !== 0x200d &&
            chars[i + 1] !== 0x200d
        ));
    }
};

// The detector names, in declaration order.
const kinds = Object.keys(hasCharacterSequence);
96
//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------
100
101module.exports = {
102    meta: {
103        type: "problem",
104
105        docs: {
106            description: "disallow characters which are made with multiple code points in character class syntax",
107            category: "Possible Errors",
108            recommended: true,
109            url: "https://eslint.org/docs/rules/no-misleading-character-class"
110        },
111
112        schema: [],
113
114        messages: {
115            surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
116            combiningClass: "Unexpected combined character in character class.",
117            emojiModifier: "Unexpected modified Emoji in character class.",
118            regionalIndicatorSymbol: "Unexpected national flag in character class.",
119            zwj: "Unexpected joined character sequence in character class."
120        }
121    },
122    create(context) {
123        const parser = new RegExpParser();
124
125        /**
126         * Verify a given regular expression.
127         * @param {Node} node The node to report.
128         * @param {string} pattern The regular expression pattern to verify.
129         * @param {string} flags The flags of the regular expression.
130         * @returns {void}
131         */
132        function verify(node, pattern, flags) {
133            const has = {
134                surrogatePairWithoutUFlag: false,
135                combiningClass: false,
136                variationSelector: false,
137                emojiModifier: false,
138                regionalIndicatorSymbol: false,
139                zwj: false
140            };
141            let patternNode;
142
143            try {
144                patternNode = parser.parsePattern(
145                    pattern,
146                    0,
147                    pattern.length,
148                    flags.includes("u")
149                );
150            } catch {
151
152                // Ignore regular expressions with syntax errors
153                return;
154            }
155
156            visitRegExpAST(patternNode, {
157                onCharacterClassEnter(ccNode) {
158                    for (const chars of iterateCharacterSequence(ccNode.elements)) {
159                        for (const kind of kinds) {
160                            has[kind] = has[kind] || hasCharacterSequence[kind](chars);
161                        }
162                    }
163                }
164            });
165
166            for (const kind of kinds) {
167                if (has[kind]) {
168                    context.report({ node, messageId: kind });
169                }
170            }
171        }
172
173        return {
174            "Literal[regex]"(node) {
175                verify(node, node.regex.pattern, node.regex.flags);
176            },
177            "Program"() {
178                const scope = context.getScope();
179                const tracker = new ReferenceTracker(scope);
180
181                /*
182                 * Iterate calls of RegExp.
183                 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
184                 *       `const {RegExp: a} = window; new a()`, etc...
185                 */
186                for (const { node } of tracker.iterateGlobalReferences({
187                    RegExp: { [CALL]: true, [CONSTRUCT]: true }
188                })) {
189                    const [patternNode, flagsNode] = node.arguments;
190                    const pattern = getStringIfConstant(patternNode, scope);
191                    const flags = getStringIfConstant(flagsNode, scope);
192
193                    if (typeof pattern === "string") {
194                        verify(node, pattern, flags || "");
195                    }
196                }
197            }
198        };
199    }
200};
201