• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Translation table from POSIX character classes to Unicode property
// escapes, so that matching works across all Unicode locales.
//
// Each entry has the shape:
//   '<posix class>': [<class body>, <needs /u flag>, <negated?>]
//
// - <class body> is regexp source meant to be placed inside [...]
// - <needs /u flag> is true when the body uses \p{...} escapes
// - <negated?> (optional) is true when the POSIX class is the COMPLEMENT
//   of the body, i.e. it must be rendered as [^<class body>]
const posixClasses = {
    '[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
    '[:alpha:]': ['\\p{L}\\p{Nl}', true],
    '[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
    '[:blank:]': ['\\p{Zs}\\t', true],
    '[:cntrl:]': ['\\p{Cc}', true],
    '[:digit:]': ['\\p{Nd}', true],
    // graph: everything that is neither a separator nor an "Other" char
    '[:graph:]': ['\\p{Z}\\p{C}', true, true],
    '[:lower:]': ['\\p{Ll}', true],
    // print: graph plus spaces, i.e. everything that is NOT in the "Other"
    // category (control, format, unassigned, ...), hence negated
    '[:print:]': ['\\p{C}', true, true],
    '[:punct:]': ['\\p{P}', true],
    '[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
    '[:upper:]': ['\\p{Lu}', true],
    '[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
    '[:xdigit:]': ['A-Fa-f0-9', false],
};
// Escape a string for use INSIDE a regexp character class ([...]).
// Only the characters that are special in that position are escaped:
//   [ \ ] - ^
// The caret must be escaped too: an unescaped '^' that ends up as the
// first item of the class would turn it into a negated class.
const braceEscape = (s) => s.replace(/[[\]\\^-]/g, '\\$&');
// Backslash-escape every character that carries special meaning in a
// regular expression (plus whitespace), so the input matches literally.
const regexpEscape = (s) => {
    const withBackslash = (magicChar) => '\\' + magicChar;
    return s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, withBackslash);
};
// The individual range fragments are already escaped by the time they
// get here, so building the class body is plain concatenation.
const rangesToString = (ranges) => {
    const separator = '';
    return ranges.join(separator);
};
/**
 * Parse the bracket expression (character class) that starts at
 * `glob[position]` and translate it into regular expression source.
 *
 * Out-of-order ranges such as `[z-a]` are dropped; other members of the
 * same class can still match.
 *
 * @param {string} glob - the glob pattern being parsed
 * @param {number} position - index of the opening `[` within `glob`
 * @returns {[string, boolean, number, boolean]} a 4-tuple:
 *   0. the regular expression source; `'$.'` (can never match) when the
 *      class is unusable, `''` when the class is never closed
 *   1. whether the `/u` flag is needed to compile that source
 *   2. how many characters of `glob` were consumed (0 when the class is
 *      never closed, so the caller can treat the `[` literally)
 *   3. whether the result is a "magic" pattern; false for an unclosed
 *      class and for a single literal character such as `[_]`
 * @throws {Error} if `glob[position]` is not `[`
 */
export const parseClass = (glob, position) => {
    const pos = position;
    /* c8 ignore start */
    if (glob.charAt(pos) !== '[') {
        throw new Error('not in a brace expression');
    }
    /* c8 ignore stop */
    // positive members of the class (single chars, ranges, posix classes)
    const ranges = [];
    // bodies of posix classes that are defined as a complement
    const negs = [];
    let i = pos + 1;
    let sawStart = false;
    let uflag = false;
    let escaping = false;
    let negate = false;
    let endPos = pos;
    let rangeStart = '';
    WHILE: while (i < glob.length) {
        const c = glob.charAt(i);
        // a leading ! or ^ negates the whole class
        if ((c === '!' || c === '^') && i === pos + 1) {
            negate = true;
            i++;
            continue;
        }
        // a ] closes the class, unless it is the very first member or escaped
        if (c === ']' && sawStart && !escaping) {
            endPos = i + 1;
            break;
        }
        sawStart = true;
        if (c === '\\') {
            if (!escaping) {
                escaping = true;
                i++;
                continue;
            }
            // escaped \ char, fall through and treat like normal char
        }
        if (c === '[' && !escaping) {
            // either a posix class, a collation equivalent, or just a [
            for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
                if (glob.startsWith(cls, i)) {
                    // invalid, [a-[] is fine, but not [a-[:alpha]]
                    if (rangeStart) {
                        return ['$.', false, glob.length - pos, true];
                    }
                    i += cls.length;
                    if (neg) {
                        negs.push(unip);
                    }
                    else {
                        ranges.push(unip);
                    }
                    uflag = uflag || u;
                    continue WHILE;
                }
            }
        }
        // now it's just a normal character, effectively
        escaping = false;
        if (rangeStart) {
            // throw this range away if it's not valid, but others
            // can still match.
            if (c > rangeStart) {
                ranges.push(braceEscape(rangeStart) + '-' + braceEscape(c));
            }
            else if (c === rangeStart) {
                ranges.push(braceEscape(c));
            }
            rangeStart = '';
            i++;
            continue;
        }
        // now might be the start of a range.
        // can be either c-d or c-] or c<more...>] or c] at this point
        if (glob.startsWith('-]', i + 1)) {
            // trailing dash is a literal: the class contains c and -
            ranges.push(braceEscape(c + '-'));
            i += 2;
            continue;
        }
        if (glob.startsWith('-', i + 1)) {
            rangeStart = c;
            i += 2;
            continue;
        }
        // not the start of a range, just a single character
        ranges.push(braceEscape(c));
        i++;
    }
    if (endPos < i) {
        // didn't see the end of the class, not a valid class,
        // but might still be valid as a literal match.
        return ['', false, 0, false];
    }
    // if we got no ranges and no negates, then we have a range that
    // cannot possibly match anything, and that poisons the whole glob
    if (!ranges.length && !negs.length) {
        return ['$.', false, glob.length - pos, true];
    }
    // if we got one positive range, and it's a single character, then that's
    // not actually a magic pattern, it's just that one literal character.
    // we should not treat that as "magic", we should just return the literal
    // character. [_] is a perfectly valid way to escape glob magic chars.
    if (negs.length === 0 &&
        ranges.length === 1 &&
        /^\\?.$/.test(ranges[0]) &&
        !negate) {
        const r = ranges[0].length === 2 ? ranges[0].slice(-1) : ranges[0];
        return [regexpEscape(r), false, endPos - pos, false];
    }
    const sranges = '[' + (negate ? '^' : '') + rangesToString(ranges) + ']';
    const snegs = '[' + (negate ? '' : '^') + rangesToString(negs) + ']';
    let comb;
    if (ranges.length && negs.length) {
        // The class is the UNION of its positive members and the complement
        // posix classes. Negating a union yields an INTERSECTION, so a
        // negated class must satisfy both parts: assert the first with a
        // lookahead, then consume the character with the second.
        comb = negate
            ? '((?=' + sranges + ')' + snegs + ')'
            : '(' + sranges + '|' + snegs + ')';
    }
    else {
        comb = ranges.length ? sranges : snegs;
    }
    return [comb, uflag, endPos - pos, true];
};
148//# sourceMappingURL=brace-expressions.js.map