// (source-viewer navigation chrome removed: Home / Line# / Scopes# / Navigate# / Raw / Download)
"use strict";
// parse a single path portion
// CommonJS module header: mark the module as a transpiled ES module,
// pre-declare the `AST` export, and load the two sibling helpers used by
// the class below (`parseClass` and `unescape`).
Object.defineProperty(exports, "__esModule", { value: true });
exports.AST = void 0;
const brace_expressions_js_1 = require("./brace-expressions.js");
const unescape_js_1 = require("./unescape.js");
// The characters that introduce an extglob when immediately followed by `(`.
const types = new Set(['!', '?', '+', '*', '@']);
// True when `c` is one of the extglob type characters above.
const isExtglobType = (c) => types.has(c);
// Patterns that get prepended to bind to the start of either the
// entire string, or just a single path portion, to prevent dots
// and/or traversal patterns, when needed.
// Exts don't need the ^ or / bit, because the root binds that already.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
const startNoDot = '(?!\\.)';
// characters that indicate a start of pattern needs the "no dots" bit,
// because a dot *might* be matched. `(` is not in the list, because in
// the case of a child extglob, it will handle the prevention itself.
const addPatternStart = new Set(['[', '.']);
// cases where traversal is A-OK, no dot prevention needed
const justDots = new Set(['..', '.']);
// characters that get a backslash re-added when they were escaped in the glob
// (a Set built from a string holds one entry per character)
const reSpecials = new Set('().*{}+?[]^$\\!');
// backslash-escape every regexp metacharacter (and whitespace) in `s`
const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// any single thing other than /
const qmark = '[^/]';
// * => any number of characters
const star = qmark + '*?';
// use + when we need to ensure that *something* matches, because the * is
// the only thing in the path portion.
const starNoEmpty = qmark + '+?';
// remove the \ chars that we added if we end up doing a nonmagic compare
// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
/**
 * Tree representation of a single path portion of a glob pattern.
 *
 * A node with `type === null` is a plain sequence whose parts are literal
 * glob strings and/or child extglob nodes. A node with a `type` of
 * `!`, `?`, `+`, `*` or `@` is an extglob whose parts are the `|`-separated
 * alternatives, each one a `type === null` node.
 */
class AST {
    // extglob type character, or null for a plain sequence of parts
    type;
    // the top-most node of the tree (the node itself when it has no parent)
    #root;
    // true for extglobs; otherwise undefined until the regexp source is built
    #hasMagic;
    // whether the generated regexp needs the `u` flag
    #uflag = false;
    // strings and child AST nodes, in pattern order
    #parts = [];
    #parent;
    // number of parts the parent had when this node was constructed
    #parentIndex;
    // list of `!` nodes, shared by every node in the tree (owned by the root)
    #negs;
    // set on the root once #fillNegs() has run
    #filledNegs = false;
    // options, shared by every node in the tree (owned by the root)
    #options;
    // cached result of toString()
    #toString;
    // set to true if it's an extglob with no children
    // (which really means one child of '')
    #emptyExt = false;
    /**
     * @param type extglob type character, or null for a plain sequence
     * @param parent enclosing node, or undefined for the root
     * @param options only used when this node is the root; children read
     *   the root's options
     */
    constructor(type, parent, options = {}) {
        this.type = type;
        // extglobs are inherently magical
        if (type)
            this.#hasMagic = true;
        this.#parent = parent;
        this.#root = this.#parent ? this.#parent.#root : this;
        this.#options = this.#root === this ? options : this.#root.#options;
        this.#negs = this.#root === this ? [] : this.#root.#negs;
        // negations are only registered until the root has processed them
        if (type === '!' && !this.#root.#filledNegs)
            this.#negs.push(this);
        this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
    }
    /**
     * Whether this node is known to contain magic. Returns the cached flag
     * when set; otherwise true if any child node is an extglob or has magic.
     */
    get hasMagic() {
        /* c8 ignore start */
        if (this.#hasMagic !== undefined)
            return this.#hasMagic;
        /* c8 ignore stop */
        for (const p of this.#parts) {
            if (typeof p === 'string')
                continue;
            if (p.type || p.hasMagic)
                return (this.#hasMagic = true);
        }
        // note: will be undefined until we generate the regexp src and find out
        return this.#hasMagic;
    }
    /**
     * Reconstructs the glob pattern text for this node. The result is cached.
     */
    toString() {
        if (this.#toString !== undefined)
            return this.#toString;
        if (!this.type) {
            return (this.#toString = this.#parts.map(p => String(p)).join(''));
        }
        else {
            return (this.#toString =
                this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
        }
    }
    /**
     * Root-only. For every registered `!` node, copies the parts that follow
     * it (at each non-extglob ancestor level) into each of its alternatives,
     * so the negated alternatives carry the tail of the pattern.
     * Runs once; later calls return immediately.
     */
    #fillNegs() {
        /* c8 ignore start */
        if (this !== this.#root)
            throw new Error('should only call on root');
        if (this.#filledNegs)
            return this;
        /* c8 ignore stop */
        // call toString() once to fill this out
        // (caches the pattern text before the tree is mutated below)
        this.toString();
        this.#filledNegs = true;
        let n;
        while ((n = this.#negs.pop())) {
            if (n.type !== '!')
                continue;
            // walk up the tree, appending everything that comes AFTER parentIndex
            let p = n;
            let pp = p.#parent;
            while (pp) {
                for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
                    for (const part of n.#parts) {
                        /* c8 ignore start */
                        if (typeof part === 'string') {
                            throw new Error('string part in extglob AST??');
                        }
                        /* c8 ignore stop */
                        part.copyIn(pp.#parts[i]);
                    }
                }
                p = pp;
                pp = p.#parent;
            }
        }
        return this;
    }
    /**
     * Appends parts to this node. Empty strings are skipped.
     * @throws Error if a part is neither a string nor an AST whose parent
     *   is this node
     */
    push(...parts) {
        for (const p of parts) {
            if (p === '')
                continue;
            /* c8 ignore start */
            if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
                throw new Error('invalid part: ' + p);
            }
            /* c8 ignore stop */
            this.#parts.push(p);
        }
    }
    /**
     * Array form of the tree. Plain nodes become their parts; extglobs
     * become `[type, ...alternatives]`. A leading `[]` marks a plain node at
     * the start of the pattern, and a trailing `{}` marks the end (for the
     * root, or for the alternatives of a `!` once negations were filled).
     */
    toJSON() {
        const ret = this.type === null
            ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
            : [this.type, ...this.#parts.map(p => p.toJSON())];
        if (this.isStart() && !this.type)
            ret.unshift([]);
        if (this.isEnd() &&
            (this === this.#root ||
                (this.#root.#filledNegs && this.#parent?.type === '!'))) {
            ret.push({});
        }
        return ret;
    }
    /**
     * True if this node can match the very start of the path portion.
     */
    isStart() {
        if (this.#root === this)
            return true;
        // if (this.type) return !!this.#parent?.isStart()
        if (!this.#parent?.isStart())
            return false;
        if (this.#parentIndex === 0)
            return true;
        // if everything AHEAD of this is a negation, then it's still the "start"
        const p = this.#parent;
        for (let i = 0; i < this.#parentIndex; i++) {
            const pp = p.#parts[i];
            if (!(pp instanceof AST && pp.type === '!')) {
                return false;
            }
        }
        return true;
    }
    /**
     * True if this node can match the very end of the path portion.
     * Alternatives of a `!` extglob always count as the end.
     */
    isEnd() {
        if (this.#root === this)
            return true;
        if (this.#parent?.type === '!')
            return true;
        if (!this.#parent?.isEnd())
            return false;
        if (!this.type)
            return this.#parent?.isEnd();
        // if not root, it'll always have a parent
        /* c8 ignore start */
        const pl = this.#parent ? this.#parent.#parts.length : 0;
        /* c8 ignore stop */
        return this.#parentIndex === pl - 1;
    }
    /**
     * Appends a copy of `part` to this node: strings are pushed as-is,
     * AST nodes are cloned with this node as their parent.
     */
    copyIn(part) {
        if (typeof part === 'string')
            this.push(part);
        else
            this.push(part.clone(this));
    }
    /**
     * Deep copy of this node (type and parts) attached to `parent`.
     */
    clone(parent) {
        const c = new AST(this.type, parent);
        for (const p of this.#parts) {
            c.copyIn(p);
        }
        return c;
    }
    /**
     * Parses `str` starting at `pos` into `ast`.
     *
     * For a plain node (`type === null`), `pos` is the first character to
     * read. For an extglob node, `pos` is the index of its `(`.
     * Backslash escapes and `[...]` classes are accumulated verbatim, so
     * extglob starts, `|` and `)` inside them are ignored.
     *
     * @returns the index just past the last character consumed
     */
    static #parseAST(str, ast, pos, opt) {
        let escaping = false;
        let inBrace = false;
        let braceStart = -1;
        let braceNeg = false;
        if (ast.type === null) {
            // outside of a extglob, append until we find a start
            let i = pos;
            let acc = '';
            while (i < str.length) {
                const c = str.charAt(i++);
                // still accumulate escapes at this point, but we do ignore
                // starts that are escaped
                if (escaping || c === '\\') {
                    escaping = !escaping;
                    acc += c;
                    continue;
                }
                if (inBrace) {
                    if (i === braceStart + 1) {
                        if (c === '^' || c === '!') {
                            braceNeg = true;
                        }
                    }
                    else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
                        inBrace = false;
                    }
                    acc += c;
                    continue;
                }
                else if (c === '[') {
                    inBrace = true;
                    braceStart = i;
                    braceNeg = false;
                    acc += c;
                    continue;
                }
                if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
                    ast.push(acc);
                    acc = '';
                    const ext = new AST(c, ast);
                    i = AST.#parseAST(str, ext, i, opt);
                    ast.push(ext);
                    continue;
                }
                acc += c;
            }
            ast.push(acc);
            return i;
        }
        // some kind of extglob, pos is at the (
        // find the next | or )
        let i = pos + 1;
        let part = new AST(null, ast);
        const parts = [];
        let acc = '';
        while (i < str.length) {
            const c = str.charAt(i++);
            // still accumulate escapes at this point, but we do ignore
            // starts that are escaped
            if (escaping || c === '\\') {
                escaping = !escaping;
                acc += c;
                continue;
            }
            if (inBrace) {
                if (i === braceStart + 1) {
                    if (c === '^' || c === '!') {
                        braceNeg = true;
                    }
                }
                else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
                    inBrace = false;
                }
                acc += c;
                continue;
            }
            else if (c === '[') {
                inBrace = true;
                braceStart = i;
                braceNeg = false;
                acc += c;
                continue;
            }
            if (isExtglobType(c) && str.charAt(i) === '(') {
                part.push(acc);
                acc = '';
                const ext = new AST(c, part);
                part.push(ext);
                i = AST.#parseAST(str, ext, i, opt);
                continue;
            }
            if (c === '|') {
                part.push(acc);
                acc = '';
                parts.push(part);
                part = new AST(null, ast);
                continue;
            }
            if (c === ')') {
                if (acc === '' && ast.#parts.length === 0) {
                    ast.#emptyExt = true;
                }
                part.push(acc);
                acc = '';
                ast.push(...parts, part);
                return i;
            }
            acc += c;
        }
        // unfinished extglob
        // if we got here, it was a malformed extglob! not an extglob, but
        // maybe something else in there.
        // Demote the node to a plain one holding the raw text from the
        // type character onwards.
        ast.type = null;
        ast.#hasMagic = undefined;
        ast.#parts = [str.substring(pos - 1)];
        return i;
    }
    /**
     * Builds the AST for one path portion of a glob pattern.
     * @param pattern the glob text of the path portion
     * @param options stored on the root; `noext` disables extglob parsing
     *   at the top level
     */
    static fromGlob(pattern, options = {}) {
        const ast = new AST(null, undefined, options);
        AST.#parseAST(pattern, ast, 0, options);
        return ast;
    }
    /**
     * Returns the regular expression if there's magic, or the unescaped
     * string if not. The returned RegExp is anchored with ^ and $ and
     * carries `_src` (regexp source without anchors) and `_glob` (pattern
     * text) properties.
     */
    toMMPattern() {
        // should only be called on root
        /* c8 ignore start */
        if (this !== this.#root)
            return this.#root.toMMPattern();
        /* c8 ignore stop */
        const glob = this.toString();
        const [re, body, hasMagic, uflag] = this.toRegExpSource(undefined);
        // if we're in nocase mode, and not nocaseMagicOnly, then we do
        // still need a regular expression if we have to case-insensitively
        // match capital/lowercase characters.
        const anyMagic = hasMagic ||
            this.#hasMagic ||
            (this.#options.nocase &&
                !this.#options.nocaseMagicOnly &&
                glob.toUpperCase() !== glob.toLowerCase());
        if (!anyMagic) {
            return body;
        }
        const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
        return Object.assign(new RegExp(`^${re}$`, flags), {
            _src: re,
            _glob: glob,
        });
    }
    // returns the string match, the regexp source, whether there's magic
    // in the regexp (so a regular expression is required) and whether or
    // not the uflag is needed for the regular expression (for posix classes)
    // (as an array: [regexp source, unescaped body, hasMagic, uflag])
    // TODO: instead of injecting the start/end at this point, just return
    // the BODY of the regexp, along with the start/end portions suitable
    // for binding the start/end in either a joined full-path makeRe context
    // (where we bind to (^|/), or a standalone matchPart context (where
    // we bind to ^, and not /).  Otherwise slashes get duped!
    //
    // In part-matching mode, the start is:
    // - if not isStart: nothing
    // - if traversal possible, but not allowed: ^(?!\.\.?$)
    // - if dots allowed or not possible: ^
    // - if dots possible and not allowed: ^(?!\.)
    // end is:
    // - if not isEnd(): nothing
    // - else: $
    //
    // In full-path matching mode, we put the slash at the START of the
    // pattern, so start is:
    // - if first pattern: same as part-matching mode
    // - if not isStart(): nothing
    // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
    // - if dots allowed or not possible: /
    // - if dots possible and not allowed: /(?!\.)
    // end is:
    // - if last pattern, same as part-matching mode
    // - else nothing
    //
    // Always put the (?:$|/) on negated tails, though, because that has to be
    // there to bind the end of the negated pattern portion, and it's easier to
    // just stick it in now rather than try to inject it later in the middle of
    // the pattern.
    //
    // We can just always return the same end, and leave it up to the caller
    // to know whether it's going to be used joined or in parts.
    // And, if the start is adjusted slightly, can do the same there:
    // - if not isStart: nothing
    // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
    // - if dots allowed or not possible: (?:/|^)
    // - if dots possible and not allowed: (?:/|^)(?!\.)
    //
    // But it's better to have a simpler binding without a conditional, for
    // performance, so probably better to return both start options.
    //
    // Then the caller just ignores the end if it's not the first pattern,
    // and the start always gets applied.
    //
    // But that's always going to be $ if it's the ending pattern, or nothing,
    // so the caller can just attach $ at the end of the pattern when building.
    //
    // So the todo is:
    // - better detect what kind of start is needed
    // - return both flavors of starting pattern
    // - attach $ at the end of the pattern when creating the actual RegExp
    //
    // Ah, but wait, no, that all only applies to the root when the first pattern
    // is not an extglob. If the first pattern IS an extglob, then we need all
    // that dot prevention biz to live in the extglob portions, because eg
    // +(*|.x*) can match .xy but not .yx.
    //
    // So, return the two flavors if it's #root and the first child is not an
    // AST, otherwise leave it to the child AST to handle it, and there,
    // use the (?:^|/) style of start binding.
    //
    // Even simplified further:
    // - Since the start for a join is eg /(?!\.) and the start for a part
    // is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
    // or start or whatever) and prepend ^ or / at the Regexp construction.
    toRegExpSource(allowDot) {
        // an explicit allowDot wins; otherwise fall back to the `dot` option
        const dot = allowDot ?? !!this.#options.dot;
        if (this.#root === this)
            this.#fillNegs();
        if (!this.type) {
            const noEmpty = this.isStart() && this.isEnd();
            const src = this.#parts
                .map(p => {
                const [re, _, hasMagic, uflag] = typeof p === 'string'
                    ? AST.#parseGlob(p, this.#hasMagic, noEmpty)
                    : p.toRegExpSource(allowDot);
                this.#hasMagic = this.#hasMagic || hasMagic;
                this.#uflag = this.#uflag || uflag;
                return re;
            })
                .join('');
            let start = '';
            if (this.isStart()) {
                if (typeof this.#parts[0] === 'string') {
                    // this is the string that will match the start of the pattern,
                    // so we need to protect against dots and such.
                    // '.' and '..' cannot match unless the pattern is that exactly,
                    // even if it starts with . or dot:true is set.
                    const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
                    if (!dotTravAllowed) {
                        const aps = addPatternStart;
                        // check if we have a possibility of matching . or ..,
                        // and prevent that.
                        const needNoTrav =
                        // dots are allowed, and the pattern starts with [ or .
                        (dot && aps.has(src.charAt(0))) ||
                            // the pattern starts with \., and then [ or .
                            (src.startsWith('\\.') && aps.has(src.charAt(2))) ||
                            // the pattern starts with \.\., and then [ or .
                            (src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
                        // no need to prevent dots if it can't match a dot, or if a
                        // sub-pattern will be preventing it anyway.
                        const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
                        start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
                    }
                }
            }
            // append the "end of path portion" pattern to negation tails
            let end = '';
            if (this.isEnd() &&
                this.#root.#filledNegs &&
                this.#parent?.type === '!') {
                end = '(?:$|\\/)';
            }
            const final = start + src + end;
            return [
                final,
                (0, unescape_js_1.unescape)(src),
                (this.#hasMagic = !!this.#hasMagic),
                this.#uflag,
            ];
        }
        // We need to calculate the body *twice* if it's a repeat pattern
        // at the start, once in nodot mode, then again in dot mode, so a
        // pattern like *(?) can match 'x.y'
        const repeated = this.type === '*' || this.type === '+';
        // some kind of extglob
        const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
        let body = this.#partsToRegExp(dot);
        if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
            // invalid extglob, has to at least be *something* present, if it's
            // the entire path portion.
            const s = this.toString();
            this.#parts = [s];
            this.type = null;
            this.#hasMagic = undefined;
            return [s, (0, unescape_js_1.unescape)(this.toString()), false, false];
        }
        // XXX abstract out this map method
        // (the original also tested `!startNoDot` here; that constant is a
        // non-empty string, so the test was always false and is dropped)
        let bodyDotAllowed = !repeated || allowDot || dot
            ? ''
            : this.#partsToRegExp(true);
        if (bodyDotAllowed === body) {
            bodyDotAllowed = '';
        }
        if (bodyDotAllowed) {
            body = `(?:${body})(?:${bodyDotAllowed})*?`;
        }
        // an empty !() is exactly equivalent to a starNoEmpty
        let final = '';
        if (this.type === '!' && this.#emptyExt) {
            final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
        }
        else {
            const close = this.type === '!'
                ? // !() must match something,but !(x) can match ''
                    '))' +
                        (this.isStart() && !dot && !allowDot ? startNoDot : '') +
                        star +
                        ')'
                : this.type === '@'
                    ? ')'
                    : this.type === '?'
                        ? ')?'
                        : this.type === '+' && bodyDotAllowed
                            ? ')'
                            : this.type === '*' && bodyDotAllowed
                                ? `)?`
                                : `)${this.type}`;
            final = start + body + close;
        }
        return [
            final,
            (0, unescape_js_1.unescape)(body),
            (this.#hasMagic = !!this.#hasMagic),
            this.#uflag,
        ];
    }
    /**
     * Regexp source for the alternatives of an extglob, joined with `|`.
     * When this node spans the whole path portion, alternatives that
     * produce an empty source are dropped.
     * @param dot passed to each alternative's toRegExpSource()
     */
    #partsToRegExp(dot) {
        return this.#parts
            .map(p => {
            // extglob ASTs should only contain parent ASTs
            /* c8 ignore start */
            if (typeof p === 'string') {
                throw new Error('string type in extglob ast??');
            }
            /* c8 ignore stop */
            // can ignore hasMagic, because extglobs are already always magic
            const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
            this.#uflag = this.#uflag || uflag;
            return re;
        })
            .filter(p => !(this.isStart() && this.isEnd()) || !!p)
            .join('|');
    }
    /**
     * Translates a literal glob string (no extglobs) to regexp source.
     *
     * Runs of consecutive unescaped `*` are emitted as a single `star`:
     * several adjacent lazy `[^/]*?` groups match exactly the same strings
     * as one, but make the regexp engine backtrack catastrophically on a
     * non-matching input.
     *
     * @param glob the glob text
     * @param hasMagic magic already known from the caller
     * @param noEmpty when true and the glob is exactly `*`, it must match at
     *   least one character
     * @returns [regexp source, unescaped glob, hasMagic, needs `u` flag]
     */
    static #parseGlob(glob, hasMagic, noEmpty = false) {
        let escaping = false;
        let re = '';
        let uflag = false;
        // true when the previous character was an unescaped `*`
        let inStar = false;
        for (let i = 0; i < glob.length; i++) {
            const c = glob.charAt(i);
            const afterStar = inStar;
            inStar = false;
            if (escaping) {
                escaping = false;
                re += (reSpecials.has(c) ? '\\' : '') + c;
                continue;
            }
            if (c === '\\') {
                if (i === glob.length - 1) {
                    // a trailing backslash is a literal backslash
                    re += '\\\\';
                }
                else {
                    escaping = true;
                }
                continue;
            }
            if (c === '[') {
                const [src, needUflag, consumed, magic] = (0, brace_expressions_js_1.parseClass)(glob, i);
                // consumed === 0 means no class was parsed here; the `[`
                // then falls through and is escaped as a literal
                if (consumed) {
                    re += src;
                    uflag = uflag || needUflag;
                    i += consumed - 1;
                    hasMagic = hasMagic || magic;
                    continue;
                }
            }
            if (c === '*') {
                inStar = true;
                // the run's first `*` already produced the wildcard
                if (afterStar)
                    continue;
                if (noEmpty && glob === '*')
                    re += starNoEmpty;
                else
                    re += star;
                hasMagic = true;
                continue;
            }
            if (c === '?') {
                re += qmark;
                hasMagic = true;
                continue;
            }
            re += regExpEscape(c);
        }
        return [re, (0, unescape_js_1.unescape)(glob), !!hasMagic, uflag];
    }
}
// publish the class on the CommonJS exports object
exports.AST = AST;
//# sourceMappingURL=ast.js.map