• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// parse a single path portion
2import { parseClass } from './brace-expressions.js';
3import { unescape } from './unescape.js';
4const types = new Set(['!', '?', '+', '*', '@']);
5const isExtglobType = (c) => types.has(c);
6// Patterns that get prepended to bind to the start of either the
7// entire string, or just a single path portion, to prevent dots
8// and/or traversal patterns, when needed.
9// Exts don't need the ^ or / bit, because the root binds that already.
10const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
11const startNoDot = '(?!\\.)';
12// characters that indicate a start of pattern needs the "no dots" bit,
13// because a dot *might* be matched. ( is not in the list, because in
14// the case of a child extglob, it will handle the prevention itself.
15const addPatternStart = new Set(['[', '.']);
16// cases where traversal is A-OK, no dot prevention needed
17const justDots = new Set(['..', '.']);
18const reSpecials = new Set('().*{}+?[]^$\\!');
19const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
20// any single thing other than /
21const qmark = '[^/]';
22// * => any number of characters
23const star = qmark + '*?';
24// use + when we need to ensure that *something* matches, because the * is
25// the only thing in the path portion.
26const starNoEmpty = qmark + '+?';
27// remove the \ chars that we added if we end up doing a nonmagic compare
28// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
29export class AST {
30    type;
31    #root;
32    #hasMagic;
33    #uflag = false;
34    #parts = [];
35    #parent;
36    #parentIndex;
37    #negs;
38    #filledNegs = false;
39    #options;
40    #toString;
41    // set to true if it's an extglob with no children
42    // (which really means one child of '')
43    #emptyExt = false;
44    constructor(type, parent, options = {}) {
45        this.type = type;
46        // extglobs are inherently magical
47        if (type)
48            this.#hasMagic = true;
49        this.#parent = parent;
50        this.#root = this.#parent ? this.#parent.#root : this;
51        this.#options = this.#root === this ? options : this.#root.#options;
52        this.#negs = this.#root === this ? [] : this.#root.#negs;
53        if (type === '!' && !this.#root.#filledNegs)
54            this.#negs.push(this);
55        this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
56    }
57    get hasMagic() {
58        /* c8 ignore start */
59        if (this.#hasMagic !== undefined)
60            return this.#hasMagic;
61        /* c8 ignore stop */
62        for (const p of this.#parts) {
63            if (typeof p === 'string')
64                continue;
65            if (p.type || p.hasMagic)
66                return (this.#hasMagic = true);
67        }
68        // note: will be undefined until we generate the regexp src and find out
69        return this.#hasMagic;
70    }
71    // reconstructs the pattern
72    toString() {
73        if (this.#toString !== undefined)
74            return this.#toString;
75        if (!this.type) {
76            return (this.#toString = this.#parts.map(p => String(p)).join(''));
77        }
78        else {
79            return (this.#toString =
80                this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
81        }
82    }
83    #fillNegs() {
84        /* c8 ignore start */
85        if (this !== this.#root)
86            throw new Error('should only call on root');
87        if (this.#filledNegs)
88            return this;
89        /* c8 ignore stop */
90        // call toString() once to fill this out
91        this.toString();
92        this.#filledNegs = true;
93        let n;
94        while ((n = this.#negs.pop())) {
95            if (n.type !== '!')
96                continue;
97            // walk up the tree, appending everthing that comes AFTER parentIndex
98            let p = n;
99            let pp = p.#parent;
100            while (pp) {
101                for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
102                    for (const part of n.#parts) {
103                        /* c8 ignore start */
104                        if (typeof part === 'string') {
105                            throw new Error('string part in extglob AST??');
106                        }
107                        /* c8 ignore stop */
108                        part.copyIn(pp.#parts[i]);
109                    }
110                }
111                p = pp;
112                pp = p.#parent;
113            }
114        }
115        return this;
116    }
117    push(...parts) {
118        for (const p of parts) {
119            if (p === '')
120                continue;
121            /* c8 ignore start */
122            if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
123                throw new Error('invalid part: ' + p);
124            }
125            /* c8 ignore stop */
126            this.#parts.push(p);
127        }
128    }
129    toJSON() {
130        const ret = this.type === null
131            ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
132            : [this.type, ...this.#parts.map(p => p.toJSON())];
133        if (this.isStart() && !this.type)
134            ret.unshift([]);
135        if (this.isEnd() &&
136            (this === this.#root ||
137                (this.#root.#filledNegs && this.#parent?.type === '!'))) {
138            ret.push({});
139        }
140        return ret;
141    }
142    isStart() {
143        if (this.#root === this)
144            return true;
145        // if (this.type) return !!this.#parent?.isStart()
146        if (!this.#parent?.isStart())
147            return false;
148        if (this.#parentIndex === 0)
149            return true;
150        // if everything AHEAD of this is a negation, then it's still the "start"
151        const p = this.#parent;
152        for (let i = 0; i < this.#parentIndex; i++) {
153            const pp = p.#parts[i];
154            if (!(pp instanceof AST && pp.type === '!')) {
155                return false;
156            }
157        }
158        return true;
159    }
160    isEnd() {
161        if (this.#root === this)
162            return true;
163        if (this.#parent?.type === '!')
164            return true;
165        if (!this.#parent?.isEnd())
166            return false;
167        if (!this.type)
168            return this.#parent?.isEnd();
169        // if not root, it'll always have a parent
170        /* c8 ignore start */
171        const pl = this.#parent ? this.#parent.#parts.length : 0;
172        /* c8 ignore stop */
173        return this.#parentIndex === pl - 1;
174    }
175    copyIn(part) {
176        if (typeof part === 'string')
177            this.push(part);
178        else
179            this.push(part.clone(this));
180    }
181    clone(parent) {
182        const c = new AST(this.type, parent);
183        for (const p of this.#parts) {
184            c.copyIn(p);
185        }
186        return c;
187    }
188    static #parseAST(str, ast, pos, opt) {
189        let escaping = false;
190        let inBrace = false;
191        let braceStart = -1;
192        let braceNeg = false;
193        if (ast.type === null) {
194            // outside of a extglob, append until we find a start
195            let i = pos;
196            let acc = '';
197            while (i < str.length) {
198                const c = str.charAt(i++);
199                // still accumulate escapes at this point, but we do ignore
200                // starts that are escaped
201                if (escaping || c === '\\') {
202                    escaping = !escaping;
203                    acc += c;
204                    continue;
205                }
206                if (inBrace) {
207                    if (i === braceStart + 1) {
208                        if (c === '^' || c === '!') {
209                            braceNeg = true;
210                        }
211                    }
212                    else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
213                        inBrace = false;
214                    }
215                    acc += c;
216                    continue;
217                }
218                else if (c === '[') {
219                    inBrace = true;
220                    braceStart = i;
221                    braceNeg = false;
222                    acc += c;
223                    continue;
224                }
225                if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
226                    ast.push(acc);
227                    acc = '';
228                    const ext = new AST(c, ast);
229                    i = AST.#parseAST(str, ext, i, opt);
230                    ast.push(ext);
231                    continue;
232                }
233                acc += c;
234            }
235            ast.push(acc);
236            return i;
237        }
238        // some kind of extglob, pos is at the (
239        // find the next | or )
240        let i = pos + 1;
241        let part = new AST(null, ast);
242        const parts = [];
243        let acc = '';
244        while (i < str.length) {
245            const c = str.charAt(i++);
246            // still accumulate escapes at this point, but we do ignore
247            // starts that are escaped
248            if (escaping || c === '\\') {
249                escaping = !escaping;
250                acc += c;
251                continue;
252            }
253            if (inBrace) {
254                if (i === braceStart + 1) {
255                    if (c === '^' || c === '!') {
256                        braceNeg = true;
257                    }
258                }
259                else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
260                    inBrace = false;
261                }
262                acc += c;
263                continue;
264            }
265            else if (c === '[') {
266                inBrace = true;
267                braceStart = i;
268                braceNeg = false;
269                acc += c;
270                continue;
271            }
272            if (isExtglobType(c) && str.charAt(i) === '(') {
273                part.push(acc);
274                acc = '';
275                const ext = new AST(c, part);
276                part.push(ext);
277                i = AST.#parseAST(str, ext, i, opt);
278                continue;
279            }
280            if (c === '|') {
281                part.push(acc);
282                acc = '';
283                parts.push(part);
284                part = new AST(null, ast);
285                continue;
286            }
287            if (c === ')') {
288                if (acc === '' && ast.#parts.length === 0) {
289                    ast.#emptyExt = true;
290                }
291                part.push(acc);
292                acc = '';
293                ast.push(...parts, part);
294                return i;
295            }
296            acc += c;
297        }
298        // unfinished extglob
299        // if we got here, it was a malformed extglob! not an extglob, but
300        // maybe something else in there.
301        ast.type = null;
302        ast.#hasMagic = undefined;
303        ast.#parts = [str.substring(pos - 1)];
304        return i;
305    }
306    static fromGlob(pattern, options = {}) {
307        const ast = new AST(null, undefined, options);
308        AST.#parseAST(pattern, ast, 0, options);
309        return ast;
310    }
311    // returns the regular expression if there's magic, or the unescaped
312    // string if not.
313    toMMPattern() {
314        // should only be called on root
315        /* c8 ignore start */
316        if (this !== this.#root)
317            return this.#root.toMMPattern();
318        /* c8 ignore stop */
319        const glob = this.toString();
320        const [re, body, hasMagic, uflag] = this.toRegExpSource();
321        // if we're in nocase mode, and not nocaseMagicOnly, then we do
322        // still need a regular expression if we have to case-insensitively
323        // match capital/lowercase characters.
324        const anyMagic = hasMagic ||
325            this.#hasMagic ||
326            (this.#options.nocase &&
327                !this.#options.nocaseMagicOnly &&
328                glob.toUpperCase() !== glob.toLowerCase());
329        if (!anyMagic) {
330            return body;
331        }
332        const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
333        return Object.assign(new RegExp(`^${re}$`, flags), {
334            _src: re,
335            _glob: glob,
336        });
337    }
338    // returns the string match, the regexp source, whether there's magic
339    // in the regexp (so a regular expression is required) and whether or
340    // not the uflag is needed for the regular expression (for posix classes)
341    // TODO: instead of injecting the start/end at this point, just return
342    // the BODY of the regexp, along with the start/end portions suitable
343    // for binding the start/end in either a joined full-path makeRe context
344    // (where we bind to (^|/), or a standalone matchPart context (where
345    // we bind to ^, and not /).  Otherwise slashes get duped!
346    //
347    // In part-matching mode, the start is:
348    // - if not isStart: nothing
349    // - if traversal possible, but not allowed: ^(?!\.\.?$)
350    // - if dots allowed or not possible: ^
351    // - if dots possible and not allowed: ^(?!\.)
352    // end is:
353    // - if not isEnd(): nothing
354    // - else: $
355    //
356    // In full-path matching mode, we put the slash at the START of the
357    // pattern, so start is:
358    // - if first pattern: same as part-matching mode
359    // - if not isStart(): nothing
360    // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
361    // - if dots allowed or not possible: /
362    // - if dots possible and not allowed: /(?!\.)
363    // end is:
364    // - if last pattern, same as part-matching mode
365    // - else nothing
366    //
367    // Always put the (?:$|/) on negated tails, though, because that has to be
368    // there to bind the end of the negated pattern portion, and it's easier to
369    // just stick it in now rather than try to inject it later in the middle of
370    // the pattern.
371    //
372    // We can just always return the same end, and leave it up to the caller
373    // to know whether it's going to be used joined or in parts.
374    // And, if the start is adjusted slightly, can do the same there:
375    // - if not isStart: nothing
376    // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
377    // - if dots allowed or not possible: (?:/|^)
378    // - if dots possible and not allowed: (?:/|^)(?!\.)
379    //
380    // But it's better to have a simpler binding without a conditional, for
381    // performance, so probably better to return both start options.
382    //
383    // Then the caller just ignores the end if it's not the first pattern,
384    // and the start always gets applied.
385    //
386    // But that's always going to be $ if it's the ending pattern, or nothing,
387    // so the caller can just attach $ at the end of the pattern when building.
388    //
389    // So the todo is:
390    // - better detect what kind of start is needed
391    // - return both flavors of starting pattern
392    // - attach $ at the end of the pattern when creating the actual RegExp
393    //
394    // Ah, but wait, no, that all only applies to the root when the first pattern
395    // is not an extglob. If the first pattern IS an extglob, then we need all
396    // that dot prevention biz to live in the extglob portions, because eg
397    // +(*|.x*) can match .xy but not .yx.
398    //
399    // So, return the two flavors if it's #root and the first child is not an
400    // AST, otherwise leave it to the child AST to handle it, and there,
401    // use the (?:^|/) style of start binding.
402    //
403    // Even simplified further:
404    // - Since the start for a join is eg /(?!\.) and the start for a part
405    // is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
406    // or start or whatever) and prepend ^ or / at the Regexp construction.
407    toRegExpSource(allowDot) {
408        const dot = allowDot ?? !!this.#options.dot;
409        if (this.#root === this)
410            this.#fillNegs();
411        if (!this.type) {
412            const noEmpty = this.isStart() && this.isEnd();
413            const src = this.#parts
414                .map(p => {
415                const [re, _, hasMagic, uflag] = typeof p === 'string'
416                    ? AST.#parseGlob(p, this.#hasMagic, noEmpty)
417                    : p.toRegExpSource(allowDot);
418                this.#hasMagic = this.#hasMagic || hasMagic;
419                this.#uflag = this.#uflag || uflag;
420                return re;
421            })
422                .join('');
423            let start = '';
424            if (this.isStart()) {
425                if (typeof this.#parts[0] === 'string') {
426                    // this is the string that will match the start of the pattern,
427                    // so we need to protect against dots and such.
428                    // '.' and '..' cannot match unless the pattern is that exactly,
429                    // even if it starts with . or dot:true is set.
430                    const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
431                    if (!dotTravAllowed) {
432                        const aps = addPatternStart;
433                        // check if we have a possibility of matching . or ..,
434                        // and prevent that.
435                        const needNoTrav =
436                        // dots are allowed, and the pattern starts with [ or .
437                        (dot && aps.has(src.charAt(0))) ||
438                            // the pattern starts with \., and then [ or .
439                            (src.startsWith('\\.') && aps.has(src.charAt(2))) ||
440                            // the pattern starts with \.\., and then [ or .
441                            (src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
442                        // no need to prevent dots if it can't match a dot, or if a
443                        // sub-pattern will be preventing it anyway.
444                        const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
445                        start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
446                    }
447                }
448            }
449            // append the "end of path portion" pattern to negation tails
450            let end = '';
451            if (this.isEnd() &&
452                this.#root.#filledNegs &&
453                this.#parent?.type === '!') {
454                end = '(?:$|\\/)';
455            }
456            const final = start + src + end;
457            return [
458                final,
459                unescape(src),
460                (this.#hasMagic = !!this.#hasMagic),
461                this.#uflag,
462            ];
463        }
464        // We need to calculate the body *twice* if it's a repeat pattern
465        // at the start, once in nodot mode, then again in dot mode, so a
466        // pattern like *(?) can match 'x.y'
467        const repeated = this.type === '*' || this.type === '+';
468        // some kind of extglob
469        const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
470        let body = this.#partsToRegExp(dot);
471        if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
472            // invalid extglob, has to at least be *something* present, if it's
473            // the entire path portion.
474            const s = this.toString();
475            this.#parts = [s];
476            this.type = null;
477            this.#hasMagic = undefined;
478            return [s, unescape(this.toString()), false, false];
479        }
480        // XXX abstract out this map method
481        let bodyDotAllowed = !repeated || allowDot || dot || !startNoDot
482            ? ''
483            : this.#partsToRegExp(true);
484        if (bodyDotAllowed === body) {
485            bodyDotAllowed = '';
486        }
487        if (bodyDotAllowed) {
488            body = `(?:${body})(?:${bodyDotAllowed})*?`;
489        }
490        // an empty !() is exactly equivalent to a starNoEmpty
491        let final = '';
492        if (this.type === '!' && this.#emptyExt) {
493            final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
494        }
495        else {
496            const close = this.type === '!'
497                ? // !() must match something,but !(x) can match ''
498                    '))' +
499                        (this.isStart() && !dot && !allowDot ? startNoDot : '') +
500                        star +
501                        ')'
502                : this.type === '@'
503                    ? ')'
504                    : this.type === '?'
505                        ? ')?'
506                        : this.type === '+' && bodyDotAllowed
507                            ? ')'
508                            : this.type === '*' && bodyDotAllowed
509                                ? `)?`
510                                : `)${this.type}`;
511            final = start + body + close;
512        }
513        return [
514            final,
515            unescape(body),
516            (this.#hasMagic = !!this.#hasMagic),
517            this.#uflag,
518        ];
519    }
520    #partsToRegExp(dot) {
521        return this.#parts
522            .map(p => {
523            // extglob ASTs should only contain parent ASTs
524            /* c8 ignore start */
525            if (typeof p === 'string') {
526                throw new Error('string type in extglob ast??');
527            }
528            /* c8 ignore stop */
529            // can ignore hasMagic, because extglobs are already always magic
530            const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
531            this.#uflag = this.#uflag || uflag;
532            return re;
533        })
534            .filter(p => !(this.isStart() && this.isEnd()) || !!p)
535            .join('|');
536    }
537    static #parseGlob(glob, hasMagic, noEmpty = false) {
538        let escaping = false;
539        let re = '';
540        let uflag = false;
541        for (let i = 0; i < glob.length; i++) {
542            const c = glob.charAt(i);
543            if (escaping) {
544                escaping = false;
545                re += (reSpecials.has(c) ? '\\' : '') + c;
546                continue;
547            }
548            if (c === '\\') {
549                if (i === glob.length - 1) {
550                    re += '\\\\';
551                }
552                else {
553                    escaping = true;
554                }
555                continue;
556            }
557            if (c === '[') {
558                const [src, needUflag, consumed, magic] = parseClass(glob, i);
559                if (consumed) {
560                    re += src;
561                    uflag = uflag || needUflag;
562                    i += consumed - 1;
563                    hasMagic = hasMagic || magic;
564                    continue;
565                }
566            }
567            if (c === '*') {
568                if (noEmpty && glob === '*')
569                    re += starNoEmpty;
570                else
571                    re += star;
572                hasMagic = true;
573                continue;
574            }
575            if (c === '?') {
576                re += qmark;
577                hasMagic = true;
578                continue;
579            }
580            re += regExpEscape(c);
581        }
582        return [re, unescape(glob), !!hasMagic, uflag];
583    }
584}
585//# sourceMappingURL=ast.js.map