"use strict";
// Parser for a single path portion of a glob pattern.
Object.defineProperty(exports, "__esModule", { value: true });
exports.AST = void 0;
const brace_expressions_js_1 = require("./brace-expressions.js");
const unescape_js_1 = require("./unescape.js");

// Characters that open an extglob when immediately followed by "(".
const types = new Set('!?+*@');

/** True when `c` is one of the extglob type characters. */
function isExtglobType(c) {
    return types.has(c);
}

// Lookahead fragments that get prepended to a pattern (either the whole
// string or one path portion) to forbid dot files and/or "." / ".."
// traversal entries when required. Extglobs do not need the ^ or / part,
// since the root already anchors that.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
const startNoDot = '(?!\\.)';

// Leading regexp characters after which a dot *could* be matched, so the
// "no dots" guard is required. "(" is intentionally absent: a child
// extglob takes care of the guard on its own.
const addPatternStart = new Set('[.');

// Patterns for which traversal is fine and no dot guard is needed.
const justDots = new Set(['..', '.']);

// Characters that must stay backslash-escaped when they appear escaped
// in the glob.
const reSpecials = new Set([
    '(', ')', '.', '*', '{', '}', '+', '?', '[', ']', '^', '$', '\\', '!',
]);

/** Backslash-escapes every regexp-significant character in `s`. */
function regExpEscape(s) {
    return s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
}

// "?" => exactly one character that is not a slash
const qmark = '[^/]';
// "*" => any run of non-slash characters (lazy)
const star = `${qmark}*?`;
// The "+" flavour is used when *something* has to match because the * is
// the only thing in the path portion.
const starNoEmpty = qmark + '+?';
// remove the \ chars that we added if we end up doing a nonmagic compare
// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
/**
 * Tree representation of one path portion of a glob pattern.
 *
 * A node whose `type` is null is a plain sequence of parts (strings and
 * child extglob nodes). A node whose `type` is one of `! ? + * @` is an
 * extglob; each of its parts is a type-null child node holding one
 * `|`-separated alternative.
 */
class AST {
    // null for a plain sequence, or the extglob type character
    type;
    // the top-most node of the tree (the node itself when it has no parent)
    #root;
    // undefined until known; extglobs set it to true in the constructor
    #hasMagic;
    // whether the generated regexp needs the "u" flag
    #uflag = false;
    // strings and/or child AST nodes
    #parts = [];
    #parent;
    // number of parts the parent had when this node was created
    #parentIndex;
    // list of `!` nodes, shared by every node through the root
    #negs;
    #filledNegs = false;
    // options object, shared by every node through the root
    #options;
    // cached result of toString()
    #toString;
    // set to true if it's an extglob with no children
    // (which really means one child of '')
    #emptyExt = false;
    /**
     * @param type extglob type character, or null for a plain sequence
     * @param parent enclosing node, or undefined for the root
     * @param options only read when this node is the root; children reuse
     *   the root's options
     */
    constructor(type, parent, options = {}) {
        this.type = type;
        // extglobs are inherently magical
        if (type)
            this.#hasMagic = true;
        this.#parent = parent;
        this.#root = this.#parent ? this.#parent.#root : this;
        this.#options = this.#root === this ? options : this.#root.#options;
        this.#negs = this.#root === this ? [] : this.#root.#negs;
        // negations are recorded so #fillNegs() can append their tails later;
        // nodes created after that pass (clones) are not recorded again
        if (type === '!' && !this.#root.#filledNegs)
            this.#negs.push(this);
        this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
    }
    /**
     * Whether this node is known to contain magic. May be undefined when
     * no extglob child was found and the regexp source has not been
     * generated yet.
     */
    get hasMagic() {
        /* c8 ignore start */
        if (this.#hasMagic !== undefined)
            return this.#hasMagic;
        /* c8 ignore stop */
        for (const p of this.#parts) {
            if (typeof p === 'string')
                continue;
            if (p.type || p.hasMagic)
                return (this.#hasMagic = true);
        }
        // note: will be undefined until we generate the regexp src and find out
        return this.#hasMagic;
    }
    /**
     * Reconstructs the pattern text for this node. The result is cached
     * on first call.
     */
    toString() {
        if (this.#toString !== undefined)
            return this.#toString;
        if (!this.type) {
            return (this.#toString = this.#parts.map(p => String(p)).join(''));
        }
        else {
            return (this.#toString =
                this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
        }
    }
    /**
     * Root-only, one-shot pass: for every recorded `!` node, copies the
     * parts that FOLLOW it (at each non-extglob ancestor level) into each
     * of its alternatives.
     *
     * @returns this
     * @throws Error when called on a non-root node
     */
    #fillNegs() {
        /* c8 ignore start */
        if (this !== this.#root)
            throw new Error('should only call on root');
        if (this.#filledNegs)
            return this;
        /* c8 ignore stop */
        // call toString() once to fill this out, so the cached string
        // reflects the pattern before the tails are copied in
        this.toString();
        this.#filledNegs = true;
        let n;
        while ((n = this.#negs.pop())) {
            if (n.type !== '!')
                continue;
            // walk up the tree, appending everything that comes AFTER parentIndex
            let p = n;
            let pp = p.#parent;
            while (pp) {
                for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
                    for (const part of n.#parts) {
                        /* c8 ignore start */
                        if (typeof part === 'string') {
                            throw new Error('string part in extglob AST??');
                        }
                        /* c8 ignore stop */
                        part.copyIn(pp.#parts[i]);
                    }
                }
                p = pp;
                pp = p.#parent;
            }
        }
        return this;
    }
    /**
     * Appends parts to this node. Empty strings are skipped.
     *
     * @throws Error when a part is neither a string nor an AST whose
     *   parent is this node
     */
    push(...parts) {
        for (const p of parts) {
            if (p === '')
                continue;
            /* c8 ignore start */
            if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
                throw new Error('invalid part: ' + p);
            }
            /* c8 ignore stop */
            this.#parts.push(p);
        }
    }
    /**
     * Nested-array view of the tree. Plain sequences become an array of
     * their parts; extglobs become `[type, ...parts]`. A leading `[]` is
     * added on a start-bound plain sequence, and a trailing `{}` on an
     * end-bound root or filled negation tail.
     */
    toJSON() {
        const ret = this.type === null
            ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
            : [this.type, ...this.#parts.map(p => p.toJSON())];
        if (this.isStart() && !this.type)
            ret.unshift([]);
        if (this.isEnd() &&
            (this === this.#root ||
                (this.#root.#filledNegs && this.#parent?.type === '!'))) {
            ret.push({});
        }
        return ret;
    }
    /**
     * Whether this node can match at the very start of the path portion.
     */
    isStart() {
        if (this.#root === this)
            return true;
        // if (this.type) return !!this.#parent?.isStart()
        if (!this.#parent?.isStart())
            return false;
        if (this.#parentIndex === 0)
            return true;
        // if everything AHEAD of this is a negation, then it's still the "start"
        const p = this.#parent;
        for (let i = 0; i < this.#parentIndex; i++) {
            const pp = p.#parts[i];
            if (!(pp instanceof AST && pp.type === '!')) {
                return false;
            }
        }
        return true;
    }
    /**
     * Whether this node can match at the very end of the path portion.
     */
    isEnd() {
        if (this.#root === this)
            return true;
        // alternatives of a negation always count as an end
        if (this.#parent?.type === '!')
            return true;
        if (!this.#parent?.isEnd())
            return false;
        if (!this.type)
            return this.#parent?.isEnd();
        // if not root, it'll always have a parent
        /* c8 ignore start */
        const pl = this.#parent ? this.#parent.#parts.length : 0;
        /* c8 ignore stop */
        return this.#parentIndex === pl - 1;
    }
    /**
     * Appends a copy of `part` to this node: strings are pushed as-is,
     * AST nodes are cloned with this node as their parent.
     */
    copyIn(part) {
        if (typeof part === 'string')
            this.push(part);
        else
            this.push(part.clone(this));
    }
    /**
     * Recursively copies this node and its parts under `parent`.
     */
    clone(parent) {
        const c = new AST(this.type, parent);
        for (const p of this.#parts) {
            c.copyIn(p);
        }
        return c;
    }
    /**
     * Recursive-descent parser that fills `ast` from `str`.
     *
     * @param str the full pattern
     * @param ast node to fill; when its type is null `pos` is where scanning
     *   starts, otherwise `pos` is the index of the extglob's "("
     * @param pos see `ast`
     * @param opt options; `noext` disables extglob detection at the top level
     * @returns the index in `str` at which parsing stopped
     */
    static #parseAST(str, ast, pos, opt) {
        let escaping = false;
        let inBrace = false;
        let braceStart = -1;
        let braceNeg = false;
        if (ast.type === null) {
            // outside of a extglob, append until we find a start
            let i = pos;
            let acc = '';
            while (i < str.length) {
                const c = str.charAt(i++);
                // still accumulate escapes at this point, but we do ignore
                // starts that are escaped
                if (escaping || c === '\\') {
                    escaping = !escaping;
                    acc += c;
                    continue;
                }
                if (inBrace) {
                    // inside a [...] class nothing starts an extglob; a leading
                    // ^ or ! negates, and a ] directly after the opener (or
                    // after the negation) does not close the class
                    if (i === braceStart + 1) {
                        if (c === '^' || c === '!') {
                            braceNeg = true;
                        }
                    }
                    else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
                        inBrace = false;
                    }
                    acc += c;
                    continue;
                }
                else if (c === '[') {
                    inBrace = true;
                    braceStart = i;
                    braceNeg = false;
                    acc += c;
                    continue;
                }
                if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
                    ast.push(acc);
                    acc = '';
                    const ext = new AST(c, ast);
                    i = AST.#parseAST(str, ext, i, opt);
                    ast.push(ext);
                    continue;
                }
                acc += c;
            }
            ast.push(acc);
            return i;
        }
        // some kind of extglob, pos is at the (
        // find the next | or )
        let i = pos + 1;
        let part = new AST(null, ast);
        const parts = [];
        let acc = '';
        while (i < str.length) {
            const c = str.charAt(i++);
            // still accumulate escapes at this point, but we do ignore
            // starts that are escaped
            if (escaping || c === '\\') {
                escaping = !escaping;
                acc += c;
                continue;
            }
            if (inBrace) {
                if (i === braceStart + 1) {
                    if (c === '^' || c === '!') {
                        braceNeg = true;
                    }
                }
                else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
                    inBrace = false;
                }
                acc += c;
                continue;
            }
            else if (c === '[') {
                inBrace = true;
                braceStart = i;
                braceNeg = false;
                acc += c;
                continue;
            }
            if (isExtglobType(c) && str.charAt(i) === '(') {
                part.push(acc);
                acc = '';
                const ext = new AST(c, part);
                part.push(ext);
                i = AST.#parseAST(str, ext, i, opt);
                continue;
            }
            if (c === '|') {
                // end of one alternative, start the next
                part.push(acc);
                acc = '';
                parts.push(part);
                part = new AST(null, ast);
                continue;
            }
            if (c === ')') {
                if (acc === '' && ast.#parts.length === 0) {
                    ast.#emptyExt = true;
                }
                part.push(acc);
                acc = '';
                ast.push(...parts, part);
                return i;
            }
            acc += c;
        }
        // unfinished extglob
        // if we got here, it was a malformed extglob! not an extglob, but
        // maybe something else in there.
        // Demote the node to a plain sequence holding the raw text, starting
        // at the type character that precedes the "(".
        ast.type = null;
        ast.#hasMagic = undefined;
        ast.#parts = [str.substring(pos - 1)];
        return i;
    }
    /**
     * Parses `pattern` into a new root AST.
     */
    static fromGlob(pattern, options = {}) {
        const ast = new AST(null, undefined, options);
        AST.#parseAST(pattern, ast, 0, options);
        return ast;
    }
    // returns the regular expression if there's magic, or the unescaped
    // string if not.
    toMMPattern() {
        // should only be called on root
        /* c8 ignore start */
        if (this !== this.#root)
            return this.#root.toMMPattern();
        /* c8 ignore stop */
        const glob = this.toString();
        const [re, body, hasMagic, uflag] = this.toRegExpSource();
        // if we're in nocase mode, and not nocaseMagicOnly, then we do
        // still need a regular expression if we have to case-insensitively
        // match capital/lowercase characters.
        const anyMagic = hasMagic ||
            this.#hasMagic ||
            (this.#options.nocase &&
                !this.#options.nocaseMagicOnly &&
                glob.toUpperCase() !== glob.toLowerCase());
        if (!anyMagic) {
            return body;
        }
        const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
        // the source and the original glob are attached to the RegExp object
        return Object.assign(new RegExp(`^${re}$`, flags), {
            _src: re,
            _glob: glob,
        });
    }
    // returns the string match, the regexp source, whether there's magic
    // in the regexp (so a regular expression is required) and whether or
    // not the uflag is needed for the regular expression (for posix classes)
    // TODO: instead of injecting the start/end at this point, just return
    // the BODY of the regexp, along with the start/end portions suitable
    // for binding the start/end in either a joined full-path makeRe context
    // (where we bind to (^|/)), or a standalone matchPart context (where
    // we bind to ^, and not /). Otherwise slashes get duped!
    //
    // In part-matching mode, the start is:
    // - if not isStart: nothing
    // - if traversal possible, but not allowed: ^(?!\.\.?$)
    // - if dots allowed or not possible: ^
    // - if dots possible and not allowed: ^(?!\.)
    // end is:
    // - if not isEnd(): nothing
    // - else: $
    //
    // In full-path matching mode, we put the slash at the START of the
    // pattern, so start is:
    // - if first pattern: same as part-matching mode
    // - if not isStart(): nothing
    // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
    // - if dots allowed or not possible: /
    // - if dots possible and not allowed: /(?!\.)
    // end is:
    // - if last pattern, same as part-matching mode
    // - else nothing
    //
    // Always put the (?:$|/) on negated tails, though, because that has to be
    // there to bind the end of the negated pattern portion, and it's easier to
    // just stick it in now rather than try to inject it later in the middle of
    // the pattern.
    //
    // We can just always return the same end, and leave it up to the caller
    // to know whether it's going to be used joined or in parts.
    // And, if the start is adjusted slightly, can do the same there:
    // - if not isStart: nothing
    // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
    // - if dots allowed or not possible: (?:/|^)
    // - if dots possible and not allowed: (?:/|^)(?!\.)
    //
    // But it's better to have a simpler binding without a conditional, for
    // performance, so probably better to return both start options.
    //
    // Then the caller just ignores the end if it's not the first pattern,
    // and the start always gets applied.
    //
    // But that's always going to be $ if it's the ending pattern, or nothing,
    // so the caller can just attach $ at the end of the pattern when building.
    //
    // So the todo is:
    // - better detect what kind of start is needed
    // - return both flavors of starting pattern
    // - attach $ at the end of the pattern when creating the actual RegExp
    //
    // Ah, but wait, no, that all only applies to the root when the first pattern
    // is not an extglob. If the first pattern IS an extglob, then we need all
    // that dot prevention biz to live in the extglob portions, because eg
    // +(*|.x*) can match .xy but not .yx.
    //
    // So, return the two flavors if it's #root and the first child is not an
    // AST, otherwise leave it to the child AST to handle it, and there,
    // use the (?:^|/) style of start binding.
    //
    // Even simplified further:
    // - Since the start for a join is eg /(?!\.) and the start for a part
    //   is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
    //   or start or whatever) and prepend ^ or / at the Regexp construction.
    /**
     * @param allowDot when given, overrides the `dot` option for this call
     * @returns a 4-tuple: the regexp source, the unescaped body, whether
     *   the node has magic, and whether the "u" flag is required
     */
    toRegExpSource(allowDot) {
        const dot = allowDot ?? !!this.#options.dot;
        if (this.#root === this)
            this.#fillNegs();
        if (!this.type) {
            // A lone "*" has to match at least one character, but only when
            // it is the whole path portion. If this node also holds child
            // ASTs (e.g. "*+(a)"), the star sits next to an extglob and must
            // still be allowed to match the empty string, otherwise "*+(a)"
            // would fail to match "a".
            const noEmpty = this.isStart() &&
                this.isEnd() &&
                !this.#parts.some(s => typeof s !== 'string');
            const src = this.#parts
                .map(p => {
                const [re, _, hasMagic, uflag] = typeof p === 'string'
                    ? AST.#parseGlob(p, this.#hasMagic, noEmpty)
                    : p.toRegExpSource(allowDot);
                this.#hasMagic = this.#hasMagic || hasMagic;
                this.#uflag = this.#uflag || uflag;
                return re;
            })
                .join('');
            let start = '';
            if (this.isStart()) {
                if (typeof this.#parts[0] === 'string') {
                    // this is the string that will match the start of the pattern,
                    // so we need to protect against dots and such.
                    // '.' and '..' cannot match unless the pattern is that exactly,
                    // even if it starts with . or dot:true is set.
                    const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
                    if (!dotTravAllowed) {
                        const aps = addPatternStart;
                        // check if we have a possibility of matching . or ..,
                        // and prevent that.
                        const needNoTrav = 
                        // dots are allowed, and the pattern starts with [ or .
                        (dot && aps.has(src.charAt(0))) ||
                            // the pattern starts with \., and then [ or .
                            (src.startsWith('\\.') && aps.has(src.charAt(2))) ||
                            // the pattern starts with \.\., and then [ or .
                            (src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
                        // no need to prevent dots if it can't match a dot, or if a
                        // sub-pattern will be preventing it anyway.
                        const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
                        start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
                    }
                }
            }
            // append the "end of path portion" pattern to negation tails
            let end = '';
            if (this.isEnd() &&
                this.#root.#filledNegs &&
                this.#parent?.type === '!') {
                end = '(?:$|\\/)';
            }
            const final = start + src + end;
            return [
                final,
                (0, unescape_js_1.unescape)(src),
                (this.#hasMagic = !!this.#hasMagic),
                this.#uflag,
            ];
        }
        // We need to calculate the body *twice* if it's a repeat pattern
        // at the start, once in nodot mode, then again in dot mode, so a
        // pattern like *(?) can match 'x.y'
        const repeated = this.type === '*' || this.type === '+';
        // some kind of extglob
        const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
        let body = this.#partsToRegExp(dot);
        if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
            // invalid extglob, has to at least be *something* present, if it's
            // the entire path portion.
            const s = this.toString();
            this.#parts = [s];
            this.type = null;
            this.#hasMagic = undefined;
            return [s, (0, unescape_js_1.unescape)(this.toString()), false, false];
        }
        // XXX abstract out this map method
        // The dot-allowed body is only needed for repeating extglobs when
        // dots are not already permitted.
        let bodyDotAllowed = !repeated || allowDot || dot
            ? ''
            : this.#partsToRegExp(true);
        if (bodyDotAllowed === body) {
            bodyDotAllowed = '';
        }
        if (bodyDotAllowed) {
            // first repetition without dots, later repetitions with dots
            body = `(?:${body})(?:${bodyDotAllowed})*?`;
        }
        // an empty !() is exactly equivalent to a starNoEmpty
        let final = '';
        if (this.type === '!' && this.#emptyExt) {
            final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
        }
        else {
            const close = this.type === '!'
                ? // !() must match something,but !(x) can match ''
                    '))' +
                        (this.isStart() && !dot && !allowDot ? startNoDot : '') +
                        star +
                        ')'
                : this.type === '@'
                    ? ')'
                    : this.type === '?'
                        ? ')?'
                        : this.type === '+' && bodyDotAllowed
                            ? ')'
                            : this.type === '*' && bodyDotAllowed
                                ? `)?`
                                : `)${this.type}`;
            final = start + body + close;
        }
        return [
            final,
            (0, unescape_js_1.unescape)(body),
            (this.#hasMagic = !!this.#hasMagic),
            this.#uflag,
        ];
    }
    /**
     * Joins the regexp source of every alternative with "|". When this
     * extglob is the entire path portion, empty alternatives are dropped.
     *
     * @param dot forwarded to each child as its `allowDot` argument
     * @throws Error when a part is a bare string
     */
    #partsToRegExp(dot) {
        return this.#parts
            .map(p => {
            // extglob ASTs should only contain parent ASTs
            /* c8 ignore start */
            if (typeof p === 'string') {
                throw new Error('string type in extglob ast??');
            }
            /* c8 ignore stop */
            // can ignore hasMagic, because extglobs are already always magic
            const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
            this.#uflag = this.#uflag || uflag;
            return re;
        })
            .filter(p => !(this.isStart() && this.isEnd()) || !!p)
            .join('|');
    }
    /**
     * Translates a glob string with no extglobs into regexp source.
     *
     * @param glob the glob text
     * @param hasMagic magic state known so far; set to true when `*` or `?`
     *   is seen, or when the class parser reports magic
     * @param noEmpty when true and `glob` is exactly "*", the star must
     *   match at least one character
     * @returns a 4-tuple: regexp source, the result of `unescape(glob)`,
     *   hasMagic as a boolean, and whether the "u" flag is required
     */
    static #parseGlob(glob, hasMagic, noEmpty = false) {
        let escaping = false;
        let re = '';
        let uflag = false;
        for (let i = 0; i < glob.length; i++) {
            const c = glob.charAt(i);
            if (escaping) {
                escaping = false;
                // keep the backslash only where the regexp needs it
                re += (reSpecials.has(c) ? '\\' : '') + c;
                continue;
            }
            if (c === '\\') {
                if (i === glob.length - 1) {
                    // a trailing backslash is a literal backslash
                    re += '\\\\';
                }
                else {
                    escaping = true;
                }
                continue;
            }
            if (c === '[') {
                const [src, needUflag, consumed, magic] = (0, brace_expressions_js_1.parseClass)(glob, i);
                if (consumed) {
                    re += src;
                    uflag = uflag || needUflag;
                    i += consumed - 1;
                    hasMagic = hasMagic || magic;
                    continue;
                }
                // nothing consumed: fall through and escape the "[" literally
            }
            if (c === '*') {
                if (noEmpty && glob === '*')
                    re += starNoEmpty;
                else
                    re += star;
                hasMagic = true;
                continue;
            }
            if (c === '?') {
                re += qmark;
                hasMagic = true;
                continue;
            }
            re += regExpEscape(c);
        }
        return [re, (0, unescape_js_1.unescape)(glob), !!hasMagic, uflag];
    }
}
exports.AST = AST;
//# sourceMappingURL=ast.js.map