1// parse a single path portion 2import { parseClass } from './brace-expressions.js'; 3import { unescape } from './unescape.js'; 4const types = new Set(['!', '?', '+', '*', '@']); 5const isExtglobType = (c) => types.has(c); 6// Patterns that get prepended to bind to the start of either the 7// entire string, or just a single path portion, to prevent dots 8// and/or traversal patterns, when needed. 9// Exts don't need the ^ or / bit, because the root binds that already. 10const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))'; 11const startNoDot = '(?!\\.)'; 12// characters that indicate a start of pattern needs the "no dots" bit, 13// because a dot *might* be matched. ( is not in the list, because in 14// the case of a child extglob, it will handle the prevention itself. 15const addPatternStart = new Set(['[', '.']); 16// cases where traversal is A-OK, no dot prevention needed 17const justDots = new Set(['..', '.']); 18const reSpecials = new Set('().*{}+?[]^$\\!'); 19const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&'); 20// any single thing other than / 21const qmark = '[^/]'; 22// * => any number of characters 23const star = qmark + '*?'; 24// use + when we need to ensure that *something* matches, because the * is 25// the only thing in the path portion. 26const starNoEmpty = qmark + '+?'; 27// remove the \ chars that we added if we end up doing a nonmagic compare 28// const deslash = (s: string) => s.replace(/\\(.)/g, '$1') 29export class AST { 30 type; 31 #root; 32 #hasMagic; 33 #uflag = false; 34 #parts = []; 35 #parent; 36 #parentIndex; 37 #negs; 38 #filledNegs = false; 39 #options; 40 #toString; 41 // set to true if it's an extglob with no children 42 // (which really means one child of '') 43 #emptyExt = false; 44 constructor(type, parent, options = {}) { 45 this.type = type; 46 // extglobs are inherently magical 47 if (type) 48 this.#hasMagic = true; 49 this.#parent = parent; 50 this.#root = this.#parent ? this.#parent.#root : this; 51 this.#options = this.#root === this ? options : this.#root.#options; 52 this.#negs = this.#root === this ? [] : this.#root.#negs; 53 if (type === '!' && !this.#root.#filledNegs) 54 this.#negs.push(this); 55 this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0; 56 } 57 get hasMagic() { 58 /* c8 ignore start */ 59 if (this.#hasMagic !== undefined) 60 return this.#hasMagic; 61 /* c8 ignore stop */ 62 for (const p of this.#parts) { 63 if (typeof p === 'string') 64 continue; 65 if (p.type || p.hasMagic) 66 return (this.#hasMagic = true); 67 } 68 // note: will be undefined until we generate the regexp src and find out 69 return this.#hasMagic; 70 } 71 // reconstructs the pattern 72 toString() { 73 if (this.#toString !== undefined) 74 return this.#toString; 75 if (!this.type) { 76 return (this.#toString = this.#parts.map(p => String(p)).join('')); 77 } 78 else { 79 return (this.#toString = 80 this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')'); 81 } 82 } 83 #fillNegs() { 84 /* c8 ignore start */ 85 if (this !== this.#root) 86 throw new Error('should only call on root'); 87 if (this.#filledNegs) 88 return this; 89 /* c8 ignore stop */ 90 // call toString() once to fill this out 91 this.toString(); 92 this.#filledNegs = true; 93 let n; 94 while ((n = this.#negs.pop())) { 95 if (n.type !== '!') 96 continue; 97 // walk up the tree, appending everthing that comes AFTER parentIndex 98 let p = n; 99 let pp = p.#parent; 100 while (pp) { 101 for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) { 102 for (const part of n.#parts) { 103 /* c8 ignore start */ 104 if (typeof part === 'string') { 105 throw new Error('string part in extglob AST??'); 106 } 107 /* c8 ignore stop */ 108 part.copyIn(pp.#parts[i]); 109 } 110 } 111 p = pp; 112 pp = p.#parent; 113 } 114 } 115 return this; 116 } 117 push(...parts) { 118 for (const p of parts) { 119 if (p === '') 120 continue; 121 /* c8 ignore start */ 122 if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) { 123 throw new Error('invalid part: ' + p); 124 } 125 /* c8 ignore stop */ 126 this.#parts.push(p); 127 } 128 } 129 toJSON() { 130 const ret = this.type === null 131 ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON())) 132 : [this.type, ...this.#parts.map(p => p.toJSON())]; 133 if (this.isStart() && !this.type) 134 ret.unshift([]); 135 if (this.isEnd() && 136 (this === this.#root || 137 (this.#root.#filledNegs && this.#parent?.type === '!'))) { 138 ret.push({}); 139 } 140 return ret; 141 } 142 isStart() { 143 if (this.#root === this) 144 return true; 145 // if (this.type) return !!this.#parent?.isStart() 146 if (!this.#parent?.isStart()) 147 return false; 148 if (this.#parentIndex === 0) 149 return true; 150 // if everything AHEAD of this is a negation, then it's still the "start" 151 const p = this.#parent; 152 for (let i = 0; i < this.#parentIndex; i++) { 153 const pp = p.#parts[i]; 154 if (!(pp instanceof AST && pp.type === '!')) { 155 return false; 156 } 157 } 158 return true; 159 } 160 isEnd() { 161 if (this.#root === this) 162 return true; 163 if (this.#parent?.type === '!') 164 return true; 165 if (!this.#parent?.isEnd()) 166 return false; 167 if (!this.type) 168 return this.#parent?.isEnd(); 169 // if not root, it'll always have a parent 170 /* c8 ignore start */ 171 const pl = this.#parent ? this.#parent.#parts.length : 0; 172 /* c8 ignore stop */ 173 return this.#parentIndex === pl - 1; 174 } 175 copyIn(part) { 176 if (typeof part === 'string') 177 this.push(part); 178 else 179 this.push(part.clone(this)); 180 } 181 clone(parent) { 182 const c = new AST(this.type, parent); 183 for (const p of this.#parts) { 184 c.copyIn(p); 185 } 186 return c; 187 } 188 static #parseAST(str, ast, pos, opt) { 189 let escaping = false; 190 let inBrace = false; 191 let braceStart = -1; 192 let braceNeg = false; 193 if (ast.type === null) { 194 // outside of a extglob, append until we find a start 195 let i = pos; 196 let acc = ''; 197 while (i < str.length) { 198 const c = str.charAt(i++); 199 // still accumulate escapes at this point, but we do ignore 200 // starts that are escaped 201 if (escaping || c === '\\') { 202 escaping = !escaping; 203 acc += c; 204 continue; 205 } 206 if (inBrace) { 207 if (i === braceStart + 1) { 208 if (c === '^' || c === '!') { 209 braceNeg = true; 210 } 211 } 212 else if (c === ']' && !(i === braceStart + 2 && braceNeg)) { 213 inBrace = false; 214 } 215 acc += c; 216 continue; 217 } 218 else if (c === '[') { 219 inBrace = true; 220 braceStart = i; 221 braceNeg = false; 222 acc += c; 223 continue; 224 } 225 if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') { 226 ast.push(acc); 227 acc = ''; 228 const ext = new AST(c, ast); 229 i = AST.#parseAST(str, ext, i, opt); 230 ast.push(ext); 231 continue; 232 } 233 acc += c; 234 } 235 ast.push(acc); 236 return i; 237 } 238 // some kind of extglob, pos is at the ( 239 // find the next | or ) 240 let i = pos + 1; 241 let part = new AST(null, ast); 242 const parts = []; 243 let acc = ''; 244 while (i < str.length) { 245 const c = str.charAt(i++); 246 // still accumulate escapes at this point, but we do ignore 247 // starts that are escaped 248 if (escaping || c === '\\') { 249 escaping = !escaping; 250 acc += c; 251 continue; 252 } 253 if (inBrace) { 254 if (i === braceStart + 1) { 255 if (c === '^' || c === '!') { 256 braceNeg = true; 257 } 258 } 259 else if (c === ']' && !(i === braceStart + 2 && braceNeg)) { 260 inBrace = false; 261 } 262 acc += c; 263 continue; 264 } 265 else if (c === '[') { 266 inBrace = true; 267 braceStart = i; 268 braceNeg = false; 269 acc += c; 270 continue; 271 } 272 if (isExtglobType(c) && str.charAt(i) === '(') { 273 part.push(acc); 274 acc = ''; 275 const ext = new AST(c, part); 276 part.push(ext); 277 i = AST.#parseAST(str, ext, i, opt); 278 continue; 279 } 280 if (c === '|') { 281 part.push(acc); 282 acc = ''; 283 parts.push(part); 284 part = new AST(null, ast); 285 continue; 286 } 287 if (c === ')') { 288 if (acc === '' && ast.#parts.length === 0) { 289 ast.#emptyExt = true; 290 } 291 part.push(acc); 292 acc = ''; 293 ast.push(...parts, part); 294 return i; 295 } 296 acc += c; 297 } 298 // unfinished extglob 299 // if we got here, it was a malformed extglob! not an extglob, but 300 // maybe something else in there. 301 ast.type = null; 302 ast.#hasMagic = undefined; 303 ast.#parts = [str.substring(pos - 1)]; 304 return i; 305 } 306 static fromGlob(pattern, options = {}) { 307 const ast = new AST(null, undefined, options); 308 AST.#parseAST(pattern, ast, 0, options); 309 return ast; 310 } 311 // returns the regular expression if there's magic, or the unescaped 312 // string if not. 313 toMMPattern() { 314 // should only be called on root 315 /* c8 ignore start */ 316 if (this !== this.#root) 317 return this.#root.toMMPattern(); 318 /* c8 ignore stop */ 319 const glob = this.toString(); 320 const [re, body, hasMagic, uflag] = this.toRegExpSource(); 321 // if we're in nocase mode, and not nocaseMagicOnly, then we do 322 // still need a regular expression if we have to case-insensitively 323 // match capital/lowercase characters. 324 const anyMagic = hasMagic || 325 this.#hasMagic || 326 (this.#options.nocase && 327 !this.#options.nocaseMagicOnly && 328 glob.toUpperCase() !== glob.toLowerCase()); 329 if (!anyMagic) { 330 return body; 331 } 332 const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : ''); 333 return Object.assign(new RegExp(`^${re}$`, flags), { 334 _src: re, 335 _glob: glob, 336 }); 337 } 338 // returns the string match, the regexp source, whether there's magic 339 // in the regexp (so a regular expression is required) and whether or 340 // not the uflag is needed for the regular expression (for posix classes) 341 // TODO: instead of injecting the start/end at this point, just return 342 // the BODY of the regexp, along with the start/end portions suitable 343 // for binding the start/end in either a joined full-path makeRe context 344 // (where we bind to (^|/), or a standalone matchPart context (where 345 // we bind to ^, and not /). Otherwise slashes get duped! 346 // 347 // In part-matching mode, the start is: 348 // - if not isStart: nothing 349 // - if traversal possible, but not allowed: ^(?!\.\.?$) 350 // - if dots allowed or not possible: ^ 351 // - if dots possible and not allowed: ^(?!\.) 352 // end is: 353 // - if not isEnd(): nothing 354 // - else: $ 355 // 356 // In full-path matching mode, we put the slash at the START of the 357 // pattern, so start is: 358 // - if first pattern: same as part-matching mode 359 // - if not isStart(): nothing 360 // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/)) 361 // - if dots allowed or not possible: / 362 // - if dots possible and not allowed: /(?!\.) 363 // end is: 364 // - if last pattern, same as part-matching mode 365 // - else nothing 366 // 367 // Always put the (?:$|/) on negated tails, though, because that has to be 368 // there to bind the end of the negated pattern portion, and it's easier to 369 // just stick it in now rather than try to inject it later in the middle of 370 // the pattern. 371 // 372 // We can just always return the same end, and leave it up to the caller 373 // to know whether it's going to be used joined or in parts. 374 // And, if the start is adjusted slightly, can do the same there: 375 // - if not isStart: nothing 376 // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$) 377 // - if dots allowed or not possible: (?:/|^) 378 // - if dots possible and not allowed: (?:/|^)(?!\.) 379 // 380 // But it's better to have a simpler binding without a conditional, for 381 // performance, so probably better to return both start options. 382 // 383 // Then the caller just ignores the end if it's not the first pattern, 384 // and the start always gets applied. 385 // 386 // But that's always going to be $ if it's the ending pattern, or nothing, 387 // so the caller can just attach $ at the end of the pattern when building. 388 // 389 // So the todo is: 390 // - better detect what kind of start is needed 391 // - return both flavors of starting pattern 392 // - attach $ at the end of the pattern when creating the actual RegExp 393 // 394 // Ah, but wait, no, that all only applies to the root when the first pattern 395 // is not an extglob. If the first pattern IS an extglob, then we need all 396 // that dot prevention biz to live in the extglob portions, because eg 397 // +(*|.x*) can match .xy but not .yx. 398 // 399 // So, return the two flavors if it's #root and the first child is not an 400 // AST, otherwise leave it to the child AST to handle it, and there, 401 // use the (?:^|/) style of start binding. 402 // 403 // Even simplified further: 404 // - Since the start for a join is eg /(?!\.) and the start for a part 405 // is ^(?!\.), we can just prepend (?!\.) to the pattern (either root 406 // or start or whatever) and prepend ^ or / at the Regexp construction. 407 toRegExpSource(allowDot) { 408 const dot = allowDot ?? !!this.#options.dot; 409 if (this.#root === this) 410 this.#fillNegs(); 411 if (!this.type) { 412 const noEmpty = this.isStart() && this.isEnd(); 413 const src = this.#parts 414 .map(p => { 415 const [re, _, hasMagic, uflag] = typeof p === 'string' 416 ? AST.#parseGlob(p, this.#hasMagic, noEmpty) 417 : p.toRegExpSource(allowDot); 418 this.#hasMagic = this.#hasMagic || hasMagic; 419 this.#uflag = this.#uflag || uflag; 420 return re; 421 }) 422 .join(''); 423 let start = ''; 424 if (this.isStart()) { 425 if (typeof this.#parts[0] === 'string') { 426 // this is the string that will match the start of the pattern, 427 // so we need to protect against dots and such. 428 // '.' and '..' cannot match unless the pattern is that exactly, 429 // even if it starts with . or dot:true is set. 430 const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]); 431 if (!dotTravAllowed) { 432 const aps = addPatternStart; 433 // check if we have a possibility of matching . or .., 434 // and prevent that. 435 const needNoTrav = 436 // dots are allowed, and the pattern starts with [ or . 437 (dot && aps.has(src.charAt(0))) || 438 // the pattern starts with \., and then [ or . 439 (src.startsWith('\\.') && aps.has(src.charAt(2))) || 440 // the pattern starts with \.\., and then [ or . 441 (src.startsWith('\\.\\.') && aps.has(src.charAt(4))); 442 // no need to prevent dots if it can't match a dot, or if a 443 // sub-pattern will be preventing it anyway. 444 const needNoDot = !dot && !allowDot && aps.has(src.charAt(0)); 445 start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : ''; 446 } 447 } 448 } 449 // append the "end of path portion" pattern to negation tails 450 let end = ''; 451 if (this.isEnd() && 452 this.#root.#filledNegs && 453 this.#parent?.type === '!') { 454 end = '(?:$|\\/)'; 455 } 456 const final = start + src + end; 457 return [ 458 final, 459 unescape(src), 460 (this.#hasMagic = !!this.#hasMagic), 461 this.#uflag, 462 ]; 463 } 464 // We need to calculate the body *twice* if it's a repeat pattern 465 // at the start, once in nodot mode, then again in dot mode, so a 466 // pattern like *(?) can match 'x.y' 467 const repeated = this.type === '*' || this.type === '+'; 468 // some kind of extglob 469 const start = this.type === '!' ? '(?:(?!(?:' : '(?:'; 470 let body = this.#partsToRegExp(dot); 471 if (this.isStart() && this.isEnd() && !body && this.type !== '!') { 472 // invalid extglob, has to at least be *something* present, if it's 473 // the entire path portion. 474 const s = this.toString(); 475 this.#parts = [s]; 476 this.type = null; 477 this.#hasMagic = undefined; 478 return [s, unescape(this.toString()), false, false]; 479 } 480 // XXX abstract out this map method 481 let bodyDotAllowed = !repeated || allowDot || dot || !startNoDot 482 ? '' 483 : this.#partsToRegExp(true); 484 if (bodyDotAllowed === body) { 485 bodyDotAllowed = ''; 486 } 487 if (bodyDotAllowed) { 488 body = `(?:${body})(?:${bodyDotAllowed})*?`; 489 } 490 // an empty !() is exactly equivalent to a starNoEmpty 491 let final = ''; 492 if (this.type === '!' && this.#emptyExt) { 493 final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty; 494 } 495 else { 496 const close = this.type === '!' 497 ? // !() must match something,but !(x) can match '' 498 '))' + 499 (this.isStart() && !dot && !allowDot ? startNoDot : '') + 500 star + 501 ')' 502 : this.type === '@' 503 ? ')' 504 : this.type === '?' 505 ? ')?' 506 : this.type === '+' && bodyDotAllowed 507 ? ')' 508 : this.type === '*' && bodyDotAllowed 509 ? `)?` 510 : `)${this.type}`; 511 final = start + body + close; 512 } 513 return [ 514 final, 515 unescape(body), 516 (this.#hasMagic = !!this.#hasMagic), 517 this.#uflag, 518 ]; 519 } 520 #partsToRegExp(dot) { 521 return this.#parts 522 .map(p => { 523 // extglob ASTs should only contain parent ASTs 524 /* c8 ignore start */ 525 if (typeof p === 'string') { 526 throw new Error('string type in extglob ast??'); 527 } 528 /* c8 ignore stop */ 529 // can ignore hasMagic, because extglobs are already always magic 530 const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot); 531 this.#uflag = this.#uflag || uflag; 532 return re; 533 }) 534 .filter(p => !(this.isStart() && this.isEnd()) || !!p) 535 .join('|'); 536 } 537 static #parseGlob(glob, hasMagic, noEmpty = false) { 538 let escaping = false; 539 let re = ''; 540 let uflag = false; 541 for (let i = 0; i < glob.length; i++) { 542 const c = glob.charAt(i); 543 if (escaping) { 544 escaping = false; 545 re += (reSpecials.has(c) ? '\\' : '') + c; 546 continue; 547 } 548 if (c === '\\') { 549 if (i === glob.length - 1) { 550 re += '\\\\'; 551 } 552 else { 553 escaping = true; 554 } 555 continue; 556 } 557 if (c === '[') { 558 const [src, needUflag, consumed, magic] = parseClass(glob, i); 559 if (consumed) { 560 re += src; 561 uflag = uflag || needUflag; 562 i += consumed - 1; 563 hasMagic = hasMagic || magic; 564 continue; 565 } 566 } 567 if (c === '*') { 568 if (noEmpty && glob === '*') 569 re += starNoEmpty; 570 else 571 re += star; 572 hasMagic = true; 573 continue; 574 } 575 if (c === '?') { 576 re += qmark; 577 hasMagic = true; 578 continue; 579 } 580 re += regExpEscape(c); 581 } 582 return [re, unescape(glob), !!hasMagic, uflag]; 583 } 584} 585//# sourceMappingURL=ast.js.map