"use strict";
// Translate the various POSIX character classes into Unicode properties.
// This works across all Unicode locales.
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseClass = void 0;
// { <posix class>: [<translation>, /u flag required, negated] }
// When the third element is true, the translation lists the characters that
// are NOT in the class; parseClass collects those separately and emits them
// inside a complemented set.
const posixClasses = {
    '[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
    '[:alpha:]': ['\\p{L}\\p{Nl}', true],
    '[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
    '[:blank:]': ['\\p{Zs}\\t', true],
    '[:cntrl:]': ['\\p{Cc}', true],
    '[:digit:]': ['\\p{Nd}', true],
    '[:graph:]': ['\\p{Z}\\p{C}', true, true],
    '[:lower:]': ['\\p{Ll}', true],
    // printable = everything that is not in the "Other" (control, format,
    // unassigned, ...) category, so this entry has to be negated just like
    // [:graph:]; without the flag it matched only the non-printable characters.
    '[:print:]': ['\\p{C}', true, true],
    '[:punct:]': ['\\p{P}', true],
    '[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
    '[:upper:]': ['\\p{Lu}', true],
    '[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
    '[:xdigit:]': ['A-Fa-f0-9', false],
};
// Only a few characters need escaping inside of a bracket expression.
// escapes: [ \ ] -
const braceEscape = (s) => s.replace(/[[\]\\-]/g, '\\$&');
// Escape all regexp magic characters.
const regexpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// Everything has already been escaped, we just have to join.
const rangesToString = (ranges) => ranges.join('');
// Takes a glob string at a posix brace expression, and returns
// an equivalent regular expression source, a boolean indicating
// whether the /u flag needs to be applied, and the number of chars
// consumed to parse the character class.
// This also removes out-of-order ranges, and returns ($.) if the
// entire class is just no good.
/**
 * Parse the bracket expression that begins at `glob[position]`.
 *
 * @param {string} glob - the full glob pattern
 * @param {number} position - index of the opening `[` inside `glob`
 * @returns {[string, boolean, number, boolean]} a tuple of
 *   [regexp source, whether the /u flag is needed, number of characters
 *   consumed, flag that is false for literal/unparsed results and true when
 *   a real character class (or the never-matching `$.`) is produced].
 * @throws {Error} when `glob[position]` is not `[`
 */
const parseClass = (glob, position) => {
    const start = position;
    /* c8 ignore start */
    if (glob.charAt(start) !== '[') {
        throw new Error('not in a brace expression');
    }
    /* c8 ignore stop */
    const positives = [];
    const negatives = [];
    // Result for a class that can never match anything: it swallows the
    // remainder of the glob and yields a regexp source that cannot match.
    const neverMatches = () => ['$.', false, glob.length - start, true];
    let cursor = start + 1;
    let sawStart = false;
    let needsUnicode = false;
    let escaped = false;
    let negated = false;
    let end = start;
    let pendingLow = '';
    SCAN: while (cursor < glob.length) {
        const ch = glob.charAt(cursor);
        // a leading ! or ^ inverts the whole class
        if (cursor === start + 1 && (ch === '!' || ch === '^')) {
            negated = true;
            cursor++;
            continue;
        }
        // an unescaped ] closes the class, unless it is the very first member
        if (ch === ']' && sawStart && !escaped) {
            end = cursor + 1;
            break;
        }
        sawStart = true;
        // First backslash: remember it and look at the next character.
        // An already-escaped backslash drops through as an ordinary character.
        if (ch === '\\' && !escaped) {
            escaped = true;
            cursor++;
            continue;
        }
        if (ch === '[' && !escaped) {
            // either a posix class, a collation equivalent, or just a [
            for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
                if (!glob.startsWith(cls, cursor)) {
                    continue;
                }
                // invalid, [a-[] is fine, but not [a-[:alpha]]
                if (pendingLow) {
                    return neverMatches();
                }
                cursor += cls.length;
                const bucket = neg ? negatives : positives;
                bucket.push(unip);
                needsUnicode = needsUnicode || u;
                continue SCAN;
            }
        }
        // from here on it is effectively a plain character
        escaped = false;
        if (pendingLow) {
            // Finish the open range. An out-of-order range is dropped
            // silently so the other members of the class can still match.
            if (ch > pendingLow) {
                positives.push(braceEscape(pendingLow) + '-' + braceEscape(ch));
            }
            else if (ch === pendingLow) {
                positives.push(braceEscape(ch));
            }
            pendingLow = '';
            cursor++;
            continue;
        }
        // This may be the start of a range.
        // Possible shapes: c-d, c-], c<more...>] or c]
        if (glob.startsWith('-]', cursor + 1)) {
            // trailing dash is a literal: keep both the character and the -
            positives.push(braceEscape(ch + '-'));
            cursor += 2;
            continue;
        }
        if (glob.startsWith('-', cursor + 1)) {
            pendingLow = ch;
            cursor += 2;
            continue;
        }
        // not the start of a range, just a single character
        positives.push(braceEscape(ch));
        cursor++;
    }
    if (end < cursor) {
        // Ran off the end without a closing ]: not a valid class,
        // but it might still be valid as a literal match.
        return ['', false, 0, false];
    }
    // No members at all means the class cannot possibly match anything,
    // and that poisons the whole glob.
    if (!positives.length && !negatives.length) {
        return neverMatches();
    }
    // A lone single character in a non-negated class is not really magic;
    // it is just that literal character. [_] is a perfectly valid way to
    // escape glob magic chars, so hand back the escaped literal.
    if (negatives.length === 0 &&
        positives.length === 1 &&
        /^\\?.$/.test(positives[0]) &&
        !negated) {
        const only = positives[0];
        const literal = only.length === 2 ? only.slice(-1) : only;
        return [regexpEscape(literal), false, end - start, false];
    }
    const positiveSet = '[' + (negated ? '^' : '') + rangesToString(positives) + ']';
    const negativeSet = '[' + (negated ? '' : '^') + rangesToString(negatives) + ']';
    let combined;
    if (positives.length && negatives.length) {
        combined = '(' + positiveSet + '|' + negativeSet + ')';
    }
    else if (positives.length) {
        combined = positiveSet;
    }
    else {
        combined = negativeSet;
    }
    return [combined, needsUnicode, end - start, true];
};
exports.parseClass = parseClass;
//# sourceMappingURL=brace-expressions.js.map