// Translate the various POSIX character classes into Unicode properties.
// This works across all Unicode locales.
// { <posix class>: [<translation>, /u flag required, negated] }
// When `negated` is true the translation lists the characters that are NOT
// members of the class, so it has to be emitted inside a `[^...]` set.
const posixClasses = {
    '[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
    '[:alpha:]': ['\\p{L}\\p{Nl}', true],
    '[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
    '[:blank:]': ['\\p{Zs}\\t', true],
    '[:cntrl:]': ['\\p{Cc}', true],
    '[:digit:]': ['\\p{Nd}', true],
    '[:graph:]': ['\\p{Z}\\p{C}', true, true],
    '[:lower:]': ['\\p{Ll}', true],
    // Printable means "anything outside the Other (control, format,
    // unassigned, ...) category", so this translation must be negated.
    // Without the flag the class matched only non-printable characters.
    '[:print:]': ['\\p{C}', true, true],
    '[:punct:]': ['\\p{P}', true],
    '[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
    '[:upper:]': ['\\p{Lu}', true],
    '[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
    '[:xdigit:]': ['A-Fa-f0-9', false],
};
// Only a few things need escaping inside of a bracket expression.
// escapes: [ \ ] -
const braceEscape = (s) => s.replace(/[[\]\\-]/g, '\\$&');
// Escape all regexp magic characters.
const regexpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// Everything has already been escaped, we just have to join.
const rangesToString = (ranges) => ranges.join('');
// Takes a glob string and the position of a posix bracket expression in it,
// and returns an equivalent regular expression source, a boolean indicating
// whether the /u flag needs to be applied, and the number of chars
// consumed to parse the character class.
// This also removes out of order ranges, and returns ($.) if the
// entire class is just no good.
/**
 * Parse the bracket expression that starts at `glob[position]`.
 *
 * @param {string} glob - the glob pattern being parsed
 * @param {number} position - index of the opening `[` within `glob`
 * @returns {[string, boolean, number, boolean]} a tuple of:
 *   the regular expression source equivalent to the class,
 *   whether the `u` flag is required,
 *   the number of characters consumed (0 when no closing `]` was found),
 *   and whether the result is "magic" (false for a plain literal).
 * @throws {Error} if `glob[position]` is not `[`
 */
export const parseClass = (glob, position) => {
    const pos = position;
    /* c8 ignore start */
    if (glob.charAt(pos) !== '[') {
        throw new Error('not in a brace expression');
    }
    /* c8 ignore stop */
    // positive members of the class, already escaped for use inside [...]
    const ranges = [];
    // members that come from negated posix translations (see posixClasses)
    const negs = [];
    let i = pos + 1;
    let sawStart = false;
    let uflag = false;
    let escaping = false;
    let negate = false;
    let endPos = pos;
    let rangeStart = '';
    WHILE: while (i < glob.length) {
        const c = glob.charAt(i);
        // a leading ! or ^ negates the whole class
        if ((c === '!' || c === '^') && i === pos + 1) {
            negate = true;
            i++;
            continue;
        }
        // a ] closes the class unless it is the very first member or escaped
        if (c === ']' && sawStart && !escaping) {
            endPos = i + 1;
            break;
        }
        sawStart = true;
        if (c === '\\') {
            if (!escaping) {
                escaping = true;
                i++;
                continue;
            }
            // escaped \ char, fall through and treat like normal char
        }
        if (c === '[' && !escaping) {
            // either a posix class, a collation equivalent, or just a [
            for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
                if (glob.startsWith(cls, i)) {
                    // invalid, [a-[] is fine, but not [a-[:alpha]]
                    if (rangeStart) {
                        return ['$.', false, glob.length - pos, true];
                    }
                    i += cls.length;
                    if (neg) {
                        negs.push(unip);
                    }
                    else {
                        ranges.push(unip);
                    }
                    uflag = uflag || u;
                    continue WHILE;
                }
            }
        }
        // now it's just a normal character, effectively
        escaping = false;
        if (rangeStart) {
            // throw this range away if it's not valid, but others
            // can still match.
            if (c > rangeStart) {
                ranges.push(braceEscape(rangeStart) + '-' + braceEscape(c));
            }
            else if (c === rangeStart) {
                ranges.push(braceEscape(c));
            }
            rangeStart = '';
            i++;
            continue;
        }
        // now might be the start of a range.
        // can be either c-d or c-] or c<more...>] or c] at this point
        if (glob.startsWith('-]', i + 1)) {
            ranges.push(braceEscape(c + '-'));
            i += 2;
            continue;
        }
        if (glob.startsWith('-', i + 1)) {
            rangeStart = c;
            i += 2;
            continue;
        }
        // not the start of a range, just a single character
        ranges.push(braceEscape(c));
        i++;
    }
    if (endPos < i) {
        // didn't see the end of the class, not a valid class,
        // but might still be valid as a literal match.
        return ['', false, 0, false];
    }
    // if we got no ranges and no negates, then we have a range that
    // cannot possibly match anything, and that poisons the whole glob
    if (!ranges.length && !negs.length) {
        return ['$.', false, glob.length - pos, true];
    }
    // if we got one positive range, and it's a single character, then that's
    // not actually a magic pattern, it's just that one literal character.
    // we should not treat that as "magic", we should just return the literal
    // character. [_] is a perfectly valid way to escape glob magic chars.
    if (negs.length === 0 &&
        ranges.length === 1 &&
        /^\\?.$/.test(ranges[0]) &&
        !negate) {
        const r = ranges[0].length === 2 ? ranges[0].slice(-1) : ranges[0];
        return [regexpEscape(r), false, endPos - pos, false];
    }
    const sranges = '[' + (negate ? '^' : '') + rangesToString(ranges) + ']';
    const snegs = '[' + (negate ? '' : '^') + rangesToString(negs) + ']';
    let comb;
    if (ranges.length && negs.length) {
        // Not negated: the class is the union of both sets, so alternate.
        // Negated: the complement of a union is the INTERSECTION of the
        // complements, so the character must satisfy both sets at once. A
        // lookahead checks the first set and the second consumes the char.
        comb = negate
            ? '((?=' + sranges + ')' + snegs + ')'
            : '(' + sranges + '|' + snegs + ')';
    }
    else if (ranges.length) {
        comb = sranges;
    }
    else {
        comb = snegs;
    }
    return [comb, uflag, endPos - pos, true];
};
//# sourceMappingURL=brace-expressions.js.map