import {
    Selector,
    SelectorType,
    AttributeSelector,
    Traversal,
    AttributeAction,
    TraversalType,
    DataType,
} from "./types";

// Matches a (possibly escaped) CSS name at the start of a string.
const reName = /^[^\\#]?(?:\\(?:[\da-f]{1,6}\s?|.)|[\w\-\u00b0-\uFFFF])+/;
// Matches a single CSS escape sequence: `\` + up to six hex digits (with an
// optional trailing whitespace character), `\` + whitespace, or `\` + any character.
const reEscape = /\\([\da-f]{1,6}\s?|(\s)|.)/gi;

/** Character codes the parser dispatches on. */
const enum CharCode {
    LeftParenthesis = 40,
    RightParenthesis = 41,
    LeftSquareBracket = 91,
    RightSquareBracket = 93,
    Comma = 44,
    Period = 46,
    Colon = 58,
    SingleQuote = 39,
    DoubleQuote = 34,
    Plus = 43,
    Tilde = 126,
    QuestionMark = 63,
    ExclamationMark = 33,
    Slash = 47,
    Star = 42,
    Equal = 61,
    Dollar = 36,
    Pipe = 124,
    Circumflex = 94,
    Asterisk = 42,
    GreaterThan = 62,
    LessThan = 60,
    Hash = 35,
    LowerI = 105,
    LowerS = 115,
    BackSlash = 92,

    // Whitespace
    Space = 32,
    Tab = 9,
    NewLine = 10,
    FormFeed = 12,
    CarriageReturn = 13,
}

// Maps the character preceding `=` in an attribute selector to its action.
const actionTypes = new Map<number, AttributeAction>([
    [CharCode.Tilde, AttributeAction.Element],
    [CharCode.Circumflex, AttributeAction.Start],
    [CharCode.Dollar, AttributeAction.End],
    [CharCode.Asterisk, AttributeAction.Any],
    [CharCode.ExclamationMark, AttributeAction.Not],
    [CharCode.Pipe, AttributeAction.Hyphen],
]);

// Pseudos, whose data property is parsed as well.
const unpackPseudos = new Set([
    "has",
    "not",
    "matches",
    "is",
    "where",
    "host",
    "host-context",
]);

/**
 * Checks whether a specific selector is a traversal.
 * This is useful eg. in swapping the order of elements that
 * are not traversals.
 *
 * @param selector Selector to check.
 */
export function isTraversal(selector: Selector): selector is Traversal {
    switch (selector.type) {
        case SelectorType.Adjacent:
        case SelectorType.Child:
        case SelectorType.Descendant:
        case SelectorType.Parent:
        case SelectorType.Sibling:
        case SelectorType.ColumnCombinator:
            return true;
        default:
            return false;
    }
}

// Pseudos whose argument has one pair of surrounding quotes removed.
const stripQuotesFromPseudos = new Set(["contains", "icontains"]);

/**
 * Replacement callback for `reEscape`; resolves one escape sequence.
 *
 * Unescape function taken from
 * https://github.com/jquery/sizzle/blob/master/src/sizzle.js#L152
 *
 * @param _ The full match (unused).
 * @param escaped The text following the backslash.
 * @param escapedWhitespace Set when the backslash escaped a whitespace character.
 * @returns The character(s) the escape sequence stands for.
 */
function funescape(_: string, escaped: string, escapedWhitespace?: string) {
    const high = parseInt(escaped, 16) - 0x10000;

    // NaN means non-codepoint
    return high !== high || escapedWhitespace
        ? escaped
        : high < 0
        ? // BMP codepoint
          String.fromCharCode(high + 0x10000)
        : // Supplemental Plane codepoint (surrogate pair)
          String.fromCharCode((high >> 10) | 0xd800, (high & 0x3ff) | 0xdc00);
}

/**
 * Resolves every escape sequence in `str`.
 *
 * Note that this is not idempotent: applying it twice turns an escaped
 * backslash followed by text into a second escape sequence.
 */
function unescapeCSS(str: string) {
    return str.replace(reEscape, funescape);
}

/** Returns `true` if `c` is the char code of `'` or `"`. */
function isQuote(c: number): boolean {
    return c === CharCode.SingleQuote || c === CharCode.DoubleQuote;
}

/** Returns `true` if `c` is a space, tab, newline, form feed or carriage return. */
function isWhitespace(c: number): boolean {
    return (
        c === CharCode.Space ||
        c === CharCode.Tab ||
        c === CharCode.NewLine ||
        c === CharCode.FormFeed ||
        c === CharCode.CarriageReturn
    );
}

/**
 * Parses `selector`.
 *
 * @param selector Selector to parse.
 * @returns Returns a two-dimensional array.
 * The first dimension represents selectors separated by commas (eg. `sub1, sub2`),
 * the second contains the relevant tokens for that selector.
 * @throws {Error} If the selector is malformed or has unparsable trailing content.
 */
export function parse(selector: string): Selector[][] {
    const subselects: Selector[][] = [];

    // Coerce once, so that the parsed text and the text used for the
    // length check / error message are guaranteed to be the same string.
    const input = `${selector}`;
    const endIndex = parseSelector(subselects, input, 0);

    if (endIndex < input.length) {
        throw new Error(`Unmatched selector: ${input.slice(endIndex)}`);
    }

    return subselects;
}

/**
 * Parses a comma-separated selector list starting at `selectorIndex`,
 * appending one token array per sub-selector to `subselects`.
 *
 * Parsing stops at the end of the string or at the first character that
 * cannot start a token (eg. the `)` closing an enclosing pseudo-class).
 *
 * @param subselects Output array the parsed sub-selectors are pushed to.
 * @param selector The complete selector string.
 * @param selectorIndex Index in `selector` at which to start parsing.
 * @returns The index at which parsing stopped.
 * @throws {Error} On malformed input (see the individual error messages).
 */
function parseSelector(
    subselects: Selector[][],
    selector: string,
    selectorIndex: number
): number {
    let tokens: Selector[] = [];

    /**
     * Reads a name located `offset` characters after the current position,
     * advances past it, and returns it unescaped.
     */
    function getName(offset: number): string {
        const match = selector.slice(selectorIndex + offset).match(reName);

        if (!match) {
            throw new Error(
                `Expected name, found ${selector.slice(selectorIndex)}`
            );
        }

        const [name] = match;
        selectorIndex += offset + name.length;
        return unescapeCSS(name);
    }

    /** Advances by `offset`, then past any whitespace that follows. */
    function stripWhitespace(offset: number) {
        selectorIndex += offset;

        while (
            selectorIndex < selector.length &&
            isWhitespace(selector.charCodeAt(selectorIndex))
        ) {
            selectorIndex++;
        }
    }

    /**
     * Reads a parenthesised value. The current position has to be at the
     * opening parenthesis; afterwards it is just past the matching closing
     * one. Nested, unescaped parentheses are balanced.
     *
     * @returns The content between the parentheses, already unescaped.
     */
    function readValueWithParenthesis(): string {
        selectorIndex += 1;
        const start = selectorIndex;
        let counter = 1;

        for (
            ;
            counter > 0 && selectorIndex < selector.length;
            selectorIndex++
        ) {
            if (
                selector.charCodeAt(selectorIndex) ===
                    CharCode.LeftParenthesis &&
                !isEscaped(selectorIndex)
            ) {
                counter++;
            } else if (
                selector.charCodeAt(selectorIndex) ===
                    CharCode.RightParenthesis &&
                !isEscaped(selectorIndex)
            ) {
                counter--;
            }
        }

        if (counter) {
            throw new Error("Parenthesis not matched");
        }

        return unescapeCSS(selector.slice(start, selectorIndex - 1));
    }

    /**
     * Whether the character at `pos` is escaped, ie. preceded by an
     * odd number of consecutive backslashes.
     */
    function isEscaped(pos: number): boolean {
        let slashCount = 0;

        while (selector.charCodeAt(--pos) === CharCode.BackSlash) slashCount++;
        return (slashCount & 1) === 1;
    }

    /** Throws if the most recent token is a traversal. */
    function ensureNotTraversal() {
        if (tokens.length > 0 && isTraversal(tokens[tokens.length - 1])) {
            throw new Error("Did not expect successive traversals.");
        }
    }

    /**
     * Adds a traversal of the given type. A directly preceding descendant
     * token (produced by whitespace in front of the combinator) is
     * upgraded in place instead.
     */
    function addTraversal(type: TraversalType) {
        if (
            tokens.length > 0 &&
            tokens[tokens.length - 1].type === SelectorType.Descendant
        ) {
            tokens[tokens.length - 1].type = type;
            return;
        }

        ensureNotTraversal();

        tokens.push({ type });
    }

    /** Adds the attribute token for a `.class` or `#id` shorthand. */
    function addSpecialAttribute(name: string, action: AttributeAction) {
        tokens.push({
            type: SelectorType.Attribute,
            name,
            action,
            value: getName(1),
            namespace: null,
            ignoreCase: "quirks",
        });
    }

    /**
     * We have finished parsing the current part of the selector.
     *
     * Remove a descendant token at the end if it exists, and add the
     * collected tokens to the result.
     */
    function finalizeSubselector() {
        if (
            tokens.length &&
            tokens[tokens.length - 1].type === SelectorType.Descendant
        ) {
            tokens.pop();
        }

        if (tokens.length === 0) {
            throw new Error("Empty sub-selector");
        }

        subselects.push(tokens);
    }

    stripWhitespace(0);

    if (selector.length === selectorIndex) {
        return selectorIndex;
    }

    loop: while (selectorIndex < selector.length) {
        const firstChar = selector.charCodeAt(selectorIndex);

        switch (firstChar) {
            // Whitespace
            case CharCode.Space:
            case CharCode.Tab:
            case CharCode.NewLine:
            case CharCode.FormFeed:
            case CharCode.CarriageReturn: {
                /*
                 * Whitespace only is a descendant combinator if it does not
                 * follow another traversal. We can get here with a traversal
                 * as the last token when a comment separates two runs of
                 * whitespace (eg. `a /* c *\/ b` or `a > /* c *\/ b`); the
                 * additional whitespace is then insignificant.
                 */
                if (
                    tokens.length === 0 ||
                    !isTraversal(tokens[tokens.length - 1])
                ) {
                    tokens.push({ type: SelectorType.Descendant });
                }

                stripWhitespace(1);
                break;
            }
            // Traversals
            case CharCode.GreaterThan: {
                addTraversal(SelectorType.Child);
                stripWhitespace(1);
                break;
            }
            case CharCode.LessThan: {
                addTraversal(SelectorType.Parent);
                stripWhitespace(1);
                break;
            }
            case CharCode.Tilde: {
                addTraversal(SelectorType.Sibling);
                stripWhitespace(1);
                break;
            }
            case CharCode.Plus: {
                addTraversal(SelectorType.Adjacent);
                stripWhitespace(1);
                break;
            }
            // Special attribute selectors: .class, #id
            case CharCode.Period: {
                addSpecialAttribute("class", AttributeAction.Element);
                break;
            }
            case CharCode.Hash: {
                addSpecialAttribute("id", AttributeAction.Equals);
                break;
            }
            case CharCode.LeftSquareBracket: {
                stripWhitespace(1);

                // Determine attribute name and namespace

                let name: string;
                let namespace: string | null = null;

                if (selector.charCodeAt(selectorIndex) === CharCode.Pipe) {
                    // Equivalent to no namespace
                    name = getName(1);
                } else if (selector.startsWith("*|", selectorIndex)) {
                    namespace = "*";
                    name = getName(2);
                } else {
                    name = getName(0);

                    // A `|` that is not part of a `|=` operator separates
                    // the namespace from the attribute name.
                    if (
                        selector.charCodeAt(selectorIndex) === CharCode.Pipe &&
                        selector.charCodeAt(selectorIndex + 1) !==
                            CharCode.Equal
                    ) {
                        namespace = name;
                        name = getName(1);
                    }
                }

                stripWhitespace(0);

                // Determine comparison operation

                let action: AttributeAction = AttributeAction.Exists;
                const possibleAction = actionTypes.get(
                    selector.charCodeAt(selectorIndex)
                );

                if (possibleAction) {
                    action = possibleAction;

                    if (
                        selector.charCodeAt(selectorIndex + 1) !==
                        CharCode.Equal
                    ) {
                        throw new Error("Expected `=`");
                    }

                    stripWhitespace(2);
                } else if (
                    selector.charCodeAt(selectorIndex) === CharCode.Equal
                ) {
                    action = AttributeAction.Equals;
                    stripWhitespace(1);
                }

                // Determine value

                let value = "";
                let ignoreCase: boolean | null = null;

                if (action !== "exists") {
                    if (isQuote(selector.charCodeAt(selectorIndex))) {
                        // Quoted value: scan to the matching unescaped quote.
                        const quote = selector.charCodeAt(selectorIndex);
                        let sectionEnd = selectorIndex + 1;
                        while (
                            sectionEnd < selector.length &&
                            (selector.charCodeAt(sectionEnd) !== quote ||
                                isEscaped(sectionEnd))
                        ) {
                            sectionEnd += 1;
                        }

                        if (selector.charCodeAt(sectionEnd) !== quote) {
                            throw new Error("Attribute value didn't end");
                        }

                        value = unescapeCSS(
                            selector.slice(selectorIndex + 1, sectionEnd)
                        );
                        selectorIndex = sectionEnd + 1;
                    } else {
                        // Unquoted value: ends at the first unescaped
                        // whitespace character or `]`.
                        const valueStart = selectorIndex;

                        while (
                            selectorIndex < selector.length &&
                            ((!isWhitespace(
                                selector.charCodeAt(selectorIndex)
                            ) &&
                                selector.charCodeAt(selectorIndex) !==
                                    CharCode.RightSquareBracket) ||
                                isEscaped(selectorIndex))
                        ) {
                            selectorIndex += 1;
                        }

                        value = unescapeCSS(
                            selector.slice(valueStart, selectorIndex)
                        );
                    }

                    stripWhitespace(0);

                    // See if we have a force ignore flag

                    // `| 0x20` lower-cases ASCII letters, so `I`/`S` match as well.
                    const forceIgnore =
                        selector.charCodeAt(selectorIndex) | 0x20;

                    // If the forceIgnore flag is set (either `i` or `s`), use that value
                    if (forceIgnore === CharCode.LowerS) {
                        ignoreCase = false;
                        stripWhitespace(1);
                    } else if (forceIgnore === CharCode.LowerI) {
                        ignoreCase = true;
                        stripWhitespace(1);
                    }
                }

                if (
                    selector.charCodeAt(selectorIndex) !==
                    CharCode.RightSquareBracket
                ) {
                    throw new Error("Attribute selector didn't terminate");
                }

                selectorIndex += 1;

                const attributeSelector: AttributeSelector = {
                    type: SelectorType.Attribute,
                    name,
                    action,
                    value,
                    namespace,
                    ignoreCase,
                };

                tokens.push(attributeSelector);
                break;
            }
            case CharCode.Colon: {
                // `::name` is a pseudo-element
                if (selector.charCodeAt(selectorIndex + 1) === CharCode.Colon) {
                    tokens.push({
                        type: SelectorType.PseudoElement,
                        name: getName(2).toLowerCase(),
                        data:
                            selector.charCodeAt(selectorIndex) ===
                            CharCode.LeftParenthesis
                                ? readValueWithParenthesis()
                                : null,
                    });
                    break;
                }

                const name = getName(1).toLowerCase();
                let data: DataType = null;

                if (
                    selector.charCodeAt(selectorIndex) ===
                    CharCode.LeftParenthesis
                ) {
                    if (unpackPseudos.has(name)) {
                        if (isQuote(selector.charCodeAt(selectorIndex + 1))) {
                            throw new Error(
                                `Pseudo-selector ${name} cannot be quoted`
                            );
                        }

                        // The argument is a selector list itself; parse it
                        // recursively, up to the closing parenthesis.
                        data = [];
                        selectorIndex = parseSelector(
                            data,
                            selector,
                            selectorIndex + 1
                        );

                        if (
                            selector.charCodeAt(selectorIndex) !==
                            CharCode.RightParenthesis
                        ) {
                            throw new Error(
                                `Missing closing parenthesis in :${name} (${selector})`
                            );
                        }

                        selectorIndex += 1;
                    } else {
                        /*
                         * `readValueWithParenthesis` already returns the
                         * unescaped value. It must not be unescaped a second
                         * time, as that would corrupt escaped backslashes
                         * (`\\41` would become `A` instead of `\41`).
                         */
                        data = readValueWithParenthesis();

                        if (stripQuotesFromPseudos.has(name)) {
                            const quot = data.charCodeAt(0);

                            if (
                                quot === data.charCodeAt(data.length - 1) &&
                                isQuote(quot)
                            ) {
                                data = data.slice(1, -1);
                            }
                        }
                    }
                }

                tokens.push({ type: SelectorType.Pseudo, name, data });
                break;
            }
            case CharCode.Comma: {
                finalizeSubselector();
                tokens = [];
                stripWhitespace(1);
                break;
            }
            default: {
                if (selector.startsWith("/*", selectorIndex)) {
                    const endIndex = selector.indexOf("*/", selectorIndex + 2);

                    if (endIndex < 0) {
                        throw new Error("Comment was not terminated");
                    }

                    selectorIndex = endIndex + 2;

                    // Remove leading whitespace
                    if (tokens.length === 0) {
                        stripWhitespace(0);
                    }

                    break;
                }

                let namespace: string | null = null;
                let name: string;

                if (firstChar === CharCode.Asterisk) {
                    selectorIndex += 1;
                    name = "*";
                } else if (firstChar === CharCode.Pipe) {
                    // A leading `|` means "no namespace"; `||` is the
                    // column combinator.
                    name = "";

                    if (
                        selector.charCodeAt(selectorIndex + 1) === CharCode.Pipe
                    ) {
                        addTraversal(SelectorType.ColumnCombinator);
                        stripWhitespace(2);
                        break;
                    }
                } else if (reName.test(selector.slice(selectorIndex))) {
                    name = getName(0);
                } else {
                    // Nothing we can parse; hand control back to the caller.
                    break loop;
                }

                // `ns|name` or `ns|*` (but not the `||` column combinator)
                if (
                    selector.charCodeAt(selectorIndex) === CharCode.Pipe &&
                    selector.charCodeAt(selectorIndex + 1) !== CharCode.Pipe
                ) {
                    namespace = name;
                    if (
                        selector.charCodeAt(selectorIndex + 1) ===
                        CharCode.Asterisk
                    ) {
                        name = "*";
                        selectorIndex += 2;
                    } else {
                        name = getName(1);
                    }
                }

                tokens.push(
                    name === "*"
                        ? { type: SelectorType.Universal, namespace }
                        : { type: SelectorType.Tag, name, namespace }
                );
            }
        }
    }

    finalizeSubselector();
    return selectorIndex;
}