namespace ts {
    /** The classifier is used for syntactic highlighting in editors via the TSServer */
    export function createClassifier(): Classifier {
        const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);

        function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
            return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
        }

        // If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
        // we will be more conservative in order to avoid conflicting with the syntactic classifier.
        function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
            let token = SyntaxKind.Unknown;
            let lastNonTriviaToken = SyntaxKind.Unknown;

            // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
            // classification on template strings. Because of the context free nature of templates,
            // the only precise way to classify a template portion would be by propagating the stack across
            // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
            // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
            // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
            // Situations in which this fails are
            //  1) When template strings are nested across different lines:
            //          `hello ${ `world
            //          ` }`
            //
            //     Where on the second line, you will get the closing of a template,
            //     a closing curly, and a new template.
            //
            //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
            //          `hello ${ () => {
            //          return "world" } } `
            //
            //     Where on the second line, you will get the 'return' keyword,
            //     a string literal, and a template end consisting of '} } `'.
            const templateStack: SyntaxKind[] = [];

            const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
            text = prefix + text;
            const offset = prefix.length;
            if (pushTemplate) {
                templateStack.push(SyntaxKind.TemplateHead);
            }

            scanner.setText(text);

            let endOfLineState = EndOfLineState.None;
            const spans: number[] = [];

            // We can run into an unfortunate interaction between the lexical and syntactic classifier
            // when the user is typing something generic. Consider the case where the user types:
            //
            //      Foo<number
            //
            // From the lexical classifier's perspective, 'number' is a keyword, and so the word will
            // be classified as such. However, from the syntactic classifier's tree-based perspective
            // this is simply an expression with the identifier 'number' on the RHS of the less than
            // token. So the classification will go back to being an identifier. The moment the user
            // types again, number will become a keyword, then an identifier, etc. etc.
            //
            // To try to avoid this problem, we avoid classifying contextual keywords as keywords
            // when the user is potentially typing something generic. We just can't do a good enough
            // job at the lexical level, and so we'll leave it up to the syntactic classifier to make
            // the determination.
            //
            // In order to determine if the user is potentially typing something generic, we use a
            // weak heuristic where we track < and > tokens. It's a weak heuristic, but should
            // work well enough in practice.
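            //
            // For example (illustrative): when classifying "Foo<number", the '<' following the
            // identifier 'Foo' bumps the count below, so the trailing 'number' is left as an
            // identifier rather than a keyword whenever a syntactic classifier is present.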
            let angleBracketStack = 0;

            do {
                token = scanner.scan();
                if (!isTrivia(token)) {
                    handleToken();
                    lastNonTriviaToken = token;
                }
                const end = scanner.getTextPos();
                pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
                if (end >= text.length) {
                    const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
                    if (end !== undefined) {
                        endOfLineState = end;
                    }
                }
            } while (token !== SyntaxKind.EndOfFileToken);

            function handleToken(): void {
                switch (token) {
                    case SyntaxKind.SlashToken:
                    case SyntaxKind.SlashEqualsToken:
                        if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
                            token = SyntaxKind.RegularExpressionLiteral;
                        }
                        break;
                    case SyntaxKind.LessThanToken:
                        if (lastNonTriviaToken === SyntaxKind.Identifier) {
                            // Could be the start of something generic. Keep track of that by bumping
                            // up the current count of generic contexts we may be in.
                            angleBracketStack++;
                        }
                        break;
                    case SyntaxKind.GreaterThanToken:
                        if (angleBracketStack > 0) {
                            // If we think we're currently in something generic, then mark that that
                            // generic entity is complete.
                            angleBracketStack--;
                        }
                        break;
                    case SyntaxKind.AnyKeyword:
                    case SyntaxKind.StringKeyword:
                    case SyntaxKind.NumberKeyword:
                    case SyntaxKind.BooleanKeyword:
                    case SyntaxKind.SymbolKeyword:
                        if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
                            // If it looks like we could be in something generic, don't classify this
                            // as a keyword. We may just get overwritten by the syntactic classifier,
                            // causing a noisy experience for the user.
                            token = SyntaxKind.Identifier;
                        }
                        break;
                    case SyntaxKind.TemplateHead:
                        templateStack.push(token);
                        break;
                    case SyntaxKind.OpenBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
                            templateStack.push(token);
                        }
                        break;
                    case SyntaxKind.CloseBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
                            const lastTemplateStackToken = lastOrUndefined(templateStack);

                            if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
                                token = scanner.reScanTemplateToken(/* isTaggedTemplate */ false);

                                // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
                                if (token === SyntaxKind.TemplateTail) {
                                    templateStack.pop();
                                }
                                else {
                                    Debug.assertEqual(token, SyntaxKind.TemplateMiddle, "Should have been a template middle.");
                                }
                            }
                            else {
                                Debug.assertEqual(lastTemplateStackToken, SyntaxKind.OpenBraceToken, "Should have been an open brace");
                                templateStack.pop();
                            }
                        }
                        break;
                    default:
                        if (!isKeyword(token)) {
                            break;
                        }

                        if (lastNonTriviaToken === SyntaxKind.DotToken) {
                            token = SyntaxKind.Identifier;
                        }
                        else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
                            // We have two keywords in a row. Only treat the second as a keyword if
                            // it's a sequence that could legally occur in the language. Otherwise
                            // treat it as an identifier. This way, if someone writes "private var"
                            // we recognize that 'var' is actually an identifier here.
                            token = SyntaxKind.Identifier;
                        }
                }
            }

            return { endOfLineState, spans };
        }

        return { getClassificationsForLine, getEncodedLexicalClassifications };
    }

    /// We do not have full parser support to know when we should parse a regex or not.
    /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
    /// we have a series of divide operators. This list allows us to be more accurate by ruling out
    /// locations where a regexp cannot exist.
    const noRegexTable: true[] = arrayToNumericMap<SyntaxKind, true>([
        SyntaxKind.Identifier,
        SyntaxKind.StringLiteral,
        SyntaxKind.NumericLiteral,
        SyntaxKind.BigIntLiteral,
        SyntaxKind.RegularExpressionLiteral,
        SyntaxKind.ThisKeyword,
        SyntaxKind.PlusPlusToken,
        SyntaxKind.MinusMinusToken,
        SyntaxKind.CloseParenToken,
        SyntaxKind.CloseBracketToken,
        SyntaxKind.CloseBraceToken,
        SyntaxKind.TrueKeyword,
        SyntaxKind.FalseKeyword,
    ], token => token, () => true);

    function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined {
        switch (token) {
            case SyntaxKind.StringLiteral: {
                // Check to see if we finished up on a multiline string literal.
                if (!scanner.isUnterminated()) return undefined;

                const tokenText = scanner.getTokenText();
                const lastCharIndex = tokenText.length - 1;
                let numBackslashes = 0;
                while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
                    numBackslashes++;
                }

                // If we have an odd number of backslashes, then the multiline string is unclosed
                if ((numBackslashes & 1) === 0) return undefined;
                return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral;
            }
            case SyntaxKind.MultiLineCommentTrivia:
                // Check to see if the multiline comment was unclosed.
                return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined;
            default:
                if (isTemplateLiteralKind(token)) {
                    if (!scanner.isUnterminated()) {
                        return undefined;
                    }
                    switch (token) {
                        case SyntaxKind.TemplateTail:
                            return EndOfLineState.InTemplateMiddleOrTail;
                        case SyntaxKind.NoSubstitutionTemplateLiteral:
                            return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
                        default:
                            return Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
                    }
                }
                return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined;
        }
    }

    function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void {
        if (classification === ClassificationType.whiteSpace) {
            // Don't bother with whitespace classifications. They're not needed.
            return;
        }

        if (start === 0 && offset > 0) {
            // We're classifying the first token, and this was a case where we prepended text.
            // We should consider the start of this token to be at the start of the original text.
            start += offset;
        }

        const length = end - start;
        if (length > 0) {
            // All our tokens are in relation to the augmented text. Move them back to be
            // relative to the original text.
            result.push(start - offset, length, classification);
        }
    }
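
    // Converts the dense [start, length, classificationType] triples produced above into
    // ClassificationInfo entries, synthesizing whitespace entries for the gaps between spans
    // so the entries cover the entire input text.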
    function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult {
        const entries: ClassificationInfo[] = [];
        const dense = classifications.spans;
        let lastEnd = 0;

        for (let i = 0; i < dense.length; i += 3) {
            const start = dense[i];
            const length = dense[i + 1];
            const type = dense[i + 2] as ClassificationType;

            // Make a whitespace entry between the last item and this one.
            if (lastEnd >= 0) {
                const whitespaceLength = start - lastEnd;
                if (whitespaceLength > 0) {
                    entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
                }
            }

            entries.push({ length, classification: convertClassification(type) });
            lastEnd = start + length;
        }

        const whitespaceLength = text.length - lastEnd;
        if (whitespaceLength > 0) {
            entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
        }

        return { entries, finalLexState: classifications.endOfLineState };
    }

    function convertClassification(type: ClassificationType): TokenClass {
        switch (type) {
            case ClassificationType.comment: return TokenClass.Comment;
            case ClassificationType.keyword: return TokenClass.Keyword;
            case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
            case ClassificationType.bigintLiteral: return TokenClass.BigIntLiteral;
            case ClassificationType.operator: return TokenClass.Operator;
            case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
            case ClassificationType.whiteSpace: return TokenClass.Whitespace;
            case ClassificationType.punctuation: return TokenClass.Punctuation;
            case ClassificationType.identifier:
            case ClassificationType.className:
            case ClassificationType.enumName:
            case ClassificationType.interfaceName:
            case ClassificationType.moduleName:
            case ClassificationType.typeParameterName:
            case ClassificationType.typeAliasName:
            case ClassificationType.text:
            case ClassificationType.parameterName:
                return TokenClass.Identifier;
            default:
                return undefined!; // TODO: GH#18217 Debug.assertNever(type);
        }
    }

    /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
    function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean {
        if (!isAccessibilityModifier(keyword1)) {
            // Assume any other keyword combination is legal.
            // This can be refined in the future if there are more cases we want the classifier to be better at.
            return true;
        }
        switch (keyword2) {
            case SyntaxKind.GetKeyword:
            case SyntaxKind.SetKeyword:
            case SyntaxKind.ConstructorKeyword:
            case SyntaxKind.StaticKeyword:
            case SyntaxKind.AccessorKeyword:
                return true; // Allow things like "public get", "public constructor" and "public static".
            default:
                return false; // Any other keyword following "public" is actually an identifier, not a real keyword.
        }
    }

    function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } {
        // If we're in a string literal, then prepend: "\
        // (and a newline). That way when we lex we'll think we're still in a string literal.
        //
        // If we're in a multiline comment, then prepend: /*
        // (and a newline). That way when we lex we'll think we're still in a multiline comment.
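        //
        // Similarly (illustrative): if the previous line ended inside a template substitution,
        // no prefix is needed, but 'pushTemplate' tells the caller to seed its template stack so
        // that a '}' on this line is rescanned as a template middle or tail.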
        switch (lexState) {
            case EndOfLineState.InDoubleQuoteStringLiteral:
                return { prefix: "\"\\\n" };
            case EndOfLineState.InSingleQuoteStringLiteral:
                return { prefix: "'\\\n" };
            case EndOfLineState.InMultiLineCommentTrivia:
                return { prefix: "/*\n" };
            case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
                return { prefix: "`\n" };
            case EndOfLineState.InTemplateMiddleOrTail:
                return { prefix: "}\n", pushTemplate: true };
            case EndOfLineState.InTemplateSubstitutionPosition:
                return { prefix: "", pushTemplate: true };
            case EndOfLineState.None:
                return { prefix: "" };
            default:
                return Debug.assertNever(lexState);
        }
    }

    function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
        switch (token) {
            case SyntaxKind.AsteriskToken:
            case SyntaxKind.SlashToken:
            case SyntaxKind.PercentToken:
            case SyntaxKind.PlusToken:
            case SyntaxKind.MinusToken:
            case SyntaxKind.LessThanLessThanToken:
            case SyntaxKind.GreaterThanGreaterThanToken:
            case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
            case SyntaxKind.LessThanToken:
            case SyntaxKind.GreaterThanToken:
            case SyntaxKind.LessThanEqualsToken:
            case SyntaxKind.GreaterThanEqualsToken:
            case SyntaxKind.InstanceOfKeyword:
            case SyntaxKind.InKeyword:
            case SyntaxKind.AsKeyword:
            case SyntaxKind.SatisfiesKeyword:
            case SyntaxKind.EqualsEqualsToken:
            case SyntaxKind.ExclamationEqualsToken:
            case SyntaxKind.EqualsEqualsEqualsToken:
            case SyntaxKind.ExclamationEqualsEqualsToken:
            case SyntaxKind.AmpersandToken:
            case SyntaxKind.CaretToken:
            case SyntaxKind.BarToken:
            case SyntaxKind.AmpersandAmpersandToken:
            case SyntaxKind.BarBarToken:
            case SyntaxKind.BarEqualsToken:
            case SyntaxKind.AmpersandEqualsToken:
            case SyntaxKind.CaretEqualsToken:
            case SyntaxKind.LessThanLessThanEqualsToken:
            case SyntaxKind.GreaterThanGreaterThanEqualsToken:
            case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
            case SyntaxKind.PlusEqualsToken:
            case SyntaxKind.MinusEqualsToken:
            case SyntaxKind.AsteriskEqualsToken:
            case SyntaxKind.SlashEqualsToken:
            case SyntaxKind.PercentEqualsToken:
            case SyntaxKind.EqualsToken:
            case SyntaxKind.CommaToken:
            case SyntaxKind.QuestionQuestionToken:
            case SyntaxKind.BarBarEqualsToken:
            case SyntaxKind.AmpersandAmpersandEqualsToken:
            case SyntaxKind.QuestionQuestionEqualsToken:
                return true;
            default:
                return false;
        }
    }

    function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
        switch (token) {
            case SyntaxKind.PlusToken:
            case SyntaxKind.MinusToken:
            case SyntaxKind.TildeToken:
            case SyntaxKind.ExclamationToken:
            case SyntaxKind.PlusPlusToken:
            case SyntaxKind.MinusMinusToken:
                return true;
            default:
                return false;
        }
    }
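
    // Maps a raw token kind to its lexical classification: keywords, then operators, then
    // punctuation, then literals and trivia, defaulting to 'identifier'.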
    function classFromKind(token: SyntaxKind): ClassificationType {
        if (isKeyword(token)) {
            return ClassificationType.keyword;
        }
        else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
            return ClassificationType.operator;
        }
        else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
            return ClassificationType.punctuation;
        }

        switch (token) {
            case SyntaxKind.NumericLiteral:
                return ClassificationType.numericLiteral;
            case SyntaxKind.BigIntLiteral:
                return ClassificationType.bigintLiteral;
            case SyntaxKind.StringLiteral:
                return ClassificationType.stringLiteral;
            case SyntaxKind.RegularExpressionLiteral:
                return ClassificationType.regularExpressionLiteral;
            case SyntaxKind.ConflictMarkerTrivia:
            case SyntaxKind.MultiLineCommentTrivia:
            case SyntaxKind.SingleLineCommentTrivia:
                return ClassificationType.comment;
            case SyntaxKind.WhitespaceTrivia:
            case SyntaxKind.NewLineTrivia:
                return ClassificationType.whiteSpace;
            case SyntaxKind.Identifier:
            default:
                if (isTemplateLiteralKind(token)) {
                    return ClassificationType.stringLiteral;
                }
                return ClassificationType.identifier;
        }
    }

    /* @internal */
    export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): ClassifiedSpan[] {
        return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
    }

    function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) {
        // We don't want to actually call back into our host on every node to find out if we've
        // been canceled. That would be an enormous amount of chattiness, along with all the
        // overhead of marshalling the data to/from the host. So instead we pick a few
        // reasonable node kinds to bother checking on. These node kinds represent high level
        // constructs that we would expect to see commonly, but just at a far less frequent
        // interval.
        //
        // For example, in checker.ts (around 750k) we only have around 600 of these constructs.
        // That means we're calling back into the host around every 1.2k of the file we process.
        // Lib.d.ts has similar numbers.
        switch (kind) {
            case SyntaxKind.ModuleDeclaration:
            case SyntaxKind.ClassDeclaration:
            case SyntaxKind.InterfaceDeclaration:
            case SyntaxKind.FunctionDeclaration:
            case SyntaxKind.ClassExpression:
            case SyntaxKind.FunctionExpression:
            case SyntaxKind.ArrowFunction:
                cancellationToken.throwIfCancellationRequested();
        }
    }

    /* @internal */
    export function getEncodedSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): Classifications {
        const spans: number[] = [];
        sourceFile.forEachChild(function cb(node: Node): void {
            // Only walk into nodes that intersect the requested span.
            if (!node || !textSpanIntersectsWith(span, node.pos, node.getFullWidth())) {
                return;
            }

            checkForClassificationCancellation(cancellationToken, node.kind);
            // Only bother calling into the typechecker if this is an identifier that
            // could possibly resolve to a type name. This makes classification run
            // in a third of the time it would normally take.
            if (isIdentifier(node) && !nodeIsMissing(node) && classifiableNames.has(node.escapedText)) {
                const symbol = typeChecker.getSymbolAtLocation(node);
                const type = symbol && classifySymbol(symbol, getMeaningFromLocation(node), typeChecker);
                if (type) {
                    pushClassification(node.getStart(sourceFile), node.getEnd(), type);
                }
            }

            node.forEachChild(cb);
        });
        return { spans, endOfLineState: EndOfLineState.None };

        function pushClassification(start: number, end: number, type: ClassificationType): void {
            const length = end - start;
            Debug.assert(length > 0, `Classification had non-positive length of ${length}`);
            spans.push(start);
            spans.push(length);
            spans.push(type);
        }
    }

    function classifySymbol(symbol: Symbol, meaningAtPosition: SemanticMeaning, checker: TypeChecker): ClassificationType | undefined {
        const flags = symbol.getFlags();
        if ((flags & SymbolFlags.Classifiable) === SymbolFlags.None) {
            return undefined;
        }
        else if (flags & SymbolFlags.Class) {
            return ClassificationType.className;
        }
        else if (flags & SymbolFlags.Enum) {
            return ClassificationType.enumName;
        }
        else if (flags & SymbolFlags.TypeAlias) {
            return ClassificationType.typeAliasName;
        }
        else if (flags & SymbolFlags.Module) {
            // Only classify a module as such if
            //  - It appears in a namespace context.
            //  - There exists a module declaration which actually impacts the value side.
            return meaningAtPosition & SemanticMeaning.Namespace || meaningAtPosition & SemanticMeaning.Value && hasValueSideModule(symbol) ? ClassificationType.moduleName : undefined;
        }
        else if (flags & SymbolFlags.Alias) {
            return classifySymbol(checker.getAliasedSymbol(symbol), meaningAtPosition, checker);
        }
        else if (meaningAtPosition & SemanticMeaning.Type) {
            return flags & SymbolFlags.Interface ? ClassificationType.interfaceName : flags & SymbolFlags.TypeParameter ? ClassificationType.typeParameterName : undefined;
        }
        else {
            return undefined;
        }
    }

    /** Returns true if there exists a module that introduces entities on the value side. */
    function hasValueSideModule(symbol: Symbol): boolean {
        return some(symbol.declarations, declaration =>
            isModuleDeclaration(declaration) && getModuleInstanceState(declaration) === ModuleInstanceState.Instantiated);
    }

    function getClassificationTypeName(type: ClassificationType): ClassificationTypeNames {
        switch (type) {
            case ClassificationType.comment: return ClassificationTypeNames.comment;
            case ClassificationType.identifier: return ClassificationTypeNames.identifier;
            case ClassificationType.keyword: return ClassificationTypeNames.keyword;
            case ClassificationType.numericLiteral: return ClassificationTypeNames.numericLiteral;
            case ClassificationType.bigintLiteral: return ClassificationTypeNames.bigintLiteral;
            case ClassificationType.operator: return ClassificationTypeNames.operator;
            case ClassificationType.stringLiteral: return ClassificationTypeNames.stringLiteral;
            case ClassificationType.whiteSpace: return ClassificationTypeNames.whiteSpace;
            case ClassificationType.text: return ClassificationTypeNames.text;
            case ClassificationType.punctuation: return ClassificationTypeNames.punctuation;
            case ClassificationType.className: return ClassificationTypeNames.className;
            case ClassificationType.enumName: return ClassificationTypeNames.enumName;
            case ClassificationType.interfaceName: return ClassificationTypeNames.interfaceName;
            case ClassificationType.moduleName: return ClassificationTypeNames.moduleName;
            case ClassificationType.typeParameterName: return ClassificationTypeNames.typeParameterName;
            case ClassificationType.typeAliasName: return ClassificationTypeNames.typeAliasName;
            case ClassificationType.parameterName: return ClassificationTypeNames.parameterName;
            case ClassificationType.docCommentTagName: return ClassificationTypeNames.docCommentTagName;
            case ClassificationType.jsxOpenTagName: return ClassificationTypeNames.jsxOpenTagName;
            case ClassificationType.jsxCloseTagName: return ClassificationTypeNames.jsxCloseTagName;
            case ClassificationType.jsxSelfClosingTagName: return ClassificationTypeNames.jsxSelfClosingTagName;
            case ClassificationType.jsxAttribute: return ClassificationTypeNames.jsxAttribute;
            case ClassificationType.jsxText: return ClassificationTypeNames.jsxText;
            case ClassificationType.jsxAttributeStringLiteralValue: return ClassificationTypeNames.jsxAttributeStringLiteralValue;
            default: return undefined!; // TODO: GH#18217 throw Debug.assertNever(type);
        }
    }

    function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
        Debug.assert(classifications.spans.length % 3 === 0);
        const dense = classifications.spans;
        const result: ClassifiedSpan[] = [];
        for (let i = 0; i < dense.length; i += 3) {
            result.push({
                textSpan: createTextSpan(dense[i], dense[i + 1]),
                classificationType: getClassificationTypeName(dense[i + 2])
            });
        }

        return result;
    }

    /* @internal */
    export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
        return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
    }

    /* @internal */
    export function getEncodedSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): Classifications {
        const spanStart = span.start;
        const spanLength = span.length;
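
        // The walk below pushes encoded [start, length, classificationType] triples into 'result',
        // classifying each token's leading trivia (comments, conflict markers) before the token itself.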

        // Make a scanner we can get trivia from.
        const triviaScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);
        const mergeConflictScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);

        const result: number[] = [];
        processElement(sourceFile);

        return { spans: result, endOfLineState: EndOfLineState.None };

        function pushClassification(start: number, length: number, type: ClassificationType) {
            result.push(start);
            result.push(length);
            result.push(type);
        }

        function classifyLeadingTriviaAndGetTokenStart(token: Node): number {
            triviaScanner.setTextPos(token.pos);
            while (true) {
                const start = triviaScanner.getTextPos();
                // only bother scanning if we have something that could be trivia.
                if (!couldStartTrivia(sourceFile.text, start)) {
                    return start;
                }

                const kind = triviaScanner.scan();
                const end = triviaScanner.getTextPos();
                const width = end - start;

                // The moment we get something that isn't trivia, then stop processing.
                if (!isTrivia(kind)) {
                    return start;
                }

                switch (kind) {
                    case SyntaxKind.NewLineTrivia:
                    case SyntaxKind.WhitespaceTrivia:
                        // Don't bother with newlines/whitespace.
                        continue;

                    case SyntaxKind.SingleLineCommentTrivia:
                    case SyntaxKind.MultiLineCommentTrivia:
                        // Only bother with the trivia if it at least intersects the span of interest.
                        classifyComment(token, kind, start, width);

                        // Classifying a comment might cause us to reuse the trivia scanner
                        // (because of jsdoc comments). So after we classify the comment make
                        // sure we set the scanner position back to where it needs to be.
                        triviaScanner.setTextPos(end);
                        continue;

                    case SyntaxKind.ConflictMarkerTrivia:
                        const text = sourceFile.text;
                        const ch = text.charCodeAt(start);

                        // for the <<<<<<< and >>>>>>> markers, we just add them in as comments
                        // in the classification stream.
                        if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) {
                            pushClassification(start, width, ClassificationType.comment);
                            continue;
                        }

                        // for the ||||||| and ======= markers, add a comment for the first line,
                        // and then lex all subsequent lines up until the end of the conflict marker.
                        Debug.assert(ch === CharacterCodes.bar || ch === CharacterCodes.equals);
                        classifyDisabledMergeCode(text, start, end);
                        break;

                    case SyntaxKind.ShebangTrivia:
                        // TODO: Maybe we should classify these.
                        break;

                    default:
                        Debug.assertNever(kind);
                }
            }
        }

        function classifyComment(token: Node, kind: SyntaxKind, start: number, width: number) {
            if (kind === SyntaxKind.MultiLineCommentTrivia) {
                // See if this is a doc comment. If so, we'll classify certain portions of it
                // specially.
                const docCommentAndDiagnostics = parseIsolatedJSDocComment(sourceFile.text, start, width);
                if (docCommentAndDiagnostics && docCommentAndDiagnostics.jsDoc) {
                    // TODO: This should be predicated on `token["kind"]` being compatible with `HasJSDoc["kind"]`
                    setParent(docCommentAndDiagnostics.jsDoc, token as HasJSDoc);
                    classifyJSDocComment(docCommentAndDiagnostics.jsDoc);
                    return;
                }
            }
            else if (kind === SyntaxKind.SingleLineCommentTrivia) {
                if (tryClassifyTripleSlashComment(start, width)) {
                    return;
                }
            }

            // Simple comment. Just add as is.
            pushCommentRange(start, width);
        }

        function pushCommentRange(start: number, width: number) {
            pushClassification(start, width, ClassificationType.comment);
        }

        function classifyJSDocComment(docComment: JSDoc) {
            let pos = docComment.pos;

            if (docComment.tags) {
                for (const tag of docComment.tags) {
                    // As we walk through each tag, classify the portion of text from the end of
                    // the last tag (or the start of the entire doc comment) as 'comment'.
                    if (tag.pos !== pos) {
                        pushCommentRange(pos, tag.pos - pos);
                    }

                    pushClassification(tag.pos, 1, ClassificationType.punctuation); // "@"
                    pushClassification(tag.tagName.pos, tag.tagName.end - tag.tagName.pos, ClassificationType.docCommentTagName); // e.g. "param"

                    pos = tag.tagName.end;
                    let commentStart = tag.tagName.end;

                    switch (tag.kind) {
                        case SyntaxKind.JSDocParameterTag:
                            const param = tag as JSDocParameterTag;
                            processJSDocParameterTag(param);
                            commentStart = param.isNameFirst && param.typeExpression?.end || param.name.end;
                            break;
                        case SyntaxKind.JSDocPropertyTag:
                            const prop = tag as JSDocPropertyTag;
                            commentStart = prop.isNameFirst && prop.typeExpression?.end || prop.name.end;
                            break;
                        case SyntaxKind.JSDocTemplateTag:
                            processJSDocTemplateTag(tag as JSDocTemplateTag);
                            pos = tag.end;
                            commentStart = (tag as JSDocTemplateTag).typeParameters.end;
                            break;
                        case SyntaxKind.JSDocTypedefTag:
                            const type = tag as JSDocTypedefTag;
                            commentStart = type.typeExpression?.kind === SyntaxKind.JSDocTypeExpression && type.fullName?.end || type.typeExpression?.end || commentStart;
                            break;
                        case SyntaxKind.JSDocCallbackTag:
                            commentStart = (tag as JSDocCallbackTag).typeExpression.end;
                            break;
                        case SyntaxKind.JSDocTypeTag:
                            processElement((tag as JSDocTypeTag).typeExpression);
                            pos = tag.end;
                            commentStart = (tag as JSDocTypeTag).typeExpression.end;
                            break;
                        case SyntaxKind.JSDocThisTag:
                        case SyntaxKind.JSDocEnumTag:
                            commentStart = (tag as JSDocThisTag | JSDocEnumTag).typeExpression.end;
                            break;
                        case SyntaxKind.JSDocReturnTag:
                            processElement((tag as JSDocReturnTag).typeExpression);
                            pos = tag.end;
                            commentStart = (tag as JSDocReturnTag).typeExpression?.end || commentStart;
                            break;
                        case SyntaxKind.JSDocSeeTag:
                            commentStart = (tag as JSDocSeeTag).name?.end || commentStart;
                            break;
                        case SyntaxKind.JSDocAugmentsTag:
                        case SyntaxKind.JSDocImplementsTag:
                            commentStart = (tag as JSDocImplementsTag | JSDocAugmentsTag).class.end;
                            break;
                    }
                    if (typeof tag.comment === "object") {
                        pushCommentRange(tag.comment.pos, tag.comment.end - tag.comment.pos);
                    }
                    else if (typeof tag.comment === "string") {
                        pushCommentRange(commentStart, tag.end - commentStart);
                    }
                }
            }

            if (pos !== docComment.end) {
                pushCommentRange(pos, docComment.end - pos);
            }

            return;

            function processJSDocParameterTag(tag: JSDocParameterTag) {
                if (tag.isNameFirst) {
                    pushCommentRange(pos, tag.name.pos - pos);
                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                    pos = tag.name.end;
                }

                if (tag.typeExpression) {
                    pushCommentRange(pos, tag.typeExpression.pos - pos);
                    processElement(tag.typeExpression);
                    pos = tag.typeExpression.end;
                }

                if (!tag.isNameFirst) {
                    pushCommentRange(pos, tag.name.pos - pos);
                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                    pos = tag.name.end;
                }
            }
        }

        function tryClassifyTripleSlashComment(start: number, width: number): boolean {
            const tripleSlashXMLCommentRegEx = /^(\/\/\/\s*)(<)(?:(\S+)((?:[^/]|\/[^>])*)(\/>)?)?/im;
            // Require a leading whitespace character (the parser already does) to prevent terrible backtracking performance
            const attributeRegex = /(\s)(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img;

            const text = sourceFile.text.substr(start, width);
            const match = tripleSlashXMLCommentRegEx.exec(text);
            if (!match) {
                return false;
            }

            // Limiting classification to exactly the elements and attributes
            // defined in `ts.commentPragmas` would be excessive, but we can avoid
            // some obvious false positives (e.g. in XML-like doc comments) by
            // checking the element name.
            // eslint-disable-next-line local/no-in-operator
            if (!match[3] || !(match[3] in commentPragmas)) {
                return false;
            }

            let pos = start;

            pushCommentRange(pos, match[1].length); // ///
            pos += match[1].length;

            pushClassification(pos, match[2].length, ClassificationType.punctuation); // <
            pos += match[2].length;

            pushClassification(pos, match[3].length, ClassificationType.jsxSelfClosingTagName); // element name
            pos += match[3].length;

            const attrText = match[4];
            let attrPos = pos;
            while (true) {
                const attrMatch = attributeRegex.exec(attrText);
                if (!attrMatch) {
                    break;
                }

                const newAttrPos = pos + attrMatch.index + attrMatch[1].length; // whitespace
                if (newAttrPos > attrPos) {
                    pushCommentRange(attrPos, newAttrPos - attrPos);
                    attrPos = newAttrPos;
                }

                pushClassification(attrPos, attrMatch[2].length, ClassificationType.jsxAttribute); // attribute name
                attrPos += attrMatch[2].length;

                if (attrMatch[3].length) {
                    pushCommentRange(attrPos, attrMatch[3].length); // whitespace
                    attrPos += attrMatch[3].length;
                }

                pushClassification(attrPos, attrMatch[4].length, ClassificationType.operator); // =
                attrPos += attrMatch[4].length;

                if (attrMatch[5].length) {
                    pushCommentRange(attrPos, attrMatch[5].length); // whitespace
                    attrPos += attrMatch[5].length;
                }

                pushClassification(attrPos, attrMatch[6].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value
                attrPos += attrMatch[6].length;
            }

            pos += match[4].length;

            if (pos > attrPos) {
                pushCommentRange(attrPos, pos - attrPos);
            }

            if (match[5]) {
                pushClassification(pos, match[5].length, ClassificationType.punctuation); // />
                pos += match[5].length;
            }

            const end = start + width;
            if (pos < end) {
                pushCommentRange(pos, end - pos);
            }

            return true;
        }

        function processJSDocTemplateTag(tag: JSDocTemplateTag) {
            for (const child of tag.getChildren()) {
                processElement(child);
            }
        }

        function classifyDisabledMergeCode(text: string, start: number, end: number) {
            // Classify the line that the ||||||| or ======= marker is on as a comment.
            // Then just lex all further tokens and add them to the result.
            let i: number;
            for (i = start; i < end; i++) {
                if (isLineBreak(text.charCodeAt(i))) {
                    break;
                }
            }
            pushClassification(start, i - start, ClassificationType.comment);
            mergeConflictScanner.setTextPos(i);

            while (mergeConflictScanner.getTextPos() < end) {
                classifyDisabledCodeToken();
            }
        }

        function classifyDisabledCodeToken() {
            const start = mergeConflictScanner.getTextPos();
            const tokenKind = mergeConflictScanner.scan();
            const end = mergeConflictScanner.getTextPos();

            const type = classifyTokenType(tokenKind);
            if (type) {
                pushClassification(start, end - start, type);
            }
        }

        /**
         * Returns true if node should be treated as classified and no further processing is required.
         * False will mean that node is not classified and traverse routine should recurse into node contents.
         */
        function tryClassifyNode(node: Node): boolean {
            if (isJSDoc(node)) {
                return true;
            }

            if (nodeIsMissing(node)) {
                return true;
            }

            const classifiedElementName = tryClassifyJsxElementName(node);
            if (!isToken(node) && node.kind !== SyntaxKind.JsxText && classifiedElementName === undefined) {
                return false;
            }

            const tokenStart = node.kind === SyntaxKind.JsxText ? node.pos : classifyLeadingTriviaAndGetTokenStart(node);

            const tokenWidth = node.end - tokenStart;
            Debug.assert(tokenWidth >= 0);
            if (tokenWidth > 0) {
                const type = classifiedElementName || classifyTokenType(node.kind, node);
                if (type) {
                    pushClassification(tokenStart, tokenWidth, type);
                }
            }

            return true;
        }

        function tryClassifyJsxElementName(token: Node): ClassificationType | undefined {
            switch (token.parent && token.parent.kind) {
                case SyntaxKind.JsxOpeningElement:
                    if ((token.parent as JsxOpeningElement).tagName === token) {
                        return ClassificationType.jsxOpenTagName;
                    }
                    break;
                case SyntaxKind.JsxClosingElement:
                    if ((token.parent as JsxClosingElement).tagName === token) {
                        return ClassificationType.jsxCloseTagName;
                    }
                    break;
                case SyntaxKind.JsxSelfClosingElement:
                    if ((token.parent as JsxSelfClosingElement).tagName === token) {
                        return ClassificationType.jsxSelfClosingTagName;
                    }
                    break;
                case SyntaxKind.JsxAttribute:
                    if ((token.parent as JsxAttribute).name === token) {
                        return ClassificationType.jsxAttribute;
                    }
                    break;
            }
            return undefined;
        }

        // for accurate classification, the actual token should be passed in. however, for
        // cases like 'disabled merge code' classification, we just get the token kind and
        // classify based on that instead.
        function classifyTokenType(tokenKind: SyntaxKind, token?: Node): ClassificationType | undefined {
            if (isKeyword(tokenKind)) {
                return ClassificationType.keyword;
            }

            // Special case `<` and `>`: If they appear in a generic context they are punctuation,
            // not operators.
            if (tokenKind === SyntaxKind.LessThanToken || tokenKind === SyntaxKind.GreaterThanToken) {
                // If the node owning the token has a type argument list or type parameter list, then
                // we can effectively assume that a '<' and '>' belong to those lists.
                if (token && getTypeArgumentOrTypeParameterList(token.parent)) {
                    return ClassificationType.punctuation;
                }
            }

            if (isPunctuation(tokenKind)) {
                if (token) {
                    const parent = token.parent;
                    if (tokenKind === SyntaxKind.EqualsToken) {
                        // the '=' in a variable declaration is special cased here.
                        if (parent.kind === SyntaxKind.VariableDeclaration ||
                            parent.kind === SyntaxKind.PropertyDeclaration ||
                            parent.kind === SyntaxKind.Parameter ||
                            parent.kind === SyntaxKind.JsxAttribute) {
                            return ClassificationType.operator;
                        }
                    }

                    if (parent.kind === SyntaxKind.BinaryExpression ||
                        parent.kind === SyntaxKind.PrefixUnaryExpression ||
                        parent.kind === SyntaxKind.PostfixUnaryExpression ||
                        parent.kind === SyntaxKind.ConditionalExpression) {
                        return ClassificationType.operator;
                    }
                }

                return ClassificationType.punctuation;
            }
            else if (tokenKind === SyntaxKind.NumericLiteral) {
                return ClassificationType.numericLiteral;
            }
            else if (tokenKind === SyntaxKind.BigIntLiteral) {
                return ClassificationType.bigintLiteral;
            }
            else if (tokenKind === SyntaxKind.StringLiteral) {
                return token && token.parent.kind === SyntaxKind.JsxAttribute ? ClassificationType.jsxAttributeStringLiteralValue : ClassificationType.stringLiteral;
            }
            else if (tokenKind === SyntaxKind.RegularExpressionLiteral) {
                // TODO: we should get another classification type for these literals.
                return ClassificationType.stringLiteral;
            }
            else if (isTemplateLiteralKind(tokenKind)) {
                // TODO (drosen): we should *also* get another classification type for these literals.
                return ClassificationType.stringLiteral;
            }
            else if (tokenKind === SyntaxKind.JsxText) {
                return ClassificationType.jsxText;
            }
            else if (tokenKind === SyntaxKind.Identifier) {
                if (token) {
                    switch (token.parent.kind) {
                        case SyntaxKind.ClassDeclaration:
                            if ((token.parent as ClassDeclaration).name === token) {
                                return ClassificationType.className;
                            }
                            return;
                        case SyntaxKind.TypeParameter:
                            if ((token.parent as TypeParameterDeclaration).name === token) {
                                return ClassificationType.typeParameterName;
                            }
                            return;
                        case SyntaxKind.InterfaceDeclaration:
                            if ((token.parent as InterfaceDeclaration).name === token) {
                                return ClassificationType.interfaceName;
                            }
                            return;
                        case SyntaxKind.EnumDeclaration:
                            if ((token.parent as EnumDeclaration).name === token) {
                                return ClassificationType.enumName;
                            }
                            return;
                        case SyntaxKind.ModuleDeclaration:
                            if ((token.parent as ModuleDeclaration).name === token) {
                                return ClassificationType.moduleName;
                            }
                            return;
                        case SyntaxKind.Parameter:
                            if ((token.parent as ParameterDeclaration).name === token) {
                                return isThisIdentifier(token) ? ClassificationType.keyword : ClassificationType.parameterName;
                            }
                            return;
                    }

                    if (isConstTypeReference(token.parent)) {
                        return ClassificationType.keyword;
                    }
                }
                return ClassificationType.identifier;
            }
        }

        function processElement(element: Node | undefined) {
            if (!element) {
                return;
            }

            // Ignore nodes that don't intersect the original span to classify.
            if (decodedTextSpanIntersectsWith(spanStart, spanLength, element.pos, element.getFullWidth())) {
                checkForClassificationCancellation(cancellationToken, element.kind);

                for (const child of element.getChildren(sourceFile)) {
                    if (!tryClassifyNode(child)) {
                        // Recurse into our child nodes.
                        processElement(child);
                    }
                }
            }
        }
    }
}