1namespace ts { 2 /** The classifier is used for syntactic highlighting in editors via the TSServer */ 3 export function createClassifier(): Classifier { 4 const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false); 5 6 function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult { 7 return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text); 8 } 9 10 // If there is a syntactic classifier ('syntacticClassifierAbsent' is false), 11 // we will be more conservative in order to avoid conflicting with the syntactic classifier. 12 function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications { 13 let token = SyntaxKind.Unknown; 14 let lastNonTriviaToken = SyntaxKind.Unknown; 15 16 // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact) 17 // classification on template strings. Because of the context free nature of templates, 18 // the only precise way to classify a template portion would be by propagating the stack across 19 // lines, just as we do with the end-of-line state. However, this is a burden for implementers, 20 // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead 21 // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state. 22 // Situations in which this fails are 23 // 1) When template strings are nested across different lines: 24 // `hello ${ `world 25 // ` }` 26 // 27 // Where on the second line, you will get the closing of a template, 28 // a closing curly, and a new template. 29 // 30 // 2) When substitution expressions have curly braces and the curly brace falls on the next line: 31 // `hello ${ () => { 32 // return "world" } } ` 33 // 34 // Where on the second line, you will get the 'return' keyword, 35 // a string literal, and a template end consisting of '} } `'. 36 const templateStack: SyntaxKind[] = []; 37 38 const { prefix, pushTemplate } = getPrefixFromLexState(lexState); 39 text = prefix + text; 40 const offset = prefix.length; 41 if (pushTemplate) { 42 templateStack.push(SyntaxKind.TemplateHead); 43 } 44 45 scanner.setText(text); 46 47 let endOfLineState = EndOfLineState.None; 48 const spans: number[] = []; 49 50 // We can run into an unfortunate interaction between the lexical and syntactic classifier 51 // when the user is typing something generic. Consider the case where the user types: 52 // 53 // Foo<number 54 // 55 // From the lexical classifier's perspective, 'number' is a keyword, and so the word will 56 // be classified as such. However, from the syntactic classifier's tree-based perspective 57 // this is simply an expression with the identifier 'number' on the RHS of the less than 58 // token. So the classification will go back to being an identifier. The moment the user 59 // types again, number will become a keyword, then an identifier, etc. etc. 60 // 61 // To try to avoid this problem, we avoid classifying contextual keywords as keywords 62 // when the user is potentially typing something generic. We just can't do a good enough 63 // job at the lexical level, and so well leave it up to the syntactic classifier to make 64 // the determination. 65 // 66 // In order to determine if the user is potentially typing something generic, we use a 67 // weak heuristic where we track < and > tokens. It's a weak heuristic, but should 68 // work well enough in practice. 69 let angleBracketStack = 0; 70 71 do { 72 token = scanner.scan(); 73 if (!isTrivia(token)) { 74 handleToken(); 75 lastNonTriviaToken = token; 76 } 77 const end = scanner.getTextPos(); 78 pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans); 79 if (end >= text.length) { 80 const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack)); 81 if (end !== undefined) { 82 endOfLineState = end; 83 } 84 } 85 } while (token !== SyntaxKind.EndOfFileToken); 86 87 function handleToken(): void { 88 switch (token) { 89 case SyntaxKind.SlashToken: 90 case SyntaxKind.SlashEqualsToken: 91 if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) { 92 token = SyntaxKind.RegularExpressionLiteral; 93 } 94 break; 95 case SyntaxKind.LessThanToken: 96 if (lastNonTriviaToken === SyntaxKind.Identifier) { 97 // Could be the start of something generic. Keep track of that by bumping 98 // up the current count of generic contexts we may be in. 99 angleBracketStack++; 100 } 101 break; 102 case SyntaxKind.GreaterThanToken: 103 if (angleBracketStack > 0) { 104 // If we think we're currently in something generic, then mark that that 105 // generic entity is complete. 106 angleBracketStack--; 107 } 108 break; 109 case SyntaxKind.AnyKeyword: 110 case SyntaxKind.StringKeyword: 111 case SyntaxKind.NumberKeyword: 112 case SyntaxKind.BooleanKeyword: 113 case SyntaxKind.SymbolKeyword: 114 if (angleBracketStack > 0 && !syntacticClassifierAbsent) { 115 // If it looks like we're could be in something generic, don't classify this 116 // as a keyword. We may just get overwritten by the syntactic classifier, 117 // causing a noisy experience for the user. 118 token = SyntaxKind.Identifier; 119 } 120 break; 121 case SyntaxKind.TemplateHead: 122 templateStack.push(token); 123 break; 124 case SyntaxKind.OpenBraceToken: 125 // If we don't have anything on the template stack, 126 // then we aren't trying to keep track of a previously scanned template head. 127 if (templateStack.length > 0) { 128 templateStack.push(token); 129 } 130 break; 131 case SyntaxKind.CloseBraceToken: 132 // If we don't have anything on the template stack, 133 // then we aren't trying to keep track of a previously scanned template head. 134 if (templateStack.length > 0) { 135 const lastTemplateStackToken = lastOrUndefined(templateStack); 136 137 if (lastTemplateStackToken === SyntaxKind.TemplateHead) { 138 token = scanner.reScanTemplateToken(/* isTaggedTemplate */ false); 139 140 // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us. 141 if (token === SyntaxKind.TemplateTail) { 142 templateStack.pop(); 143 } 144 else { 145 Debug.assertEqual(token, SyntaxKind.TemplateMiddle, "Should have been a template middle."); 146 } 147 } 148 else { 149 Debug.assertEqual(lastTemplateStackToken, SyntaxKind.OpenBraceToken, "Should have been an open brace"); 150 templateStack.pop(); 151 } 152 } 153 break; 154 default: 155 if (!isKeyword(token)) { 156 break; 157 } 158 159 if (lastNonTriviaToken === SyntaxKind.DotToken) { 160 token = SyntaxKind.Identifier; 161 } 162 else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) { 163 // We have two keywords in a row. Only treat the second as a keyword if 164 // it's a sequence that could legally occur in the language. Otherwise 165 // treat it as an identifier. This way, if someone writes "private var" 166 // we recognize that 'var' is actually an identifier here. 167 token = SyntaxKind.Identifier; 168 } 169 } 170 } 171 172 return { endOfLineState, spans }; 173 } 174 175 return { getClassificationsForLine, getEncodedLexicalClassifications }; 176 } 177 178 /// We do not have a full parser support to know when we should parse a regex or not 179 /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where 180 /// we have a series of divide operator. this list allows us to be more accurate by ruling out 181 /// locations where a regexp cannot exist. 182 const noRegexTable: true[] = arrayToNumericMap<SyntaxKind, true>([ 183 SyntaxKind.Identifier, 184 SyntaxKind.StringLiteral, 185 SyntaxKind.NumericLiteral, 186 SyntaxKind.BigIntLiteral, 187 SyntaxKind.RegularExpressionLiteral, 188 SyntaxKind.ThisKeyword, 189 SyntaxKind.PlusPlusToken, 190 SyntaxKind.MinusMinusToken, 191 SyntaxKind.CloseParenToken, 192 SyntaxKind.CloseBracketToken, 193 SyntaxKind.CloseBraceToken, 194 SyntaxKind.TrueKeyword, 195 SyntaxKind.FalseKeyword, 196 ], token => token, () => true); 197 198 function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined { 199 switch (token) { 200 case SyntaxKind.StringLiteral: { 201 // Check to see if we finished up on a multiline string literal. 202 if (!scanner.isUnterminated()) return undefined; 203 204 const tokenText = scanner.getTokenText(); 205 const lastCharIndex = tokenText.length - 1; 206 let numBackslashes = 0; 207 while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) { 208 numBackslashes++; 209 } 210 211 // If we have an odd number of backslashes, then the multiline string is unclosed 212 if ((numBackslashes & 1) === 0) return undefined; 213 return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral; 214 } 215 case SyntaxKind.MultiLineCommentTrivia: 216 // Check to see if the multiline comment was unclosed. 217 return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined; 218 default: 219 if (isTemplateLiteralKind(token)) { 220 if (!scanner.isUnterminated()) { 221 return undefined; 222 } 223 switch (token) { 224 case SyntaxKind.TemplateTail: 225 return EndOfLineState.InTemplateMiddleOrTail; 226 case SyntaxKind.NoSubstitutionTemplateLiteral: 227 return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate; 228 default: 229 return Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token); 230 } 231 } 232 return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined; 233 } 234 } 235 236 function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void { 237 if (classification === ClassificationType.whiteSpace) { 238 // Don't bother with whitespace classifications. They're not needed. 239 return; 240 } 241 242 if (start === 0 && offset > 0) { 243 // We're classifying the first token, and this was a case where we prepended text. 244 // We should consider the start of this token to be at the start of the original text. 245 start += offset; 246 } 247 248 const length = end - start; 249 if (length > 0) { 250 // All our tokens are in relation to the augmented text. Move them back to be 251 // relative to the original text. 252 result.push(start - offset, length, classification); 253 } 254 } 255 256 function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult { 257 const entries: ClassificationInfo[] = []; 258 const dense = classifications.spans; 259 let lastEnd = 0; 260 261 for (let i = 0; i < dense.length; i += 3) { 262 const start = dense[i]; 263 const length = dense[i + 1]; 264 const type = <ClassificationType>dense[i + 2]; 265 266 // Make a whitespace entry between the last item and this one. 267 if (lastEnd >= 0) { 268 const whitespaceLength = start - lastEnd; 269 if (whitespaceLength > 0) { 270 entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); 271 } 272 } 273 274 entries.push({ length, classification: convertClassification(type) }); 275 lastEnd = start + length; 276 } 277 278 const whitespaceLength = text.length - lastEnd; 279 if (whitespaceLength > 0) { 280 entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); 281 } 282 283 return { entries, finalLexState: classifications.endOfLineState }; 284 } 285 286 function convertClassification(type: ClassificationType): TokenClass { 287 switch (type) { 288 case ClassificationType.comment: return TokenClass.Comment; 289 case ClassificationType.keyword: return TokenClass.Keyword; 290 case ClassificationType.numericLiteral: return TokenClass.NumberLiteral; 291 case ClassificationType.bigintLiteral: return TokenClass.BigIntLiteral; 292 case ClassificationType.operator: return TokenClass.Operator; 293 case ClassificationType.stringLiteral: return TokenClass.StringLiteral; 294 case ClassificationType.whiteSpace: return TokenClass.Whitespace; 295 case ClassificationType.punctuation: return TokenClass.Punctuation; 296 case ClassificationType.identifier: 297 case ClassificationType.className: 298 case ClassificationType.enumName: 299 case ClassificationType.interfaceName: 300 case ClassificationType.moduleName: 301 case ClassificationType.typeParameterName: 302 case ClassificationType.typeAliasName: 303 case ClassificationType.text: 304 case ClassificationType.parameterName: 305 return TokenClass.Identifier; 306 default: 307 return undefined!; // TODO: GH#18217 Debug.assertNever(type); 308 } 309 } 310 311 /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */ 312 function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean { 313 if (!isAccessibilityModifier(keyword1)) { 314 // Assume any other keyword combination is legal. 315 // This can be refined in the future if there are more cases we want the classifier to be better at. 316 return true; 317 } 318 switch (keyword2) { 319 case SyntaxKind.GetKeyword: 320 case SyntaxKind.SetKeyword: 321 case SyntaxKind.ConstructorKeyword: 322 case SyntaxKind.StaticKeyword: 323 return true; // Allow things like "public get", "public constructor" and "public static". 324 default: 325 return false; // Any other keyword following "public" is actually an identifier, not a real keyword. 326 } 327 } 328 329 function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } { 330 // If we're in a string literal, then prepend: "\ 331 // (and a newline). That way when we lex we'll think we're still in a string literal. 332 // 333 // If we're in a multiline comment, then prepend: /* 334 // (and a newline). That way when we lex we'll think we're still in a multiline comment. 335 switch (lexState) { 336 case EndOfLineState.InDoubleQuoteStringLiteral: 337 return { prefix: "\"\\\n" }; 338 case EndOfLineState.InSingleQuoteStringLiteral: 339 return { prefix: "'\\\n" }; 340 case EndOfLineState.InMultiLineCommentTrivia: 341 return { prefix: "/*\n" }; 342 case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate: 343 return { prefix: "`\n" }; 344 case EndOfLineState.InTemplateMiddleOrTail: 345 return { prefix: "}\n", pushTemplate: true }; 346 case EndOfLineState.InTemplateSubstitutionPosition: 347 return { prefix: "", pushTemplate: true }; 348 case EndOfLineState.None: 349 return { prefix: "" }; 350 default: 351 return Debug.assertNever(lexState); 352 } 353 } 354 355 function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean { 356 switch (token) { 357 case SyntaxKind.AsteriskToken: 358 case SyntaxKind.SlashToken: 359 case SyntaxKind.PercentToken: 360 case SyntaxKind.PlusToken: 361 case SyntaxKind.MinusToken: 362 case SyntaxKind.LessThanLessThanToken: 363 case SyntaxKind.GreaterThanGreaterThanToken: 364 case SyntaxKind.GreaterThanGreaterThanGreaterThanToken: 365 case SyntaxKind.LessThanToken: 366 case SyntaxKind.GreaterThanToken: 367 case SyntaxKind.LessThanEqualsToken: 368 case SyntaxKind.GreaterThanEqualsToken: 369 case SyntaxKind.InstanceOfKeyword: 370 case SyntaxKind.InKeyword: 371 case SyntaxKind.AsKeyword: 372 case SyntaxKind.EqualsEqualsToken: 373 case SyntaxKind.ExclamationEqualsToken: 374 case SyntaxKind.EqualsEqualsEqualsToken: 375 case SyntaxKind.ExclamationEqualsEqualsToken: 376 case SyntaxKind.AmpersandToken: 377 case SyntaxKind.CaretToken: 378 case SyntaxKind.BarToken: 379 case SyntaxKind.AmpersandAmpersandToken: 380 case SyntaxKind.BarBarToken: 381 case SyntaxKind.BarEqualsToken: 382 case SyntaxKind.AmpersandEqualsToken: 383 case SyntaxKind.CaretEqualsToken: 384 case SyntaxKind.LessThanLessThanEqualsToken: 385 case SyntaxKind.GreaterThanGreaterThanEqualsToken: 386 case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken: 387 case SyntaxKind.PlusEqualsToken: 388 case SyntaxKind.MinusEqualsToken: 389 case SyntaxKind.AsteriskEqualsToken: 390 case SyntaxKind.SlashEqualsToken: 391 case SyntaxKind.PercentEqualsToken: 392 case SyntaxKind.EqualsToken: 393 case SyntaxKind.CommaToken: 394 case SyntaxKind.QuestionQuestionToken: 395 case SyntaxKind.BarBarEqualsToken: 396 case SyntaxKind.AmpersandAmpersandEqualsToken: 397 case SyntaxKind.QuestionQuestionEqualsToken: 398 return true; 399 default: 400 return false; 401 } 402 } 403 404 function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean { 405 switch (token) { 406 case SyntaxKind.PlusToken: 407 case SyntaxKind.MinusToken: 408 case SyntaxKind.TildeToken: 409 case SyntaxKind.ExclamationToken: 410 case SyntaxKind.PlusPlusToken: 411 case SyntaxKind.MinusMinusToken: 412 return true; 413 default: 414 return false; 415 } 416 } 417 418 function classFromKind(token: SyntaxKind): ClassificationType { 419 if (isKeyword(token)) { 420 return ClassificationType.keyword; 421 } 422 else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) { 423 return ClassificationType.operator; 424 } 425 else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) { 426 return ClassificationType.punctuation; 427 } 428 429 switch (token) { 430 case SyntaxKind.NumericLiteral: 431 return ClassificationType.numericLiteral; 432 case SyntaxKind.BigIntLiteral: 433 return ClassificationType.bigintLiteral; 434 case SyntaxKind.StringLiteral: 435 return ClassificationType.stringLiteral; 436 case SyntaxKind.RegularExpressionLiteral: 437 return ClassificationType.regularExpressionLiteral; 438 case SyntaxKind.ConflictMarkerTrivia: 439 case SyntaxKind.MultiLineCommentTrivia: 440 case SyntaxKind.SingleLineCommentTrivia: 441 return ClassificationType.comment; 442 case SyntaxKind.WhitespaceTrivia: 443 case SyntaxKind.NewLineTrivia: 444 return ClassificationType.whiteSpace; 445 case SyntaxKind.Identifier: 446 default: 447 if (isTemplateLiteralKind(token)) { 448 return ClassificationType.stringLiteral; 449 } 450 return ClassificationType.identifier; 451 } 452 } 453 454 /* @internal */ 455 export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): ClassifiedSpan[] { 456 return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span)); 457 } 458 459 function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) { 460 // We don't want to actually call back into our host on every node to find out if we've 461 // been canceled. That would be an enormous amount of chattyness, along with the all 462 // the overhead of marshalling the data to/from the host. So instead we pick a few 463 // reasonable node kinds to bother checking on. These node kinds represent high level 464 // constructs that we would expect to see commonly, but just at a far less frequent 465 // interval. 466 // 467 // For example, in checker.ts (around 750k) we only have around 600 of these constructs. 468 // That means we're calling back into the host around every 1.2k of the file we process. 469 // Lib.d.ts has similar numbers. 470 switch (kind) { 471 case SyntaxKind.ModuleDeclaration: 472 case SyntaxKind.ClassDeclaration: 473 case SyntaxKind.InterfaceDeclaration: 474 case SyntaxKind.FunctionDeclaration: 475 case SyntaxKind.ClassExpression: 476 case SyntaxKind.FunctionExpression: 477 case SyntaxKind.ArrowFunction: 478 cancellationToken.throwIfCancellationRequested(); 479 } 480 } 481 482 /* @internal */ 483 export function getEncodedSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): Classifications { 484 const spans: number[] = []; 485 sourceFile.forEachChild(function cb(node: Node): void { 486 // Only walk into nodes that intersect the requested span. 487 if (!node || !textSpanIntersectsWith(span, node.pos, node.getFullWidth())) { 488 return; 489 } 490 491 checkForClassificationCancellation(cancellationToken, node.kind); 492 // Only bother calling into the typechecker if this is an identifier that 493 // could possibly resolve to a type name. This makes classification run 494 // in a third of the time it would normally take. 495 if (isIdentifier(node) && !nodeIsMissing(node) && classifiableNames.has(node.escapedText)) { 496 const symbol = typeChecker.getSymbolAtLocation(node); 497 const type = symbol && classifySymbol(symbol, getMeaningFromLocation(node), typeChecker); 498 if (type) { 499 pushClassification(node.getStart(sourceFile), node.getEnd(), type); 500 } 501 } 502 503 node.forEachChild(cb); 504 }); 505 return { spans, endOfLineState: EndOfLineState.None }; 506 507 function pushClassification(start: number, end: number, type: ClassificationType): void { 508 const length = end - start; 509 Debug.assert(length > 0, `Classification had non-positive length of ${length}`); 510 spans.push(start); 511 spans.push(length); 512 spans.push(type); 513 } 514 } 515 516 function classifySymbol(symbol: Symbol, meaningAtPosition: SemanticMeaning, checker: TypeChecker): ClassificationType | undefined { 517 const flags = symbol.getFlags(); 518 if ((flags & SymbolFlags.Classifiable) === SymbolFlags.None) { 519 return undefined; 520 } 521 else if (flags & SymbolFlags.Class) { 522 return ClassificationType.className; 523 } 524 else if (flags & SymbolFlags.Enum) { 525 return ClassificationType.enumName; 526 } 527 else if (flags & SymbolFlags.TypeAlias) { 528 return ClassificationType.typeAliasName; 529 } 530 else if (flags & SymbolFlags.Module) { 531 // Only classify a module as such if 532 // - It appears in a namespace context. 533 // - There exists a module declaration which actually impacts the value side. 534 return meaningAtPosition & SemanticMeaning.Namespace || meaningAtPosition & SemanticMeaning.Value && hasValueSideModule(symbol) ? ClassificationType.moduleName : undefined; 535 } 536 else if (flags & SymbolFlags.Alias) { 537 return classifySymbol(checker.getAliasedSymbol(symbol), meaningAtPosition, checker); 538 } 539 else if (meaningAtPosition & SemanticMeaning.Type) { 540 return flags & SymbolFlags.Interface ? ClassificationType.interfaceName : flags & SymbolFlags.TypeParameter ? ClassificationType.typeParameterName : undefined; 541 } 542 else { 543 return undefined; 544 } 545 } 546 547 /** Returns true if there exists a module that introduces entities on the value side. */ 548 function hasValueSideModule(symbol: Symbol): boolean { 549 return some(symbol.declarations, declaration => 550 isModuleDeclaration(declaration) && getModuleInstanceState(declaration) === ModuleInstanceState.Instantiated); 551 } 552 553 function getClassificationTypeName(type: ClassificationType): ClassificationTypeNames { 554 switch (type) { 555 case ClassificationType.comment: return ClassificationTypeNames.comment; 556 case ClassificationType.identifier: return ClassificationTypeNames.identifier; 557 case ClassificationType.keyword: return ClassificationTypeNames.keyword; 558 case ClassificationType.numericLiteral: return ClassificationTypeNames.numericLiteral; 559 case ClassificationType.bigintLiteral: return ClassificationTypeNames.bigintLiteral; 560 case ClassificationType.operator: return ClassificationTypeNames.operator; 561 case ClassificationType.stringLiteral: return ClassificationTypeNames.stringLiteral; 562 case ClassificationType.whiteSpace: return ClassificationTypeNames.whiteSpace; 563 case ClassificationType.text: return ClassificationTypeNames.text; 564 case ClassificationType.punctuation: return ClassificationTypeNames.punctuation; 565 case ClassificationType.className: return ClassificationTypeNames.className; 566 case ClassificationType.enumName: return ClassificationTypeNames.enumName; 567 case ClassificationType.interfaceName: return ClassificationTypeNames.interfaceName; 568 case ClassificationType.moduleName: return ClassificationTypeNames.moduleName; 569 case ClassificationType.typeParameterName: return ClassificationTypeNames.typeParameterName; 570 case ClassificationType.typeAliasName: return ClassificationTypeNames.typeAliasName; 571 case ClassificationType.parameterName: return ClassificationTypeNames.parameterName; 572 case ClassificationType.docCommentTagName: return ClassificationTypeNames.docCommentTagName; 573 case ClassificationType.jsxOpenTagName: return ClassificationTypeNames.jsxOpenTagName; 574 case ClassificationType.jsxCloseTagName: return ClassificationTypeNames.jsxCloseTagName; 575 case ClassificationType.jsxSelfClosingTagName: return ClassificationTypeNames.jsxSelfClosingTagName; 576 case ClassificationType.jsxAttribute: return ClassificationTypeNames.jsxAttribute; 577 case ClassificationType.jsxText: return ClassificationTypeNames.jsxText; 578 case ClassificationType.jsxAttributeStringLiteralValue: return ClassificationTypeNames.jsxAttributeStringLiteralValue; 579 default: return undefined!; // TODO: GH#18217 throw Debug.assertNever(type); 580 } 581 } 582 583 function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] { 584 Debug.assert(classifications.spans.length % 3 === 0); 585 const dense = classifications.spans; 586 const result: ClassifiedSpan[] = []; 587 for (let i = 0; i < dense.length; i += 3) { 588 result.push({ 589 textSpan: createTextSpan(dense[i], dense[i + 1]), 590 classificationType: getClassificationTypeName(dense[i + 2]) 591 }); 592 } 593 594 return result; 595 } 596 597 /* @internal */ 598 export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] { 599 return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span)); 600 } 601 602 /* @internal */ 603 export function getEncodedSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): Classifications { 604 const spanStart = span.start; 605 const spanLength = span.length; 606 607 // Make a scanner we can get trivia from. 608 const triviaScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text); 609 const mergeConflictScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text); 610 611 const result: number[] = []; 612 processElement(sourceFile); 613 614 return { spans: result, endOfLineState: EndOfLineState.None }; 615 616 function pushClassification(start: number, length: number, type: ClassificationType) { 617 result.push(start); 618 result.push(length); 619 result.push(type); 620 } 621 622 function classifyLeadingTriviaAndGetTokenStart(token: Node): number { 623 triviaScanner.setTextPos(token.pos); 624 while (true) { 625 const start = triviaScanner.getTextPos(); 626 // only bother scanning if we have something that could be trivia. 627 if (!couldStartTrivia(sourceFile.text, start)) { 628 return start; 629 } 630 631 const kind = triviaScanner.scan(); 632 const end = triviaScanner.getTextPos(); 633 const width = end - start; 634 635 // The moment we get something that isn't trivia, then stop processing. 636 if (!isTrivia(kind)) { 637 return start; 638 } 639 640 switch (kind) { 641 case SyntaxKind.NewLineTrivia: 642 case SyntaxKind.WhitespaceTrivia: 643 // Don't bother with newlines/whitespace. 644 continue; 645 646 case SyntaxKind.SingleLineCommentTrivia: 647 case SyntaxKind.MultiLineCommentTrivia: 648 // Only bother with the trivia if it at least intersects the span of interest. 649 classifyComment(token, kind, start, width); 650 651 // Classifying a comment might cause us to reuse the trivia scanner 652 // (because of jsdoc comments). So after we classify the comment make 653 // sure we set the scanner position back to where it needs to be. 654 triviaScanner.setTextPos(end); 655 continue; 656 657 case SyntaxKind.ConflictMarkerTrivia: 658 const text = sourceFile.text; 659 const ch = text.charCodeAt(start); 660 661 // for the <<<<<<< and >>>>>>> markers, we just add them in as comments 662 // in the classification stream. 663 if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) { 664 pushClassification(start, width, ClassificationType.comment); 665 continue; 666 } 667 668 // for the ||||||| and ======== markers, add a comment for the first line, 669 // and then lex all subsequent lines up until the end of the conflict marker. 670 Debug.assert(ch === CharacterCodes.bar || ch === CharacterCodes.equals); 671 classifyDisabledMergeCode(text, start, end); 672 break; 673 674 case SyntaxKind.ShebangTrivia: 675 // TODO: Maybe we should classify these. 676 break; 677 678 default: 679 Debug.assertNever(kind); 680 } 681 } 682 } 683 684 function classifyComment(token: Node, kind: SyntaxKind, start: number, width: number) { 685 if (kind === SyntaxKind.MultiLineCommentTrivia) { 686 // See if this is a doc comment. If so, we'll classify certain portions of it 687 // specially. 688 const docCommentAndDiagnostics = parseIsolatedJSDocComment(sourceFile.text, start, width); 689 if (docCommentAndDiagnostics && docCommentAndDiagnostics.jsDoc) { 690 // TODO: This should be predicated on `token["kind"]` being compatible with `HasJSDoc["kind"]` 691 setParent(docCommentAndDiagnostics.jsDoc, token as HasJSDoc); 692 classifyJSDocComment(docCommentAndDiagnostics.jsDoc); 693 return; 694 } 695 } 696 else if (kind === SyntaxKind.SingleLineCommentTrivia) { 697 if (tryClassifyTripleSlashComment(start, width)) { 698 return; 699 } 700 } 701 702 // Simple comment. Just add as is. 703 pushCommentRange(start, width); 704 } 705 706 function pushCommentRange(start: number, width: number) { 707 pushClassification(start, width, ClassificationType.comment); 708 } 709 710 function classifyJSDocComment(docComment: JSDoc) { 711 let pos = docComment.pos; 712 713 if (docComment.tags) { 714 for (const tag of docComment.tags) { 715 // As we walk through each tag, classify the portion of text from the end of 716 // the last tag (or the start of the entire doc comment) as 'comment'. 717 if (tag.pos !== pos) { 718 pushCommentRange(pos, tag.pos - pos); 719 } 720 721 pushClassification(tag.pos, 1, ClassificationType.punctuation); // "@" 722 pushClassification(tag.tagName.pos, tag.tagName.end - tag.tagName.pos, ClassificationType.docCommentTagName); // e.g. "param" 723 724 pos = tag.tagName.end; 725 726 switch (tag.kind) { 727 case SyntaxKind.JSDocParameterTag: 728 processJSDocParameterTag(<JSDocParameterTag>tag); 729 break; 730 case SyntaxKind.JSDocTemplateTag: 731 processJSDocTemplateTag(<JSDocTemplateTag>tag); 732 pos = tag.end; 733 break; 734 case SyntaxKind.JSDocTypeTag: 735 processElement((<JSDocTypeTag>tag).typeExpression); 736 pos = tag.end; 737 break; 738 case SyntaxKind.JSDocReturnTag: 739 processElement((<JSDocReturnTag>tag).typeExpression); 740 pos = tag.end; 741 break; 742 } 743 } 744 } 745 746 if (pos !== docComment.end) { 747 pushCommentRange(pos, docComment.end - pos); 748 } 749 750 return; 751 752 function processJSDocParameterTag(tag: JSDocParameterTag) { 753 if (tag.isNameFirst) { 754 pushCommentRange(pos, tag.name.pos - pos); 755 pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName); 756 pos = tag.name.end; 757 } 758 759 if (tag.typeExpression) { 760 pushCommentRange(pos, tag.typeExpression.pos - pos); 761 processElement(tag.typeExpression); 762 pos = tag.typeExpression.end; 763 } 764 765 if (!tag.isNameFirst) { 766 pushCommentRange(pos, tag.name.pos - pos); 767 pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName); 768 pos = tag.name.end; 769 } 770 } 771 } 772 773 function tryClassifyTripleSlashComment(start: number, width: number): boolean { 774 const tripleSlashXMLCommentRegEx = /^(\/\/\/\s*)(<)(?:(\S+)((?:[^/]|\/[^>])*)(\/>)?)?/im; 775 const attributeRegex = /(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img; 776 777 const text = sourceFile.text.substr(start, width); 778 const match = tripleSlashXMLCommentRegEx.exec(text); 779 if (!match) { 780 return false; 781 } 782 783 // Limiting classification to exactly the elements and attributes 784 // defined in `ts.commentPragmas` would be excessive, but we can avoid 785 // some obvious false positives (e.g. in XML-like doc comments) by 786 // checking the element name. 787 // eslint-disable-next-line no-in-operator 788 if (!match[3] || !(match[3] in commentPragmas)) { 789 return false; 790 } 791 792 let pos = start; 793 794 pushCommentRange(pos, match[1].length); // /// 795 pos += match[1].length; 796 797 pushClassification(pos, match[2].length, ClassificationType.punctuation); // < 798 pos += match[2].length; 799 800 pushClassification(pos, match[3].length, ClassificationType.jsxSelfClosingTagName); // element name 801 pos += match[3].length; 802 803 const attrText = match[4]; 804 let attrPos = pos; 805 while (true) { 806 const attrMatch = attributeRegex.exec(attrText); 807 if (!attrMatch) { 808 break; 809 } 810 811 const newAttrPos = pos + attrMatch.index; 812 if (newAttrPos > attrPos) { 813 pushCommentRange(attrPos, newAttrPos - attrPos); 814 attrPos = newAttrPos; 815 } 816 817 pushClassification(attrPos, attrMatch[1].length, ClassificationType.jsxAttribute); // attribute name 818 attrPos += attrMatch[1].length; 819 820 if (attrMatch[2].length) { 821 pushCommentRange(attrPos, attrMatch[2].length); // whitespace 822 attrPos += attrMatch[2].length; 823 } 824 825 pushClassification(attrPos, attrMatch[3].length, ClassificationType.operator); // = 826 attrPos += attrMatch[3].length; 827 828 if (attrMatch[4].length) { 829 pushCommentRange(attrPos, attrMatch[4].length); // whitespace 830 attrPos += attrMatch[4].length; 831 } 832 833 pushClassification(attrPos, attrMatch[5].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value 834 attrPos += attrMatch[5].length; 835 } 836 837 pos += match[4].length; 838 839 if (pos > attrPos) { 840 pushCommentRange(attrPos, pos - attrPos); 841 } 842 843 if (match[5]) { 844 pushClassification(pos, match[5].length, ClassificationType.punctuation); // /> 845 pos += match[5].length; 846 } 847 848 const end = start + width; 849 if (pos < end) { 850 pushCommentRange(pos, end - pos); 851 } 852 853 return true; 854 } 855 856 function processJSDocTemplateTag(tag: JSDocTemplateTag) { 857 for (const child of tag.getChildren()) { 858 processElement(child); 859 } 860 } 861 862 function classifyDisabledMergeCode(text: string, start: number, end: number) { 863 // Classify the line that the ||||||| or ======= marker is on as a comment. 864 // Then just lex all further tokens and add them to the result. 865 let i: number; 866 for (i = start; i < end; i++) { 867 if (isLineBreak(text.charCodeAt(i))) { 868 break; 869 } 870 } 871 pushClassification(start, i - start, ClassificationType.comment); 872 mergeConflictScanner.setTextPos(i); 873 874 while (mergeConflictScanner.getTextPos() < end) { 875 classifyDisabledCodeToken(); 876 } 877 } 878 879 function classifyDisabledCodeToken() { 880 const start = mergeConflictScanner.getTextPos(); 881 const tokenKind = mergeConflictScanner.scan(); 882 const end = mergeConflictScanner.getTextPos(); 883 884 const type = classifyTokenType(tokenKind); 885 if (type) { 886 pushClassification(start, end - start, type); 887 } 888 } 889 890 /** 891 * Returns true if node should be treated as classified and no further processing is required. 892 * False will mean that node is not classified and traverse routine should recurse into node contents. 893 */ 894 function tryClassifyNode(node: Node): boolean { 895 if (isJSDoc(node)) { 896 return true; 897 } 898 899 if (nodeIsMissing(node)) { 900 return true; 901 } 902 903 const classifiedElementName = tryClassifyJsxElementName(node); 904 if (!isToken(node) && node.kind !== SyntaxKind.JsxText && classifiedElementName === undefined) { 905 return false; 906 } 907 908 const tokenStart = node.kind === SyntaxKind.JsxText ? node.pos : classifyLeadingTriviaAndGetTokenStart(node); 909 910 const tokenWidth = node.end - tokenStart; 911 Debug.assert(tokenWidth >= 0); 912 if (tokenWidth > 0) { 913 const type = classifiedElementName || classifyTokenType(node.kind, node); 914 if (type) { 915 pushClassification(tokenStart, tokenWidth, type); 916 } 917 } 918 919 return true; 920 } 921 922 function tryClassifyJsxElementName(token: Node): ClassificationType | undefined { 923 switch (token.parent && token.parent.kind) { 924 case SyntaxKind.JsxOpeningElement: 925 if ((<JsxOpeningElement>token.parent).tagName === token) { 926 return ClassificationType.jsxOpenTagName; 927 } 928 break; 929 case SyntaxKind.JsxClosingElement: 930 if ((<JsxClosingElement>token.parent).tagName === token) { 931 return ClassificationType.jsxCloseTagName; 932 } 933 break; 934 case SyntaxKind.JsxSelfClosingElement: 935 if ((<JsxSelfClosingElement>token.parent).tagName === token) { 936 return ClassificationType.jsxSelfClosingTagName; 937 } 938 break; 939 case SyntaxKind.JsxAttribute: 940 if ((<JsxAttribute>token.parent).name === token) { 941 return ClassificationType.jsxAttribute; 942 } 943 break; 944 } 945 return undefined; 946 } 947 948 // for accurate classification, the actual token should be passed in. however, for 949 // cases like 'disabled merge code' classification, we just get the token kind and 950 // classify based on that instead. 951 function classifyTokenType(tokenKind: SyntaxKind, token?: Node): ClassificationType | undefined { 952 if (isKeyword(tokenKind)) { 953 return ClassificationType.keyword; 954 } 955 956 // Special case `<` and `>`: If they appear in a generic context they are punctuation, 957 // not operators. 958 if (tokenKind === SyntaxKind.LessThanToken || tokenKind === SyntaxKind.GreaterThanToken) { 959 // If the node owning the token has a type argument list or type parameter list, then 960 // we can effectively assume that a '<' and '>' belong to those lists. 961 if (token && getTypeArgumentOrTypeParameterList(token.parent)) { 962 return ClassificationType.punctuation; 963 } 964 } 965 966 if (isPunctuation(tokenKind)) { 967 if (token) { 968 const parent = token.parent; 969 if (tokenKind === SyntaxKind.EqualsToken) { 970 // the '=' in a variable declaration is special cased here. 971 if (parent.kind === SyntaxKind.VariableDeclaration || 972 parent.kind === SyntaxKind.PropertyDeclaration || 973 parent.kind === SyntaxKind.Parameter || 974 parent.kind === SyntaxKind.JsxAttribute) { 975 return ClassificationType.operator; 976 } 977 } 978 979 if (parent.kind === SyntaxKind.BinaryExpression || 980 parent.kind === SyntaxKind.PrefixUnaryExpression || 981 parent.kind === SyntaxKind.PostfixUnaryExpression || 982 parent.kind === SyntaxKind.ConditionalExpression) { 983 return ClassificationType.operator; 984 } 985 } 986 987 return ClassificationType.punctuation; 988 } 989 else if (tokenKind === SyntaxKind.NumericLiteral) { 990 return ClassificationType.numericLiteral; 991 } 992 else if (tokenKind === SyntaxKind.BigIntLiteral) { 993 return ClassificationType.bigintLiteral; 994 } 995 else if (tokenKind === SyntaxKind.StringLiteral) { 996 return token && token.parent.kind === SyntaxKind.JsxAttribute ? ClassificationType.jsxAttributeStringLiteralValue : ClassificationType.stringLiteral; 997 } 998 else if (tokenKind === SyntaxKind.RegularExpressionLiteral) { 999 // TODO: we should get another classification type for these literals. 1000 return ClassificationType.stringLiteral; 1001 } 1002 else if (isTemplateLiteralKind(tokenKind)) { 1003 // TODO (drosen): we should *also* get another classification type for these literals. 1004 return ClassificationType.stringLiteral; 1005 } 1006 else if (tokenKind === SyntaxKind.JsxText) { 1007 return ClassificationType.jsxText; 1008 } 1009 else if (tokenKind === SyntaxKind.Identifier) { 1010 if (token) { 1011 switch (token.parent.kind) { 1012 case SyntaxKind.ClassDeclaration: 1013 if ((<ClassDeclaration>token.parent).name === token) { 1014 return ClassificationType.className; 1015 } 1016 return; 1017 case SyntaxKind.TypeParameter: 1018 if ((<TypeParameterDeclaration>token.parent).name === token) { 1019 return ClassificationType.typeParameterName; 1020 } 1021 return; 1022 case SyntaxKind.InterfaceDeclaration: 1023 if ((<InterfaceDeclaration>token.parent).name === token) { 1024 return ClassificationType.interfaceName; 1025 } 1026 return; 1027 case SyntaxKind.EnumDeclaration: 1028 if ((<EnumDeclaration>token.parent).name === token) { 1029 return ClassificationType.enumName; 1030 } 1031 return; 1032 case SyntaxKind.ModuleDeclaration: 1033 if ((<ModuleDeclaration>token.parent).name === token) { 1034 return ClassificationType.moduleName; 1035 } 1036 return; 1037 case SyntaxKind.Parameter: 1038 if ((<ParameterDeclaration>token.parent).name === token) { 1039 return isThisIdentifier(token) ? ClassificationType.keyword : ClassificationType.parameterName; 1040 } 1041 return; 1042 } 1043 } 1044 return ClassificationType.identifier; 1045 } 1046 } 1047 1048 function processElement(element: Node | undefined) { 1049 if (!element) { 1050 return; 1051 } 1052 1053 // Ignore nodes that don't intersect the original span to classify. 1054 if (decodedTextSpanIntersectsWith(spanStart, spanLength, element.pos, element.getFullWidth())) { 1055 checkForClassificationCancellation(cancellationToken, element.kind); 1056 1057 for (const child of element.getChildren(sourceFile)) { 1058 if (!tryClassifyNode(child)) { 1059 // Recurse into our child nodes. 1060 processElement(child); 1061 } 1062 } 1063 } 1064 } 1065 } 1066} 1067