import {
    __String, arrayToNumericMap, CancellationToken, CharacterCodes, ClassDeclaration, ClassificationInfo,
    ClassificationResult, Classifications, ClassificationType, ClassificationTypeNames, ClassifiedSpan, Classifier,
    commentPragmas, couldStartTrivia, createScanner, createTextSpan, Debug, decodedTextSpanIntersectsWith,
    EndOfLineState, EnumDeclaration, getMeaningFromLocation, getModuleInstanceState, getTypeArgumentOrTypeParameterList,
    HasJSDoc, InterfaceDeclaration, isAccessibilityModifier, isConstTypeReference, isIdentifier, isJSDoc, isKeyword,
    isLineBreak, isModuleDeclaration, isPunctuation, isTemplateLiteralKind, isThisIdentifier, isToken, isTrivia, JSDoc,
    JSDocAugmentsTag, JSDocCallbackTag, JSDocEnumTag, JSDocImplementsTag, JSDocParameterTag, JSDocPropertyTag,
    JSDocReturnTag, JSDocSeeTag, JSDocTemplateTag, JSDocThisTag, JSDocTypedefTag, JSDocTypeTag, JsxAttribute,
    JsxClosingElement, JsxOpeningElement, JsxSelfClosingElement, lastOrUndefined, ModuleDeclaration,
    ModuleInstanceState, Node, nodeIsMissing, ParameterDeclaration, parseIsolatedJSDocComment, Push, ReadonlySet,
    Scanner, ScriptTarget, SemanticMeaning, setParent, some, SourceFile, Symbol, SymbolFlags, SyntaxKind, TextSpan,
    textSpanIntersectsWith, TokenClass, TypeChecker, TypeParameterDeclaration,
} from "./_namespaces/ts";

/** The classifier is used for syntactic highlighting in editors via the TSServer */
export function createClassifier(): Classifier {
    // One scanner instance is shared across calls; each call to
    // getEncodedLexicalClassifications re-seeds it via scanner.setText.
    const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);

    function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
        return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
    }

    // If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
    // we will be more conservative in order to avoid conflicting with the syntactic
    // classifier.
    function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
        let token = SyntaxKind.Unknown;
        let lastNonTriviaToken = SyntaxKind.Unknown;

        // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
        // classification on template strings. Because of the context free nature of templates,
        // the only precise way to classify a template portion would be by propagating the stack across
        // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
        // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
        // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
        // Situations in which this fails are
        //  1) When template strings are nested across different lines:
        //          `hello ${ `world
        //          ` }`
        //
        //     Where on the second line, you will get the closing of a template,
        //     a closing curly, and a new template.
        //
        //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
        //          `hello ${ () => {
        //          return "world" } } `
        //
        //     Where on the second line, you will get the 'return' keyword,
        //     a string literal, and a template end consisting of '} } `'.
        const templateStack: SyntaxKind[] = [];

        // If the previous line ended inside a string/comment/template, prepend a short
        // synthetic prefix so the scanner starts out in the equivalent lexical state;
        // 'offset' lets us map positions back to the original, un-prefixed text.
        const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
        text = prefix + text;
        const offset = prefix.length;
        if (pushTemplate) {
            templateStack.push(SyntaxKind.TemplateHead);
        }

        scanner.setText(text);

        let endOfLineState = EndOfLineState.None;
        const spans: number[] = [];

        // We can run into an unfortunate interaction between the lexical and syntactic classifier
        // when the user is typing something generic.  Consider the case where the user types:
        //
        //      Foo<number
        //
        // From the lexical classifier's perspective, 'number' is a keyword, and so the word will
        // be classified as such.  However, from the syntactic classifier's tree-based perspective
        // this is simply an expression with the identifier 'number' on the RHS of the less than
        // token.  So the classification will go back to being an identifier.  The moment the user
        // types again, number will become a keyword, then an identifier, etc. etc.
        //
        // To try to avoid this problem, we avoid classifying contextual keywords as keywords
        // when the user is potentially typing something generic.  We just can't do a good enough
        // job at the lexical level, and so well leave it up to the syntactic classifier to make
        // the determination.
        //
        // In order to determine if the user is potentially typing something generic, we use a
        // weak heuristic where we track < and > tokens.  It's a weak heuristic, but should
        // work well enough in practice.
        let angleBracketStack = 0;

        do {
            token = scanner.scan();
            if (!isTrivia(token)) {
                handleToken();
                lastNonTriviaToken = token;
            }
            const end = scanner.getTextPos();
            pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
            if (end >= text.length) {
                // The token that runs off the end of the line determines what lexical state
                // the following line must start in.
                const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
                if (end !== undefined) {
                    endOfLineState = end;
                }
            }
        } while (token !== SyntaxKind.EndOfFileToken);

        // Adjusts 'token' in place for cases the raw scanner cannot decide on its own
        // (regex vs. divide, template parts, contextual keywords) and maintains the
        // template/angle-bracket bookkeeping.
        function handleToken(): void {
            switch (token) {
                case SyntaxKind.SlashToken:
                case SyntaxKind.SlashEqualsToken:
                    if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
                        token = SyntaxKind.RegularExpressionLiteral;
                    }
                    break;
                case SyntaxKind.LessThanToken:
                    if (lastNonTriviaToken === SyntaxKind.Identifier) {
                        // Could be the start of something generic.  Keep track of that by bumping
                        // up the current count of generic contexts we may be in.
                        angleBracketStack++;
                    }
                    break;
                case SyntaxKind.GreaterThanToken:
                    if (angleBracketStack > 0) {
                        // If we think we're currently in something generic, then mark that that
                        // generic entity is complete.
                        angleBracketStack--;
                    }
                    break;
                case SyntaxKind.AnyKeyword:
                case SyntaxKind.StringKeyword:
                case SyntaxKind.NumberKeyword:
                case SyntaxKind.BooleanKeyword:
                case SyntaxKind.SymbolKeyword:
                    if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
                        // If it looks like we're could be in something generic, don't classify this
                        // as a keyword.  We may just get overwritten by the syntactic classifier,
                        // causing a noisy experience for the user.
                        token = SyntaxKind.Identifier;
                    }
                    break;
                case SyntaxKind.TemplateHead:
                    templateStack.push(token);
                    break;
                case SyntaxKind.OpenBraceToken:
                    // If we don't have anything on the template stack,
                    // then we aren't trying to keep track of a previously scanned template head.
                    if (templateStack.length > 0) {
                        templateStack.push(token);
                    }
                    break;
                case SyntaxKind.CloseBraceToken:
                    // If we don't have anything on the template stack,
                    // then we aren't trying to keep track of a previously scanned template head.
                    if (templateStack.length > 0) {
                        const lastTemplateStackToken = lastOrUndefined(templateStack);

                        if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
                            token = scanner.reScanTemplateToken(/* isTaggedTemplate */ false);

                            // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
                            if (token === SyntaxKind.TemplateTail) {
                                templateStack.pop();
                            }
                            else {
                                Debug.assertEqual(token, SyntaxKind.TemplateMiddle, "Should have been a template middle.");
                            }
                        }
                        else {
                            Debug.assertEqual(lastTemplateStackToken, SyntaxKind.OpenBraceToken, "Should have been an open brace");
                            templateStack.pop();
                        }
                    }
                    break;
                default:
                    if (!isKeyword(token)) {
                        break;
                    }

                    if (lastNonTriviaToken === SyntaxKind.DotToken) {
                        // A keyword after a dot is a property name, not a keyword.
                        token = SyntaxKind.Identifier;
                    }
                    else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
                        // We have two keywords in a row.  Only treat the second as a keyword if
                        // it's a sequence that could legally occur in the language.  Otherwise
                        // treat it as an identifier.  This way, if someone writes "private var"
                        // we recognize that 'var' is actually an identifier here.
                        token = SyntaxKind.Identifier;
                    }
            }
        }

        return { endOfLineState, spans };
    }

    return { getClassificationsForLine, getEncodedLexicalClassifications };
}

/// We do not have a full parser support to know when we should parse a regex or not
/// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
/// we have a series of divide operator. this list allows us to be more accurate by ruling out
/// locations where a regexp cannot exist.
196const noRegexTable: true[] = arrayToNumericMap<SyntaxKind, true>([ 197 SyntaxKind.Identifier, 198 SyntaxKind.StringLiteral, 199 SyntaxKind.NumericLiteral, 200 SyntaxKind.BigIntLiteral, 201 SyntaxKind.RegularExpressionLiteral, 202 SyntaxKind.ThisKeyword, 203 SyntaxKind.PlusPlusToken, 204 SyntaxKind.MinusMinusToken, 205 SyntaxKind.CloseParenToken, 206 SyntaxKind.CloseBracketToken, 207 SyntaxKind.CloseBraceToken, 208 SyntaxKind.TrueKeyword, 209 SyntaxKind.FalseKeyword, 210], token => token, () => true); 211 212function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined { 213 switch (token) { 214 case SyntaxKind.StringLiteral: { 215 // Check to see if we finished up on a multiline string literal. 216 if (!scanner.isUnterminated()) return undefined; 217 218 const tokenText = scanner.getTokenText(); 219 const lastCharIndex = tokenText.length - 1; 220 let numBackslashes = 0; 221 while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) { 222 numBackslashes++; 223 } 224 225 // If we have an odd number of backslashes, then the multiline string is unclosed 226 if ((numBackslashes & 1) === 0) return undefined; 227 return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral; 228 } 229 case SyntaxKind.MultiLineCommentTrivia: 230 // Check to see if the multiline comment was unclosed. 231 return scanner.isUnterminated() ? 
EndOfLineState.InMultiLineCommentTrivia : undefined; 232 default: 233 if (isTemplateLiteralKind(token)) { 234 if (!scanner.isUnterminated()) { 235 return undefined; 236 } 237 switch (token) { 238 case SyntaxKind.TemplateTail: 239 return EndOfLineState.InTemplateMiddleOrTail; 240 case SyntaxKind.NoSubstitutionTemplateLiteral: 241 return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate; 242 default: 243 return Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token); 244 } 245 } 246 return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined; 247 } 248} 249 250function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void { 251 if (classification === ClassificationType.whiteSpace) { 252 // Don't bother with whitespace classifications. They're not needed. 253 return; 254 } 255 256 if (start === 0 && offset > 0) { 257 // We're classifying the first token, and this was a case where we prepended text. 258 // We should consider the start of this token to be at the start of the original text. 259 start += offset; 260 } 261 262 const length = end - start; 263 if (length > 0) { 264 // All our tokens are in relation to the augmented text. Move them back to be 265 // relative to the original text. 266 result.push(start - offset, length, classification); 267 } 268} 269 270function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult { 271 const entries: ClassificationInfo[] = []; 272 const dense = classifications.spans; 273 let lastEnd = 0; 274 275 for (let i = 0; i < dense.length; i += 3) { 276 const start = dense[i]; 277 const length = dense[i + 1]; 278 const type = dense[i + 2] as ClassificationType; 279 280 // Make a whitespace entry between the last item and this one. 
281 if (lastEnd >= 0) { 282 const whitespaceLength = start - lastEnd; 283 if (whitespaceLength > 0) { 284 entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); 285 } 286 } 287 288 entries.push({ length, classification: convertClassification(type) }); 289 lastEnd = start + length; 290 } 291 292 const whitespaceLength = text.length - lastEnd; 293 if (whitespaceLength > 0) { 294 entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace }); 295 } 296 297 return { entries, finalLexState: classifications.endOfLineState }; 298} 299 300function convertClassification(type: ClassificationType): TokenClass { 301 switch (type) { 302 case ClassificationType.comment: return TokenClass.Comment; 303 case ClassificationType.keyword: return TokenClass.Keyword; 304 case ClassificationType.numericLiteral: return TokenClass.NumberLiteral; 305 case ClassificationType.bigintLiteral: return TokenClass.BigIntLiteral; 306 case ClassificationType.operator: return TokenClass.Operator; 307 case ClassificationType.stringLiteral: return TokenClass.StringLiteral; 308 case ClassificationType.whiteSpace: return TokenClass.Whitespace; 309 case ClassificationType.punctuation: return TokenClass.Punctuation; 310 case ClassificationType.identifier: 311 case ClassificationType.className: 312 case ClassificationType.enumName: 313 case ClassificationType.interfaceName: 314 case ClassificationType.moduleName: 315 case ClassificationType.typeParameterName: 316 case ClassificationType.typeAliasName: 317 case ClassificationType.text: 318 case ClassificationType.parameterName: 319 return TokenClass.Identifier; 320 default: 321 return undefined!; // TODO: GH#18217 Debug.assertNever(type); 322 } 323} 324 325/** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. 
*/ 326function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean { 327 if (!isAccessibilityModifier(keyword1)) { 328 // Assume any other keyword combination is legal. 329 // This can be refined in the future if there are more cases we want the classifier to be better at. 330 return true; 331 } 332 switch (keyword2) { 333 case SyntaxKind.GetKeyword: 334 case SyntaxKind.SetKeyword: 335 case SyntaxKind.ConstructorKeyword: 336 case SyntaxKind.StaticKeyword: 337 case SyntaxKind.AccessorKeyword: 338 return true; // Allow things like "public get", "public constructor" and "public static". 339 default: 340 return false; // Any other keyword following "public" is actually an identifier, not a real keyword. 341 } 342} 343 344function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } { 345 // If we're in a string literal, then prepend: "\ 346 // (and a newline). That way when we lex we'll think we're still in a string literal. 347 // 348 // If we're in a multiline comment, then prepend: /* 349 // (and a newline). That way when we lex we'll think we're still in a multiline comment. 
350 switch (lexState) { 351 case EndOfLineState.InDoubleQuoteStringLiteral: 352 return { prefix: "\"\\\n" }; 353 case EndOfLineState.InSingleQuoteStringLiteral: 354 return { prefix: "'\\\n" }; 355 case EndOfLineState.InMultiLineCommentTrivia: 356 return { prefix: "/*\n" }; 357 case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate: 358 return { prefix: "`\n" }; 359 case EndOfLineState.InTemplateMiddleOrTail: 360 return { prefix: "}\n", pushTemplate: true }; 361 case EndOfLineState.InTemplateSubstitutionPosition: 362 return { prefix: "", pushTemplate: true }; 363 case EndOfLineState.None: 364 return { prefix: "" }; 365 default: 366 return Debug.assertNever(lexState); 367 } 368} 369 370function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean { 371 switch (token) { 372 case SyntaxKind.AsteriskToken: 373 case SyntaxKind.SlashToken: 374 case SyntaxKind.PercentToken: 375 case SyntaxKind.PlusToken: 376 case SyntaxKind.MinusToken: 377 case SyntaxKind.LessThanLessThanToken: 378 case SyntaxKind.GreaterThanGreaterThanToken: 379 case SyntaxKind.GreaterThanGreaterThanGreaterThanToken: 380 case SyntaxKind.LessThanToken: 381 case SyntaxKind.GreaterThanToken: 382 case SyntaxKind.LessThanEqualsToken: 383 case SyntaxKind.GreaterThanEqualsToken: 384 case SyntaxKind.InstanceOfKeyword: 385 case SyntaxKind.InKeyword: 386 case SyntaxKind.AsKeyword: 387 case SyntaxKind.SatisfiesKeyword: 388 case SyntaxKind.EqualsEqualsToken: 389 case SyntaxKind.ExclamationEqualsToken: 390 case SyntaxKind.EqualsEqualsEqualsToken: 391 case SyntaxKind.ExclamationEqualsEqualsToken: 392 case SyntaxKind.AmpersandToken: 393 case SyntaxKind.CaretToken: 394 case SyntaxKind.BarToken: 395 case SyntaxKind.AmpersandAmpersandToken: 396 case SyntaxKind.BarBarToken: 397 case SyntaxKind.BarEqualsToken: 398 case SyntaxKind.AmpersandEqualsToken: 399 case SyntaxKind.CaretEqualsToken: 400 case SyntaxKind.LessThanLessThanEqualsToken: 401 case SyntaxKind.GreaterThanGreaterThanEqualsToken: 402 case 
SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken: 403 case SyntaxKind.PlusEqualsToken: 404 case SyntaxKind.MinusEqualsToken: 405 case SyntaxKind.AsteriskEqualsToken: 406 case SyntaxKind.SlashEqualsToken: 407 case SyntaxKind.PercentEqualsToken: 408 case SyntaxKind.EqualsToken: 409 case SyntaxKind.CommaToken: 410 case SyntaxKind.QuestionQuestionToken: 411 case SyntaxKind.BarBarEqualsToken: 412 case SyntaxKind.AmpersandAmpersandEqualsToken: 413 case SyntaxKind.QuestionQuestionEqualsToken: 414 return true; 415 default: 416 return false; 417 } 418} 419 420function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean { 421 switch (token) { 422 case SyntaxKind.PlusToken: 423 case SyntaxKind.MinusToken: 424 case SyntaxKind.TildeToken: 425 case SyntaxKind.ExclamationToken: 426 case SyntaxKind.PlusPlusToken: 427 case SyntaxKind.MinusMinusToken: 428 return true; 429 default: 430 return false; 431 } 432} 433 434function classFromKind(token: SyntaxKind): ClassificationType { 435 if (isKeyword(token)) { 436 return ClassificationType.keyword; 437 } 438 else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) { 439 return ClassificationType.operator; 440 } 441 else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) { 442 return ClassificationType.punctuation; 443 } 444 445 switch (token) { 446 case SyntaxKind.NumericLiteral: 447 return ClassificationType.numericLiteral; 448 case SyntaxKind.BigIntLiteral: 449 return ClassificationType.bigintLiteral; 450 case SyntaxKind.StringLiteral: 451 return ClassificationType.stringLiteral; 452 case SyntaxKind.RegularExpressionLiteral: 453 return ClassificationType.regularExpressionLiteral; 454 case SyntaxKind.ConflictMarkerTrivia: 455 case SyntaxKind.MultiLineCommentTrivia: 456 case SyntaxKind.SingleLineCommentTrivia: 457 return ClassificationType.comment; 458 case SyntaxKind.WhitespaceTrivia: 459 case SyntaxKind.NewLineTrivia: 460 return 
ClassificationType.whiteSpace; 461 case SyntaxKind.Identifier: 462 default: 463 if (isTemplateLiteralKind(token)) { 464 return ClassificationType.stringLiteral; 465 } 466 return ClassificationType.identifier; 467 } 468} 469 470/** @internal */ 471export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): ClassifiedSpan[] { 472 return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span)); 473} 474 475function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) { 476 // We don't want to actually call back into our host on every node to find out if we've 477 // been canceled. That would be an enormous amount of chattyness, along with the all 478 // the overhead of marshalling the data to/from the host. So instead we pick a few 479 // reasonable node kinds to bother checking on. These node kinds represent high level 480 // constructs that we would expect to see commonly, but just at a far less frequent 481 // interval. 482 // 483 // For example, in checker.ts (around 750k) we only have around 600 of these constructs. 484 // That means we're calling back into the host around every 1.2k of the file we process. 485 // Lib.d.ts has similar numbers. 
    switch (kind) {
        case SyntaxKind.ModuleDeclaration:
        case SyntaxKind.ClassDeclaration:
        case SyntaxKind.InterfaceDeclaration:
        case SyntaxKind.FunctionDeclaration:
        case SyntaxKind.ClassExpression:
        case SyntaxKind.FunctionExpression:
        case SyntaxKind.ArrowFunction:
            cancellationToken.throwIfCancellationRequested();
    }
}

/** @internal */
export function getEncodedSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): Classifications {
    const spans: number[] = [];
    sourceFile.forEachChild(function cb(node: Node): void {
        // Only walk into nodes that intersect the requested span.
        if (!node || !textSpanIntersectsWith(span, node.pos, node.getFullWidth())) {
            return;
        }

        checkForClassificationCancellation(cancellationToken, node.kind);
        // Only bother calling into the typechecker if this is an identifier that
        // could possibly resolve to a type name.  This makes classification run
        // in a third of the time it would normally take.
        if (isIdentifier(node) && !nodeIsMissing(node) && classifiableNames.has(node.escapedText)) {
            const symbol = typeChecker.getSymbolAtLocation(node);
            const type = symbol && classifySymbol(symbol, getMeaningFromLocation(node), typeChecker);
            if (type) {
                pushClassification(node.getStart(sourceFile), node.getEnd(), type);
            }
        }

        node.forEachChild(cb);
    });
    return { spans, endOfLineState: EndOfLineState.None };

    // Appends one (start, length, type) triple to the dense result array.
    function pushClassification(start: number, end: number, type: ClassificationType): void {
        const length = end - start;
        Debug.assert(length > 0, `Classification had non-positive length of ${length}`);
        spans.push(start);
        spans.push(length);
        spans.push(type);
    }
}

// Maps a symbol to the semantic classification of its name (class/enum/type alias/...),
// or undefined if the symbol gets no semantic classification at this position.
function classifySymbol(symbol: Symbol, meaningAtPosition: SemanticMeaning, checker: TypeChecker): ClassificationType | undefined {
    const flags = symbol.getFlags();
    if ((flags & SymbolFlags.Classifiable) === SymbolFlags.None) {
        return undefined;
    }
    else if (flags & SymbolFlags.Class) {
        return ClassificationType.className;
    }
    else if (flags & SymbolFlags.Enum) {
        return ClassificationType.enumName;
    }
    else if (flags & SymbolFlags.TypeAlias) {
        return ClassificationType.typeAliasName;
    }
    else if (flags & SymbolFlags.Module) {
        // Only classify a module as such if
        //  - It appears in a namespace context.
        //  - There exists a module declaration which actually impacts the value side.
        return meaningAtPosition & SemanticMeaning.Namespace || meaningAtPosition & SemanticMeaning.Value && hasValueSideModule(symbol) ? ClassificationType.moduleName : undefined;
    }
    else if (flags & SymbolFlags.Alias) {
        // An alias (e.g. an import) classifies the same as whatever it resolves to.
        return classifySymbol(checker.getAliasedSymbol(symbol), meaningAtPosition, checker);
    }
    else if (meaningAtPosition & SemanticMeaning.Type) {
        return flags & SymbolFlags.Interface ? ClassificationType.interfaceName : flags & SymbolFlags.TypeParameter ? ClassificationType.typeParameterName : undefined;
    }
    else {
        return undefined;
    }
}

/** Returns true if there exists a module that introduces entities on the value side. */
function hasValueSideModule(symbol: Symbol): boolean {
    return some(symbol.declarations, declaration =>
        isModuleDeclaration(declaration) && getModuleInstanceState(declaration) === ModuleInstanceState.Instantiated);
}

// Maps the numeric classification encoding onto its protocol string name.
function getClassificationTypeName(type: ClassificationType): ClassificationTypeNames {
    switch (type) {
        case ClassificationType.comment: return ClassificationTypeNames.comment;
        case ClassificationType.identifier: return ClassificationTypeNames.identifier;
        case ClassificationType.keyword: return ClassificationTypeNames.keyword;
        case ClassificationType.numericLiteral: return ClassificationTypeNames.numericLiteral;
        case ClassificationType.bigintLiteral: return ClassificationTypeNames.bigintLiteral;
        case ClassificationType.operator: return ClassificationTypeNames.operator;
        case ClassificationType.stringLiteral: return ClassificationTypeNames.stringLiteral;
        case ClassificationType.whiteSpace: return ClassificationTypeNames.whiteSpace;
        case ClassificationType.text: return ClassificationTypeNames.text;
        case ClassificationType.punctuation: return ClassificationTypeNames.punctuation;
        case ClassificationType.className: return ClassificationTypeNames.className;
        case ClassificationType.enumName: return ClassificationTypeNames.enumName;
        case ClassificationType.interfaceName: return ClassificationTypeNames.interfaceName;
        case ClassificationType.moduleName: return ClassificationTypeNames.moduleName;
        case ClassificationType.typeParameterName: return ClassificationTypeNames.typeParameterName;
        case ClassificationType.typeAliasName: return ClassificationTypeNames.typeAliasName;
        case ClassificationType.parameterName: return ClassificationTypeNames.parameterName;
        case ClassificationType.docCommentTagName: return ClassificationTypeNames.docCommentTagName;
        case ClassificationType.jsxOpenTagName: return ClassificationTypeNames.jsxOpenTagName;
        case ClassificationType.jsxCloseTagName: return ClassificationTypeNames.jsxCloseTagName;
        case ClassificationType.jsxSelfClosingTagName: return ClassificationTypeNames.jsxSelfClosingTagName;
        case ClassificationType.jsxAttribute: return ClassificationTypeNames.jsxAttribute;
        case ClassificationType.jsxText: return ClassificationTypeNames.jsxText;
        case ClassificationType.jsxAttributeStringLiteralValue: return ClassificationTypeNames.jsxAttributeStringLiteralValue;
        default: return undefined!; // TODO: GH#18217 throw Debug.assertNever(type);
    }
}

// Expands the dense (start, length, type) triples into ClassifiedSpan objects.
function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
    Debug.assert(classifications.spans.length % 3 === 0);
    const dense = classifications.spans;
    const result: ClassifiedSpan[] = [];
    for (let i = 0; i < dense.length; i += 3) {
        result.push({
            textSpan: createTextSpan(dense[i], dense[i + 1]),
            classificationType: getClassificationTypeName(dense[i + 2])
        });
    }

    return result;
}

/** @internal */
export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
    return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
}

/** @internal */
export function getEncodedSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): Classifications {
    const spanStart = span.start;
    const spanLength = span.length;

    // Make a scanner we can get trivia from.
624 const triviaScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text); 625 const mergeConflictScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text); 626 627 const result: number[] = []; 628 processElement(sourceFile); 629 630 return { spans: result, endOfLineState: EndOfLineState.None }; 631 632 function pushClassification(start: number, length: number, type: ClassificationType) { 633 result.push(start); 634 result.push(length); 635 result.push(type); 636 } 637 638 function classifyLeadingTriviaAndGetTokenStart(token: Node): number { 639 triviaScanner.setTextPos(token.pos); 640 while (true) { 641 const start = triviaScanner.getTextPos(); 642 // only bother scanning if we have something that could be trivia. 643 if (!couldStartTrivia(sourceFile.text, start)) { 644 return start; 645 } 646 647 const kind = triviaScanner.scan(); 648 const end = triviaScanner.getTextPos(); 649 const width = end - start; 650 651 // The moment we get something that isn't trivia, then stop processing. 652 if (!isTrivia(kind)) { 653 return start; 654 } 655 656 switch (kind) { 657 case SyntaxKind.NewLineTrivia: 658 case SyntaxKind.WhitespaceTrivia: 659 // Don't bother with newlines/whitespace. 660 continue; 661 662 case SyntaxKind.SingleLineCommentTrivia: 663 case SyntaxKind.MultiLineCommentTrivia: 664 // Only bother with the trivia if it at least intersects the span of interest. 665 classifyComment(token, kind, start, width); 666 667 // Classifying a comment might cause us to reuse the trivia scanner 668 // (because of jsdoc comments). So after we classify the comment make 669 // sure we set the scanner position back to where it needs to be. 
670 triviaScanner.setTextPos(end); 671 continue; 672 673 case SyntaxKind.ConflictMarkerTrivia: 674 const text = sourceFile.text; 675 const ch = text.charCodeAt(start); 676 677 // for the <<<<<<< and >>>>>>> markers, we just add them in as comments 678 // in the classification stream. 679 if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) { 680 pushClassification(start, width, ClassificationType.comment); 681 continue; 682 } 683 684 // for the ||||||| and ======== markers, add a comment for the first line, 685 // and then lex all subsequent lines up until the end of the conflict marker. 686 Debug.assert(ch === CharacterCodes.bar || ch === CharacterCodes.equals); 687 classifyDisabledMergeCode(text, start, end); 688 break; 689 690 case SyntaxKind.ShebangTrivia: 691 // TODO: Maybe we should classify these. 692 break; 693 694 default: 695 Debug.assertNever(kind); 696 } 697 } 698 } 699 700 function classifyComment(token: Node, kind: SyntaxKind, start: number, width: number) { 701 if (kind === SyntaxKind.MultiLineCommentTrivia) { 702 // See if this is a doc comment. If so, we'll classify certain portions of it 703 // specially. 704 const docCommentAndDiagnostics = parseIsolatedJSDocComment(sourceFile.text, start, width); 705 if (docCommentAndDiagnostics && docCommentAndDiagnostics.jsDoc) { 706 // TODO: This should be predicated on `token["kind"]` being compatible with `HasJSDoc["kind"]` 707 setParent(docCommentAndDiagnostics.jsDoc, token as HasJSDoc); 708 classifyJSDocComment(docCommentAndDiagnostics.jsDoc); 709 return; 710 } 711 } 712 else if (kind === SyntaxKind.SingleLineCommentTrivia) { 713 if (tryClassifyTripleSlashComment(start, width)) { 714 return; 715 } 716 } 717 718 // Simple comment. Just add as is. 
719 pushCommentRange(start, width); 720 } 721 722 function pushCommentRange(start: number, width: number) { 723 pushClassification(start, width, ClassificationType.comment); 724 } 725 726 function classifyJSDocComment(docComment: JSDoc) { 727 let pos = docComment.pos; 728 729 if (docComment.tags) { 730 for (const tag of docComment.tags) { 731 // As we walk through each tag, classify the portion of text from the end of 732 // the last tag (or the start of the entire doc comment) as 'comment'. 733 if (tag.pos !== pos) { 734 pushCommentRange(pos, tag.pos - pos); 735 } 736 737 pushClassification(tag.pos, 1, ClassificationType.punctuation); // "@" 738 pushClassification(tag.tagName.pos, tag.tagName.end - tag.tagName.pos, ClassificationType.docCommentTagName); // e.g. "param" 739 740 pos = tag.tagName.end; 741 let commentStart = tag.tagName.end; 742 743 switch (tag.kind) { 744 case SyntaxKind.JSDocParameterTag: 745 const param = tag as JSDocParameterTag; 746 processJSDocParameterTag(param); 747 commentStart = param.isNameFirst && param.typeExpression?.end || param.name.end; 748 break; 749 case SyntaxKind.JSDocPropertyTag: 750 const prop = tag as JSDocPropertyTag; 751 commentStart = prop.isNameFirst && prop.typeExpression?.end || prop.name.end; 752 break; 753 case SyntaxKind.JSDocTemplateTag: 754 processJSDocTemplateTag(tag as JSDocTemplateTag); 755 pos = tag.end; 756 commentStart = (tag as JSDocTemplateTag).typeParameters.end; 757 break; 758 case SyntaxKind.JSDocTypedefTag: 759 const type = tag as JSDocTypedefTag; 760 commentStart = type.typeExpression?.kind === SyntaxKind.JSDocTypeExpression && type.fullName?.end || type.typeExpression?.end || commentStart; 761 break; 762 case SyntaxKind.JSDocCallbackTag: 763 commentStart = (tag as JSDocCallbackTag).typeExpression.end; 764 break; 765 case SyntaxKind.JSDocTypeTag: 766 processElement((tag as JSDocTypeTag).typeExpression); 767 pos = tag.end; 768 commentStart = (tag as JSDocTypeTag).typeExpression.end; 769 break; 770 case 
SyntaxKind.JSDocThisTag:
                    case SyntaxKind.JSDocEnumTag:
                        commentStart = (tag as JSDocThisTag | JSDocEnumTag).typeExpression.end;
                        break;
                    case SyntaxKind.JSDocReturnTag:
                        processElement((tag as JSDocReturnTag).typeExpression);
                        pos = tag.end;
                        commentStart = (tag as JSDocReturnTag).typeExpression?.end || commentStart;
                        break;
                    case SyntaxKind.JSDocSeeTag:
                        commentStart = (tag as JSDocSeeTag).name?.end || commentStart;
                        break;
                    case SyntaxKind.JSDocAugmentsTag:
                    case SyntaxKind.JSDocImplementsTag:
                        commentStart = (tag as JSDocImplementsTag | JSDocAugmentsTag).class.end;
                        break;
                }
                // A structured (object) comment node carries its own positions; a plain
                // string comment spans from commentStart to the end of the tag.
                if (typeof tag.comment === "object") {
                    pushCommentRange(tag.comment.pos, tag.comment.end - tag.comment.pos);
                }
                else if (typeof tag.comment === "string") {
                    pushCommentRange(commentStart, tag.end - commentStart);
                }
            }
        }

        // Any remaining text after the last tag is a plain comment.
        if (pos !== docComment.end) {
            pushCommentRange(pos, docComment.end - pos);
        }

        return;

        // Classifies the name and type expression of an '@param' tag, in whichever
        // order they appear, with plain comment ranges covering the text in between.
        // Mutates the enclosing 'pos' cursor as it advances.
        function processJSDocParameterTag(tag: JSDocParameterTag) {
            if (tag.isNameFirst) {
                pushCommentRange(pos, tag.name.pos - pos);
                pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                pos = tag.name.end;
            }

            if (tag.typeExpression) {
                pushCommentRange(pos, tag.typeExpression.pos - pos);
                processElement(tag.typeExpression);
                pos = tag.typeExpression.end;
            }

            if (!tag.isNameFirst) {
                pushCommentRange(pos, tag.name.pos - pos);
                pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                pos = tag.name.end;
            }
        }
    }

    /**
     * Classifies a '/// <reference ... />'-style comment, giving XML-flavored
     * classifications to the element name, attribute names, '=' operators, and
     * attribute values. Returns false (leaving the comment to be classified as a
     * plain comment) when the text does not match a pragma known to `commentPragmas`.
     */
    function tryClassifyTripleSlashComment(start: number, width: number): boolean {
        const tripleSlashXMLCommentRegEx = /^(\/\/\/\s*)(<)(?:(\S+)((?:[^/]|\/[^>])*)(\/>)?)?/im;
        // Require a leading whitespace character (the parser already does) to prevent terrible backtracking performance
        const attributeRegex = /(\s)(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img;

        const text = sourceFile.text.substr(start, width);
        const match = tripleSlashXMLCommentRegEx.exec(text);
        if (!match) {
            return false;
        }

        // Limiting classification to exactly the elements and attributes
        // defined in `ts.commentPragmas` would be excessive, but we can avoid
        // some obvious false positives (e.g. in XML-like doc comments) by
        // checking the element name.
        // eslint-disable-next-line local/no-in-operator
        if (!match[3] || !(match[3] in commentPragmas)) {
            return false;
        }

        let pos = start;

        pushCommentRange(pos, match[1].length); // ///
        pos += match[1].length;

        pushClassification(pos, match[2].length, ClassificationType.punctuation); // <
        pos += match[2].length;

        pushClassification(pos, match[3].length, ClassificationType.jsxSelfClosingTagName); // element name
        pos += match[3].length;

        const attrText = match[4];
        // 'attrPos' walks through the attribute portion; the global-flagged
        // attributeRegex advances via its lastIndex on each exec() call.
        let attrPos = pos;
        while (true) {
            const attrMatch = attributeRegex.exec(attrText);
            if (!attrMatch) {
                break;
            }

            const newAttrPos = pos + attrMatch.index + attrMatch[1].length; // whitespace
            if (newAttrPos > attrPos) {
                pushCommentRange(attrPos, newAttrPos - attrPos);
                attrPos = newAttrPos;
            }

            pushClassification(attrPos, attrMatch[2].length, ClassificationType.jsxAttribute); // attribute name
            attrPos += attrMatch[2].length;

            if (attrMatch[3].length) {
                pushCommentRange(attrPos, attrMatch[3].length); // whitespace
                attrPos += attrMatch[3].length;
            }

            pushClassification(attrPos, attrMatch[4].length, ClassificationType.operator); // =
            attrPos += attrMatch[4].length;

            if (attrMatch[5].length) {
                pushCommentRange(attrPos, attrMatch[5].length); // whitespace
                attrPos += attrMatch[5].length;
            }

            pushClassification(attrPos, attrMatch[6].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value
            attrPos += attrMatch[6].length;
        }

        pos += match[4].length;

        // Classify any leftover attribute text that no attribute match consumed.
        if (pos > attrPos) {
            pushCommentRange(attrPos, pos - attrPos);
        }

        if (match[5]) {
            pushClassification(pos, match[5].length, ClassificationType.punctuation); // />
            pos += match[5].length;
        }

        // Trailing text after the pragma (if any) is a plain comment.
        const end = start + width;
        if (pos < end) {
            pushCommentRange(pos, end - pos);
        }

        return true;
    }

    /** Classifies every child of an '@template' tag (its type parameters, etc.). */
    function processJSDocTemplateTag(tag: JSDocTemplateTag) {
        for (const child of tag.getChildren()) {
            processElement(child);
        }
    }

    function classifyDisabledMergeCode(text: string, start: number, end: number) {
        // Classify the line that the ||||||| or ======= marker is on as a comment.
        // Then just lex all further tokens and add them to the result.
        let i: number;
        for (i = start; i < end; i++) {
            if (isLineBreak(text.charCodeAt(i))) {
                break;
            }
        }
        pushClassification(start, i - start, ClassificationType.comment);
        mergeConflictScanner.setTextPos(i);

        while (mergeConflictScanner.getTextPos() < end) {
            classifyDisabledCodeToken();
        }
    }

    /**
     * Scans one token at the merge-conflict scanner's current position and classifies
     * it by token kind alone (no parent node is available for disabled code).
     */
    function classifyDisabledCodeToken() {
        const start = mergeConflictScanner.getTextPos();
        const tokenKind = mergeConflictScanner.scan();
        const end = mergeConflictScanner.getTextPos();

        const type = classifyTokenType(tokenKind);
        if (type) {
            pushClassification(start, end - start, type);
        }
    }

    /**
     * Returns true if node should be treated as classified and no further processing is required.
     * False will mean that node is not classified and traverse routine should recurse into node contents.
944 */ 945 function tryClassifyNode(node: Node): boolean { 946 if (isJSDoc(node)) { 947 return true; 948 } 949 950 if (nodeIsMissing(node)) { 951 return true; 952 } 953 954 const classifiedElementName = tryClassifyJsxElementName(node); 955 if (!isToken(node) && node.kind !== SyntaxKind.JsxText && classifiedElementName === undefined) { 956 return false; 957 } 958 959 const tokenStart = node.kind === SyntaxKind.JsxText ? node.pos : classifyLeadingTriviaAndGetTokenStart(node); 960 961 const tokenWidth = node.end - tokenStart; 962 Debug.assert(tokenWidth >= 0); 963 if (tokenWidth > 0) { 964 const type = classifiedElementName || classifyTokenType(node.kind, node); 965 if (type) { 966 pushClassification(tokenStart, tokenWidth, type); 967 } 968 } 969 970 return true; 971 } 972 973 function tryClassifyJsxElementName(token: Node): ClassificationType | undefined { 974 switch (token.parent && token.parent.kind) { 975 case SyntaxKind.JsxOpeningElement: 976 if ((token.parent as JsxOpeningElement).tagName === token) { 977 return ClassificationType.jsxOpenTagName; 978 } 979 break; 980 case SyntaxKind.JsxClosingElement: 981 if ((token.parent as JsxClosingElement).tagName === token) { 982 return ClassificationType.jsxCloseTagName; 983 } 984 break; 985 case SyntaxKind.JsxSelfClosingElement: 986 if ((token.parent as JsxSelfClosingElement).tagName === token) { 987 return ClassificationType.jsxSelfClosingTagName; 988 } 989 break; 990 case SyntaxKind.JsxAttribute: 991 if ((token.parent as JsxAttribute).name === token) { 992 return ClassificationType.jsxAttribute; 993 } 994 break; 995 } 996 return undefined; 997 } 998 999 // for accurate classification, the actual token should be passed in. however, for 1000 // cases like 'disabled merge code' classification, we just get the token kind and 1001 // classify based on that instead. 
1002 function classifyTokenType(tokenKind: SyntaxKind, token?: Node): ClassificationType | undefined { 1003 if (isKeyword(tokenKind)) { 1004 return ClassificationType.keyword; 1005 } 1006 1007 // Special case `<` and `>`: If they appear in a generic context they are punctuation, 1008 // not operators. 1009 if (tokenKind === SyntaxKind.LessThanToken || tokenKind === SyntaxKind.GreaterThanToken) { 1010 // If the node owning the token has a type argument list or type parameter list, then 1011 // we can effectively assume that a '<' and '>' belong to those lists. 1012 if (token && getTypeArgumentOrTypeParameterList(token.parent)) { 1013 return ClassificationType.punctuation; 1014 } 1015 } 1016 1017 if (isPunctuation(tokenKind)) { 1018 if (token) { 1019 const parent = token.parent; 1020 if (tokenKind === SyntaxKind.EqualsToken) { 1021 // the '=' in a variable declaration is special cased here. 1022 if (parent.kind === SyntaxKind.VariableDeclaration || 1023 parent.kind === SyntaxKind.PropertyDeclaration || 1024 parent.kind === SyntaxKind.Parameter || 1025 parent.kind === SyntaxKind.JsxAttribute) { 1026 return ClassificationType.operator; 1027 } 1028 } 1029 1030 if (parent.kind === SyntaxKind.BinaryExpression || 1031 parent.kind === SyntaxKind.PrefixUnaryExpression || 1032 parent.kind === SyntaxKind.PostfixUnaryExpression || 1033 parent.kind === SyntaxKind.ConditionalExpression) { 1034 return ClassificationType.operator; 1035 } 1036 } 1037 1038 return ClassificationType.punctuation; 1039 } 1040 else if (tokenKind === SyntaxKind.NumericLiteral) { 1041 return ClassificationType.numericLiteral; 1042 } 1043 else if (tokenKind === SyntaxKind.BigIntLiteral) { 1044 return ClassificationType.bigintLiteral; 1045 } 1046 else if (tokenKind === SyntaxKind.StringLiteral) { 1047 return token && token.parent.kind === SyntaxKind.JsxAttribute ? 
ClassificationType.jsxAttributeStringLiteralValue : ClassificationType.stringLiteral; 1048 } 1049 else if (tokenKind === SyntaxKind.RegularExpressionLiteral) { 1050 // TODO: we should get another classification type for these literals. 1051 return ClassificationType.stringLiteral; 1052 } 1053 else if (isTemplateLiteralKind(tokenKind)) { 1054 // TODO (drosen): we should *also* get another classification type for these literals. 1055 return ClassificationType.stringLiteral; 1056 } 1057 else if (tokenKind === SyntaxKind.JsxText) { 1058 return ClassificationType.jsxText; 1059 } 1060 else if (tokenKind === SyntaxKind.Identifier) { 1061 if (token) { 1062 switch (token.parent.kind) { 1063 case SyntaxKind.ClassDeclaration: 1064 if ((token.parent as ClassDeclaration).name === token) { 1065 return ClassificationType.className; 1066 } 1067 return; 1068 case SyntaxKind.TypeParameter: 1069 if ((token.parent as TypeParameterDeclaration).name === token) { 1070 return ClassificationType.typeParameterName; 1071 } 1072 return; 1073 case SyntaxKind.InterfaceDeclaration: 1074 if ((token.parent as InterfaceDeclaration).name === token) { 1075 return ClassificationType.interfaceName; 1076 } 1077 return; 1078 case SyntaxKind.EnumDeclaration: 1079 if ((token.parent as EnumDeclaration).name === token) { 1080 return ClassificationType.enumName; 1081 } 1082 return; 1083 case SyntaxKind.ModuleDeclaration: 1084 if ((token.parent as ModuleDeclaration).name === token) { 1085 return ClassificationType.moduleName; 1086 } 1087 return; 1088 case SyntaxKind.Parameter: 1089 if ((token.parent as ParameterDeclaration).name === token) { 1090 return isThisIdentifier(token) ? 
ClassificationType.keyword : ClassificationType.parameterName; 1091 } 1092 return; 1093 } 1094 1095 if (isConstTypeReference(token.parent)) { 1096 return ClassificationType.keyword; 1097 } 1098 } 1099 return ClassificationType.identifier; 1100 } 1101 } 1102 1103 function processElement(element: Node | undefined) { 1104 if (!element) { 1105 return; 1106 } 1107 1108 // Ignore nodes that don't intersect the original span to classify. 1109 if (decodedTextSpanIntersectsWith(spanStart, spanLength, element.pos, element.getFullWidth())) { 1110 checkForClassificationCancellation(cancellationToken, element.kind); 1111 1112 for (const child of element.getChildren(sourceFile)) { 1113 if (!tryClassifyNode(child)) { 1114 // Recurse into our child nodes. 1115 processElement(child); 1116 } 1117 } 1118 } 1119 } 1120} 1121