• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import {
2    __String, arrayToNumericMap, CancellationToken, CharacterCodes, ClassDeclaration, ClassificationInfo,
3    ClassificationResult, Classifications, ClassificationType, ClassificationTypeNames, ClassifiedSpan, Classifier,
4    commentPragmas, couldStartTrivia, createScanner, createTextSpan, Debug, decodedTextSpanIntersectsWith,
5    EndOfLineState, EnumDeclaration, getMeaningFromLocation, getModuleInstanceState, getTypeArgumentOrTypeParameterList,
6    HasJSDoc, InterfaceDeclaration, isAccessibilityModifier, isConstTypeReference, isIdentifier, isJSDoc, isKeyword,
7    isLineBreak, isModuleDeclaration, isPunctuation, isTemplateLiteralKind, isThisIdentifier, isToken, isTrivia, JSDoc,
8    JSDocAugmentsTag, JSDocCallbackTag, JSDocEnumTag, JSDocImplementsTag, JSDocParameterTag, JSDocPropertyTag,
9    JSDocReturnTag, JSDocSeeTag, JSDocTemplateTag, JSDocThisTag, JSDocTypedefTag, JSDocTypeTag, JsxAttribute,
10    JsxClosingElement, JsxOpeningElement, JsxSelfClosingElement, lastOrUndefined, ModuleDeclaration,
11    ModuleInstanceState, Node, nodeIsMissing, ParameterDeclaration, parseIsolatedJSDocComment, Push, ReadonlySet,
12    Scanner, ScriptTarget, SemanticMeaning, setParent, some, SourceFile, Symbol, SymbolFlags, SyntaxKind, TextSpan,
13    textSpanIntersectsWith, TokenClass, TypeChecker, TypeParameterDeclaration,
14} from "./_namespaces/ts";
15
/** The classifier is used for syntactic highlighting in editors via the TSServer */
export function createClassifier(): Classifier {
    // One scanner instance is shared across all calls; it is re-targeted with
    // setText() on every classification request.
    const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);

    // Classifies a single line (given the lexical state the previous line ended in)
    // and converts the dense encoded spans into the legacy object-based result.
    function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
        return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
    }

    // If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
    // we will be more conservative in order to avoid conflicting with the syntactic classifier.
    function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
        // 'token' is deliberately a mutable closure variable: handleToken() below
        // may reinterpret it (regex vs. divide, contextual keyword vs. identifier).
        let token = SyntaxKind.Unknown;
        let lastNonTriviaToken = SyntaxKind.Unknown;

        // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
        // classification on template strings. Because of the context free nature of templates,
        // the only precise way to classify a template portion would be by propagating the stack across
        // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
        // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
        // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
        // Situations in which this fails are
        //  1) When template strings are nested across different lines:
        //          `hello ${ `world
        //          ` }`
        //
        //     Where on the second line, you will get the closing of a template,
        //     a closing curly, and a new template.
        //
        //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
        //          `hello ${ () => {
        //          return "world" } } `
        //
        //     Where on the second line, you will get the 'return' keyword,
        //     a string literal, and a template end consisting of '} } `'.
        const templateStack: SyntaxKind[] = [];

        // Prepend synthetic text that puts the scanner back into the state the
        // previous line ended in (e.g. inside a string or comment); 'offset' lets
        // us shift positions back to the caller's original coordinates later.
        const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
        text = prefix + text;
        const offset = prefix.length;
        if (pushTemplate) {
            templateStack.push(SyntaxKind.TemplateHead);
        }

        scanner.setText(text);

        let endOfLineState = EndOfLineState.None;
        const spans: number[] = [];

        // We can run into an unfortunate interaction between the lexical and syntactic classifier
        // when the user is typing something generic.  Consider the case where the user types:
        //
        //      Foo<number
        //
        // From the lexical classifier's perspective, 'number' is a keyword, and so the word will
        // be classified as such.  However, from the syntactic classifier's tree-based perspective
        // this is simply an expression with the identifier 'number' on the RHS of the less than
        // token.  So the classification will go back to being an identifier.  The moment the user
        // types again, number will become a keyword, then an identifier, etc. etc.
        //
        // To try to avoid this problem, we avoid classifying contextual keywords as keywords
        // when the user is potentially typing something generic.  We just can't do a good enough
        // job at the lexical level, and so well leave it up to the syntactic classifier to make
        // the determination.
        //
        // In order to determine if the user is potentially typing something generic, we use a
        // weak heuristic where we track < and > tokens.  It's a weak heuristic, but should
        // work well enough in practice.
        let angleBracketStack = 0;

        // Scan every token on the (augmented) line, emitting one encoded span per
        // non-whitespace token, and capture the lexical state at end of line.
        do {
            token = scanner.scan();
            if (!isTrivia(token)) {
                handleToken();
                lastNonTriviaToken = token;
            }
            const end = scanner.getTextPos();
            pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
            if (end >= text.length) {
                // NOTE: this inner 'end' (an EndOfLineState | undefined) intentionally
                // shadows the outer 'end' (a scanner position) from this point on.
                const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
                if (end !== undefined) {
                    endOfLineState = end;
                }
            }
        } while (token !== SyntaxKind.EndOfFileToken);

        // Re-examines the token just scanned, possibly rewriting the closed-over
        // 'token' variable, and maintains the template and angle-bracket state.
        function handleToken(): void {
            switch (token) {
                case SyntaxKind.SlashToken:
                case SyntaxKind.SlashEqualsToken:
                    // A slash is a regex only where an expression cannot end (see noRegexTable).
                    if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
                        token = SyntaxKind.RegularExpressionLiteral;
                    }
                    break;
                case SyntaxKind.LessThanToken:
                    if (lastNonTriviaToken === SyntaxKind.Identifier) {
                        // Could be the start of something generic.  Keep track of that by bumping
                        // up the current count of generic contexts we may be in.
                        angleBracketStack++;
                    }
                    break;
                case SyntaxKind.GreaterThanToken:
                    if (angleBracketStack > 0) {
                        // If we think we're currently in something generic, then mark that that
                        // generic entity is complete.
                        angleBracketStack--;
                    }
                    break;
                case SyntaxKind.AnyKeyword:
                case SyntaxKind.StringKeyword:
                case SyntaxKind.NumberKeyword:
                case SyntaxKind.BooleanKeyword:
                case SyntaxKind.SymbolKeyword:
                    if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
                        // If it looks like we're could be in something generic, don't classify this
                        // as a keyword.  We may just get overwritten by the syntactic classifier,
                        // causing a noisy experience for the user.
                        token = SyntaxKind.Identifier;
                    }
                    break;
                case SyntaxKind.TemplateHead:
                    templateStack.push(token);
                    break;
                case SyntaxKind.OpenBraceToken:
                    // If we don't have anything on the template stack,
                    // then we aren't trying to keep track of a previously scanned template head.
                    if (templateStack.length > 0) {
                        templateStack.push(token);
                    }
                    break;
                case SyntaxKind.CloseBraceToken:
                    // If we don't have anything on the template stack,
                    // then we aren't trying to keep track of a previously scanned template head.
                    if (templateStack.length > 0) {
                        const lastTemplateStackToken = lastOrUndefined(templateStack);

                        if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
                            // This '}' actually closes a template substitution; rescan it
                            // as a template middle/tail piece.
                            token = scanner.reScanTemplateToken(/* isTaggedTemplate */ false);

                            // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
                            if (token === SyntaxKind.TemplateTail) {
                                templateStack.pop();
                            }
                            else {
                                Debug.assertEqual(token, SyntaxKind.TemplateMiddle, "Should have been a template middle.");
                            }
                        }
                        else {
                            Debug.assertEqual(lastTemplateStackToken, SyntaxKind.OpenBraceToken, "Should have been an open brace");
                            templateStack.pop();
                        }
                    }
                    break;
                default:
                    if (!isKeyword(token)) {
                        break;
                    }

                    // A keyword after '.' is always a property name, i.e. an identifier.
                    if (lastNonTriviaToken === SyntaxKind.DotToken) {
                        token = SyntaxKind.Identifier;
                    }
                    else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
                        // We have two keywords in a row.  Only treat the second as a keyword if
                        // it's a sequence that could legally occur in the language.  Otherwise
                        // treat it as an identifier.  This way, if someone writes "private var"
                        // we recognize that 'var' is actually an identifier here.
                        token = SyntaxKind.Identifier;
                    }
            }
        }

        return { endOfLineState, spans };
    }

    return { getClassificationsForLine, getEncodedLexicalClassifications };
}
191
/// We do not have a full parser support to know when we should parse a regex or not
/// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
/// we have a series of divide operator. this list allows us to be more accurate by ruling out
/// locations where a regexp cannot exist.
// In other words: each token below can legally END an expression, so a '/' that
// immediately follows one of them must be a divide (or divide-assign), never the
// start of a regular expression literal. Indexed by SyntaxKind (sparse array).
const noRegexTable: true[] = arrayToNumericMap<SyntaxKind, true>([
    SyntaxKind.Identifier,
    SyntaxKind.StringLiteral,
    SyntaxKind.NumericLiteral,
    SyntaxKind.BigIntLiteral,
    SyntaxKind.RegularExpressionLiteral,
    SyntaxKind.ThisKeyword,
    SyntaxKind.PlusPlusToken,
    SyntaxKind.MinusMinusToken,
    SyntaxKind.CloseParenToken,
    SyntaxKind.CloseBracketToken,
    SyntaxKind.CloseBraceToken,
    SyntaxKind.TrueKeyword,
    SyntaxKind.FalseKeyword,
], token => token, () => true);
211
212function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined {
213    switch (token) {
214        case SyntaxKind.StringLiteral: {
215            // Check to see if we finished up on a multiline string literal.
216            if (!scanner.isUnterminated()) return undefined;
217
218            const tokenText = scanner.getTokenText();
219            const lastCharIndex = tokenText.length - 1;
220            let numBackslashes = 0;
221            while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
222                numBackslashes++;
223            }
224
225            // If we have an odd number of backslashes, then the multiline string is unclosed
226            if ((numBackslashes & 1) === 0) return undefined;
227            return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral;
228        }
229        case SyntaxKind.MultiLineCommentTrivia:
230            // Check to see if the multiline comment was unclosed.
231            return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined;
232        default:
233            if (isTemplateLiteralKind(token)) {
234                if (!scanner.isUnterminated()) {
235                    return undefined;
236                }
237                switch (token) {
238                    case SyntaxKind.TemplateTail:
239                        return EndOfLineState.InTemplateMiddleOrTail;
240                    case SyntaxKind.NoSubstitutionTemplateLiteral:
241                        return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
242                    default:
243                        return Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
244                }
245            }
246            return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined;
247    }
248}
249
250function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void {
251    if (classification === ClassificationType.whiteSpace) {
252        // Don't bother with whitespace classifications.  They're not needed.
253        return;
254    }
255
256    if (start === 0 && offset > 0) {
257        // We're classifying the first token, and this was a case where we prepended text.
258        // We should consider the start of this token to be at the start of the original text.
259        start += offset;
260    }
261
262    const length = end - start;
263    if (length > 0) {
264        // All our tokens are in relation to the augmented text.  Move them back to be
265        // relative to the original text.
266        result.push(start - offset, length, classification);
267    }
268}
269
270function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult {
271    const entries: ClassificationInfo[] = [];
272    const dense = classifications.spans;
273    let lastEnd = 0;
274
275    for (let i = 0; i < dense.length; i += 3) {
276        const start = dense[i];
277        const length = dense[i + 1];
278        const type = dense[i + 2] as ClassificationType;
279
280        // Make a whitespace entry between the last item and this one.
281        if (lastEnd >= 0) {
282            const whitespaceLength = start - lastEnd;
283            if (whitespaceLength > 0) {
284                entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
285            }
286        }
287
288        entries.push({ length, classification: convertClassification(type) });
289        lastEnd = start + length;
290    }
291
292    const whitespaceLength = text.length - lastEnd;
293    if (whitespaceLength > 0) {
294        entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
295    }
296
297    return { entries, finalLexState: classifications.endOfLineState };
298}
299
300function convertClassification(type: ClassificationType): TokenClass {
301    switch (type) {
302        case ClassificationType.comment: return TokenClass.Comment;
303        case ClassificationType.keyword: return TokenClass.Keyword;
304        case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
305        case ClassificationType.bigintLiteral: return TokenClass.BigIntLiteral;
306        case ClassificationType.operator: return TokenClass.Operator;
307        case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
308        case ClassificationType.whiteSpace: return TokenClass.Whitespace;
309        case ClassificationType.punctuation: return TokenClass.Punctuation;
310        case ClassificationType.identifier:
311        case ClassificationType.className:
312        case ClassificationType.enumName:
313        case ClassificationType.interfaceName:
314        case ClassificationType.moduleName:
315        case ClassificationType.typeParameterName:
316        case ClassificationType.typeAliasName:
317        case ClassificationType.text:
318        case ClassificationType.parameterName:
319            return TokenClass.Identifier;
320        default:
321            return undefined!; // TODO: GH#18217 Debug.assertNever(type);
322    }
323}
324
325/** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
326function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean {
327    if (!isAccessibilityModifier(keyword1)) {
328        // Assume any other keyword combination is legal.
329        // This can be refined in the future if there are more cases we want the classifier to be better at.
330        return true;
331    }
332    switch (keyword2) {
333        case SyntaxKind.GetKeyword:
334        case SyntaxKind.SetKeyword:
335        case SyntaxKind.ConstructorKeyword:
336        case SyntaxKind.StaticKeyword:
337        case SyntaxKind.AccessorKeyword:
338            return true; // Allow things like "public get", "public constructor" and "public static".
339        default:
340            return false; // Any other keyword following "public" is actually an identifier, not a real keyword.
341    }
342}
343
344function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } {
345    // If we're in a string literal, then prepend: "\
346    // (and a newline).  That way when we lex we'll think we're still in a string literal.
347    //
348    // If we're in a multiline comment, then prepend: /*
349    // (and a newline).  That way when we lex we'll think we're still in a multiline comment.
350    switch (lexState) {
351        case EndOfLineState.InDoubleQuoteStringLiteral:
352            return { prefix: "\"\\\n" };
353        case EndOfLineState.InSingleQuoteStringLiteral:
354            return { prefix: "'\\\n" };
355        case EndOfLineState.InMultiLineCommentTrivia:
356            return { prefix: "/*\n" };
357        case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
358            return { prefix: "`\n" };
359        case EndOfLineState.InTemplateMiddleOrTail:
360            return { prefix: "}\n", pushTemplate: true };
361        case EndOfLineState.InTemplateSubstitutionPosition:
362            return { prefix: "", pushTemplate: true };
363        case EndOfLineState.None:
364            return { prefix: "" };
365        default:
366            return Debug.assertNever(lexState);
367    }
368}
369
370function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
371    switch (token) {
372        case SyntaxKind.AsteriskToken:
373        case SyntaxKind.SlashToken:
374        case SyntaxKind.PercentToken:
375        case SyntaxKind.PlusToken:
376        case SyntaxKind.MinusToken:
377        case SyntaxKind.LessThanLessThanToken:
378        case SyntaxKind.GreaterThanGreaterThanToken:
379        case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
380        case SyntaxKind.LessThanToken:
381        case SyntaxKind.GreaterThanToken:
382        case SyntaxKind.LessThanEqualsToken:
383        case SyntaxKind.GreaterThanEqualsToken:
384        case SyntaxKind.InstanceOfKeyword:
385        case SyntaxKind.InKeyword:
386        case SyntaxKind.AsKeyword:
387        case SyntaxKind.SatisfiesKeyword:
388        case SyntaxKind.EqualsEqualsToken:
389        case SyntaxKind.ExclamationEqualsToken:
390        case SyntaxKind.EqualsEqualsEqualsToken:
391        case SyntaxKind.ExclamationEqualsEqualsToken:
392        case SyntaxKind.AmpersandToken:
393        case SyntaxKind.CaretToken:
394        case SyntaxKind.BarToken:
395        case SyntaxKind.AmpersandAmpersandToken:
396        case SyntaxKind.BarBarToken:
397        case SyntaxKind.BarEqualsToken:
398        case SyntaxKind.AmpersandEqualsToken:
399        case SyntaxKind.CaretEqualsToken:
400        case SyntaxKind.LessThanLessThanEqualsToken:
401        case SyntaxKind.GreaterThanGreaterThanEqualsToken:
402        case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
403        case SyntaxKind.PlusEqualsToken:
404        case SyntaxKind.MinusEqualsToken:
405        case SyntaxKind.AsteriskEqualsToken:
406        case SyntaxKind.SlashEqualsToken:
407        case SyntaxKind.PercentEqualsToken:
408        case SyntaxKind.EqualsToken:
409        case SyntaxKind.CommaToken:
410        case SyntaxKind.QuestionQuestionToken:
411        case SyntaxKind.BarBarEqualsToken:
412        case SyntaxKind.AmpersandAmpersandEqualsToken:
413        case SyntaxKind.QuestionQuestionEqualsToken:
414            return true;
415        default:
416            return false;
417    }
418}
419
420function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
421    switch (token) {
422        case SyntaxKind.PlusToken:
423        case SyntaxKind.MinusToken:
424        case SyntaxKind.TildeToken:
425        case SyntaxKind.ExclamationToken:
426        case SyntaxKind.PlusPlusToken:
427        case SyntaxKind.MinusMinusToken:
428            return true;
429        default:
430            return false;
431    }
432}
433
434function classFromKind(token: SyntaxKind): ClassificationType {
435    if (isKeyword(token)) {
436        return ClassificationType.keyword;
437    }
438    else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
439        return ClassificationType.operator;
440    }
441    else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
442        return ClassificationType.punctuation;
443    }
444
445    switch (token) {
446        case SyntaxKind.NumericLiteral:
447            return ClassificationType.numericLiteral;
448        case SyntaxKind.BigIntLiteral:
449            return ClassificationType.bigintLiteral;
450        case SyntaxKind.StringLiteral:
451            return ClassificationType.stringLiteral;
452        case SyntaxKind.RegularExpressionLiteral:
453            return ClassificationType.regularExpressionLiteral;
454        case SyntaxKind.ConflictMarkerTrivia:
455        case SyntaxKind.MultiLineCommentTrivia:
456        case SyntaxKind.SingleLineCommentTrivia:
457            return ClassificationType.comment;
458        case SyntaxKind.WhitespaceTrivia:
459        case SyntaxKind.NewLineTrivia:
460            return ClassificationType.whiteSpace;
461        case SyntaxKind.Identifier:
462        default:
463            if (isTemplateLiteralKind(token)) {
464                return ClassificationType.stringLiteral;
465            }
466            return ClassificationType.identifier;
467    }
468}
469
470/** @internal */
471export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): ClassifiedSpan[] {
472    return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
473}
474
475function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) {
476    // We don't want to actually call back into our host on every node to find out if we've
477    // been canceled.  That would be an enormous amount of chattyness, along with the all
478    // the overhead of marshalling the data to/from the host.  So instead we pick a few
479    // reasonable node kinds to bother checking on.  These node kinds represent high level
480    // constructs that we would expect to see commonly, but just at a far less frequent
481    // interval.
482    //
483    // For example, in checker.ts (around 750k) we only have around 600 of these constructs.
484    // That means we're calling back into the host around every 1.2k of the file we process.
485    // Lib.d.ts has similar numbers.
486    switch (kind) {
487        case SyntaxKind.ModuleDeclaration:
488        case SyntaxKind.ClassDeclaration:
489        case SyntaxKind.InterfaceDeclaration:
490        case SyntaxKind.FunctionDeclaration:
491        case SyntaxKind.ClassExpression:
492        case SyntaxKind.FunctionExpression:
493        case SyntaxKind.ArrowFunction:
494            cancellationToken.throwIfCancellationRequested();
495    }
496}
497
498/** @internal */
499export function getEncodedSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): Classifications {
500    const spans: number[] = [];
501    sourceFile.forEachChild(function cb(node: Node): void {
502        // Only walk into nodes that intersect the requested span.
503        if (!node || !textSpanIntersectsWith(span, node.pos, node.getFullWidth())) {
504            return;
505        }
506
507        checkForClassificationCancellation(cancellationToken, node.kind);
508        // Only bother calling into the typechecker if this is an identifier that
509        // could possibly resolve to a type name.  This makes classification run
510        // in a third of the time it would normally take.
511        if (isIdentifier(node) && !nodeIsMissing(node) && classifiableNames.has(node.escapedText)) {
512            const symbol = typeChecker.getSymbolAtLocation(node);
513            const type = symbol && classifySymbol(symbol, getMeaningFromLocation(node), typeChecker);
514            if (type) {
515                pushClassification(node.getStart(sourceFile), node.getEnd(), type);
516            }
517        }
518
519        node.forEachChild(cb);
520    });
521    return { spans, endOfLineState: EndOfLineState.None };
522
523    function pushClassification(start: number, end: number, type: ClassificationType): void {
524        const length = end - start;
525        Debug.assert(length > 0, `Classification had non-positive length of ${length}`);
526        spans.push(start);
527        spans.push(length);
528        spans.push(type);
529    }
530}
531
532function classifySymbol(symbol: Symbol, meaningAtPosition: SemanticMeaning, checker: TypeChecker): ClassificationType | undefined {
533    const flags = symbol.getFlags();
534    if ((flags & SymbolFlags.Classifiable) === SymbolFlags.None) {
535        return undefined;
536    }
537    else if (flags & SymbolFlags.Class) {
538        return ClassificationType.className;
539    }
540    else if (flags & SymbolFlags.Enum) {
541        return ClassificationType.enumName;
542    }
543    else if (flags & SymbolFlags.TypeAlias) {
544        return ClassificationType.typeAliasName;
545    }
546    else if (flags & SymbolFlags.Module) {
547        // Only classify a module as such if
548        //  - It appears in a namespace context.
549        //  - There exists a module declaration which actually impacts the value side.
550        return meaningAtPosition & SemanticMeaning.Namespace || meaningAtPosition & SemanticMeaning.Value && hasValueSideModule(symbol) ? ClassificationType.moduleName : undefined;
551    }
552    else if (flags & SymbolFlags.Alias) {
553        return classifySymbol(checker.getAliasedSymbol(symbol), meaningAtPosition, checker);
554    }
555    else if (meaningAtPosition & SemanticMeaning.Type) {
556        return flags & SymbolFlags.Interface ? ClassificationType.interfaceName : flags & SymbolFlags.TypeParameter ? ClassificationType.typeParameterName : undefined;
557    }
558    else {
559        return undefined;
560    }
561}
562
563/** Returns true if there exists a module that introduces entities on the value side. */
564function hasValueSideModule(symbol: Symbol): boolean {
565    return some(symbol.declarations, declaration =>
566        isModuleDeclaration(declaration) && getModuleInstanceState(declaration) === ModuleInstanceState.Instantiated);
567}
568
569function getClassificationTypeName(type: ClassificationType): ClassificationTypeNames {
570    switch (type) {
571        case ClassificationType.comment: return ClassificationTypeNames.comment;
572        case ClassificationType.identifier: return ClassificationTypeNames.identifier;
573        case ClassificationType.keyword: return ClassificationTypeNames.keyword;
574        case ClassificationType.numericLiteral: return ClassificationTypeNames.numericLiteral;
575        case ClassificationType.bigintLiteral: return ClassificationTypeNames.bigintLiteral;
576        case ClassificationType.operator: return ClassificationTypeNames.operator;
577        case ClassificationType.stringLiteral: return ClassificationTypeNames.stringLiteral;
578        case ClassificationType.whiteSpace: return ClassificationTypeNames.whiteSpace;
579        case ClassificationType.text: return ClassificationTypeNames.text;
580        case ClassificationType.punctuation: return ClassificationTypeNames.punctuation;
581        case ClassificationType.className: return ClassificationTypeNames.className;
582        case ClassificationType.enumName: return ClassificationTypeNames.enumName;
583        case ClassificationType.interfaceName: return ClassificationTypeNames.interfaceName;
584        case ClassificationType.moduleName: return ClassificationTypeNames.moduleName;
585        case ClassificationType.typeParameterName: return ClassificationTypeNames.typeParameterName;
586        case ClassificationType.typeAliasName: return ClassificationTypeNames.typeAliasName;
587        case ClassificationType.parameterName: return ClassificationTypeNames.parameterName;
588        case ClassificationType.docCommentTagName: return ClassificationTypeNames.docCommentTagName;
589        case ClassificationType.jsxOpenTagName: return ClassificationTypeNames.jsxOpenTagName;
590        case ClassificationType.jsxCloseTagName: return ClassificationTypeNames.jsxCloseTagName;
591        case ClassificationType.jsxSelfClosingTagName: return ClassificationTypeNames.jsxSelfClosingTagName;
592        case ClassificationType.jsxAttribute: return ClassificationTypeNames.jsxAttribute;
593        case ClassificationType.jsxText: return ClassificationTypeNames.jsxText;
594        case ClassificationType.jsxAttributeStringLiteralValue: return ClassificationTypeNames.jsxAttributeStringLiteralValue;
595        default: return undefined!; // TODO: GH#18217 throw Debug.assertNever(type);
596    }
597}
598
599function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
600    Debug.assert(classifications.spans.length % 3 === 0);
601    const dense = classifications.spans;
602    const result: ClassifiedSpan[] = [];
603    for (let i = 0; i < dense.length; i += 3) {
604        result.push({
605            textSpan: createTextSpan(dense[i], dense[i + 1]),
606            classificationType: getClassificationTypeName(dense[i + 2])
607        });
608    }
609
610    return result;
611}
612
613/** @internal */
614export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
615    return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
616}
617
/**
 * Walks `sourceFile` and produces syntactic classifications — a dense array of
 * (start, length, classification-type) triples — for every token, comment,
 * JSX element name, and JSDoc construct that intersects `span`.
 *
 * @internal
 */
export function getEncodedSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): Classifications {
    const spanStart = span.start;
    const spanLength = span.length;

    // Make a scanner we can get trivia from.
    const triviaScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);
    // Separate scanner for re-lexing the disabled side of a merge conflict, so
    // classifying it does not disturb the trivia scanner's position.
    const mergeConflictScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);

    // Dense output: triples of [start, length, ClassificationType].
    const result: number[] = [];
    processElement(sourceFile);

    return { spans: result, endOfLineState: EndOfLineState.None };

    // Appends one (start, length, type) triple to the dense result array.
    function pushClassification(start: number, length: number, type: ClassificationType) {
        result.push(start);
        result.push(length);
        result.push(type);
    }

    /**
     * Scans forward from `token.pos`, classifying any leading trivia
     * (comments, conflict markers) encountered, and returns the position at
     * which the token's own text begins.
     */
    function classifyLeadingTriviaAndGetTokenStart(token: Node): number {
        triviaScanner.setTextPos(token.pos);
        while (true) {
            const start = triviaScanner.getTextPos();
            // only bother scanning if we have something that could be trivia.
            if (!couldStartTrivia(sourceFile.text, start)) {
                return start;
            }

            const kind = triviaScanner.scan();
            const end = triviaScanner.getTextPos();
            const width = end - start;

            // The moment we get something that isn't trivia, then stop processing.
            if (!isTrivia(kind)) {
                return start;
            }

            switch (kind) {
                case SyntaxKind.NewLineTrivia:
                case SyntaxKind.WhitespaceTrivia:
                    // Don't bother with newlines/whitespace.
                    continue;

                case SyntaxKind.SingleLineCommentTrivia:
                case SyntaxKind.MultiLineCommentTrivia:
                    // Only bother with the trivia if it at least intersects the span of interest.
                    // NOTE(review): no intersection check is visible here; this comment
                    // may be stale — verify against the history of this function.
                    classifyComment(token, kind, start, width);

                    // Classifying a comment might cause us to reuse the trivia scanner
                    // (because of jsdoc comments).  So after we classify the comment make
                    // sure we set the scanner position back to where it needs to be.
                    triviaScanner.setTextPos(end);
                    continue;

                case SyntaxKind.ConflictMarkerTrivia:
                    const text = sourceFile.text;
                    const ch = text.charCodeAt(start);

                    // for the <<<<<<< and >>>>>>> markers, we just add them in as comments
                    // in the classification stream.
                    if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) {
                        pushClassification(start, width, ClassificationType.comment);
                        continue;
                    }

                    // for the ||||||| and ======== markers, add a comment for the first line,
                    // and then lex all subsequent lines up until the end of the conflict marker.
                    Debug.assert(ch === CharacterCodes.bar || ch === CharacterCodes.equals);
                    classifyDisabledMergeCode(text, start, end);
                    break;

                case SyntaxKind.ShebangTrivia:
                    // TODO: Maybe we should classify these.
                    break;

                default:
                    Debug.assertNever(kind);
            }
        }
    }

    /**
     * Classifies a single comment: multi-line comments are checked for JSDoc
     * content, single-line comments for triple-slash pragmas; anything else is
     * pushed as a plain comment range.
     */
    function classifyComment(token: Node, kind: SyntaxKind, start: number, width: number) {
        if (kind === SyntaxKind.MultiLineCommentTrivia) {
            // See if this is a doc comment.  If so, we'll classify certain portions of it
            // specially.
            const docCommentAndDiagnostics = parseIsolatedJSDocComment(sourceFile.text, start, width);
            if (docCommentAndDiagnostics && docCommentAndDiagnostics.jsDoc) {
                // TODO: This should be predicated on `token["kind"]` being compatible with `HasJSDoc["kind"]`
                setParent(docCommentAndDiagnostics.jsDoc, token as HasJSDoc);
                classifyJSDocComment(docCommentAndDiagnostics.jsDoc);
                return;
            }
        }
        else if (kind === SyntaxKind.SingleLineCommentTrivia) {
            if (tryClassifyTripleSlashComment(start, width)) {
                return;
            }
        }

        // Simple comment.  Just add as is.
        pushCommentRange(start, width);
    }

    // Convenience wrapper: pushes a range classified as a comment.
    function pushCommentRange(start: number, width: number) {
        pushClassification(start, width, ClassificationType.comment);
    }

    /**
     * Classifies the interior of a JSDoc comment: tag punctuation and names
     * get their own classifications, tag payloads (types, names) are processed
     * individually, and everything in between is classified as comment text.
     * `pos` tracks the end of the last classified region throughout.
     */
    function classifyJSDocComment(docComment: JSDoc) {
        let pos = docComment.pos;

        if (docComment.tags) {
            for (const tag of docComment.tags) {
                // As we walk through each tag, classify the portion of text from the end of
                // the last tag (or the start of the entire doc comment) as 'comment'.
                if (tag.pos !== pos) {
                    pushCommentRange(pos, tag.pos - pos);
                }

                pushClassification(tag.pos, 1, ClassificationType.punctuation); // "@"
                pushClassification(tag.tagName.pos, tag.tagName.end - tag.tagName.pos, ClassificationType.docCommentTagName); // e.g. "param"

                pos = tag.tagName.end;
                let commentStart = tag.tagName.end;

                // Compute where the tag's trailing comment text starts, based on the
                // structure of each tag kind.
                switch (tag.kind) {
                    case SyntaxKind.JSDocParameterTag:
                        const param = tag as JSDocParameterTag;
                        processJSDocParameterTag(param);
                        commentStart = param.isNameFirst && param.typeExpression?.end || param.name.end;
                        break;
                    case SyntaxKind.JSDocPropertyTag:
                        const prop = tag as JSDocPropertyTag;
                        commentStart = prop.isNameFirst && prop.typeExpression?.end || prop.name.end;
                        break;
                    case SyntaxKind.JSDocTemplateTag:
                        processJSDocTemplateTag(tag as JSDocTemplateTag);
                        pos = tag.end;
                        commentStart = (tag as JSDocTemplateTag).typeParameters.end;
                        break;
                    case SyntaxKind.JSDocTypedefTag:
                        const type = tag as JSDocTypedefTag;
                        commentStart = type.typeExpression?.kind === SyntaxKind.JSDocTypeExpression && type.fullName?.end || type.typeExpression?.end || commentStart;
                        break;
                    case SyntaxKind.JSDocCallbackTag:
                        commentStart = (tag as JSDocCallbackTag).typeExpression.end;
                        break;
                    case SyntaxKind.JSDocTypeTag:
                        processElement((tag as JSDocTypeTag).typeExpression);
                        pos = tag.end;
                        commentStart = (tag as JSDocTypeTag).typeExpression.end;
                        break;
                    case SyntaxKind.JSDocThisTag:
                    case SyntaxKind.JSDocEnumTag:
                        commentStart = (tag as JSDocThisTag | JSDocEnumTag).typeExpression.end;
                        break;
                    case SyntaxKind.JSDocReturnTag:
                        processElement((tag as JSDocReturnTag).typeExpression);
                        pos = tag.end;
                        commentStart = (tag as JSDocReturnTag).typeExpression?.end || commentStart;
                        break;
                    case SyntaxKind.JSDocSeeTag:
                        commentStart = (tag as JSDocSeeTag).name?.end || commentStart;
                        break;
                    case SyntaxKind.JSDocAugmentsTag:
                    case SyntaxKind.JSDocImplementsTag:
                        commentStart = (tag as JSDocImplementsTag | JSDocAugmentsTag).class.end;
                        break;
                }
                // The tag's comment may be an already-parsed node (object) or raw
                // text (string); classify the appropriate range in either case.
                if (typeof tag.comment === "object") {
                    pushCommentRange(tag.comment.pos, tag.comment.end - tag.comment.pos);
                }
                else if (typeof tag.comment === "string") {
                    pushCommentRange(commentStart, tag.end - commentStart);
                }
            }
        }

        // Classify any trailing text after the last tag as comment.
        if (pos !== docComment.end) {
            pushCommentRange(pos, docComment.end - pos);
        }

        return;

        // Classifies a @param tag's name and type expression, in whichever
        // order they appear, updating the enclosing `pos` as it goes.
        function processJSDocParameterTag(tag: JSDocParameterTag) {
            if (tag.isNameFirst) {
                pushCommentRange(pos, tag.name.pos - pos);
                pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                pos = tag.name.end;
            }

            if (tag.typeExpression) {
                pushCommentRange(pos, tag.typeExpression.pos - pos);
                processElement(tag.typeExpression);
                pos = tag.typeExpression.end;
            }

            if (!tag.isNameFirst) {
                pushCommentRange(pos, tag.name.pos - pos);
                pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                pos = tag.name.end;
            }
        }
    }

    /**
     * Attempts to classify a single-line comment as a triple-slash pragma
     * (e.g. `/// <reference path="..." />`), classifying its element name,
     * attributes, and values like JSX. Returns false if the comment is not a
     * recognized pragma, in which case the caller classifies it as a plain
     * comment.
     */
    function tryClassifyTripleSlashComment(start: number, width: number): boolean {
        const tripleSlashXMLCommentRegEx = /^(\/\/\/\s*)(<)(?:(\S+)((?:[^/]|\/[^>])*)(\/>)?)?/im;
        // Require a leading whitespace character (the parser already does) to prevent terrible backtracking performance
        const attributeRegex = /(\s)(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img;

        const text = sourceFile.text.substr(start, width);
        const match = tripleSlashXMLCommentRegEx.exec(text);
        if (!match) {
            return false;
        }

        // Limiting classification to exactly the elements and attributes
        // defined in `ts.commentPragmas` would be excessive, but we can avoid
        // some obvious false positives (e.g. in XML-like doc comments) by
        // checking the element name.
        // eslint-disable-next-line local/no-in-operator
        if (!match[3] || !(match[3] in commentPragmas)) {
            return false;
        }

        let pos = start;

        pushCommentRange(pos, match[1].length); // ///
        pos += match[1].length;

        pushClassification(pos, match[2].length, ClassificationType.punctuation); // <
        pos += match[2].length;

        pushClassification(pos, match[3].length, ClassificationType.jsxSelfClosingTagName); // element name
        pos += match[3].length;

        // Walk the attribute region, classifying name/=/value runs; gaps
        // between matches are classified as comment text.
        const attrText = match[4];
        let attrPos = pos;
        while (true) {
            const attrMatch = attributeRegex.exec(attrText);
            if (!attrMatch) {
                break;
            }

            const newAttrPos = pos + attrMatch.index + attrMatch[1].length; // whitespace
            if (newAttrPos > attrPos) {
                pushCommentRange(attrPos, newAttrPos - attrPos);
                attrPos = newAttrPos;
            }

            pushClassification(attrPos, attrMatch[2].length, ClassificationType.jsxAttribute); // attribute name
            attrPos += attrMatch[2].length;

            if (attrMatch[3].length) {
                pushCommentRange(attrPos, attrMatch[3].length); // whitespace
                attrPos += attrMatch[3].length;
            }

            pushClassification(attrPos, attrMatch[4].length, ClassificationType.operator); // =
            attrPos += attrMatch[4].length;

            if (attrMatch[5].length) {
                pushCommentRange(attrPos, attrMatch[5].length); // whitespace
                attrPos += attrMatch[5].length;
            }

            pushClassification(attrPos, attrMatch[6].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value
            attrPos += attrMatch[6].length;
        }

        pos += match[4].length;

        // Any leftover attribute-region text after the last match is a comment.
        if (pos > attrPos) {
            pushCommentRange(attrPos, pos - attrPos);
        }

        if (match[5]) {
            pushClassification(pos, match[5].length, ClassificationType.punctuation); // />
            pos += match[5].length;
        }

        // Whatever trails the pragma on the same line is plain comment text.
        const end = start + width;
        if (pos < end) {
            pushCommentRange(pos, end - pos);
        }

        return true;
    }

    // Classifies a @template tag by recursing into each of its child nodes.
    function processJSDocTemplateTag(tag: JSDocTemplateTag) {
        for (const child of tag.getChildren()) {
            processElement(child);
        }
    }

    /**
     * Classifies the "disabled" side of a merge conflict: the marker line
     * itself becomes a comment, and everything after it up to `end` is lexed
     * and classified token-by-token with the dedicated scanner.
     */
    function classifyDisabledMergeCode(text: string, start: number, end: number) {
        // Classify the line that the ||||||| or ======= marker is on as a comment.
        // Then just lex all further tokens and add them to the result.
        let i: number;
        for (i = start; i < end; i++) {
            if (isLineBreak(text.charCodeAt(i))) {
                break;
            }
        }
        pushClassification(start, i - start, ClassificationType.comment);
        mergeConflictScanner.setTextPos(i);

        while (mergeConflictScanner.getTextPos() < end) {
            classifyDisabledCodeToken();
        }
    }

    // Scans one token of disabled merge code and classifies it by kind alone
    // (no parse tree is available for this text).
    function classifyDisabledCodeToken() {
        const start = mergeConflictScanner.getTextPos();
        const tokenKind = mergeConflictScanner.scan();
        const end = mergeConflictScanner.getTextPos();

        const type = classifyTokenType(tokenKind);
        if (type) {
            pushClassification(start, end - start, type);
        }
    }

    /**
     * Returns true if node should be treated as classified and no further processing is required.
     * False will mean that node is not classified and traverse routine should recurse into node contents.
     */
    function tryClassifyNode(node: Node): boolean {
        // JSDoc nodes are classified separately via classifyComment/classifyJSDocComment.
        if (isJSDoc(node)) {
            return true;
        }

        // Missing nodes have no text to classify; treat as done.
        if (nodeIsMissing(node)) {
            return true;
        }

        // Only tokens, JsxText, and JSX element names are classified directly;
        // everything else is recursed into by the caller.
        const classifiedElementName = tryClassifyJsxElementName(node);
        if (!isToken(node) && node.kind !== SyntaxKind.JsxText && classifiedElementName === undefined) {
            return false;
        }

        // JsxText owns its leading trivia; other tokens have it classified first.
        const tokenStart = node.kind === SyntaxKind.JsxText ? node.pos : classifyLeadingTriviaAndGetTokenStart(node);

        const tokenWidth = node.end - tokenStart;
        Debug.assert(tokenWidth >= 0);
        if (tokenWidth > 0) {
            const type = classifiedElementName || classifyTokenType(node.kind, node);
            if (type) {
                pushClassification(tokenStart, tokenWidth, type);
            }
        }

        return true;
    }

    // If `token` is the tag name of a JSX element (or a JSX attribute name),
    // returns the corresponding classification; otherwise undefined.
    function tryClassifyJsxElementName(token: Node): ClassificationType | undefined {
        switch (token.parent && token.parent.kind) {
            case SyntaxKind.JsxOpeningElement:
                if ((token.parent as JsxOpeningElement).tagName === token) {
                    return ClassificationType.jsxOpenTagName;
                }
                break;
            case SyntaxKind.JsxClosingElement:
                if ((token.parent as JsxClosingElement).tagName === token) {
                    return ClassificationType.jsxCloseTagName;
                }
                break;
            case SyntaxKind.JsxSelfClosingElement:
                if ((token.parent as JsxSelfClosingElement).tagName === token) {
                    return ClassificationType.jsxSelfClosingTagName;
                }
                break;
            case SyntaxKind.JsxAttribute:
                if ((token.parent as JsxAttribute).name === token) {
                    return ClassificationType.jsxAttribute;
                }
                break;
        }
        return undefined;
    }

    // for accurate classification, the actual token should be passed in.  however, for
    // cases like 'disabled merge code' classification, we just get the token kind and
    // classify based on that instead.
    function classifyTokenType(tokenKind: SyntaxKind, token?: Node): ClassificationType | undefined {
        if (isKeyword(tokenKind)) {
            return ClassificationType.keyword;
        }

        // Special case `<` and `>`: If they appear in a generic context they are punctuation,
        // not operators.
        if (tokenKind === SyntaxKind.LessThanToken || tokenKind === SyntaxKind.GreaterThanToken) {
            // If the node owning the token has a type argument list or type parameter list, then
            // we can effectively assume that a '<' and '>' belong to those lists.
            if (token && getTypeArgumentOrTypeParameterList(token.parent)) {
                return ClassificationType.punctuation;
            }
        }

        if (isPunctuation(tokenKind)) {
            if (token) {
                const parent = token.parent;
                if (tokenKind === SyntaxKind.EqualsToken) {
                    // the '=' in a variable declaration is special cased here.
                    if (parent.kind === SyntaxKind.VariableDeclaration ||
                        parent.kind === SyntaxKind.PropertyDeclaration ||
                        parent.kind === SyntaxKind.Parameter ||
                        parent.kind === SyntaxKind.JsxAttribute) {
                        return ClassificationType.operator;
                    }
                }

                // Punctuation inside an expression context is an operator.
                if (parent.kind === SyntaxKind.BinaryExpression ||
                    parent.kind === SyntaxKind.PrefixUnaryExpression ||
                    parent.kind === SyntaxKind.PostfixUnaryExpression ||
                    parent.kind === SyntaxKind.ConditionalExpression) {
                    return ClassificationType.operator;
                }
            }

            return ClassificationType.punctuation;
        }
        else if (tokenKind === SyntaxKind.NumericLiteral) {
            return ClassificationType.numericLiteral;
        }
        else if (tokenKind === SyntaxKind.BigIntLiteral) {
            return ClassificationType.bigintLiteral;
        }
        else if (tokenKind === SyntaxKind.StringLiteral) {
            // A string used as a JSX attribute value gets its own classification.
            return token && token.parent.kind === SyntaxKind.JsxAttribute ? ClassificationType.jsxAttributeStringLiteralValue : ClassificationType.stringLiteral;
        }
        else if (tokenKind === SyntaxKind.RegularExpressionLiteral) {
            // TODO: we should get another classification type for these literals.
            return ClassificationType.stringLiteral;
        }
        else if (isTemplateLiteralKind(tokenKind)) {
            // TODO (drosen): we should *also* get another classification type for these literals.
            return ClassificationType.stringLiteral;
        }
        else if (tokenKind === SyntaxKind.JsxText) {
            return ClassificationType.jsxText;
        }
        else if (tokenKind === SyntaxKind.Identifier) {
            // Identifiers that are *names* of declarations get declaration-specific
            // classifications; all other identifiers are plain identifiers.
            if (token) {
                switch (token.parent.kind) {
                    case SyntaxKind.ClassDeclaration:
                        if ((token.parent as ClassDeclaration).name === token) {
                            return ClassificationType.className;
                        }
                        return;
                    case SyntaxKind.TypeParameter:
                        if ((token.parent as TypeParameterDeclaration).name === token) {
                            return ClassificationType.typeParameterName;
                        }
                        return;
                    case SyntaxKind.InterfaceDeclaration:
                        if ((token.parent as InterfaceDeclaration).name === token) {
                            return ClassificationType.interfaceName;
                        }
                        return;
                    case SyntaxKind.EnumDeclaration:
                        if ((token.parent as EnumDeclaration).name === token) {
                            return ClassificationType.enumName;
                        }
                        return;
                    case SyntaxKind.ModuleDeclaration:
                        if ((token.parent as ModuleDeclaration).name === token) {
                            return ClassificationType.moduleName;
                        }
                        return;
                    case SyntaxKind.Parameter:
                        if ((token.parent as ParameterDeclaration).name === token) {
                            // `this` parameters are keywords, not parameter names.
                            return isThisIdentifier(token) ? ClassificationType.keyword : ClassificationType.parameterName;
                        }
                        return;
                }

                // `as const` — the `const` here reads as a keyword.
                if (isConstTypeReference(token.parent)) {
                    return ClassificationType.keyword;
                }
            }
            return ClassificationType.identifier;
        }
    }

    /**
     * Recursive tree walk: skips subtrees outside the requested span, checks
     * for cancellation at expensive nodes, and recurses into any child that
     * `tryClassifyNode` did not classify directly.
     */
    function processElement(element: Node | undefined) {
        if (!element) {
            return;
        }

        // Ignore nodes that don't intersect the original span to classify.
        if (decodedTextSpanIntersectsWith(spanStart, spanLength, element.pos, element.getFullWidth())) {
            checkForClassificationCancellation(cancellationToken, element.kind);

            for (const child of element.getChildren(sourceFile)) {
                if (!tryClassifyNode(child)) {
                    // Recurse into our child nodes.
                    processElement(child);
                }
            }
        }
    }
}
1121