• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1namespace ts {
2    /** The classifier is used for syntactic highlighting in editors via the TSServer */
3    export function createClassifier(): Classifier {
4        const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);
5
6        function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
7            return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
8        }
9
10        // If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
11        // we will be more conservative in order to avoid conflicting with the syntactic classifier.
12        function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
13            let token = SyntaxKind.Unknown;
14            let lastNonTriviaToken = SyntaxKind.Unknown;
15
16            // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
17            // classification on template strings. Because of the context free nature of templates,
18            // the only precise way to classify a template portion would be by propagating the stack across
19            // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
20            // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
21            // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
22            // Situations in which this fails are
23            //  1) When template strings are nested across different lines:
24            //          `hello ${ `world
25            //          ` }`
26            //
27            //     Where on the second line, you will get the closing of a template,
28            //     a closing curly, and a new template.
29            //
30            //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
31            //          `hello ${ () => {
32            //          return "world" } } `
33            //
34            //     Where on the second line, you will get the 'return' keyword,
35            //     a string literal, and a template end consisting of '} } `'.
36            const templateStack: SyntaxKind[] = [];
37
38            const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
39            text = prefix + text;
40            const offset = prefix.length;
41            if (pushTemplate) {
42                templateStack.push(SyntaxKind.TemplateHead);
43            }
44
45            scanner.setText(text);
46
47            let endOfLineState = EndOfLineState.None;
48            const spans: number[] = [];
49
50            // We can run into an unfortunate interaction between the lexical and syntactic classifier
51            // when the user is typing something generic.  Consider the case where the user types:
52            //
53            //      Foo<number
54            //
55            // From the lexical classifier's perspective, 'number' is a keyword, and so the word will
56            // be classified as such.  However, from the syntactic classifier's tree-based perspective
57            // this is simply an expression with the identifier 'number' on the RHS of the less than
58            // token.  So the classification will go back to being an identifier.  The moment the user
59            // types again, number will become a keyword, then an identifier, etc. etc.
60            //
61            // To try to avoid this problem, we avoid classifying contextual keywords as keywords
62            // when the user is potentially typing something generic.  We just can't do a good enough
63            // job at the lexical level, and so well leave it up to the syntactic classifier to make
64            // the determination.
65            //
66            // In order to determine if the user is potentially typing something generic, we use a
67            // weak heuristic where we track < and > tokens.  It's a weak heuristic, but should
68            // work well enough in practice.
69            let angleBracketStack = 0;
70
71            do {
72                token = scanner.scan();
73                if (!isTrivia(token)) {
74                    handleToken();
75                    lastNonTriviaToken = token;
76                }
77                const end = scanner.getTextPos();
78                pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
79                if (end >= text.length) {
80                    const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
81                    if (end !== undefined) {
82                        endOfLineState = end;
83                    }
84                }
85            } while (token !== SyntaxKind.EndOfFileToken);
86
87            function handleToken(): void {
88                switch (token) {
89                    case SyntaxKind.SlashToken:
90                    case SyntaxKind.SlashEqualsToken:
91                        if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
92                            token = SyntaxKind.RegularExpressionLiteral;
93                        }
94                        break;
95                    case SyntaxKind.LessThanToken:
96                        if (lastNonTriviaToken === SyntaxKind.Identifier) {
97                            // Could be the start of something generic.  Keep track of that by bumping
98                            // up the current count of generic contexts we may be in.
99                            angleBracketStack++;
100                        }
101                        break;
102                    case SyntaxKind.GreaterThanToken:
103                        if (angleBracketStack > 0) {
104                            // If we think we're currently in something generic, then mark that that
105                            // generic entity is complete.
106                            angleBracketStack--;
107                        }
108                        break;
109                    case SyntaxKind.AnyKeyword:
110                    case SyntaxKind.StringKeyword:
111                    case SyntaxKind.NumberKeyword:
112                    case SyntaxKind.BooleanKeyword:
113                    case SyntaxKind.SymbolKeyword:
114                        if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
115                            // If it looks like we're could be in something generic, don't classify this
116                            // as a keyword.  We may just get overwritten by the syntactic classifier,
117                            // causing a noisy experience for the user.
118                            token = SyntaxKind.Identifier;
119                        }
120                        break;
121                    case SyntaxKind.TemplateHead:
122                        templateStack.push(token);
123                        break;
124                    case SyntaxKind.OpenBraceToken:
125                        // If we don't have anything on the template stack,
126                        // then we aren't trying to keep track of a previously scanned template head.
127                        if (templateStack.length > 0) {
128                            templateStack.push(token);
129                        }
130                        break;
131                    case SyntaxKind.CloseBraceToken:
132                        // If we don't have anything on the template stack,
133                        // then we aren't trying to keep track of a previously scanned template head.
134                        if (templateStack.length > 0) {
135                            const lastTemplateStackToken = lastOrUndefined(templateStack);
136
137                            if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
138                                token = scanner.reScanTemplateToken(/* isTaggedTemplate */ false);
139
140                                // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
141                                if (token === SyntaxKind.TemplateTail) {
142                                    templateStack.pop();
143                                }
144                                else {
145                                    Debug.assertEqual(token, SyntaxKind.TemplateMiddle, "Should have been a template middle.");
146                                }
147                            }
148                            else {
149                                Debug.assertEqual(lastTemplateStackToken, SyntaxKind.OpenBraceToken, "Should have been an open brace");
150                                templateStack.pop();
151                            }
152                        }
153                        break;
154                    default:
155                        if (!isKeyword(token)) {
156                            break;
157                        }
158
159                        if (lastNonTriviaToken === SyntaxKind.DotToken) {
160                            token = SyntaxKind.Identifier;
161                        }
162                        else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
163                            // We have two keywords in a row.  Only treat the second as a keyword if
164                            // it's a sequence that could legally occur in the language.  Otherwise
165                            // treat it as an identifier.  This way, if someone writes "private var"
166                            // we recognize that 'var' is actually an identifier here.
167                            token = SyntaxKind.Identifier;
168                        }
169                }
170            }
171
172            return { endOfLineState, spans };
173        }
174
175        return { getClassificationsForLine, getEncodedLexicalClassifications };
176    }
177
178    /// We do not have a full parser support to know when we should parse a regex or not
179    /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
180    /// we have a series of divide operator. this list allows us to be more accurate by ruling out
181    /// locations where a regexp cannot exist.
182    const noRegexTable: true[] = arrayToNumericMap<SyntaxKind, true>([
183        SyntaxKind.Identifier,
184        SyntaxKind.StringLiteral,
185        SyntaxKind.NumericLiteral,
186        SyntaxKind.BigIntLiteral,
187        SyntaxKind.RegularExpressionLiteral,
188        SyntaxKind.ThisKeyword,
189        SyntaxKind.PlusPlusToken,
190        SyntaxKind.MinusMinusToken,
191        SyntaxKind.CloseParenToken,
192        SyntaxKind.CloseBracketToken,
193        SyntaxKind.CloseBraceToken,
194        SyntaxKind.TrueKeyword,
195        SyntaxKind.FalseKeyword,
196    ], token => token, () => true);
197
198    function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined {
199        switch (token) {
200            case SyntaxKind.StringLiteral: {
201                // Check to see if we finished up on a multiline string literal.
202                if (!scanner.isUnterminated()) return undefined;
203
204                const tokenText = scanner.getTokenText();
205                const lastCharIndex = tokenText.length - 1;
206                let numBackslashes = 0;
207                while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
208                    numBackslashes++;
209                }
210
211                // If we have an odd number of backslashes, then the multiline string is unclosed
212                if ((numBackslashes & 1) === 0) return undefined;
213                return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral;
214            }
215            case SyntaxKind.MultiLineCommentTrivia:
216                // Check to see if the multiline comment was unclosed.
217                return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined;
218            default:
219                if (isTemplateLiteralKind(token)) {
220                    if (!scanner.isUnterminated()) {
221                        return undefined;
222                    }
223                    switch (token) {
224                        case SyntaxKind.TemplateTail:
225                            return EndOfLineState.InTemplateMiddleOrTail;
226                        case SyntaxKind.NoSubstitutionTemplateLiteral:
227                            return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
228                        default:
229                            return Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
230                    }
231                }
232                return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined;
233        }
234    }
235
236    function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void {
237        if (classification === ClassificationType.whiteSpace) {
238            // Don't bother with whitespace classifications.  They're not needed.
239            return;
240        }
241
242        if (start === 0 && offset > 0) {
243            // We're classifying the first token, and this was a case where we prepended text.
244            // We should consider the start of this token to be at the start of the original text.
245            start += offset;
246        }
247
248        const length = end - start;
249        if (length > 0) {
250            // All our tokens are in relation to the augmented text.  Move them back to be
251            // relative to the original text.
252            result.push(start - offset, length, classification);
253        }
254    }
255
256    function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult {
257        const entries: ClassificationInfo[] = [];
258        const dense = classifications.spans;
259        let lastEnd = 0;
260
261        for (let i = 0; i < dense.length; i += 3) {
262            const start = dense[i];
263            const length = dense[i + 1];
264            const type = dense[i + 2] as ClassificationType;
265
266            // Make a whitespace entry between the last item and this one.
267            if (lastEnd >= 0) {
268                const whitespaceLength = start - lastEnd;
269                if (whitespaceLength > 0) {
270                    entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
271                }
272            }
273
274            entries.push({ length, classification: convertClassification(type) });
275            lastEnd = start + length;
276        }
277
278        const whitespaceLength = text.length - lastEnd;
279        if (whitespaceLength > 0) {
280            entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
281        }
282
283        return { entries, finalLexState: classifications.endOfLineState };
284    }
285
286    function convertClassification(type: ClassificationType): TokenClass {
287        switch (type) {
288            case ClassificationType.comment: return TokenClass.Comment;
289            case ClassificationType.keyword: return TokenClass.Keyword;
290            case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
291            case ClassificationType.bigintLiteral: return TokenClass.BigIntLiteral;
292            case ClassificationType.operator: return TokenClass.Operator;
293            case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
294            case ClassificationType.whiteSpace: return TokenClass.Whitespace;
295            case ClassificationType.punctuation: return TokenClass.Punctuation;
296            case ClassificationType.identifier:
297            case ClassificationType.className:
298            case ClassificationType.enumName:
299            case ClassificationType.interfaceName:
300            case ClassificationType.moduleName:
301            case ClassificationType.typeParameterName:
302            case ClassificationType.typeAliasName:
303            case ClassificationType.text:
304            case ClassificationType.parameterName:
305                return TokenClass.Identifier;
306            default:
307                return undefined!; // TODO: GH#18217 Debug.assertNever(type);
308        }
309    }
310
311    /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
312    function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean {
313        if (!isAccessibilityModifier(keyword1)) {
314            // Assume any other keyword combination is legal.
315            // This can be refined in the future if there are more cases we want the classifier to be better at.
316            return true;
317        }
318        switch (keyword2) {
319            case SyntaxKind.GetKeyword:
320            case SyntaxKind.SetKeyword:
321            case SyntaxKind.ConstructorKeyword:
322            case SyntaxKind.StaticKeyword:
323            case SyntaxKind.AccessorKeyword:
324                return true; // Allow things like "public get", "public constructor" and "public static".
325            default:
326                return false; // Any other keyword following "public" is actually an identifier, not a real keyword.
327        }
328    }
329
330    function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } {
331        // If we're in a string literal, then prepend: "\
332        // (and a newline).  That way when we lex we'll think we're still in a string literal.
333        //
334        // If we're in a multiline comment, then prepend: /*
335        // (and a newline).  That way when we lex we'll think we're still in a multiline comment.
336        switch (lexState) {
337            case EndOfLineState.InDoubleQuoteStringLiteral:
338                return { prefix: "\"\\\n" };
339            case EndOfLineState.InSingleQuoteStringLiteral:
340                return { prefix: "'\\\n" };
341            case EndOfLineState.InMultiLineCommentTrivia:
342                return { prefix: "/*\n" };
343            case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
344                return { prefix: "`\n" };
345            case EndOfLineState.InTemplateMiddleOrTail:
346                return { prefix: "}\n", pushTemplate: true };
347            case EndOfLineState.InTemplateSubstitutionPosition:
348                return { prefix: "", pushTemplate: true };
349            case EndOfLineState.None:
350                return { prefix: "" };
351            default:
352                return Debug.assertNever(lexState);
353        }
354    }
355
356    function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
357        switch (token) {
358            case SyntaxKind.AsteriskToken:
359            case SyntaxKind.SlashToken:
360            case SyntaxKind.PercentToken:
361            case SyntaxKind.PlusToken:
362            case SyntaxKind.MinusToken:
363            case SyntaxKind.LessThanLessThanToken:
364            case SyntaxKind.GreaterThanGreaterThanToken:
365            case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
366            case SyntaxKind.LessThanToken:
367            case SyntaxKind.GreaterThanToken:
368            case SyntaxKind.LessThanEqualsToken:
369            case SyntaxKind.GreaterThanEqualsToken:
370            case SyntaxKind.InstanceOfKeyword:
371            case SyntaxKind.InKeyword:
372            case SyntaxKind.AsKeyword:
373            case SyntaxKind.SatisfiesKeyword:
374            case SyntaxKind.EqualsEqualsToken:
375            case SyntaxKind.ExclamationEqualsToken:
376            case SyntaxKind.EqualsEqualsEqualsToken:
377            case SyntaxKind.ExclamationEqualsEqualsToken:
378            case SyntaxKind.AmpersandToken:
379            case SyntaxKind.CaretToken:
380            case SyntaxKind.BarToken:
381            case SyntaxKind.AmpersandAmpersandToken:
382            case SyntaxKind.BarBarToken:
383            case SyntaxKind.BarEqualsToken:
384            case SyntaxKind.AmpersandEqualsToken:
385            case SyntaxKind.CaretEqualsToken:
386            case SyntaxKind.LessThanLessThanEqualsToken:
387            case SyntaxKind.GreaterThanGreaterThanEqualsToken:
388            case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
389            case SyntaxKind.PlusEqualsToken:
390            case SyntaxKind.MinusEqualsToken:
391            case SyntaxKind.AsteriskEqualsToken:
392            case SyntaxKind.SlashEqualsToken:
393            case SyntaxKind.PercentEqualsToken:
394            case SyntaxKind.EqualsToken:
395            case SyntaxKind.CommaToken:
396            case SyntaxKind.QuestionQuestionToken:
397            case SyntaxKind.BarBarEqualsToken:
398            case SyntaxKind.AmpersandAmpersandEqualsToken:
399            case SyntaxKind.QuestionQuestionEqualsToken:
400                return true;
401            default:
402                return false;
403        }
404    }
405
406    function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
407        switch (token) {
408            case SyntaxKind.PlusToken:
409            case SyntaxKind.MinusToken:
410            case SyntaxKind.TildeToken:
411            case SyntaxKind.ExclamationToken:
412            case SyntaxKind.PlusPlusToken:
413            case SyntaxKind.MinusMinusToken:
414                return true;
415            default:
416                return false;
417        }
418    }
419
420    function classFromKind(token: SyntaxKind): ClassificationType {
421        if (isKeyword(token)) {
422            return ClassificationType.keyword;
423        }
424        else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
425            return ClassificationType.operator;
426        }
427        else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
428            return ClassificationType.punctuation;
429        }
430
431        switch (token) {
432            case SyntaxKind.NumericLiteral:
433                return ClassificationType.numericLiteral;
434            case SyntaxKind.BigIntLiteral:
435                return ClassificationType.bigintLiteral;
436            case SyntaxKind.StringLiteral:
437                return ClassificationType.stringLiteral;
438            case SyntaxKind.RegularExpressionLiteral:
439                return ClassificationType.regularExpressionLiteral;
440            case SyntaxKind.ConflictMarkerTrivia:
441            case SyntaxKind.MultiLineCommentTrivia:
442            case SyntaxKind.SingleLineCommentTrivia:
443                return ClassificationType.comment;
444            case SyntaxKind.WhitespaceTrivia:
445            case SyntaxKind.NewLineTrivia:
446                return ClassificationType.whiteSpace;
447            case SyntaxKind.Identifier:
448            default:
449                if (isTemplateLiteralKind(token)) {
450                    return ClassificationType.stringLiteral;
451                }
452                return ClassificationType.identifier;
453        }
454    }
455
456    /* @internal */
457    export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): ClassifiedSpan[] {
458        return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
459    }
460
461    function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) {
462        // We don't want to actually call back into our host on every node to find out if we've
463        // been canceled.  That would be an enormous amount of chattyness, along with the all
464        // the overhead of marshalling the data to/from the host.  So instead we pick a few
465        // reasonable node kinds to bother checking on.  These node kinds represent high level
466        // constructs that we would expect to see commonly, but just at a far less frequent
467        // interval.
468        //
469        // For example, in checker.ts (around 750k) we only have around 600 of these constructs.
470        // That means we're calling back into the host around every 1.2k of the file we process.
471        // Lib.d.ts has similar numbers.
472        switch (kind) {
473            case SyntaxKind.ModuleDeclaration:
474            case SyntaxKind.ClassDeclaration:
475            case SyntaxKind.InterfaceDeclaration:
476            case SyntaxKind.FunctionDeclaration:
477            case SyntaxKind.ClassExpression:
478            case SyntaxKind.FunctionExpression:
479            case SyntaxKind.ArrowFunction:
480                cancellationToken.throwIfCancellationRequested();
481        }
482    }
483
484    /* @internal */
485    export function getEncodedSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): Classifications {
486        const spans: number[] = [];
487        sourceFile.forEachChild(function cb(node: Node): void {
488            // Only walk into nodes that intersect the requested span.
489            if (!node || !textSpanIntersectsWith(span, node.pos, node.getFullWidth())) {
490                return;
491            }
492
493            checkForClassificationCancellation(cancellationToken, node.kind);
494            // Only bother calling into the typechecker if this is an identifier that
495            // could possibly resolve to a type name.  This makes classification run
496            // in a third of the time it would normally take.
497            if (isIdentifier(node) && !nodeIsMissing(node) && classifiableNames.has(node.escapedText)) {
498                const symbol = typeChecker.getSymbolAtLocation(node);
499                const type = symbol && classifySymbol(symbol, getMeaningFromLocation(node), typeChecker);
500                if (type) {
501                    pushClassification(node.getStart(sourceFile), node.getEnd(), type);
502                }
503            }
504
505            node.forEachChild(cb);
506        });
507        return { spans, endOfLineState: EndOfLineState.None };
508
509        function pushClassification(start: number, end: number, type: ClassificationType): void {
510            const length = end - start;
511            Debug.assert(length > 0, `Classification had non-positive length of ${length}`);
512            spans.push(start);
513            spans.push(length);
514            spans.push(type);
515        }
516    }
517
518    function classifySymbol(symbol: Symbol, meaningAtPosition: SemanticMeaning, checker: TypeChecker): ClassificationType | undefined {
519        const flags = symbol.getFlags();
520        if ((flags & SymbolFlags.Classifiable) === SymbolFlags.None) {
521            return undefined;
522        }
523        else if (flags & SymbolFlags.Class) {
524            return ClassificationType.className;
525        }
526        else if (flags & SymbolFlags.Enum) {
527            return ClassificationType.enumName;
528        }
529        else if (flags & SymbolFlags.TypeAlias) {
530            return ClassificationType.typeAliasName;
531        }
532        else if (flags & SymbolFlags.Module) {
533            // Only classify a module as such if
534            //  - It appears in a namespace context.
535            //  - There exists a module declaration which actually impacts the value side.
536            return meaningAtPosition & SemanticMeaning.Namespace || meaningAtPosition & SemanticMeaning.Value && hasValueSideModule(symbol) ? ClassificationType.moduleName : undefined;
537        }
538        else if (flags & SymbolFlags.Alias) {
539            return classifySymbol(checker.getAliasedSymbol(symbol), meaningAtPosition, checker);
540        }
541        else if (meaningAtPosition & SemanticMeaning.Type) {
542            return flags & SymbolFlags.Interface ? ClassificationType.interfaceName : flags & SymbolFlags.TypeParameter ? ClassificationType.typeParameterName : undefined;
543        }
544        else {
545            return undefined;
546        }
547    }
548
549    /** Returns true if there exists a module that introduces entities on the value side. */
550    function hasValueSideModule(symbol: Symbol): boolean {
551        return some(symbol.declarations, declaration =>
552            isModuleDeclaration(declaration) && getModuleInstanceState(declaration) === ModuleInstanceState.Instantiated);
553    }
554
555    function getClassificationTypeName(type: ClassificationType): ClassificationTypeNames {
556        switch (type) {
557            case ClassificationType.comment: return ClassificationTypeNames.comment;
558            case ClassificationType.identifier: return ClassificationTypeNames.identifier;
559            case ClassificationType.keyword: return ClassificationTypeNames.keyword;
560            case ClassificationType.numericLiteral: return ClassificationTypeNames.numericLiteral;
561            case ClassificationType.bigintLiteral: return ClassificationTypeNames.bigintLiteral;
562            case ClassificationType.operator: return ClassificationTypeNames.operator;
563            case ClassificationType.stringLiteral: return ClassificationTypeNames.stringLiteral;
564            case ClassificationType.whiteSpace: return ClassificationTypeNames.whiteSpace;
565            case ClassificationType.text: return ClassificationTypeNames.text;
566            case ClassificationType.punctuation: return ClassificationTypeNames.punctuation;
567            case ClassificationType.className: return ClassificationTypeNames.className;
568            case ClassificationType.enumName: return ClassificationTypeNames.enumName;
569            case ClassificationType.interfaceName: return ClassificationTypeNames.interfaceName;
570            case ClassificationType.moduleName: return ClassificationTypeNames.moduleName;
571            case ClassificationType.typeParameterName: return ClassificationTypeNames.typeParameterName;
572            case ClassificationType.typeAliasName: return ClassificationTypeNames.typeAliasName;
573            case ClassificationType.parameterName: return ClassificationTypeNames.parameterName;
574            case ClassificationType.docCommentTagName: return ClassificationTypeNames.docCommentTagName;
575            case ClassificationType.jsxOpenTagName: return ClassificationTypeNames.jsxOpenTagName;
576            case ClassificationType.jsxCloseTagName: return ClassificationTypeNames.jsxCloseTagName;
577            case ClassificationType.jsxSelfClosingTagName: return ClassificationTypeNames.jsxSelfClosingTagName;
578            case ClassificationType.jsxAttribute: return ClassificationTypeNames.jsxAttribute;
579            case ClassificationType.jsxText: return ClassificationTypeNames.jsxText;
580            case ClassificationType.jsxAttributeStringLiteralValue: return ClassificationTypeNames.jsxAttributeStringLiteralValue;
581            default: return undefined!; // TODO: GH#18217 throw Debug.assertNever(type);
582        }
583    }
584
585    function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
586        Debug.assert(classifications.spans.length % 3 === 0);
587        const dense = classifications.spans;
588        const result: ClassifiedSpan[] = [];
589        for (let i = 0; i < dense.length; i += 3) {
590            result.push({
591                textSpan: createTextSpan(dense[i], dense[i + 1]),
592                classificationType: getClassificationTypeName(dense[i + 2])
593            });
594        }
595
596        return result;
597    }
598
599    /* @internal */
600    export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
601        return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
602    }
603
604    /* @internal */
605    export function getEncodedSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): Classifications {
606        const spanStart = span.start;
607        const spanLength = span.length;
608
609        // Make a scanner we can get trivia from.
610        const triviaScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);
611        const mergeConflictScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);
612
613        const result: number[] = [];
614        processElement(sourceFile);
615
616        return { spans: result, endOfLineState: EndOfLineState.None };
617
618        function pushClassification(start: number, length: number, type: ClassificationType) {
619            result.push(start);
620            result.push(length);
621            result.push(type);
622        }
623
624        function classifyLeadingTriviaAndGetTokenStart(token: Node): number {
625            triviaScanner.setTextPos(token.pos);
626            while (true) {
627                const start = triviaScanner.getTextPos();
628                // only bother scanning if we have something that could be trivia.
629                if (!couldStartTrivia(sourceFile.text, start)) {
630                    return start;
631                }
632
633                const kind = triviaScanner.scan();
634                const end = triviaScanner.getTextPos();
635                const width = end - start;
636
637                // The moment we get something that isn't trivia, then stop processing.
638                if (!isTrivia(kind)) {
639                    return start;
640                }
641
642                switch (kind) {
643                    case SyntaxKind.NewLineTrivia:
644                    case SyntaxKind.WhitespaceTrivia:
645                        // Don't bother with newlines/whitespace.
646                        continue;
647
648                    case SyntaxKind.SingleLineCommentTrivia:
649                    case SyntaxKind.MultiLineCommentTrivia:
650                        // Only bother with the trivia if it at least intersects the span of interest.
651                        classifyComment(token, kind, start, width);
652
653                        // Classifying a comment might cause us to reuse the trivia scanner
654                        // (because of jsdoc comments).  So after we classify the comment make
655                        // sure we set the scanner position back to where it needs to be.
656                        triviaScanner.setTextPos(end);
657                        continue;
658
659                    case SyntaxKind.ConflictMarkerTrivia:
660                        const text = sourceFile.text;
661                        const ch = text.charCodeAt(start);
662
663                        // for the <<<<<<< and >>>>>>> markers, we just add them in as comments
664                        // in the classification stream.
665                        if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) {
666                            pushClassification(start, width, ClassificationType.comment);
667                            continue;
668                        }
669
670                        // for the ||||||| and ======== markers, add a comment for the first line,
671                        // and then lex all subsequent lines up until the end of the conflict marker.
672                        Debug.assert(ch === CharacterCodes.bar || ch === CharacterCodes.equals);
673                        classifyDisabledMergeCode(text, start, end);
674                        break;
675
676                    case SyntaxKind.ShebangTrivia:
677                        // TODO: Maybe we should classify these.
678                        break;
679
680                    default:
681                        Debug.assertNever(kind);
682                }
683            }
684        }
685
686        function classifyComment(token: Node, kind: SyntaxKind, start: number, width: number) {
687            if (kind === SyntaxKind.MultiLineCommentTrivia) {
688                // See if this is a doc comment.  If so, we'll classify certain portions of it
689                // specially.
690                const docCommentAndDiagnostics = parseIsolatedJSDocComment(sourceFile.text, start, width);
691                if (docCommentAndDiagnostics && docCommentAndDiagnostics.jsDoc) {
692                    // TODO: This should be predicated on `token["kind"]` being compatible with `HasJSDoc["kind"]`
693                    setParent(docCommentAndDiagnostics.jsDoc, token as HasJSDoc);
694                    classifyJSDocComment(docCommentAndDiagnostics.jsDoc);
695                    return;
696                }
697            }
698            else if (kind === SyntaxKind.SingleLineCommentTrivia) {
699                if (tryClassifyTripleSlashComment(start, width)) {
700                    return;
701                }
702            }
703
704            // Simple comment.  Just add as is.
705            pushCommentRange(start, width);
706        }
707
708        function pushCommentRange(start: number, width: number) {
709            pushClassification(start, width, ClassificationType.comment);
710        }
711
712        function classifyJSDocComment(docComment: JSDoc) {
713            let pos = docComment.pos;
714
715            if (docComment.tags) {
716                for (const tag of docComment.tags) {
717                    // As we walk through each tag, classify the portion of text from the end of
718                    // the last tag (or the start of the entire doc comment) as 'comment'.
719                    if (tag.pos !== pos) {
720                        pushCommentRange(pos, tag.pos - pos);
721                    }
722
723                    pushClassification(tag.pos, 1, ClassificationType.punctuation); // "@"
724                    pushClassification(tag.tagName.pos, tag.tagName.end - tag.tagName.pos, ClassificationType.docCommentTagName); // e.g. "param"
725
726                    pos = tag.tagName.end;
727                    let commentStart = tag.tagName.end;
728
729                    switch (tag.kind) {
730                        case SyntaxKind.JSDocParameterTag:
731                            const param = tag as JSDocParameterTag;
732                            processJSDocParameterTag(param);
733                            commentStart = param.isNameFirst && param.typeExpression?.end || param.name.end;
734                            break;
735                        case SyntaxKind.JSDocPropertyTag:
736                            const prop = tag as JSDocPropertyTag;
737                            commentStart = prop.isNameFirst && prop.typeExpression?.end || prop.name.end;
738                            break;
739                        case SyntaxKind.JSDocTemplateTag:
740                            processJSDocTemplateTag(tag as JSDocTemplateTag);
741                            pos = tag.end;
742                            commentStart = (tag as JSDocTemplateTag).typeParameters.end;
743                            break;
744                        case SyntaxKind.JSDocTypedefTag:
745                            const type = tag as JSDocTypedefTag;
746                            commentStart = type.typeExpression?.kind === SyntaxKind.JSDocTypeExpression && type.fullName?.end || type.typeExpression?.end || commentStart;
747                            break;
748                        case SyntaxKind.JSDocCallbackTag:
749                            commentStart = (tag as JSDocCallbackTag).typeExpression.end;
750                            break;
751                        case SyntaxKind.JSDocTypeTag:
752                            processElement((tag as JSDocTypeTag).typeExpression);
753                            pos = tag.end;
754                            commentStart = (tag as JSDocTypeTag).typeExpression.end;
755                            break;
756                        case SyntaxKind.JSDocThisTag:
757                        case SyntaxKind.JSDocEnumTag:
758                            commentStart = (tag as JSDocThisTag | JSDocEnumTag).typeExpression.end;
759                            break;
760                        case SyntaxKind.JSDocReturnTag:
761                            processElement((tag as JSDocReturnTag).typeExpression);
762                            pos = tag.end;
763                            commentStart = (tag as JSDocReturnTag).typeExpression?.end || commentStart;
764                            break;
765                        case SyntaxKind.JSDocSeeTag:
766                            commentStart = (tag as JSDocSeeTag).name?.end || commentStart;
767                            break;
768                        case SyntaxKind.JSDocAugmentsTag:
769                        case SyntaxKind.JSDocImplementsTag:
770                            commentStart = (tag as JSDocImplementsTag | JSDocAugmentsTag).class.end;
771                            break;
772                    }
773                    if (typeof tag.comment === "object") {
774                        pushCommentRange(tag.comment.pos, tag.comment.end - tag.comment.pos);
775                    }
776                    else if (typeof tag.comment === "string") {
777                        pushCommentRange(commentStart, tag.end - commentStart);
778                    }
779                }
780            }
781
782            if (pos !== docComment.end) {
783                pushCommentRange(pos, docComment.end - pos);
784            }
785
786            return;
787
788            function processJSDocParameterTag(tag: JSDocParameterTag) {
789                if (tag.isNameFirst) {
790                    pushCommentRange(pos, tag.name.pos - pos);
791                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
792                    pos = tag.name.end;
793                }
794
795                if (tag.typeExpression) {
796                    pushCommentRange(pos, tag.typeExpression.pos - pos);
797                    processElement(tag.typeExpression);
798                    pos = tag.typeExpression.end;
799                }
800
801                if (!tag.isNameFirst) {
802                    pushCommentRange(pos, tag.name.pos - pos);
803                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
804                    pos = tag.name.end;
805                }
806            }
807        }
808
809        function tryClassifyTripleSlashComment(start: number, width: number): boolean {
810            const tripleSlashXMLCommentRegEx = /^(\/\/\/\s*)(<)(?:(\S+)((?:[^/]|\/[^>])*)(\/>)?)?/im;
811            // Require a leading whitespace character (the parser already does) to prevent terrible backtracking performance
812            const attributeRegex = /(\s)(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img;
813
814            const text = sourceFile.text.substr(start, width);
815            const match = tripleSlashXMLCommentRegEx.exec(text);
816            if (!match) {
817                return false;
818            }
819
820            // Limiting classification to exactly the elements and attributes
821            // defined in `ts.commentPragmas` would be excessive, but we can avoid
822            // some obvious false positives (e.g. in XML-like doc comments) by
823            // checking the element name.
824            // eslint-disable-next-line local/no-in-operator
825            if (!match[3] || !(match[3] in commentPragmas)) {
826                return false;
827            }
828
829            let pos = start;
830
831            pushCommentRange(pos, match[1].length); // ///
832            pos += match[1].length;
833
834            pushClassification(pos, match[2].length, ClassificationType.punctuation); // <
835            pos += match[2].length;
836
837            pushClassification(pos, match[3].length, ClassificationType.jsxSelfClosingTagName); // element name
838            pos += match[3].length;
839
840            const attrText = match[4];
841            let attrPos = pos;
842            while (true) {
843                const attrMatch = attributeRegex.exec(attrText);
844                if (!attrMatch) {
845                    break;
846                }
847
848                const newAttrPos = pos + attrMatch.index + attrMatch[1].length; // whitespace
849                if (newAttrPos > attrPos) {
850                    pushCommentRange(attrPos, newAttrPos - attrPos);
851                    attrPos = newAttrPos;
852                }
853
854                pushClassification(attrPos, attrMatch[2].length, ClassificationType.jsxAttribute); // attribute name
855                attrPos += attrMatch[2].length;
856
857                if (attrMatch[3].length) {
858                    pushCommentRange(attrPos, attrMatch[3].length); // whitespace
859                    attrPos += attrMatch[3].length;
860                }
861
862                pushClassification(attrPos, attrMatch[4].length, ClassificationType.operator); // =
863                attrPos += attrMatch[4].length;
864
865                if (attrMatch[5].length) {
866                    pushCommentRange(attrPos, attrMatch[5].length); // whitespace
867                    attrPos += attrMatch[5].length;
868                }
869
870                pushClassification(attrPos, attrMatch[6].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value
871                attrPos += attrMatch[6].length;
872            }
873
874            pos += match[4].length;
875
876            if (pos > attrPos) {
877                pushCommentRange(attrPos, pos - attrPos);
878            }
879
880            if (match[5]) {
881                pushClassification(pos, match[5].length, ClassificationType.punctuation); // />
882                pos += match[5].length;
883            }
884
885            const end = start + width;
886            if (pos < end) {
887                pushCommentRange(pos, end - pos);
888            }
889
890            return true;
891        }
892
893        function processJSDocTemplateTag(tag: JSDocTemplateTag) {
894            for (const child of tag.getChildren()) {
895                processElement(child);
896            }
897        }
898
899        function classifyDisabledMergeCode(text: string, start: number, end: number) {
900            // Classify the line that the ||||||| or ======= marker is on as a comment.
901            // Then just lex all further tokens and add them to the result.
902            let i: number;
903            for (i = start; i < end; i++) {
904                if (isLineBreak(text.charCodeAt(i))) {
905                    break;
906                }
907            }
908            pushClassification(start, i - start, ClassificationType.comment);
909            mergeConflictScanner.setTextPos(i);
910
911            while (mergeConflictScanner.getTextPos() < end) {
912                classifyDisabledCodeToken();
913            }
914        }
915
916        function classifyDisabledCodeToken() {
917            const start = mergeConflictScanner.getTextPos();
918            const tokenKind = mergeConflictScanner.scan();
919            const end = mergeConflictScanner.getTextPos();
920
921            const type = classifyTokenType(tokenKind);
922            if (type) {
923                pushClassification(start, end - start, type);
924            }
925        }
926
927        /**
928         * Returns true if node should be treated as classified and no further processing is required.
929         * False will mean that node is not classified and traverse routine should recurse into node contents.
930         */
931        function tryClassifyNode(node: Node): boolean {
932            if (isJSDoc(node)) {
933                return true;
934            }
935
936            if (nodeIsMissing(node)) {
937                return true;
938            }
939
940            const classifiedElementName = tryClassifyJsxElementName(node);
941            if (!isToken(node) && node.kind !== SyntaxKind.JsxText && classifiedElementName === undefined) {
942                return false;
943            }
944
945            const tokenStart = node.kind === SyntaxKind.JsxText ? node.pos : classifyLeadingTriviaAndGetTokenStart(node);
946
947            const tokenWidth = node.end - tokenStart;
948            Debug.assert(tokenWidth >= 0);
949            if (tokenWidth > 0) {
950                const type = classifiedElementName || classifyTokenType(node.kind, node);
951                if (type) {
952                    pushClassification(tokenStart, tokenWidth, type);
953                }
954            }
955
956            return true;
957        }
958
959        function tryClassifyJsxElementName(token: Node): ClassificationType | undefined {
960            switch (token.parent && token.parent.kind) {
961                case SyntaxKind.JsxOpeningElement:
962                    if ((token.parent as JsxOpeningElement).tagName === token) {
963                        return ClassificationType.jsxOpenTagName;
964                    }
965                    break;
966                case SyntaxKind.JsxClosingElement:
967                    if ((token.parent as JsxClosingElement).tagName === token) {
968                        return ClassificationType.jsxCloseTagName;
969                    }
970                    break;
971                case SyntaxKind.JsxSelfClosingElement:
972                    if ((token.parent as JsxSelfClosingElement).tagName === token) {
973                        return ClassificationType.jsxSelfClosingTagName;
974                    }
975                    break;
976                case SyntaxKind.JsxAttribute:
977                    if ((token.parent as JsxAttribute).name === token) {
978                        return ClassificationType.jsxAttribute;
979                    }
980                    break;
981            }
982            return undefined;
983        }
984
985        // for accurate classification, the actual token should be passed in.  however, for
986        // cases like 'disabled merge code' classification, we just get the token kind and
987        // classify based on that instead.
988        function classifyTokenType(tokenKind: SyntaxKind, token?: Node): ClassificationType | undefined {
989            if (isKeyword(tokenKind)) {
990                return ClassificationType.keyword;
991            }
992
993            // Special case `<` and `>`: If they appear in a generic context they are punctuation,
994            // not operators.
995            if (tokenKind === SyntaxKind.LessThanToken || tokenKind === SyntaxKind.GreaterThanToken) {
996                // If the node owning the token has a type argument list or type parameter list, then
997                // we can effectively assume that a '<' and '>' belong to those lists.
998                if (token && getTypeArgumentOrTypeParameterList(token.parent)) {
999                    return ClassificationType.punctuation;
1000                }
1001            }
1002
1003            if (isPunctuation(tokenKind)) {
1004                if (token) {
1005                    const parent = token.parent;
1006                    if (tokenKind === SyntaxKind.EqualsToken) {
1007                        // the '=' in a variable declaration is special cased here.
1008                        if (parent.kind === SyntaxKind.VariableDeclaration ||
1009                            parent.kind === SyntaxKind.PropertyDeclaration ||
1010                            parent.kind === SyntaxKind.Parameter ||
1011                            parent.kind === SyntaxKind.JsxAttribute) {
1012                            return ClassificationType.operator;
1013                        }
1014                    }
1015
1016                    if (parent.kind === SyntaxKind.BinaryExpression ||
1017                        parent.kind === SyntaxKind.PrefixUnaryExpression ||
1018                        parent.kind === SyntaxKind.PostfixUnaryExpression ||
1019                        parent.kind === SyntaxKind.ConditionalExpression) {
1020                        return ClassificationType.operator;
1021                    }
1022                }
1023
1024                return ClassificationType.punctuation;
1025            }
1026            else if (tokenKind === SyntaxKind.NumericLiteral) {
1027                return ClassificationType.numericLiteral;
1028            }
1029            else if (tokenKind === SyntaxKind.BigIntLiteral) {
1030                return ClassificationType.bigintLiteral;
1031            }
1032            else if (tokenKind === SyntaxKind.StringLiteral) {
1033                return token && token.parent.kind === SyntaxKind.JsxAttribute ? ClassificationType.jsxAttributeStringLiteralValue : ClassificationType.stringLiteral;
1034            }
1035            else if (tokenKind === SyntaxKind.RegularExpressionLiteral) {
1036                // TODO: we should get another classification type for these literals.
1037                return ClassificationType.stringLiteral;
1038            }
1039            else if (isTemplateLiteralKind(tokenKind)) {
1040                // TODO (drosen): we should *also* get another classification type for these literals.
1041                return ClassificationType.stringLiteral;
1042            }
1043            else if (tokenKind === SyntaxKind.JsxText) {
1044                return ClassificationType.jsxText;
1045            }
1046            else if (tokenKind === SyntaxKind.Identifier) {
1047                if (token) {
1048                    switch (token.parent.kind) {
1049                        case SyntaxKind.ClassDeclaration:
1050                            if ((token.parent as ClassDeclaration).name === token) {
1051                                return ClassificationType.className;
1052                            }
1053                            return;
1054                        case SyntaxKind.TypeParameter:
1055                            if ((token.parent as TypeParameterDeclaration).name === token) {
1056                                return ClassificationType.typeParameterName;
1057                            }
1058                            return;
1059                        case SyntaxKind.InterfaceDeclaration:
1060                            if ((token.parent as InterfaceDeclaration).name === token) {
1061                                return ClassificationType.interfaceName;
1062                            }
1063                            return;
1064                        case SyntaxKind.EnumDeclaration:
1065                            if ((token.parent as EnumDeclaration).name === token) {
1066                                return ClassificationType.enumName;
1067                            }
1068                            return;
1069                        case SyntaxKind.ModuleDeclaration:
1070                            if ((token.parent as ModuleDeclaration).name === token) {
1071                                return ClassificationType.moduleName;
1072                            }
1073                            return;
1074                        case SyntaxKind.Parameter:
1075                            if ((token.parent as ParameterDeclaration).name === token) {
1076                                return isThisIdentifier(token) ? ClassificationType.keyword : ClassificationType.parameterName;
1077                            }
1078                            return;
1079                    }
1080
1081                    if (isConstTypeReference(token.parent)) {
1082                        return ClassificationType.keyword;
1083                    }
1084                }
1085                return ClassificationType.identifier;
1086            }
1087        }
1088
1089        function processElement(element: Node | undefined) {
1090            if (!element) {
1091                return;
1092            }
1093
1094            // Ignore nodes that don't intersect the original span to classify.
1095            if (decodedTextSpanIntersectsWith(spanStart, spanLength, element.pos, element.getFullWidth())) {
1096                checkForClassificationCancellation(cancellationToken, element.kind);
1097
1098                for (const child of element.getChildren(sourceFile)) {
1099                    if (!tryClassifyNode(child)) {
1100                        // Recurse into our child nodes.
1101                        processElement(child);
1102                    }
1103                }
1104            }
1105        }
1106    }
1107}
1108