namespace ts {
    /** The classifier is used for syntactic highlighting in editors via the TSServer */
    export function createClassifier(): Classifier {
        const scanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false);

        function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
            return convertClassificationsToResult(getEncodedLexicalClassifications(text, lexState, syntacticClassifierAbsent), text);
        }

        // If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
        // we will be more conservative in order to avoid conflicting with the syntactic classifier.
        function getEncodedLexicalClassifications(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
            let token = SyntaxKind.Unknown;
            let lastNonTriviaToken = SyntaxKind.Unknown;

            // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact)
            // classification on template strings. Because of the context free nature of templates,
            // the only precise way to classify a template portion would be by propagating the stack across
            // lines, just as we do with the end-of-line state. However, this is a burden for implementers,
            // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead
            // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state.
            // Situations in which this fails are
            //  1) When template strings are nested across different lines:
            //          `hello ${ `world
            //          ` }`
            //
            //     Where on the second line, you will get the closing of a template,
            //     a closing curly, and a new template.
            //
            //  2) When substitution expressions have curly braces and the curly brace falls on the next line:
            //          `hello ${ () => {
            //          return "world" } } `
            //
            //     Where on the second line, you will get the 'return' keyword,
            //     a string literal, and a template end consisting of '} } `'.
            const templateStack: SyntaxKind[] = [];

            const { prefix, pushTemplate } = getPrefixFromLexState(lexState);
            text = prefix + text;
            const offset = prefix.length;
            if (pushTemplate) {
                templateStack.push(SyntaxKind.TemplateHead);
            }

            scanner.setText(text);

            let endOfLineState = EndOfLineState.None;
            const spans: number[] = [];

            // We can run into an unfortunate interaction between the lexical and syntactic classifier
            // when the user is typing something generic.  Consider the case where the user types:
            //
            //      Foo<number
            //
            // From the lexical classifier's perspective, 'number' is a keyword, and so the word will
            // be classified as such.  However, from the syntactic classifier's tree-based perspective
            // this is simply an expression with the identifier 'number' on the RHS of the less than
            // token.  So the classification will go back to being an identifier.  The moment the user
            // types again, number will become a keyword, then an identifier, etc. etc.
            //
            // To try to avoid this problem, we avoid classifying contextual keywords as keywords
            // when the user is potentially typing something generic.  We just can't do a good enough
            // job at the lexical level, and so we'll leave it up to the syntactic classifier to make
            // the determination.
            //
            // In order to determine if the user is potentially typing something generic, we use a
            // weak heuristic where we track < and > tokens.  It's a weak heuristic, but should
            // work well enough in practice.
            let angleBracketStack = 0;

            do {
                token = scanner.scan();
                if (!isTrivia(token)) {
                    handleToken();
                    lastNonTriviaToken = token;
                }
                const end = scanner.getTextPos();
                pushEncodedClassification(scanner.getTokenPos(), end, offset, classFromKind(token), spans);
                if (end >= text.length) {
                    const end = getNewEndOfLineState(scanner, token, lastOrUndefined(templateStack));
                    if (end !== undefined) {
                        endOfLineState = end;
                    }
                }
            } while (token !== SyntaxKind.EndOfFileToken);

            function handleToken(): void {
                switch (token) {
                    case SyntaxKind.SlashToken:
                    case SyntaxKind.SlashEqualsToken:
                        if (!noRegexTable[lastNonTriviaToken] && scanner.reScanSlashToken() === SyntaxKind.RegularExpressionLiteral) {
                            token = SyntaxKind.RegularExpressionLiteral;
                        }
                        break;
                    case SyntaxKind.LessThanToken:
                        if (lastNonTriviaToken === SyntaxKind.Identifier) {
                            // Could be the start of something generic.  Keep track of that by bumping
                            // up the current count of generic contexts we may be in.
                            angleBracketStack++;
                        }
                        break;
                    case SyntaxKind.GreaterThanToken:
                        if (angleBracketStack > 0) {
                            // If we think we're currently in something generic, then mark that that
                            // generic entity is complete.
                            angleBracketStack--;
                        }
                        break;
                    case SyntaxKind.AnyKeyword:
                    case SyntaxKind.StringKeyword:
                    case SyntaxKind.NumberKeyword:
                    case SyntaxKind.BooleanKeyword:
                    case SyntaxKind.SymbolKeyword:
                        if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
                            // If it looks like we could be in something generic, don't classify this
                            // as a keyword.  We may just get overwritten by the syntactic classifier,
                            // causing a noisy experience for the user.
                            token = SyntaxKind.Identifier;
                        }
                        break;
                    case SyntaxKind.TemplateHead:
                        templateStack.push(token);
                        break;
                    case SyntaxKind.OpenBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
                            templateStack.push(token);
                        }
                        break;
                    case SyntaxKind.CloseBraceToken:
                        // If we don't have anything on the template stack,
                        // then we aren't trying to keep track of a previously scanned template head.
                        if (templateStack.length > 0) {
                            const lastTemplateStackToken = lastOrUndefined(templateStack);

                            if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
                                token = scanner.reScanTemplateToken(/* isTaggedTemplate */ false);

                                // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
                                if (token === SyntaxKind.TemplateTail) {
                                    templateStack.pop();
                                }
                                else {
                                    Debug.assertEqual(token, SyntaxKind.TemplateMiddle, "Should have been a template middle.");
                                }
                            }
                            else {
                                Debug.assertEqual(lastTemplateStackToken, SyntaxKind.OpenBraceToken, "Should have been an open brace");
                                templateStack.pop();
                            }
                        }
                        break;
                    default:
                        if (!isKeyword(token)) {
                            break;
                        }

                        if (lastNonTriviaToken === SyntaxKind.DotToken) {
                            token = SyntaxKind.Identifier;
                        }
                        else if (isKeyword(lastNonTriviaToken) && isKeyword(token) && !canFollow(lastNonTriviaToken, token)) {
                            // We have two keywords in a row.  Only treat the second as a keyword if
                            // it's a sequence that could legally occur in the language.  Otherwise
                            // treat it as an identifier.  This way, if someone writes "private var"
                            // we recognize that 'var' is actually an identifier here.
                            token = SyntaxKind.Identifier;
                        }
                }
            }

            return { endOfLineState, spans };
        }

        return { getClassificationsForLine, getEncodedLexicalClassifications };
    }

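    // A minimal usage sketch (hypothetical caller, not part of this file): an editor host feeds
    // the classifier one line at a time and threads the returned 'finalLexState' into the next
    // call so that multi-line strings, comments, and templates keep their classification:
    //
    //     const classifier = createClassifier();
    //     let state = EndOfLineState.None;
    //     for (const line of lines) {
    //         const result = classifier.getClassificationsForLine(line, state, /*syntacticClassifierAbsent*/ true);
    //         state = result.finalLexState;
    //     }
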
    /// We do not have full parser support to know when we should parse a regex or not.
    /// If we consider every slash token to be a regex, we could be missing cases like "1/2/3", where
    /// we have a series of divide operators. This list allows us to be more accurate by ruling out
    /// locations where a regexp cannot exist.
    const noRegexTable: true[] = arrayToNumericMap<SyntaxKind, true>([
        SyntaxKind.Identifier,
        SyntaxKind.StringLiteral,
        SyntaxKind.NumericLiteral,
        SyntaxKind.BigIntLiteral,
        SyntaxKind.RegularExpressionLiteral,
        SyntaxKind.ThisKeyword,
        SyntaxKind.PlusPlusToken,
        SyntaxKind.MinusMinusToken,
        SyntaxKind.CloseParenToken,
        SyntaxKind.CloseBracketToken,
        SyntaxKind.CloseBraceToken,
        SyntaxKind.TrueKeyword,
        SyntaxKind.FalseKeyword,
    ], token => token, () => true);

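    /**
     * Determines the EndOfLineState to carry over to the next line: used when the line ends in an
     * unterminated string, comment, or template literal, or inside a template substitution.
     * Returns undefined when no state needs to be carried over.
     */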
    function getNewEndOfLineState(scanner: Scanner, token: SyntaxKind, lastOnTemplateStack: SyntaxKind | undefined): EndOfLineState | undefined {
        switch (token) {
            case SyntaxKind.StringLiteral: {
                // Check to see if we finished up on a multiline string literal.
                if (!scanner.isUnterminated()) return undefined;

                const tokenText = scanner.getTokenText();
                const lastCharIndex = tokenText.length - 1;
                let numBackslashes = 0;
                while (tokenText.charCodeAt(lastCharIndex - numBackslashes) === CharacterCodes.backslash) {
                    numBackslashes++;
                }

                // If we have an odd number of backslashes, then the multiline string is unclosed
                if ((numBackslashes & 1) === 0) return undefined;
                return tokenText.charCodeAt(0) === CharacterCodes.doubleQuote ? EndOfLineState.InDoubleQuoteStringLiteral : EndOfLineState.InSingleQuoteStringLiteral;
            }
            case SyntaxKind.MultiLineCommentTrivia:
                // Check to see if the multiline comment was unclosed.
                return scanner.isUnterminated() ? EndOfLineState.InMultiLineCommentTrivia : undefined;
            default:
                if (isTemplateLiteralKind(token)) {
                    if (!scanner.isUnterminated()) {
                        return undefined;
                    }
                    switch (token) {
                        case SyntaxKind.TemplateTail:
                            return EndOfLineState.InTemplateMiddleOrTail;
                        case SyntaxKind.NoSubstitutionTemplateLiteral:
                            return EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
                        default:
                            return Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
                    }
                }
                return lastOnTemplateStack === SyntaxKind.TemplateHead ? EndOfLineState.InTemplateSubstitutionPosition : undefined;
        }
    }

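    /**
     * Appends a (start, length, classification) triple to the dense result array, translating
     * positions from the prefix-augmented text back to the original line and skipping whitespace.
     */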
    function pushEncodedClassification(start: number, end: number, offset: number, classification: ClassificationType, result: Push<number>): void {
        if (classification === ClassificationType.whiteSpace) {
            // Don't bother with whitespace classifications.  They're not needed.
            return;
        }

        if (start === 0 && offset > 0) {
            // We're classifying the first token, and this was a case where we prepended text.
            // We should consider the start of this token to be at the start of the original text.
            start += offset;
        }

        const length = end - start;
        if (length > 0) {
            // All our tokens are in relation to the augmented text.  Move them back to be
            // relative to the original text.
            result.push(start - offset, length, classification);
        }
    }

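    /**
     * Converts the dense encoded spans into ClassificationInfo entries, synthesizing explicit
     * whitespace entries for any gaps between (and after) the classified spans.
     */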
    function convertClassificationsToResult(classifications: Classifications, text: string): ClassificationResult {
        const entries: ClassificationInfo[] = [];
        const dense = classifications.spans;
        let lastEnd = 0;

        for (let i = 0; i < dense.length; i += 3) {
            const start = dense[i];
            const length = dense[i + 1];
            const type = <ClassificationType>dense[i + 2];

            // Make a whitespace entry between the last item and this one.
            if (lastEnd >= 0) {
                const whitespaceLength = start - lastEnd;
                if (whitespaceLength > 0) {
                    entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
                }
            }

            entries.push({ length, classification: convertClassification(type) });
            lastEnd = start + length;
        }

        const whitespaceLength = text.length - lastEnd;
        if (whitespaceLength > 0) {
            entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
        }

        return { entries, finalLexState: classifications.endOfLineState };
    }

    function convertClassification(type: ClassificationType): TokenClass {
        switch (type) {
            case ClassificationType.comment: return TokenClass.Comment;
            case ClassificationType.keyword: return TokenClass.Keyword;
            case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
            case ClassificationType.bigintLiteral: return TokenClass.BigIntLiteral;
            case ClassificationType.operator: return TokenClass.Operator;
            case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
            case ClassificationType.whiteSpace: return TokenClass.Whitespace;
            case ClassificationType.punctuation: return TokenClass.Punctuation;
            case ClassificationType.identifier:
            case ClassificationType.className:
            case ClassificationType.enumName:
            case ClassificationType.interfaceName:
            case ClassificationType.moduleName:
            case ClassificationType.typeParameterName:
            case ClassificationType.typeAliasName:
            case ClassificationType.text:
            case ClassificationType.parameterName:
                return TokenClass.Identifier;
            default:
                return undefined!; // TODO: GH#18217 Debug.assertNever(type);
        }
    }

    /** Returns true if 'keyword2' can legally follow 'keyword1' in any language construct. */
    function canFollow(keyword1: SyntaxKind, keyword2: SyntaxKind): boolean {
        if (!isAccessibilityModifier(keyword1)) {
            // Assume any other keyword combination is legal.
            // This can be refined in the future if there are more cases we want the classifier to be better at.
            return true;
        }
        switch (keyword2) {
            case SyntaxKind.GetKeyword:
            case SyntaxKind.SetKeyword:
            case SyntaxKind.ConstructorKeyword:
            case SyntaxKind.StaticKeyword:
                return true; // Allow things like "public get", "public constructor" and "public static".
            default:
                return false; // Any other keyword following "public" is actually an identifier, not a real keyword.
        }
    }

    function getPrefixFromLexState(lexState: EndOfLineState): { readonly prefix: string, readonly pushTemplate?: true } {
        // If we're in a string literal, then prepend: "\
        // (and a newline).  That way when we lex we'll think we're still in a string literal.
        //
        // If we're in a multiline comment, then prepend: /*
        // (and a newline).  That way when we lex we'll think we're still in a multiline comment.
        switch (lexState) {
            case EndOfLineState.InDoubleQuoteStringLiteral:
                return { prefix: "\"\\\n" };
            case EndOfLineState.InSingleQuoteStringLiteral:
                return { prefix: "'\\\n" };
            case EndOfLineState.InMultiLineCommentTrivia:
                return { prefix: "/*\n" };
            case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
                return { prefix: "`\n" };
            case EndOfLineState.InTemplateMiddleOrTail:
                return { prefix: "}\n", pushTemplate: true };
            case EndOfLineState.InTemplateSubstitutionPosition:
                return { prefix: "", pushTemplate: true };
            case EndOfLineState.None:
                return { prefix: "" };
            default:
                return Debug.assertNever(lexState);
        }
    }

    function isBinaryExpressionOperatorToken(token: SyntaxKind): boolean {
        switch (token) {
            case SyntaxKind.AsteriskToken:
            case SyntaxKind.SlashToken:
            case SyntaxKind.PercentToken:
            case SyntaxKind.PlusToken:
            case SyntaxKind.MinusToken:
            case SyntaxKind.LessThanLessThanToken:
            case SyntaxKind.GreaterThanGreaterThanToken:
            case SyntaxKind.GreaterThanGreaterThanGreaterThanToken:
            case SyntaxKind.LessThanToken:
            case SyntaxKind.GreaterThanToken:
            case SyntaxKind.LessThanEqualsToken:
            case SyntaxKind.GreaterThanEqualsToken:
            case SyntaxKind.InstanceOfKeyword:
            case SyntaxKind.InKeyword:
            case SyntaxKind.AsKeyword:
            case SyntaxKind.EqualsEqualsToken:
            case SyntaxKind.ExclamationEqualsToken:
            case SyntaxKind.EqualsEqualsEqualsToken:
            case SyntaxKind.ExclamationEqualsEqualsToken:
            case SyntaxKind.AmpersandToken:
            case SyntaxKind.CaretToken:
            case SyntaxKind.BarToken:
            case SyntaxKind.AmpersandAmpersandToken:
            case SyntaxKind.BarBarToken:
            case SyntaxKind.BarEqualsToken:
            case SyntaxKind.AmpersandEqualsToken:
            case SyntaxKind.CaretEqualsToken:
            case SyntaxKind.LessThanLessThanEqualsToken:
            case SyntaxKind.GreaterThanGreaterThanEqualsToken:
            case SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken:
            case SyntaxKind.PlusEqualsToken:
            case SyntaxKind.MinusEqualsToken:
            case SyntaxKind.AsteriskEqualsToken:
            case SyntaxKind.SlashEqualsToken:
            case SyntaxKind.PercentEqualsToken:
            case SyntaxKind.EqualsToken:
            case SyntaxKind.CommaToken:
            case SyntaxKind.QuestionQuestionToken:
            case SyntaxKind.BarBarEqualsToken:
            case SyntaxKind.AmpersandAmpersandEqualsToken:
            case SyntaxKind.QuestionQuestionEqualsToken:
                return true;
            default:
                return false;
        }
    }

    function isPrefixUnaryExpressionOperatorToken(token: SyntaxKind): boolean {
        switch (token) {
            case SyntaxKind.PlusToken:
            case SyntaxKind.MinusToken:
            case SyntaxKind.TildeToken:
            case SyntaxKind.ExclamationToken:
            case SyntaxKind.PlusPlusToken:
            case SyntaxKind.MinusMinusToken:
                return true;
            default:
                return false;
        }
    }

    function classFromKind(token: SyntaxKind): ClassificationType {
        if (isKeyword(token)) {
            return ClassificationType.keyword;
        }
        else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
            return ClassificationType.operator;
        }
        else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
            return ClassificationType.punctuation;
        }

        switch (token) {
            case SyntaxKind.NumericLiteral:
                return ClassificationType.numericLiteral;
            case SyntaxKind.BigIntLiteral:
                return ClassificationType.bigintLiteral;
            case SyntaxKind.StringLiteral:
                return ClassificationType.stringLiteral;
            case SyntaxKind.RegularExpressionLiteral:
                return ClassificationType.regularExpressionLiteral;
            case SyntaxKind.ConflictMarkerTrivia:
            case SyntaxKind.MultiLineCommentTrivia:
            case SyntaxKind.SingleLineCommentTrivia:
                return ClassificationType.comment;
            case SyntaxKind.WhitespaceTrivia:
            case SyntaxKind.NewLineTrivia:
                return ClassificationType.whiteSpace;
            case SyntaxKind.Identifier:
            default:
                if (isTemplateLiteralKind(token)) {
                    return ClassificationType.stringLiteral;
                }
                return ClassificationType.identifier;
        }
    }

    /* @internal */
    export function getSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): ClassifiedSpan[] {
        return convertClassificationsToSpans(getEncodedSemanticClassifications(typeChecker, cancellationToken, sourceFile, classifiableNames, span));
    }

    function checkForClassificationCancellation(cancellationToken: CancellationToken, kind: SyntaxKind) {
        // We don't want to actually call back into our host on every node to find out if we've
        // been canceled.  That would be an enormous amount of chattiness, along with all
        // the overhead of marshalling the data to/from the host.  So instead we pick a few
        // reasonable node kinds to bother checking on.  These node kinds represent high level
        // constructs that we would expect to see commonly, but just at a far less frequent
        // interval.
        //
        // For example, in checker.ts (around 750k) we only have around 600 of these constructs.
        // That means we're calling back into the host around every 1.2k of the file we process.
        // Lib.d.ts has similar numbers.
        switch (kind) {
            case SyntaxKind.ModuleDeclaration:
            case SyntaxKind.ClassDeclaration:
            case SyntaxKind.InterfaceDeclaration:
            case SyntaxKind.FunctionDeclaration:
            case SyntaxKind.ClassExpression:
            case SyntaxKind.FunctionExpression:
            case SyntaxKind.ArrowFunction:
                cancellationToken.throwIfCancellationRequested();
        }
    }

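    /**
     * Walks the AST within 'span' and classifies identifiers that resolve to classes, enums,
     * interfaces, modules, type aliases, or type parameters, consulting the type checker only
     * for names that appear in 'classifiableNames'.
     */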
    /* @internal */
    export function getEncodedSemanticClassifications(typeChecker: TypeChecker, cancellationToken: CancellationToken, sourceFile: SourceFile, classifiableNames: ReadonlySet<__String>, span: TextSpan): Classifications {
        const spans: number[] = [];
        sourceFile.forEachChild(function cb(node: Node): void {
            // Only walk into nodes that intersect the requested span.
            if (!node || !textSpanIntersectsWith(span, node.pos, node.getFullWidth())) {
                return;
            }

            checkForClassificationCancellation(cancellationToken, node.kind);
            // Only bother calling into the typechecker if this is an identifier that
            // could possibly resolve to a type name.  This makes classification run
            // in a third of the time it would normally take.
            if (isIdentifier(node) && !nodeIsMissing(node) && classifiableNames.has(node.escapedText)) {
                const symbol = typeChecker.getSymbolAtLocation(node);
                const type = symbol && classifySymbol(symbol, getMeaningFromLocation(node), typeChecker);
                if (type) {
                    pushClassification(node.getStart(sourceFile), node.getEnd(), type);
                }
            }

            node.forEachChild(cb);
        });
        return { spans, endOfLineState: EndOfLineState.None };

        function pushClassification(start: number, end: number, type: ClassificationType): void {
            const length = end - start;
            Debug.assert(length > 0, `Classification had non-positive length of ${length}`);
            spans.push(start);
            spans.push(length);
            spans.push(type);
        }
    }

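    /** Maps a symbol to a semantic classification based on its flags and the meaning at the use site, or returns undefined if the symbol should not be specially classified. */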
    function classifySymbol(symbol: Symbol, meaningAtPosition: SemanticMeaning, checker: TypeChecker): ClassificationType | undefined {
        const flags = symbol.getFlags();
        if ((flags & SymbolFlags.Classifiable) === SymbolFlags.None) {
            return undefined;
        }
        else if (flags & SymbolFlags.Class) {
            return ClassificationType.className;
        }
        else if (flags & SymbolFlags.Enum) {
            return ClassificationType.enumName;
        }
        else if (flags & SymbolFlags.TypeAlias) {
            return ClassificationType.typeAliasName;
        }
        else if (flags & SymbolFlags.Module) {
            // Only classify a module as such if
            //  - It appears in a namespace context.
            //  - There exists a module declaration which actually impacts the value side.
            return meaningAtPosition & SemanticMeaning.Namespace || meaningAtPosition & SemanticMeaning.Value && hasValueSideModule(symbol) ? ClassificationType.moduleName : undefined;
        }
        else if (flags & SymbolFlags.Alias) {
            return classifySymbol(checker.getAliasedSymbol(symbol), meaningAtPosition, checker);
        }
        else if (meaningAtPosition & SemanticMeaning.Type) {
            return flags & SymbolFlags.Interface ? ClassificationType.interfaceName : flags & SymbolFlags.TypeParameter ? ClassificationType.typeParameterName : undefined;
        }
        else {
            return undefined;
        }
    }

    /** Returns true if there exists a module that introduces entities on the value side. */
    function hasValueSideModule(symbol: Symbol): boolean {
        return some(symbol.declarations, declaration =>
            isModuleDeclaration(declaration) && getModuleInstanceState(declaration) === ModuleInstanceState.Instantiated);
    }

    function getClassificationTypeName(type: ClassificationType): ClassificationTypeNames {
        switch (type) {
            case ClassificationType.comment: return ClassificationTypeNames.comment;
            case ClassificationType.identifier: return ClassificationTypeNames.identifier;
            case ClassificationType.keyword: return ClassificationTypeNames.keyword;
            case ClassificationType.numericLiteral: return ClassificationTypeNames.numericLiteral;
            case ClassificationType.bigintLiteral: return ClassificationTypeNames.bigintLiteral;
            case ClassificationType.operator: return ClassificationTypeNames.operator;
            case ClassificationType.stringLiteral: return ClassificationTypeNames.stringLiteral;
            case ClassificationType.whiteSpace: return ClassificationTypeNames.whiteSpace;
            case ClassificationType.text: return ClassificationTypeNames.text;
            case ClassificationType.punctuation: return ClassificationTypeNames.punctuation;
            case ClassificationType.className: return ClassificationTypeNames.className;
            case ClassificationType.enumName: return ClassificationTypeNames.enumName;
            case ClassificationType.interfaceName: return ClassificationTypeNames.interfaceName;
            case ClassificationType.moduleName: return ClassificationTypeNames.moduleName;
            case ClassificationType.typeParameterName: return ClassificationTypeNames.typeParameterName;
            case ClassificationType.typeAliasName: return ClassificationTypeNames.typeAliasName;
            case ClassificationType.parameterName: return ClassificationTypeNames.parameterName;
            case ClassificationType.docCommentTagName: return ClassificationTypeNames.docCommentTagName;
            case ClassificationType.jsxOpenTagName: return ClassificationTypeNames.jsxOpenTagName;
            case ClassificationType.jsxCloseTagName: return ClassificationTypeNames.jsxCloseTagName;
            case ClassificationType.jsxSelfClosingTagName: return ClassificationTypeNames.jsxSelfClosingTagName;
            case ClassificationType.jsxAttribute: return ClassificationTypeNames.jsxAttribute;
            case ClassificationType.jsxText: return ClassificationTypeNames.jsxText;
            case ClassificationType.jsxAttributeStringLiteralValue: return ClassificationTypeNames.jsxAttributeStringLiteralValue;
            default: return undefined!; // TODO: GH#18217 throw Debug.assertNever(type);
        }
    }

    function convertClassificationsToSpans(classifications: Classifications): ClassifiedSpan[] {
        Debug.assert(classifications.spans.length % 3 === 0);
        const dense = classifications.spans;
        const result: ClassifiedSpan[] = [];
        for (let i = 0; i < dense.length; i += 3) {
            result.push({
                textSpan: createTextSpan(dense[i], dense[i + 1]),
                classificationType: getClassificationTypeName(dense[i + 2])
            });
        }

        return result;
    }

    /* @internal */
    export function getSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): ClassifiedSpan[] {
        return convertClassificationsToSpans(getEncodedSyntacticClassifications(cancellationToken, sourceFile, span));
    }

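    /**
     * Produces purely syntax-based classifications (keywords, punctuation, literals, JSX names,
     * comments, JSDoc, and merge-conflict markers) for the portion of 'sourceFile' covered by 'span'.
     */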
    /* @internal */
    export function getEncodedSyntacticClassifications(cancellationToken: CancellationToken, sourceFile: SourceFile, span: TextSpan): Classifications {
        const spanStart = span.start;
        const spanLength = span.length;

        // Make a scanner we can get trivia from.
        const triviaScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);
        const mergeConflictScanner = createScanner(ScriptTarget.Latest, /*skipTrivia*/ false, sourceFile.languageVariant, sourceFile.text);

        const result: number[] = [];
        processElement(sourceFile);

        return { spans: result, endOfLineState: EndOfLineState.None };

        function pushClassification(start: number, length: number, type: ClassificationType) {
            result.push(start);
            result.push(length);
            result.push(type);
        }

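        /**
         * Classifies any comments or conflict markers in the trivia preceding 'token' and
         * returns the position at which the token's own text starts.
         */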
        function classifyLeadingTriviaAndGetTokenStart(token: Node): number {
            triviaScanner.setTextPos(token.pos);
            while (true) {
                const start = triviaScanner.getTextPos();
                // only bother scanning if we have something that could be trivia.
                if (!couldStartTrivia(sourceFile.text, start)) {
                    return start;
                }

                const kind = triviaScanner.scan();
                const end = triviaScanner.getTextPos();
                const width = end - start;

                // The moment we get something that isn't trivia, then stop processing.
                if (!isTrivia(kind)) {
                    return start;
                }

                switch (kind) {
                    case SyntaxKind.NewLineTrivia:
                    case SyntaxKind.WhitespaceTrivia:
                        // Don't bother with newlines/whitespace.
                        continue;

                    case SyntaxKind.SingleLineCommentTrivia:
                    case SyntaxKind.MultiLineCommentTrivia:
                        // Only bother with the trivia if it at least intersects the span of interest.
                        classifyComment(token, kind, start, width);

                        // Classifying a comment might cause us to reuse the trivia scanner
                        // (because of jsdoc comments).  So after we classify the comment make
                        // sure we set the scanner position back to where it needs to be.
                        triviaScanner.setTextPos(end);
                        continue;

                    case SyntaxKind.ConflictMarkerTrivia:
                        const text = sourceFile.text;
                        const ch = text.charCodeAt(start);

                        // for the <<<<<<< and >>>>>>> markers, we just add them in as comments
                        // in the classification stream.
                        if (ch === CharacterCodes.lessThan || ch === CharacterCodes.greaterThan) {
                            pushClassification(start, width, ClassificationType.comment);
                            continue;
                        }

                        // for the ||||||| and ======== markers, add a comment for the first line,
                        // and then lex all subsequent lines up until the end of the conflict marker.
                        Debug.assert(ch === CharacterCodes.bar || ch === CharacterCodes.equals);
                        classifyDisabledMergeCode(text, start, end);
                        break;

                    case SyntaxKind.ShebangTrivia:
                        // TODO: Maybe we should classify these.
                        break;

                    default:
                        Debug.assertNever(kind);
                }
            }
        }

        function classifyComment(token: Node, kind: SyntaxKind, start: number, width: number) {
            if (kind === SyntaxKind.MultiLineCommentTrivia) {
                // See if this is a doc comment.  If so, we'll classify certain portions of it
                // specially.
                const docCommentAndDiagnostics = parseIsolatedJSDocComment(sourceFile.text, start, width);
                if (docCommentAndDiagnostics && docCommentAndDiagnostics.jsDoc) {
                    // TODO: This should be predicated on `token["kind"]` being compatible with `HasJSDoc["kind"]`
                    setParent(docCommentAndDiagnostics.jsDoc, token as HasJSDoc);
                    classifyJSDocComment(docCommentAndDiagnostics.jsDoc);
                    return;
                }
            }
            else if (kind === SyntaxKind.SingleLineCommentTrivia) {
                if (tryClassifyTripleSlashComment(start, width)) {
                    return;
                }
            }

            // Simple comment.  Just add as is.
            pushCommentRange(start, width);
        }

        function pushCommentRange(start: number, width: number) {
            pushClassification(start, width, ClassificationType.comment);
        }

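        /**
         * Classifies the pieces of a JSDoc comment: the '@' sign and tag names get their own
         * classifications, parameter names and type expressions are classified individually,
         * and everything in between is classified as comment text.
         */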
        function classifyJSDocComment(docComment: JSDoc) {
            let pos = docComment.pos;

            if (docComment.tags) {
                for (const tag of docComment.tags) {
                    // As we walk through each tag, classify the portion of text from the end of
                    // the last tag (or the start of the entire doc comment) as 'comment'.
                    if (tag.pos !== pos) {
                        pushCommentRange(pos, tag.pos - pos);
                    }

                    pushClassification(tag.pos, 1, ClassificationType.punctuation); // "@"
                    pushClassification(tag.tagName.pos, tag.tagName.end - tag.tagName.pos, ClassificationType.docCommentTagName); // e.g. "param"

                    pos = tag.tagName.end;

                    switch (tag.kind) {
                        case SyntaxKind.JSDocParameterTag:
                            processJSDocParameterTag(<JSDocParameterTag>tag);
                            break;
                        case SyntaxKind.JSDocTemplateTag:
                            processJSDocTemplateTag(<JSDocTemplateTag>tag);
                            pos = tag.end;
                            break;
                        case SyntaxKind.JSDocTypeTag:
                            processElement((<JSDocTypeTag>tag).typeExpression);
                            pos = tag.end;
                            break;
                        case SyntaxKind.JSDocReturnTag:
                            processElement((<JSDocReturnTag>tag).typeExpression);
                            pos = tag.end;
                            break;
                    }
                }
            }

            if (pos !== docComment.end) {
                pushCommentRange(pos, docComment.end - pos);
            }

            return;

            function processJSDocParameterTag(tag: JSDocParameterTag) {
                if (tag.isNameFirst) {
                    pushCommentRange(pos, tag.name.pos - pos);
                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                    pos = tag.name.end;
                }

                if (tag.typeExpression) {
                    pushCommentRange(pos, tag.typeExpression.pos - pos);
                    processElement(tag.typeExpression);
                    pos = tag.typeExpression.end;
                }

                if (!tag.isNameFirst) {
                    pushCommentRange(pos, tag.name.pos - pos);
                    pushClassification(tag.name.pos, tag.name.end - tag.name.pos, ClassificationType.parameterName);
                    pos = tag.name.end;
                }
            }
        }

        function tryClassifyTripleSlashComment(start: number, width: number): boolean {
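            // Handles pragma-style comments such as: /// <reference path="..." />
            // (only element names that appear in ts.commentPragmas are classified).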
            const tripleSlashXMLCommentRegEx = /^(\/\/\/\s*)(<)(?:(\S+)((?:[^/]|\/[^>])*)(\/>)?)?/im;
            const attributeRegex = /(\S+)(\s*)(=)(\s*)('[^']+'|"[^"]+")/img;

            const text = sourceFile.text.substr(start, width);
            const match = tripleSlashXMLCommentRegEx.exec(text);
            if (!match) {
                return false;
            }

            // Limiting classification to exactly the elements and attributes
            // defined in `ts.commentPragmas` would be excessive, but we can avoid
            // some obvious false positives (e.g. in XML-like doc comments) by
            // checking the element name.
            // eslint-disable-next-line no-in-operator
            if (!match[3] || !(match[3] in commentPragmas)) {
                return false;
            }

            let pos = start;

            pushCommentRange(pos, match[1].length); // ///
            pos += match[1].length;

            pushClassification(pos, match[2].length, ClassificationType.punctuation); // <
            pos += match[2].length;

            pushClassification(pos, match[3].length, ClassificationType.jsxSelfClosingTagName); // element name
            pos += match[3].length;

            const attrText = match[4];
            let attrPos = pos;
            while (true) {
                const attrMatch = attributeRegex.exec(attrText);
                if (!attrMatch) {
                    break;
                }

                const newAttrPos = pos + attrMatch.index;
                if (newAttrPos > attrPos) {
                    pushCommentRange(attrPos, newAttrPos - attrPos);
                    attrPos = newAttrPos;
                }

                pushClassification(attrPos, attrMatch[1].length, ClassificationType.jsxAttribute); // attribute name
                attrPos += attrMatch[1].length;

                if (attrMatch[2].length) {
                    pushCommentRange(attrPos, attrMatch[2].length); // whitespace
                    attrPos += attrMatch[2].length;
                }

                pushClassification(attrPos, attrMatch[3].length, ClassificationType.operator); // =
                attrPos += attrMatch[3].length;

                if (attrMatch[4].length) {
                    pushCommentRange(attrPos, attrMatch[4].length); // whitespace
                    attrPos += attrMatch[4].length;
                }

                pushClassification(attrPos, attrMatch[5].length, ClassificationType.jsxAttributeStringLiteralValue); // attribute value
                attrPos += attrMatch[5].length;
            }

            pos += match[4].length;

            if (pos > attrPos) {
                pushCommentRange(attrPos, pos - attrPos);
            }

            if (match[5]) {
                pushClassification(pos, match[5].length, ClassificationType.punctuation); // />
                pos += match[5].length;
            }

            const end = start + width;
            if (pos < end) {
                pushCommentRange(pos, end - pos);
            }

            return true;
        }

        function processJSDocTemplateTag(tag: JSDocTemplateTag) {
            for (const child of tag.getChildren()) {
                processElement(child);
            }
        }

        function classifyDisabledMergeCode(text: string, start: number, end: number) {
            // Classify the line that the ||||||| or ======= marker is on as a comment.
            // Then just lex all further tokens and add them to the result.
            let i: number;
            for (i = start; i < end; i++) {
                if (isLineBreak(text.charCodeAt(i))) {
                    break;
                }
            }
            pushClassification(start, i - start, ClassificationType.comment);
            mergeConflictScanner.setTextPos(i);

            while (mergeConflictScanner.getTextPos() < end) {
                classifyDisabledCodeToken();
            }
        }

        function classifyDisabledCodeToken() {
            const start = mergeConflictScanner.getTextPos();
            const tokenKind = mergeConflictScanner.scan();
            const end = mergeConflictScanner.getTextPos();

            const type = classifyTokenType(tokenKind);
            if (type) {
                pushClassification(start, end - start, type);
            }
        }

        /**
         * Returns true if node should be treated as classified and no further processing is required.
         * False will mean that node is not classified and traverse routine should recurse into node contents.
         */
        function tryClassifyNode(node: Node): boolean {
            if (isJSDoc(node)) {
                return true;
            }

            if (nodeIsMissing(node)) {
                return true;
            }

            const classifiedElementName = tryClassifyJsxElementName(node);
            if (!isToken(node) && node.kind !== SyntaxKind.JsxText && classifiedElementName === undefined) {
                return false;
            }

            const tokenStart = node.kind === SyntaxKind.JsxText ? node.pos : classifyLeadingTriviaAndGetTokenStart(node);

            const tokenWidth = node.end - tokenStart;
            Debug.assert(tokenWidth >= 0);
            if (tokenWidth > 0) {
                const type = classifiedElementName || classifyTokenType(node.kind, node);
                if (type) {
                    pushClassification(tokenStart, tokenWidth, type);
                }
            }

            return true;
        }

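        /**
         * If 'token' is the tag name of a JSX opening/closing/self-closing element, or the name
         * of a JSX attribute, returns the corresponding JSX classification; otherwise undefined.
         */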
        function tryClassifyJsxElementName(token: Node): ClassificationType | undefined {
            switch (token.parent && token.parent.kind) {
                case SyntaxKind.JsxOpeningElement:
                    if ((<JsxOpeningElement>token.parent).tagName === token) {
                        return ClassificationType.jsxOpenTagName;
                    }
                    break;
                case SyntaxKind.JsxClosingElement:
                    if ((<JsxClosingElement>token.parent).tagName === token) {
                        return ClassificationType.jsxCloseTagName;
                    }
                    break;
                case SyntaxKind.JsxSelfClosingElement:
                    if ((<JsxSelfClosingElement>token.parent).tagName === token) {
                        return ClassificationType.jsxSelfClosingTagName;
                    }
                    break;
                case SyntaxKind.JsxAttribute:
                    if ((<JsxAttribute>token.parent).name === token) {
                        return ClassificationType.jsxAttribute;
                    }
                    break;
            }
            return undefined;
        }

        // for accurate classification, the actual token should be passed in.  however, for
        // cases like 'disabled merge code' classification, we just get the token kind and
        // classify based on that instead.
        function classifyTokenType(tokenKind: SyntaxKind, token?: Node): ClassificationType | undefined {
            if (isKeyword(tokenKind)) {
                return ClassificationType.keyword;
            }

            // Special case `<` and `>`: If they appear in a generic context they are punctuation,
            // not operators.
            if (tokenKind === SyntaxKind.LessThanToken || tokenKind === SyntaxKind.GreaterThanToken) {
                // If the node owning the token has a type argument list or type parameter list, then
                // we can effectively assume that a '<' and '>' belong to those lists.
                if (token && getTypeArgumentOrTypeParameterList(token.parent)) {
                    return ClassificationType.punctuation;
                }
            }

            if (isPunctuation(tokenKind)) {
                if (token) {
                    const parent = token.parent;
                    if (tokenKind === SyntaxKind.EqualsToken) {
                        // the '=' in a variable declaration is special cased here.
                        if (parent.kind === SyntaxKind.VariableDeclaration ||
                            parent.kind === SyntaxKind.PropertyDeclaration ||
                            parent.kind === SyntaxKind.Parameter ||
                            parent.kind === SyntaxKind.JsxAttribute) {
                            return ClassificationType.operator;
                        }
                    }

                    if (parent.kind === SyntaxKind.BinaryExpression ||
                        parent.kind === SyntaxKind.PrefixUnaryExpression ||
                        parent.kind === SyntaxKind.PostfixUnaryExpression ||
                        parent.kind === SyntaxKind.ConditionalExpression) {
                        return ClassificationType.operator;
                    }
                }

                return ClassificationType.punctuation;
            }
            else if (tokenKind === SyntaxKind.NumericLiteral) {
                return ClassificationType.numericLiteral;
            }
            else if (tokenKind === SyntaxKind.BigIntLiteral) {
                return ClassificationType.bigintLiteral;
            }
            else if (tokenKind === SyntaxKind.StringLiteral) {
                return token && token.parent.kind === SyntaxKind.JsxAttribute ? ClassificationType.jsxAttributeStringLiteralValue : ClassificationType.stringLiteral;
            }
            else if (tokenKind === SyntaxKind.RegularExpressionLiteral) {
                // TODO: we should get another classification type for these literals.
                return ClassificationType.stringLiteral;
            }
            else if (isTemplateLiteralKind(tokenKind)) {
                // TODO (drosen): we should *also* get another classification type for these literals.
                return ClassificationType.stringLiteral;
            }
            else if (tokenKind === SyntaxKind.JsxText) {
                return ClassificationType.jsxText;
            }
            else if (tokenKind === SyntaxKind.Identifier) {
                if (token) {
                    switch (token.parent.kind) {
                        case SyntaxKind.ClassDeclaration:
                            if ((<ClassDeclaration>token.parent).name === token) {
                                return ClassificationType.className;
                            }
                            return;
                        case SyntaxKind.TypeParameter:
                            if ((<TypeParameterDeclaration>token.parent).name === token) {
                                return ClassificationType.typeParameterName;
                            }
                            return;
                        case SyntaxKind.InterfaceDeclaration:
                            if ((<InterfaceDeclaration>token.parent).name === token) {
                                return ClassificationType.interfaceName;
                            }
                            return;
                        case SyntaxKind.EnumDeclaration:
                            if ((<EnumDeclaration>token.parent).name === token) {
                                return ClassificationType.enumName;
                            }
                            return;
                        case SyntaxKind.ModuleDeclaration:
                            if ((<ModuleDeclaration>token.parent).name === token) {
                                return ClassificationType.moduleName;
                            }
                            return;
                        case SyntaxKind.Parameter:
                            if ((<ParameterDeclaration>token.parent).name === token) {
                                return isThisIdentifier(token) ? ClassificationType.keyword : ClassificationType.parameterName;
                            }
                            return;
                    }
                }
                return ClassificationType.identifier;
            }
        }

        function processElement(element: Node | undefined) {
            if (!element) {
                return;
            }

            // Ignore nodes that don't intersect the original span to classify.
            if (decodedTextSpanIntersectsWith(spanStart, spanLength, element.pos, element.getFullWidth())) {
                checkForClassificationCancellation(cancellationToken, element.kind);

                for (const child of element.getChildren(sourceFile)) {
                    if (!tryClassifyNode(child)) {
                        // Recurse into our child nodes.
                        processElement(child);
                    }
                }
            }
        }
    }
}