1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "classifier.h"
17 #include <cstddef>
18 #include <vector>
19 #include "checker/ETSchecker.h"
20 #include "checker/checker.h"
21 #include "compiler/lowering/util.h"
22 #include "generated/tokenType.h"
23 #include "internal_api.h"
24 #include "ir/astNode.h"
25 #include "lexer/lexer.h"
26 #include "macros.h"
27 #include "mem/arena_allocator.h"
28 #include "public/es2panda_lib.h"
29 #include "public/public.h"
30 #include "utils/arena_containers.h"
31 #include "varbinder/declaration.h"
32
33 namespace ark::es2panda::lsp {
34
InitLexer(es2panda_Context * context)35 std::unique_ptr<lexer::Lexer> InitLexer(es2panda_Context *context)
36 {
37 auto ctx = reinterpret_cast<public_lib::Context *>(context);
38 auto parserContext = parser::ParserContext(ctx->parserProgram, parser::ParserStatus::NO_OPTS);
39 std::unique_ptr<lexer::Lexer> lexer = std::make_unique<lexer::Lexer>(&parserContext, *ctx->diagnosticEngine);
40 return lexer;
41 }
42
AstNodeTypeToClassificationType(ir::AstNodeType type)43 ClassificationType AstNodeTypeToClassificationType(ir::AstNodeType type)
44 {
45 switch (type) {
46 case ir::AstNodeType::CLASS_DEFINITION:
47 return ClassificationType::CLASS_NAME;
48 case ir::AstNodeType::TS_TYPE_PARAMETER:
49 return ClassificationType::TYPE_PARAMETER_NAME;
50 case ir::AstNodeType::TS_INTERFACE_DECLARATION:
51 return ClassificationType::INTERFACE_NAME;
52 case ir::AstNodeType::TS_ENUM_DECLARATION:
53 return ClassificationType::ENUM_NAME;
54 case ir::AstNodeType::ETS_PARAMETER_EXPRESSION:
55 return ClassificationType::PARAMETER_NAME;
56 case ir::AstNodeType::TS_TYPE_ALIAS_DECLARATION:
57 return ClassificationType::TYPE_ALIAS_NAME;
58 default:
59 return ClassificationType::IDENTIFIER;
60 }
61 }
62
GetClassificationType(const lexer::Token & token,ir::AstNode * astNode)63 ClassificationType GetClassificationType(const lexer::Token &token, ir::AstNode *astNode)
64 {
65 if (token.IsPunctuatorToken(token.Type())) {
66 return ClassificationType::PUNCTUATION;
67 }
68
69 if (token.Type() == lexer::TokenType::LITERAL_NUMBER) {
70 return ClassificationType::NUMERIC_LITERAL;
71 }
72
73 if (token.Type() == lexer::TokenType::LITERAL_STRING || token.Type() == lexer::TokenType::LITERAL_REGEXP) {
74 return ClassificationType::STRING_LITERAL;
75 }
76
77 if (token.Type() == lexer::TokenType::LITERAL_TRUE || token.Type() == lexer::TokenType::LITERAL_FALSE) {
78 return ClassificationType::BOOLEAN_LITERAL;
79 }
80
81 if (token.Type() == lexer::TokenType::LITERAL_NULL) {
82 return ClassificationType::NULL_LITERAL;
83 }
84
85 // don't use token.IsKeyword() here because token.type_ is LITERAL_IDENT for "keyword_like" keywords
86 if (token.KeywordType() >= lexer::TokenType::FIRST_KEYW) {
87 return ClassificationType::KEYWORD;
88 }
89
90 if (token.Type() == lexer::TokenType::LITERAL_IDENT) {
91 if (astNode == nullptr) {
92 return ClassificationType::IDENTIFIER;
93 }
94 auto parentNode = astNode->Parent();
95 return AstNodeTypeToClassificationType(parentNode->Type());
96 }
97
98 UNREACHABLE();
99 }
100
ClassificationTypeToString(ClassificationType type)101 char const *ClassificationTypeToString(ClassificationType type)
102 {
103 switch (type) {
104 case ClassificationType::IDENTIFIER:
105 return "identifier";
106 case ClassificationType::KEYWORD:
107 return "keyword";
108 case ClassificationType::NUMERIC_LITERAL:
109 return "number";
110 case ClassificationType::STRING_LITERAL:
111 return "string";
112 case ClassificationType::BOOLEAN_LITERAL:
113 return "boolean";
114 case ClassificationType::NULL_LITERAL:
115 return "null";
116 case ClassificationType::PUNCTUATION:
117 return "punctuation";
118 case ClassificationType::CLASS_NAME:
119 return "class name";
120 case ClassificationType::ENUM_NAME:
121 return "enum name";
122 case ClassificationType::INTERFACE_NAME:
123 return "interface name";
124 case ClassificationType::TYPE_PARAMETER_NAME:
125 return "type parameter name";
126 case ClassificationType::TYPE_ALIAS_NAME:
127 return "type alias name";
128 case ClassificationType::PARAMETER_NAME:
129 return "parameter name";
130 default:
131 return "";
132 }
133 }
134
GetSyntacticClassifications(es2panda_Context * context,size_t startPos,size_t length)135 ArenaVector<ClassifiedSpan *> GetSyntacticClassifications(es2panda_Context *context, size_t startPos, size_t length)
136 {
137 auto lexer = InitLexer(context);
138 auto ctx = reinterpret_cast<public_lib::Context *>(context);
139 auto result = ArenaVector<ClassifiedSpan *>(ctx->allocator->Adapter());
140 lexer->NextToken();
141 while (lexer->GetToken().Type() != lexer::TokenType::EOS) {
142 ir::AstNode *currentNode = nullptr;
143 auto currentToken = lexer->GetToken();
144 if (currentToken.Type() == lexer::TokenType::LITERAL_IDENT) {
145 currentNode = GetTouchingToken(context, currentToken.Start().index, false);
146 }
147
148 if (currentToken.End().index > startPos + length) {
149 break;
150 }
151
152 if (currentToken.Start().index >= startPos && currentToken.End().index <= startPos + length) {
153 size_t tokenStart = currentToken.Start().index;
154 size_t tokenLength = currentToken.End().index - currentToken.Start().index;
155 auto classificationType = GetClassificationType(currentToken, currentNode);
156 auto name = ClassificationTypeToString(classificationType);
157 auto classifiedSpan = ctx->allocator->New<ClassifiedSpan>();
158 classifiedSpan->start = tokenStart;
159 classifiedSpan->length = tokenLength;
160 classifiedSpan->name = name;
161 result.push_back(classifiedSpan);
162 }
163 lexer->NextToken();
164 }
165 return result;
166 }
167
GetDecls(ir::AstNode * astNode)168 std::unordered_map<std::string, ir::AstNode *> GetDecls(ir::AstNode *astNode)
169 {
170 auto decls = astNode->Scope()->Decls();
171 std::unordered_map<std::string, ir::AstNode *> declNames = {};
172 for (auto decl : decls) {
173 auto name = std::string(decl->Name());
174 auto node = decl->Node();
175 // After enum refactoring, enum declaration is transformed to a class declaration
176 if (node != nullptr && compiler::ClassDefinitionIsEnumTransformed(node)) {
177 node = node->AsClassDefinition()->OrigEnumDecl()->AsTSEnumDeclaration();
178 }
179 declNames[name] = node;
180 }
181 return declNames;
182 }
183
GetSemanticClassifications(es2panda_Context * context,size_t startPos,size_t length)184 ArenaVector<ClassifiedSpan *> GetSemanticClassifications(es2panda_Context *context, size_t startPos, size_t length)
185 {
186 auto ctx = reinterpret_cast<public_lib::Context *>(context);
187 auto ast = reinterpret_cast<ir::AstNode *>(ctx->parserProgram->Ast());
188 auto result = ArenaVector<ClassifiedSpan *>(ctx->allocator->Adapter());
189
190 auto decls = GetDecls(ast);
191 auto lexer = InitLexer(context);
192 lexer->NextToken();
193 while (lexer->GetToken().Type() != lexer::TokenType::EOS) {
194 ir::AstNode *currentNode = nullptr;
195 auto currentToken = lexer->GetToken();
196 if (currentToken.Type() == lexer::TokenType::LITERAL_IDENT) {
197 currentNode = GetTouchingToken(context, currentToken.Start().index, false);
198 }
199
200 if (currentToken.End().index > startPos + length) {
201 break;
202 }
203
204 if (currentToken.Start().index >= startPos && currentToken.End().index <= startPos + length) {
205 size_t tokenStart = currentToken.Start().index;
206 size_t tokenLength = currentToken.End().index - currentToken.Start().index;
207 auto classificationType = GetClassificationType(currentToken, currentNode);
208
209 auto tokenName = std::string(currentToken.Ident());
210 if (classificationType == ClassificationType::IDENTIFIER && decls.count(tokenName) != 0) {
211 auto classifiedSpan = ctx->allocator->New<ClassifiedSpan>();
212 classifiedSpan->start = tokenStart;
213 classifiedSpan->length = tokenLength;
214 auto type = AstNodeTypeToClassificationType(decls[tokenName]->Type());
215 classifiedSpan->name = ClassificationTypeToString(type);
216 result.push_back(classifiedSpan);
217 }
218 }
219 lexer->NextToken();
220 }
221 return result;
222 }
223
224 } // namespace ark::es2panda::lsp