1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <regex>
17 #include "internal_api.h"
18 #include "todo_comments.h"
19 #include "lexer/token/letters.h"
20 #include "public/public.h"
21
22 namespace {
// Reports whether `path` contains a "/node_modules/" directory segment anywhere in it.
bool IsNodeModulesFile(const std::string_view &path)
{
    constexpr std::string_view NODE_MODULES_SEGMENT = "/node_modules/";
    const auto segmentPos = path.find(NODE_MODULES_SEGMENT);
    return segmentPos != std::string_view::npos;
}
27
// Reports whether `path` contains an "/oh_modules/" directory segment anywhere in it.
bool IsOHModulesFile(const std::string_view &path)
{
    constexpr std::string_view OH_MODULES_SEGMENT = "/oh_modules/";
    const auto segmentPos = path.find(OH_MODULES_SEGMENT);
    return segmentPos != std::string_view::npos;
}
32
IsLetterOrDigit(char32_t c)33 bool IsLetterOrDigit(char32_t c)
34 {
35 return (c >= ark::es2panda::lexer::LEX_CHAR_LOWERCASE_A && c <= ark::es2panda::lexer::LEX_CHAR_LOWERCASE_Z) ||
36 (c >= ark::es2panda::lexer::LEX_CHAR_UPPERCASE_A && c <= ark::es2panda::lexer::LEX_CHAR_UPPERCASE_Z) ||
37 (c >= ark::es2panda::lexer::LEX_CHAR_0 && c <= ark::es2panda::lexer::LEX_CHAR_9);
38 }
39
// Returns a copy of `str` in which every regex metacharacter is preceded by a backslash,
// so the text can be embedded in a pattern and matched literally.
// Characters outside the metacharacter set (including embedded NULs) are copied unchanged.
std::string EscapeRegExp(const std::string &str)
{
    // Built once instead of constructing a temporary std::string for every input character.
    static constexpr std::string_view SPECIAL_CHARS = "-[]/{}()*+?.\\^$|";

    std::string escaped;
    // Lower bound on the output size; escaped characters add one extra byte each.
    escaped.reserve(str.size());
    for (const char c : str) {
        if (SPECIAL_CHARS.find(c) != std::string_view::npos) {
            escaped += '\\';  // Escape special characters
        }
        escaped += c;
    }
    return escaped;
}
52
GetTodoCommentsRegExp(const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> & descriptors)53 std::regex GetTodoCommentsRegExp(const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> &descriptors)
54 {
55 // Single-line comments: // TO-DO or //// TO-DO
56 std::string singleLineCommentStart = R"((?:\/\/+\s*))";
57
58 // Multi-line comment start: /* TO-DO or /** TO-DO
59 std::string multiLineCommentStart = R"((?:\/\*+\s*))";
60
61 // Any number of spaces or `*` at the start of a line (for block comments)
62 std::string anyNumberOfSpacesAndAsterisksAtStartOfLine = R"((?:^(?:\s|\*)*))";
63
64 // Match any of the comment start patterns
65 std::string preamble = "(" + singleLineCommentStart + "|" + multiLineCommentStart + "|" +
66 anyNumberOfSpacesAndAsterisksAtStartOfLine + ")";
67
68 /*
69 * This comments includes commonly flagged descriptors such as "TO-DO", "FIX-ME", "NOTE", "HACK", "FIX", "WARNING".
70 * A regex is created to identify these patterns intentionally.
71 */
72 std::vector<std::string> literalGroups;
73 literalGroups.reserve(descriptors.size());
74 for (const auto &d : descriptors) {
75 literalGroups.push_back("(" + EscapeRegExp(d.GetText()) + ")");
76 }
77
78 // Join the literal groups with '|'
79 std::string literals;
80 for (size_t i = 0; i < literalGroups.size(); ++i) {
81 if (i > 0) {
82 literals += "|";
83 }
84 literals += literalGroups[i];
85 }
86 literals = "(?:" + literals + ")";
87
88 // Match the remainder of the line (up to the end of line or block comment end `*/`)
89 std::string messageRemainder = R"((?:.*?))";
90 std::string endOfLineOrEndOfComment = R"((?:$|\*\/))";
91
92 // Final regex string
93 std::string regExpString = preamble + "(" + literals + messageRemainder + ")" + endOfLineOrEndOfComment;
94
95 // Return compiled regex (case insensitive only)
96 return std::regex(regExpString, std::regex_constants::icase);
97 }
98
// Splits `input` on '\n' and returns the pieces as owned strings.
// The '\n' separators are dropped, every other character (including '\r') is kept,
// empty lines between consecutive separators are preserved, and no trailing empty
// entry is produced when the input ends with '\n'.
std::vector<std::string> SplitLines(const std::string_view &input)
{
    std::vector<std::string> pieces;
    std::string_view remaining = input;

    // Peel one line off the front of the view for every separator found.
    for (size_t newline = remaining.find('\n'); newline != std::string_view::npos;
         newline = remaining.find('\n')) {
        pieces.emplace_back(remaining.substr(0, newline));
        remaining.remove_prefix(newline + 1);
    }

    // Whatever follows the last separator is the final, unterminated line.
    if (!remaining.empty()) {
        pieces.emplace_back(remaining);
    }

    return pieces;
}
117
118 // Helper function to find the correct descriptor
FindMatchedDescriptor(const std::cmatch & match,const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> & descriptors,size_t & firstDescriptorCaptureIndex)119 const ark::es2panda::lsp::TodoCommentDescriptor *FindMatchedDescriptor(
120 const std::cmatch &match, const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> &descriptors,
121 size_t &firstDescriptorCaptureIndex)
122 {
123 for (size_t i = 0; i < descriptors.size(); i++) {
124 if (match[i + firstDescriptorCaptureIndex].matched) {
125 return &descriptors[i];
126 }
127 }
128 return nullptr;
129 }
130
// Returns `rawMessage` with leading asterisks, spaces and tabs removed when both the message
// and the preamble contain an asterisk (i.e. the text came from a block comment).
// In every other case - including a message made up solely of those characters - the
// message is returned unchanged.
std::string ExtractAndCleanMessage(const std::string &rawMessage, const std::string &preamble)
{
    // Nothing to strip when the message carries no asterisk at all.
    const bool messageHasAsterisk = rawMessage.find('*') != std::string::npos;
    if (!messageHasAsterisk) {
        return rawMessage;
    }

    // Only block-comment preambles (a comment opener or a continuation asterisk) qualify.
    const bool preambleIsBlockComment =
        preamble.find("/*") != std::string::npos || preamble.find('*') != std::string::npos;
    if (!preambleIsBlockComment) {
        return rawMessage;
    }

    const size_t contentStart = rawMessage.find_first_not_of("* \t");
    if (contentStart == std::string::npos) {
        return rawMessage;
    }
    return rawMessage.substr(contentStart);
}
147
ProcessMatchedTodo(const ark::es2panda::lsp::TodoMatchContext & ctx,const std::cmatch & match)148 bool ProcessMatchedTodo(const ark::es2panda::lsp::TodoMatchContext &ctx, const std::cmatch &match)
149 {
150 const size_t preambleIndex = 1;
151 size_t firstDescriptorCaptureIndex = 3;
152 const size_t messageIndex = 2;
153
154 // Find which descriptor matched
155 const ark::es2panda::lsp::TodoCommentDescriptor *descriptor =
156 FindMatchedDescriptor(match, ctx.descriptors, firstDescriptorCaptureIndex);
157
158 if (descriptor == nullptr) {
159 return false;
160 }
161
162 std::string preamble = match[preambleIndex].str();
163
164 // Calculate absolute position in the file
165 size_t matchPositionInLine = std::distance(ctx.line->c_str(), match[0].first);
166 size_t matchPosition = ctx.lineStart + matchPositionInLine;
167 size_t descriptorPosition = matchPosition + preamble.length();
168
169 // We don't want to match something like 'TODOBY'
170 size_t afterTodoPos = descriptorPosition + descriptor->GetText().length();
171 if (afterTodoPos < ctx.fileContents.length() && IsLetterOrDigit(ctx.fileContents[afterTodoPos])) {
172 return false;
173 }
174
175 // Verify this is in a comment
176 if (ark::es2panda::lsp::GetTouchingToken(ctx.context, descriptorPosition, true) != nullptr) {
177 return false;
178 }
179
180 std::string message = ExtractAndCleanMessage(match[messageIndex].str(), preamble);
181
182 ctx.result.emplace_back(*descriptor, message, descriptorPosition);
183 return true;
184 }
185 } // namespace
186
187 namespace ark::es2panda::lsp {
GetTodoCommentsImpl(es2panda_Context * context,std::vector<ark::es2panda::lsp::TodoCommentDescriptor> & descriptors,CancellationToken * cancellationToken)188 std::vector<ark::es2panda::lsp::TodoComment> GetTodoCommentsImpl(
189 es2panda_Context *context, std::vector<ark::es2panda::lsp::TodoCommentDescriptor> &descriptors,
190 CancellationToken *cancellationToken)
191 {
192 auto ctx = reinterpret_cast<public_lib::Context *>(context);
193
194 if (cancellationToken->IsCancellationRequested()) {
195 return {};
196 }
197
198 if (descriptors.empty() || IsNodeModulesFile(ctx->sourceFile->filePath) ||
199 IsOHModulesFile(ctx->sourceFile->filePath)) {
200 return {};
201 }
202
203 auto fileContents = ctx->sourceFile->source;
204 std::vector<TodoComment> result;
205
206 // Split the file content into lines to handle line-by-line processing
207 std::vector<std::string> lines = SplitLines(fileContents);
208 std::regex regExp = GetTodoCommentsRegExp(descriptors);
209
210 TodoMatchContext matchContext = {context, descriptors, 0, nullptr, fileContents, result};
211
212 size_t lineStart = 0;
213 for (const auto &line : lines) {
214 if (cancellationToken->IsCancellationRequested()) {
215 return {};
216 }
217
218 matchContext.lineStart = lineStart;
219 matchContext.line = &line;
220
221 std::cmatch match;
222 std::string_view lineView(line);
223 const char *lineData = lineView.data();
224 const char *lineEnd = lineData;
225 std::advance(lineEnd, lineView.size());
226
227 while (std::regex_search(lineData, lineEnd, match, regExp)) {
228 ProcessMatchedTodo(matchContext, match);
229 lineData = match.suffix().first;
230 }
231
232 // Move to next line (add +1 for newline character)
233 lineStart += lineView.size() + 1;
234 }
235
236 return result;
237 }
238 } // namespace ark::es2panda::lsp
239