• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <regex>
17 #include "internal_api.h"
18 #include "todo_comments.h"
19 #include "lexer/token/letters.h"
20 #include "public/public.h"
21 
22 namespace {
// Reports whether `path` contains a "/node_modules/" directory segment anywhere in it.
bool IsNodeModulesFile(const std::string_view &path)
{
    constexpr std::string_view nodeModulesSegment = "/node_modules/";
    const auto segmentPos = path.find(nodeModulesSegment);
    return segmentPos != std::string_view::npos;
}
27 
// Reports whether `path` contains an "/oh_modules/" directory segment anywhere in it.
bool IsOHModulesFile(const std::string_view &path)
{
    constexpr std::string_view ohModulesSegment = "/oh_modules/";
    const auto segmentPos = path.find(ohModulesSegment);
    return segmentPos != std::string_view::npos;
}
32 
IsLetterOrDigit(char32_t c)33 bool IsLetterOrDigit(char32_t c)
34 {
35     return (c >= ark::es2panda::lexer::LEX_CHAR_LOWERCASE_A && c <= ark::es2panda::lexer::LEX_CHAR_LOWERCASE_Z) ||
36            (c >= ark::es2panda::lexer::LEX_CHAR_UPPERCASE_A && c <= ark::es2panda::lexer::LEX_CHAR_UPPERCASE_Z) ||
37            (c >= ark::es2panda::lexer::LEX_CHAR_0 && c <= ark::es2panda::lexer::LEX_CHAR_9);
38 }
39 
// Returns a copy of `str` in which every regex metacharacter is preceded by a backslash, so the
// result can be embedded in a pattern and match `str` literally.
// Escaped characters: - [ ] / { } ( ) * + ? . \ ^ $ |
std::string EscapeRegExp(const std::string &str)
{
    // Built once as a view over a literal; the previous code constructed a std::string holding
    // this set for every input character.
    constexpr std::string_view specialChars = "-[]/{}()*+?.\\^$|";

    std::string escaped;
    escaped.reserve(str.size());  // the result is never shorter than the input
    for (const char c : str) {
        if (specialChars.find(c) != std::string_view::npos) {
            escaped += '\\';  // Escape special characters
        }
        escaped += c;
    }
    return escaped;
}
52 
GetTodoCommentsRegExp(const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> & descriptors)53 std::regex GetTodoCommentsRegExp(const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> &descriptors)
54 {
55     // Single-line comments: // TO-DO or //// TO-DO
56     std::string singleLineCommentStart = R"((?:\/\/+\s*))";
57 
58     // Multi-line comment start: /* TO-DO or /** TO-DO
59     std::string multiLineCommentStart = R"((?:\/\*+\s*))";
60 
61     // Any number of spaces or `*` at the start of a line (for block comments)
62     std::string anyNumberOfSpacesAndAsterisksAtStartOfLine = R"((?:^(?:\s|\*)*))";
63 
64     // Match any of the comment start patterns
65     std::string preamble = "(" + singleLineCommentStart + "|" + multiLineCommentStart + "|" +
66                            anyNumberOfSpacesAndAsterisksAtStartOfLine + ")";
67 
68     /*
69      * This comments includes commonly flagged descriptors such as "TO-DO", "FIX-ME", "NOTE", "HACK", "FIX", "WARNING".
70      * A regex is created to identify these patterns intentionally.
71      */
72     std::vector<std::string> literalGroups;
73     literalGroups.reserve(descriptors.size());
74     for (const auto &d : descriptors) {
75         literalGroups.push_back("(" + EscapeRegExp(d.GetText()) + ")");
76     }
77 
78     // Join the literal groups with '|'
79     std::string literals;
80     for (size_t i = 0; i < literalGroups.size(); ++i) {
81         if (i > 0) {
82             literals += "|";
83         }
84         literals += literalGroups[i];
85     }
86     literals = "(?:" + literals + ")";
87 
88     // Match the remainder of the line (up to the end of line or block comment end `*/`)
89     std::string messageRemainder = R"((?:.*?))";
90     std::string endOfLineOrEndOfComment = R"((?:$|\*\/))";
91 
92     // Final regex string
93     std::string regExpString = preamble + "(" + literals + messageRemainder + ")" + endOfLineOrEndOfComment;
94 
95     // Return compiled regex (case insensitive only)
96     return std::regex(regExpString, std::regex_constants::icase);
97 }
98 
// Splits `input` at every '\n' and returns the pieces as owned strings.
// The '\n' separators are dropped; any other character (including a '\r' preceding the '\n')
// stays in the line. Text after the final '\n' is added only when it is non-empty, so a trailing
// newline does not produce an extra empty line.
std::vector<std::string> SplitLines(const std::string_view &input)
{
    std::vector<std::string> result;
    size_t lineBegin = 0;

    for (size_t lineBreak = input.find('\n'); lineBreak != std::string_view::npos;
         lineBreak = input.find('\n', lineBegin)) {
        result.emplace_back(input.substr(lineBegin, lineBreak - lineBegin));
        lineBegin = lineBreak + 1;
    }

    // Remaining text that is not terminated by a newline
    if (lineBegin < input.size()) {
        result.emplace_back(input.substr(lineBegin));
    }

    return result;
}
117 
118 // Helper function to find the correct descriptor
FindMatchedDescriptor(const std::cmatch & match,const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> & descriptors,size_t & firstDescriptorCaptureIndex)119 const ark::es2panda::lsp::TodoCommentDescriptor *FindMatchedDescriptor(
120     const std::cmatch &match, const std::vector<ark::es2panda::lsp::TodoCommentDescriptor> &descriptors,
121     size_t &firstDescriptorCaptureIndex)
122 {
123     for (size_t i = 0; i < descriptors.size(); i++) {
124         if (match[i + firstDescriptorCaptureIndex].matched) {
125             return &descriptors[i];
126         }
127     }
128     return nullptr;
129 }
130 
// Returns the message text with block-comment decoration removed from its front.
// Leading '*', space and tab characters are stripped only when both the preamble and the message
// contain a '*'; otherwise, or when the message consists solely of those characters, the message
// is returned unchanged.
std::string ExtractAndCleanMessage(const std::string &rawMessage, const std::string &preamble)
{
    // A preamble containing "/*" necessarily contains '*', so a single lookup covers both the
    // block-comment opener and a continuation line of a block comment.
    const bool preambleHasAsterisk = preamble.find('*') != std::string::npos;
    const bool messageHasAsterisk = rawMessage.find('*') != std::string::npos;
    if (!preambleHasAsterisk || !messageHasAsterisk) {
        return rawMessage;
    }

    const size_t contentStart = rawMessage.find_first_not_of("* \t");
    if (contentStart == std::string::npos) {
        return rawMessage;
    }
    return rawMessage.substr(contentStart);
}
147 
ProcessMatchedTodo(const ark::es2panda::lsp::TodoMatchContext & ctx,const std::cmatch & match)148 bool ProcessMatchedTodo(const ark::es2panda::lsp::TodoMatchContext &ctx, const std::cmatch &match)
149 {
150     const size_t preambleIndex = 1;
151     size_t firstDescriptorCaptureIndex = 3;
152     const size_t messageIndex = 2;
153 
154     // Find which descriptor matched
155     const ark::es2panda::lsp::TodoCommentDescriptor *descriptor =
156         FindMatchedDescriptor(match, ctx.descriptors, firstDescriptorCaptureIndex);
157 
158     if (descriptor == nullptr) {
159         return false;
160     }
161 
162     std::string preamble = match[preambleIndex].str();
163 
164     // Calculate absolute position in the file
165     size_t matchPositionInLine = std::distance(ctx.line->c_str(), match[0].first);
166     size_t matchPosition = ctx.lineStart + matchPositionInLine;
167     size_t descriptorPosition = matchPosition + preamble.length();
168 
169     // We don't want to match something like 'TODOBY'
170     size_t afterTodoPos = descriptorPosition + descriptor->GetText().length();
171     if (afterTodoPos < ctx.fileContents.length() && IsLetterOrDigit(ctx.fileContents[afterTodoPos])) {
172         return false;
173     }
174 
175     // Verify this is in a comment
176     if (ark::es2panda::lsp::GetTouchingToken(ctx.context, descriptorPosition, true) != nullptr) {
177         return false;
178     }
179 
180     std::string message = ExtractAndCleanMessage(match[messageIndex].str(), preamble);
181 
182     ctx.result.emplace_back(*descriptor, message, descriptorPosition);
183     return true;
184 }
185 }  // namespace
186 
187 namespace ark::es2panda::lsp {
GetTodoCommentsImpl(es2panda_Context * context,std::vector<ark::es2panda::lsp::TodoCommentDescriptor> & descriptors,CancellationToken * cancellationToken)188 std::vector<ark::es2panda::lsp::TodoComment> GetTodoCommentsImpl(
189     es2panda_Context *context, std::vector<ark::es2panda::lsp::TodoCommentDescriptor> &descriptors,
190     CancellationToken *cancellationToken)
191 {
192     auto ctx = reinterpret_cast<public_lib::Context *>(context);
193 
194     if (cancellationToken->IsCancellationRequested()) {
195         return {};
196     }
197 
198     if (descriptors.empty() || IsNodeModulesFile(ctx->sourceFile->filePath) ||
199         IsOHModulesFile(ctx->sourceFile->filePath)) {
200         return {};
201     }
202 
203     auto fileContents = ctx->sourceFile->source;
204     std::vector<TodoComment> result;
205 
206     // Split the file content into lines to handle line-by-line processing
207     std::vector<std::string> lines = SplitLines(fileContents);
208     std::regex regExp = GetTodoCommentsRegExp(descriptors);
209 
210     TodoMatchContext matchContext = {context, descriptors, 0, nullptr, fileContents, result};
211 
212     size_t lineStart = 0;
213     for (const auto &line : lines) {
214         if (cancellationToken->IsCancellationRequested()) {
215             return {};
216         }
217 
218         matchContext.lineStart = lineStart;
219         matchContext.line = &line;
220 
221         std::cmatch match;
222         std::string_view lineView(line);
223         const char *lineData = lineView.data();
224         const char *lineEnd = lineData;
225         std::advance(lineEnd, lineView.size());
226 
227         while (std::regex_search(lineData, lineEnd, match, regExp)) {
228             ProcessMatchedTodo(matchContext, match);
229             lineData = match.suffix().first;
230         }
231 
232         // Move to next line (add +1 for newline character)
233         lineStart += lineView.size() + 1;
234     }
235 
236     return result;
237 }
238 }  // namespace ark::es2panda::lsp
239