• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_BASE_STRING_HELP_H
17 #define ECMASCRIPT_BASE_STRING_HELP_H
18 
19 #include <algorithm>
20 #include <codecvt>
21 #include <locale>
22 #include <regex>
23 #include <sstream>
24 #include <string>
25 #include <vector>
26 
27 #include "ecmascript/base/utf_helper.h"
28 #include "ecmascript/mem/c_containers.h"
29 #include "ecmascript/mem/c_string.h"
30 
31 #include "securec.h"
32 #include "unicode/unistr.h"
33 
34 namespace panda::ecmascript::base {
// Code units that are white space code points or line terminator code points.
// The entries are listed in ascending order; StringHelper::IsNonspace stops scanning
// as soon as it meets an entry greater than the value it is looking for.
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
static constexpr uint16_t SPACE_OR_LINE_TERMINAL[] = {
    0x0009, 0x000A, 0x000B, 0x000C, 0x000D,
    0x0020, 0x00A0, 0x1680,
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
    0x2028, 0x2029, 0x202F, 0x205F,
    0x3000, 0xFEFF,
};
// Lead-byte boundaries compared against the first byte in StringHelper::UnicodeFromUtf8
// to decide how many trailing bytes a sequence has.
static constexpr int UICODE_FROM_UTF8[] = {
    0x80,
    0xc0, 0xdf,
    0xe0, 0xef,
    0xf0, 0xf7,
    0xf8, 0xfb,
    0xfc, 0xfd,
};
// Lower bound of the decoded value per trailing-byte count (index = count - 1);
// StringHelper::FromUtf8 rejects any decoded value below the bound.
static constexpr int UTF8_MIN_CODE[] = {
    0x80,
    0x800,
    0x10000,
    0x00200000,
    0x04000000,
};
// Mask applied to the lead byte per trailing-byte count (index = count - 1) before the
// trailing bytes are folded in by StringHelper::FromUtf8.
static constexpr char UTF8_FIRST_CODE[] = {
    0x1f,
    0xf,
    0x7,
    0x3,
    0x1,
};
50 class StringHelper {
51 public:
52     static constexpr int INVALID_UNICODE_FROM_UTF8 = -1;
53 
ReplaceAll(CString str,const CString & oldValue,const CString & newValue)54     static inline CString ReplaceAll(CString str, const CString &oldValue,
55                                      const CString &newValue)
56     {
57         if (oldValue.empty() || oldValue == newValue) {
58             return str;
59         }
60         CString::size_type pos(0);
61         while ((pos = str.find(oldValue, pos)) != CString::npos) {
62             str.replace(pos, oldValue.length(), newValue);
63             pos += newValue.length();
64         }
65         return str;
66     }
67 
Replace(CString str,const CString & oldValue,const CString & newValue)68     static inline CString Replace(CString str, const CString &oldValue,
69                                   const CString &newValue)
70     {
71         if (oldValue.empty() || oldValue == newValue) {
72             return str;
73         }
74         CString::size_type pos(0);
75         if ((pos = str.find(oldValue, pos)) != CString::npos) {
76             str.replace(pos, oldValue.length(), newValue);
77         }
78         return str;
79     }
80 
Utf16ToU16String(const uint16_t * utf16Data,uint32_t dataLen)81     static inline std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen)
82     {
83         auto *char16tData = reinterpret_cast<const char16_t *>(utf16Data);
84         std::u16string u16str(char16tData, dataLen);
85         return u16str;
86     }
87 
Utf8ToString(const uint8_t * utf8Data,uint32_t dataLen)88     static inline std::string Utf8ToString(const uint8_t *utf8Data, uint32_t dataLen)
89     {
90         auto *charData = reinterpret_cast<const char *>(utf8Data);
91         std::string str(charData, dataLen);
92         return str;
93     }
94 
Utf8ToU16String(const uint8_t * utf8Data,uint32_t dataLen)95     static inline std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen)
96     {
97         auto *charData = reinterpret_cast<const char *>(utf8Data);
98         std::string str(charData, dataLen);
99         std::u16string u16str = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.from_bytes(str);
100         return u16str;
101     }
102 
WstringToString(const std::wstring & wstr)103     static inline std::string WstringToString(const std::wstring &wstr)
104     {
105         return std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>{}.to_bytes(wstr);
106     }
107 
StringToWstring(const std::string & str)108     static inline std::wstring StringToWstring(const std::string &str)
109     {
110         return std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>{}.from_bytes(str);
111     }
112 
U16stringToString(const std::u16string & u16str)113     static inline std::string U16stringToString(const std::u16string &u16str)
114     {
115         return std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.to_bytes(u16str);
116     }
117 
StringToU16string(const std::string & str)118     static inline std::u16string StringToU16string(const std::string &str)
119     {
120         return std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.from_bytes(str);
121     }
122 
Find(const std::string & thisStr,const std::string & searchStr,int32_t pos)123     static inline size_t Find(const std::string &thisStr, const std::string &searchStr, int32_t pos)
124     {
125         size_t idx = thisStr.find(searchStr, pos);
126         return idx;
127     }
128 
Find(const std::u16string & thisStr,const std::u16string & searchStr,int32_t pos)129     static inline size_t Find(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
130     {
131         size_t idx = thisStr.find(searchStr, pos);
132         return idx;
133     }
134 
RFind(const std::u16string & thisStr,const std::u16string & searchStr,int32_t pos)135     static inline size_t RFind(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
136     {
137         size_t idx = thisStr.rfind(searchStr, pos);
138         return idx;
139     }
140 
ToUpper(const std::u16string & str)141     static inline std::string ToUpper(const std::u16string &str)
142     {
143         std::u16string tmpStr = str;
144         const char16_t *constChar16tData = tmpStr.data();
145         icu::UnicodeString uString(constChar16tData);
146         icu::UnicodeString up = uString.toUpper();
147         std::string res;
148         up.toUTF8String(res);
149         return res;
150     }
151 
ToLocaleUpper(const std::u16string & str,const icu::Locale & locale)152     static inline std::string ToLocaleUpper(const std::u16string &str, const icu::Locale &locale)
153     {
154         std::u16string tmpStr = str;
155         const char16_t *constChar16tData = tmpStr.data();
156         icu::UnicodeString uString(constChar16tData);
157         icu::UnicodeString up = uString.toUpper(locale);
158         std::string res;
159         up.toUTF8String(res);
160         return res;
161     }
162 
ToLower(const std::u16string & str)163     static inline std::string ToLower(const std::u16string &str)
164     {
165         const char16_t *constChar16tData = str.data();
166         icu::UnicodeString uString(constChar16tData, str.length());
167         std::string res;
168         uString.toLower().toUTF8String(res);
169         return res;
170     }
171 
ToLocaleLower(const std::u16string & str,const icu::Locale & locale)172     static inline std::string ToLocaleLower(const std::u16string &str, const icu::Locale &locale)
173     {
174         std::u16string tmpStr = str;
175         const char16_t *constChar16tData = tmpStr.data();
176         icu::UnicodeString uString(constChar16tData);
177         icu::UnicodeString low = uString.toLower(locale);
178         std::string res;
179         low.toUTF8String(res);
180         return res;
181     }
182 
FindFromU16ToUpper(const std::u16string & thisStr,uint16_t * u16Data)183     static inline size_t FindFromU16ToUpper(const std::u16string &thisStr, uint16_t *u16Data)
184     {
185         std::u16string tmpStr = Utf16ToU16String(u16Data, 1);
186         const char16_t *constChar16tData = tmpStr.data();
187         icu::UnicodeString uString(constChar16tData);
188         icu::UnicodeString up = uString.toUpper();
189         std::string res;
190         up.toUTF8String(res);
191         std::u16string searchStr = StringToU16string(res);
192         size_t idx = Find(thisStr, searchStr, 0);
193         return idx;
194     }
195 
UnicodeFromUtf8(const uint8_t * p,int maxLen,const uint8_t ** pp)196     static int UnicodeFromUtf8(const uint8_t *p, int maxLen, const uint8_t **pp)
197     {
198         int c = *p++;
199         if (c < UICODE_FROM_UTF8[0]) {
200             *pp = p;
201             return c;
202         }
203         int l = 0;
204         if (c >= UICODE_FROM_UTF8[1] && c <= UICODE_FROM_UTF8[2]) { // 1 - 2: 0000 0080 - 0000 07FF
205             l = 1; // 1: 0000 0080 - 0000 07FF Unicode
206         } else if (c >= UICODE_FROM_UTF8[3] && c <= UICODE_FROM_UTF8[4]) { // 3 - 4: 0000 0800 - 0000 FFFF
207             l = 2; // 2: 0000 0800 - 0000 FFFF Unicode
208         } else if (c >= UICODE_FROM_UTF8[5] && c <= UICODE_FROM_UTF8[6]) { // 5 - 6: 0001 0000 - 0010 FFFF
209             l = 3; // 3: 0001 0000 - 0010 FFFF Unicode
210         } else if (c >= UICODE_FROM_UTF8[7] && c <= UICODE_FROM_UTF8[8]) { // 7 - 8: 0020 0000 - 03FF FFFF
211             l = 4; // 4: 0020 0000 - 03FF FFFF Unicode
212         } else if (c == UICODE_FROM_UTF8[9] || c == UICODE_FROM_UTF8[10]) { // 9 - 10: 0400 0000 - 7FFF FFFF
213             l = 5; // 5: 0400 0000 - 7FFF FFFF Unicode
214         } else {
215             return INVALID_UNICODE_FROM_UTF8;
216         }
217         /* check that we have enough characters */
218         if ((l + 1) > maxLen) {
219             return INVALID_UNICODE_FROM_UTF8;
220         }
221         return FromUtf8(c, l, p, pp);
222     }
223 
FromUtf8(int c,int l,const uint8_t * p,const uint8_t ** pp)224     static int FromUtf8(int c, int l, const uint8_t *p, const uint8_t **pp)
225     {
226         uint32_t b;
227         c &= UTF8_FIRST_CODE[l - 1];
228         for (int i = 0; i < l; i++) {
229             b = *p++;
230             if (b < utf_helper::UTF8_2B_SECOND || b >= utf_helper::UTF8_2B_FIRST) {
231                 return INVALID_UNICODE_FROM_UTF8;
232             }
233             c = (c << 6) | (b & utf_helper::UTF8_2B_THIRD); // 6: Maximum Unicode range
234         }
235         if (c < UTF8_MIN_CODE[l - 1]) {
236             return INVALID_UNICODE_FROM_UTF8;
237         }
238         *pp = p;
239         return c;
240     }
241 
InplaceAppend(std::u16string & str1,const std::u16string & str2)242     static inline void InplaceAppend(std::u16string &str1, const std::u16string &str2)
243     {
244         str1.append(str2);
245     }
246 
Append(const std::u16string & str1,const std::u16string & str2)247     static inline std::u16string Append(const std::u16string &str1, const std::u16string &str2)
248     {
249         std::u16string tmpStr = str1;
250         return tmpStr.append(str2);
251     }
252 
Utf8ToU32String(const std::vector<uint8_t> & data)253     static inline uint32_t Utf8ToU32String(const std::vector<uint8_t> &data)
254     {
255         std::string str(data.begin(), data.end());
256         std::u32string u32str = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>{}.from_bytes(str);
257         auto u32data = reinterpret_cast<uint32_t *>(u32str.data());
258         return *u32data;
259     }
260 
Utf32ToString(uint32_t u32Data)261     static inline std::string Utf32ToString(uint32_t u32Data)
262     {
263         UChar32 charData = static_cast<int32_t>(u32Data);
264         icu::UnicodeString uString(charData);
265         std::string res;
266         uString.toUTF8String(res);
267         return res;
268     }
269 
GetSpecifiedLine(const std::string & srcStr,int lineNumber)270     static inline std::string GetSpecifiedLine(const std::string &srcStr, int lineNumber)
271     {
272         if (lineNumber < 1) {
273             return "";
274         }
275         bool escape = true;
276         if (srcStr.find('\n') == std::string::npos) {
277             escape = false;
278         }
279         size_t prePos = 0;
280         int findPrePos = lineNumber - 1;
281         for (int i = 0; i < findPrePos; i++) {
282             if (escape) {
283                 prePos = srcStr.find('\n', prePos);
284                 if (prePos == std::string::npos) {
285                     return "";
286                 }
287                 prePos += 1;
288             } else {
289                 prePos = srcStr.find("\\n", prePos);
290                 if (prePos == std::string::npos) {
291                     return "";
292                 }
293                 prePos += 2; // 2 : add the two characters found to start searching again
294             }
295         }
296         size_t findEndPos = 0;
297         if (escape) {
298             findEndPos = srcStr.find('\n', prePos);
299         } else {
300             findEndPos = srcStr.find("\\n", prePos);
301         }
302         if (findEndPos == std::string::npos) {
303             return srcStr.substr(prePos, srcStr.length() - prePos);
304         }
305         ASSERT(findEndPos > prePos);
306         return srcStr.substr(prePos, findEndPos - prePos);
307     }
308 
IsNonspace(uint16_t c)309     static inline bool IsNonspace(uint16_t c)
310     {
311         uint32_t len = sizeof(SPACE_OR_LINE_TERMINAL) / sizeof(SPACE_OR_LINE_TERMINAL[0]);
312         for (uint32_t i = 0; i < len; i++) {
313             if (c == SPACE_OR_LINE_TERMINAL[i]) {
314                 return true;
315             }
316             if (c < SPACE_OR_LINE_TERMINAL[i]) {
317                 return false;
318             }
319         }
320         return false;
321     }
322 
323     template<typename T>
GetStart(Span<T> & data,uint32_t length)324     static inline uint32_t GetStart(Span<T> &data, uint32_t length)
325     {
326         uint32_t start = 0;
327         while (start < length && IsNonspace(data[start])) {
328             start++;
329         }
330         return start;
331     }
332 
333     template<typename T>
GetEnd(Span<T> & data,int32_t start,uint32_t length)334     static inline int32_t GetEnd(Span<T> &data, int32_t start, uint32_t length)
335     {
336         if (length == 0U) {
337             return 0;
338         }
339         int32_t end = static_cast<int32_t>(length - 1);
340         while (end >= start && IsNonspace(data[end])) {
341             end--;
342         }
343         return end;
344     }
345 
Utf8CharInRange(uint8_t value,char start,char end)346     static bool Utf8CharInRange(uint8_t value, char start, char end)
347     {
348         ASSERT(start <= end);
349         return (value >= static_cast<uint8_t>(start)) && (value <= static_cast<uint8_t>(end));
350     }
351 
Vformat(const char * fmt,va_list args)352     static inline std::string Vformat(const char *fmt, va_list args)
353     {
354         static constexpr size_t SIZE = 1024;
355 
356         std::string result;
357         result.resize(SIZE);
358 
359         bool is_truncated = true;
360         while (is_truncated) {
361             va_list copy_args;
362             va_copy(copy_args, args);
363             int r = vsnprintf_truncated_s(result.data(), result.size() + 1, fmt, copy_args);
364             va_end(copy_args);
365 
366             if (r < 0) {
367                 return "";
368             }
369 
370             is_truncated = static_cast<size_t>(r) == result.size();
371             result.resize(result.size() * 2U);
372         }
373 
374         result.erase(std::find(result.begin(), result.end(), '\0'), result.end());
375 
376         return result;
377     }
378 
SplitString(const std::string & str,const std::string & delimiter)379     static std::vector<std::string> SplitString(const std::string &str, const std::string &delimiter)
380     {
381         std::size_t strIndex = 0;
382         std::vector<std::string> value;
383         std::size_t pos = str.find_first_of(delimiter, strIndex);
384         while ((pos < str.size()) && (pos > strIndex)) {
385             std::string subStr = str.substr(strIndex, pos - strIndex);
386             value.push_back(std::move(subStr));
387             strIndex = pos;
388             strIndex = str.find_first_not_of(delimiter, strIndex);
389             pos = str.find_first_of(delimiter, strIndex);
390         }
391         if (pos > strIndex) {
392             std::string subStr = str.substr(strIndex, pos - strIndex);
393             if (!subStr.empty()) {
394                 value.push_back(std::move(subStr));
395             }
396         }
397         return value;
398     }
399 
EndsWith(const std::string & str,const std::string & suffix)400     static bool EndsWith(const std::string &str, const std::string &suffix)
401     {
402         if (str.length() < suffix.length()) {
403             return false;
404         }
405         std::string subStr = str.substr(str.length() - suffix.length(), str.length());
406         return subStr == suffix;
407     }
408 
StrToUInt32(const char * content,uint32_t * result)409     static bool StrToUInt32(const char *content, uint32_t *result)
410     {
411         const int DEC = 10;
412         char *endPtr = nullptr;
413         *result = std::strtoul(content, &endPtr, DEC);
414         if (endPtr == content || *endPtr != '\0') {
415             return false;
416         }
417         return true;
418     }
419 
StringStartWith(const CString & str,const CString & startStr)420     static bool StringStartWith(const CString& str, const CString& startStr)
421     {
422         size_t startStrLen = startStr.length();
423         return ((str.length() >= startStrLen) && (str.compare(0, startStrLen, startStr) == 0));
424     }
425 
StringEndWith(const CString & str,const CString & endStr)426     static bool StringEndWith(const CString& str, const CString& endStr)
427     {
428         size_t endStrLen = endStr.length();
429         size_t len = str.length();
430         return ((len >= endStrLen) && (str.compare(len - endStrLen, endStrLen, endStr) == 0));
431     }
432 
433     static void SplitString(const CString& str, CVector<CString>& out, size_t startPos, size_t times = 0, char c = '/')
434     {
435         size_t left = startPos;
436         size_t pos = 0;
437         size_t index = 0;
438         while ((pos = str.find(c, left)) != CString::npos) {
439             if (times != 0 && index >= times) {
440                 return;
441             }
442             out.emplace_back(str.substr(left, pos - left));
443             left = pos + 1;
444             index++;
445         }
446 
447         if ((times == 0 || index < times) && left < str.length()) {
448             out.emplace_back(str.substr(left));
449         }
450     }
451 
452     static CString JoinString(const CVector<CString>& strs, size_t startIndex, size_t endIndex, char c = '/')
453     {
454         CString out;
455         for (size_t index = startIndex; index < strs.size() && index <= endIndex; ++index) {
456             if (!strs[index].empty()) {
457                 out.append(strs[index]) += c;
458             }
459         }
460         if (!out.empty()) {
461             out.pop_back();
462         }
463         return out;
464     }
465 };
466 }  // namespace panda::ecmascript::base
467 #endif  // ECMASCRIPT_BASE_STRING_HELP_H
468