• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_BASE_STRING_HELP_H
17 #define ECMASCRIPT_BASE_STRING_HELP_H
18 
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <codecvt>
#include <cstdlib>
#include <limits>
#include <locale>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
26 
27 #include "common_components/base/utf_helper.h"
28 #include "ecmascript/mem/c_containers.h"
29 #include "ecmascript/mem/c_string.h"
30 #include "libpandabase/utils/span.h"
31 
32 #include "securec.h"
33 #include "unicode/unistr.h"
34 
35 namespace panda::ecmascript::base {
// White-space and line-terminator code points, listed in ascending order.
// IsNonspace() relies on that ordering to stop scanning early.
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
static constexpr uint16_t SPACE_OR_LINE_TERMINAL[] = {
    0x0009, 0x000A, 0x000B, 0x000C, 0x000D,
    0x0020, 0x00A0, 0x1680,
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
    0x2028, 0x2029,
    0x202F, 0x205F, 0x3000, 0xFEFF,
};

// Lead-byte boundaries consulted by UnicodeFromUtf8():
// [0] is the first non-ASCII value; [1]-[2], [3]-[4], [5]-[6], [7]-[8] are inclusive
// lead-byte ranges for 1, 2, 3 and 4 continuation bytes; [9] and [10] select 5.
static constexpr int UICODE_FROM_UTF8[] = {
    0x80,
    0xc0, 0xdf,
    0xe0, 0xef,
    0xf0, 0xf7,
    0xf8, 0xfb,
    0xfc, 0xfd,
};

// Smallest value FromUtf8() accepts for a sequence with (index + 1) continuation bytes.
static constexpr int UTF8_MIN_CODE[] = {
    0x80,
    0x800,
    0x10000,
    0x00200000,
    0x04000000,
};

// Mask FromUtf8() applies to the lead byte, indexed by (continuation byte count - 1).
static constexpr char UTF8_FIRST_CODE[] = {
    0x1f,
    0xf,
    0x7,
    0x3,
    0x1,
};
51 
52 // Concatenates multiple arguments into a CString.
53 // Note: For better performance, consider using AppendToBaseString with a pre-allocated CString.
54 template <typename T, typename... Args>
ConcatToCString(T && first,Args &&...args)55 inline CString ConcatToCString(T&& first, Args&&... args)
56 {
57     CString result = CString(std::forward<T>(first));
58     ((result += std::forward<Args>(args)), ...);
59     return result;
60 }
61 
// Builds a std::string from `first` and then appends every remaining argument, left to right.
// Note: when the destination can be pre-allocated, AppendToBaseString is the cheaper choice.
template <typename T, typename... Args>
inline std::string ConcatToStdString(T&& first, Args&&... args)
{
    std::string joined(std::forward<T>(first));
    // Left fold over +=: ((joined += a1) += a2) ... keeps the argument order.
    (joined += ... += std::forward<Args>(args));
    return joined;
}
71 
72 // Appends multiple arguments to a base string (std::string or CString).
73 // Note: For better performance, pre-allocate the base string using reserve() before calling this function.
74 template <typename T, typename... Args>
AppendToBaseString(T & base,Args &&...args)75 inline void AppendToBaseString(T& base, Args&&... args)
76 {
77     static_assert(std::is_same_v<T, std::string> || std::is_same_v<T, CString>,
78                   "base must be std::string or CString");
79     ((base += std::forward<Args>(args)), ...);
80 }
81 
82 class StringHelper {
83 public:
84     static constexpr int INVALID_UNICODE_FROM_UTF8 = -1;
85 
ReplaceAll(CString str,const CString & oldValue,const CString & newValue)86     static inline CString ReplaceAll(CString str, const CString &oldValue,
87                                      const CString &newValue)
88     {
89         if (oldValue.empty() || oldValue == newValue) {
90             return str;
91         }
92         CString::size_type pos(0);
93         while ((pos = str.find(oldValue, pos)) != CString::npos) {
94             str.replace(pos, oldValue.length(), newValue);
95             pos += newValue.length();
96         }
97         return str;
98     }
99 
100     template<class T>
Replace(CString str,const T & oldValue,const T & newValue)101     static inline CString Replace(CString str, const T &oldValue, const T &newValue)
102     {
103         if (oldValue.empty() || oldValue == newValue) {
104             return str;
105         }
106         CString::size_type pos(0);
107         if ((pos = str.find(oldValue, pos)) != CString::npos) {
108             str.replace(pos, oldValue.length(), newValue);
109         }
110         return str;
111     }
112 
// Copies `dataLen` UTF-16 code units starting at `utf16Data` into a std::u16string.
static inline std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen)
{
    return std::u16string(reinterpret_cast<const char16_t *>(utf16Data), dataLen);
}
119 
// Copies `dataLen` bytes starting at `utf8Data` into a std::string; the bytes are not validated.
static inline std::string Utf8ToString(const uint8_t *utf8Data, uint32_t dataLen)
{
    return std::string(reinterpret_cast<const char *>(utf8Data), dataLen);
}
126 
Utf8ToCString(const uint8_t * utf8Data,uint32_t dataLen)127     static inline CString Utf8ToCString(const uint8_t *utf8Data, uint32_t dataLen)
128     {
129         auto *charData = reinterpret_cast<const char *>(utf8Data);
130         return { charData, dataLen };
131     }
132 
// Decodes `dataLen` UTF-8 bytes starting at `utf8Data` into UTF-16 via std::wstring_convert.
static inline std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen)
{
    using Utf8Utf16Converter = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    const std::string bytes(reinterpret_cast<const char *>(utf8Data), dataLen);
    Utf8Utf16Converter converter;
    return converter.from_bytes(bytes);
}
140 
// Encodes a wide string as UTF-8 bytes via std::wstring_convert.
static inline std::string WstringToString(const std::wstring &wstr)
{
    using WideUtf8Converter = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
    WideUtf8Converter converter;
    return converter.to_bytes(wstr);
}
145 
// Decodes UTF-8 bytes into a wide string via std::wstring_convert.
static inline std::wstring StringToWstring(const std::string &str)
{
    using WideUtf8Converter = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
    WideUtf8Converter converter;
    return converter.from_bytes(str);
}
150 
// Encodes a UTF-16 string as UTF-8 bytes via std::wstring_convert.
static inline std::string U16stringToString(const std::u16string &u16str)
{
    using Utf8Utf16Converter = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    Utf8Utf16Converter converter;
    return converter.to_bytes(u16str);
}
155 
// Decodes UTF-8 bytes into a UTF-16 string via std::wstring_convert.
static inline std::u16string StringToU16string(const std::string &str)
{
    using Utf8Utf16Converter = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    Utf8Utf16Converter converter;
    return converter.from_bytes(str);
}
160 
// Returns the index of the first occurrence of `searchStr` in `thisStr` at or after `pos`,
// or std::string::npos when there is none.
// `pos` is converted to the unsigned size type, so a negative value becomes a very large offset.
static inline size_t Find(const std::string &thisStr, const std::string &searchStr, int32_t pos)
{
    return thisStr.find(searchStr, static_cast<std::string::size_type>(pos));
}
166 
// Returns the index of the first occurrence of `searchStr` in `thisStr` at or after `pos`,
// or std::u16string::npos when there is none.
// `pos` is converted to the unsigned size type, so a negative value becomes a very large offset.
static inline size_t Find(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
{
    return thisStr.find(searchStr, static_cast<std::u16string::size_type>(pos));
}
172 
// Returns the index of the last occurrence of `searchStr` in `thisStr` that starts at or
// before `pos`, or std::u16string::npos when there is none.
// `pos` is converted to the unsigned size type, so a negative value becomes a very large offset.
static inline size_t RFind(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
{
    return thisStr.rfind(searchStr, static_cast<std::u16string::size_type>(pos));
}
178 
ToUpper(const std::u16string & str)179     static inline std::string ToUpper(const std::u16string &str)
180     {
181         std::u16string tmpStr = str;
182         const char16_t *constChar16tData = tmpStr.data();
183         icu::UnicodeString uString(constChar16tData);
184         icu::UnicodeString up = uString.toUpper();
185         std::string res;
186         up.toUTF8String(res);
187         return res;
188     }
189 
ToLocaleUpper(const std::u16string & str,const icu::Locale & locale)190     static inline std::string ToLocaleUpper(const std::u16string &str, const icu::Locale &locale)
191     {
192         std::u16string tmpStr = str;
193         const char16_t *constChar16tData = tmpStr.data();
194         icu::UnicodeString uString(constChar16tData);
195         icu::UnicodeString up = uString.toUpper(locale);
196         std::string res;
197         up.toUTF8String(res);
198         return res;
199     }
200 
ToLower(const std::u16string & str)201     static inline std::string ToLower(const std::u16string &str)
202     {
203         const char16_t *constChar16tData = str.data();
204         icu::UnicodeString uString(constChar16tData, str.length());
205         std::string res;
206         uString.toLower().toUTF8String(res);
207         return res;
208     }
209 
ToLocaleLower(const std::u16string & str,const icu::Locale & locale)210     static inline std::string ToLocaleLower(const std::u16string &str, const icu::Locale &locale)
211     {
212         std::u16string tmpStr = str;
213         const char16_t *constChar16tData = tmpStr.data();
214         icu::UnicodeString uString(constChar16tData);
215         icu::UnicodeString low = uString.toLower(locale);
216         std::string res;
217         low.toUTF8String(res);
218         return res;
219     }
220 
FindFromU16ToUpper(const std::u16string & thisStr,uint16_t * u16Data)221     static inline size_t FindFromU16ToUpper(const std::u16string &thisStr, uint16_t *u16Data)
222     {
223         std::u16string tmpStr = Utf16ToU16String(u16Data, 1);
224         const char16_t *constChar16tData = tmpStr.data();
225         icu::UnicodeString uString(constChar16tData);
226         icu::UnicodeString up = uString.toUpper();
227         std::string res;
228         up.toUTF8String(res);
229         std::u16string searchStr = StringToU16string(res);
230         size_t idx = Find(thisStr, searchStr, 0);
231         return idx;
232     }
233 
FindFromU8ToUpper(const std::string & thisStr,uint8_t * u8Data)234     static inline size_t FindFromU8ToUpper(const std::string &thisStr, uint8_t *u8Data)
235     {
236         std::string tmpStr = Utf8ToString(u8Data, 1);
237         std::transform(tmpStr.begin(), tmpStr.end(), tmpStr.begin(), [](unsigned char c) { return std::toupper(c); });
238         size_t idx = Find(thisStr, tmpStr, 0);
239         return idx;
240     }
241 
UnicodeFromUtf8(const uint8_t * p,int maxLen,const uint8_t ** pp)242     static int UnicodeFromUtf8(const uint8_t *p, int maxLen, const uint8_t **pp)
243     {
244         int c = *p++;
245         if (c < UICODE_FROM_UTF8[0]) {
246             *pp = p;
247             return c;
248         }
249         int l = 0;
250         if (c >= UICODE_FROM_UTF8[1] && c <= UICODE_FROM_UTF8[2]) { // 1 - 2: 0000 0080 - 0000 07FF
251             l = 1; // 1: 0000 0080 - 0000 07FF Unicode
252         } else if (c >= UICODE_FROM_UTF8[3] && c <= UICODE_FROM_UTF8[4]) { // 3 - 4: 0000 0800 - 0000 FFFF
253             l = 2; // 2: 0000 0800 - 0000 FFFF Unicode
254         } else if (c >= UICODE_FROM_UTF8[5] && c <= UICODE_FROM_UTF8[6]) { // 5 - 6: 0001 0000 - 0010 FFFF
255             l = 3; // 3: 0001 0000 - 0010 FFFF Unicode
256         } else if (c >= UICODE_FROM_UTF8[7] && c <= UICODE_FROM_UTF8[8]) { // 7 - 8: 0020 0000 - 03FF FFFF
257             l = 4; // 4: 0020 0000 - 03FF FFFF Unicode
258         } else if (c == UICODE_FROM_UTF8[9] || c == UICODE_FROM_UTF8[10]) { // 9 - 10: 0400 0000 - 7FFF FFFF
259             l = 5; // 5: 0400 0000 - 7FFF FFFF Unicode
260         } else {
261             return INVALID_UNICODE_FROM_UTF8;
262         }
263         /* check that we have enough characters */
264         if ((l + 1) > maxLen) {
265             return INVALID_UNICODE_FROM_UTF8;
266         }
267         return FromUtf8(c, l, p, pp);
268     }
269 
FromUtf8(int c,int l,const uint8_t * p,const uint8_t ** pp)270     static int FromUtf8(int c, int l, const uint8_t *p, const uint8_t **pp)
271     {
272         uint32_t b;
273         c &= UTF8_FIRST_CODE[l - 1];
274         for (int i = 0; i < l; i++) {
275             b = *p++;
276             if (b < common::utf_helper::UTF8_2B_SECOND || b >= common::utf_helper::UTF8_2B_FIRST) {
277                 return INVALID_UNICODE_FROM_UTF8;
278             }
279             c = (c << 6) | (b & common::utf_helper::UTF8_2B_THIRD); // 6: Maximum Unicode range
280         }
281         if (c < UTF8_MIN_CODE[l - 1]) {
282             return INVALID_UNICODE_FROM_UTF8;
283         }
284         *pp = p;
285         return c;
286     }
287 
// Appends `str2` to the end of `str1`, modifying `str1`.
static inline void InplaceAppend(std::u16string &str1, const std::u16string &str2)
{
    str1 += str2;
}
292 
// Returns a new string holding `str1` followed by `str2`; neither input is modified.
static inline std::u16string Append(const std::u16string &str1, const std::u16string &str2)
{
    std::u16string joined;
    // Allocate once for the final size, then return the local directly so it is moved or
    // elided instead of being copied out through a reference.
    joined.reserve(str1.length() + str2.length());
    joined += str1;
    joined += str2;
    return joined;
}
298 
// Decodes the UTF-8 bytes in `data` and returns the first decoded code point.
// Returns 0 when decoding yields no code points (for example, when `data` is empty).
// Any further code points in `data` are decoded but ignored.
static inline uint32_t Utf8ToU32String(const std::vector<uint8_t> &data)
{
    const std::string bytes(data.begin(), data.end());
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
    const std::u32string decoded = converter.from_bytes(bytes);
    if (decoded.empty()) {
        return 0;
    }
    // Convert the value itself rather than reading char32_t storage through a uint32_t pointer.
    return static_cast<uint32_t>(decoded.front());
}
306 
Utf32ToString(uint32_t u32Data)307     static inline std::string Utf32ToString(uint32_t u32Data)
308     {
309         UChar32 charData = static_cast<int32_t>(u32Data);
310         icu::UnicodeString uString(charData);
311         std::string res;
312         uString.toUTF8String(res);
313         return res;
314     }
315 
// Returns line number `lineNumber` (1-based) of `srcStr`, without its line separator.
// If `srcStr` contains at least one real newline character, lines are separated by '\n';
// otherwise they are separated by the two-character sequence backslash + 'n'.
// Returns "" when `lineNumber` is less than 1 or beyond the last line. Empty lines are
// valid and also yield "".
static inline std::string GetSpecifiedLine(const std::string &srcStr, int lineNumber)
{
    if (lineNumber < 1) {
        return "";
    }
    const bool hasRealNewline = srcStr.find('\n') != std::string::npos;
    const std::string delimiter = hasRealNewline ? "\n" : "\\n";

    // Skip the (lineNumber - 1) separators that precede the requested line.
    size_t lineStart = 0;
    for (int skipped = 1; skipped < lineNumber; skipped++) {
        const size_t delimiterPos = srcStr.find(delimiter, lineStart);
        if (delimiterPos == std::string::npos) {
            return "";
        }
        lineStart = delimiterPos + delimiter.length();
    }

    const size_t lineEnd = srcStr.find(delimiter, lineStart);
    if (lineEnd == std::string::npos) {
        return srcStr.substr(lineStart);
    }
    // find() never returns a position before lineStart; lineEnd == lineStart is an empty line.
    return srcStr.substr(lineStart, lineEnd - lineStart);
}
354 
IsNonspace(uint16_t c)355     static inline bool IsNonspace(uint16_t c)
356     {
357         uint32_t len = sizeof(SPACE_OR_LINE_TERMINAL) / sizeof(SPACE_OR_LINE_TERMINAL[0]);
358         for (uint32_t i = 0; i < len; i++) {
359             if (c == SPACE_OR_LINE_TERMINAL[i]) {
360                 return true;
361             }
362             if (c < SPACE_OR_LINE_TERMINAL[i]) {
363                 return false;
364             }
365         }
366         return false;
367     }
368 
369     template<typename T>
GetStart(Span<T> & data,uint32_t length)370     static inline uint32_t GetStart(Span<T> &data, uint32_t length)
371     {
372         uint32_t start = 0;
373         while (start < length && IsNonspace(data[start])) {
374             start++;
375         }
376         return start;
377     }
378 
379     template<typename T>
GetEnd(Span<T> & data,int32_t start,uint32_t length)380     static inline int32_t GetEnd(Span<T> &data, int32_t start, uint32_t length)
381     {
382         if (length == 0U) {
383             return 0;
384         }
385         int32_t end = static_cast<int32_t>(length - 1);
386         while (end >= start && IsNonspace(data[end])) {
387             end--;
388         }
389         return end;
390     }
391 
Utf8CharInRange(uint8_t value,char start,char end)392     static bool Utf8CharInRange(uint8_t value, char start, char end)
393     {
394         ASSERT(start <= end);
395         return (value >= static_cast<uint8_t>(start)) && (value <= static_cast<uint8_t>(end));
396     }
397 
Vformat(const char * fmt,va_list args)398     static inline std::string Vformat(const char *fmt, va_list args)
399     {
400         static constexpr size_t SIZE = 1024;
401 
402         std::string result;
403         result.resize(SIZE);
404 
405         bool is_truncated = true;
406         while (is_truncated) {
407             va_list copy_args;
408             va_copy(copy_args, args);
409             int r = vsnprintf_truncated_s(result.data(), result.size() + 1, fmt, copy_args);
410             va_end(copy_args);
411 
412             if (r < 0) {
413                 return "";
414             }
415 
416             is_truncated = static_cast<size_t>(r) == result.size();
417             result.resize(result.size() * 2U);
418         }
419 
420         result.erase(std::find(result.begin(), result.end(), '\0'), result.end());
421 
422         return result;
423     }
424 
// Splits `str` into the non-empty pieces found between delimiter characters.
// Every character of `delimiter` counts as a separator on its own (find_first_of semantics);
// runs of separators, including leading and trailing ones, produce no empty pieces.
// An empty `delimiter` yields the whole string as a single piece, if it is not empty.
static std::vector<std::string> SplitString(const std::string &str, const std::string &delimiter)
{
    std::vector<std::string> pieces;
    // Start at the first non-delimiter character so leading separators do not end the scan.
    std::size_t pieceStart = str.find_first_not_of(delimiter);
    while (pieceStart != std::string::npos) {
        const std::size_t pieceEnd = str.find_first_of(delimiter, pieceStart);
        // When pieceEnd is npos the count exceeds the remainder and substr() clamps it.
        pieces.push_back(str.substr(pieceStart, pieceEnd - pieceStart));
        if (pieceEnd == std::string::npos) {
            break;
        }
        pieceStart = str.find_first_not_of(delimiter, pieceEnd);
    }
    return pieces;
}
445 
// Returns true when `str` ends with `suffix`; an empty `suffix` always matches.
static bool EndsWith(const std::string &str, const std::string &suffix)
{
    const std::size_t strLen = str.length();
    const std::size_t suffixLen = suffix.length();
    if (suffixLen > strLen) {
        return false;
    }
    return str.compare(strLen - suffixLen, suffixLen, suffix) == 0;
}
454 
// Parses `content` as a base-10 unsigned 32-bit integer.
//   content - NUL-terminated text; the whole string must be consumed by the number.
//   result  - receives the parsed value converted to uint32_t. It is written whenever both
//             pointers are non-null, but is only meaningful when the function returns true.
// Returns false when either pointer is null, no digits are found, trailing characters
// remain, the number carries a minus sign, or the value does not fit in uint32_t.
static bool StrToUInt32(const char *content, uint32_t *result)
{
    if (content == nullptr || result == nullptr) {
        return false;
    }
    const int DEC = 10;

    // strtoul accepts a leading '-' and wraps the value; detect the sign so it can be rejected.
    const char *cursor = content;
    while (std::isspace(static_cast<unsigned char>(*cursor)) != 0) {
        ++cursor;
    }
    const bool isNegative = (*cursor == '-');

    char *endPtr = nullptr;
    errno = 0;
    const unsigned long parsed = std::strtoul(content, &endPtr, DEC);
    *result = static_cast<uint32_t>(parsed);
    if (endPtr == content || *endPtr != '\0') {
        return false;
    }
    // unsigned long may be wider than 32 bits, so check both strtoul's own overflow report
    // and the uint32_t range.
    const bool outOfRange = (errno == ERANGE) || (parsed > std::numeric_limits<uint32_t>::max());
    return !isNegative && !outOfRange;
}
465 
466     template<class T>
StringStartWith(const CString & str,const T & startStr)467     static bool StringStartWith(const CString& str, const T& startStr)
468     {
469         size_t startStrLen = startStr.length();
470         return str.length() >= startStrLen && str.compare(0, startStrLen, startStr) == 0;
471     }
472 
StringStartWith(const CString & str,const char startStr)473     static bool StringStartWith(const CString& str, const char startStr)
474     {
475         return !str.empty() && str[0] == startStr;
476     }
477 
478     template<class T>
StringEndWith(const CString & str,const T & endStr)479     static bool StringEndWith(const CString& str, const T& endStr)
480     {
481         size_t endStrLen = endStr.length();
482         size_t len = str.length();
483         return len >= endStrLen && str.compare(len - endStrLen, endStrLen, endStr) == 0;
484     }
485 
StringEndWith(const CString & str,const char endStr)486     static bool StringEndWith(const CString& str, const char endStr)
487     {
488         const size_t len = str.length();
489         return len > 0 && str[len - 1] == endStr;
490     }
491 
492     static void SplitString(const CString& str, CVector<CString>& out, size_t startPos, size_t times = 0, char c = '/')
493     {
494         size_t left = startPos;
495         size_t pos = 0;
496         size_t index = 0;
497         while ((pos = str.find(c, left)) != CString::npos) {
498             if (times != 0 && index >= times) {
499                 return;
500             }
501             out.emplace_back(str.substr(left, pos - left));
502             left = pos + 1;
503             index++;
504         }
505 
506         if ((times == 0 || index < times) && left < str.length()) {
507             out.emplace_back(str.substr(left));
508         }
509     }
510 
511     static CString JoinString(const CVector<CString>& strs, size_t startIndex, size_t endIndex, char c = '/')
512     {
513         CString out;
514         for (size_t index = startIndex; index < strs.size() && index <= endIndex; ++index) {
515             if (!strs[index].empty()) {
516                 out.append(strs[index]) += c;
517             }
518         }
519         if (!out.empty()) {
520             out.pop_back();
521         }
522         return out;
523     }
524 };
525 }  // namespace panda::ecmascript::base
526 #endif  // ECMASCRIPT_BASE_STRING_HELP_H
527