• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_BASE_STRING_HELP_H
17 #define ECMASCRIPT_BASE_STRING_HELP_H
18 
19 #include <algorithm>
20 #include <codecvt>
21 #include <locale>
22 #include <regex>
23 #include <sstream>
24 #include <string>
25 #include <vector>
26 
27 #include "ecmascript/base/utf_helper.h"
28 #include "ecmascript/ecma_vm.h"
29 #include "ecmascript/js_thread.h"
30 #include "ecmascript/mem/assert_scope.h"
31 #include "ecmascript/object_factory.h"
32 
33 #include "unicode/unistr.h"
34 
35 namespace panda::ecmascript::base {
// Code points treated as white space or line terminators. The entries are kept in ascending
// order; StringHelper::IsNonspace relies on that ordering to stop scanning early.
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
static constexpr uint16_t SPACE_OR_LINE_TERMINAL[] = {
    0x0009, 0x000A, 0x000B, 0x000C, 0x000D,                                          // TAB, LF, VT, FF, CR
    0x0020, 0x00A0, 0x1680,                                                          // SPACE, NBSP, OGHAM SPACE MARK
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009,  // EN QUAD .. THIN SPACE
    0x200A,                                                                          // HAIR SPACE
    0x2028, 0x2029,                                                                  // LINE / PARAGRAPH SEPARATOR
    0x202F, 0x205F, 0x3000, 0xFEFF,                                                  // NNBSP, MMSP, IDEOGRAPHIC SPACE, BOM
};

// Lead-byte boundaries used by StringHelper::UnicodeFromUtf8 to classify the first byte of a
// UTF-8 sequence: [0] is the first non-ASCII value, then [1]-[2], [3]-[4], [5]-[6] and [7]-[8]
// are the inclusive lead-byte ranges for 1, 2, 3 and 4 continuation bytes, and [9], [10] are the
// two lead bytes that announce 5 continuation bytes.
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
static constexpr int UICODE_FROM_UTF8[] = {
    0x80,
    0xc0, 0xdf,
    0xe0, 0xef,
    0xf0, 0xf7,
    0xf8, 0xfb,
    0xfc, 0xfd,
};

// Smallest code point accepted for a sequence with (index + 1) continuation bytes;
// StringHelper::FromUtf8 rejects anything decoded below this value.
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
static constexpr int UTF8_MIN_CODE[] = {
    0x80,
    0x800,
    0x10000,
    0x00200000,
    0x04000000,
};

// Mask applied by StringHelper::FromUtf8 to the lead byte of a sequence with (index + 1)
// continuation bytes, keeping only the low bits of that byte.
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
static constexpr char UTF8_FIRST_CODE[] = {
    0x1f,
    0xf,
    0x7,
    0x3,
    0x1,
};
51 class StringHelper {
52 public:
53     static constexpr int INVALID_UNICODE_FROM_UTF8 = -1;
54 
RepalceAll(CString str,const CString & oldValue,const CString & newValue)55     static inline CString RepalceAll(CString str, const CString &oldValue,
56                                      const CString &newValue)
57     {
58         if (oldValue.empty() || oldValue == newValue) {
59             return str;
60         }
61         CString::size_type pos(0);
62         while ((pos = str.find(oldValue, pos)) != CString::npos) {
63             str.replace(pos, oldValue.length(), newValue);
64             pos += newValue.length();
65         }
66         return str;
67     }
68 
// Copies dataLen UTF-16 code units starting at utf16Data into a std::u16string.
// The units are copied verbatim (embedded zero units included); nothing is validated.
static inline std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen)
{
    return std::u16string(reinterpret_cast<const char16_t *>(utf16Data), dataLen);
}
75 
// Copies dataLen bytes starting at utf8Data into a std::string.
// The bytes are copied verbatim (embedded zero bytes included); nothing is validated.
static inline std::string Utf8ToString(const uint8_t *utf8Data, uint32_t dataLen)
{
    return std::string(reinterpret_cast<const char *>(utf8Data), dataLen);
}
82 
// Converts dataLen bytes of UTF-8 starting at utf8Data into a UTF-16 std::u16string.
// The conversion is done by std::wstring_convert with std::codecvt_utf8_utf16.
static inline std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen)
{
    using Utf8ToUtf16 = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    const std::string utf8Bytes(reinterpret_cast<const char *>(utf8Data), dataLen);
    Utf8ToUtf16 converter;
    return converter.from_bytes(utf8Bytes);
}
90 
// Encodes a wide string as UTF-8 bytes using std::wstring_convert with std::codecvt_utf8<wchar_t>.
static inline std::string WstringToString(const std::wstring &wstr)
{
    using WideToUtf8 = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
    WideToUtf8 converter;
    return converter.to_bytes(wstr);
}
95 
// Decodes UTF-8 bytes into a wide string using std::wstring_convert with std::codecvt_utf8<wchar_t>.
static inline std::wstring StringToWstring(const std::string &str)
{
    using Utf8ToWide = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;
    Utf8ToWide converter;
    return converter.from_bytes(str);
}
100 
// Encodes a UTF-16 string as UTF-8 bytes using std::wstring_convert with std::codecvt_utf8_utf16.
static inline std::string U16stringToString(const std::u16string &u16str)
{
    using Utf16ToUtf8 = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    Utf16ToUtf8 converter;
    return converter.to_bytes(u16str);
}
105 
// Decodes UTF-8 bytes into a UTF-16 string using std::wstring_convert with std::codecvt_utf8_utf16.
static inline std::u16string StringToU16string(const std::string &str)
{
    using Utf8ToUtf16 = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    Utf8ToUtf16 converter;
    return converter.from_bytes(str);
}
110 
// Returns the index of the first occurrence of searchStr in thisStr at or after pos, or
// std::string::npos when there is none. pos is converted to an unsigned index, so a negative
// value becomes a very large offset.
static inline size_t Find(const std::string &thisStr, const std::string &searchStr, int32_t pos)
{
    const auto from = static_cast<std::string::size_type>(pos);
    return thisStr.find(searchStr, from);
}
116 
// Returns the index of the first occurrence of searchStr in thisStr at or after pos, or
// std::u16string::npos when there is none. pos is converted to an unsigned index, so a
// negative value becomes a very large offset.
static inline size_t Find(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
{
    const auto from = static_cast<std::u16string::size_type>(pos);
    return thisStr.find(searchStr, from);
}
122 
// Returns the index of the last occurrence of searchStr in thisStr that starts at or before
// pos, or std::u16string::npos when there is none. pos is converted to an unsigned index, so a
// negative value becomes a very large offset (i.e. the whole string is searched).
static inline size_t RFind(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
{
    const auto lastStart = static_cast<std::u16string::size_type>(pos);
    return thisStr.rfind(searchStr, lastStart);
}
128 
ToUpper(const std::u16string & str)129     static inline std::string ToUpper(const std::u16string &str)
130     {
131         std::u16string tmpStr = str;
132         const char16_t *constChar16tData = tmpStr.data();
133         icu::UnicodeString uString(constChar16tData);
134         icu::UnicodeString up = uString.toUpper();
135         std::string res;
136         up.toUTF8String(res);
137         return res;
138     }
139 
ToLocaleUpper(const std::u16string & str,const icu::Locale & locale)140     static inline std::string ToLocaleUpper(const std::u16string &str, const icu::Locale &locale)
141     {
142         std::u16string tmpStr = str;
143         const char16_t *constChar16tData = tmpStr.data();
144         icu::UnicodeString uString(constChar16tData);
145         icu::UnicodeString up = uString.toUpper(locale);
146         std::string res;
147         up.toUTF8String(res);
148         return res;
149     }
150 
ToLower(const std::u16string & str)151     static inline std::string ToLower(const std::u16string &str)
152     {
153         std::u16string tmpStr = str;
154         const char16_t *constChar16tData = tmpStr.data();
155         icu::UnicodeString uString(constChar16tData);
156         icu::UnicodeString low = uString.toLower();
157         std::string res;
158         low.toUTF8String(res);
159         return res;
160     }
161 
ToLocaleLower(const std::u16string & str,const icu::Locale & locale)162     static inline std::string ToLocaleLower(const std::u16string &str, const icu::Locale &locale)
163     {
164         std::u16string tmpStr = str;
165         const char16_t *constChar16tData = tmpStr.data();
166         icu::UnicodeString uString(constChar16tData);
167         icu::UnicodeString low = uString.toLower(locale);
168         std::string res;
169         low.toUTF8String(res);
170         return res;
171     }
172 
FindFromU16ToUpper(const std::u16string & thisStr,uint16_t * u16Data)173     static inline size_t FindFromU16ToUpper(const std::u16string &thisStr, uint16_t *u16Data)
174     {
175         std::u16string tmpStr = Utf16ToU16String(u16Data, 1);
176         const char16_t *constChar16tData = tmpStr.data();
177         icu::UnicodeString uString(constChar16tData);
178         icu::UnicodeString up = uString.toUpper();
179         std::string res;
180         up.toUTF8String(res);
181         std::u16string searchStr = StringToU16string(res);
182         size_t idx = Find(thisStr, searchStr, 0);
183         return idx;
184     }
185 
UnicodeFromUtf8(const uint8_t * p,int maxLen,const uint8_t ** pp)186     static int UnicodeFromUtf8(const uint8_t *p, int maxLen, const uint8_t **pp)
187     {
188         int c = *p++;
189         if (c < UICODE_FROM_UTF8[0]) {
190             *pp = p;
191             return c;
192         }
193         int l = 0;
194         if (c >=  UICODE_FROM_UTF8[1] && c <= UICODE_FROM_UTF8[2]) { // 1 - 2: 0000 0080 - 0000 07FF
195             l = 1; // 1: 0000 0080 - 0000 07FF Unicode
196         } else if (c >= UICODE_FROM_UTF8[3] && c <= UICODE_FROM_UTF8[4]) { // 3 - 4: 0000 0800 - 0000 FFFF
197             l = 2; // 2: 0000 0800 - 0000 FFFF Unicode
198         } else if (c >= UICODE_FROM_UTF8[5] && c <= UICODE_FROM_UTF8[6]) { // 5 - 6: 0001 0000 - 0010 FFFF
199             l = 3; // 3: 0001 0000 - 0010 FFFF Unicode
200         } else if (c >= UICODE_FROM_UTF8[7] && c <= UICODE_FROM_UTF8[8]) { // 7 - 8: 0020 0000 - 03FF FFFF
201             l = 4; // 4: 0020 0000 - 03FF FFFF Unicode
202         } else if (c == UICODE_FROM_UTF8[9] || c == UICODE_FROM_UTF8[10]) { // 9 - 10: 0400 0000 - 7FFF FFFF
203             l = 5; // 5: 0400 0000 - 7FFF FFFF Unicode
204         } else {
205             return INVALID_UNICODE_FROM_UTF8;
206         }
207         /* check that we have enough characters */
208         if ((l + 1) > maxLen) {
209             return INVALID_UNICODE_FROM_UTF8;
210         }
211         return FromUtf8(c, l, p, pp);
212     }
213 
FromUtf8(int c,int l,const uint8_t * p,const uint8_t ** pp)214     static int FromUtf8(int c, int l, const uint8_t *p, const uint8_t **pp)
215     {
216         uint32_t b;
217         c &= UTF8_FIRST_CODE[l - 1];
218         for (int i = 0; i < l; i++) {
219             b = *p++;
220             if (b < utf_helper::UTF8_2B_SECOND || b >= utf_helper::UTF8_2B_FIRST) {
221                 return INVALID_UNICODE_FROM_UTF8;
222             }
223             c = (c << 6) | (b & utf_helper::UTF8_2B_THIRD); // 6: Maximum Unicode range
224         }
225         if (c < UTF8_MIN_CODE[l - 1]) {
226             return INVALID_UNICODE_FROM_UTF8;
227         }
228         *pp = p;
229         return c;
230     }
231 
// Returns a new string holding str1 followed by str2; neither argument is modified.
static inline std::u16string Append(const std::u16string &str1, const std::u16string &str2)
{
    std::u16string joined(str1);
    joined += str2;
    return joined;
}
237 
// Decodes the UTF-8 bytes in data and returns the first decoded code point.
// Returns 0 when the decoded text is empty (e.g. data is empty).
// Despite the name, only a single code point is returned, not a string.
static inline uint32_t Utf8ToU32String(const std::vector<uint8_t> &data)
{
    const std::string str(data.begin(), data.end());
    const std::u32string u32str =
        std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>{}.from_bytes(str);
    // Read the element by value instead of reinterpreting the char32_t storage as uint32_t
    // (an aliasing violation), and make the empty case explicit instead of relying on the
    // string's terminator being readable.
    if (u32str.empty()) {
        return 0;
    }
    return static_cast<uint32_t>(u32str.front());
}
245 
Utf32ToString(uint32_t u32Data)246     static inline std::string Utf32ToString(uint32_t u32Data)
247     {
248         UChar32 charData = static_cast<int32_t>(u32Data);
249         icu::UnicodeString uString(charData);
250         std::string res;
251         uString.toUTF8String(res);
252         return res;
253     }
254 
// Returns line number lineNumber (1-based) of srcStr without its terminating '\n'.
// Lines are delimited by '\n' only; a preceding '\r' is not stripped.
// Returns an empty string when lineNumber is less than 1 or srcStr has fewer lines; an empty
// line also yields an empty string.
static inline std::string GetSpecifiedLine(const std::string &srcStr, int lineNumber)
{
    if (lineNumber < 1) {
        return "";
    }
    // Skip the (lineNumber - 1) lines that precede the requested one.
    size_t lineStart = 0;
    for (int skipped = 1; skipped < lineNumber; ++skipped) {
        const size_t newline = srcStr.find('\n', lineStart);
        if (newline == std::string::npos) {
            return "";
        }
        lineStart = newline + 1;
    }
    // find() never returns a position before lineStart, and it returns exactly lineStart for
    // an empty line, so the former "end > start" assertion was wrong and has been dropped.
    const size_t lineEnd = srcStr.find('\n', lineStart);
    const size_t count = (lineEnd == std::string::npos) ? std::string::npos : lineEnd - lineStart;
    return srcStr.substr(lineStart, count);
}
276 
IsNonspace(uint16_t c)277     static inline bool IsNonspace(uint16_t c)
278     {
279         uint32_t len = sizeof(SPACE_OR_LINE_TERMINAL) / sizeof(SPACE_OR_LINE_TERMINAL[0]);
280         for (uint32_t i = 0; i < len; i++) {
281             if (c == SPACE_OR_LINE_TERMINAL[i]) {
282                 return true;
283             }
284             if (c < SPACE_OR_LINE_TERMINAL[i]) {
285                 return false;
286             }
287         }
288         return false;
289     }
290 
291     template<typename T>
GetStart(Span<T> & data,uint32_t length)292     static inline uint32_t GetStart(Span<T> &data, uint32_t length)
293     {
294         uint32_t start = 0;
295         while (start < length && IsNonspace(data[start])) {
296             start++;
297         }
298         return start;
299     }
300 
301     template<typename T>
GetEnd(Span<T> & data,uint32_t start,uint32_t length)302     static inline uint32_t GetEnd(Span<T> &data, uint32_t start, uint32_t length)
303     {
304         if (length == 0U) {
305             return 0;
306         }
307         uint32_t end = length - 1;
308         while (end >= start && IsNonspace(data[end])) {
309             end--;
310         }
311         return end;
312     }
313 
Utf8CharInRange(uint8_t value,char start,char end)314     static bool Utf8CharInRange(uint8_t value, char start, char end)
315     {
316         ASSERT(start <= end);
317         return (value >= static_cast<uint8_t>(start)) && (value <= static_cast<uint8_t>(end));
318     }
319 
Vformat(const char * fmt,va_list args)320     static inline std::string Vformat(const char *fmt, va_list args)
321     {
322         static constexpr size_t SIZE = 1024;
323 
324         std::string result;
325         result.resize(SIZE);
326 
327         bool is_truncated = true;
328         while (is_truncated) {
329             va_list copy_args;
330             va_copy(copy_args, args);
331             int r = vsnprintf_truncated_s(result.data(), result.size() + 1, fmt, copy_args);
332             va_end(copy_args);
333 
334             if (r < 0) {
335                 return "";
336             }
337 
338             is_truncated = static_cast<size_t>(r) == result.size();
339             result.resize(result.size() * 2U);
340         }
341 
342         result.erase(std::find(result.begin(), result.end(), '\0'), result.end());
343 
344         return result;
345     }
346 
// Splits str into the non-empty tokens separated by any of the characters in delimiter.
// Every character of delimiter is a separator on its own (find_first_of semantics); runs of
// separators, and separators at the start or end of str, produce no empty tokens.
// An empty delimiter yields str itself as the only token (or nothing when str is empty).
static std::vector<std::string> SplitString(const std::string &str, const std::string &delimiter)
{
    std::vector<std::string> tokens;
    // Start at the first non-separator character: previously an input that began with a
    // separator returned no tokens at all.
    std::size_t tokenBegin = str.find_first_not_of(delimiter);
    while (tokenBegin != std::string::npos) {
        const std::size_t tokenEnd = str.find_first_of(delimiter, tokenBegin);
        if (tokenEnd == std::string::npos) {
            tokens.push_back(str.substr(tokenBegin));
            break;
        }
        tokens.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
        tokenBegin = str.find_first_not_of(delimiter, tokenEnd);
    }
    return tokens;
}
367 
// Returns true when str ends with suffix. An empty suffix matches every string.
static bool EndsWith(const std::string &str, const std::string &suffix)
{
    const std::size_t strLen = str.length();
    const std::size_t suffixLen = suffix.length();
    if (suffixLen > strLen) {
        return false;
    }
    return str.compare(strLen - suffixLen, suffixLen, suffix) == 0;
}
376 
// Parses content as a base-10 unsigned integer.
//   content - NUL-terminated text; leading white space and an optional sign are consumed by
//             strtoull, nothing may follow the digits.
//   result  - receives the parsed value; only meaningful when true is returned (on failure
//             it holds the low 32 bits of whatever was parsed, as before).
// Returns true only when the whole text was consumed and the value fits in uint32_t.
// Returns false for null arguments, empty / non-numeric text, trailing characters,
// values above UINT32_MAX and negative non-zero values.
static bool StrToUInt32(const char *content, uint32_t *result)
{
    if (content == nullptr || result == nullptr) {
        return false;
    }
    const int DEC = 10;
    char *endPtr = nullptr;
    // Parse with at least 64 bits so that out-of-range input can be detected instead of being
    // silently truncated; strtoull saturates to ULLONG_MAX on overflow and maps "-N" to a
    // huge value, both of which fail the range check below.
    const unsigned long long parsed = std::strtoull(content, &endPtr, DEC);
    *result = static_cast<uint32_t>(parsed);
    if (endPtr == content || *endPtr != '\0') {
        return false;
    }
    return parsed <= UINT32_MAX;
}
387 
StringStartWith(const CString & str,const CString & startStr)388     static bool StringStartWith(const CString& str, const CString& startStr)
389     {
390         size_t startStrLen = startStr.length();
391         return ((str.length() >= startStrLen) && (str.compare(0, startStrLen, startStr) == 0));
392     }
393 
StringEndWith(const CString & str,const CString & endStr)394     static bool StringEndWith(const CString& str, const CString& endStr)
395     {
396         size_t endStrLen = endStr.length();
397         size_t len = str.length();
398         return ((len >= endStrLen) && (str.compare(len - endStrLen, endStrLen, endStr) == 0));
399     }
400 
401     static void SplitString(const CString& str, CVector<CString>& out, size_t startPos, size_t times = 0, char c = '/')
402     {
403         size_t left = startPos;
404         size_t pos = 0;
405         size_t index = 0;
406         while ((pos = str.find(c, left)) != CString::npos) {
407             if (times != 0 && index >= times) {
408                 return;
409             }
410             out.emplace_back(str.substr(left, pos - left));
411             left = pos + 1;
412             index++;
413         }
414 
415         if ((times == 0 || index < times) && left < str.length()) {
416             out.emplace_back(str.substr(left));
417         }
418     }
419 
420     static CString JoinString(const CVector<CString>& strs, size_t startIndex, size_t endIndex, char c = '/')
421     {
422         CString out;
423         for (size_t index = startIndex; index < strs.size() && index <= endIndex; ++index) {
424             if (!strs[index].empty()) {
425                 out.append(strs[index]) += c;
426             }
427         }
428         if (!out.empty()) {
429             out.pop_back();
430         }
431         return out;
432     }
433 };
434 }  // namespace panda::ecmascript::base
435 #endif  // ECMASCRIPT_BASE_STRING_HELP_H
436