• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_BASE_STRING_HELP_H
17 #define ECMASCRIPT_BASE_STRING_HELP_H
18 
19 #include <algorithm>
20 #include <codecvt>
21 #include <locale>
22 #include <regex>
23 #include <string>
24 #include <vector>
25 
26 #include "ecmascript/base/utf_helper.h"
27 #include "ecmascript/ecma_string-inl.h"
28 #include "ecmascript/ecma_vm.h"
29 #include "ecmascript/js_thread.h"
30 #include "ecmascript/mem/assert_scope-inl.h"
31 #include "ecmascript/object_factory.h"
32 #include "libpandafile/file_items.h"
33 #include "unicode/unistr.h"
34 
35 namespace panda::ecmascript::base {
// UTF-16 code units treated as white space or line terminators.
// The table is kept in strictly ascending order: StringHelper::IsNonspace() stops
// scanning at the first entry that is greater than the value it is looking for,
// so a new entry must be inserted at its sorted position.
// NOLINTNEXTLINE(modernize-avoid-c-arrays)
static constexpr uint16_t SPACE_OR_LINE_TERMINAL[] = {
    // ASCII controls: TAB, LF, VT, FF, CR
    0x0009, 0x000A, 0x000B, 0x000C, 0x000D,
    // SPACE, NO-BREAK SPACE, OGHAM SPACE MARK
    0x0020, 0x00A0, 0x1680,
    // U+2000..U+200A: EN QUAD .. HAIR SPACE
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
    // LINE SEPARATOR, PARAGRAPH SEPARATOR
    0x2028, 0x2029,
    // NARROW NO-BREAK SPACE, MEDIUM MATHEMATICAL SPACE, IDEOGRAPHIC SPACE, ZERO WIDTH NO-BREAK SPACE
    0x202F, 0x205F, 0x3000, 0xFEFF,
};
42 
43 class StringHelper {
44 public:
45     static std::string ToStdString(EcmaString *string);
46 
47     static bool CheckDuplicate(EcmaString *string);
48 
Contains(const EcmaString * string,const EcmaString * other)49     static inline bool Contains(const EcmaString *string, const EcmaString *other)
50     {
51         [[maybe_unused]] DisallowGarbageCollection noGc;
52         CString str = ConvertToString(string, StringConvertedUsage::LOGICOPERATION);
53         CString oth = ConvertToString(other, StringConvertedUsage::LOGICOPERATION);
54         CString::size_type index = str.find(oth);
55         return (index != CString::npos);
56     }
57 
RepalceAll(CString str,const CString & oldValue,const CString & newValue)58     static inline CString RepalceAll(CString str, const CString &oldValue,
59                                             const CString &newValue)
60     {
61         if (oldValue.empty() || oldValue == newValue) {
62             return str;
63         }
64         CString::size_type pos(0);
65         while ((pos = str.find(oldValue, pos)) != CString::npos) {
66             str.replace(pos, oldValue.length(), newValue);
67             pos += newValue.length();
68         }
69         return str;
70     }
71 
SubString(JSThread * thread,const JSHandle<EcmaString> & string,uint32_t start,uint32_t length)72     static inline std::string SubString(JSThread *thread, const JSHandle<EcmaString> &string, uint32_t start,
73                                         uint32_t length)
74     {
75         EcmaString *substring = EcmaString::FastSubString(string, start, length, thread->GetEcmaVM());
76         return std::string(ConvertToString(substring, StringConvertedUsage::LOGICOPERATION));
77     }
78 
// Copies `dataLen` UTF-16 code units starting at `utf16Data` into a std::u16string.
// The units are copied verbatim (embedded zero units included); no validation is done.
static inline std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen)
{
    return std::u16string(reinterpret_cast<const char16_t *>(utf16Data), dataLen);
}
85 
// Copies `dataLen` bytes starting at `utf8Data` into a std::string.
// The bytes are copied verbatim (embedded zero bytes included); no validation is done.
static inline std::string Utf8ToString(const uint8_t *utf8Data, uint32_t dataLen)
{
    return std::string(reinterpret_cast<const char *>(utf8Data), dataLen);
}
92 
// Decodes `dataLen` bytes of UTF-8 starting at `utf8Data` into UTF-16.
// The conversion is done by std::wstring_convert with std::codecvt_utf8_utf16.
static inline std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen)
{
    using Utf8ToUtf16 = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    const std::string utf8Bytes(reinterpret_cast<const char *>(utf8Data), dataLen);
    Utf8ToUtf16 decoder;
    return decoder.from_bytes(utf8Bytes);
}
100 
// Encodes a wide string as UTF-8 bytes using std::codecvt_utf8<wchar_t>.
static inline std::string WstringToString(const std::wstring &wstr)
{
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Codec;
    return utf8Codec.to_bytes(wstr);
}
105 
// Decodes UTF-8 bytes into a wide string using std::codecvt_utf8<wchar_t>.
static inline std::wstring StringToWstring(const std::string &str)
{
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf8Codec;
    return utf8Codec.from_bytes(str);
}
110 
// Encodes a UTF-16 string as UTF-8 bytes using std::codecvt_utf8_utf16<char16_t>.
static inline std::string U16stringToString(const std::u16string &u16str)
{
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> utf16Codec;
    return utf16Codec.to_bytes(u16str);
}
115 
// Decodes UTF-8 bytes into a UTF-16 string using std::codecvt_utf8_utf16<char16_t>.
static inline std::u16string StringToU16string(const std::string &str)
{
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> utf16Codec;
    return utf16Codec.from_bytes(str);
}
120 
// Returns the index of the first occurrence of `searchStr` in `thisStr` at or after
// `pos`, or std::string::npos when there is none.
// `pos` is converted to size_t as-is, so a negative value becomes a very large
// offset and the search then reports npos.
static inline size_t Find(const std::string &thisStr, const std::string &searchStr, int32_t pos)
{
    const auto from = static_cast<std::string::size_type>(pos);
    return thisStr.find(searchStr, from);
}
126 
// Returns the index of the first occurrence of `searchStr` in `thisStr` at or after
// `pos`, or std::u16string::npos when there is none.
// `pos` is converted to size_t as-is, so a negative value becomes a very large
// offset and the search then reports npos.
static inline size_t Find(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
{
    const auto from = static_cast<std::u16string::size_type>(pos);
    return thisStr.find(searchStr, from);
}
132 
// Returns the index of the last occurrence of `searchStr` in `thisStr` that starts at
// or before `pos`, or std::u16string::npos when there is none.
// `pos` is converted to size_t as-is, so a negative value becomes a very large
// offset and the whole string is searched.
static inline size_t RFind(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos)
{
    const auto upTo = static_cast<std::u16string::size_type>(pos);
    return thisStr.rfind(searchStr, upTo);
}
138 
ToUpper(JSThread * thread,const std::u16string & str)139     static inline EcmaString *ToUpper(JSThread *thread, const std::u16string &str)
140     {
141         ecmascript::ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
142         std::u16string tmpStr = str;
143         const char16_t *constChar16tData = tmpStr.data();
144         icu::UnicodeString uString(constChar16tData);
145         icu::UnicodeString up = uString.toUpper();
146         std::string res;
147         up.toUTF8String(res);
148         return *factory->NewFromStdString(res);
149     }
150 
ToLower(JSThread * thread,const std::u16string & str)151     static inline EcmaString *ToLower(JSThread *thread, const std::u16string &str)
152     {
153         ecmascript::ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
154         std::u16string tmpStr = str;
155         const char16_t *constChar16tData = tmpStr.data();
156         icu::UnicodeString uString(constChar16tData);
157         icu::UnicodeString low = uString.toLower();
158         std::string res;
159         low.toUTF8String(res);
160         return *factory->NewFromStdString(res);
161     }
162 
FindFromU16ToUpper(const std::u16string & thisStr,uint16_t * u16Data)163     static inline size_t FindFromU16ToUpper(const std::u16string &thisStr, uint16_t *u16Data)
164     {
165         std::u16string tmpStr = Utf16ToU16String(u16Data, 1);
166         const char16_t *constChar16tData = tmpStr.data();
167         icu::UnicodeString uString(constChar16tData);
168         icu::UnicodeString up = uString.toUpper();
169         std::string res;
170         up.toUTF8String(res);
171         std::u16string searchStr = StringToU16string(res);
172         size_t idx = Find(thisStr, searchStr, 0);
173         return idx;
174     }
175 
176     static EcmaString *Repeat(JSThread *thread, const std::u16string &thisStr, int32_t repeatLen, bool canBeCompress);
177 
178     static EcmaString *Trim(JSThread *thread, const std::u16string &thisStr);
179 
// Returns a new string consisting of `str1` followed by `str2`; neither input is modified.
static inline std::u16string Append(const std::u16string &str1, const std::u16string &str2)
{
    std::u16string joined(str1);
    joined += str2;
    return joined;
}
185 
// Decodes the UTF-8 bytes in `data` and returns the FIRST decoded code point
// (despite the name, no string is returned). Returns 0 when `data` decodes to nothing.
// The code point is read with a value conversion instead of reinterpreting the
// char32_t buffer as uint32_t, which avoids accessing the buffer through an
// unrelated pointer type.
static inline uint32_t Utf8ToU32String(const std::vector<uint8_t> &data)
{
    std::string str(data.begin(), data.end());
    std::u32string u32str = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>{}.from_bytes(str);
    if (u32str.empty()) {
        return 0U;
    }
    return static_cast<uint32_t>(u32str.front());
}
193 
Utf32ToString(uint32_t u32Data)194     static inline std::string Utf32ToString(uint32_t u32Data)
195     {
196         UChar32 charData = u32Data;
197         icu::UnicodeString uString(charData);
198         std::string res;
199         uString.toUTF8String(res);
200         return res;
201     }
202 
IsNonspace(uint16_t c)203     static inline bool IsNonspace(uint16_t c)
204     {
205         uint32_t len = sizeof(SPACE_OR_LINE_TERMINAL) / sizeof(SPACE_OR_LINE_TERMINAL[0]);
206         for (uint32_t i = 0; i < len; i++) {
207             if (c == SPACE_OR_LINE_TERMINAL[i]) {
208                 return true;
209             }
210             if (c < SPACE_OR_LINE_TERMINAL[i]) {
211                 return false;
212             }
213         }
214         return false;
215     }
216 
217     template<typename T>
GetStart(Span<T> & data,uint32_t length)218     static inline uint32_t GetStart(Span<T> &data, uint32_t length)
219     {
220         uint32_t start = 0;
221         while (start < length && IsNonspace(data[start])) {
222             start++;
223         }
224         return start;
225     }
226 
227     template<typename T>
GetEnd(Span<T> & data,uint32_t start,uint32_t length)228     static inline uint32_t GetEnd(Span<T> &data, uint32_t start, uint32_t length)
229     {
230         uint32_t end = length - 1;
231         while (end >= start && IsNonspace(data[end])) {
232             end--;
233         }
234         return end;
235     }
236 };
237 }  // namespace panda::ecmascript::base
238 #endif  // ECMASCRIPT_BASE_STRING_HELP_H
239