1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_BASE_STRING_HELP_H 17 #define ECMASCRIPT_BASE_STRING_HELP_H 18 19 #include <algorithm> 20 #include <codecvt> 21 #include <locale> 22 #include <regex> 23 #include <string> 24 #include <vector> 25 26 #include "ecmascript/base/utf_helper.h" 27 #include "ecmascript/ecma_string-inl.h" 28 #include "ecmascript/ecma_vm.h" 29 #include "ecmascript/js_thread.h" 30 #include "ecmascript/mem/assert_scope-inl.h" 31 #include "ecmascript/object_factory.h" 32 #include "libpandafile/file_items.h" 33 #include "unicode/unistr.h" 34 35 namespace panda::ecmascript::base { 36 // White Space Code Points and Line Terminators Code Point 37 // NOLINTNEXTLINE(modernize-avoid-c-arrays) 38 static constexpr uint16_t SPACE_OR_LINE_TERMINAL[] = { 39 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 40 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000, 0xFEFF, 41 }; 42 43 class StringHelper { 44 public: 45 static std::string ToStdString(EcmaString *string); 46 47 static bool CheckDuplicate(EcmaString *string); 48 Contains(const EcmaString * string,const EcmaString * other)49 static inline bool Contains(const EcmaString *string, const EcmaString *other) 50 { 51 [[maybe_unused]] DisallowGarbageCollection noGc; 52 CString str = ConvertToString(string, StringConvertedUsage::LOGICOPERATION); 53 CString oth = ConvertToString(other, StringConvertedUsage::LOGICOPERATION); 54 CString::size_type index = str.find(oth); 55 return (index != CString::npos); 56 } 57 RepalceAll(CString str,const CString & oldValue,const CString & newValue)58 static inline CString RepalceAll(CString str, const CString &oldValue, 59 const CString &newValue) 60 { 61 if (oldValue.empty() || oldValue == newValue) { 62 return str; 63 } 64 CString::size_type pos(0); 65 while ((pos = str.find(oldValue, pos)) != CString::npos) { 66 str.replace(pos, oldValue.length(), newValue); 67 pos += newValue.length(); 68 } 69 return str; 70 } 71 SubString(JSThread * thread,const JSHandle<EcmaString> & string,uint32_t start,uint32_t length)72 static inline std::string SubString(JSThread *thread, const JSHandle<EcmaString> &string, uint32_t start, 73 uint32_t length) 74 { 75 EcmaString *substring = EcmaString::FastSubString(string, start, length, thread->GetEcmaVM()); 76 return std::string(ConvertToString(substring, StringConvertedUsage::LOGICOPERATION)); 77 } 78 Utf16ToU16String(const uint16_t * utf16Data,uint32_t dataLen)79 static inline std::u16string Utf16ToU16String(const uint16_t *utf16Data, uint32_t dataLen) 80 { 81 auto *char16tData = reinterpret_cast<const char16_t *>(utf16Data); 82 std::u16string u16str(char16tData, dataLen); 83 return u16str; 84 } 85 Utf8ToString(const uint8_t * utf8Data,uint32_t dataLen)86 static inline std::string Utf8ToString(const uint8_t *utf8Data, uint32_t dataLen) 87 { 88 auto *charData = reinterpret_cast<const char *>(utf8Data); 89 std::string str(charData, dataLen); 90 return str; 91 } 92 Utf8ToU16String(const uint8_t * utf8Data,uint32_t dataLen)93 static inline std::u16string Utf8ToU16String(const uint8_t *utf8Data, uint32_t dataLen) 94 { 95 auto *charData = reinterpret_cast<const char *>(utf8Data); 96 std::string str(charData, dataLen); 97 std::u16string u16str = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.from_bytes(str); 98 return u16str; 99 } 100 WstringToString(const std::wstring & wstr)101 static inline std::string WstringToString(const std::wstring &wstr) 102 { 103 return std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>{}.to_bytes(wstr); 104 } 105 StringToWstring(const std::string & str)106 static inline std::wstring StringToWstring(const std::string &str) 107 { 108 return std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>{}.from_bytes(str); 109 } 110 U16stringToString(const std::u16string & u16str)111 static inline std::string U16stringToString(const std::u16string &u16str) 112 { 113 return std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.to_bytes(u16str); 114 } 115 StringToU16string(const std::string & str)116 static inline std::u16string StringToU16string(const std::string &str) 117 { 118 return std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>{}.from_bytes(str); 119 } 120 Find(const std::string & thisStr,const std::string & searchStr,int32_t pos)121 static inline size_t Find(const std::string &thisStr, const std::string &searchStr, int32_t pos) 122 { 123 size_t idx = thisStr.find(searchStr, pos); 124 return idx; 125 } 126 Find(const std::u16string & thisStr,const std::u16string & searchStr,int32_t pos)127 static inline size_t Find(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos) 128 { 129 size_t idx = thisStr.find(searchStr, pos); 130 return idx; 131 } 132 RFind(const std::u16string & thisStr,const std::u16string & searchStr,int32_t pos)133 static inline size_t RFind(const std::u16string &thisStr, const std::u16string &searchStr, int32_t pos) 134 { 135 size_t idx = thisStr.rfind(searchStr, pos); 136 return idx; 137 } 138 ToUpper(JSThread * thread,const std::u16string & str)139 static inline EcmaString *ToUpper(JSThread *thread, const std::u16string &str) 140 { 141 ecmascript::ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 142 std::u16string tmpStr = str; 143 const char16_t *constChar16tData = tmpStr.data(); 144 icu::UnicodeString uString(constChar16tData); 145 icu::UnicodeString up = uString.toUpper(); 146 std::string res; 147 up.toUTF8String(res); 148 return *factory->NewFromStdString(res); 149 } 150 ToLower(JSThread * thread,const std::u16string & str)151 static inline EcmaString *ToLower(JSThread *thread, const std::u16string &str) 152 { 153 ecmascript::ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 154 std::u16string tmpStr = str; 155 const char16_t *constChar16tData = tmpStr.data(); 156 icu::UnicodeString uString(constChar16tData); 157 icu::UnicodeString low = uString.toLower(); 158 std::string res; 159 low.toUTF8String(res); 160 return *factory->NewFromStdString(res); 161 } 162 FindFromU16ToUpper(const std::u16string & thisStr,uint16_t * u16Data)163 static inline size_t FindFromU16ToUpper(const std::u16string &thisStr, uint16_t *u16Data) 164 { 165 std::u16string tmpStr = Utf16ToU16String(u16Data, 1); 166 const char16_t *constChar16tData = tmpStr.data(); 167 icu::UnicodeString uString(constChar16tData); 168 icu::UnicodeString up = uString.toUpper(); 169 std::string res; 170 up.toUTF8String(res); 171 std::u16string searchStr = StringToU16string(res); 172 size_t idx = Find(thisStr, searchStr, 0); 173 return idx; 174 } 175 176 static EcmaString *Repeat(JSThread *thread, const std::u16string &thisStr, int32_t repeatLen, bool canBeCompress); 177 178 static EcmaString *Trim(JSThread *thread, const std::u16string &thisStr); 179 Append(const std::u16string & str1,const std::u16string & str2)180 static inline std::u16string Append(const std::u16string &str1, const std::u16string &str2) 181 { 182 std::u16string tmpStr = str1; 183 return tmpStr.append(str2); 184 } 185 Utf8ToU32String(const std::vector<uint8_t> & data)186 static inline uint32_t Utf8ToU32String(const std::vector<uint8_t> &data) 187 { 188 std::string str(data.begin(), data.end()); 189 std::u32string u32str = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>{}.from_bytes(str); 190 auto u32data = reinterpret_cast<uint32_t *>(u32str.data()); 191 return *u32data; 192 } 193 Utf32ToString(uint32_t u32Data)194 static inline std::string Utf32ToString(uint32_t u32Data) 195 { 196 UChar32 charData = u32Data; 197 icu::UnicodeString uString(charData); 198 std::string res; 199 uString.toUTF8String(res); 200 return res; 201 } 202 IsNonspace(uint16_t c)203 static inline bool IsNonspace(uint16_t c) 204 { 205 uint32_t len = sizeof(SPACE_OR_LINE_TERMINAL) / sizeof(SPACE_OR_LINE_TERMINAL[0]); 206 for (uint32_t i = 0; i < len; i++) { 207 if (c == SPACE_OR_LINE_TERMINAL[i]) { 208 return true; 209 } 210 if (c < SPACE_OR_LINE_TERMINAL[i]) { 211 return false; 212 } 213 } 214 return false; 215 } 216 217 template<typename T> GetStart(Span<T> & data,uint32_t length)218 static inline uint32_t GetStart(Span<T> &data, uint32_t length) 219 { 220 uint32_t start = 0; 221 while (start < length && IsNonspace(data[start])) { 222 start++; 223 } 224 return start; 225 } 226 227 template<typename T> GetEnd(Span<T> & data,uint32_t start,uint32_t length)228 static inline uint32_t GetEnd(Span<T> &data, uint32_t start, uint32_t length) 229 { 230 uint32_t end = length - 1; 231 while (end >= start && IsNonspace(data[end])) { 232 end--; 233 } 234 return end; 235 } 236 }; 237 } // namespace panda::ecmascript::base 238 #endif // ECMASCRIPT_BASE_STRING_HELP_H 239