1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string_table.h"
17
18 #include "ecmascript/ecma_string-inl.h"
19 #include "ecmascript/ecma_vm.h"
20 #include "ecmascript/js_thread.h"
21 #include "ecmascript/jspandafile/js_pandafile.h"
22 #include "ecmascript/mem/c_string.h"
23 #include "ecmascript/mem/space.h"
24 #include "ecmascript/object_factory.h"
25
26 namespace panda::ecmascript {
EcmaStringTable(const EcmaVM * vm)27 EcmaStringTable::EcmaStringTable(const EcmaVM *vm) : vm_(vm) {}
28
GetString(const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString) const29 std::pair<EcmaString *, uint32_t> EcmaStringTable::GetString(const JSHandle<EcmaString> &firstString,
30 const JSHandle<EcmaString> &secondString) const
31 {
32 ASSERT(EcmaStringAccessor(firstString).NotTreeString());
33 ASSERT(EcmaStringAccessor(secondString).NotTreeString());
34 auto [hashCode, isInteger] = EcmaStringAccessor(firstString).ComputeRawHashcode();
35 hashCode = EcmaStringAccessor(secondString).ComputeHashcode(hashCode, isInteger);
36
37 auto range = table_.equal_range(hashCode);
38 for (auto item = range.first; item != range.second; ++item) {
39 auto foundString = item->second;
40 if (EcmaStringAccessor(foundString).EqualToSplicedString(*firstString, *secondString)) {
41 return std::make_pair(foundString, hashCode);
42 }
43 }
44 return std::make_pair(nullptr, hashCode);
45 }
46
GetString(const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress) const47 std::pair<EcmaString *, uint32_t> EcmaStringTable::GetString(const uint8_t *utf8Data,
48 uint32_t utf8Len, bool canBeCompress) const
49 {
50 uint32_t hashCode = EcmaStringAccessor::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
51 auto range = table_.equal_range(hashCode);
52 for (auto item = range.first; item != range.second; ++item) {
53 auto foundString = item->second;
54 if (EcmaStringAccessor::StringIsEqualUint8Data(foundString, utf8Data, utf8Len, canBeCompress)) {
55 return std::make_pair(foundString, hashCode);
56 }
57 }
58 return std::make_pair(nullptr, hashCode);
59 }
60
GetString(const uint16_t * utf16Data,uint32_t utf16Len) const61 std::pair<EcmaString *, uint32_t> EcmaStringTable::GetString(const uint16_t *utf16Data, uint32_t utf16Len) const
62 {
63 uint32_t hashCode = EcmaStringAccessor::ComputeHashcodeUtf16(const_cast<uint16_t *>(utf16Data), utf16Len);
64 auto range = table_.equal_range(hashCode);
65 for (auto item = range.first; item != range.second; ++item) {
66 auto foundString = item->second;
67 if (EcmaStringAccessor::StringsAreEqualUtf16(foundString, utf16Data, utf16Len)) {
68 return std::make_pair(foundString, hashCode);
69 }
70 }
71 return std::make_pair(nullptr, hashCode);
72 }
73
GetString(EcmaString * string) const74 EcmaString *EcmaStringTable::GetString(EcmaString *string) const
75 {
76 auto hashcode = EcmaStringAccessor(string).GetHashcode();
77 auto range = table_.equal_range(hashcode);
78 for (auto item = range.first; item != range.second; ++item) {
79 auto foundString = item->second;
80 if (EcmaStringAccessor::StringsAreEqual(foundString, string)) {
81 return foundString;
82 }
83 }
84 return nullptr;
85 }
86
InternString(EcmaString * string)87 void EcmaStringTable::InternString(EcmaString *string)
88 {
89 if (EcmaStringAccessor(string).IsInternString()) {
90 return;
91 }
92 // Strings in string table should not be in the young space.
93 ASSERT(!Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(string))->InYoungSpace());
94 ASSERT(EcmaStringAccessor(string).NotTreeString());
95 auto hashcode = EcmaStringAccessor(string).GetHashcode();
96 table_.emplace(hashcode, string);
97 EcmaStringAccessor(string).SetInternString();
98 }
99
InternEmptyString(EcmaString * emptyStr)100 void EcmaStringTable::InternEmptyString(EcmaString *emptyStr)
101 {
102 InternString(emptyStr);
103 }
104
GetOrInternString(const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)105 EcmaString *EcmaStringTable::GetOrInternString(const JSHandle<EcmaString> &firstString,
106 const JSHandle<EcmaString> &secondString)
107 {
108 auto firstFlat = JSHandle<EcmaString>(vm_->GetJSThread(), EcmaStringAccessor::Flatten(vm_, firstString));
109 auto secondFlat = JSHandle<EcmaString>(vm_->GetJSThread(), EcmaStringAccessor::Flatten(vm_, secondString));
110 std::pair<EcmaString *, uint32_t> result = GetString(firstFlat, secondFlat);
111 if (result.first != nullptr) {
112 return result.first;
113 }
114 JSHandle<EcmaString> concatHandle(vm_->GetJSThread(),
115 EcmaStringAccessor::Concat(vm_, firstFlat, secondFlat, MemSpaceType::OLD_SPACE));
116 EcmaString *concatString = EcmaStringAccessor::Flatten(vm_, concatHandle, MemSpaceType::OLD_SPACE);
117 concatString->SetMixHashcode(result.second);
118 InternString(concatString);
119 return concatString;
120 }
121
GetOrInternString(const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress)122 EcmaString *EcmaStringTable::GetOrInternString(const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress)
123 {
124 std::pair<EcmaString *, uint32_t> result = GetString(utf8Data, utf8Len, canBeCompress);
125 if (result.first != nullptr) {
126 return result.first;
127 }
128
129 EcmaString *str =
130 EcmaStringAccessor::CreateFromUtf8(vm_, utf8Data, utf8Len, canBeCompress, MemSpaceType::OLD_SPACE);
131 str->SetMixHashcode(result.second);
132 InternString(str);
133 return str;
134 }
135
136 /*
137 This function is used to create global constant strings from non-movable sapce only.
138 It only inserts string into string-table and provides no string-table validity check.
139 */
CreateAndInternStringNonMovable(const uint8_t * utf8Data,uint32_t utf8Len)140 EcmaString *EcmaStringTable::CreateAndInternStringNonMovable(const uint8_t *utf8Data, uint32_t utf8Len)
141 {
142 std::pair<EcmaString *, uint32_t> result = GetString(utf8Data, utf8Len, true);
143 if (result.first != nullptr) {
144 return result.first;
145 }
146
147 EcmaString *str = EcmaStringAccessor::CreateFromUtf8(vm_, utf8Data, utf8Len, true, MemSpaceType::NON_MOVABLE);
148 str->SetMixHashcode(result.second);
149 InternString(str);
150 return str;
151 }
152
GetOrInternString(const uint16_t * utf16Data,uint32_t utf16Len,bool canBeCompress)153 EcmaString *EcmaStringTable::GetOrInternString(const uint16_t *utf16Data, uint32_t utf16Len, bool canBeCompress)
154 {
155 std::pair<EcmaString *, uint32_t> result = GetString(utf16Data, utf16Len);
156 if (result.first != nullptr) {
157 return result.first;
158 }
159
160 EcmaString *str =
161 EcmaStringAccessor::CreateFromUtf16(vm_, utf16Data, utf16Len, canBeCompress, MemSpaceType::OLD_SPACE);
162 str->SetMixHashcode(result.second);
163 InternString(str);
164 return str;
165 }
166
GetOrInternString(EcmaString * string)167 EcmaString *EcmaStringTable::GetOrInternString(EcmaString *string)
168 {
169 if (EcmaStringAccessor(string).IsInternString()) {
170 return string;
171 }
172 JSHandle<EcmaString> strHandle(vm_->GetJSThread(), string);
173 // may gc
174 auto strFlat = EcmaStringAccessor::Flatten(vm_, strHandle, MemSpaceType::OLD_SPACE);
175 if (EcmaStringAccessor(strFlat).IsInternString()) {
176 return strFlat;
177 }
178 EcmaString *result = GetString(strFlat);
179 if (result != nullptr) {
180 return result;
181 }
182
183 if (EcmaStringAccessor(strFlat).NotTreeString()) {
184 Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(strFlat));
185 if (objectRegion->InYoungSpace()) {
186 JSHandle<EcmaString> resultHandle(vm_->GetJSThread(), strFlat);
187 strFlat = EcmaStringAccessor::CopyStringToOldSpace(vm_,
188 resultHandle, EcmaStringAccessor(strFlat).GetLength(), EcmaStringAccessor(strFlat).IsUtf8());
189 }
190 }
191 InternString(strFlat);
192 return strFlat;
193 }
194
InsertStringToTable(const JSHandle<EcmaString> & strHandle)195 EcmaString *EcmaStringTable::InsertStringToTable(const JSHandle<EcmaString> &strHandle)
196 {
197 auto strFlat = EcmaStringAccessor::Flatten(vm_, strHandle, MemSpaceType::OLD_SPACE);
198 if (EcmaStringAccessor(strFlat).NotTreeString()) {
199 Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(strFlat));
200 if (objectRegion->InYoungSpace()) {
201 JSHandle<EcmaString> resultHandle(vm_->GetJSThread(), strFlat);
202 strFlat = EcmaStringAccessor::CopyStringToOldSpace(vm_,
203 resultHandle, EcmaStringAccessor(strFlat).GetLength(), EcmaStringAccessor(strFlat).IsUtf8());
204 }
205 }
206 InternString(strFlat);
207 return strFlat;
208 }
209
TryGetInternString(EcmaString * string)210 EcmaString *EcmaStringTable::TryGetInternString(EcmaString *string)
211 {
212 return GetString(string);
213 }
214
GetOrInternStringWithSpaceType(const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress,MemSpaceType type,bool isConstantString,uint32_t idOffset)215 EcmaString *EcmaStringTable::GetOrInternStringWithSpaceType(const uint8_t *utf8Data, uint32_t utf8Len,
216 bool canBeCompress, MemSpaceType type,
217 bool isConstantString, uint32_t idOffset)
218 {
219 std::pair<EcmaString *, uint32_t> result = GetString(utf8Data, utf8Len, canBeCompress);
220 if (result.first != nullptr) {
221 return result.first;
222 }
223 type = type == MemSpaceType::NON_MOVABLE ? MemSpaceType::NON_MOVABLE : MemSpaceType::OLD_SPACE;
224 EcmaString *str;
225 if (canBeCompress) {
226 // Constant string will be created in this branch.
227 str = EcmaStringAccessor::CreateFromUtf8(vm_, utf8Data, utf8Len, canBeCompress, type, isConstantString,
228 idOffset);
229 } else {
230 str = EcmaStringAccessor::CreateFromUtf8(vm_, utf8Data, utf8Len, canBeCompress, type);
231 }
232 str->SetMixHashcode(result.second);
233 InternString(str);
234 return str;
235 }
236
GetOrInternStringWithSpaceType(const uint8_t * utf8Data,uint32_t utf16Len,MemSpaceType type)237 EcmaString *EcmaStringTable::GetOrInternStringWithSpaceType(const uint8_t *utf8Data, uint32_t utf16Len,
238 MemSpaceType type)
239 {
240 type = type == MemSpaceType::NON_MOVABLE ? MemSpaceType::NON_MOVABLE : MemSpaceType::OLD_SPACE;
241 EcmaString *str = EcmaStringAccessor::CreateUtf16StringFromUtf8(vm_, utf8Data, utf16Len, type);
242 EcmaString *result = GetString(str);
243 if (result != nullptr) {
244 return result;
245 }
246 InternString(str);
247 return str;
248 }
249
SweepWeakReference(const WeakRootVisitor & visitor)250 void EcmaStringTable::SweepWeakReference(const WeakRootVisitor &visitor)
251 {
252 for (auto it = table_.begin(); it != table_.end();) {
253 // Strings in string table should not be in the young space. Only old gc will sweep string table.
254 auto *object = it->second;
255 auto fwd = visitor(object);
256 ASSERT(!Region::ObjectAddressToRange(object)->InYoungSpace());
257 if (fwd == nullptr) {
258 LOG_ECMA(VERBOSE) << "StringTable: delete string " << std::hex << object;
259 it = table_.erase(it);
260 } else if (fwd != object) {
261 it->second = static_cast<EcmaString *>(fwd);
262 ++it;
263 LOG_ECMA(VERBOSE) << "StringTable: forward " << std::hex << object << " -> " << fwd;
264 } else {
265 ++it;
266 }
267 }
268 }
269
RelocateConstantData(const JSPandaFile * jsPandaFile)270 void EcmaStringTable::RelocateConstantData(const JSPandaFile *jsPandaFile)
271 {
272 auto thread = vm_->GetJSThread();
273 for (auto it = table_.begin(); it != table_.end();) {
274 auto *object = it->second;
275 if (!EcmaStringAccessor(object).IsConstantString()) {
276 ++it;
277 continue;
278 }
279 auto constantStr = ConstantString::Cast(object);
280 if (constantStr->GetEntityId() < 0 || !jsPandaFile->Contain(constantStr->GetConstantData())) {
281 // EntityId is -1, which means this str has been relocated. Or the data is not in pandafile.
282 ++it;
283 continue;
284 }
285 uint32_t id = constantStr->GetEntityIdU32();
286 panda_file::File::StringData sd = jsPandaFile->GetStringData(EntityId(id));
287 if (constantStr->GetConstantData() == sd.data) {
288 uint32_t strLen = sd.utf16_length;
289 if (UNLIKELY(strLen == 0)) {
290 it->second = *(vm_->GetFactory()->GetEmptyString());
291 }
292 size_t byteLength = sd.is_ascii ? 1 : sizeof(uint16_t);
293 JSHandle<ByteArray> newData = vm_->GetFactory()->NewByteArray(
294 strLen, byteLength, reinterpret_cast<void *>(const_cast<uint8_t *>(sd.data)),
295 MemSpaceType::NON_MOVABLE);
296 constantStr->SetRelocatedData(thread, newData.GetTaggedValue());
297 constantStr->SetConstantData(static_cast<uint8_t *>(newData->GetData()));
298 constantStr->SetEntityId(-1);
299 } else {
300 LOG_ECMA(ERROR) << "ConstantString data pointer is inconsistent with sd.data";
301 }
302 ++it;
303 }
304 }
305
CheckStringTableValidity()306 bool EcmaStringTable::CheckStringTableValidity()
307 {
308 for (auto itemOuter = table_.begin(); itemOuter != table_.end(); ++itemOuter) {
309 auto outerString = itemOuter->second;
310 if (!EcmaStringAccessor(outerString).NotTreeString()) {
311 return false;
312 }
313 int counter = 0;
314 auto hashcode = EcmaStringAccessor(outerString).GetHashcode();
315 auto range = table_.equal_range(hashcode);
316 auto it = range.first;
317 for (; it != range.second; ++it) {
318 auto foundString = it->second;
319 if (EcmaStringAccessor::StringsAreEqual(foundString, outerString)) {
320 ++counter;
321 }
322 }
323 if (counter > 1) {
324 return false;
325 }
326 }
327 return true;
328 }
329
CreateSingleCharTable(JSThread * thread)330 void SingleCharTable::CreateSingleCharTable(JSThread *thread)
331 {
332 auto table = thread->GetEcmaVM()->GetFactory()->NewTaggedArray(MAX_ONEBYTE_CHARCODE,
333 JSTaggedValue::Undefined(), MemSpaceType::NON_MOVABLE);
334 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
335 for (uint32_t i = 1; i < MAX_ONEBYTE_CHARCODE; ++i) {
336 std::string tmp(1, i + 0X00); // 1: size
337 table->Set(thread, i, factory->NewFromASCIINonMovable(tmp).GetTaggedValue());
338 }
339 thread->SetSingleCharTable((table.GetTaggedValue()));
340 }
341 } // namespace panda::ecmascript
342