1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20
21 namespace panda::ecmascript {
22 bool EcmaString::compressedStringsEnabled = true;
23 static constexpr int SMALL_STRING_SIZE = 128;
24
Concat(const JSHandle<EcmaString> & str1Handle,const JSHandle<EcmaString> & str2Handle,const EcmaVM * vm)25 EcmaString *EcmaString::Concat(const JSHandle<EcmaString> &str1Handle, const JSHandle<EcmaString> &str2Handle,
26 const EcmaVM *vm)
27 {
28 // allocator may trig gc and move src, need to hold it
29 EcmaString *string1 = *str1Handle;
30 EcmaString *string2 = *str2Handle;
31
32 uint32_t length1 = string1->GetLength();
33
34 uint32_t length2 = string2->GetLength();
35 uint32_t newLength = length1 + length2;
36 if (newLength == 0) {
37 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
38 }
39 bool compressed = GetCompressedStringsEnabled() && (!string1->IsUtf16() && !string2->IsUtf16());
40 auto newString = AllocStringObject(newLength, compressed, vm);
41
42 // retrieve strings after gc
43 string1 = *str1Handle;
44 string2 = *str2Handle;
45 if (compressed) {
46 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
47 Span<const uint8_t> src1(string1->GetDataUtf8(), length1);
48 EcmaString::StringCopy(sp, newLength, src1, length1);
49
50 sp = sp.SubSpan(length1);
51 Span<const uint8_t> src2(string2->GetDataUtf8(), length2);
52 EcmaString::StringCopy(sp, newLength - length1, src2, length2);
53 } else {
54 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
55 if (!string1->IsUtf16()) {
56 for (uint32_t i = 0; i < length1; ++i) {
57 sp[i] = string1->At<false>(i);
58 }
59 } else {
60 Span<const uint16_t> src1(string1->GetDataUtf16(), length1);
61 EcmaString::StringCopy(sp, newLength << 1U, src1, length1 << 1U);
62 }
63 sp = sp.SubSpan(length1);
64 if (!string2->IsUtf16()) {
65 for (uint32_t i = 0; i < length2; ++i) {
66 sp[i] = string2->At<false>(i);
67 }
68 } else {
69 uint32_t length = length2 << 1U;
70 Span<const uint16_t> src2(string2->GetDataUtf16(), length2);
71 EcmaString::StringCopy(sp, length, src2, length);
72 }
73 }
74
75 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
76 return newString;
77 }
78
79 /* static */
FastSubString(const JSHandle<EcmaString> & src,uint32_t start,uint32_t utf16Len,const EcmaVM * vm)80 EcmaString *EcmaString::FastSubString(const JSHandle<EcmaString> &src, uint32_t start, uint32_t utf16Len,
81 const EcmaVM *vm)
82 {
83 if (src->IsUtf8()) {
84 return FastSubUtf8String(vm, src, start, utf16Len);
85 }
86 return FastSubUtf16String(vm, src, start, utf16Len);
87 }
88
89 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)90 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
91 {
92 for (int32_t i = 0; i < count; ++i) {
93 auto left = static_cast<int32_t>(lhsSp[i]);
94 auto right = static_cast<int32_t>(rhsSp[i]);
95 if (left != right) {
96 return left - right;
97 }
98 }
99 return 0;
100 }
101
Compare(const EcmaString * rhs) const102 int32_t EcmaString::Compare(const EcmaString *rhs) const
103 {
104 const EcmaString *lhs = this;
105 if (lhs == rhs) {
106 return 0;
107 }
108 int32_t lhsCount = lhs->GetLength();
109 int32_t rhsCount = rhs->GetLength();
110 int32_t countDiff = lhsCount - rhsCount;
111 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
112 if (!lhs->IsUtf16() && !rhs->IsUtf16()) {
113 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
114 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
115 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
116 if (charDiff != 0) {
117 return charDiff;
118 }
119 } else if (!lhs->IsUtf16()) {
120 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
121 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
122 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
123 if (charDiff != 0) {
124 return charDiff;
125 }
126 } else if (!rhs->IsUtf16()) {
127 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), rhsCount);
128 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), lhsCount);
129 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
130 if (charDiff != 0) {
131 return charDiff;
132 }
133 } else {
134 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
135 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
136 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
137 if (charDiff != 0) {
138 return charDiff;
139 }
140 }
141 return countDiff;
142 }
143
144 /* static */
145 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)146 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
147 {
148 ASSERT(rhsSp.size() > 0);
149 auto first = static_cast<int32_t>(rhsSp[0]);
150 int32_t i;
151 for (i = pos; i <= max; i++) {
152 if (static_cast<int32_t>(lhsSp[i]) != first) {
153 i++;
154 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
155 i++;
156 }
157 }
158 /* Found first character, now look at the rest of rhsSp */
159 if (i <= max) {
160 int j = i + 1;
161 int end = j + rhsSp.size() - 1;
162
163 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
164 }
165 if (j == end) {
166 /* Found whole string. */
167 return i;
168 }
169 }
170 }
171 return -1;
172 }
173
IndexOf(const EcmaString * rhs,int32_t pos) const174 int32_t EcmaString::IndexOf(const EcmaString *rhs, int32_t pos) const
175 {
176 if (rhs == nullptr) {
177 return -1;
178 }
179 const EcmaString *lhs = this;
180 int32_t lhsCount = lhs->GetLength();
181 int32_t rhsCount = rhs->GetLength();
182 if (rhsCount == 0) {
183 return pos;
184 }
185
186 if (pos >= lhsCount) {
187 return -1;
188 }
189
190 if (pos < 0) {
191 pos = 0;
192 }
193
194 int32_t max = lhsCount - rhsCount;
195 if (max < 0) {
196 return -1;
197 }
198 if (rhs->IsUtf8() && lhs->IsUtf8()) {
199 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
200 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
201 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
202 } else if (rhs->IsUtf16() && lhs->IsUtf16()) { // NOLINT(readability-else-after-return)
203 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
204 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
205 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
206 } else if (rhs->IsUtf16()) {
207 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
208 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
209 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
210 } else { // NOLINT(readability-else-after-return)
211 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
212 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
213 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
214 }
215
216 return -1;
217 }
218
219 // static
CanBeCompressed(const EcmaString * string)220 bool EcmaString::CanBeCompressed(const EcmaString *string)
221 {
222 if (string->IsUtf8()) {
223 return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
224 }
225 return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
226 }
227
228 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)229 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
230 {
231 if (!compressedStringsEnabled) {
232 return false;
233 }
234 bool isCompressed = true;
235 uint32_t index = 0;
236 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
237 while (index < utf8Len) {
238 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
239 if (!IsASCIICharacter(utf8Data[index])) {
240 isCompressed = false;
241 break;
242 }
243 ++index;
244 }
245 return isCompressed;
246 }
247
248 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)249 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
250 {
251 if (!compressedStringsEnabled) {
252 return false;
253 }
254 bool isCompressed = true;
255 Span<const uint16_t> data(utf16Data, utf16Len);
256 for (uint32_t i = 0; i < utf16Len; i++) {
257 if (!IsASCIICharacter(data[i])) {
258 isCompressed = false;
259 break;
260 }
261 }
262 return isCompressed;
263 }
264
265 /* static */
CopyUtf16AsUtf8(const uint16_t * utf16From,uint8_t * utf8To,uint32_t utf16Len)266 void EcmaString::CopyUtf16AsUtf8(const uint16_t *utf16From, uint8_t *utf8To, uint32_t utf16Len)
267 {
268 Span<const uint16_t> from(utf16From, utf16Len);
269 Span<uint8_t> to(utf8To, utf16Len);
270 for (uint32_t i = 0; i < utf16Len; i++) {
271 to[i] = from[i];
272 }
273 }
274
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)275 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
276 {
277 if (GetLength() != str1->GetLength() + str2->GetLength()) {
278 return false;
279 }
280 if (IsUtf16()) {
281 if (str1->IsUtf8() && str2->IsUtf8()) {
282 return false;
283 }
284
285 if (EcmaString::StringsAreEqualUtf16(str1, GetDataUtf16(), str1->GetLength())) {
286 return EcmaString::StringsAreEqualUtf16(str2, GetDataUtf16() + str1->GetLength(), str2->GetLength());
287 }
288 } else {
289 if (str1->IsUtf16() || str2->IsUtf16()) {
290 return false;
291 }
292 Span<const uint8_t> concatData(GetDataUtf8(), str1->GetLength());
293 Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
294 if (EcmaString::StringsAreEquals(concatData, data1)) {
295 concatData = Span<const uint8_t>(GetDataUtf8() + str1->GetLength(), str2->GetLength());
296 Span<const uint8_t> data2(str2->GetDataUtf8(), str2->GetLength());
297 return EcmaString::StringsAreEquals(concatData, data2);
298 }
299 }
300 return false;
301 }
302
303 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)304 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
305 {
306 if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength()) ||
307 (str1->GetHashcode() != str2->GetHashcode())) {
308 return false;
309 }
310
311 if (str1->IsUtf16()) {
312 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
313 Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
314 return EcmaString::StringsAreEquals(data1, data2);
315 } else { // NOLINT(readability-else-after-return)
316 Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
317 Span<const uint8_t> data2(str2->GetDataUtf8(), str1->GetLength());
318 return EcmaString::StringsAreEquals(data1, data2);
319 }
320 }
321
322 /* static */
StringsAreEqualUtf8(const EcmaString * str1,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress)323 bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
324 bool canBeCompress)
325 {
326 if (canBeCompress != str1->IsUtf8()) {
327 return false;
328 }
329
330 if (canBeCompress && str1->GetLength() != utf8Len) {
331 return false;
332 }
333
334 if (canBeCompress) {
335 Span<const uint8_t> data1(str1->GetDataUtf8(), utf8Len);
336 Span<const uint8_t> data2(utf8Data, utf8Len);
337 return EcmaString::StringsAreEquals(data1, data2);
338 }
339 return IsUtf8EqualsUtf16(utf8Data, utf8Len, str1->GetDataUtf16(), str1->GetLength());
340 }
341
342 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)343 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
344 {
345 bool result = false;
346 if (str1->GetLength() != utf16Len) {
347 result = false;
348 } else if (!str1->IsUtf16()) {
349 result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), str1->GetLength(), utf16Data, utf16Len);
350 } else {
351 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
352 Span<const uint16_t> data2(utf16Data, utf16Len);
353 result = EcmaString::StringsAreEquals(data1, data2);
354 }
355 return result;
356 }
357
358 /* static */
359 template<typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)360 bool EcmaString::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
361 {
362 ASSERT(str1.Size() <= str2.Size());
363 size_t size = str1.Size();
364 if (size < SMALL_STRING_SIZE) {
365 for (size_t i = 0; i < size; i++) {
366 if (str1[i] != str2[i]) {
367 return false;
368 }
369 }
370 return true;
371 }
372 return !memcmp(str1.data(), str2.data(), size);
373 }
374
375 template<typename T>
StringCopy(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)376 bool EcmaString::StringCopy(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
377 {
378 ASSERT(dstMax >= count);
379 ASSERT(dst.Size() >= src.Size());
380 if (src.Size() < SMALL_STRING_SIZE) {
381 for (size_t i = 0; i < src.Size(); i++) {
382 dst[i] = src[i];
383 }
384 return true;
385 }
386 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
387 LOG_ECMA(FATAL) << "memcpy_s failed";
388 UNREACHABLE();
389 }
390 return true;
391 }
392
// Folds `size` elements starting at `data` into a 32-bit hash, beginning from hashSeed.
// Each step computes hash * 31 + element, written as (hash << 5) - hash, in unsigned
// arithmetic so that overflow wraps. With size == 0 the seed is returned unchanged and
// `data` is never dereferenced. The result is the final value reinterpreted as int32_t.
template<class T>
static int32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
{
    constexpr size_t SHIFT = 5;
    uint32_t hash = hashSeed;
    // A plain bounded loop keeps this helper free of compiler-specific pragmas, so it builds
    // the same way whether or not __GNUC__ is defined.
    for (size_t i = 0; i < size; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        hash = (hash << SHIFT) - hash + data[i];
    }
    return static_cast<int32_t>(hash);
}
409
ComputeHashForUtf8(const uint8_t * utf8Data)410 static int32_t ComputeHashForUtf8(const uint8_t *utf8Data)
411 {
412 if (utf8Data == nullptr) {
413 return 0;
414 }
415 uint32_t hash = 0;
416 while (*utf8Data != '\0') { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
417 constexpr size_t SHIFT = 5;
418 hash = (hash << SHIFT) - hash + *utf8Data++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
419 }
420 return static_cast<int32_t>(hash);
421 }
422
ComputeHashcode(uint32_t hashSeed) const423 uint32_t EcmaString::ComputeHashcode(uint32_t hashSeed) const
424 {
425 uint32_t hash;
426 if (compressedStringsEnabled) {
427 if (!IsUtf16()) {
428 hash = ComputeHashForData(GetDataUtf8(), GetLength(), hashSeed);
429 } else {
430 hash = ComputeHashForData(GetDataUtf16(), GetLength(), hashSeed);
431 }
432 } else {
433 ASSERT(static_cast<size_t>(GetLength())<(std::numeric_limits<size_t>::max()>>1U));
434 hash = ComputeHashForData(GetDataUtf16(), GetLength(), hashSeed);
435 }
436 return hash;
437 }
438
439 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)440 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
441 {
442 uint32_t hash;
443 if (canBeCompress) {
444 hash = ComputeHashForUtf8(utf8Data);
445 } else {
446 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
447 CVector<uint16_t> tmpBuffer(utf16Len);
448 [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
449 utf16Len, 0);
450 ASSERT(len == utf16Len);
451 hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
452 }
453 return hash;
454 }
455
456 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)457 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
458 {
459 return ComputeHashForData(utf16Data, length, 0);
460 }
461
462 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)463 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
464 uint32_t utf16Len)
465 {
466 // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
467 uint32_t utf8ConvertLength = utf16Len + 1;
468 CVector<uint16_t> tmpBuffer(utf8ConvertLength);
469 auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
470 if (len != utf16Len) {
471 return false;
472 }
473
474 Span<const uint16_t> data1(tmpBuffer.data(), len);
475 Span<const uint16_t> data2(utf16Data, utf16Len);
476 return EcmaString::StringsAreEquals(data1, data2);
477 }
478 } // namespace panda::ecmascript
479