• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20 
21 namespace panda::ecmascript {
22 
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)23 EcmaString *EcmaString::Concat(const EcmaVM *vm,
24     const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
25 {
26     // allocator may trig gc and move src, need to hold it
27     EcmaString *strLeft = *left;
28     EcmaString *strRight = *right;
29     uint32_t leftLength = strLeft->GetLength();
30     bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
31     uint32_t rightLength = strRight->GetLength();
32     uint32_t newLength = leftLength + rightLength;
33     if (newLength == 0) {
34         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
35     }
36 
37     if (leftLength == 0) {
38         if (type == MemSpaceType::OLD_SPACE) {
39             Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*right));
40             if (objectRegion->InYoungSpace()) {
41                 return CopyStringToOldSpace(vm, right, rightLength, compressed);
42             }
43         }
44         return strRight;
45     }
46     if (rightLength == 0) {
47         if (type == MemSpaceType::OLD_SPACE) {
48             Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*left));
49             if (objectRegion->InYoungSpace()) {
50                 return CopyStringToOldSpace(vm, left, leftLength, compressed);
51             }
52         }
53         return strLeft;
54     }
55 
56     // if the result string is small, make a LineString
57     if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
58         ASSERT(strLeft->IsLineOrConstantString());
59         ASSERT(strRight->IsLineOrConstantString());
60         auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
61         // retrieve strings after gc
62         strLeft = *left;
63         strRight = *right;
64         if (compressed) {
65             // copy left part
66             Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
67             Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
68             EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
69             // copy right part
70             sp = sp.SubSpan(leftLength);
71             Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
72             EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
73         } else {
74             // copy left part
75             Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
76             if (strLeft->IsUtf8()) {
77                 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
78             } else {
79                 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
80                 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
81             }
82             // copy right part
83             sp = sp.SubSpan(leftLength);
84             if (strRight->IsUtf8()) {
85                 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
86             } else {
87                 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
88                 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
89             }
90         }
91         ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
92         return newString;
93     }
94     return CreateTreeString(vm, left, right, newLength, compressed);
95 }
96 
97 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)98 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
99     uint32_t length, bool compressed)
100 {
101     EcmaString *strOrigin = *original;
102     ASSERT(strOrigin->IsLineOrConstantString());
103     EcmaString *newString = nullptr;
104     if (strOrigin->IsLineString()) {
105         newString = CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE);
106     } else if (strOrigin->IsConstantString()) {
107         return CreateConstantString(vm, strOrigin->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
108     }
109     strOrigin = *original;
110     if (compressed) {
111         // copy
112         Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
113         Span<const uint8_t> srcSp(strOrigin->GetDataUtf8(), length);
114         EcmaString::MemCopyChars(sp, length, srcSp, length);
115     } else {
116         // copy left part
117         Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
118         if (strOrigin->IsUtf8()) {
119             EcmaString::CopyChars(sp.data(), strOrigin->GetDataUtf8(), length);
120         } else {
121             Span<const uint16_t> srcSp(strOrigin->GetDataUtf16(), length);
122             EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
123         }
124     }
125     ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
126     return newString;
127 }
128 
129 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)130 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
131     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
132 {
133     ASSERT((start + length) <= src->GetLength());
134     if (length == 0) {
135         return *vm->GetFactory()->GetEmptyString();
136     }
137     if (start == 0 && length == src->GetLength()) {
138         return *src;
139     }
140     auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
141     if (srcFlat->IsUtf8()) {
142         return FastSubUtf8String(vm, srcFlat, start, length);
143     }
144     return FastSubUtf16String(vm, srcFlat, start, length);
145 }
146 
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)147 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
148 {
149     ASSERT(IsLineString() && !IsConstantString());
150     if (IsUtf8()) {
151         ASSERT(src->IsUtf8());
152         CVector<uint8_t> buf;
153         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
154         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
155         if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
156             LOG_FULL(FATAL) << "memcpy_s failed";
157             UNREACHABLE();
158         }
159     } else if (src->IsUtf8()) {
160         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
161         CVector<uint8_t> buf;
162         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
163         Span<uint16_t> to(GetDataUtf16Writable() + start, length);
164         Span<const uint8_t> from(data, length);
165         for (uint32_t i = 0; i < length; i++) {
166             to[i] = from[i];
167         }
168     } else {
169         CVector<uint16_t> buf;
170         const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
171         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
172         if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
173             destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
174             LOG_FULL(FATAL) << "memcpy_s failed";
175             UNREACHABLE();
176         }
177     }
178 }
179 
180 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)181 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
182 {
183     for (int32_t i = 0; i < count; ++i) {
184         auto left = static_cast<int32_t>(lhsSp[i]);
185         auto right = static_cast<int32_t>(rhsSp[i]);
186         if (left != right) {
187             return left - right;
188         }
189     }
190     return 0;
191 }
192 
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)193 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
194 {
195     if (*left == *right) {
196         return 0;
197     }
198     auto leftFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, left));
199     auto rightFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, right));
200     EcmaString *lhs = *leftFlat;
201     EcmaString *rhs = *rightFlat;
202     int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
203     int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
204     int32_t countDiff = lhsCount - rhsCount;
205     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
206     if (!lhs->IsUtf16() && !rhs->IsUtf16()) {
207         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
208         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
209         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
210         if (charDiff != 0) {
211             return charDiff;
212         }
213     } else if (!lhs->IsUtf16()) {
214         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
215         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
216         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
217         if (charDiff != 0) {
218             return charDiff;
219         }
220     } else if (!rhs->IsUtf16()) {
221         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), rhsCount);
222         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), lhsCount);
223         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
224         if (charDiff != 0) {
225             return charDiff;
226         }
227     } else {
228         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
229         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
230         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
231         if (charDiff != 0) {
232             return charDiff;
233         }
234     }
235     return countDiff;
236 }
237 
238 /* static */
239 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)240 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
241 {
242     ASSERT(rhsSp.size() > 0);
243     auto first = static_cast<int32_t>(rhsSp[0]);
244     for (int32_t i = pos; i <= max; i++) {
245         if (static_cast<int32_t>(lhsSp[i]) != first) {
246             i++;
247             while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
248                 i++;
249             }
250         }
251         /* Found first character, now look at the rest of rhsSp */
252         if (i <= max) {
253             int j = i + 1;
254             int end = j + static_cast<int>(rhsSp.size()) - 1;
255 
256             for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
257             }
258             if (j == end) {
259                 /* Found whole string. */
260                 return i;
261             }
262         }
263     }
264     return -1;
265 }
266 
267 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)268 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
269 {
270     int rhsSize = static_cast<int>(rhsSp.size());
271     ASSERT(rhsSize > 0);
272     auto first = rhsSp[0];
273     for (int32_t i = pos; i >= 0; i--) {
274         if (lhsSp[i] != first) {
275             continue;
276         }
277         /* Found first character, now look at the rest of rhsSp */
278         int j = 1;
279         while (j < rhsSize) {
280             if (rhsSp[j] != lhsSp[i + j]) {
281                 break;
282             }
283             j++;
284         }
285         if (j == rhsSize) {
286             return i;
287         }
288     }
289     return -1;
290 }
291 
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)292 int32_t EcmaString::IndexOf(const EcmaVM *vm,
293     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
294 {
295     EcmaString *lhs = *receiver;
296     EcmaString *rhs = *search;
297     if (lhs == nullptr || rhs == nullptr) {
298         return -1;
299     }
300     int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
301     int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
302 
303     if (pos > lhsCount) {
304         return -1;
305     }
306 
307     if (rhsCount == 0) {
308         return pos;
309     }
310 
311     if (pos < 0) {
312         pos = 0;
313     }
314 
315     int32_t max = lhsCount - rhsCount;
316     if (max < 0) {
317         return -1;
318     }
319 
320     if (pos + rhsCount > lhsCount) {
321         return -1;
322     }
323 
324     auto receiverFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, receiver));
325     auto searchFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, search));
326     lhs = *receiverFlat;
327     rhs = *searchFlat;
328 
329     if (rhs->IsUtf8() && lhs->IsUtf8()) {
330         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
331         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
332         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
333     } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
334         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
335         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
336         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
337     } else if (rhs->IsUtf16()) {
338         return -1;
339     } else {  // NOLINT(readability-else-after-return)
340         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
341         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
342         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
343     }
344 }
345 
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)346 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
347     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
348 {
349     EcmaString *lhs = *receiver;
350     EcmaString *rhs = *search;
351     if (lhs == nullptr || rhs == nullptr) {
352         return -1;
353     }
354 
355     int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
356     int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
357     if (lhsCount < rhsCount) {
358         return -1;
359     }
360 
361     if (pos < 0) {
362         pos = 0;
363     }
364 
365     if (pos > lhsCount) {
366         pos = lhsCount;
367     }
368 
369     if (pos + rhsCount > lhsCount) {
370         pos = lhsCount - rhsCount;
371     }
372 
373     if (rhsCount == 0) {
374         return pos;
375     }
376 
377     auto receiverFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, receiver));
378     auto searchFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, search));
379     lhs = *receiverFlat;
380     rhs = *searchFlat;
381 
382     if (rhs->IsUtf8() && lhs->IsUtf8()) {
383         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
384         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
385         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
386     } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
387         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
388         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
389         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
390     } else if (rhs->IsUtf16()) {
391         return -1;
392     } else {  // NOLINT(readability-else-after-return)
393         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
394         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
395         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
396     }
397 }
398 
ToU16String(uint32_t len)399 std::u16string EcmaString::ToU16String(uint32_t len)
400 {
401     uint32_t length = len > 0 ? len : GetLength();
402     std::u16string result;
403     if (IsUtf16()) {
404         CVector<uint16_t> buf;
405         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
406         result = base::StringHelper::Utf16ToU16String(data, length);
407     } else {
408         CVector<uint8_t> buf;
409         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
410         result = base::StringHelper::Utf8ToU16String(data, length);
411     }
412     return result;
413 }
414 
415 // static
CanBeCompressed(const EcmaString * string)416 bool EcmaString::CanBeCompressed(const EcmaString *string)
417 {
418     ASSERT(string->IsLineOrConstantString());
419     if (string->IsUtf8()) {
420         return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
421     }
422     return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
423 }
424 
425 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)426 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
427 {
428     bool isCompressed = true;
429     uint32_t index = 0;
430     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
431     while (index < utf8Len) {
432         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
433         if (!IsASCIICharacter(utf8Data[index])) {
434             isCompressed = false;
435             break;
436         }
437         ++index;
438     }
439     return isCompressed;
440 }
441 
442 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)443 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
444 {
445     bool isCompressed = true;
446     Span<const uint16_t> data(utf16Data, utf16Len);
447     for (uint32_t i = 0; i < utf16Len; i++) {
448         if (!IsASCIICharacter(data[i])) {
449             isCompressed = false;
450             break;
451         }
452     }
453     return isCompressed;
454 }
455 
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)456 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
457 {
458     ASSERT(IsLineOrConstantString());
459     ASSERT(str1->IsLineOrConstantString() && str2->IsLineOrConstantString());
460     if (GetLength() != str1->GetLength() + str2->GetLength()) {
461         return false;
462     }
463     if (IsUtf16()) {
464         if (str1->IsUtf8() && str2->IsUtf8()) {
465             return false;
466         }
467         if (EcmaString::StringsAreEqualUtf16(str1, GetDataUtf16(), str1->GetLength())) {
468             return EcmaString::StringsAreEqualUtf16(str2, GetDataUtf16() + str1->GetLength(), str2->GetLength());
469         }
470     } else {
471         if (str1->IsUtf16() || str2->IsUtf16()) {
472             return false;
473         }
474         Span<const uint8_t> concatData(GetDataUtf8(), str1->GetLength());
475         Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
476         if (EcmaString::StringsAreEquals(concatData, data1)) {
477             concatData = Span<const uint8_t>(GetDataUtf8() + str1->GetLength(), str2->GetLength());
478             Span<const uint8_t> data2(str2->GetDataUtf8(), str2->GetLength());
479             return EcmaString::StringsAreEquals(concatData, data2);
480         }
481     }
482     return false;
483 }
484 
485 /* static */
StringsAreEqualSameUtfEncoding(EcmaString * str1,EcmaString * str2)486 bool EcmaString::StringsAreEqualSameUtfEncoding(EcmaString *str1, EcmaString *str2)
487 {
488     if (str1->IsUtf16()) {
489         CVector<uint16_t> buf1;
490         CVector<uint16_t> buf2;
491         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(str1, buf1);
492         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(str2, buf2);
493         Span<const uint16_t> sp1(data1, str1->GetLength());
494         Span<const uint16_t> sp2(data2, str2->GetLength());
495         return EcmaString::StringsAreEquals(sp1, sp2);
496     } else {  // NOLINT(readability-else-after-return)
497         CVector<uint8_t> buf1;
498         CVector<uint8_t> buf2;
499         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(str1, buf1);
500         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(str2, buf2);
501         Span<const uint8_t> sp1(data1, str1->GetLength());
502         Span<const uint8_t> sp2(data2, str2->GetLength());
503         return EcmaString::StringsAreEquals(sp1, sp2);
504     }
505 }
506 
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)507 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
508 {
509     if (str1 == str2) {
510         return true;
511     }
512     if (str1->IsUtf16() != str2->IsUtf16()) {
513         return false;
514     }
515     uint32_t str1Len = str1->GetLength();
516     if (str1Len != str2->GetLength()) {
517         return false;
518     }
519     if (str1Len == 0) {
520         return true;
521     }
522 
523     uint32_t str1Hash;
524     uint32_t str2Hash;
525     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
526         if (str1Hash != str2Hash) {
527             return false;
528         }
529     }
530 
531     auto str1Flat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, str1));
532     auto str2Flat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, str2));
533     return StringsAreEqualSameUtfEncoding(*str1Flat, *str2Flat);
534 }
535 
536 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)537 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
538 {
539     if (str1 == str2) {
540         return true;
541     }
542     if (str1->IsUtf16() != str2->IsUtf16()) {
543         return false;
544     }
545     uint32_t str1Len = str1->GetLength();
546     if (str1Len != str2->GetLength()) {
547         return false;
548     }
549     if (str1Len == 0) {
550         return true;
551     }
552 
553     uint32_t str1Hash;
554     uint32_t str2Hash;
555     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
556         if (str1Hash != str2Hash) {
557             return false;
558         }
559     }
560     return StringsAreEqualSameUtfEncoding(str1, str2);
561 }
562 
563 /* static */
StringsAreEqualUtf8(const EcmaString * str1,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress)564 bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
565                                      bool canBeCompress)
566 {
567     if (canBeCompress != str1->IsUtf8()) {
568         return false;
569     }
570     if (canBeCompress && str1->GetLength() != utf8Len) {
571         return false;
572     }
573     if (canBeCompress) {
574         CVector<uint8_t> buf;
575         Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), utf8Len);
576         Span<const uint8_t> data2(utf8Data, utf8Len);
577         return EcmaString::StringsAreEquals(data1, data2);
578     }
579     CVector<uint16_t> buf;
580     uint32_t length = str1->GetLength();
581     const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
582     return IsUtf8EqualsUtf16(utf8Data, utf8Len, data, length);
583 }
584 
585 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)586 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
587 {
588     uint32_t length = str1->GetLength();
589     if (length != utf16Len) {
590         return false;
591     }
592     if (str1->IsUtf8()) {
593         CVector<uint8_t> buf;
594         const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
595         return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
596     } else {
597         CVector<uint16_t> buf;
598         Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
599         Span<const uint16_t> data2(utf16Data, utf16Len);
600         return EcmaString::StringsAreEquals(data1, data2);
601     }
602 }
603 
604 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)605 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
606 {
607     ASSERT(dstMax >= count);
608     ASSERT(dst.Size() >= src.Size());
609     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
610         LOG_FULL(FATAL) << "memcpy_s failed";
611         UNREACHABLE();
612     }
613     return true;
614 }
615 
ComputeHashcode(uint32_t hashSeed) const616 uint32_t EcmaString::ComputeHashcode(uint32_t hashSeed) const
617 {
618     uint32_t hash;
619     uint32_t length = GetLength();
620     if (IsUtf8()) {
621         CVector<uint8_t> buf;
622         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
623         hash = ComputeHashForData(data, length, hashSeed);
624     } else {
625         CVector<uint16_t> buf;
626         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
627         hash = ComputeHashForData(data, length, hashSeed);
628     }
629     return hash;
630 }
631 
632 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)633 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
634 {
635     uint32_t hash = 0;
636     if (canBeCompress) {
637         hash = ComputeHashForData(utf8Data, utf8Len, 0);
638     } else {
639         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
640         CVector<uint16_t> tmpBuffer(utf16Len);
641         [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
642                                                                                utf16Len, 0);
643         ASSERT(len == utf16Len);
644         hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
645     }
646     return hash;
647 }
648 
649 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)650 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
651 {
652     return ComputeHashForData(utf16Data, length, 0);
653 }
654 
655 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)656 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
657                                    uint32_t utf16Len)
658 {
659     // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
660     uint32_t utf8ConvertLength = utf16Len + 1;
661     CVector<uint16_t> tmpBuffer(utf8ConvertLength);
662     auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
663     if (len != utf16Len) {
664         return false;
665     }
666 
667     Span<const uint16_t> data1(tmpBuffer.data(), len);
668     Span<const uint16_t> data2(utf16Data, utf16Len);
669     return EcmaString::StringsAreEquals(data1, data2);
670 }
671 
ToElementIndex(uint32_t * index)672 bool EcmaString::ToElementIndex(uint32_t *index)
673 {
674     uint32_t len = GetLength();
675     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
676         return false;
677     }
678     if (UNLIKELY(IsUtf16())) {
679         return false;
680     }
681 
682     CVector<uint8_t> buf;
683     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
684     uint32_t c = data[0];
685     uint64_t n = 0;
686     if (c == '0') {
687         *index = 0;
688         return len == 1;
689     }
690     if (c > '0' && c <= '9') {
691         n = c - '0';
692         for (uint32_t i = 1; i < len; i++) {
693             c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
694             if (c < '0' || c > '9') {
695                 return false;
696             }
697             // NOLINTNEXTLINE(readability-magic-numbers)
698             n = n * 10 + (c - '0');  // 10: decimal factor
699         }
700         if (n < JSObject::MAX_ELEMENT_INDEX) {
701             *index = n;
702             return true;
703         }
704     }
705     return false;
706 }
707 
ToTypedArrayIndex(uint32_t * index)708 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
709 {
710     uint32_t len = GetLength();
711     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
712         return false;
713     }
714     if (UNLIKELY(IsUtf16())) {
715         return false;
716     }
717 
718     CVector<uint8_t> buf;
719     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
720     uint32_t c = data[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
721     uint64_t n = 0;
722     if (c == '0') {
723         *index = 0;
724         return len == 1;
725     }
726     if (c > '0' && c <= '9') {
727         n = c - '0';
728         for (uint32_t i = 1; i < len; i++) {
729             c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
730             if (c >= '0' && c <= '9') {
731                 // NOLINTNEXTLINE(readability-magic-numbers)
732                 n = n * 10 + (c - '0');  // 10: decimal factor
733             } else if (c == '.') {
734                 n = JSObject::MAX_ELEMENT_INDEX;
735                 break;
736             } else {
737                 return false;
738             }
739         }
740         if (n < JSObject::MAX_ELEMENT_INDEX) {
741             *index = n;
742             return true;
743         } else {
744             *index = JSObject::MAX_ELEMENT_INDEX;
745             return true;
746         }
747     } else if (c == '-') {
748         *index = JSObject::MAX_ELEMENT_INDEX;
749         return true;
750     }
751     return false;
752 }
753 
754 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)755 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
756 {
757     uint32_t srcLen = src->GetLength();
758     int32_t start = 0;
759     int32_t end = static_cast<int32_t>(srcLen) - 1;
760 
761     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
762         start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
763     }
764     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
765         end = base::StringHelper::GetEnd(data, start, srcLen);
766     }
767     EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
768     return res;
769 }
770 
771 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)772 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
773 {
774     auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
775     uint32_t srcLength = srcFlat->GetLength();
776     auto factory = vm->GetFactory();
777     if (srcFlat->IsUtf16()) {
778         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat->GetDataUtf16(), srcLength);
779         std::string res = base::StringHelper::ToLower(u16str);
780         return *(factory->NewFromStdString(res));
781     } else {
782         return ConvertUtf8ToLowerOrUpper(vm, srcFlat, true);
783     }
784 }
785 
786 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)787 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
788 {
789     auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
790     uint32_t srcLength = srcFlat->GetLength();
791     const char start = 'A';
792     const char end = 'Z';
793     uint32_t upperIndex = srcLength;
794     Span<uint8_t> data(srcFlat->GetDataUtf8Writable(), srcLength);
795     for (uint32_t index = 0; index < srcLength; ++index) {
796         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
797             upperIndex = index;
798             break;
799         }
800     }
801     if (upperIndex == srcLength) {
802         return *src;
803     }
804     return ConvertUtf8ToLowerOrUpper(vm, srcFlat, true, upperIndex);
805 }
806 
807 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & srcFlat,bool toLower,uint32_t startIndex)808 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &srcFlat,
809                                                   bool toLower, uint32_t startIndex)
810 {
811     const char start = toLower ? 'A' : 'a';
812     const char end = toLower ? 'Z' : 'z';
813     uint32_t srcLength = srcFlat->GetLength();
814     auto newString = CreateLineString(vm, srcLength, true);
815     Span<uint8_t> data(srcFlat->GetDataUtf8Writable(), srcLength);
816     auto newStringPtr = newString->GetDataUtf8Writable();
817     if (startIndex > 0) {
818         if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
819             LOG_FULL(FATAL) << "memcpy_s failed";
820             UNREACHABLE();
821         }
822     }
823     for (uint32_t index = startIndex; index < srcLength; ++index) {
824         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
825             *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
826         } else {
827             *(newStringPtr + index) = data[index];
828         }
829     }
830     return newString;
831 }
832 
833 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)834 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
835 {
836     auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
837     uint32_t srcLength = srcFlat->GetLength();
838     auto factory = vm->GetFactory();
839     if (srcFlat->IsUtf16()) {
840         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat->GetDataUtf16(), srcLength);
841         std::string res = base::StringHelper::ToUpper(u16str);
842         return *(factory->NewFromStdString(res));
843     } else {
844         return ConvertUtf8ToLowerOrUpper(vm, srcFlat, false);
845     }
846 }
847 
848 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)849 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
850 {
851     auto factory = vm->GetFactory();
852     auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
853     std::u16string utf16 = srcFlat->ToU16String();
854     std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
855     return *(factory->NewFromStdString(res));
856 }
857 
858 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)859 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
860 {
861     auto factory = vm->GetFactory();
862     auto srcFlat = JSHandle<EcmaString>(vm->GetJSThread(), Flatten(vm, src));
863     std::u16string utf16 = srcFlat->ToU16String();
864     std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
865     return *(factory->NewFromStdString(res));
866 }
867 
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)868 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
869 {
870     auto srcFlat = JSHandle<EcmaString>(thread, Flatten(thread->GetEcmaVM(), src));
871     uint32_t srcLen = srcFlat->GetLength();
872     if (UNLIKELY(srcLen == 0)) {
873         return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
874     }
875     if (srcFlat->IsUtf8()) {
876         Span<const uint8_t> data(srcFlat->GetDataUtf8(), srcLen);
877         return TrimBody(thread, srcFlat, data, mode);
878     } else {
879         Span<const uint16_t> data(srcFlat->GetDataUtf16(), srcLen);
880         return TrimBody(thread, srcFlat, data, mode);
881     }
882 }
883 
SlowFlatten(const EcmaVM * vm,const JSHandle<TreeEcmaString> & string,MemSpaceType type)884 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<TreeEcmaString> &string, MemSpaceType type)
885 {
886     auto thread = vm->GetJSThread();
887     ASSERT(EcmaString::Cast(string->GetSecond())->GetLength() != 0);
888 
889     uint32_t length = string->GetLength();
890     EcmaString *result = nullptr;
891     if (string->IsUtf8()) {
892         result = CreateLineStringWithSpaceType(vm, length, true, type);
893         WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
894     } else {
895         result = CreateLineStringWithSpaceType(vm, length, false, type);
896         WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
897     }
898     string->SetFirst(thread, JSTaggedValue(result));
899     string->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
900     return result;
901 }
902 
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)903 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
904 {
905     EcmaString *s = *string;
906     if (s->IsLineOrConstantString()) {
907         return s;
908     }
909     if (s->IsTreeString()) {
910         JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
911         if (!tree->IsFlat()) {
912             return SlowFlatten(vm, tree, type);
913         }
914         s = EcmaString::Cast(tree->GetFirst());
915     }
916     return s;
917 }
918 
FlattenNoGC(const EcmaVM * vm,EcmaString * string)919 EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string)
920 {
921     DISALLOW_GARBAGE_COLLECTION;
922     if (string->IsLineOrConstantString()) {
923         return string;
924     }
925     if (string->IsTreeString()) {
926         TreeEcmaString *tree = TreeEcmaString::Cast(string);
927         if (tree->IsFlat()) {
928             string = EcmaString::Cast(tree->GetFirst());
929         } else {
930             uint32_t length = tree->GetLength();
931             EcmaString *result = nullptr;
932             if (tree->IsUtf8()) {
933                 result = CreateLineStringNoGC(vm, length, true);
934                 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
935             } else {
936                 result = CreateLineStringNoGC(vm, length, false);
937                 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
938             }
939             tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
940             tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
941             return result;
942         }
943     }
944     return string;
945 }
946 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)947 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
948 {
949     ASSERT(src->IsUtf8());
950     uint32_t length = src->GetLength();
951     EcmaString *string = const_cast<EcmaString *>(src);
952     if (string->IsTreeString()) {
953         if (string->IsFlat()) {
954             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
955         } else {
956             buf.reserve(length);
957             WriteToFlat(string, buf.data(), length);
958             return buf.data();
959         }
960     }
961     return string->GetDataUtf8();
962 }
963 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)964 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
965 {
966     ASSERT(src->IsUtf16());
967     uint32_t length = src->GetLength();
968     EcmaString *string = const_cast<EcmaString *>(src);
969     if (string->IsTreeString()) {
970         if (string->IsFlat()) {
971             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
972         } else {
973             buf.reserve(length);
974             WriteToFlat(string, buf.data(), length);
975             return buf.data();
976         }
977     }
978     return string->GetDataUtf16();
979 }
980 
EcmaStringAccessor(EcmaString * string)981 EcmaStringAccessor::EcmaStringAccessor(EcmaString *string)
982 {
983     ASSERT(string != nullptr);
984     string_ = string;
985 }
986 
EcmaStringAccessor(TaggedObject * obj)987 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
988 {
989     ASSERT(obj != nullptr);
990     string_ = EcmaString::Cast(obj);
991 }
992 
EcmaStringAccessor(JSTaggedValue value)993 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
994 {
995     ASSERT(value.IsString());
996     string_ = EcmaString::Cast(value.GetTaggedObject());
997 }
998 
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)999 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1000     : string_(*strHandle)
1001 {
1002 }
1003 
ToStdString(StringConvertedUsage usage)1004 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1005 {
1006     if (string_ == nullptr) {
1007         return "";
1008     }
1009     bool modify = (usage != StringConvertedUsage::PRINT);
1010     CVector<uint8_t> buf;
1011     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1012     std::string res;
1013     res.reserve(sp.size());
1014     for (const auto &c : sp) {
1015         res.push_back(c);
1016     }
1017     return res;
1018 }
1019 
ToCString(StringConvertedUsage usage)1020 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
1021 {
1022     if (string_ == nullptr) {
1023         return "";
1024     }
1025     bool modify = (usage != StringConvertedUsage::PRINT);
1026     CVector<uint8_t> buf;
1027     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1028     CString res;
1029     res.reserve(sp.size());
1030     for (const auto &c : sp) {
1031         res.push_back(c);
1032     }
1033     return res;
1034 }
1035 }  // namespace panda::ecmascript
1036