• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20 
21 namespace panda::ecmascript {
22 bool EcmaString::compressedStringsEnabled = true;
23 static constexpr int SMALL_STRING_SIZE = 128;
24 
Concat(const JSHandle<EcmaString> & str1Handle,const JSHandle<EcmaString> & str2Handle,const EcmaVM * vm)25 EcmaString *EcmaString::Concat(const JSHandle<EcmaString> &str1Handle, const JSHandle<EcmaString> &str2Handle,
26                                const EcmaVM *vm)
27 {
28     // allocator may trig gc and move src, need to hold it
29     EcmaString *string1 = *str1Handle;
30     EcmaString *string2 = *str2Handle;
31 
32     uint32_t length1 = string1->GetLength();
33 
34     uint32_t length2 = string2->GetLength();
35     uint32_t newLength = length1 + length2;
36     if (newLength == 0) {
37         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
38     }
39     bool compressed = GetCompressedStringsEnabled() && (!string1->IsUtf16() && !string2->IsUtf16());
40     auto newString = AllocStringObject(newLength, compressed, vm);
41 
42     // retrieve strings after gc
43     string1 = *str1Handle;
44     string2 = *str2Handle;
45     if (compressed) {
46         Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
47         Span<const uint8_t> src1(string1->GetDataUtf8(), length1);
48         EcmaString::StringCopy(sp, newLength, src1, length1);
49 
50         sp = sp.SubSpan(length1);
51         Span<const uint8_t> src2(string2->GetDataUtf8(), length2);
52         EcmaString::StringCopy(sp, newLength - length1, src2, length2);
53     } else {
54         Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
55         if (!string1->IsUtf16()) {
56             for (uint32_t i = 0; i < length1; ++i) {
57                 sp[i] = string1->At<false>(i);
58             }
59         } else {
60             Span<const uint16_t> src1(string1->GetDataUtf16(), length1);
61             EcmaString::StringCopy(sp, newLength << 1U, src1, length1 << 1U);
62         }
63         sp = sp.SubSpan(length1);
64         if (!string2->IsUtf16()) {
65             for (uint32_t i = 0; i < length2; ++i) {
66                 sp[i] = string2->At<false>(i);
67             }
68         } else {
69             uint32_t length = length2 << 1U;
70             Span<const uint16_t> src2(string2->GetDataUtf16(), length2);
71             EcmaString::StringCopy(sp, length, src2, length);
72         }
73     }
74 
75     ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
76     return newString;
77 }
78 
79 /* static */
FastSubString(const JSHandle<EcmaString> & src,uint32_t start,uint32_t utf16Len,const EcmaVM * vm)80 EcmaString *EcmaString::FastSubString(const JSHandle<EcmaString> &src, uint32_t start, uint32_t utf16Len,
81                                       const EcmaVM *vm)
82 {
83     if (src->IsUtf8()) {
84         return FastSubUtf8String(vm, src, start, utf16Len);
85     }
86     return FastSubUtf16String(vm, src, start, utf16Len);
87 }
88 
89 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)90 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
91 {
92     for (int32_t i = 0; i < count; ++i) {
93         auto left = static_cast<int32_t>(lhsSp[i]);
94         auto right = static_cast<int32_t>(rhsSp[i]);
95         if (left != right) {
96             return left - right;
97         }
98     }
99     return 0;
100 }
101 
Compare(const EcmaString * rhs) const102 int32_t EcmaString::Compare(const EcmaString *rhs) const
103 {
104     const EcmaString *lhs = this;
105     if (lhs == rhs) {
106         return 0;
107     }
108     int32_t lhsCount = lhs->GetLength();
109     int32_t rhsCount = rhs->GetLength();
110     int32_t countDiff = lhsCount - rhsCount;
111     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
112     if (!lhs->IsUtf16() && !rhs->IsUtf16()) {
113         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
114         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
115         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
116         if (charDiff != 0) {
117             return charDiff;
118         }
119     } else if (!lhs->IsUtf16()) {
120         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
121         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
122         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
123         if (charDiff != 0) {
124             return charDiff;
125         }
126     } else if (!rhs->IsUtf16()) {
127         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), rhsCount);
128         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), lhsCount);
129         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
130         if (charDiff != 0) {
131             return charDiff;
132         }
133     } else {
134         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
135         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
136         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
137         if (charDiff != 0) {
138             return charDiff;
139         }
140     }
141     return countDiff;
142 }
143 
144 /* static */
145 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)146 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
147 {
148     ASSERT(rhsSp.size() > 0);
149     auto first = static_cast<int32_t>(rhsSp[0]);
150     int32_t i;
151     for (i = pos; i <= max; i++) {
152         if (static_cast<int32_t>(lhsSp[i]) != first) {
153             i++;
154             while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
155                 i++;
156             }
157         }
158         /* Found first character, now look at the rest of rhsSp */
159         if (i <= max) {
160             int j = i + 1;
161             int end = j + rhsSp.size() - 1;
162 
163             for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
164             }
165             if (j == end) {
166                 /* Found whole string. */
167                 return i;
168             }
169         }
170     }
171     return -1;
172 }
173 
IndexOf(const EcmaString * rhs,int32_t pos) const174 int32_t EcmaString::IndexOf(const EcmaString *rhs, int32_t pos) const
175 {
176     if (rhs == nullptr) {
177         return -1;
178     }
179     const EcmaString *lhs = this;
180     int32_t lhsCount = lhs->GetLength();
181     int32_t rhsCount = rhs->GetLength();
182     if (rhsCount == 0) {
183         return pos;
184     }
185 
186     if (pos >= lhsCount) {
187         return -1;
188     }
189 
190     if (pos < 0) {
191         pos = 0;
192     }
193 
194     int32_t max = lhsCount - rhsCount;
195     if (max < 0) {
196         return -1;
197     }
198     if (rhs->IsUtf8() && lhs->IsUtf8()) {
199         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
200         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
201         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
202     } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
203         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
204         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
205         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
206     } else if (rhs->IsUtf16()) {
207         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
208         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
209         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
210     } else {  // NOLINT(readability-else-after-return)
211         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
212         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
213         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
214     }
215 
216     return -1;
217 }
218 
219 // static
CanBeCompressed(const EcmaString * string)220 bool EcmaString::CanBeCompressed(const EcmaString *string)
221 {
222     if (string->IsUtf8()) {
223         return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
224     }
225     return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
226 }
227 
228 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)229 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
230 {
231     if (!compressedStringsEnabled) {
232         return false;
233     }
234     bool isCompressed = true;
235     uint32_t index = 0;
236     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
237     while (index < utf8Len) {
238         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
239         if (!IsASCIICharacter(utf8Data[index])) {
240             isCompressed = false;
241             break;
242         }
243         ++index;
244     }
245     return isCompressed;
246 }
247 
248 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)249 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
250 {
251     if (!compressedStringsEnabled) {
252         return false;
253     }
254     bool isCompressed = true;
255     Span<const uint16_t> data(utf16Data, utf16Len);
256     for (uint32_t i = 0; i < utf16Len; i++) {
257         if (!IsASCIICharacter(data[i])) {
258             isCompressed = false;
259             break;
260         }
261     }
262     return isCompressed;
263 }
264 
265 /* static */
CopyUtf16AsUtf8(const uint16_t * utf16From,uint8_t * utf8To,uint32_t utf16Len)266 void EcmaString::CopyUtf16AsUtf8(const uint16_t *utf16From, uint8_t *utf8To, uint32_t utf16Len)
267 {
268     Span<const uint16_t> from(utf16From, utf16Len);
269     Span<uint8_t> to(utf8To, utf16Len);
270     for (uint32_t i = 0; i < utf16Len; i++) {
271         to[i] = from[i];
272     }
273 }
274 
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)275 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
276 {
277     if (GetLength() != str1->GetLength() + str2->GetLength()) {
278         return false;
279     }
280     if (IsUtf16()) {
281         if (str1->IsUtf8() && str2->IsUtf8()) {
282             return false;
283         }
284 
285         if (EcmaString::StringsAreEqualUtf16(str1, GetDataUtf16(), str1->GetLength())) {
286             return EcmaString::StringsAreEqualUtf16(str2, GetDataUtf16() + str1->GetLength(), str2->GetLength());
287         }
288     } else {
289         if (str1->IsUtf16() || str2->IsUtf16()) {
290             return false;
291         }
292         Span<const uint8_t> concatData(GetDataUtf8(), str1->GetLength());
293         Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
294         if (EcmaString::StringsAreEquals(concatData, data1)) {
295             concatData = Span<const uint8_t>(GetDataUtf8() + str1->GetLength(), str2->GetLength());
296             Span<const uint8_t> data2(str2->GetDataUtf8(), str2->GetLength());
297             return EcmaString::StringsAreEquals(concatData, data2);
298         }
299     }
300     return false;
301 }
302 
303 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)304 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
305 {
306     if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength()) ||
307         (str1->GetHashcode() != str2->GetHashcode())) {
308         return false;
309     }
310 
311     if (str1->IsUtf16()) {
312         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
313         Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
314         return EcmaString::StringsAreEquals(data1, data2);
315     } else {  // NOLINT(readability-else-after-return)
316         Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
317         Span<const uint8_t> data2(str2->GetDataUtf8(), str1->GetLength());
318         return EcmaString::StringsAreEquals(data1, data2);
319     }
320 }
321 
322 /* static */
StringsAreEqualUtf8(const EcmaString * str1,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress)323 bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
324                                      bool canBeCompress)
325 {
326     if (canBeCompress != str1->IsUtf8()) {
327         return false;
328     }
329 
330     if (canBeCompress && str1->GetLength() != utf8Len) {
331         return false;
332     }
333 
334     if (canBeCompress) {
335         Span<const uint8_t> data1(str1->GetDataUtf8(), utf8Len);
336         Span<const uint8_t> data2(utf8Data, utf8Len);
337         return EcmaString::StringsAreEquals(data1, data2);
338     }
339     return IsUtf8EqualsUtf16(utf8Data, utf8Len, str1->GetDataUtf16(), str1->GetLength());
340 }
341 
342 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)343 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
344 {
345     bool result = false;
346     if (str1->GetLength() != utf16Len) {
347         result = false;
348     } else if (!str1->IsUtf16()) {
349         result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), str1->GetLength(), utf16Data, utf16Len);
350     } else {
351         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
352         Span<const uint16_t> data2(utf16Data, utf16Len);
353         result = EcmaString::StringsAreEquals(data1, data2);
354     }
355     return result;
356 }
357 
358 /* static */
359 template<typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)360 bool EcmaString::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
361 {
362     ASSERT(str1.Size() <= str2.Size());
363     size_t size = str1.Size();
364     if (size < SMALL_STRING_SIZE) {
365         for (size_t i = 0; i < size; i++) {
366             if (str1[i] != str2[i]) {
367                 return false;
368             }
369         }
370         return true;
371     }
372     return !memcmp(str1.data(), str2.data(), size);
373 }
374 
375 template<typename T>
StringCopy(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)376 bool EcmaString::StringCopy(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
377 {
378     ASSERT(dstMax >= count);
379     ASSERT(dst.Size() >= src.Size());
380     if (src.Size() < SMALL_STRING_SIZE) {
381         for (size_t i = 0; i < src.Size(); i++) {
382             dst[i] = src[i];
383         }
384         return true;
385     }
386     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
387         LOG_ECMA(FATAL) << "memcpy_s failed";
388         UNREACHABLE();
389     }
390     return true;
391 }
392 
393 template<class T>
ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)394 static int32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
395 {
396     uint32_t hash = hashSeed;
397 #if defined(__GNUC__)
398 #pragma GCC diagnostic push
399 #pragma GCC diagnostic ignored "-Wignored-attributes"
400     Span<const T> sp(data, size);
401 #pragma GCC diagnostic pop
402 #endif
403     for (auto c : sp) {
404         constexpr size_t SHIFT = 5;
405         hash = (hash << SHIFT) - hash + c;
406     }
407     return static_cast<int32_t>(hash);
408 }
409 
// Computes a 31-multiplier rolling hash (hash = hash * 31 + byte, seed 0) over a
// NUL-terminated byte sequence. The terminator itself is not hashed.
// Returns 0 for a null pointer.
static int32_t ComputeHashForUtf8(const uint8_t *utf8Data)
{
    if (utf8Data == nullptr) {
        return 0;
    }
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    for (const uint8_t *cursor = utf8Data; *cursor != '\0'; ++cursor) {
        hash = (hash << SHIFT) - hash + *cursor;
    }
    return static_cast<int32_t>(hash);
}
422 
ComputeHashcode(uint32_t hashSeed) const423 uint32_t EcmaString::ComputeHashcode(uint32_t hashSeed) const
424 {
425     uint32_t hash;
426     if (compressedStringsEnabled) {
427         if (!IsUtf16()) {
428             hash = ComputeHashForData(GetDataUtf8(), GetLength(), hashSeed);
429         } else {
430             hash = ComputeHashForData(GetDataUtf16(), GetLength(), hashSeed);
431         }
432     } else {
433         ASSERT(static_cast<size_t>(GetLength())<(std::numeric_limits<size_t>::max()>>1U));
434         hash = ComputeHashForData(GetDataUtf16(), GetLength(), hashSeed);
435     }
436     return hash;
437 }
438 
439 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)440 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
441 {
442     uint32_t hash;
443     if (canBeCompress) {
444         hash = ComputeHashForUtf8(utf8Data);
445     } else {
446         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
447         CVector<uint16_t> tmpBuffer(utf16Len);
448         [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
449                                                                                utf16Len, 0);
450         ASSERT(len == utf16Len);
451         hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
452     }
453     return hash;
454 }
455 
456 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)457 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
458 {
459     return ComputeHashForData(utf16Data, length, 0);
460 }
461 
462 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)463 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
464                                    uint32_t utf16Len)
465 {
466     // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
467     uint32_t utf8ConvertLength = utf16Len + 1;
468     CVector<uint16_t> tmpBuffer(utf8ConvertLength);
469     auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
470     if (len != utf16Len) {
471         return false;
472     }
473 
474     Span<const uint16_t> data1(tmpBuffer.data(), len);
475     Span<const uint16_t> data2(utf16Data, utf16Len);
476     return EcmaString::StringsAreEquals(data1, data2);
477 }
478 }  // namespace panda::ecmascript
479