• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/base/string_helper.h"
17 #include "common_components/base/utf_helper.h"
18 #include "ecmascript/builtins/builtins_global.h"
19 #include "ecmascript/builtins/builtins_global_uri.h"
20 #include "ecmascript/ecma_string-inl.h"
21 
22 namespace panda::ecmascript::builtins {
23 using StringHelper = base::StringHelper;
24 
25 #if ENABLE_NEXT_OPTIMIZATION
AppendPercentEncodedByte(std::u16string & sStr,uint8_t byte,uint8_t & len)26 void BuiltinsGlobal::AppendPercentEncodedByte(std::u16string &sStr, uint8_t byte, uint8_t &len)
27 {
28     sStr[++len] = common::utf_helper::GetHexChar16((byte >> 4) & BIT_MASK); // 4: high 4 bits
29     sStr[++len] = common::utf_helper::GetHexChar16(byte & BIT_MASK);        // low 4 bits
30     ++len;
31 }
32 
AppendU32Data(std::u16string & resStr,uint32_t data)33 void BuiltinsGlobal::AppendU32Data(std::u16string &resStr, uint32_t data)
34 {
35     uint8_t len = 0;
36     std::u16string sStr(u"%00%00%00%00");
37     if (data <= 0x7F) {          // 0x7F: 1 byte
38         AppendPercentEncodedByte(sStr, data, len);
39     } else if (data <= 0x7FF) {  // 0x7FF: 2 bytes
40         AppendPercentEncodedByte(sStr, BIT_MASK_TWO + (data >> 6), len);                   // 6: high 5 bits
41         AppendPercentEncodedByte(sStr, BIT_MASK_ONE + (data & SIX_BIT_MASK), len);         // low 6 bits
42     } else if (data <= 0xFFFF) { // 0xFFFF: 3 bytes
43         AppendPercentEncodedByte(sStr, BIT_MASK_THR + (data >> 12), len);                  // 12: highest 4 bits
44         AppendPercentEncodedByte(sStr, BIT_MASK_ONE + ((data >> 6) & SIX_BIT_MASK), len);  // 6: middle 6 bits
45         AppendPercentEncodedByte(sStr, BIT_MASK_ONE + (data & SIX_BIT_MASK), len);         // lowest 6 bits
46     } else {                     // 4 bytes
47         AppendPercentEncodedByte(sStr, BIT_MASK_FOR + (data >> 18), len);                  // 18: highest 3 bits
48         AppendPercentEncodedByte(sStr, BIT_MASK_ONE + ((data >> 12) & SIX_BIT_MASK), len); // 12: higher 6 bits
49         AppendPercentEncodedByte(sStr, BIT_MASK_ONE + ((data >> 6) & SIX_BIT_MASK), len);  // 6: lower 6 bits
50         AppendPercentEncodedByte(sStr, BIT_MASK_ONE + (data & SIX_BIT_MASK), len);         // lowest 6 bits
51     }
52     resStr.append(sStr, 0, len);
53 }
54 
55 template <typename T>
GetCodeUnit(Span<T> & sp,int32_t index,int32_t length)56 uint16_t BuiltinsGlobal::GetCodeUnit(Span<T> &sp, int32_t index, int32_t length)
57 {
58     if ((index < 0) || (index >= length)) {
59         return 0;
60     }
61     return sp[index];
62 }
63 
64 // Runtime Semantics
Encode(JSThread * thread,const JSHandle<EcmaString> & str,judgURIFunc IsInURISet)65 JSTaggedValue BuiltinsGlobal::Encode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet)
66 {
67     BUILTINS_API_TRACE(thread, Global, Encode);
68     // 1. Let strLen be the number of code units in string.
69     CString errorMsg;
70     auto stringAcc = EcmaStringAccessor(str);
71     uint32_t strLen = stringAcc.GetLength();
72     // 2. Let R be the empty String.
73     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
74     std::u16string resStr;
75     resStr.reserve(strLen);
76     JSHandle<EcmaString> string;
77     bool isTreeString = stringAcc.IsTreeString();
78     if (isTreeString) {
79         string = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), str));
80         stringAcc = EcmaStringAccessor(string);
81     }
82     // 3. Let k be 0.
83     // 4. Repeat
84     uint32_t k = 0;
85     while (true) {
86         // a. If k equals strLen, return R.
87         if (k == strLen) {
88             auto *uint16tData = reinterpret_cast<uint16_t *>(resStr.data());
89             uint32_t resSize = resStr.size();
90             return factory->NewFromUtf16Literal(uint16tData, resSize).GetTaggedValue();
91         }
92 
93         // b. Let C be the code unit at index k within string.
94         // c. If C is in unescapedSet, then
95         //   i. Let S be a String containing only the code unit C.
96         //   ii. Let R be a new String value computed by concatenating the previous value of R and S.
97         // d. Else C is not in unescapedSet,
98         uint16_t cc = stringAcc.Get(thread, k);
99         if (LIKELY(IsInURISet(cc))) {
100             resStr.push_back(static_cast<const char16_t>(cc));
101         } else {
102             // i. If the code unit value of C is not less than 0xDC00 and not greater than 0xDFFF,
103             //    throw a URIError exception.
104             if (cc >= common::utf_helper::DECODE_TRAIL_LOW && cc <= common::utf_helper::DECODE_TRAIL_HIGH) {
105                 JSTaggedValue strVal = isTreeString ? string.GetTaggedValue() : str.GetTaggedValue();
106                 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, strVal);
107                 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
108             }
109 
110             // ii. If the code unit value of C is less than 0xD800 or greater than 0xDBFF, then
111             //    1. Let V be the code unit value of C.
112             // iii. Else,
113             //    1. Increase k by 1.
114             //    2. If k equals strLen, throw a URIError exception.
115             //    3. Let kChar be the code unit value of the code unit at index k within string.
116             //    4. If kChar is less than 0xDC00 or greater than 0xDFFF, throw a URIError exception.
117             //    5. Let V be UTF16Decode(C, kChar).
118             uint32_t vv;
119             if (cc < common::utf_helper::DECODE_LEAD_LOW || cc > common::utf_helper::DECODE_LEAD_HIGH) {
120                 vv = cc;
121             } else {
122                 k++;
123                 if (k == strLen) {
124                     JSTaggedValue strVal = isTreeString ? string.GetTaggedValue() : str.GetTaggedValue();
125                     errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, strVal);
126                     THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
127                 }
128                 uint16_t kc = stringAcc.Get(thread, k);
129                 if (kc < common::utf_helper::DECODE_TRAIL_LOW || kc > common::utf_helper::DECODE_TRAIL_HIGH) {
130                     JSTaggedValue strVal = isTreeString ? string.GetTaggedValue() : str.GetTaggedValue();
131                     errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, strVal);
132                     THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
133                 }
134                 vv = common::utf_helper::UTF16Decode(cc, kc);
135             }
136 
137             // iv. Encode V and append it to resStr
138             AppendU32Data(resStr, vv);
139         }
140         // e. Increase k by 1.
141         k++;
142     }
143 }
144 
145 // Runtime Semantics
Decode(JSThread * thread,const JSHandle<EcmaString> & str,judgURIFunc IsInURISet)146 JSTaggedValue BuiltinsGlobal::Decode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet)
147 {
148     BUILTINS_API_TRACE(thread, Global, Decode);
149     JSHandle<EcmaString> string = str;
150     if (EcmaStringAccessor(str).IsTreeString()) {
151         string = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), str));
152     }
153 
154     auto stringAcc = EcmaStringAccessor(string);
155     JSTaggedValue result;
156     if (stringAcc.IsLineString()) {
157         // line string or flatten tree string
158         if (!stringAcc.IsUtf16()) {
159             result = DoDecode<uint8_t>(thread, string, IsInURISet, stringAcc.GetDataUtf8());
160         } else {
161             result = DoDecode<uint16_t>(thread, string, IsInURISet, stringAcc.GetDataUtf16());
162         }
163     } else {
164         ASSERT(stringAcc.IsSlicedString());
165         auto parent = SlicedEcmaString::Cast(string.GetTaggedValue())->GetParent(thread);
166         auto parentStrAcc = EcmaStringAccessor(parent);
167         auto startIndex = SlicedEcmaString::Cast(string.GetTaggedValue())->GetStartIndex();
168         if (parentStrAcc.IsLineString() && !parentStrAcc.IsUtf8()) {
169             result = DoDecode<uint16_t>(thread, string, IsInURISet, parentStrAcc.GetDataUtf16() + startIndex);
170         } else {
171             result = DoDecode<uint8_t>(thread, string, IsInURISet, parentStrAcc.GetDataUtf8() + startIndex);
172         }
173     }
174     return result;
175 }
176 
177 template <typename T>
DoDecode(JSThread * thread,const JSHandle<EcmaString> & str,judgURIFunc IsInURISet,const T * data)178 JSTaggedValue BuiltinsGlobal::DoDecode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet,
179                                        const T *data)
180 {
181     // 1. Let strLen be the number of code units in string.
182     int32_t strLen = static_cast<int32_t>(EcmaStringAccessor(str).GetLength());
183     // 2. Let R be the empty String.
184     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
185     std::u16string resStr;
186     resStr.reserve(strLen);
187     std::vector<T> tmpVec;
188     tmpVec.resize(strLen);
189     if (LIKELY(strLen != 0)) {
190         if (memcpy_s(tmpVec.data(), sizeof(T) * strLen, data, sizeof(T) * strLen) != EOK) {
191             LOG_FULL(FATAL) << "memcpy_s failed";
192             UNREACHABLE();
193         }
194     }
195     Span<T> sp(tmpVec.data(), strLen);
196     // 3. Let k be 0.
197     // 4. Repeat
198     int32_t k = 0;
199     while (true) {
200         if (k == strLen) {
201             // a. If k equals strLen, return R.
202             auto *uint16tData = reinterpret_cast<uint16_t *>(resStr.data());
203             uint32_t resSize = resStr.size();
204             return factory->NewFromUtf16Literal(uint16tData, resSize).GetTaggedValue();
205         }
206 
207         // b. Let C be the code unit at index k within string.
208         // c. If C is not "%", then
209         //    i. Let S be the String containing only the code unit C.
210         // d. Else C is "%",
211         //   i. Let start be k.
212         //   iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
213         //   v. Increase k by 2.
214         //   vi. If the most significant bit in B is 0, then
215         //      1. Let C be the code unit with code unit value B.
216         //      2. If C is not in reservedSet, then
217         //         a. Let S be the String containing only the code unit C.
218         //      3. Else C is in reservedSet,
219         //         a. Let S be the substring of string from index start to index k inclusive.
220         uint16_t cc = GetCodeUnit<T>(sp, k, strLen);
221         if (cc != '%') {
222             if (cc == 0 && strLen == 1) {
223                 JSHandle<EcmaString> tmpEcmaString = factory->NewFromUtf16Literal(&cc, 1);
224                 return tmpEcmaString.GetTaggedValue();
225             }
226             resStr.push_back(static_cast<const char16_t>(cc));
227         } else {
228             DecodePercentEncoding<T>(thread, str, k, IsInURISet, strLen, resStr, sp);
229             RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
230         }
231         k++;
232     }
233 }
234 
HandleSingleByteCharacter(JSThread * thread,uint8_t & bb,const JSHandle<EcmaString> & str,uint32_t & start,int32_t & k,std::u16string & resStr,judgURIFunc IsInURISet)235 void BuiltinsGlobal::HandleSingleByteCharacter(JSThread *thread, uint8_t &bb,
236                                                const JSHandle<EcmaString> &str,
237                                                uint32_t &start, int32_t &k,
238                                                std::u16string &resStr, judgURIFunc IsInURISet)
239 {
240     if (!IsInURISet(bb)) {
241         resStr.push_back(static_cast<const char16_t>(bb));
242     } else {
243         auto substr = EcmaStringAccessor::FastSubString(
244             thread->GetEcmaVM(), str, start, k - start + 1U);
245         resStr.append(StringHelper::StringToU16string(
246             EcmaStringAccessor(substr).ToStdString(thread, StringConvertedUsage::LOGICOPERATION)));
247     }
248 }
249 
250 template <typename T>
DecodePercentEncoding(JSThread * thread,const JSHandle<EcmaString> & str,int32_t & k,judgURIFunc IsInURISet,int32_t strLen,std::u16string & resStr,Span<T> & sp)251 JSTaggedValue BuiltinsGlobal::DecodePercentEncoding(JSThread *thread, const JSHandle<EcmaString> &str, int32_t &k,
252                                                     judgURIFunc IsInURISet, int32_t strLen, std::u16string &resStr,
253                                                     Span<T> &sp)
254 {
255     [[maybe_unused]] uint32_t start = static_cast<uint32_t>(k);
256     CString errorMsg;
257     // ii. If k + 2 is greater than or equal to strLen, throw a URIError exception.
258     // iii. If the code units at index (k+1) and (k + 2) within string do not represent hexadecimal digits,
259     //      throw a URIError exception.
260     if ((k + 2) >= strLen) {  // 2: means plus 2
261         errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
262         THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
263     }
264     uint16_t frontChar = GetCodeUnit<T>(sp, k + 1, strLen);
265     uint16_t behindChar = GetCodeUnit<T>(sp, k + 2, strLen);  // 2: means plus 2
266     if (!(common::utf_helper::IsHexDigits(frontChar) && common::utf_helper::IsHexDigits(behindChar))) {
267         errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
268         THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
269     }
270     uint8_t bb = common::utf_helper::GetValueFromTwoHex(frontChar, behindChar);
271     k += 2;  // 2: means plus 2
272     if ((bb & BIT_MASK_ONE) == 0) {
273         HandleSingleByteCharacter(thread, bb, str, start, k, resStr, IsInURISet);
274     } else {
275         // vii. Else the most significant bit in B is 1,
276         //   1. Let n be the smallest nonnegative integer such that (B << n) & 0x80 is equal to 0.
277         //   3. Let Octets be an array of 8-bit integers of size n.
278         //   4. Put B into Octets at index 0.
279         //   6. Let j be 1.
280         //   7. Repeat, while j < n
281         //     a. Increase k by 1.
282         //     d. Let B be the 8-bit value represented by the two hexadecimal digits at
283         //        index (k + 1) and (k + 2).
284         //     f. Increase k by 2.
285         //     g. Put B into Octets at index j.
286         //     h. Increase j by 1.
287         //   9. If V < 0x10000, then
288         //     a. Let C be the code unit V.
289         //     b. If C is not in reservedSet, then
290         //        i. Let S be the String containing only the code unit C.
291         //     c. Else C is in reservedSet,
292         //        i. Let S be the substring of string from index start to index k inclusive.
293         //   10. Else V ≥ 0x10000,
294         //     a. Let L be (((V – 0x10000) & 0x3FF) + 0xDC00).
295         //     b. Let H be ((((V – 0x10000) >> 10) & 0x3FF) + 0xD800).
296         //     c. Let S be the String containing the two code units H and L.
297         int32_t n = 0;
298         while ((((static_cast<uint32_t>(bb) << static_cast<uint32_t>(n)) & BIT_MASK_ONE) != 0)) {
299             n++;
300             if (n > 4) { // 4 : 4 means less than 4
301                 break;
302             }
303         }
304         // 2. If n equals 1 or n is greater than 4, throw a URIError exception.
305         if ((n == 1) || (n > 4)) {
306             errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
307             THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
308         }
309 
310         std::vector<uint8_t> oct = {bb};
311 
312         // 5. If k + (3 × (n – 1)) is greater than or equal to strLen, throw a URIError exception.
313         if (k + (3 * (n - 1)) >= strLen) {  // 3: means multiply by 3
314             errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
315             THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
316         }
317         DecodePercentEncoding<T>(thread, n, k, str, bb, oct, sp, strLen);
318         RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
319         UTF16EncodeCodePoint(thread, IsInURISet, oct, str, start, k, resStr);
320         RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
321     }
322     return JSTaggedValue::True();
323 }
324 
325 template <typename T>
DecodePercentEncoding(JSThread * thread,int32_t & n,int32_t & k,const JSHandle<EcmaString> & str,uint8_t & bb,std::vector<uint8_t> & oct,Span<T> & sp,int32_t strLen)326 JSTaggedValue BuiltinsGlobal::DecodePercentEncoding(JSThread *thread, int32_t &n,
327                                                     int32_t &k, const JSHandle<EcmaString> &str,
328                                                     uint8_t &bb, std::vector<uint8_t> &oct, Span<T> &sp, int32_t strLen)
329 {
330     CString errorMsg;
331     int32_t j = 1;
332     while (j < n) {
333         k++;
334         uint16_t codeUnit = GetCodeUnit<T>(sp, k, strLen);
335         // b. If the code unit at index k within string is not "%", throw a URIError exception.
336         // c. If the code units at index (k +1) and (k + 2) within string do not represent hexadecimal
337         //    digits, throw a URIError exception.
338         if (!(codeUnit == '%')) {
339             errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
340             THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
341         }
342         uint16_t frontChart = GetCodeUnit<T>(sp, k + 1, strLen);
343         uint16_t behindChart = GetCodeUnit<T>(sp, k + 2, strLen);  // 2: means plus 2
344         if (!(common::utf_helper::IsHexDigits(frontChart) && common::utf_helper::IsHexDigits(behindChart))) {
345             errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
346             THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
347         }
348         bb = common::utf_helper::GetValueFromTwoHex(frontChart, behindChart);
349         // e. If the two most significant bits in B are not 10, throw a URIError exception.
350         if (!((bb & BIT_MASK_TWO) == BIT_MASK_ONE)) {
351             errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
352             THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
353         }
354         k += 2;  // 2: means plus 2
355         oct.push_back(bb);
356         j++;
357     }
358     return JSTaggedValue::True();
359 }
360 
UTF16EncodeCodePoint(JSThread * thread,judgURIFunc IsInURISet,const std::vector<uint8_t> & oct,const JSHandle<EcmaString> & str,uint32_t & start,int32_t & k,std::u16string & resStr)361 JSTaggedValue BuiltinsGlobal::UTF16EncodeCodePoint(JSThread *thread, judgURIFunc IsInURISet,
362                                                    const std::vector<uint8_t> &oct, const JSHandle<EcmaString> &str,
363                                                    uint32_t &start, int32_t &k, std::u16string &resStr)
364 {
365     if (!common::utf_helper::IsValidUTF8(oct)) {
366         CString errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
367         THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
368     }
369     uint32_t vv = StringHelper::Utf8ToU32String(oct);
370     if (vv < common::utf_helper::DECODE_SECOND_FACTOR) {
371         if (!IsInURISet(vv)) {
372             resStr.append(StringHelper::Utf16ToU16String(reinterpret_cast<uint16_t *>(&vv), 1));
373         } else {
374             auto substr = EcmaStringAccessor::FastSubString(
375                 thread->GetEcmaVM(), str, start, static_cast<uint32_t>(k) - start + 1U);
376             resStr.append(StringHelper::StringToU16string(
377                 EcmaStringAccessor(substr).ToStdString(thread, StringConvertedUsage::LOGICOPERATION)));
378         }
379     } else {
380         uint16_t lv = (((vv - common::utf_helper::DECODE_SECOND_FACTOR) & BIT16_MASK) +
381             common::utf_helper::DECODE_TRAIL_LOW);
382         uint16_t hv = ((((vv - common::utf_helper::DECODE_SECOND_FACTOR) >> 10U) & BIT16_MASK) +  // NOLINT
383             common::utf_helper::DECODE_LEAD_LOW);  // 10: means shift left by 10 digits
384             resStr.push_back(static_cast<const char16_t>(hv));
385             resStr.push_back(static_cast<const char16_t>(lv));
386     }
387     return JSTaggedValue::True();
388 }
389 #endif // ENABLE_NEXT_OPTIMIZATION
390 }  // namespace panda::ecmascript::builtins
391