1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/base/string_helper.h"
17 #include "common_components/base/utf_helper.h"
18 #include "ecmascript/builtins/builtins_global.h"
19 #include "ecmascript/builtins/builtins_global_uri.h"
20 #include "ecmascript/ecma_string-inl.h"
21
22 namespace panda::ecmascript::builtins {
23 using StringHelper = base::StringHelper;
24
25 #if ENABLE_NEXT_OPTIMIZATION
AppendPercentEncodedByte(std::u16string & sStr,uint8_t byte,uint8_t & len)26 void BuiltinsGlobal::AppendPercentEncodedByte(std::u16string &sStr, uint8_t byte, uint8_t &len)
27 {
28 sStr[++len] = common::utf_helper::GetHexChar16((byte >> 4) & BIT_MASK); // 4: high 4 bits
29 sStr[++len] = common::utf_helper::GetHexChar16(byte & BIT_MASK); // low 4 bits
30 ++len;
31 }
32
AppendU32Data(std::u16string & resStr,uint32_t data)33 void BuiltinsGlobal::AppendU32Data(std::u16string &resStr, uint32_t data)
34 {
35 uint8_t len = 0;
36 std::u16string sStr(u"%00%00%00%00");
37 if (data <= 0x7F) { // 0x7F: 1 byte
38 AppendPercentEncodedByte(sStr, data, len);
39 } else if (data <= 0x7FF) { // 0x7FF: 2 bytes
40 AppendPercentEncodedByte(sStr, BIT_MASK_TWO + (data >> 6), len); // 6: high 5 bits
41 AppendPercentEncodedByte(sStr, BIT_MASK_ONE + (data & SIX_BIT_MASK), len); // low 6 bits
42 } else if (data <= 0xFFFF) { // 0xFFFF: 3 bytes
43 AppendPercentEncodedByte(sStr, BIT_MASK_THR + (data >> 12), len); // 12: highest 4 bits
44 AppendPercentEncodedByte(sStr, BIT_MASK_ONE + ((data >> 6) & SIX_BIT_MASK), len); // 6: middle 6 bits
45 AppendPercentEncodedByte(sStr, BIT_MASK_ONE + (data & SIX_BIT_MASK), len); // lowest 6 bits
46 } else { // 4 bytes
47 AppendPercentEncodedByte(sStr, BIT_MASK_FOR + (data >> 18), len); // 18: highest 3 bits
48 AppendPercentEncodedByte(sStr, BIT_MASK_ONE + ((data >> 12) & SIX_BIT_MASK), len); // 12: higher 6 bits
49 AppendPercentEncodedByte(sStr, BIT_MASK_ONE + ((data >> 6) & SIX_BIT_MASK), len); // 6: lower 6 bits
50 AppendPercentEncodedByte(sStr, BIT_MASK_ONE + (data & SIX_BIT_MASK), len); // lowest 6 bits
51 }
52 resStr.append(sStr, 0, len);
53 }
54
55 template <typename T>
GetCodeUnit(Span<T> & sp,int32_t index,int32_t length)56 uint16_t BuiltinsGlobal::GetCodeUnit(Span<T> &sp, int32_t index, int32_t length)
57 {
58 if ((index < 0) || (index >= length)) {
59 return 0;
60 }
61 return sp[index];
62 }
63
64 // Runtime Semantics
Encode(JSThread * thread,const JSHandle<EcmaString> & str,judgURIFunc IsInURISet)65 JSTaggedValue BuiltinsGlobal::Encode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet)
66 {
67 BUILTINS_API_TRACE(thread, Global, Encode);
68 // 1. Let strLen be the number of code units in string.
69 CString errorMsg;
70 auto stringAcc = EcmaStringAccessor(str);
71 uint32_t strLen = stringAcc.GetLength();
72 // 2. Let R be the empty String.
73 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
74 std::u16string resStr;
75 resStr.reserve(strLen);
76 JSHandle<EcmaString> string;
77 bool isTreeString = stringAcc.IsTreeString();
78 if (isTreeString) {
79 string = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), str));
80 stringAcc = EcmaStringAccessor(string);
81 }
82 // 3. Let k be 0.
83 // 4. Repeat
84 uint32_t k = 0;
85 while (true) {
86 // a. If k equals strLen, return R.
87 if (k == strLen) {
88 auto *uint16tData = reinterpret_cast<uint16_t *>(resStr.data());
89 uint32_t resSize = resStr.size();
90 return factory->NewFromUtf16Literal(uint16tData, resSize).GetTaggedValue();
91 }
92
93 // b. Let C be the code unit at index k within string.
94 // c. If C is in unescapedSet, then
95 // i. Let S be a String containing only the code unit C.
96 // ii. Let R be a new String value computed by concatenating the previous value of R and S.
97 // d. Else C is not in unescapedSet,
98 uint16_t cc = stringAcc.Get(thread, k);
99 if (LIKELY(IsInURISet(cc))) {
100 resStr.push_back(static_cast<const char16_t>(cc));
101 } else {
102 // i. If the code unit value of C is not less than 0xDC00 and not greater than 0xDFFF,
103 // throw a URIError exception.
104 if (cc >= common::utf_helper::DECODE_TRAIL_LOW && cc <= common::utf_helper::DECODE_TRAIL_HIGH) {
105 JSTaggedValue strVal = isTreeString ? string.GetTaggedValue() : str.GetTaggedValue();
106 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, strVal);
107 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
108 }
109
110 // ii. If the code unit value of C is less than 0xD800 or greater than 0xDBFF, then
111 // 1. Let V be the code unit value of C.
112 // iii. Else,
113 // 1. Increase k by 1.
114 // 2. If k equals strLen, throw a URIError exception.
115 // 3. Let kChar be the code unit value of the code unit at index k within string.
116 // 4. If kChar is less than 0xDC00 or greater than 0xDFFF, throw a URIError exception.
117 // 5. Let V be UTF16Decode(C, kChar).
118 uint32_t vv;
119 if (cc < common::utf_helper::DECODE_LEAD_LOW || cc > common::utf_helper::DECODE_LEAD_HIGH) {
120 vv = cc;
121 } else {
122 k++;
123 if (k == strLen) {
124 JSTaggedValue strVal = isTreeString ? string.GetTaggedValue() : str.GetTaggedValue();
125 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, strVal);
126 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
127 }
128 uint16_t kc = stringAcc.Get(thread, k);
129 if (kc < common::utf_helper::DECODE_TRAIL_LOW || kc > common::utf_helper::DECODE_TRAIL_HIGH) {
130 JSTaggedValue strVal = isTreeString ? string.GetTaggedValue() : str.GetTaggedValue();
131 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, strVal);
132 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
133 }
134 vv = common::utf_helper::UTF16Decode(cc, kc);
135 }
136
137 // iv. Encode V and append it to resStr
138 AppendU32Data(resStr, vv);
139 }
140 // e. Increase k by 1.
141 k++;
142 }
143 }
144
145 // Runtime Semantics
Decode(JSThread * thread,const JSHandle<EcmaString> & str,judgURIFunc IsInURISet)146 JSTaggedValue BuiltinsGlobal::Decode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet)
147 {
148 BUILTINS_API_TRACE(thread, Global, Decode);
149 JSHandle<EcmaString> string = str;
150 if (EcmaStringAccessor(str).IsTreeString()) {
151 string = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), str));
152 }
153
154 auto stringAcc = EcmaStringAccessor(string);
155 JSTaggedValue result;
156 if (stringAcc.IsLineString()) {
157 // line string or flatten tree string
158 if (!stringAcc.IsUtf16()) {
159 result = DoDecode<uint8_t>(thread, string, IsInURISet, stringAcc.GetDataUtf8());
160 } else {
161 result = DoDecode<uint16_t>(thread, string, IsInURISet, stringAcc.GetDataUtf16());
162 }
163 } else {
164 ASSERT(stringAcc.IsSlicedString());
165 auto parent = SlicedEcmaString::Cast(string.GetTaggedValue())->GetParent(thread);
166 auto parentStrAcc = EcmaStringAccessor(parent);
167 auto startIndex = SlicedEcmaString::Cast(string.GetTaggedValue())->GetStartIndex();
168 if (parentStrAcc.IsLineString() && !parentStrAcc.IsUtf8()) {
169 result = DoDecode<uint16_t>(thread, string, IsInURISet, parentStrAcc.GetDataUtf16() + startIndex);
170 } else {
171 result = DoDecode<uint8_t>(thread, string, IsInURISet, parentStrAcc.GetDataUtf8() + startIndex);
172 }
173 }
174 return result;
175 }
176
177 template <typename T>
DoDecode(JSThread * thread,const JSHandle<EcmaString> & str,judgURIFunc IsInURISet,const T * data)178 JSTaggedValue BuiltinsGlobal::DoDecode(JSThread *thread, const JSHandle<EcmaString> &str, judgURIFunc IsInURISet,
179 const T *data)
180 {
181 // 1. Let strLen be the number of code units in string.
182 int32_t strLen = static_cast<int32_t>(EcmaStringAccessor(str).GetLength());
183 // 2. Let R be the empty String.
184 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
185 std::u16string resStr;
186 resStr.reserve(strLen);
187 std::vector<T> tmpVec;
188 tmpVec.resize(strLen);
189 if (LIKELY(strLen != 0)) {
190 if (memcpy_s(tmpVec.data(), sizeof(T) * strLen, data, sizeof(T) * strLen) != EOK) {
191 LOG_FULL(FATAL) << "memcpy_s failed";
192 UNREACHABLE();
193 }
194 }
195 Span<T> sp(tmpVec.data(), strLen);
196 // 3. Let k be 0.
197 // 4. Repeat
198 int32_t k = 0;
199 while (true) {
200 if (k == strLen) {
201 // a. If k equals strLen, return R.
202 auto *uint16tData = reinterpret_cast<uint16_t *>(resStr.data());
203 uint32_t resSize = resStr.size();
204 return factory->NewFromUtf16Literal(uint16tData, resSize).GetTaggedValue();
205 }
206
207 // b. Let C be the code unit at index k within string.
208 // c. If C is not "%", then
209 // i. Let S be the String containing only the code unit C.
210 // d. Else C is "%",
211 // i. Let start be k.
212 // iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
213 // v. Increase k by 2.
214 // vi. If the most significant bit in B is 0, then
215 // 1. Let C be the code unit with code unit value B.
216 // 2. If C is not in reservedSet, then
217 // a. Let S be the String containing only the code unit C.
218 // 3. Else C is in reservedSet,
219 // a. Let S be the substring of string from index start to index k inclusive.
220 uint16_t cc = GetCodeUnit<T>(sp, k, strLen);
221 if (cc != '%') {
222 if (cc == 0 && strLen == 1) {
223 JSHandle<EcmaString> tmpEcmaString = factory->NewFromUtf16Literal(&cc, 1);
224 return tmpEcmaString.GetTaggedValue();
225 }
226 resStr.push_back(static_cast<const char16_t>(cc));
227 } else {
228 DecodePercentEncoding<T>(thread, str, k, IsInURISet, strLen, resStr, sp);
229 RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
230 }
231 k++;
232 }
233 }
234
HandleSingleByteCharacter(JSThread * thread,uint8_t & bb,const JSHandle<EcmaString> & str,uint32_t & start,int32_t & k,std::u16string & resStr,judgURIFunc IsInURISet)235 void BuiltinsGlobal::HandleSingleByteCharacter(JSThread *thread, uint8_t &bb,
236 const JSHandle<EcmaString> &str,
237 uint32_t &start, int32_t &k,
238 std::u16string &resStr, judgURIFunc IsInURISet)
239 {
240 if (!IsInURISet(bb)) {
241 resStr.push_back(static_cast<const char16_t>(bb));
242 } else {
243 auto substr = EcmaStringAccessor::FastSubString(
244 thread->GetEcmaVM(), str, start, k - start + 1U);
245 resStr.append(StringHelper::StringToU16string(
246 EcmaStringAccessor(substr).ToStdString(thread, StringConvertedUsage::LOGICOPERATION)));
247 }
248 }
249
250 template <typename T>
DecodePercentEncoding(JSThread * thread,const JSHandle<EcmaString> & str,int32_t & k,judgURIFunc IsInURISet,int32_t strLen,std::u16string & resStr,Span<T> & sp)251 JSTaggedValue BuiltinsGlobal::DecodePercentEncoding(JSThread *thread, const JSHandle<EcmaString> &str, int32_t &k,
252 judgURIFunc IsInURISet, int32_t strLen, std::u16string &resStr,
253 Span<T> &sp)
254 {
255 [[maybe_unused]] uint32_t start = static_cast<uint32_t>(k);
256 CString errorMsg;
257 // ii. If k + 2 is greater than or equal to strLen, throw a URIError exception.
258 // iii. If the code units at index (k+1) and (k + 2) within string do not represent hexadecimal digits,
259 // throw a URIError exception.
260 if ((k + 2) >= strLen) { // 2: means plus 2
261 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
262 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
263 }
264 uint16_t frontChar = GetCodeUnit<T>(sp, k + 1, strLen);
265 uint16_t behindChar = GetCodeUnit<T>(sp, k + 2, strLen); // 2: means plus 2
266 if (!(common::utf_helper::IsHexDigits(frontChar) && common::utf_helper::IsHexDigits(behindChar))) {
267 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
268 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
269 }
270 uint8_t bb = common::utf_helper::GetValueFromTwoHex(frontChar, behindChar);
271 k += 2; // 2: means plus 2
272 if ((bb & BIT_MASK_ONE) == 0) {
273 HandleSingleByteCharacter(thread, bb, str, start, k, resStr, IsInURISet);
274 } else {
275 // vii. Else the most significant bit in B is 1,
276 // 1. Let n be the smallest nonnegative integer such that (B << n) & 0x80 is equal to 0.
277 // 3. Let Octets be an array of 8-bit integers of size n.
278 // 4. Put B into Octets at index 0.
279 // 6. Let j be 1.
280 // 7. Repeat, while j < n
281 // a. Increase k by 1.
282 // d. Let B be the 8-bit value represented by the two hexadecimal digits at
283 // index (k + 1) and (k + 2).
284 // f. Increase k by 2.
285 // g. Put B into Octets at index j.
286 // h. Increase j by 1.
287 // 9. If V < 0x10000, then
288 // a. Let C be the code unit V.
289 // b. If C is not in reservedSet, then
290 // i. Let S be the String containing only the code unit C.
291 // c. Else C is in reservedSet,
292 // i. Let S be the substring of string from index start to index k inclusive.
293 // 10. Else V ≥ 0x10000,
294 // a. Let L be (((V – 0x10000) & 0x3FF) + 0xDC00).
295 // b. Let H be ((((V – 0x10000) >> 10) & 0x3FF) + 0xD800).
296 // c. Let S be the String containing the two code units H and L.
297 int32_t n = 0;
298 while ((((static_cast<uint32_t>(bb) << static_cast<uint32_t>(n)) & BIT_MASK_ONE) != 0)) {
299 n++;
300 if (n > 4) { // 4 : 4 means less than 4
301 break;
302 }
303 }
304 // 2. If n equals 1 or n is greater than 4, throw a URIError exception.
305 if ((n == 1) || (n > 4)) {
306 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
307 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
308 }
309
310 std::vector<uint8_t> oct = {bb};
311
312 // 5. If k + (3 × (n – 1)) is greater than or equal to strLen, throw a URIError exception.
313 if (k + (3 * (n - 1)) >= strLen) { // 3: means multiply by 3
314 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
315 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
316 }
317 DecodePercentEncoding<T>(thread, n, k, str, bb, oct, sp, strLen);
318 RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
319 UTF16EncodeCodePoint(thread, IsInURISet, oct, str, start, k, resStr);
320 RETURN_EXCEPTION_IF_ABRUPT_COMPLETION(thread);
321 }
322 return JSTaggedValue::True();
323 }
324
325 template <typename T>
DecodePercentEncoding(JSThread * thread,int32_t & n,int32_t & k,const JSHandle<EcmaString> & str,uint8_t & bb,std::vector<uint8_t> & oct,Span<T> & sp,int32_t strLen)326 JSTaggedValue BuiltinsGlobal::DecodePercentEncoding(JSThread *thread, int32_t &n,
327 int32_t &k, const JSHandle<EcmaString> &str,
328 uint8_t &bb, std::vector<uint8_t> &oct, Span<T> &sp, int32_t strLen)
329 {
330 CString errorMsg;
331 int32_t j = 1;
332 while (j < n) {
333 k++;
334 uint16_t codeUnit = GetCodeUnit<T>(sp, k, strLen);
335 // b. If the code unit at index k within string is not "%", throw a URIError exception.
336 // c. If the code units at index (k +1) and (k + 2) within string do not represent hexadecimal
337 // digits, throw a URIError exception.
338 if (!(codeUnit == '%')) {
339 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
340 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
341 }
342 uint16_t frontChart = GetCodeUnit<T>(sp, k + 1, strLen);
343 uint16_t behindChart = GetCodeUnit<T>(sp, k + 2, strLen); // 2: means plus 2
344 if (!(common::utf_helper::IsHexDigits(frontChart) && common::utf_helper::IsHexDigits(behindChart))) {
345 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
346 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
347 }
348 bb = common::utf_helper::GetValueFromTwoHex(frontChart, behindChart);
349 // e. If the two most significant bits in B are not 10, throw a URIError exception.
350 if (!((bb & BIT_MASK_TWO) == BIT_MASK_ONE)) {
351 errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
352 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
353 }
354 k += 2; // 2: means plus 2
355 oct.push_back(bb);
356 j++;
357 }
358 return JSTaggedValue::True();
359 }
360
UTF16EncodeCodePoint(JSThread * thread,judgURIFunc IsInURISet,const std::vector<uint8_t> & oct,const JSHandle<EcmaString> & str,uint32_t & start,int32_t & k,std::u16string & resStr)361 JSTaggedValue BuiltinsGlobal::UTF16EncodeCodePoint(JSThread *thread, judgURIFunc IsInURISet,
362 const std::vector<uint8_t> &oct, const JSHandle<EcmaString> &str,
363 uint32_t &start, int32_t &k, std::u16string &resStr)
364 {
365 if (!common::utf_helper::IsValidUTF8(oct)) {
366 CString errorMsg = "DecodeURI: invalid character: " + ConvertToString(thread, str.GetTaggedValue());
367 THROW_URI_ERROR_AND_RETURN(thread, errorMsg.c_str(), JSTaggedValue::Exception());
368 }
369 uint32_t vv = StringHelper::Utf8ToU32String(oct);
370 if (vv < common::utf_helper::DECODE_SECOND_FACTOR) {
371 if (!IsInURISet(vv)) {
372 resStr.append(StringHelper::Utf16ToU16String(reinterpret_cast<uint16_t *>(&vv), 1));
373 } else {
374 auto substr = EcmaStringAccessor::FastSubString(
375 thread->GetEcmaVM(), str, start, static_cast<uint32_t>(k) - start + 1U);
376 resStr.append(StringHelper::StringToU16string(
377 EcmaStringAccessor(substr).ToStdString(thread, StringConvertedUsage::LOGICOPERATION)));
378 }
379 } else {
380 uint16_t lv = (((vv - common::utf_helper::DECODE_SECOND_FACTOR) & BIT16_MASK) +
381 common::utf_helper::DECODE_TRAIL_LOW);
382 uint16_t hv = ((((vv - common::utf_helper::DECODE_SECOND_FACTOR) >> 10U) & BIT16_MASK) + // NOLINT
383 common::utf_helper::DECODE_LEAD_LOW); // 10: means shift left by 10 digits
384 resStr.push_back(static_cast<const char16_t>(hv));
385 resStr.push_back(static_cast<const char16_t>(lv));
386 }
387 return JSTaggedValue::True();
388 }
389 #endif // ENABLE_NEXT_OPTIMIZATION
390 } // namespace panda::ecmascript::builtins
391