1 /*
2 * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/base/json_helper.h"
17 #include "common_components/base/utf_helper.h"
18
19
20 namespace panda::ecmascript::base {
21
22 #if ENABLE_NEXT_OPTIMIZATION
// Distance in bytes between the starts of two consecutive JSON_ESCAPE_TABLE entries.
constexpr int K_JSON_ESCAPE_TABLE_ENTRY_SIZE = 8;

// Table for escaping the code units 0x00-0x5F.
// The entry for code unit `c` starts at byte `c * K_JSON_ESCAPE_TABLE_ENTRY_SIZE`. Every entry holds
// the text to emit for `c` as a NUL-terminated string, followed by space padding up to the entry
// size, so `&JSON_ESCAPE_TABLE[c * K_JSON_ESCAPE_TABLE_ENTRY_SIZE]` is a valid C string.
// Each entry is written as its own literal so that its 8-byte width can be checked at a glance:
//   6-character escapes ("\\uXXXX") take 1 padding space,
//   2-character escapes ("\\b", "\\\"", ...) take 5 padding spaces,
//   1-character entries take 6 padding spaces.
constexpr const char* const JSON_ESCAPE_TABLE =
    // 0x00 - 0x1F: control characters
    "\\u0000\0 " "\\u0001\0 " "\\u0002\0 " "\\u0003\0 " "\\u0004\0 " "\\u0005\0 " "\\u0006\0 " "\\u0007\0 "
    "\\b\0     " "\\t\0     " "\\n\0     " "\\u000b\0 " "\\f\0     " "\\r\0     " "\\u000e\0 " "\\u000f\0 "
    "\\u0010\0 " "\\u0011\0 " "\\u0012\0 " "\\u0013\0 " "\\u0014\0 " "\\u0015\0 " "\\u0016\0 " "\\u0017\0 "
    "\\u0018\0 " "\\u0019\0 " "\\u001a\0 " "\\u001b\0 " "\\u001c\0 " "\\u001d\0 " "\\u001e\0 " "\\u001f\0 "
    // 0x20 - 0x3F: only '"' (0x22) is rewritten
    " \0      " "!\0      " "\\\"\0     " "#\0      " "$\0      " "%\0      " "&\0      " "'\0      "
    "(\0      " ")\0      " "*\0      " "+\0      " ",\0      " "-\0      " ".\0      " "/\0      "
    "0\0      " "1\0      " "2\0      " "3\0      " "4\0      " "5\0      " "6\0      " "7\0      "
    "8\0      " "9\0      " ":\0      " ";\0      " "<\0      " "=\0      " ">\0      " "?\0      "
    // 0x40 - 0x5F: only '\\' (0x5C) is rewritten
    "@\0      " "A\0      " "B\0      " "C\0      " "D\0      " "E\0      " "F\0      " "G\0      "
    "H\0      " "I\0      " "J\0      " "K\0      " "L\0      " "M\0      " "N\0      " "O\0      "
    "P\0      " "Q\0      " "R\0      " "S\0      " "T\0      " "U\0      " "V\0      " "W\0      "
    "X\0      " "Y\0      " "Z\0      " "[\0      " "\\\\\0     " "]\0      " "^\0      " "_\0      ";

// Padding mistakes shift every later entry, so checking the last entry guards the whole layout.
static_assert(JSON_ESCAPE_TABLE[0x5F * K_JSON_ESCAPE_TABLE_ENTRY_SIZE] == '_' &&
              JSON_ESCAPE_TABLE[0x5F * K_JSON_ESCAPE_TABLE_ENTRY_SIZE + 1] == '\0',
              "JSON_ESCAPE_TABLE entries must be exactly K_JSON_ESCAPE_TABLE_ENTRY_SIZE bytes wide");
40
// Lookup table indexed by a one-byte code unit. A set flag means the byte is copied into the quoted
// string unchanged; a cleared flag means it has to be replaced by an escape sequence.
// The cleared entries are 0x00-0x1F, '"' (0x22) and '\\' (0x5C).
constexpr bool JSON_DO_NOT_ESCAPE_FLAG_TABLE[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1F
    1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x20 - 0x2F ('"' cleared)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x30 - 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x40 - 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,  // 0x50 - 0x5F ('\\' cleared)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x60 - 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x70 - 0x7F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x80 - 0x8F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x90 - 0x9F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xA0 - 0xAF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xB0 - 0xBF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xC0 - 0xCF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xD0 - 0xDF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xE0 - 0xEF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xF0 - 0xFF
};

// DoNotEscape(uint8_t) indexes the table with an unchecked byte, so the table must cover all 256
// byte values; a dropped initializer would otherwise become an out-of-bounds read.
static_assert(sizeof(JSON_DO_NOT_ESCAPE_FLAG_TABLE) / sizeof(JSON_DO_NOT_ESCAPE_FLAG_TABLE[0]) == UINT8_MAX + 1,
              "JSON_DO_NOT_ESCAPE_FLAG_TABLE must have one entry per uint8_t value");

// Returns true when the one-byte code unit `c` can be appended to a quoted string without escaping.
constexpr bool DoNotEscape(uint8_t c)
{
    return JSON_DO_NOT_ESCAPE_FLAG_TABLE[c];
}
64
// Returns true when the UTF-16 code unit `c` can be appended to a quoted string without escaping.
// Code units below 0x20, '"' (0x22), '\\' (0x5C) and the range 0xD800-0xDFFF are rejected.
bool DoNotEscape(uint16_t c)
{
    if (c < 0x20 || c == 0x22 || c == 0x5C) {
        return false;
    }
    const bool inSurrogateRange = (c >= 0xD800 && c <= 0xDFFF);
    return !inSurrogateRange;
}
70
IsFastValueToQuotedString(const Span<const uint8_t> & sp)71 bool JsonHelper::IsFastValueToQuotedString(const Span<const uint8_t> &sp)
72 {
73 for (const auto utf8Ch : sp) {
74 if (!DoNotEscape(utf8Ch)) {
75 return false;
76 }
77 }
78 return true;
79 }
80 #else
81 bool JsonHelper::IsFastValueToQuotedString(const CString& str)
82 {
83 for (const auto item : str) {
84 const auto ch = static_cast<uint8_t>(item);
85 switch (ch) {
86 case '\"':
87 case '\\':
88 case '\b':
89 case '\f':
90 case '\n':
91 case '\r':
92 case '\t':
93 case ZERO_FIRST:
94 case ALONE_SURROGATE_3B_FIRST:
95 return false;
96 default:
97 if (ch < CODE_SPACE) {
98 return false;
99 }
100 break;
101 }
102 }
103 return true;
104 }
105 #endif
106
107 #if ENABLE_NEXT_OPTIMIZATION
AppendQuotedValueToC16String(const Span<const uint16_t> & sp,uint32_t & index,C16String & output)108 void JsonHelper::AppendQuotedValueToC16String(const Span<const uint16_t> &sp, uint32_t &index, C16String &output)
109 {
110 auto ch = sp[index];
111 if (common::utf_helper::IsUTF16Surrogate(ch)) {
112 // utf-16 to quoted string
113 if (ch <= common::utf_helper::DECODE_LEAD_HIGH) {
114 if (index + 1 < sp.size() && common::utf_helper::IsUTF16LowSurrogate(sp[index + 1])) {
115 AppendChar(output, ch);
116 AppendChar(output, sp[index + 1]);
117 ++index;
118 } else {
119 AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
120 }
121 } else {
122 AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
123 }
124 } else {
125 ASSERT(ch < 0x60);
126 AppendString(output, &JSON_ESCAPE_TABLE[ch * K_JSON_ESCAPE_TABLE_ENTRY_SIZE]);
127 }
128 }
129
130 template <typename SrcType, typename DstType>
AppendValueToQuotedString(const Span<const SrcType> & sp,DstType & output)131 void JsonHelper::AppendValueToQuotedString(const Span<const SrcType> &sp, DstType &output)
132 {
133 static_assert(sizeof(typename DstType::value_type) >= sizeof(SrcType));
134 AppendString(output, "\"");
135 if constexpr (sizeof(SrcType) == 1) {
136 if (IsFastValueToQuotedString(sp)) {
137 AppendString(output, reinterpret_cast<const char *>(sp.data()), sp.size());
138 AppendString(output, "\"");
139 return;
140 }
141 }
142 uint32_t len = sp.size();
143 for (uint32_t i = 0; i < len; ++i) {
144 auto ch = sp[i];
145 if (DoNotEscape(ch)) {
146 AppendChar(output, ch);
147 } else if constexpr (sizeof(SrcType) != 1) {
148 AppendQuotedValueToC16String(sp, i, output);
149 } else {
150 ASSERT(ch < 0x60);
151 AppendString(output, &JSON_ESCAPE_TABLE[ch * K_JSON_ESCAPE_TABLE_ENTRY_SIZE]);
152 }
153 }
154 AppendString(output, "\"");
155 }
// Explicit instantiations of AppendValueToQuotedString for the three source/destination pairs
// below. The template body lives in this file, so these are the instantiations this file emits.
// NOTE(review): presumably these are the only combinations used by other translation units -
// confirm before calling the template with a new pair.
template void JsonHelper::AppendValueToQuotedString<uint8_t, CString>(
    const Span<const uint8_t> &sp, CString &output);
template void JsonHelper::AppendValueToQuotedString<uint8_t, C16String>(
    const Span<const uint8_t> &sp, C16String &output);
template void JsonHelper::AppendValueToQuotedString<uint16_t, C16String>(
    const Span<const uint16_t> &sp, C16String &output);
162
163 #else
AppendValueToQuotedString(const CString & str,CString & output)164 void JsonHelper::AppendValueToQuotedString(const CString& str, CString& output)
165 {
166 output += "\"";
167 bool isFast = IsFastValueToQuotedString(str); // fast mode
168 if (isFast) {
169 output += str;
170 output += "\"";
171 return;
172 }
173 for (uint32_t i = 0; i < str.size(); ++i) {
174 const auto ch = static_cast<uint8_t>(str[i]);
175 switch (ch) {
176 case '\"':
177 output += "\\\"";
178 break;
179 case '\\':
180 output += "\\\\";
181 break;
182 case '\b':
183 output += "\\b";
184 break;
185 case '\f':
186 output += "\\f";
187 break;
188 case '\n':
189 output += "\\n";
190 break;
191 case '\r':
192 output += "\\r";
193 break;
194 case '\t':
195 output += "\\t";
196 break;
197 case ZERO_FIRST:
198 output += "\\u0000";
199 ++i;
200 break;
201 case ALONE_SURROGATE_3B_FIRST:
202 if (i + 2 < str.size() && // 2: Check 2 more characters
203 static_cast<uint8_t>(str[i + 1]) >= ALONE_SURROGATE_3B_SECOND_START && // 1: 1th character after ch
204 static_cast<uint8_t>(str[i + 1]) <= ALONE_SURROGATE_3B_SECOND_END && // 1: 1th character after ch
205 static_cast<uint8_t>(str[i + 2]) >= ALONE_SURROGATE_3B_THIRD_START && // 2: 2nd character after ch
206 static_cast<uint8_t>(str[i + 2]) <= ALONE_SURROGATE_3B_THIRD_END) { // 2: 2nd character after ch
207 auto unicodeRes = common::utf_helper::ConvertUtf8ToUnicodeChar(
208 reinterpret_cast<const uint8_t*>(str.c_str() + i), 3); // 3: Parse 3 characters
209 ASSERT(unicodeRes.first != common::utf_helper::INVALID_UTF8);
210 AppendUnicodeEscape(static_cast<uint32_t>(unicodeRes.first), output);
211 i += 2; // 2 : Skip 2 characters
212 break;
213 }
214 [[fallthrough]];
215 default:
216 if (ch < CODE_SPACE) {
217 AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
218 } else {
219 output += ch;
220 }
221 }
222 }
223 output += "\"";
224 }
225 #endif
226 } // namespace panda::ecmascript::base