1 /*
2 * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/base/json_helper.h"
17 #include "ecmascript/base/utf_helper.h"
18
19
20 namespace panda::ecmascript::base {
// Bytes below this value are written as unicode escapes instead of being copied.
constexpr uint8_t CODE_SPACE{0x20};
// Lead byte of the two-byte form of \u0000 (c0 80).
constexpr uint8_t ZERO_FIRST{0xc0};
// A three-byte sequence is handled as a lone surrogate when its bytes match
// ed [a0..bf] [80..bf]; the constants below spell out that pattern.
constexpr uint8_t ALONE_SURROGATE_3B_FIRST{0xed};
constexpr uint8_t ALONE_SURROGATE_3B_SECOND_START{0xa0};
constexpr uint8_t ALONE_SURROGATE_3B_SECOND_END{0xbf};
constexpr uint8_t ALONE_SURROGATE_3B_THIRD_START{0x80};
constexpr uint8_t ALONE_SURROGATE_3B_THIRD_END{0xbf};
28
IsFastValueToQuotedString(const CString & str)29 bool JsonHelper::IsFastValueToQuotedString(const CString& str)
30 {
31 for (const auto item : str) {
32 const auto ch = static_cast<uint8_t>(item);
33 switch (ch) {
34 case '\"':
35 case '\\':
36 case '\b':
37 case '\f':
38 case '\n':
39 case '\r':
40 case '\t':
41 case ZERO_FIRST:
42 case ALONE_SURROGATE_3B_FIRST:
43 return false;
44 default:
45 if (ch < CODE_SPACE) {
46 return false;
47 }
48 break;
49 }
50 }
51 return true;
52 }
53
IsFastValueToQuotedString(const Span<const uint8_t> & sp)54 bool JsonHelper::IsFastValueToQuotedString(const Span<const uint8_t>& sp)
55 {
56 for (const auto ch : sp) {
57 switch (ch) {
58 case '\"':
59 case '\\':
60 case '\b':
61 case '\f':
62 case '\n':
63 case '\r':
64 case '\t':
65 case ZERO_FIRST:
66 case ALONE_SURROGATE_3B_FIRST:
67 return false;
68 default:
69 if (ch < CODE_SPACE) {
70 return false;
71 }
72 break;
73 }
74 }
75 return true;
76 }
77
AppendValueToQuotedString(const CString & str,CString & output)78 void JsonHelper::AppendValueToQuotedString(const CString& str, CString& output)
79 {
80 output += "\"";
81 bool isFast = IsFastValueToQuotedString(str); // fast mode
82 if (isFast) {
83 output += str;
84 output += "\"";
85 return;
86 }
87 for (uint32_t i = 0; i < str.size(); ++i) {
88 const auto ch = static_cast<uint8_t>(str[i]);
89 switch (ch) {
90 case '\"':
91 output += "\\\"";
92 break;
93 case '\\':
94 output += "\\\\";
95 break;
96 case '\b':
97 output += "\\b";
98 break;
99 case '\f':
100 output += "\\f";
101 break;
102 case '\n':
103 output += "\\n";
104 break;
105 case '\r':
106 output += "\\r";
107 break;
108 case '\t':
109 output += "\\t";
110 break;
111 case ZERO_FIRST:
112 output += "\\u0000";
113 ++i;
114 break;
115 case ALONE_SURROGATE_3B_FIRST:
116 if (i + 2 < str.size() && // 2: Check 2 more characters
117 static_cast<uint8_t>(str[i + 1]) >= ALONE_SURROGATE_3B_SECOND_START && // 1: 1th character after ch
118 static_cast<uint8_t>(str[i + 1]) <= ALONE_SURROGATE_3B_SECOND_END && // 1: 1th character after ch
119 static_cast<uint8_t>(str[i + 2]) >= ALONE_SURROGATE_3B_THIRD_START && // 2: 2nd character after ch
120 static_cast<uint8_t>(str[i + 2]) <= ALONE_SURROGATE_3B_THIRD_END) { // 2: 2nd character after ch
121 auto unicodeRes = utf_helper::ConvertUtf8ToUnicodeChar(
122 reinterpret_cast<const uint8_t*>(str.c_str() + i), 3); // 3: Parse 3 characters
123 ASSERT(unicodeRes.first != utf_helper::INVALID_UTF8);
124 AppendUnicodeEscape(static_cast<uint32_t>(unicodeRes.first), output);
125 i += 2; // 2 : Skip 2 characters
126 break;
127 }
128 [[fallthrough]];
129 default:
130 if (ch < CODE_SPACE) {
131 AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
132 } else {
133 output += ch;
134 }
135 }
136 }
137 output += "\"";
138 }
139
AppendValueToQuotedString(const Span<const uint8_t> & sp,CString & output)140 void JsonHelper::AppendValueToQuotedString(const Span<const uint8_t>& sp, CString& output)
141 {
142 output += "\"";
143 bool isFast = IsFastValueToQuotedString(sp); // fast mode
144 if (isFast) {
145 output.append(reinterpret_cast<const char*>(sp.data()), sp.size());
146 output += "\"";
147 return;
148 }
149 for (uint32_t i = 0; i < sp.size(); ++i) {
150 const auto ch = sp[i];
151 switch (ch) {
152 case '\"':
153 output += "\\\"";
154 break;
155 case '\\':
156 output += "\\\\";
157 break;
158 case '\b':
159 output += "\\b";
160 break;
161 case '\f':
162 output += "\\f";
163 break;
164 case '\n':
165 output += "\\n";
166 break;
167 case '\r':
168 output += "\\r";
169 break;
170 case '\t':
171 output += "\\t";
172 break;
173 case ZERO_FIRST:
174 output += "\\u0000";
175 ++i;
176 break;
177 case ALONE_SURROGATE_3B_FIRST:
178 if (i + 2 < sp.size() && // 2: Check 2 more characters
179 sp[i + 1] >= ALONE_SURROGATE_3B_SECOND_START && // 1: 1st character after ch
180 sp[i + 1] <= ALONE_SURROGATE_3B_SECOND_END && // 1: 1st character after ch
181 sp[i + 2] >= ALONE_SURROGATE_3B_THIRD_START && // 2: 2nd character after ch
182 sp[i + 2] <= ALONE_SURROGATE_3B_THIRD_END) { // 2: 2nd character after ch
183 auto unicodeRes = utf_helper::ConvertUtf8ToUnicodeChar(sp.data() + i, 3); // 3: Parse 3 characters
184 ASSERT(unicodeRes.first != utf_helper::INVALID_UTF8);
185 AppendUnicodeEscape(static_cast<uint32_t>(unicodeRes.first), output);
186 i += 2; // 2 : Skip 2 characters
187 break;
188 }
189 [[fallthrough]];
190 default:
191 if (ch < CODE_SPACE) {
192 AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
193 } else {
194 output += ch;
195 }
196 }
197 }
198 output += "\"";
199 }
200 } // namespace panda::ecmascript::base