• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/base/json_helper.h"
17 #include "common_components/base/utf_helper.h"
18 
19 
20 namespace panda::ecmascript::base {
21 
22 #if ENABLE_NEXT_OPTIMIZATION
// Size in bytes of one JSON_ESCAPE_TABLE entry (escape text + NUL terminator + padding).
constexpr int K_JSON_ESCAPE_TABLE_ENTRY_SIZE = 8;

// Table for escaping Latin1 characters in the range 0x00..0x5F.
// The entry for code unit `c` starts at offset `c * K_JSON_ESCAPE_TABLE_ENTRY_SIZE` and is a
// NUL-terminated C string; the spaces after the terminator are padding only and are never read.
// There is no length byte: callers pass `&JSON_ESCAPE_TABLE[offset]` to a C-string append.
// Declared as an array (not a pointer) so the layout can be verified at compile time below.
constexpr char JSON_ESCAPE_TABLE[] =
    "\\u0000\0 \\u0001\0 \\u0002\0 \\u0003\0 \\u0004\0 \\u0005\0 \\u0006\0 \\u0007\0 "
    "\\b\0     \\t\0     \\n\0     \\u000b\0 \\f\0     \\r\0     \\u000e\0 \\u000f\0 "
    "\\u0010\0 \\u0011\0 \\u0012\0 \\u0013\0 \\u0014\0 \\u0015\0 \\u0016\0 \\u0017\0 "
    "\\u0018\0 \\u0019\0 \\u001a\0 \\u001b\0 \\u001c\0 \\u001d\0 \\u001e\0 \\u001f\0 "
    " \0      !\0      \\\"\0     #\0      $\0      %\0      &\0      '\0      "
    "(\0      )\0      *\0      +\0      ,\0      -\0      .\0      /\0      "
    "0\0      1\0      2\0      3\0      4\0      5\0      6\0      7\0      "
    "8\0      9\0      :\0      ;\0      <\0      =\0      >\0      ?\0      "
    "@\0      A\0      B\0      C\0      D\0      E\0      F\0      G\0      "
    "H\0      I\0      J\0      K\0      L\0      M\0      N\0      O\0      "
    "P\0      Q\0      R\0      S\0      T\0      U\0      V\0      W\0      "
    "X\0      Y\0      Z\0      [\0      \\\\\0     ]\0      ^\0      _\0      ";

// 0x60 entries of K_JSON_ESCAPE_TABLE_ENTRY_SIZE bytes each, plus the literal's implicit trailing NUL.
// A mistyped entry (wrong padding width) shifts every later entry and is caught here.
static_assert(sizeof(JSON_ESCAPE_TABLE) == 0x60 * K_JSON_ESCAPE_TABLE_ENTRY_SIZE + 1,
              "JSON_ESCAPE_TABLE must hold exactly 0x60 entries of K_JSON_ESCAPE_TABLE_ENTRY_SIZE bytes");
40 
// Per-byte flag table: entry `c` is 1 when the Latin1 code unit `c` can be copied into a quoted
// JSON string unchanged, and 0 when it must be replaced by an escape sequence.
// The only 0 entries are 0x00..0x1F (control characters), 0x22 ('"') and 0x5C ('\\').
constexpr bool JSON_DO_NOT_ESCAPE_FLAG_TABLE[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};

// The table is indexed with an arbitrary uint8_t, so it must cover all 256 values; a dropped
// initializer would otherwise turn into an out-of-bounds read for high bytes.
static_assert(sizeof(JSON_DO_NOT_ESCAPE_FLAG_TABLE) / sizeof(JSON_DO_NOT_ESCAPE_FLAG_TABLE[0]) == 256,
              "JSON_DO_NOT_ESCAPE_FLAG_TABLE must have one entry per uint8_t value");

// Returns true when the 8-bit code unit `c` may be emitted verbatim inside a quoted JSON string,
// false when it needs an escape sequence. Pure table lookup.
constexpr bool DoNotEscape(uint8_t c)
{
    return JSON_DO_NOT_ESCAPE_FLAG_TABLE[c];
}
64 
// Returns true when the 16-bit code unit `c` may be emitted verbatim inside a quoted JSON string.
// Code units below 0x20, the double quote (0x22), the backslash (0x5C) and every code unit in
// the surrogate range 0xD800..0xDFFF yield false.
bool DoNotEscape(uint16_t c)
{
    // Control characters, '"' and '\\' always need an escape sequence.
    if (c < 0x20 || c == 0x22 || c == 0x5C) {
        return false;
    }
    // Everything else is literal unless it lies in the surrogate range.
    return c < 0xD800 || c > 0xDFFF;
}
70 
IsFastValueToQuotedString(const Span<const uint8_t> & sp)71 bool JsonHelper::IsFastValueToQuotedString(const Span<const uint8_t> &sp)
72 {
73     for (const auto utf8Ch : sp) {
74         if (!DoNotEscape(utf8Ch)) {
75             return false;
76         }
77     }
78     return true;
79 }
80 #else
81 bool JsonHelper::IsFastValueToQuotedString(const CString& str)
82 {
83     for (const auto item : str) {
84         const auto ch = static_cast<uint8_t>(item);
85         switch (ch) {
86             case '\"':
87             case '\\':
88             case '\b':
89             case '\f':
90             case '\n':
91             case '\r':
92             case '\t':
93             case ZERO_FIRST:
94             case ALONE_SURROGATE_3B_FIRST:
95                 return false;
96             default:
97                 if (ch < CODE_SPACE) {
98                     return false;
99                 }
100                 break;
101         }
102     }
103     return true;
104 }
105 #endif
106 
107 #if ENABLE_NEXT_OPTIMIZATION
AppendQuotedValueToC16String(const Span<const uint16_t> & sp,uint32_t & index,C16String & output)108 void JsonHelper::AppendQuotedValueToC16String(const Span<const uint16_t> &sp, uint32_t &index, C16String &output)
109 {
110     auto ch = sp[index];
111     if (common::utf_helper::IsUTF16Surrogate(ch)) {
112         // utf-16 to quoted string
113         if (ch <= common::utf_helper::DECODE_LEAD_HIGH) {
114             if (index + 1 < sp.size() && common::utf_helper::IsUTF16LowSurrogate(sp[index + 1])) {
115                 AppendChar(output, ch);
116                 AppendChar(output, sp[index + 1]);
117                 ++index;
118             } else {
119                 AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
120             }
121         } else {
122             AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
123         }
124     } else {
125         ASSERT(ch < 0x60);
126         AppendString(output, &JSON_ESCAPE_TABLE[ch * K_JSON_ESCAPE_TABLE_ENTRY_SIZE]);
127     }
128 }
129 
130 template <typename SrcType, typename DstType>
AppendValueToQuotedString(const Span<const SrcType> & sp,DstType & output)131 void JsonHelper::AppendValueToQuotedString(const Span<const SrcType> &sp, DstType &output)
132 {
133     static_assert(sizeof(typename DstType::value_type) >= sizeof(SrcType));
134     AppendString(output, "\"");
135     if constexpr (sizeof(SrcType) == 1) {
136         if (IsFastValueToQuotedString(sp)) {
137             AppendString(output, reinterpret_cast<const char *>(sp.data()), sp.size());
138             AppendString(output, "\"");
139             return;
140         }
141     }
142     uint32_t len = sp.size();
143     for (uint32_t i = 0; i < len; ++i) {
144         auto ch = sp[i];
145         if (DoNotEscape(ch)) {
146             AppendChar(output, ch);
147         } else if constexpr (sizeof(SrcType) != 1) {
148             AppendQuotedValueToC16String(sp, i, output);
149         } else {
150             ASSERT(ch < 0x60);
151             AppendString(output, &JSON_ESCAPE_TABLE[ch * K_JSON_ESCAPE_TABLE_ENTRY_SIZE]);
152         }
153     }
154     AppendString(output, "\"");
155 }
156 template void JsonHelper::AppendValueToQuotedString<uint8_t, CString>(
157     const Span<const uint8_t> &sp, CString &output);
158 template void JsonHelper::AppendValueToQuotedString<uint8_t, C16String>(
159     const Span<const uint8_t> &sp, C16String &output);
160 template void JsonHelper::AppendValueToQuotedString<uint16_t, C16String>(
161     const Span<const uint16_t> &sp, C16String &output);
162 
163 #else
AppendValueToQuotedString(const CString & str,CString & output)164 void JsonHelper::AppendValueToQuotedString(const CString& str, CString& output)
165 {
166     output += "\"";
167     bool isFast = IsFastValueToQuotedString(str); // fast mode
168     if (isFast) {
169         output += str;
170         output += "\"";
171         return;
172     }
173     for (uint32_t i = 0; i < str.size(); ++i) {
174         const auto ch = static_cast<uint8_t>(str[i]);
175         switch (ch) {
176             case '\"':
177                 output += "\\\"";
178                 break;
179             case '\\':
180                 output += "\\\\";
181                 break;
182             case '\b':
183                 output += "\\b";
184                 break;
185             case '\f':
186                 output += "\\f";
187                 break;
188             case '\n':
189                 output += "\\n";
190                 break;
191             case '\r':
192                 output += "\\r";
193                 break;
194             case '\t':
195                 output += "\\t";
196                 break;
197             case ZERO_FIRST:
198                 output += "\\u0000";
199                 ++i;
200                 break;
201             case ALONE_SURROGATE_3B_FIRST:
202                 if (i + 2 < str.size() && // 2: Check 2 more characters
203                     static_cast<uint8_t>(str[i + 1]) >= ALONE_SURROGATE_3B_SECOND_START && // 1: 1th character after ch
204                     static_cast<uint8_t>(str[i + 1]) <= ALONE_SURROGATE_3B_SECOND_END && // 1: 1th character after ch
205                     static_cast<uint8_t>(str[i + 2]) >= ALONE_SURROGATE_3B_THIRD_START && // 2: 2nd character after ch
206                     static_cast<uint8_t>(str[i + 2]) <= ALONE_SURROGATE_3B_THIRD_END) {   // 2: 2nd character after ch
207                     auto unicodeRes = common::utf_helper::ConvertUtf8ToUnicodeChar(
208                         reinterpret_cast<const uint8_t*>(str.c_str() + i), 3); // 3: Parse 3 characters
209                     ASSERT(unicodeRes.first != common::utf_helper::INVALID_UTF8);
210                     AppendUnicodeEscape(static_cast<uint32_t>(unicodeRes.first), output);
211                     i += 2; // 2 : Skip 2 characters
212                     break;
213                 }
214                 [[fallthrough]];
215             default:
216                 if (ch < CODE_SPACE) {
217                     AppendUnicodeEscape(static_cast<uint32_t>(ch), output);
218                 } else {
219                     output += ch;
220                 }
221         }
222     }
223     output += "\"";
224 }
225 #endif
226 } // namespace panda::ecmascript::base