• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "array_buffer_helper.h"
17 
18 #include <array>
19 #include <iomanip>
20 #include <algorithm>
21 #include <cstdlib>
22 #include <cctype>
23 
24 #include "plugins/ets/runtime/types/ets_arraybuffer.h"
25 
26 namespace ark::ets::intrinsics::helpers {
27 
28 namespace base64 {
29 
// Base64 alphabet ('+' and '/' are the last two symbols); the position of a character in this
// string is the 6-bit value it stands for.
30 constexpr std::string_view K_BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
31 
32 // Lookup table size for all possible byte values.
33 constexpr size_t K_LOOKUP_TABLE_SIZE = 256;
34 
35 // Base64 block and binary block size constants.
36 constexpr size_t K_BASE64_BLOCK_SIZE = 4;  // Size of a Base64 encoded block.
37 constexpr size_t K_MAX_PADDING_CHARS = 2;  // Maximum number of padding characters allowed.
38 constexpr size_t K_BINARY_BLOCK_SIZE = 3;  // Size of binary data block that becomes one Base64 block.
39 
40 // Bit manipulation constants.
41 constexpr size_t K_BITS_PER_BYTE = 8;
42 constexpr uint32_t K_BASE64_MASK = 0x3F;  // Mask for 6 bits (Base64 character).
43 
44 // Bit shift constants for Base64 encoding/decoding.
// K_FIRST_CHAR_SHIFT, K_SECOND_CHAR_SHIFT and K_THIRD_CHAR_SHIFT are used by Decode() to
// reassemble bytes from consecutive 6-bit values.
45 constexpr size_t K_FIRST_CHAR_SHIFT = 2;
46 constexpr size_t K_SECOND_CHAR_SHIFT = 4;
47 constexpr size_t K_THIRD_CHAR_SHIFT = 6;
// Encode() packs up to three input bytes into a 24-bit group: the first byte is shifted by
// K_HIGH_BYTE_SHIFT and the second by K_BITS_PER_BYTE. Despite their names, K_LAST_CHAR_SHIFT,
// K_MID_BYTE_SHIFT and K_LOW_BYTE_SHIFT select the 1st, 2nd and 3rd output characters of that group.
48 constexpr size_t K_HIGH_BYTE_SHIFT = 16;
49 constexpr size_t K_MID_BYTE_SHIFT = 12;
50 constexpr size_t K_LOW_BYTE_SHIFT = 6;
51 constexpr size_t K_LAST_CHAR_SHIFT = 18;
52 
// Keeps the two low bits of the third 6-bit value when Decode() builds the last byte of a block.
53 constexpr unsigned int K_TWO_BIT_MASK = 0x3;
54 
// Character emitted by Encode() to fill an incomplete final block.
55 constexpr char K_PADDING_CHAR = '=';
56 
BuildDecodingTable()57 constexpr std::array<int, K_LOOKUP_TABLE_SIZE> BuildDecodingTable() noexcept
58 {
59     std::array<int, K_LOOKUP_TABLE_SIZE> table {};
60     for (auto &entry : table) {
61         entry = -1;  // mark as invalid
62     }
63     for (size_t i = 0; i < K_BASE64_CHARS.size(); ++i) {
64         table[static_cast<unsigned char>(K_BASE64_CHARS[i])] = static_cast<int>(i);
65     }
66     return table;
67 }
68 
69 constexpr auto K_DECODING_TABLE = BuildDecodingTable();
70 
IsBase64Character(unsigned char c)71 [[nodiscard]] constexpr bool IsBase64Character(unsigned char c) noexcept
72 {
73     return K_DECODING_TABLE[c] != -1;
74 }
75 
ValidateBase64Input(std::string_view input)76 [[nodiscard]] bool ValidateBase64Input(std::string_view input) noexcept
77 {
78     if (input.empty()) {
79         return true;
80     }
81     if (input.size() % K_BASE64_BLOCK_SIZE != 0) {
82         return false;
83     }
84 
85     const auto paddingStart = std::find(input.begin(), input.end(), K_PADDING_CHAR);
86     const bool hasPadding = paddingStart != input.end();
87     // Count padding characters at the end
88     const auto paddingCount = static_cast<size_t>(std::count(paddingStart, input.end(), K_PADDING_CHAR));
89     const bool validChars =
90         std::all_of(input.begin(), paddingStart, [](unsigned char c) { return IsBase64Character(c); });
91     const bool validPadding =
92         !hasPadding || (paddingCount <= K_MAX_PADDING_CHARS &&
93                         std::all_of(paddingStart, input.end(), [](char c) { return c == K_PADDING_CHAR; }));
94     return validChars && validPadding;
95 }
96 
Decode(std::string_view encodedData)97 [[nodiscard]] PandaString Decode(std::string_view encodedData)
98 {
99     if (encodedData.empty()) {
100         return {};
101     }
102     const size_t kPaddingOffset = 2;
103     size_t pad = 0;
104     if (!encodedData.empty() && encodedData.back() == K_PADDING_CHAR) {
105         ++pad;
106     }
107     if (encodedData.size() >= kPaddingOffset && encodedData[encodedData.size() - kPaddingOffset] == K_PADDING_CHAR) {
108         ++pad;
109     }
110 
111     PandaString decoded;
112     decoded.reserve((encodedData.size() / K_BASE64_BLOCK_SIZE) * K_BINARY_BLOCK_SIZE - pad);
113 
114     const int kSecondCharOffset = 2;
115     const int kThirdCharOffset = 3;
116 
117     for (size_t i = 0; i < encodedData.size(); i += K_BASE64_BLOCK_SIZE) {
118         int decodedChar0 = K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i])];
119         int decodedChar1 = K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i + 1])];
120         int decodedChar2 = (encodedData[i + kSecondCharOffset] == K_PADDING_CHAR)
121                                ? 0
122                                : K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i + kSecondCharOffset])];
123         int decodedChar3 = (encodedData[i + kThirdCharOffset] == K_PADDING_CHAR)
124                                ? 0
125                                : K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i + kThirdCharOffset])];
126 
127         auto unsignedChar0 = static_cast<unsigned int>(decodedChar0);
128         auto unsignedChar1 = static_cast<unsigned int>(decodedChar1);
129         auto unsignedChar2 = static_cast<unsigned int>(decodedChar2);
130         auto unsignedChar3 = static_cast<unsigned int>(decodedChar3);
131 
132         decoded.push_back(
133             static_cast<char>((unsignedChar0 << K_FIRST_CHAR_SHIFT) | (unsignedChar1 >> K_SECOND_CHAR_SHIFT)));
134         if (encodedData[i + kSecondCharOffset] != K_PADDING_CHAR) {
135             decoded.push_back(static_cast<char>(((unsignedChar1 & K_BASE64_MASK) << K_SECOND_CHAR_SHIFT) |
136                                                 (unsignedChar2 >> K_FIRST_CHAR_SHIFT)));
137         }
138         if (encodedData[i + kThirdCharOffset] != K_PADDING_CHAR) {
139             decoded.push_back(
140                 static_cast<char>(((unsignedChar2 & K_TWO_BIT_MASK) << K_THIRD_CHAR_SHIFT) | unsignedChar3));
141         }
142     }
143     return decoded;
144 }
145 
Encode(const PandaVector<uint8_t> & binaryData)146 [[nodiscard]] PandaString Encode(const PandaVector<uint8_t> &binaryData)
147 {
148     if (binaryData.empty()) {
149         return {};
150     }
151     PandaString encoded;
152     const size_t kReserveMultiplier = 2;
153 
154     encoded.reserve(((binaryData.size() + kReserveMultiplier) / K_BINARY_BLOCK_SIZE) * K_BASE64_BLOCK_SIZE);
155 
156     const size_t kOneByte = 1;
157     const size_t kTwoBytes = 2;
158 
159     size_t pos = 0;
160     while (pos < binaryData.size()) {
161         size_t remain = binaryData.size() - pos;
162         uint32_t triple = (static_cast<uint32_t>(binaryData[pos]) << K_HIGH_BYTE_SHIFT);
163         if (remain > kOneByte) {
164             triple |= (static_cast<uint32_t>(binaryData[pos + kOneByte]) << K_BITS_PER_BYTE);
165         }
166         if (remain > kTwoBytes) {
167             triple |= static_cast<uint32_t>(binaryData[pos + kTwoBytes]);
168         }
169 
170         encoded.push_back(K_BASE64_CHARS[(triple >> K_LAST_CHAR_SHIFT) & K_BASE64_MASK]);
171         encoded.push_back(K_BASE64_CHARS[(triple >> K_MID_BYTE_SHIFT) & K_BASE64_MASK]);
172         encoded.push_back(remain > kOneByte ? K_BASE64_CHARS[(triple >> K_LOW_BYTE_SHIFT) & K_BASE64_MASK]
173                                             : K_PADDING_CHAR);
174         encoded.push_back(remain > kTwoBytes ? K_BASE64_CHARS[triple & K_BASE64_MASK] : K_PADDING_CHAR);
175 
176         pos += K_BINARY_BLOCK_SIZE;
177     }
178     return encoded;
179 }
180 
181 }  // namespace base64
182 
183 namespace encoding {
184 
// Enables the "..."sv literal suffix used by the encoding-name tables below.
185 using namespace std::literals::string_view_literals;
186 // UTF-16 related constants.
187 constexpr size_t K_UTF16_BYTES_PER_CHAR = 2;  // Number of bytes per UTF-16 character.
// NOTE: separate from base64::K_HIGH_BYTE_SHIFT (16); inside this namespace the unqualified
// name refers to this value.
188 constexpr size_t K_HIGH_BYTE_SHIFT = 8;       // Shift for high byte in UTF-16.
189 
// HEX_BASE is the radix passed to std::strtoul when a hex pair is parsed;
// K_HEX_PAIR_SIZE is the number of hex digits consumed per byte.
190 constexpr size_t HEX_BASE = 16;
191 constexpr size_t K_HEX_PAIR_SIZE = 2;
192 
193 // Named constants for bit masks
// K_ASCII_MASK keeps the low 7 bits of a byte, K_BYTE_MASK keeps all 8.
194 constexpr uint8_t K_ASCII_MASK = 0x7F;
195 constexpr uint8_t K_BYTE_MASK = 0xFF;
196 
// Groups used by CalculateStringBytesLength(): encodings for which the byte length equals the
// input length, and encodings for which it is twice the input length.
197 constexpr std::array K_SINGLE_BYTE_ENCODINGS = {"utf8"sv, "utf-8"sv, "ascii"sv, "latin1"sv, "binary"sv};
198 constexpr std::array K_DOUBLE_BYTE_ENCODINGS = {"utf16le"sv, "ucs2"sv, "ucs-2"sv};
199 
// Groups used by ConvertStringToBytes() to select the converter for an encoding name.
200 constexpr std::array UTF8_ENCODINGS = {"utf8"sv, "utf-8"sv};                // UTF-8 variants
201 constexpr std::array ASCII_ENCODINGS = {"ascii"sv};                         // ASCII (7-bit)
202 constexpr std::array UTF16_ENCODINGS = {"utf16le"sv, "ucs2"sv, "ucs-2"sv};  // UTF-16 little-endian variants
203 constexpr std::array BASE64_ENCODINGS = {"base64"sv, "base64url"sv};        // Base64 variants
204 constexpr std::array LATIN_ENCODINGS = {"latin1"sv, "binary"sv};            // Latin1/binary encodings
205 
ValidateBuffer(const EtsEscompatArrayBuffer * buffer)206 [[nodiscard]] Result<bool> ValidateBuffer(const EtsEscompatArrayBuffer *buffer) noexcept
207 {
208     if (buffer == nullptr) {
209         return Err<PandaString>(PandaString("Buffer is null"));
210     }
211     if (buffer->WasDetached()) {
212         return Err<PandaString>(PandaString("Buffer was detached"));
213     }
214     return true;
215 }
ValidateIndices(int byteLength,int start,int end)216 [[nodiscard]] Result<bool> ValidateIndices(int byteLength, int start, int end)
217 {
218     if (start < 0 || start > byteLength) {
219         return Err<PandaString>(PandaString("Start index is out of bounds"));
220     }
221     if (end < 0 || end > byteLength || end < start) {
222         return Err<PandaString>(PandaString("End index is out of bounds"));
223     }
224     return true;
225 }
/**
 * Resolves an optional encoding name.
 *
 * @param encodingObj pointer to the requested encoding name, or null when none was given.
 * @return a copy of the pointed-to name, or "utf8" when the pointer is null.
 */
[[nodiscard]] PandaString GetEncoding(const PandaString *encodingObj) noexcept
{
    if (encodingObj == nullptr) {
        return PandaString("utf8");
    }
    return *encodingObj;
}
BytesFromString(std::string_view input)230 PandaVector<uint8_t> BytesFromString(std::string_view input)
231 {
232     PandaVector<uint8_t> bytes;
233     bytes.assign(input.begin(), input.end());
234     return bytes;
235 }
236 
StringFromBytes(const PandaVector<uint8_t> & bytes)237 PandaString StringFromBytes(const PandaVector<uint8_t> &bytes)
238 {
239     return PandaString(bytes.begin(), bytes.end());
240 }
241 
MaskBytes(std::string_view input,uint8_t mask)242 PandaVector<uint8_t> MaskBytes(std::string_view input, uint8_t mask)
243 {
244     PandaVector<uint8_t> bytes;
245     bytes.resize(input.size());
246     for (size_t i = 0; i < input.size(); ++i) {
247         bytes[i] = static_cast<uint8_t>(static_cast<unsigned char>(input[i]) & mask);
248     }
249     return bytes;
250 }
251 
ConvertUtf8Encoding(const PandaVector<uint8_t> & bytes)252 PandaString ConvertUtf8Encoding(const PandaVector<uint8_t> &bytes)
253 {
254     return StringFromBytes(bytes);
255 }
256 
ConvertUtf16Encoding(const PandaVector<uint8_t> & bytes)257 PandaString ConvertUtf16Encoding(const PandaVector<uint8_t> &bytes)
258 {
259     if (bytes.size() % K_UTF16_BYTES_PER_CHAR != 0) {
260         return PandaString("Invalid UTF-16 byte sequence");
261     }
262     PandaString output;
263     for (size_t i = 0; i < bytes.size(); i += K_UTF16_BYTES_PER_CHAR) {
264         uint16_t ch = static_cast<uint16_t>(bytes[i]) |
265                       static_cast<uint16_t>(static_cast<uint16_t>(bytes[i + 1]) << K_HIGH_BYTE_SHIFT);
266         output.push_back(static_cast<char>(ch));
267     }
268     return output;
269 }
270 
ConvertBase64Encoding(const PandaVector<uint8_t> & bytes,std::string_view encoding)271 PandaString ConvertBase64Encoding(const PandaVector<uint8_t> &bytes, std::string_view encoding)
272 {
273     PandaString output = base64::Encode(bytes);
274     if (encoding == "base64url") {
275         std::replace(output.begin(), output.end(), '+', '-');
276         std::replace(output.begin(), output.end(), '/', '_');
277         size_t lastValidCharPos = output.find_last_not_of('=');
278         if (lastValidCharPos != PandaString::npos) {
279             output.erase(lastValidCharPos + 1);
280         }
281     }
282     return output;
283 }
284 
ConvertUtf8ToBytes(std::string_view input)285 PandaVector<uint8_t> ConvertUtf8ToBytes(std::string_view input)
286 {
287     return BytesFromString(input);
288 }
289 
ConvertAsciiToBytes(std::string_view input)290 PandaVector<uint8_t> ConvertAsciiToBytes(std::string_view input)
291 {
292     return MaskBytes(input, K_ASCII_MASK);
293 }
294 
ConvertLatinToBytes(std::string_view input)295 PandaVector<uint8_t> ConvertLatinToBytes(std::string_view input)
296 {
297     return MaskBytes(input, K_BYTE_MASK);
298 }
299 
ConvertHexEncoding(const PandaVector<uint8_t> & bytes)300 PandaString ConvertHexEncoding(const PandaVector<uint8_t> &bytes)
301 {
302     PandaOStringStream oss;
303     const int width = 2;
304     for (uint8_t byte : bytes) {
305         oss << std::hex << std::setw(width) << std::setfill('0') << static_cast<int>(byte);
306     }
307     return oss.str();
308 }
309 
ConvertLatinEncoding(const PandaVector<uint8_t> & bytes)310 PandaString ConvertLatinEncoding(const PandaVector<uint8_t> &bytes)
311 {
312     PandaString output;
313     output.reserve(bytes.size());
314     for (uint8_t byte : bytes) {
315         output.push_back(static_cast<char>(byte));
316     }
317     return output;
318 }
319 
ConvertUtf16ToBytes(std::string_view input)320 PandaVector<uint8_t> ConvertUtf16ToBytes(std::string_view input)
321 {
322     const size_t kUtf16Multiplier = 2;
323     const size_t kShiftBy8 = 8U;
324     PandaVector<uint8_t> bytes;
325     bytes.resize(input.size() * kUtf16Multiplier);
326 
327     for (size_t i = 0; i < input.size(); ++i) {
328         auto ch = static_cast<uint16_t>(static_cast<unsigned char>(input[i]));
329         auto position = i * kUtf16Multiplier;
330         bytes[position] = static_cast<uint8_t>(ch & K_BYTE_MASK);
331         bytes[position + 1] = static_cast<uint8_t>(static_cast<unsigned int>(ch) >> kShiftBy8);
332     }
333     return bytes;
334 }
335 
ConvertBase64ToBytes(const PandaString & input,std::string_view encoding)336 Result<PandaVector<uint8_t>> ConvertBase64ToBytes(const PandaString &input, std::string_view encoding)
337 {
338     PandaString decoded;
339     if (encoding == "base64url") {
340         PandaString temp = input;
341         std::replace(temp.begin(), temp.end(), '-', '+');
342         std::replace(temp.begin(), temp.end(), '_', '/');
343         decoded = base64::Decode(temp);
344     } else {
345         decoded = base64::Decode(input);
346     }
347 
348     return BytesFromString(decoded);
349 }
350 
/**
 * Tells whether a character is a hexadecimal digit.
 *
 * @param c character to classify.
 * @return true for '0'-'9', 'a'-'f' and 'A'-'F'; false otherwise.
 */
bool IsSymbolHex(char c)
{
    if (c >= '0' && c <= '9') {
        return true;
    }
    const bool isLowerHexLetter = (c >= 'a') && (c <= 'f');
    const bool isUpperHexLetter = (c >= 'A') && (c <= 'F');
    return isLowerHexLetter || isUpperHexLetter;
}
355 
FoundInputHex(const PandaString & input,bool & error)356 PandaString FoundInputHex(const PandaString &input, bool &error)
357 {
358     if (input.size() == 1) {
359         error = true;
360         return "";
361     }
362 
363     PandaString inputHex;
364     bool isFirstPair = true;
365     size_t idx = 1;
366     while (idx < input.size()) {
367         if (isFirstPair) {
368             if (IsSymbolHex(input[idx - 1]) && IsSymbolHex(input[idx])) {
369                 inputHex += input[idx - 1];
370                 inputHex += input[idx];
371             } else {
372                 error = true;
373                 return "";
374             }
375             isFirstPair = false;
376         } else {
377             if (IsSymbolHex(input[idx - 1]) && IsSymbolHex(input[idx])) {
378                 inputHex += input[idx - 1];
379                 inputHex += input[idx];
380             } else {
381                 break;
382             }
383         }
384         idx += K_HEX_PAIR_SIZE;
385     }
386     error = false;
387     return inputHex;
388 }
389 
ConvertHexToBytes(const PandaString & input)390 Result<PandaVector<uint8_t>> ConvertHexToBytes(const PandaString &input)
391 {
392     // Found hex codes in input. Other symbols should be ignored
393     // input should start with hex code(-s)
394     bool error = false;
395     PandaString inputHex = FoundInputHex(input, error);
396     if (error) {
397         return Err<PandaString>(PandaString("The argument 'value' is invalid. Received is '") + input +
398                                 PandaString("'"));
399     }
400 
401     // Check size
402     if (inputHex.empty()) {
403         inputHex += "0";
404         inputHex += "0";
405     } else if (inputHex.size() % K_HEX_PAIR_SIZE != 0) {
406         return Err<PandaString>(PandaString("Hex string must have an even length"));
407     }
408     PandaVector<uint8_t> bytes;
409     size_t bytesLength = inputHex.size() / K_HEX_PAIR_SIZE;
410     bytes.reserve(bytesLength);
411     const size_t kHexStringLength = 3;
412     for (size_t i = 0; i < inputHex.size(); i += K_HEX_PAIR_SIZE) {
413         std::array<char, kHexStringLength> hex = {inputHex[i], inputHex[i + 1], '\0'};
414         char *endptr = nullptr;
415         uint64_t value = std::strtoul(hex.data(), &endptr, HEX_BASE);
416         if (*endptr != '\0') {
417             return Err<PandaString>(PandaString("Invalid hex string"));
418         }
419         bytes.push_back(static_cast<uint8_t>(value));
420     }
421     return bytes;
422 }
423 
ConvertStringToBytes(const PandaString & input,std::string_view encoding)424 Result<PandaVector<uint8_t>> ConvertStringToBytes(const PandaString &input, std::string_view encoding)
425 {
426     if (std::find(UTF8_ENCODINGS.begin(), UTF8_ENCODINGS.end(), encoding) != UTF8_ENCODINGS.end()) {
427         return ConvertUtf8ToBytes(input);
428     }
429     if (std::find(ASCII_ENCODINGS.begin(), ASCII_ENCODINGS.end(), encoding) != ASCII_ENCODINGS.end()) {
430         return ConvertAsciiToBytes(input);
431     }
432     if (std::find(UTF16_ENCODINGS.begin(), UTF16_ENCODINGS.end(), encoding) != UTF16_ENCODINGS.end()) {
433         return ConvertUtf16ToBytes(input);
434     }
435     if (std::find(BASE64_ENCODINGS.begin(), BASE64_ENCODINGS.end(), encoding) != BASE64_ENCODINGS.end()) {
436         return ConvertBase64ToBytes(input, encoding);
437     }
438     if (std::find(LATIN_ENCODINGS.begin(), LATIN_ENCODINGS.end(), encoding) != LATIN_ENCODINGS.end()) {
439         return ConvertLatinToBytes(input);
440     }
441     if (encoding == "hex") {
442         return ConvertHexToBytes(input);
443     }
444     return Err<PandaString>(PandaString("Unsupported encoding: ") + PandaString(encoding));
445 }
446 
CalculateStringBytesLength(std::string_view input,std::string_view encoding)447 Result<int32_t> CalculateStringBytesLength(std::string_view input, std::string_view encoding)
448 {
449     const int32_t kUtf16Multiplier = 2;
450     const int32_t kHexDivisor = 2;
451     if (std::find(K_SINGLE_BYTE_ENCODINGS.begin(), K_SINGLE_BYTE_ENCODINGS.end(), encoding) !=
452         K_SINGLE_BYTE_ENCODINGS.end()) {
453         return static_cast<int32_t>(input.size());
454     }
455     if (std::find(K_DOUBLE_BYTE_ENCODINGS.begin(), K_DOUBLE_BYTE_ENCODINGS.end(), encoding) !=
456         K_DOUBLE_BYTE_ENCODINGS.end()) {
457         return static_cast<int32_t>(input.size() * kUtf16Multiplier);
458     }
459     if (encoding == "base64" || encoding == "base64url") {
460         size_t len = input.size();
461         size_t pad = ((len != 0U) && input.back() == '=') ? 1 : 0;
462         size_t offsetTwo = 2;
463         if ((pad != 0U) && len > 1 && input[len - offsetTwo] == '=') {
464             ++pad;
465         }
466         size_t threeLength = len * 3;
467         size_t s = threeLength / base64::K_BASE64_BLOCK_SIZE;
468         if (s < pad) {
469             return Err<PandaString>(PandaString("Invalid base64 string: ") + PandaString(input));
470         }
471         size_t size = s - pad;
472         return static_cast<int32_t>(size);
473     }
474     if (encoding == "hex") {
475         if (input.size() % kHexDivisor != 0) {
476             return Err<PandaString>(PandaString("Hex string must have an even length"));
477         }
478         return static_cast<int32_t>(input.size() / kHexDivisor);
479     }
480     return Err<PandaString>(PandaString("Unsupported encoding: ") + PandaString(encoding));
481 }
482 
483 }  // namespace encoding
484 
485 }  // namespace ark::ets::intrinsics::helpers
486