1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "array_buffer_helper.h"
17
18 #include <array>
19 #include <iomanip>
20 #include <algorithm>
21 #include <cstdlib>
22 #include <cctype>
23
24 #include "plugins/ets/runtime/types/ets_arraybuffer.h"
25
26 namespace ark::ets::intrinsics::helpers {
27
28 namespace base64 {
29
// Standard Base64 alphabet. The position of a character in this string is its 6-bit value.
constexpr std::string_view K_BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Lookup table size for all possible byte values.
constexpr size_t K_LOOKUP_TABLE_SIZE = 256;

// Base64 block and binary block size constants.
constexpr size_t K_BASE64_BLOCK_SIZE = 4;  // Size of a Base64 encoded block.
constexpr size_t K_MAX_PADDING_CHARS = 2;  // Maximum number of padding characters allowed.
constexpr size_t K_BINARY_BLOCK_SIZE = 3;  // Size of binary data block that becomes one Base64 block.

// Bit manipulation constants.
constexpr size_t K_BITS_PER_BYTE = 8;
constexpr uint32_t K_BASE64_MASK = 0x3F;  // Mask for 6 bits (Base64 character).

// Bit shift constants for Base64 encoding/decoding.
// The first three are used by Decode() to splice neighbouring 6-bit values into output bytes.
constexpr size_t K_FIRST_CHAR_SHIFT = 2;
constexpr size_t K_SECOND_CHAR_SHIFT = 4;
constexpr size_t K_THIRD_CHAR_SHIFT = 6;
// The remaining ones are used by Encode(): K_HIGH_BYTE_SHIFT places the first input byte inside the 24-bit
// group, the others select the first (18), second (12) and third (6) 6-bit value of that group.
constexpr size_t K_HIGH_BYTE_SHIFT = 16;
constexpr size_t K_MID_BYTE_SHIFT = 12;
constexpr size_t K_LOW_BYTE_SHIFT = 6;
constexpr size_t K_LAST_CHAR_SHIFT = 18;

// Mask for the two lowest bits of a 6-bit value (used by Decode() when building the third output byte).
constexpr unsigned int K_TWO_BIT_MASK = 0x3;

// Character that fills an incomplete final Base64 block.
constexpr char K_PADDING_CHAR = '=';
56
BuildDecodingTable()57 constexpr std::array<int, K_LOOKUP_TABLE_SIZE> BuildDecodingTable() noexcept
58 {
59 std::array<int, K_LOOKUP_TABLE_SIZE> table {};
60 for (auto &entry : table) {
61 entry = -1; // mark as invalid
62 }
63 for (size_t i = 0; i < K_BASE64_CHARS.size(); ++i) {
64 table[static_cast<unsigned char>(K_BASE64_CHARS[i])] = static_cast<int>(i);
65 }
66 return table;
67 }
68
69 constexpr auto K_DECODING_TABLE = BuildDecodingTable();
70
IsBase64Character(unsigned char c)71 [[nodiscard]] constexpr bool IsBase64Character(unsigned char c) noexcept
72 {
73 return K_DECODING_TABLE[c] != -1;
74 }
75
ValidateBase64Input(std::string_view input)76 [[nodiscard]] bool ValidateBase64Input(std::string_view input) noexcept
77 {
78 if (input.empty()) {
79 return true;
80 }
81 if (input.size() % K_BASE64_BLOCK_SIZE != 0) {
82 return false;
83 }
84
85 const auto paddingStart = std::find(input.begin(), input.end(), K_PADDING_CHAR);
86 const bool hasPadding = paddingStart != input.end();
87 // Count padding characters at the end
88 const auto paddingCount = static_cast<size_t>(std::count(paddingStart, input.end(), K_PADDING_CHAR));
89 const bool validChars =
90 std::all_of(input.begin(), paddingStart, [](unsigned char c) { return IsBase64Character(c); });
91 const bool validPadding =
92 !hasPadding || (paddingCount <= K_MAX_PADDING_CHARS &&
93 std::all_of(paddingStart, input.end(), [](char c) { return c == K_PADDING_CHAR; }));
94 return validChars && validPadding;
95 }
96
Decode(std::string_view encodedData)97 [[nodiscard]] PandaString Decode(std::string_view encodedData)
98 {
99 if (encodedData.empty()) {
100 return {};
101 }
102 const size_t kPaddingOffset = 2;
103 size_t pad = 0;
104 if (!encodedData.empty() && encodedData.back() == K_PADDING_CHAR) {
105 ++pad;
106 }
107 if (encodedData.size() >= kPaddingOffset && encodedData[encodedData.size() - kPaddingOffset] == K_PADDING_CHAR) {
108 ++pad;
109 }
110
111 PandaString decoded;
112 decoded.reserve((encodedData.size() / K_BASE64_BLOCK_SIZE) * K_BINARY_BLOCK_SIZE - pad);
113
114 const int kSecondCharOffset = 2;
115 const int kThirdCharOffset = 3;
116
117 for (size_t i = 0; i < encodedData.size(); i += K_BASE64_BLOCK_SIZE) {
118 int decodedChar0 = K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i])];
119 int decodedChar1 = K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i + 1])];
120 int decodedChar2 = (encodedData[i + kSecondCharOffset] == K_PADDING_CHAR)
121 ? 0
122 : K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i + kSecondCharOffset])];
123 int decodedChar3 = (encodedData[i + kThirdCharOffset] == K_PADDING_CHAR)
124 ? 0
125 : K_DECODING_TABLE[static_cast<unsigned char>(encodedData[i + kThirdCharOffset])];
126
127 auto unsignedChar0 = static_cast<unsigned int>(decodedChar0);
128 auto unsignedChar1 = static_cast<unsigned int>(decodedChar1);
129 auto unsignedChar2 = static_cast<unsigned int>(decodedChar2);
130 auto unsignedChar3 = static_cast<unsigned int>(decodedChar3);
131
132 decoded.push_back(
133 static_cast<char>((unsignedChar0 << K_FIRST_CHAR_SHIFT) | (unsignedChar1 >> K_SECOND_CHAR_SHIFT)));
134 if (encodedData[i + kSecondCharOffset] != K_PADDING_CHAR) {
135 decoded.push_back(static_cast<char>(((unsignedChar1 & K_BASE64_MASK) << K_SECOND_CHAR_SHIFT) |
136 (unsignedChar2 >> K_FIRST_CHAR_SHIFT)));
137 }
138 if (encodedData[i + kThirdCharOffset] != K_PADDING_CHAR) {
139 decoded.push_back(
140 static_cast<char>(((unsignedChar2 & K_TWO_BIT_MASK) << K_THIRD_CHAR_SHIFT) | unsignedChar3));
141 }
142 }
143 return decoded;
144 }
145
Encode(const PandaVector<uint8_t> & binaryData)146 [[nodiscard]] PandaString Encode(const PandaVector<uint8_t> &binaryData)
147 {
148 if (binaryData.empty()) {
149 return {};
150 }
151 PandaString encoded;
152 const size_t kReserveMultiplier = 2;
153
154 encoded.reserve(((binaryData.size() + kReserveMultiplier) / K_BINARY_BLOCK_SIZE) * K_BASE64_BLOCK_SIZE);
155
156 const size_t kOneByte = 1;
157 const size_t kTwoBytes = 2;
158
159 size_t pos = 0;
160 while (pos < binaryData.size()) {
161 size_t remain = binaryData.size() - pos;
162 uint32_t triple = (static_cast<uint32_t>(binaryData[pos]) << K_HIGH_BYTE_SHIFT);
163 if (remain > kOneByte) {
164 triple |= (static_cast<uint32_t>(binaryData[pos + kOneByte]) << K_BITS_PER_BYTE);
165 }
166 if (remain > kTwoBytes) {
167 triple |= static_cast<uint32_t>(binaryData[pos + kTwoBytes]);
168 }
169
170 encoded.push_back(K_BASE64_CHARS[(triple >> K_LAST_CHAR_SHIFT) & K_BASE64_MASK]);
171 encoded.push_back(K_BASE64_CHARS[(triple >> K_MID_BYTE_SHIFT) & K_BASE64_MASK]);
172 encoded.push_back(remain > kOneByte ? K_BASE64_CHARS[(triple >> K_LOW_BYTE_SHIFT) & K_BASE64_MASK]
173 : K_PADDING_CHAR);
174 encoded.push_back(remain > kTwoBytes ? K_BASE64_CHARS[triple & K_BASE64_MASK] : K_PADDING_CHAR);
175
176 pos += K_BINARY_BLOCK_SIZE;
177 }
178 return encoded;
179 }
180
181 } // namespace base64
182
183 namespace encoding {
184
// Enables the "..."sv literals used for the encoding name tables below.
using namespace std::literals::string_view_literals;
// UTF-16 related constants.
constexpr size_t K_UTF16_BYTES_PER_CHAR = 2;  // Number of bytes per UTF-16 character.
constexpr size_t K_HIGH_BYTE_SHIFT = 8;       // Shift for high byte in UTF-16.

// Hex conversion constants: numeric base passed to strtoul and the number of hex digits per byte.
constexpr size_t HEX_BASE = 16;
constexpr size_t K_HEX_PAIR_SIZE = 2;

// Named constants for bit masks
constexpr uint8_t K_ASCII_MASK = 0x7F;
constexpr uint8_t K_BYTE_MASK = 0xFF;

// Encoding names grouped by bytes produced per input character; used by CalculateStringBytesLength().
constexpr std::array K_SINGLE_BYTE_ENCODINGS = {"utf8"sv, "utf-8"sv, "ascii"sv, "latin1"sv, "binary"sv};
constexpr std::array K_DOUBLE_BYTE_ENCODINGS = {"utf16le"sv, "ucs2"sv, "ucs-2"sv};

// Encoding names grouped by conversion routine; used by ConvertStringToBytes().
constexpr std::array UTF8_ENCODINGS = {"utf8"sv, "utf-8"sv};                // UTF-8 variants
constexpr std::array ASCII_ENCODINGS = {"ascii"sv};                         // ASCII (7-bit)
constexpr std::array UTF16_ENCODINGS = {"utf16le"sv, "ucs2"sv, "ucs-2"sv};  // UTF-16 little-endian variants
constexpr std::array BASE64_ENCODINGS = {"base64"sv, "base64url"sv};        // Base64 variants
constexpr std::array LATIN_ENCODINGS = {"latin1"sv, "binary"sv};            // Latin1/binary encodings
205
ValidateBuffer(const EtsEscompatArrayBuffer * buffer)206 [[nodiscard]] Result<bool> ValidateBuffer(const EtsEscompatArrayBuffer *buffer) noexcept
207 {
208 if (buffer == nullptr) {
209 return Err<PandaString>(PandaString("Buffer is null"));
210 }
211 if (buffer->WasDetached()) {
212 return Err<PandaString>(PandaString("Buffer was detached"));
213 }
214 return true;
215 }
ValidateIndices(int byteLength,int start,int end)216 [[nodiscard]] Result<bool> ValidateIndices(int byteLength, int start, int end)
217 {
218 if (start < 0 || start > byteLength) {
219 return Err<PandaString>(PandaString("Start index is out of bounds"));
220 }
221 if (end < 0 || end > byteLength || end < start) {
222 return Err<PandaString>(PandaString("End index is out of bounds"));
223 }
224 return true;
225 }
// Returns a copy of the encoding name pointed to by `encodingObj`, or "utf8" when no name is supplied.
[[nodiscard]] PandaString GetEncoding(const PandaString *encodingObj) noexcept
{
    if (encodingObj == nullptr) {
        return PandaString("utf8");
    }
    return *encodingObj;
}
BytesFromString(std::string_view input)230 PandaVector<uint8_t> BytesFromString(std::string_view input)
231 {
232 PandaVector<uint8_t> bytes;
233 bytes.assign(input.begin(), input.end());
234 return bytes;
235 }
236
StringFromBytes(const PandaVector<uint8_t> & bytes)237 PandaString StringFromBytes(const PandaVector<uint8_t> &bytes)
238 {
239 return PandaString(bytes.begin(), bytes.end());
240 }
241
MaskBytes(std::string_view input,uint8_t mask)242 PandaVector<uint8_t> MaskBytes(std::string_view input, uint8_t mask)
243 {
244 PandaVector<uint8_t> bytes;
245 bytes.resize(input.size());
246 for (size_t i = 0; i < input.size(); ++i) {
247 bytes[i] = static_cast<uint8_t>(static_cast<unsigned char>(input[i]) & mask);
248 }
249 return bytes;
250 }
251
ConvertUtf8Encoding(const PandaVector<uint8_t> & bytes)252 PandaString ConvertUtf8Encoding(const PandaVector<uint8_t> &bytes)
253 {
254 return StringFromBytes(bytes);
255 }
256
ConvertUtf16Encoding(const PandaVector<uint8_t> & bytes)257 PandaString ConvertUtf16Encoding(const PandaVector<uint8_t> &bytes)
258 {
259 if (bytes.size() % K_UTF16_BYTES_PER_CHAR != 0) {
260 return PandaString("Invalid UTF-16 byte sequence");
261 }
262 PandaString output;
263 for (size_t i = 0; i < bytes.size(); i += K_UTF16_BYTES_PER_CHAR) {
264 uint16_t ch = static_cast<uint16_t>(bytes[i]) |
265 static_cast<uint16_t>(static_cast<uint16_t>(bytes[i + 1]) << K_HIGH_BYTE_SHIFT);
266 output.push_back(static_cast<char>(ch));
267 }
268 return output;
269 }
270
ConvertBase64Encoding(const PandaVector<uint8_t> & bytes,std::string_view encoding)271 PandaString ConvertBase64Encoding(const PandaVector<uint8_t> &bytes, std::string_view encoding)
272 {
273 PandaString output = base64::Encode(bytes);
274 if (encoding == "base64url") {
275 std::replace(output.begin(), output.end(), '+', '-');
276 std::replace(output.begin(), output.end(), '/', '_');
277 size_t lastValidCharPos = output.find_last_not_of('=');
278 if (lastValidCharPos != PandaString::npos) {
279 output.erase(lastValidCharPos + 1);
280 }
281 }
282 return output;
283 }
284
ConvertUtf8ToBytes(std::string_view input)285 PandaVector<uint8_t> ConvertUtf8ToBytes(std::string_view input)
286 {
287 return BytesFromString(input);
288 }
289
ConvertAsciiToBytes(std::string_view input)290 PandaVector<uint8_t> ConvertAsciiToBytes(std::string_view input)
291 {
292 return MaskBytes(input, K_ASCII_MASK);
293 }
294
ConvertLatinToBytes(std::string_view input)295 PandaVector<uint8_t> ConvertLatinToBytes(std::string_view input)
296 {
297 return MaskBytes(input, K_BYTE_MASK);
298 }
299
ConvertHexEncoding(const PandaVector<uint8_t> & bytes)300 PandaString ConvertHexEncoding(const PandaVector<uint8_t> &bytes)
301 {
302 PandaOStringStream oss;
303 const int width = 2;
304 for (uint8_t byte : bytes) {
305 oss << std::hex << std::setw(width) << std::setfill('0') << static_cast<int>(byte);
306 }
307 return oss.str();
308 }
309
ConvertLatinEncoding(const PandaVector<uint8_t> & bytes)310 PandaString ConvertLatinEncoding(const PandaVector<uint8_t> &bytes)
311 {
312 PandaString output;
313 output.reserve(bytes.size());
314 for (uint8_t byte : bytes) {
315 output.push_back(static_cast<char>(byte));
316 }
317 return output;
318 }
319
ConvertUtf16ToBytes(std::string_view input)320 PandaVector<uint8_t> ConvertUtf16ToBytes(std::string_view input)
321 {
322 const size_t kUtf16Multiplier = 2;
323 const size_t kShiftBy8 = 8U;
324 PandaVector<uint8_t> bytes;
325 bytes.resize(input.size() * kUtf16Multiplier);
326
327 for (size_t i = 0; i < input.size(); ++i) {
328 auto ch = static_cast<uint16_t>(static_cast<unsigned char>(input[i]));
329 auto position = i * kUtf16Multiplier;
330 bytes[position] = static_cast<uint8_t>(ch & K_BYTE_MASK);
331 bytes[position + 1] = static_cast<uint8_t>(static_cast<unsigned int>(ch) >> kShiftBy8);
332 }
333 return bytes;
334 }
335
ConvertBase64ToBytes(const PandaString & input,std::string_view encoding)336 Result<PandaVector<uint8_t>> ConvertBase64ToBytes(const PandaString &input, std::string_view encoding)
337 {
338 PandaString decoded;
339 if (encoding == "base64url") {
340 PandaString temp = input;
341 std::replace(temp.begin(), temp.end(), '-', '+');
342 std::replace(temp.begin(), temp.end(), '_', '/');
343 decoded = base64::Decode(temp);
344 } else {
345 decoded = base64::Decode(input);
346 }
347
348 return BytesFromString(decoded);
349 }
350
// Tells whether `c` is an ASCII hexadecimal digit: 0-9, a-f or A-F.
bool IsSymbolHex(char c)
{
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
355
FoundInputHex(const PandaString & input,bool & error)356 PandaString FoundInputHex(const PandaString &input, bool &error)
357 {
358 if (input.size() == 1) {
359 error = true;
360 return "";
361 }
362
363 PandaString inputHex;
364 bool isFirstPair = true;
365 size_t idx = 1;
366 while (idx < input.size()) {
367 if (isFirstPair) {
368 if (IsSymbolHex(input[idx - 1]) && IsSymbolHex(input[idx])) {
369 inputHex += input[idx - 1];
370 inputHex += input[idx];
371 } else {
372 error = true;
373 return "";
374 }
375 isFirstPair = false;
376 } else {
377 if (IsSymbolHex(input[idx - 1]) && IsSymbolHex(input[idx])) {
378 inputHex += input[idx - 1];
379 inputHex += input[idx];
380 } else {
381 break;
382 }
383 }
384 idx += K_HEX_PAIR_SIZE;
385 }
386 error = false;
387 return inputHex;
388 }
389
ConvertHexToBytes(const PandaString & input)390 Result<PandaVector<uint8_t>> ConvertHexToBytes(const PandaString &input)
391 {
392 // Found hex codes in input. Other symbols should be ignored
393 // input should start with hex code(-s)
394 bool error = false;
395 PandaString inputHex = FoundInputHex(input, error);
396 if (error) {
397 return Err<PandaString>(PandaString("The argument 'value' is invalid. Received is '") + input +
398 PandaString("'"));
399 }
400
401 // Check size
402 if (inputHex.empty()) {
403 inputHex += "0";
404 inputHex += "0";
405 } else if (inputHex.size() % K_HEX_PAIR_SIZE != 0) {
406 return Err<PandaString>(PandaString("Hex string must have an even length"));
407 }
408 PandaVector<uint8_t> bytes;
409 size_t bytesLength = inputHex.size() / K_HEX_PAIR_SIZE;
410 bytes.reserve(bytesLength);
411 const size_t kHexStringLength = 3;
412 for (size_t i = 0; i < inputHex.size(); i += K_HEX_PAIR_SIZE) {
413 std::array<char, kHexStringLength> hex = {inputHex[i], inputHex[i + 1], '\0'};
414 char *endptr = nullptr;
415 uint64_t value = std::strtoul(hex.data(), &endptr, HEX_BASE);
416 if (*endptr != '\0') {
417 return Err<PandaString>(PandaString("Invalid hex string"));
418 }
419 bytes.push_back(static_cast<uint8_t>(value));
420 }
421 return bytes;
422 }
423
ConvertStringToBytes(const PandaString & input,std::string_view encoding)424 Result<PandaVector<uint8_t>> ConvertStringToBytes(const PandaString &input, std::string_view encoding)
425 {
426 if (std::find(UTF8_ENCODINGS.begin(), UTF8_ENCODINGS.end(), encoding) != UTF8_ENCODINGS.end()) {
427 return ConvertUtf8ToBytes(input);
428 }
429 if (std::find(ASCII_ENCODINGS.begin(), ASCII_ENCODINGS.end(), encoding) != ASCII_ENCODINGS.end()) {
430 return ConvertAsciiToBytes(input);
431 }
432 if (std::find(UTF16_ENCODINGS.begin(), UTF16_ENCODINGS.end(), encoding) != UTF16_ENCODINGS.end()) {
433 return ConvertUtf16ToBytes(input);
434 }
435 if (std::find(BASE64_ENCODINGS.begin(), BASE64_ENCODINGS.end(), encoding) != BASE64_ENCODINGS.end()) {
436 return ConvertBase64ToBytes(input, encoding);
437 }
438 if (std::find(LATIN_ENCODINGS.begin(), LATIN_ENCODINGS.end(), encoding) != LATIN_ENCODINGS.end()) {
439 return ConvertLatinToBytes(input);
440 }
441 if (encoding == "hex") {
442 return ConvertHexToBytes(input);
443 }
444 return Err<PandaString>(PandaString("Unsupported encoding: ") + PandaString(encoding));
445 }
446
CalculateStringBytesLength(std::string_view input,std::string_view encoding)447 Result<int32_t> CalculateStringBytesLength(std::string_view input, std::string_view encoding)
448 {
449 const int32_t kUtf16Multiplier = 2;
450 const int32_t kHexDivisor = 2;
451 if (std::find(K_SINGLE_BYTE_ENCODINGS.begin(), K_SINGLE_BYTE_ENCODINGS.end(), encoding) !=
452 K_SINGLE_BYTE_ENCODINGS.end()) {
453 return static_cast<int32_t>(input.size());
454 }
455 if (std::find(K_DOUBLE_BYTE_ENCODINGS.begin(), K_DOUBLE_BYTE_ENCODINGS.end(), encoding) !=
456 K_DOUBLE_BYTE_ENCODINGS.end()) {
457 return static_cast<int32_t>(input.size() * kUtf16Multiplier);
458 }
459 if (encoding == "base64" || encoding == "base64url") {
460 size_t len = input.size();
461 size_t pad = ((len != 0U) && input.back() == '=') ? 1 : 0;
462 size_t offsetTwo = 2;
463 if ((pad != 0U) && len > 1 && input[len - offsetTwo] == '=') {
464 ++pad;
465 }
466 size_t threeLength = len * 3;
467 size_t s = threeLength / base64::K_BASE64_BLOCK_SIZE;
468 if (s < pad) {
469 return Err<PandaString>(PandaString("Invalid base64 string: ") + PandaString(input));
470 }
471 size_t size = s - pad;
472 return static_cast<int32_t>(size);
473 }
474 if (encoding == "hex") {
475 if (input.size() % kHexDivisor != 0) {
476 return Err<PandaString>(PandaString("Hex string must have an even length"));
477 }
478 return static_cast<int32_t>(input.size() / kHexDivisor);
479 }
480 return Err<PandaString>(PandaString("Unsupported encoding: ") + PandaString(encoding));
481 }
482
483 } // namespace encoding
484
485 } // namespace ark::ets::intrinsics::helpers
486