1 /* 2 * Copyright (c) 2023 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #include "util_helper.h" 17 18 #include "native_engine.h" 19 #include "securec.h" 20 21 namespace Commonlibrary::Platform { CreateConverter(const std::string & encStr_,UErrorCode & codeflag)22 UConverter* CreateConverter(const std::string& encStr_, UErrorCode& codeflag) 23 { 24 UConverter *conv = ucnv_open(encStr_.c_str(), &codeflag); 25 if (U_FAILURE(codeflag)) { 26 HILOG_ERROR("Unable to create a UConverter object: %s\n", u_errorName(codeflag)); 27 return NULL; 28 } 29 ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag); 30 if (U_FAILURE(codeflag)) { 31 HILOG_ERROR("Unable to set the from Unicode callback function"); 32 ucnv_close(conv); 33 return NULL; 34 } 35 36 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag); 37 if (U_FAILURE(codeflag)) { 38 HILOG_ERROR("Unable to set the to Unicode callback function"); 39 ucnv_close(conv); 40 return NULL; 41 } 42 return conv; 43 } 44 ConvertToString(UChar * uchar,size_t length)45 std::string ConvertToString(UChar * uchar, size_t length) 46 { 47 std::u16string tempStr16(uchar); 48 std::string tepStr = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(tempStr16); 49 return tepStr; 50 } 51 EncodeIntoChinese(napi_env env,napi_value src,std::string encoding,std::string & buffer)52 void EncodeIntoChinese(napi_env env, napi_value src, std::string encoding, std::string& buffer) 53 { 54 NativeEngine *engine = reinterpret_cast<NativeEngine*>(env); 55 engine->EncodeToChinese(src, buffer, encoding); 56 } 57 UnicodeConversion(std::string encoding,char16_t * originalBuffer,size_t inputSize)58 std::string UnicodeConversion(std::string encoding, char16_t* originalBuffer, size_t inputSize) 59 { 60 std::string buffer = ""; 61 UErrorCode codeflag = U_ZERO_ERROR; 62 UConverter* converter = ucnv_open(encoding.c_str(), &codeflag); 63 if (U_FAILURE(codeflag)) { 64 HILOG_ERROR("TextEncoder:: ucnv_open failed !"); 65 return ""; 66 } 67 68 size_t maxByteSize = static_cast<size_t>(ucnv_getMaxCharSize(converter)); 69 const UChar *source = originalBuffer; 70 size_t limit = maxByteSize * inputSize; 71 size_t len = limit * sizeof(char); 72 char *targetArray = nullptr; 73 if (limit > 0) { 74 targetArray = new (std::nothrow) char[limit + 1]; 75 if (targetArray == nullptr) { 76 HILOG_ERROR("TextEncoder:: UnicodeConversion memory allocation failed, targetArray is nullptr"); 77 ucnv_close(converter); 78 return ""; 79 } 80 if (memset_s(targetArray, len + sizeof(char), 0, len + sizeof(char)) != EOK) { 81 HILOG_ERROR("TextEncoder:: encode targetArray memset_s failed"); 82 ucnv_close(converter); 83 FreedMemory(targetArray); 84 return ""; 85 } 86 } else { 87 HILOG_ERROR("TextEncoder:: limit is error"); 88 ucnv_close(converter); 89 return ""; 90 } 91 92 char *target = targetArray; 93 const char *targetLimit = targetArray + limit; 94 const UChar *sourceLimit = source + u_strlen(source); 95 if (sourceLimit == nullptr) { 96 HILOG_ERROR("TextEncoder:: sourceLimit is nullptr"); 97 return ""; 98 } 99 ucnv_fromUnicode(converter, &target, targetLimit, &source, sourceLimit, nullptr, true, &codeflag); 100 if (U_FAILURE(codeflag)) { 101 HILOG_ERROR("TextEncoder:: ucnv_fromUnicode conversion failed."); 102 ucnv_close(converter); 103 FreedMemory(targetArray); 104 return ""; 105 } 106 107 buffer = targetArray; 108 ucnv_close(converter); 109 FreedMemory(targetArray); 110 return buffer; 111 } 112 EncodeToUtf8(TextEcodeInfo encodeInfo,char * writeResult,uint32_t * written,size_t length,int32_t * nchars)113 void EncodeToUtf8(TextEcodeInfo encodeInfo, char* writeResult, uint32_t* written, size_t length, int32_t* nchars) 114 { 115 if (encodeInfo.encoding == "utf-16be" || encodeInfo.encoding == "utf-16le") { 116 EncodeTo16BE(encodeInfo, writeResult, written, length, nchars); 117 } else { 118 OtherEncodeUtf8(encodeInfo, writeResult, written, length, nchars); 119 } 120 } 121 EncodeConversion(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)122 void EncodeConversion(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding) 123 { 124 if (encoding == "utf-16le") { 125 size_t outLen = 0; 126 void *data = nullptr; 127 std::u16string u16Str = EncodeUtf16BE(env, src); 128 outLen = u16Str.length() * 2; // 2:multiple 129 outLens = outLen; 130 napi_create_arraybuffer(env, outLen, &data, arrayBuffer); 131 if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16Str.data()), outLen) != EOK) { 132 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error"); 133 return; 134 } 135 } else if (encoding == "utf-16be") { 136 size_t outLen = 0; 137 void *data = nullptr; 138 std::u16string u16BEStr = EncodeUtf16BE(env, src); 139 std::u16string u16LEStr = Utf16BEToLE(u16BEStr); 140 outLen = u16LEStr.length() * 2; // 2:multiple 141 outLens = outLen; 142 napi_create_arraybuffer(env, outLen, &data, arrayBuffer); 143 if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16LEStr.data()), outLen) != EOK) { 144 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error"); 145 return; 146 } 147 } else { 148 OtherEncode(env, src, arrayBuffer, outLens, encoding); 149 } 150 } 151 GetMaxByteSize(std::string encoding)152 int GetMaxByteSize(std::string encoding) 153 { 154 UErrorCode codeflag = U_ZERO_ERROR; 155 UConverter* converter = ucnv_open(encoding.c_str(), &codeflag); 156 if (U_FAILURE(codeflag)) { 157 HILOG_ERROR("TextEncoder:: ucnv_open failed !"); 158 return 0; 159 } 160 161 int maxByteSize = static_cast<int>(ucnv_getMaxCharSize(converter)); 162 ucnv_close(converter); 163 return maxByteSize; 164 } 165 FreedMemory(char * data)166 void FreedMemory(char *data) 167 { 168 if (data != nullptr) { 169 delete[] data; 170 data = nullptr; 171 } 172 } 173 IsOneByte(uint8_t u8Char)174 bool IsOneByte(uint8_t u8Char) 175 { 176 return (u8Char & 0x80) == 0; 177 } 178 Utf8ToUtf16BE(const std::string & u8Str,bool * ok)179 std::u16string Utf8ToUtf16BE(const std::string &u8Str, bool *ok) 180 { 181 std::u16string u16Str = u""; 182 u16Str.reserve(u8Str.size()); 183 std::string::size_type len = u8Str.length(); 184 const unsigned char *data = reinterpret_cast<const unsigned char *>(u8Str.data()); 185 bool isOk = true; 186 for (std::string::size_type i = 0; i < len; ++i) { 187 uint8_t c1 = data[i]; 188 if (IsOneByte(c1)) { 189 u16Str.push_back(static_cast<char16_t>(c1)); 190 continue; 191 } 192 switch (c1 & HIGER_4_BITS_MASK) { 193 case FOUR_BYTES_STYLE: { 194 uint8_t c2 = data[++i]; 195 uint8_t c3 = data[++i]; 196 uint8_t c4 = data[++i]; 197 uint32_t codePoint = ((c1 & LOWER_3_BITS_MASK) << (3 * UTF8_VALID_BITS)) | // 3:multiple 198 ((c2 & LOWER_6_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple 199 ((c3 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c4 & LOWER_6_BITS_MASK); 200 if (codePoint >= UTF16_SPECIAL_VALUE) { 201 codePoint -= UTF16_SPECIAL_VALUE; 202 u16Str.push_back(static_cast<char16_t>((codePoint >> 10) | HIGH_AGENT_MASK)); // 10:offset value 203 u16Str.push_back(static_cast<char16_t>((codePoint & LOWER_10_BITS_MASK) | LOW_AGENT_MASK)); 204 } else { 205 u16Str.push_back(static_cast<char16_t>(codePoint)); 206 } 207 break; 208 } 209 case THREE_BYTES_STYLE: { 210 uint8_t c2 = data[++i]; 211 uint8_t c3 = data[++i]; 212 uint32_t codePoint = ((c1 & LOWER_4_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple 213 ((c2 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c3 & LOWER_6_BITS_MASK); 214 u16Str.push_back(static_cast<char16_t>(codePoint)); 215 break; 216 } 217 case TWO_BYTES_STYLE1: 218 case TWO_BYTES_STYLE2: { 219 uint8_t c2 = data[++i]; 220 uint32_t codePoint = ((c1 & LOWER_5_BITS_MASK) << UTF8_VALID_BITS) | (c2 & LOWER_6_BITS_MASK); 221 u16Str.push_back(static_cast<char16_t>(codePoint)); 222 break; 223 } 224 default: { 225 isOk = false; 226 break; 227 } 228 } 229 } 230 if (ok != nullptr) { 231 *ok = isOk; 232 } 233 return u16Str; 234 } 235 Utf16BEToLE(const std::u16string & wstr)236 std::u16string Utf16BEToLE(const std::u16string &wstr) 237 { 238 std::u16string str16 = u""; 239 const char16_t *data = wstr.data(); 240 for (unsigned int i = 0; i < wstr.length(); i++) { 241 char16_t wc = data[i]; 242 char16_t high = (wc >> 8) & 0x00FF; // 8:offset value 243 char16_t low = wc & 0x00FF; 244 char16_t c16 = (low << 8) | high; // 8:offset value 245 str16.push_back(c16); 246 } 247 return str16; 248 } 249 OtherEncode(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)250 void OtherEncode(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding) 251 { 252 size_t outLen = 0; 253 void *data = nullptr; 254 size_t inputSize = 0; 255 napi_get_value_string_utf16(env, src, nullptr, 0, &inputSize); 256 char16_t *originalBuffer = ApplyMemory(inputSize); 257 if (originalBuffer == nullptr) { 258 HILOG_ERROR("TextEncoder:: originalBuffer is nullptr"); 259 return; 260 } 261 napi_get_value_string_utf16(env, src, originalBuffer, inputSize + 1, &inputSize); 262 outLen = static_cast<size_t>(GetMaxByteSize(encoding)) * inputSize; 263 napi_create_arraybuffer(env, outLen, &data, arrayBuffer); 264 char *writeResult = static_cast<char*>(data); 265 if (writeResult == nullptr) { 266 FreedMemory(originalBuffer); 267 HILOG_ERROR("TextEncoder:: writeResult is nullptr"); 268 return; 269 } 270 std::string buffer = ""; 271 std::u16string originalStr(originalBuffer, inputSize); 272 size_t shifting = 0; 273 size_t resultShifting = 0; 274 size_t findIndex = originalStr.find('\0'); 275 if (findIndex == std::string::npos) { 276 buffer = UnicodeConversion(encoding, originalBuffer, inputSize); 277 outLens = buffer.length(); 278 if (memcpy_s(writeResult, outLens, reinterpret_cast<char*>(buffer.data()), outLens) != EOK) { 279 FreedMemory(originalBuffer); 280 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error"); 281 return; 282 } 283 } else { 284 while (findIndex != std::string::npos) { 285 buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize); 286 if (memcpy_s(writeResult + resultShifting, buffer.length(), 287 reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) { 288 FreedMemory(originalBuffer); 289 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error"); 290 return; 291 } 292 resultShifting += buffer.length(); 293 *(writeResult + resultShifting) = '\0'; 294 resultShifting += 1; 295 outLens += buffer.length() + 1; 296 shifting += findIndex + 1; 297 originalStr = originalStr.substr(findIndex + 1, inputSize); 298 findIndex = originalStr.find('\0'); 299 } 300 buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize); 301 outLens += buffer.length(); 302 if (memcpy_s(writeResult + resultShifting, buffer.length(), 303 reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) { 304 FreedMemory(originalBuffer); 305 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error"); 306 return; 307 } 308 } 309 FreedMemory(originalBuffer); 310 } 311 EncodeUtf16BE(napi_env env,napi_value src)312 std::u16string EncodeUtf16BE(napi_env env, napi_value src) 313 { 314 std::string buffer = ""; 315 size_t bufferSize = 0; 316 317 if (napi_get_value_string_utf8(env, src, nullptr, 0, &bufferSize) != napi_ok) { 318 HILOG_ERROR("TextEncoder:: can not get src size"); 319 return u""; 320 } 321 buffer.reserve(bufferSize + 1); 322 buffer.resize(bufferSize); 323 if (napi_get_value_string_utf8(env, src, buffer.data(), bufferSize + 1, &bufferSize) != napi_ok) { 324 HILOG_ERROR("TextEncoder:: can not get src value"); 325 return u""; 326 } 327 std::u16string u16Str = Utf8ToUtf16BE(buffer); 328 return u16Str; 329 } 330 IsValidLowSurrogate(char16_t high)331 bool IsValidLowSurrogate(char16_t high) 332 { 333 // 0xD800: minimum value of low proxy term. 0xDBFF: Maximum value of low proxy term. 334 return (high >= 0xD800 && high <= 0xDBFF); 335 } 336 IsValidHighSurrogate(char16_t low)337 bool IsValidHighSurrogate(char16_t low) 338 { 339 // 0xDC00: minimum value of high proxy item. 0xDFFF: maximum value of high proxy item. 340 return (low >= 0xDC00 && low <= 0xDFFF); 341 } 342 OtherEncodeUtf8Inner(char16_t * originalBuffer,InputBufferInfo inputInfo,size_t & index,OutBufferInfo & outInfo)343 uint32_t OtherEncodeUtf8Inner(char16_t *originalBuffer, InputBufferInfo inputInfo, size_t &index, 344 OutBufferInfo &outInfo) 345 { 346 if (IsValidLowSurrogate(originalBuffer[index]) && inputInfo.encoding == "utf-8") { 347 size_t tempIndex = index + 1; 348 if (tempIndex < inputInfo.inputSize && IsValidHighSurrogate(originalBuffer[index + 1])) { 349 // 2: move the pointer forward to the position of two elements. 350 std::u16string utf16String(&originalBuffer[index], &originalBuffer[index] + 2); 351 std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter; 352 outInfo.rstBuffer = converter.to_bytes(utf16String); 353 outInfo.rstBufferLength = outInfo.rstBuffer.length(); 354 if (outInfo.rstBufferLength > outInfo.writedSize) { 355 return STATE_BREAK_ZERO; 356 } 357 index++; 358 outInfo.cntSize += 2; // 2: two bytes 359 outInfo.bufferResult += outInfo.rstBuffer; 360 outInfo.writedSize -= outInfo.rstBufferLength; 361 return STATE_CONTINUE_ONE; 362 } 363 } 364 return STATE_OTHER_TWO; 365 } 366 OtherEncodeUtf8(TextEcodeInfo encodeInfo,char * writeResult,uint32_t * written,size_t length,int32_t * nchar)367 void OtherEncodeUtf8(TextEcodeInfo encodeInfo, char* writeResult, uint32_t* written, size_t length, int32_t* nchar) 368 { 369 size_t inputSize = 0; 370 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize); 371 char16_t *originalBuffer = ApplyMemory(inputSize); 372 if (originalBuffer == nullptr) { 373 HILOG_ERROR("TextEncoder:: originalBuffer is nullptr"); 374 return; 375 } 376 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize); 377 std::vector<char16_t> targetBuffer(inputSize + 1, u'\0'); 378 InputBufferInfo inputInfo(encodeInfo.encoding, inputSize); 379 OutBufferInfo outInfo(length, "", 0, 0, ""); 380 for (size_t i = 0; i < inputSize; i++) { 381 targetBuffer[i] = originalBuffer[i]; 382 uint32_t rstState = OtherEncodeUtf8Inner(originalBuffer, inputInfo, i, outInfo); 383 if (rstState == STATE_BREAK_ZERO) { 384 break; 385 } else if (rstState == STATE_CONTINUE_ONE) { 386 continue; 387 } 388 outInfo.rstBuffer = UnicodeConversion(encodeInfo.encoding, &targetBuffer[i], inputSize); 389 outInfo.rstBufferLength = outInfo.rstBuffer.length(); 390 if (outInfo.rstBufferLength > outInfo.writedSize) { 391 break; 392 } 393 outInfo.cntSize++; 394 outInfo.bufferResult += outInfo.rstBuffer; 395 outInfo.writedSize -= outInfo.rstBufferLength; 396 } 397 size_t writeLength = outInfo.bufferResult.length(); 398 for (size_t j = 0; j < writeLength; j++) { 399 *writeResult = outInfo.bufferResult[j]; 400 writeResult++; 401 } 402 *nchar = static_cast<int32_t>(outInfo.cntSize); 403 *written = static_cast<uint32_t>(writeLength); 404 FreedMemory(originalBuffer); 405 } 406 EncodeTo16BE(TextEcodeInfo encodeInfo,char * writeResult,uint32_t * written,size_t length,int32_t * nchars)407 void EncodeTo16BE(TextEcodeInfo encodeInfo, char* writeResult, uint32_t* written, size_t length, int32_t* nchars) 408 { 409 size_t inputSize = 0; 410 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize); 411 char16_t *originalBuffer = ApplyMemory(inputSize); 412 if (originalBuffer == nullptr) { 413 HILOG_ERROR("TextEncoder:: originalBuffer is nullptr"); 414 return; 415 } 416 napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize); 417 418 size_t writableSize = length; 419 std::u16string bufferResult = u""; 420 size_t i = 0; 421 for (; i < inputSize; i++) { 422 std::string strBuff = ""; 423 std::u16string buffer = u""; 424 strBuff = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(originalBuffer[i]); 425 if (encodeInfo.encoding == "utf-16le") { 426 buffer = Utf8ToUtf16BE(strBuff); 427 } else { 428 std::u16string u16Str = Utf8ToUtf16BE(strBuff); 429 buffer = Utf16BEToLE(u16Str); 430 } 431 size_t bufferLength = buffer.length() * 2; // 2:multiple 432 if (bufferLength > writableSize) { 433 break; 434 } 435 bufferResult += buffer; 436 writableSize -= bufferLength; 437 } 438 439 size_t writeLength = bufferResult.length() * 2; // 2:multiple 440 if (memcpy_s(writeResult, writeLength, reinterpret_cast<char*>(bufferResult.data()), writeLength) != EOK) { 441 FreedMemory(originalBuffer); 442 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error"); 443 return; 444 } 445 *nchars = static_cast<int32_t>(i); 446 *written = static_cast<uint32_t>(writeLength); 447 FreedMemory(originalBuffer); 448 } 449 ApplyMemory(const size_t & inputSize)450 char16_t *ApplyMemory(const size_t &inputSize) 451 { 452 char16_t *originalBuffer = nullptr; 453 if (inputSize > 0) { 454 originalBuffer = new (std::nothrow) char16_t[inputSize + 1]; 455 if (originalBuffer == nullptr) { 456 HILOG_ERROR("TextEncoder:: originalBuffer memory allocation failed, originalBuffer is nullptr"); 457 return nullptr; 458 } 459 if (memset_s(originalBuffer, inputSize + 1, u'\0', inputSize + 1) != EOK) { 460 HILOG_ERROR("encode originalBuffer memset_s failed"); 461 FreedMemory(originalBuffer); 462 return nullptr; 463 } 464 } else { 465 HILOG_ERROR("inputSize is error"); 466 return nullptr; 467 } 468 return originalBuffer; 469 } 470 FreedMemory(char16_t * & data)471 void FreedMemory(char16_t *&data) 472 { 473 if (data != nullptr) { 474 delete[] data; 475 data = nullptr; 476 } 477 } 478 } // namespace Commonlibrary::Platform