• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "util_helper.h"
17 
18 #include "native_engine.h"
19 #include "securec.h"
20 
21 namespace Commonlibrary::Platform {
CreateConverter(const std::string & encStr_,UErrorCode & codeflag)22     UConverter* CreateConverter(const std::string& encStr_, UErrorCode& codeflag)
23     {
24         UConverter *conv = ucnv_open(encStr_.c_str(), &codeflag);
25         if (U_FAILURE(codeflag)) {
26             HILOG_ERROR("Unable to create a UConverter object: %s\n", u_errorName(codeflag));
27             return NULL;
28         }
29         ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag);
30         if (U_FAILURE(codeflag)) {
31             HILOG_ERROR("Unable to set the from Unicode callback function");
32             ucnv_close(conv);
33             return NULL;
34         }
35 
36         ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &codeflag);
37         if (U_FAILURE(codeflag)) {
38             HILOG_ERROR("Unable to set the to Unicode callback function");
39             ucnv_close(conv);
40             return NULL;
41         }
42         return conv;
43     }
44 
ConvertToString(UChar * uchar,size_t length)45     std::string ConvertToString(UChar * uchar, size_t length)
46     {
47         std::u16string tempStr16(uchar);
48         std::string tepStr = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(tempStr16);
49         return tepStr;
50     }
51 
EncodeIntoChinese(napi_env env,napi_value src,std::string encoding,std::string & buffer)52     void EncodeIntoChinese(napi_env env, napi_value src, std::string encoding, std::string& buffer)
53     {
54         NativeEngine *engine = reinterpret_cast<NativeEngine*>(env);
55         engine->EncodeToChinese(src, buffer, encoding);
56     }
57 
UnicodeConversion(std::string encoding,char16_t * originalBuffer,size_t inputSize)58     std::string UnicodeConversion(std::string encoding, char16_t* originalBuffer, size_t inputSize)
59     {
60         std::string buffer = "";
61         UErrorCode codeflag = U_ZERO_ERROR;
62         UConverter* converter = ucnv_open(encoding.c_str(), &codeflag);
63         if (U_FAILURE(codeflag)) {
64             HILOG_ERROR("TextEncoder:: ucnv_open failed !");
65             return "";
66         }
67 
68         size_t maxByteSize = static_cast<size_t>(ucnv_getMaxCharSize(converter));
69         const UChar *source = originalBuffer;
70         size_t limit = maxByteSize * inputSize;
71         size_t len = limit * sizeof(char);
72         char *targetArray = nullptr;
73         if (limit > 0) {
74             targetArray = new (std::nothrow) char[limit + 1];
75             if (targetArray == nullptr) {
76                 HILOG_ERROR("TextEncoder:: UnicodeConversion memory allocation failed, targetArray is nullptr");
77                 ucnv_close(converter);
78                 return "";
79             }
80             if (memset_s(targetArray, len + sizeof(char), 0, len + sizeof(char)) != EOK) {
81                 HILOG_ERROR("TextEncoder:: encode targetArray memset_s failed");
82                 ucnv_close(converter);
83                 FreedMemory(targetArray);
84                 return "";
85             }
86         } else {
87             HILOG_ERROR("TextEncoder:: limit is error");
88             ucnv_close(converter);
89             return "";
90         }
91 
92         char *target = targetArray;
93         const char *targetLimit = targetArray + limit;
94         const UChar *sourceLimit = source + u_strlen(source);
95         if (sourceLimit == nullptr) {
96             HILOG_ERROR("TextEncoder:: sourceLimit is nullptr");
97             return "";
98         }
99         ucnv_fromUnicode(converter, &target, targetLimit, &source, sourceLimit, nullptr, true, &codeflag);
100         if (U_FAILURE(codeflag)) {
101             HILOG_ERROR("TextEncoder:: ucnv_fromUnicode conversion failed.");
102             ucnv_close(converter);
103             FreedMemory(targetArray);
104             return "";
105         }
106 
107         buffer = targetArray;
108         ucnv_close(converter);
109         FreedMemory(targetArray);
110         return buffer;
111     }
112 
EncodeToUtf8(TextEcodeInfo encodeInfo,char * writeResult,uint32_t * written,size_t length,int32_t * nchars)113     void EncodeToUtf8(TextEcodeInfo encodeInfo, char* writeResult, uint32_t* written, size_t length, int32_t* nchars)
114     {
115         if (encodeInfo.encoding == "utf-16be" || encodeInfo.encoding == "utf-16le") {
116             EncodeTo16BE(encodeInfo, writeResult, written, length, nchars);
117         } else {
118             OtherEncodeUtf8(encodeInfo, writeResult, written, length, nchars);
119         }
120     }
121 
EncodeConversion(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)122     void EncodeConversion(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding)
123     {
124         if (encoding == "utf-16le") {
125             size_t  outLen = 0;
126             void *data = nullptr;
127             std::u16string u16Str = EncodeUtf16BE(env, src);
128             outLen = u16Str.length() * 2; // 2:multiple
129             outLens = outLen;
130             napi_create_arraybuffer(env, outLen, &data, arrayBuffer);
131             if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16Str.data()), outLen) != EOK) {
132                 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error");
133                 return;
134             }
135         } else if (encoding == "utf-16be") {
136             size_t  outLen = 0;
137             void *data = nullptr;
138             std::u16string u16BEStr = EncodeUtf16BE(env, src);
139             std::u16string u16LEStr = Utf16BEToLE(u16BEStr);
140             outLen = u16LEStr.length() * 2; // 2:multiple
141             outLens = outLen;
142             napi_create_arraybuffer(env, outLen, &data, arrayBuffer);
143             if (memcpy_s(data, outLen, reinterpret_cast<void*>(u16LEStr.data()), outLen) != EOK) {
144                 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error");
145                 return;
146             }
147         } else {
148             OtherEncode(env, src, arrayBuffer, outLens, encoding);
149         }
150     }
151 
GetMaxByteSize(std::string encoding)152     int GetMaxByteSize(std::string encoding)
153     {
154         UErrorCode codeflag = U_ZERO_ERROR;
155         UConverter* converter = ucnv_open(encoding.c_str(), &codeflag);
156         if (U_FAILURE(codeflag)) {
157             HILOG_ERROR("TextEncoder:: ucnv_open failed !");
158             return 0;
159         }
160 
161         int maxByteSize = static_cast<int>(ucnv_getMaxCharSize(converter));
162         ucnv_close(converter);
163         return maxByteSize;
164     }
165 
// Releases a char array that was allocated with new[].
//
// data  array to free; nullptr is accepted and ignored.
//
// The pointer is taken by value, so the caller's own pointer is left
// unchanged and must not be used afterwards.
void FreedMemory(char *data)
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] data;
}
173 
// Tells whether u8Char has its top bit clear, i.e. is a single-byte (ASCII)
// UTF-8 unit in the range 0x00..0x7F.
bool IsOneByte(uint8_t u8Char)
{
    return u8Char < 0x80;
}
178 
Utf8ToUtf16BE(const std::string & u8Str,bool * ok)179     std::u16string Utf8ToUtf16BE(const std::string &u8Str, bool *ok)
180     {
181         std::u16string u16Str = u"";
182         u16Str.reserve(u8Str.size());
183         std::string::size_type len = u8Str.length();
184         const unsigned char *data = reinterpret_cast<const unsigned char *>(u8Str.data());
185         bool isOk = true;
186         for (std::string::size_type i = 0; i < len; ++i) {
187             uint8_t c1 = data[i];
188             if (IsOneByte(c1)) {
189                 u16Str.push_back(static_cast<char16_t>(c1));
190                 continue;
191             }
192             switch (c1 & HIGER_4_BITS_MASK) {
193                 case FOUR_BYTES_STYLE: {
194                     uint8_t c2 = data[++i];
195                     uint8_t c3 = data[++i];
196                     uint8_t c4 = data[++i];
197                     uint32_t codePoint = ((c1 & LOWER_3_BITS_MASK) << (3 * UTF8_VALID_BITS)) | // 3:multiple
198                         ((c2 & LOWER_6_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple
199                         ((c3 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c4 & LOWER_6_BITS_MASK);
200                     if (codePoint >= UTF16_SPECIAL_VALUE) {
201                         codePoint -= UTF16_SPECIAL_VALUE;
202                         u16Str.push_back(static_cast<char16_t>((codePoint >> 10) | HIGH_AGENT_MASK)); // 10:offset value
203                         u16Str.push_back(static_cast<char16_t>((codePoint & LOWER_10_BITS_MASK) | LOW_AGENT_MASK));
204                     } else {
205                         u16Str.push_back(static_cast<char16_t>(codePoint));
206                     }
207                     break;
208                 }
209                 case THREE_BYTES_STYLE: {
210                     uint8_t c2 = data[++i];
211                     uint8_t c3 = data[++i];
212                     uint32_t codePoint = ((c1 & LOWER_4_BITS_MASK) << (2 * UTF8_VALID_BITS)) | // 2:multiple
213                         ((c2 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS) | (c3 & LOWER_6_BITS_MASK);
214                     u16Str.push_back(static_cast<char16_t>(codePoint));
215                     break;
216                 }
217                 case TWO_BYTES_STYLE1:
218                 case TWO_BYTES_STYLE2: {
219                     uint8_t c2 = data[++i];
220                     uint32_t codePoint = ((c1 & LOWER_5_BITS_MASK) << UTF8_VALID_BITS) | (c2 & LOWER_6_BITS_MASK);
221                     u16Str.push_back(static_cast<char16_t>(codePoint));
222                     break;
223                 }
224                 default: {
225                     isOk = false;
226                     break;
227                 }
228             }
229         }
230         if (ok != nullptr) {
231             *ok = isOk;
232         }
233         return u16Str;
234     }
235 
// Returns a copy of wstr in which the two bytes of every 16-bit code unit
// have been exchanged (0x1234 becomes 0x3412).
std::u16string Utf16BEToLE(const std::u16string &wstr)
{
    std::u16string swapped;
    swapped.reserve(wstr.length());
    for (const char16_t unit : wstr) {
        const char16_t upperByte = (unit >> 8) & 0x00FF; // 8:offset value
        const char16_t lowerByte = unit & 0x00FF;
        swapped.push_back(static_cast<char16_t>((lowerByte << 8) | upperByte)); // 8:offset value
    }
    return swapped;
}
249 
OtherEncode(napi_env env,napi_value src,napi_value * arrayBuffer,size_t & outLens,std::string encoding)250     void OtherEncode(napi_env env, napi_value src, napi_value* arrayBuffer, size_t &outLens, std::string encoding)
251     {
252         size_t  outLen = 0;
253         void *data = nullptr;
254         size_t inputSize = 0;
255         napi_get_value_string_utf16(env, src, nullptr, 0, &inputSize);
256         char16_t *originalBuffer = ApplyMemory(inputSize);
257         if (originalBuffer == nullptr) {
258             HILOG_ERROR("TextEncoder:: originalBuffer is nullptr");
259             return;
260         }
261         napi_get_value_string_utf16(env, src, originalBuffer, inputSize + 1, &inputSize);
262         outLen = static_cast<size_t>(GetMaxByteSize(encoding)) * inputSize;
263         napi_create_arraybuffer(env, outLen, &data, arrayBuffer);
264         char *writeResult = static_cast<char*>(data);
265         if (writeResult == nullptr) {
266             FreedMemory(originalBuffer);
267             HILOG_ERROR("TextEncoder:: writeResult is nullptr");
268             return;
269         }
270         std::string buffer = "";
271         std::u16string originalStr(originalBuffer, inputSize);
272         size_t shifting = 0;
273         size_t resultShifting = 0;
274         size_t findIndex = originalStr.find('\0');
275         if (findIndex == std::string::npos) {
276             buffer = UnicodeConversion(encoding, originalBuffer, inputSize);
277             outLens = buffer.length();
278             if (memcpy_s(writeResult, outLens, reinterpret_cast<char*>(buffer.data()), outLens) != EOK) {
279                 FreedMemory(originalBuffer);
280                 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error");
281                 return;
282             }
283         } else {
284             while (findIndex != std::string::npos) {
285                 buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize);
286                 if (memcpy_s(writeResult + resultShifting, buffer.length(),
287                              reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) {
288                     FreedMemory(originalBuffer);
289                     HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error");
290                     return;
291                 }
292                 resultShifting +=  buffer.length();
293                 *(writeResult + resultShifting) = '\0';
294                 resultShifting += 1;
295                 outLens += buffer.length() + 1;
296                 shifting += findIndex + 1;
297                 originalStr = originalStr.substr(findIndex + 1, inputSize);
298                 findIndex = originalStr.find('\0');
299             }
300             buffer = UnicodeConversion(encoding, originalBuffer + shifting, inputSize);
301             outLens += buffer.length();
302             if (memcpy_s(writeResult + resultShifting, buffer.length(),
303                          reinterpret_cast<char*>(buffer.data()), buffer.length()) != EOK) {
304                 FreedMemory(originalBuffer);
305                 HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error");
306                 return;
307             }
308         }
309         FreedMemory(originalBuffer);
310     }
311 
EncodeUtf16BE(napi_env env,napi_value src)312     std::u16string EncodeUtf16BE(napi_env env, napi_value src)
313     {
314         std::string buffer = "";
315         size_t bufferSize = 0;
316 
317         if (napi_get_value_string_utf8(env, src, nullptr, 0, &bufferSize) != napi_ok) {
318             HILOG_ERROR("TextEncoder:: can not get src size");
319             return u"";
320         }
321         buffer.reserve(bufferSize + 1);
322         buffer.resize(bufferSize);
323         if (napi_get_value_string_utf8(env, src, buffer.data(), bufferSize + 1, &bufferSize) != napi_ok) {
324             HILOG_ERROR("TextEncoder:: can not get src value");
325             return u"";
326         }
327         std::u16string u16Str = Utf8ToUtf16BE(buffer);
328         return u16Str;
329     }
330 
// Tells whether `high` lies in 0xD800..0xDBFF.
// Despite the function name, that range is the Unicode high (leading)
// surrogate range, i.e. the first unit of a surrogate pair.
bool IsValidLowSurrogate(char16_t high)
{
    // Every value in 0xD800..0xDBFF shares the top six bits 1101 10.
    return (high & 0xFC00) == 0xD800;
}
336 
// Tells whether `low` lies in 0xDC00..0xDFFF.
// Despite the function name, that range is the Unicode low (trailing)
// surrogate range, i.e. the second unit of a surrogate pair.
bool IsValidHighSurrogate(char16_t low)
{
    // Every value in 0xDC00..0xDFFF shares the top six bits 1101 11.
    return (low & 0xFC00) == 0xDC00;
}
342 
OtherEncodeUtf8Inner(char16_t * originalBuffer,InputBufferInfo inputInfo,size_t & index,OutBufferInfo & outInfo)343     uint32_t OtherEncodeUtf8Inner(char16_t *originalBuffer, InputBufferInfo inputInfo, size_t &index,
344         OutBufferInfo &outInfo)
345     {
346         if (IsValidLowSurrogate(originalBuffer[index]) && inputInfo.encoding == "utf-8") {
347             size_t tempIndex = index + 1;
348             if (tempIndex < inputInfo.inputSize && IsValidHighSurrogate(originalBuffer[index + 1])) {
349                 // 2: move the pointer forward to the position of two elements.
350                 std::u16string utf16String(&originalBuffer[index], &originalBuffer[index] + 2);
351                 std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
352                 outInfo.rstBuffer = converter.to_bytes(utf16String);
353                 outInfo.rstBufferLength = outInfo.rstBuffer.length();
354                 if (outInfo.rstBufferLength > outInfo.writedSize) {
355                     return STATE_BREAK_ZERO;
356                 }
357                 index++;
358                 outInfo.cntSize += 2; // 2: two bytes
359                 outInfo.bufferResult += outInfo.rstBuffer;
360                 outInfo.writedSize -= outInfo.rstBufferLength;
361                 return STATE_CONTINUE_ONE;
362             }
363         }
364         return STATE_OTHER_TWO;
365     }
366 
OtherEncodeUtf8(TextEcodeInfo encodeInfo,char * writeResult,uint32_t * written,size_t length,int32_t * nchar)367     void OtherEncodeUtf8(TextEcodeInfo encodeInfo, char* writeResult, uint32_t* written, size_t length, int32_t* nchar)
368     {
369         size_t inputSize = 0;
370         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize);
371         char16_t *originalBuffer = ApplyMemory(inputSize);
372         if (originalBuffer == nullptr) {
373             HILOG_ERROR("TextEncoder:: originalBuffer is nullptr");
374             return;
375         }
376         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize);
377         std::vector<char16_t> targetBuffer(inputSize + 1, u'\0');
378         InputBufferInfo inputInfo(encodeInfo.encoding, inputSize);
379         OutBufferInfo outInfo(length, "", 0, 0, "");
380         for (size_t i = 0; i < inputSize; i++) {
381             targetBuffer[i] = originalBuffer[i];
382             uint32_t rstState = OtherEncodeUtf8Inner(originalBuffer, inputInfo, i, outInfo);
383             if (rstState == STATE_BREAK_ZERO) {
384                 break;
385             } else if (rstState == STATE_CONTINUE_ONE) {
386                 continue;
387             }
388             outInfo.rstBuffer = UnicodeConversion(encodeInfo.encoding, &targetBuffer[i], inputSize);
389             outInfo.rstBufferLength = outInfo.rstBuffer.length();
390             if (outInfo.rstBufferLength > outInfo.writedSize) {
391                 break;
392             }
393             outInfo.cntSize++;
394             outInfo.bufferResult += outInfo.rstBuffer;
395             outInfo.writedSize -= outInfo.rstBufferLength;
396         }
397         size_t writeLength = outInfo.bufferResult.length();
398         for (size_t j = 0; j < writeLength; j++) {
399             *writeResult = outInfo.bufferResult[j];
400             writeResult++;
401         }
402         *nchar = static_cast<int32_t>(outInfo.cntSize);
403         *written = static_cast<uint32_t>(writeLength);
404         FreedMemory(originalBuffer);
405     }
406 
EncodeTo16BE(TextEcodeInfo encodeInfo,char * writeResult,uint32_t * written,size_t length,int32_t * nchars)407     void EncodeTo16BE(TextEcodeInfo encodeInfo, char* writeResult, uint32_t* written, size_t length, int32_t* nchars)
408     {
409         size_t inputSize = 0;
410         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, nullptr, 0, &inputSize);
411         char16_t *originalBuffer = ApplyMemory(inputSize);
412         if (originalBuffer == nullptr) {
413             HILOG_ERROR("TextEncoder:: originalBuffer is nullptr");
414             return;
415         }
416         napi_get_value_string_utf16(encodeInfo.env, encodeInfo.src, originalBuffer, inputSize + 1, &inputSize);
417 
418         size_t writableSize = length;
419         std::u16string bufferResult = u"";
420         size_t i = 0;
421         for (; i < inputSize; i++) {
422             std::string strBuff = "";
423             std::u16string buffer = u"";
424             strBuff = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> {}.to_bytes(originalBuffer[i]);
425             if (encodeInfo.encoding == "utf-16le") {
426                 buffer = Utf8ToUtf16BE(strBuff);
427             } else {
428                 std::u16string u16Str = Utf8ToUtf16BE(strBuff);
429                 buffer = Utf16BEToLE(u16Str);
430             }
431             size_t bufferLength = buffer.length() * 2; // 2:multiple
432             if (bufferLength > writableSize) {
433                 break;
434             }
435             bufferResult += buffer;
436             writableSize -= bufferLength;
437         }
438 
439         size_t writeLength = bufferResult.length() * 2; // 2:multiple
440         if (memcpy_s(writeResult, writeLength, reinterpret_cast<char*>(bufferResult.data()), writeLength) != EOK) {
441             FreedMemory(originalBuffer);
442             HILOG_FATAL("TextEncoder:: copy buffer to arraybuffer error");
443             return;
444         }
445         *nchars = static_cast<int32_t>(i);
446         *written = static_cast<uint32_t>(writeLength);
447         FreedMemory(originalBuffer);
448     }
449 
ApplyMemory(const size_t & inputSize)450     char16_t *ApplyMemory(const size_t &inputSize)
451     {
452         char16_t *originalBuffer = nullptr;
453         if (inputSize > 0) {
454             originalBuffer = new (std::nothrow) char16_t[inputSize + 1];
455             if (originalBuffer == nullptr) {
456                 HILOG_ERROR("TextEncoder:: originalBuffer memory allocation failed, originalBuffer is nullptr");
457                 return nullptr;
458             }
459             if (memset_s(originalBuffer, inputSize + 1, u'\0', inputSize + 1) != EOK) {
460                 HILOG_ERROR("encode originalBuffer memset_s failed");
461                 FreedMemory(originalBuffer);
462                 return nullptr;
463             }
464         } else {
465             HILOG_ERROR("inputSize is error");
466             return nullptr;
467         }
468         return originalBuffer;
469     }
470 
// Releases a char16_t array that was allocated with new[] and resets the
// caller's pointer to nullptr. Passing a null pointer is allowed.
void FreedMemory(char16_t *&data)
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] data;
    data = nullptr;
}
478 } // namespace Commonlibrary::Platform