• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
#include "commonlibrary/ets_utils/js_api_module/buffer/converter.h"

#include <algorithm>
#include <codecvt>
#include <locale>
20 
21 using namespace std;
22 
23 namespace OHOS::buffer {
// Tells whether a byte is a complete single-byte UTF-8 sequence, i.e. its
// most significant bit is clear (value 0x00 - 0x7F).
bool IsOneByte(uint8_t u8Char)
{
    constexpr uint8_t largestSingleByte = 0x7F;
    return u8Char <= largestSingleByte;
}
28 
Utf8ToUtf16BE(const string & u8Str,bool * ok)29 u16string Utf8ToUtf16BE(const string &u8Str, bool *ok)
30 {
31     u16string u16Str = u"";
32     u16Str.reserve(u8Str.size());
33     string::size_type len = u8Str.length();
34 
35     const unsigned char *data = reinterpret_cast<const unsigned char *>(u8Str.data());
36 
37     bool isOk = true;
38     for (string::size_type i = 0; i < len; ++i) {
39         uint8_t c1 = data[i]; // The first byte
40         if (IsOneByte(c1)) { // only 1 byte represents the UNICODE code point
41             u16Str.push_back(static_cast<char16_t>(c1));
42             continue;
43         }
44         switch (c1 & HIGER_4_BITS_MASK) {
45             case FOUR_BYTES_STYLE: { // 4 byte characters, from 0x10000 to 0x10FFFF
46                 uint8_t c2 = data[++i]; // The second byte
47                 uint8_t c3 = data[++i]; // The third byte
48                 uint8_t c4 = data[++i]; // The forth byte
49                 // Calculate the UNICODE code point value (3 bits lower for the first byte, 6 bits for the other)
50                 // 3 : shift left 3 times of UTF8_VALID_BITS
51                 uint32_t codePoint = ((c1 & LOWER_3_BITS_MASK) << (3 * UTF8_VALID_BITS))
52                                      // 2 : shift left 2 times of UTF8_VALID_BITS
53                                      | ((c2 & LOWER_6_BITS_MASK) << (2 * UTF8_VALID_BITS))
54                                      | ((c3 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS)
55                                      | (c4 & LOWER_6_BITS_MASK);
56 
57                 // In UTF-16, U+10000 to U+10FFFF represent surrogate pairs with two 16-bit units
58                 if (codePoint >= UTF16_SPECIAL_VALUE) {
59                     codePoint -= UTF16_SPECIAL_VALUE;
60                     // 10 : a half of 20 , shift right 10 bits
61                     u16Str.push_back(static_cast<char16_t>((codePoint >> 10) | HIGH_AGENT_MASK));
62                     u16Str.push_back(static_cast<char16_t>((codePoint & LOWER_10_BITS_MASK) | LOW_AGENT_MASK));
63                 } else { // In UTF-16, U+0000 to U+D7FF and U+E000 to U+FFFF are Unicode code point values
64                     // U+D800 to U+DFFF are invalid characters, for simplicity,
65                     // assume it does not exist (if any, not encoded)
66                     u16Str.push_back(static_cast<char16_t>(codePoint));
67                 }
68                 break;
69             }
70             case THREE_BYTES_STYLE: { // 3 byte characters, from 0x800 to 0xFFFF
71                 uint8_t c2 = data[++i]; // The second byte
72                 uint8_t c3 = data[++i]; // The third byte
73                 // Calculates the UNICODE code point value
74                 // (4 bits lower for the first byte, 6 bits lower for the other)
75                 // 2 : shift left 2 times of UTF8_VALID_BITS
76                 uint32_t codePoint = ((c1 & LOWER_4_BITS_MASK) << (2 * UTF8_VALID_BITS))
77                                     | ((c2 & LOWER_6_BITS_MASK) << UTF8_VALID_BITS)
78                                     | (c3 & LOWER_6_BITS_MASK);
79                 u16Str.push_back(static_cast<char16_t>(codePoint));
80                 break;
81             }
82             case TWO_BYTES_STYLE1: // 2 byte characters, from 0x80 to 0x7FF
83             case TWO_BYTES_STYLE2: {
84                 uint8_t c2 = data[++i]; // The second byte
85                 // Calculates the UNICODE code point value
86                 // (5 bits lower for the first byte, 6 bits lower for the other)
87                 uint32_t codePoint = ((c1 & LOWER_5_BITS_MASK) << UTF8_VALID_BITS)
88                                     | (c2 & LOWER_6_BITS_MASK);
89                 u16Str.push_back(static_cast<char16_t>(codePoint));
90                 break;
91             }
92             default: {
93                 isOk = false;
94                 break;
95             }
96         }
97     }
98     if (ok != nullptr) {
99         *ok = isOk;
100     }
101 
102     return u16Str;
103 }
104 
// Builds a copy of wstr in which the high and low byte of every 16-bit code
// unit are exchanged.
std::u16string Utf16BEToLE(const std::u16string &wstr)
{
    std::u16string swapped;
    for (const char16_t unit : wstr) {
        const char16_t upperByte = (unit >> 8) & 0x00FF; // 8 : bits per byte
        const char16_t lowerByte = unit & 0x00FF;
        swapped.push_back(static_cast<char16_t>((lowerByte << 8) | upperByte));
    }
    return swapped;
}
118 
Utf16BEToANSI(const u16string & wstr)119 string Utf16BEToANSI(const u16string &wstr)
120 {
121     string ret = "";
122     for (u16string::const_iterator it = wstr.begin(); it != wstr.end(); ++it) {
123         char16_t wc = (*it);
124         // get the lower bit from the UNICODE code point
125         char c = static_cast<char>(wc & LOWER_8_BITS_MASK);
126         ret.push_back(c);
127     }
128     return ret;
129 }
130 
Utf8ToUtf16BEToANSI(const string & str)131 string Utf8ToUtf16BEToANSI(const string &str)
132 {
133     u16string u16Str = Utf8ToUtf16BE(str);
134     string ret = Utf16BEToANSI(u16Str);
135     return ret;
136 }
137 
// Returns true for characters accepted as Base64 symbols here: anything
// isalnum() accepts, plus '+' and '/'. The padding character '=' is rejected.
bool IsBase64Char(unsigned char c)
{
    if (c == '+' || c == '/') {
        return true;
    }
    return isalnum(c) != 0;
}
142 
143 /**
144 * Base64Encode - Base64 encode
145 * @src: Data to be encoded
146 * @len: Length of the data to be encoded
147 * Returns: Allocated buffer of outLen bytes of encoded data,
148 * or empty string on failure
149 */
Base64Encode(const unsigned char * src,size_t len)150 string Base64Encode(const unsigned char *src, size_t len)
151 {
152     if (src == nullptr) {
153         return string();
154     }
155     unsigned char *out = nullptr;
156     unsigned char *pos = nullptr;
157     const unsigned char *pEnd = nullptr;
158     const unsigned char *pStart = nullptr;
159     size_t outLen = 4 * ((len + 2) / 3); // 3-byte blocks to 4-byte
160 
161     if (outLen < len) {
162         return string(); // integer overflow
163     }
164 
165     string outStr = "";
166     outStr.resize(outLen);
167     out = reinterpret_cast<unsigned char *>(&outStr[0]);
168 
169     pEnd = src + len;
170     pStart = src;
171     pos = out;
172     // 3 : 3 bytes is just 24 bits which is 4 times of 6 bits
173     while (pEnd - pStart >= 3) {
174         // 2 : add two zeros in front of the first set of 6 bits to become a new 8 binary bits
175         *pos = base64Table[pStart[0] >> 2];
176         // 4 : add two zeros in front of the following second set of 6 bits to become the new 8 binary bits
177         *(pos + 1) = base64Table[((pStart[0] & LOWER_2_BITS_MASK) << 4) | (pStart[1] >> 4)];
178         // 2 : 4 : 6 : add two zeros in front of the following third set of 6 bits to become the new 8 binary bits
179         *(pos + 2) = base64Table[((pStart[1] & LOWER_4_BITS_MASK) << 2) | (pStart[2] >> 6)];
180         // 2 : 3 : add two zeros in front of the following forth set of 6 bits to become the new 8 binary bits
181         *(pos + 3) = base64Table[pStart[2] & LOWER_6_BITS_MASK];
182         // 4 : the pointer of pos scrolls off 4 bytes to point the next 4 bytes of encoded chars
183         pos += 4;
184         // 3 : the pointer of pStart scrolls off 3 bytes to point the next 3 bytes of which will be encoded chars
185         pStart += 3;
186     }
187 
188     // process the last set of less than 3 bytes of data
189     if (pEnd - pStart > 0) {
190         // 2 : add two zeros in front of the first set of 6 bits to become a new 8 binary bits
191         *pos = base64Table[pStart[0] >> 2];
192         if (pEnd - pStart == 1) { // one byte remaining
193             // 4 : paddle the last two bits of the last byte with two zeros in front of it and four zeros after it
194             *(pos + 1) = base64Table[(pStart[0] & LOWER_2_BITS_MASK) << 4];
195             // 2 : fill in the missing bytes with '='
196             *(pos + 2) = '=';
197         } else { // two bytes remaining
198             // 4 : add two zeros in front of the second set of 6 bits to become the new 8 binary bits
199             *(pos + 1) = base64Table[((pStart[0] & LOWER_2_BITS_MASK) << 4) | (pStart[1] >> 4)];
200             // 2 : paddle the last four bits of the last byte with two zeros in front of it and two zeros after it
201             *(pos + 2) = base64Table[(pStart[1] & LOWER_4_BITS_MASK) << 2];
202         }
203         // 3 : fill in the missing bytes with '='
204         *(pos + 3) = '=';
205     }
206 
207     return outStr;
208 }
209 
Base64Decode(string const & encodedStr)210 string Base64Decode(string const& encodedStr)
211 {
212     size_t len = encodedStr.size();
213     unsigned int index = 0;
214     unsigned int cursor = 0;
215     unsigned char charArray4[4] = {0}; // an array to stage a group of indexes for encoded string
216     unsigned char charArray3[3] = {0}; // an array to stage a set of original string
217     string ret = "";
218 
219     while ((encodedStr[cursor] != '=') && IsBase64Char(encodedStr[cursor])) {
220         // stage a 4-byte string to charArray4
221         charArray4[index] = encodedStr[cursor];
222         index++;
223         cursor++;
224         if (index == 4) { // 4 : after 4 chars is assigned to charArray4
225             // 4 : fill data into charArray4
226             for (index = 0; index < 4; index++) {
227                 charArray4[index] = base64Table.find(charArray4[index]) & LOWER_8_BITS_MASK;
228             }
229             // get the last six bits of the first byte of charArray4 and the first valid
230             // 2 : 4 : two bits(except two higer bits) of the second byte, combine them to a new byte
231             charArray3[0] = (charArray4[0] << 2) + ((charArray4[1] & 0x30) >> 4);
232             // get the last four bits of the second byte of charArray4 and the first valid
233             // 4 : 2 : four bits(except two higer bits) of the third byte, combine them to a new byte
234             charArray3[1] = ((charArray4[1] & LOWER_4_BITS_MASK) << 4) + ((charArray4[2] & MIDDLE_4_BITS_MASK) >> 2);
235             // get the last two bits of the third byte of charArray4 and the forth byte,
236             // 2 : 3 : 6 : combine them to a new byte
237             charArray3[2] = ((charArray4[2] & LOWER_2_BITS_MASK) << 6) + charArray4[3];
238             // 3 : assigns the decoded string to the return value
239             for (index = 0; index < 3; index++) {
240                 ret += charArray3[index];
241             }
242             index = 0;
243         }
244         if (cursor > len - 1) {
245             break;
246         }
247     }
248 
249     if (index != 0) {
250         // fill data into charArray4
251         for (unsigned int i = 0; i < index; i++) {
252             charArray4[i] = base64Table.find(charArray4[i]) & LOWER_8_BITS_MASK;
253         }
254         // get the last six bits of the first byte of charArray4 and the first valid
255         // 2 : 4 : two bits(except two higer bits) of the second byte, combine them to a new byte
256         charArray3[0] = (charArray4[0] << 2) + ((charArray4[1] & 0x30) >> 4);
257         // get the last four bits of the second byte of charArray4 and the first valid
258         // 4 : 2 : four bits(except two higer bits) of the third byte, combine them to a new byte
259         charArray3[1] = ((charArray4[1] & LOWER_4_BITS_MASK) << 4) + ((charArray4[2] & LOWER_6_BITS_MASK) >> 2);
260         // assigns the decoded string to the return value
261         for (unsigned int i = 0; i < index - 1; i++) {
262             ret += charArray3[i];
263         }
264     }
265 
266     return ret;
267 }
268 
// Returns true when hex is non-empty and consists solely of the characters
// 0-9, A-F and a-f; an empty string is reported as invalid.
bool IsValidHex(const std::string &hex)
{
    if (hex.empty()) {
        return false;
    }
    for (const char c : hex) {
        const bool isDecimal = (c >= '0' && c <= '9');
        const bool isUpperHex = (c >= 'A' && c <= 'F');
        const bool isLowerHex = (c >= 'a' && c <= 'f');
        if (!(isDecimal || isUpperHex || isLowerHex)) {
            return false;
        }
    }
    return true;
}
284 
HexDecode(const string & hexStr)285 string HexDecode(const string &hexStr)
286 {
287     auto arr = hexStr.c_str();
288     string nums = "";
289     unsigned int arrSize = hexStr.size();
290 
291     // 2 : means a half length of hex str's size
292     for (unsigned int i = 0; i < arrSize / 2; i++) {
293         string hexStrTmp = "";
294         int num = 0;
295         // 2 : offset is i * 2
296         hexStrTmp.push_back(arr[i * 2]);
297         // 2 : offset is i * 2 + 1
298         hexStrTmp.push_back(arr[i * 2 + 1]);
299         if (!IsValidHex(hexStrTmp)) {
300             break;
301         }
302         // 16 : the base is 16
303         num = stoi(hexStrTmp, nullptr, 16);
304         nums.push_back(static_cast<char>(num));
305     }
306 
307     return nums;
308 }
309 
// Walks backwards through pat, starting at patIndex, looking for a byte equal
// to the last byte of pat (pat[patLen - 1]). Returns the distance from the
// position found to the last index, or patLen when there is no such byte at
// or before patIndex.
int GetGoodSuffixLengthByLastChar(uint8_t *pat, int patIndex, int patLen)
{
    const int lastIndex = patLen - 1;
    for (int pos = patIndex; pos >= 0; --pos) {
        if (pat[pos] == pat[lastIndex]) {
            return lastIndex - pos;
        }
    }
    // nothing found: same as measuring the distance from index -1
    return lastIndex + 1;
}
// Scans pat forwards from patIndex for a byte equal to the first byte of pat
// (pat[0]). Returns the index of the first such byte, or tarlen when none is
// found before index tarlen.
int GetGoodSuffixLengthByFirstChar(uint8_t *pat, int patIndex, int tarlen)
{
    int pos = patIndex;
    while (pos < tarlen) {
        if (pat[pos] == pat[0]) {
            return pos;
        }
        ++pos;
    }
    return tarlen;
}
337 
// Searches pat backwards, starting just before patIndex, for the byte
// singleChar. Returns the distance from patIndex to the position found, or
// patIndex + 1 when the byte does not occur before patIndex.
//
// singleChar is compared as an unsigned byte: on platforms where plain char
// is signed, a value of 0x80 or above would otherwise be promoted to a
// negative int and could never equal a uint8_t element of pat.
int GetBadCharLengthInReverseOrder(uint8_t *pat, char singleChar, int patIndex)
{
    const uint8_t wanted = static_cast<uint8_t>(singleChar);
    for (int i = patIndex - 1; i >= 0; --i) {
        if (pat[i] == wanted) {
            return patIndex - i;
        }
    }
    // not present: same as measuring the distance from index -1
    return patIndex + 1;
}
351 
// Searches pat forwards from patIndex for the byte singleChar. Returns the
// index of the first occurrence, or tarlen when the byte does not occur
// before index tarlen.
//
// singleChar is compared as an unsigned byte: on platforms where plain char
// is signed, a value of 0x80 or above would otherwise be promoted to a
// negative int and could never equal a uint8_t element of pat.
int GetBadCharLengthInSequence(uint8_t *pat, char singleChar, int patIndex, int tarlen)
{
    const uint8_t wanted = static_cast<uint8_t>(singleChar);
    for (int i = patIndex; i < tarlen; i++) {
        if (pat[i] == wanted) {
            return i;
        }
    }
    return tarlen;
}
364 
/**
 * FindLastIndex - locate the last occurrence of a byte sequence
 * @source: bytes to search in
 * @target: bytes to search for
 * @soulen: number of bytes in source
 * @tarlen: number of bytes in target
 * Returns: index in source of the first byte of the last occurrence of
 * target, or -1 when either pointer is null, tarlen is not positive, target
 * is longer than source, or target does not occur in source
 *
 * Bytes are compared as uint8_t values, so the result does not depend on the
 * signedness of plain char.
 */
int FindLastIndex(uint8_t *source, uint8_t *target, int soulen, int tarlen)
{
    if (source == nullptr || target == nullptr) {
        return -1;
    }
    if (tarlen <= 0 || soulen < tarlen) {
        return -1;
    }
    const uint8_t *sourceBegin = source;
    const uint8_t *sourceEnd = source + soulen;
    const uint8_t *targetBegin = target;
    const uint8_t *targetEnd = target + tarlen;

    // find_end yields sourceEnd when the sequence is absent
    const uint8_t *hit = std::find_end(sourceBegin, sourceEnd, targetBegin, targetEnd);
    if (hit == sourceEnd) {
        return -1;
    }
    return static_cast<int>(hit - sourceBegin);
}
399 
/**
 * FindIndex - locate the first occurrence of a byte sequence
 * @source: bytes to search in
 * @target: bytes to search for
 * @soulen: number of bytes in source
 * @tarlen: number of bytes in target
 * Returns: index in source of the first byte of the first occurrence of
 * target, or -1 when either pointer is null, tarlen is not positive, target
 * is longer than source, or target does not occur in source
 *
 * The search is delegated to std::search so that no candidate position is
 * skipped; e.g. target "abab" is found at index 2 of source "xbabab".
 * Bytes are compared as uint8_t values, so the result does not depend on the
 * signedness of plain char.
 */
int FindIndex(uint8_t* source, uint8_t* target, int soulen, int tarlen)
{
    if (source == nullptr || target == nullptr) {
        return -1;
    }
    if (tarlen <= 0 || soulen < tarlen) {
        return -1;
    }
    const uint8_t *sourceBegin = source;
    const uint8_t *sourceEnd = source + soulen;
    const uint8_t *targetBegin = target;
    const uint8_t *targetEnd = target + tarlen;

    // search yields sourceEnd when the sequence is absent
    const uint8_t *hit = std::search(sourceBegin, sourceEnd, targetBegin, targetEnd);
    if (hit == sourceEnd) {
        return -1;
    }
    return static_cast<int>(hit - sourceBegin);
}
433 }
434