• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "jerryscript_native_string.h"
16 #include "securec.h"
17 
JerryScriptNativeString(JerryScriptNativeEngine * engine,const char * value,size_t length)18 JerryScriptNativeString::JerryScriptNativeString(JerryScriptNativeEngine* engine, const char* value, size_t length)
19     : JerryScriptNativeString(engine, jerry_create_string_sz_from_utf8((const unsigned char*)value, length))
20 {
21 }
22 
JerryScriptNativeString(JerryScriptNativeEngine * engine,const char16_t * value,size_t length)23 JerryScriptNativeString::JerryScriptNativeString(JerryScriptNativeEngine* engine, const char16_t* value, size_t length)
24     : JerryScriptNativeString(engine, CreateStringFromUtf16(value, length))
25 {
26 }
27 
JerryScriptNativeString(JerryScriptNativeEngine * engine,jerry_value_t value)28 JerryScriptNativeString::JerryScriptNativeString(JerryScriptNativeEngine* engine, jerry_value_t value)
29     : JerryScriptNativeValue(engine, value)
30 {
31 }
32 
~JerryScriptNativeString()33 JerryScriptNativeString::~JerryScriptNativeString() {}
34 
GetInterface(int interfaceId)35 void* JerryScriptNativeString::GetInterface(int interfaceId)
36 {
37     return (NativeString::INTERFACE_ID == interfaceId) ? (NativeString*)this : nullptr;
38 }
39 
GetCString(char * buffer,size_t size,size_t * length)40 void JerryScriptNativeString::GetCString(char* buffer, size_t size, size_t* length)
41 {
42     if (buffer == nullptr || size == 0) {
43         *length = GetLength();
44     } else {
45         *length = jerry_string_to_utf8_char_buffer(value_, (jerry_char_t*)buffer, size);
46     }
47 }
48 
GetLength()49 size_t JerryScriptNativeString::GetLength()
50 {
51     return jerry_get_utf8_string_size(value_);
52 }
53 
GetCString16(char16_t * buffer,size_t size,size_t * length)54 void JerryScriptNativeString::GetCString16(char16_t* buffer, size_t size, size_t* length)
55 {
56     constexpr int maxStringLength = 1024;
57     size_t utf8Length = GetLength();
58     if (size == 0 || utf8Length == 0 || utf8Length > maxStringLength) {
59         return;
60     }
61     char *str = new char[utf8Length];
62     jerry_string_to_utf8_char_buffer(value_, (jerry_char_t*)str, utf8Length);
63     if (length != nullptr) {
64         *length = Utf8ToUtf16Length(str, utf8Length);
65         if (buffer != nullptr) {
66             memset_s(buffer, sizeof(char16_t) * size, 0x0, sizeof(char16_t) * size);
67             Utf8ToUtf16(str, strlen(str), buffer, size);
68         }
69     }
70     delete[] str;
71 }
72 
Utf8ToUtf16(const char * utf8Str,size_t u8len,char16_t * u16str,size_t u16len)73 char16_t* JerryScriptNativeString::Utf8ToUtf16(const char* utf8Str, size_t u8len, char16_t* u16str, size_t u16len)
74 {
75     if (u16len == 0) {
76         return u16str;
77     }
78     const char* u8end = utf8Str + u8len;
79     const char* u8cur = utf8Str;
80     const char16_t* u16end = u16str + u16len - 1;
81     constexpr int  offset = 10;
82     char16_t* u16cur = u16str;
83 
84     while ((u8cur < u8end) && (u16cur < u16end)) {
85         size_t len = Utf8CodePointLen(*u8cur);
86         uint32_t codepoint = Utf8ToUtf32CodePoint(u8cur, len);
87         // Convert the UTF32 codepoint to one or more UTF16 codepoints
88         if (codepoint <= 0xFFFF) {
89             // Single UTF16 character
90             *u16cur++ = (char16_t)codepoint;
91         } else {
92             // Multiple UTF16 characters with surrogates
93             codepoint = codepoint - 0x10000;
94             *u16cur++ = (char16_t)((codepoint >> offset) + 0xD800);
95             if (u16cur >= u16end) {
96                 // Ooops...  not enough room for this surrogate pair.
97                 return u16cur - 1;
98             }
99             *u16cur++ = (char16_t)((codepoint & 0x3FF) + 0xDC00);
100         }
101 
102         u8cur += len;
103     }
104     return u16cur;
105 }
106 
Utf8CodePointLen(uint8_t ch)107 size_t JerryScriptNativeString::Utf8CodePointLen(uint8_t ch)
108 {
109     constexpr int  offset = 3;
110     return ((0xe5000000 >> ((ch >> offset) & 0x1e)) & offset) + 1;
111 }
112 
Utf8ToUtf32CodePoint(const char * src,size_t length)113 uint32_t JerryScriptNativeString::Utf8ToUtf32CodePoint(const char* src, size_t length)
114 {
115     uint32_t unicode = 0;
116     constexpr int  lengthSizeOne = 1;
117     constexpr int  lengthSizeTwo = 2;
118     constexpr int  lengthSizeThree = 3;
119     constexpr int  lengthSizeFour = 4;
120     constexpr int  offsetZero = 0;
121     constexpr int  offsetOne = 1;
122     constexpr int  offsetTwo = 2;
123     constexpr int  offsetThree = 3;
124     switch (length) {
125         case lengthSizeOne:
126             return src[offsetZero];
127         case lengthSizeTwo:
128             unicode = src[offsetZero] & 0x1f;
129             Utf8ShiftAndMask(&unicode, src[offsetOne]);
130             return unicode;
131         case lengthSizeThree:
132             unicode = src[offsetZero] & 0x0f;
133             Utf8ShiftAndMask(&unicode, src[offsetOne]);
134             Utf8ShiftAndMask(&unicode, src[offsetTwo]);
135             return unicode;
136         case lengthSizeFour:
137             unicode = src[offsetZero] & 0x07;
138             Utf8ShiftAndMask(&unicode, src[offsetOne]);
139             Utf8ShiftAndMask(&unicode, src[offsetTwo]);
140             Utf8ShiftAndMask(&unicode, src[offsetThree]);
141             return unicode;
142         default:
143             return 0xffff;
144     }
145 }
146 
Utf8ShiftAndMask(uint32_t * codePoint,const uint8_t byte)147 void JerryScriptNativeString::Utf8ShiftAndMask(uint32_t* codePoint, const uint8_t byte)
148 {
149     *codePoint <<= 6;
150     *codePoint |= 0x3F & byte;
151 }
152 
Utf8ToUtf16Length(const char * str8,size_t str8Len)153 int JerryScriptNativeString::Utf8ToUtf16Length(const char* str8, size_t str8Len)
154 {
155     const char* str8end = str8 + str8Len;
156     int utf16len = 0;
157     while (str8 < str8end) {
158         utf16len++;
159         int u8charlen = Utf8CodePointLen(*str8);
160         if (str8 + u8charlen - 1 >= str8end) {
161             return -1;
162         }
163         uint32_t codepoint = Utf8ToUtf32CodePoint(str8, u8charlen);
164         if (codepoint > 0xFFFF) {
165             utf16len++; // this will be a surrogate pair in utf16
166         }
167         str8 += u8charlen;
168     }
169     if (str8 != str8end) {
170         return -1;
171     }
172     return utf16len;
173 }
174 
Utf32CodePointUtf8Length(char32_t srcChar)175 size_t JerryScriptNativeString::Utf32CodePointUtf8Length(char32_t srcChar)
176 {
177     if (srcChar < ONE_BYTE_UTF8) {
178         return UTF8_BYTES_ONE;
179     } else if (srcChar < TWO_BYTES_UTF8) {
180         return UTF8_BYTES_TWO;
181     } else if (srcChar < THREE_BYTES_UTF8) {
182         if ((srcChar < UNICODE_RESERVED_START) || (srcChar > UNICODE_RESERVED_END)) {
183             return UTF8_BYTES_THREE;
184         } else {
185             // Surrogates are invalid UTF-32 characters.
186             return 0;
187         }
188     } else if (srcChar <= UNICODE_MAX_NUM) {
189         // Max code point for Unicode is 0x0010FFFF.
190         return UTF8_BYTES_FOUR;
191     } else {
192         // Invalid UTF-32 character.
193         return 0;
194     }
195 }
196 
197 // get the length of utf8 from utf16
Utf16ToUtf8Length(const char16_t * str16,size_t str16Len)198 int JerryScriptNativeString::Utf16ToUtf8Length(const char16_t* str16, size_t str16Len)
199 {
200     if (str16 == nullptr || str16Len == 0) {
201         return -1;
202     }
203 
204     const char16_t* str16End = str16 + str16Len;
205     int utf8Len = 0;
206     while (str16 < str16End) {
207         int charLen = 0;
208         if (((*str16 & 0xFC00) == 0xD800) && ((str16 + 1) < str16End)
209             && ((*(str16 + 1) & 0xFC00) == 0xDC00)) {
210             // surrogate pairs are always 4 bytes.
211             charLen = UTF8_BYTES_FOUR;
212             str16 += UTF8_BYTES_TWO;
213         } else {
214             charLen = Utf32CodePointUtf8Length((char32_t)*str16++);
215         }
216 
217         if (utf8Len > (INT_MAX - charLen)) {
218             return -1;
219         }
220         utf8Len += charLen;
221     }
222     return utf8Len;
223 }
224 
225 // inner function and str16 is not null
Char16ToChar8(const char16_t * str16,size_t str16Len)226 char* JerryScriptNativeString::Char16ToChar8(const char16_t* str16, size_t str16Len)
227 {
228     char* str8 = nullptr;
229     int utf8Len = Utf16ToUtf8Length(str16, str16Len);
230     if (utf8Len < 0) {
231         return nullptr;
232     }
233 
234     // Allow for closing '\0'
235     utf8Len += 1;
236     str8 = reinterpret_cast<char*>(calloc(utf8Len, sizeof(char)));
237     if (str8 == nullptr) {
238         return nullptr;
239     }
240 
241     StrncpyStr16ToStr8(str16, str16Len, str8, utf8Len);
242     return str8;
243 }
244 
245 // inner function, utf8Str and utf16Str is not nullptr
StrncpyStr16ToStr8(const char16_t * utf16Str,size_t str16Len,char * utf8Str,size_t str8Len)246 void JerryScriptNativeString::StrncpyStr16ToStr8(const char16_t* utf16Str,
247     size_t str16Len, char* utf8Str, size_t str8Len)
248 {
249     constexpr int  shiftLeftSize = 10;
250     const char16_t* curUtf16 = utf16Str;
251     const char16_t* endUtf16 = utf16Str + str16Len;
252     char* cur = utf8Str;
253     while (curUtf16 < endUtf16) {
254         char32_t utf32;
255         // surrogate pairs
256         if (((*curUtf16 & 0xFC00) == 0xD800) && ((curUtf16 + 1) < endUtf16)
257             && (((*(curUtf16 + 1) & 0xFC00)) == 0xDC00)) {
258             utf32 = (*curUtf16++ - 0xD800) << shiftLeftSize;
259             utf32 |= *curUtf16++ - 0xDC00;
260             utf32 += 0x10000;
261         } else {
262             utf32 = *curUtf16++;
263         }
264         const size_t len = Utf32CodePointUtf8Length(utf32);
265         if (str8Len < len) {
266             break;
267         }
268 
269         Utf32CodePointToUtf8(reinterpret_cast<uint8_t*>(cur), utf32, len);
270         cur += len;
271         str8Len -= len;
272     }
273     *cur = '\0';
274 }
275 
Utf32CodePointToUtf8(uint8_t * dstP,char32_t srcChar,size_t bytes)276 void JerryScriptNativeString::Utf32CodePointToUtf8(uint8_t* dstP, char32_t srcChar, size_t bytes)
277 {
278     dstP += bytes;
279     if (bytes >= UTF8_BYTES_FOUR) {
280         *--dstP = (uint8_t)((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
281         srcChar >>= UTF8_OFFSET;
282     }
283 
284     if (bytes >= UTF8_BYTES_THREE) {
285         *--dstP = (uint8_t)((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
286         srcChar >>= UTF8_OFFSET;
287     }
288 
289     if (bytes >= UTF8_BYTES_TWO) {
290         *--dstP = (uint8_t)((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
291         srcChar >>= UTF8_OFFSET;
292     }
293 
294     if (bytes >= UTF8_BYTES_ONE) {
295         *--dstP = (uint8_t)(srcChar | UTF8_FIRST_BYTE_MARK[bytes]);
296     }
297 }
298 
CreateStringFromUtf16(const char16_t * value,size_t length)299 jerry_value_t JerryScriptNativeString::CreateStringFromUtf16(const char16_t* value, size_t length)
300 {
301     int utf8Len = Utf16ToUtf8Length(value, length);
302     const char* utf8Char = Char16ToChar8(value, length);
303     return jerry_create_string_sz_from_utf8((const unsigned char*)utf8Char, utf8Len);
304 }
305