1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "jerryscript_native_string.h"
16 #include "securec.h"
17
JerryScriptNativeString(JerryScriptNativeEngine * engine,const char * value,size_t length)18 JerryScriptNativeString::JerryScriptNativeString(JerryScriptNativeEngine* engine, const char* value, size_t length)
19 : JerryScriptNativeString(engine, jerry_create_string_sz_from_utf8((const unsigned char*)value, length))
20 {
21 }
22
JerryScriptNativeString(JerryScriptNativeEngine * engine,const char16_t * value,size_t length)23 JerryScriptNativeString::JerryScriptNativeString(JerryScriptNativeEngine* engine, const char16_t* value, size_t length)
24 : JerryScriptNativeString(engine, CreateStringFromUtf16(value, length))
25 {
26 }
27
JerryScriptNativeString(JerryScriptNativeEngine * engine,jerry_value_t value)28 JerryScriptNativeString::JerryScriptNativeString(JerryScriptNativeEngine* engine, jerry_value_t value)
29 : JerryScriptNativeValue(engine, value)
30 {
31 }
32
~JerryScriptNativeString()33 JerryScriptNativeString::~JerryScriptNativeString() {}
34
GetInterface(int interfaceId)35 void* JerryScriptNativeString::GetInterface(int interfaceId)
36 {
37 return (NativeString::INTERFACE_ID == interfaceId) ? (NativeString*)this : nullptr;
38 }
39
GetCString(char * buffer,size_t size,size_t * length)40 void JerryScriptNativeString::GetCString(char* buffer, size_t size, size_t* length)
41 {
42 if (buffer == nullptr || size == 0) {
43 *length = GetLength();
44 } else {
45 *length = jerry_string_to_utf8_char_buffer(value_, (jerry_char_t*)buffer, size);
46 }
47 }
48
GetLength()49 size_t JerryScriptNativeString::GetLength()
50 {
51 return jerry_get_utf8_string_size(value_);
52 }
53
GetCString16(char16_t * buffer,size_t size,size_t * length)54 void JerryScriptNativeString::GetCString16(char16_t* buffer, size_t size, size_t* length)
55 {
56 constexpr int maxStringLength = 1024;
57 size_t utf8Length = GetLength();
58 if (size == 0 || utf8Length == 0 || utf8Length > maxStringLength) {
59 return;
60 }
61 char *str = new char[utf8Length];
62 jerry_string_to_utf8_char_buffer(value_, (jerry_char_t*)str, utf8Length);
63 if (length != nullptr) {
64 *length = Utf8ToUtf16Length(str, utf8Length);
65 if (buffer != nullptr) {
66 memset_s(buffer, sizeof(char16_t) * size, 0x0, sizeof(char16_t) * size);
67 Utf8ToUtf16(str, strlen(str), buffer, size);
68 }
69 }
70 delete[] str;
71 }
72
Utf8ToUtf16(const char * utf8Str,size_t u8len,char16_t * u16str,size_t u16len)73 char16_t* JerryScriptNativeString::Utf8ToUtf16(const char* utf8Str, size_t u8len, char16_t* u16str, size_t u16len)
74 {
75 if (u16len == 0) {
76 return u16str;
77 }
78 const char* u8end = utf8Str + u8len;
79 const char* u8cur = utf8Str;
80 const char16_t* u16end = u16str + u16len - 1;
81 constexpr int offset = 10;
82 char16_t* u16cur = u16str;
83
84 while ((u8cur < u8end) && (u16cur < u16end)) {
85 size_t len = Utf8CodePointLen(*u8cur);
86 uint32_t codepoint = Utf8ToUtf32CodePoint(u8cur, len);
87 // Convert the UTF32 codepoint to one or more UTF16 codepoints
88 if (codepoint <= 0xFFFF) {
89 // Single UTF16 character
90 *u16cur++ = (char16_t)codepoint;
91 } else {
92 // Multiple UTF16 characters with surrogates
93 codepoint = codepoint - 0x10000;
94 *u16cur++ = (char16_t)((codepoint >> offset) + 0xD800);
95 if (u16cur >= u16end) {
96 // Ooops... not enough room for this surrogate pair.
97 return u16cur - 1;
98 }
99 *u16cur++ = (char16_t)((codepoint & 0x3FF) + 0xDC00);
100 }
101
102 u8cur += len;
103 }
104 return u16cur;
105 }
106
Utf8CodePointLen(uint8_t ch)107 size_t JerryScriptNativeString::Utf8CodePointLen(uint8_t ch)
108 {
109 constexpr int offset = 3;
110 return ((0xe5000000 >> ((ch >> offset) & 0x1e)) & offset) + 1;
111 }
112
Utf8ToUtf32CodePoint(const char * src,size_t length)113 uint32_t JerryScriptNativeString::Utf8ToUtf32CodePoint(const char* src, size_t length)
114 {
115 uint32_t unicode = 0;
116 constexpr int lengthSizeOne = 1;
117 constexpr int lengthSizeTwo = 2;
118 constexpr int lengthSizeThree = 3;
119 constexpr int lengthSizeFour = 4;
120 constexpr int offsetZero = 0;
121 constexpr int offsetOne = 1;
122 constexpr int offsetTwo = 2;
123 constexpr int offsetThree = 3;
124 switch (length) {
125 case lengthSizeOne:
126 return src[offsetZero];
127 case lengthSizeTwo:
128 unicode = src[offsetZero] & 0x1f;
129 Utf8ShiftAndMask(&unicode, src[offsetOne]);
130 return unicode;
131 case lengthSizeThree:
132 unicode = src[offsetZero] & 0x0f;
133 Utf8ShiftAndMask(&unicode, src[offsetOne]);
134 Utf8ShiftAndMask(&unicode, src[offsetTwo]);
135 return unicode;
136 case lengthSizeFour:
137 unicode = src[offsetZero] & 0x07;
138 Utf8ShiftAndMask(&unicode, src[offsetOne]);
139 Utf8ShiftAndMask(&unicode, src[offsetTwo]);
140 Utf8ShiftAndMask(&unicode, src[offsetThree]);
141 return unicode;
142 default:
143 return 0xffff;
144 }
145 }
146
Utf8ShiftAndMask(uint32_t * codePoint,const uint8_t byte)147 void JerryScriptNativeString::Utf8ShiftAndMask(uint32_t* codePoint, const uint8_t byte)
148 {
149 *codePoint <<= 6;
150 *codePoint |= 0x3F & byte;
151 }
152
Utf8ToUtf16Length(const char * str8,size_t str8Len)153 int JerryScriptNativeString::Utf8ToUtf16Length(const char* str8, size_t str8Len)
154 {
155 const char* str8end = str8 + str8Len;
156 int utf16len = 0;
157 while (str8 < str8end) {
158 utf16len++;
159 int u8charlen = Utf8CodePointLen(*str8);
160 if (str8 + u8charlen - 1 >= str8end) {
161 return -1;
162 }
163 uint32_t codepoint = Utf8ToUtf32CodePoint(str8, u8charlen);
164 if (codepoint > 0xFFFF) {
165 utf16len++; // this will be a surrogate pair in utf16
166 }
167 str8 += u8charlen;
168 }
169 if (str8 != str8end) {
170 return -1;
171 }
172 return utf16len;
173 }
174
Utf32CodePointUtf8Length(char32_t srcChar)175 size_t JerryScriptNativeString::Utf32CodePointUtf8Length(char32_t srcChar)
176 {
177 if (srcChar < ONE_BYTE_UTF8) {
178 return UTF8_BYTES_ONE;
179 } else if (srcChar < TWO_BYTES_UTF8) {
180 return UTF8_BYTES_TWO;
181 } else if (srcChar < THREE_BYTES_UTF8) {
182 if ((srcChar < UNICODE_RESERVED_START) || (srcChar > UNICODE_RESERVED_END)) {
183 return UTF8_BYTES_THREE;
184 } else {
185 // Surrogates are invalid UTF-32 characters.
186 return 0;
187 }
188 } else if (srcChar <= UNICODE_MAX_NUM) {
189 // Max code point for Unicode is 0x0010FFFF.
190 return UTF8_BYTES_FOUR;
191 } else {
192 // Invalid UTF-32 character.
193 return 0;
194 }
195 }
196
197 // get the length of utf8 from utf16
Utf16ToUtf8Length(const char16_t * str16,size_t str16Len)198 int JerryScriptNativeString::Utf16ToUtf8Length(const char16_t* str16, size_t str16Len)
199 {
200 if (str16 == nullptr || str16Len == 0) {
201 return -1;
202 }
203
204 const char16_t* str16End = str16 + str16Len;
205 int utf8Len = 0;
206 while (str16 < str16End) {
207 int charLen = 0;
208 if (((*str16 & 0xFC00) == 0xD800) && ((str16 + 1) < str16End)
209 && ((*(str16 + 1) & 0xFC00) == 0xDC00)) {
210 // surrogate pairs are always 4 bytes.
211 charLen = UTF8_BYTES_FOUR;
212 str16 += UTF8_BYTES_TWO;
213 } else {
214 charLen = Utf32CodePointUtf8Length((char32_t)*str16++);
215 }
216
217 if (utf8Len > (INT_MAX - charLen)) {
218 return -1;
219 }
220 utf8Len += charLen;
221 }
222 return utf8Len;
223 }
224
225 // inner function and str16 is not null
Char16ToChar8(const char16_t * str16,size_t str16Len)226 char* JerryScriptNativeString::Char16ToChar8(const char16_t* str16, size_t str16Len)
227 {
228 char* str8 = nullptr;
229 int utf8Len = Utf16ToUtf8Length(str16, str16Len);
230 if (utf8Len < 0) {
231 return nullptr;
232 }
233
234 // Allow for closing '\0'
235 utf8Len += 1;
236 str8 = reinterpret_cast<char*>(calloc(utf8Len, sizeof(char)));
237 if (str8 == nullptr) {
238 return nullptr;
239 }
240
241 StrncpyStr16ToStr8(str16, str16Len, str8, utf8Len);
242 return str8;
243 }
244
245 // inner function, utf8Str and utf16Str is not nullptr
StrncpyStr16ToStr8(const char16_t * utf16Str,size_t str16Len,char * utf8Str,size_t str8Len)246 void JerryScriptNativeString::StrncpyStr16ToStr8(const char16_t* utf16Str,
247 size_t str16Len, char* utf8Str, size_t str8Len)
248 {
249 constexpr int shiftLeftSize = 10;
250 const char16_t* curUtf16 = utf16Str;
251 const char16_t* endUtf16 = utf16Str + str16Len;
252 char* cur = utf8Str;
253 while (curUtf16 < endUtf16) {
254 char32_t utf32;
255 // surrogate pairs
256 if (((*curUtf16 & 0xFC00) == 0xD800) && ((curUtf16 + 1) < endUtf16)
257 && (((*(curUtf16 + 1) & 0xFC00)) == 0xDC00)) {
258 utf32 = (*curUtf16++ - 0xD800) << shiftLeftSize;
259 utf32 |= *curUtf16++ - 0xDC00;
260 utf32 += 0x10000;
261 } else {
262 utf32 = *curUtf16++;
263 }
264 const size_t len = Utf32CodePointUtf8Length(utf32);
265 if (str8Len < len) {
266 break;
267 }
268
269 Utf32CodePointToUtf8(reinterpret_cast<uint8_t*>(cur), utf32, len);
270 cur += len;
271 str8Len -= len;
272 }
273 *cur = '\0';
274 }
275
Utf32CodePointToUtf8(uint8_t * dstP,char32_t srcChar,size_t bytes)276 void JerryScriptNativeString::Utf32CodePointToUtf8(uint8_t* dstP, char32_t srcChar, size_t bytes)
277 {
278 dstP += bytes;
279 if (bytes >= UTF8_BYTES_FOUR) {
280 *--dstP = (uint8_t)((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
281 srcChar >>= UTF8_OFFSET;
282 }
283
284 if (bytes >= UTF8_BYTES_THREE) {
285 *--dstP = (uint8_t)((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
286 srcChar >>= UTF8_OFFSET;
287 }
288
289 if (bytes >= UTF8_BYTES_TWO) {
290 *--dstP = (uint8_t)((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
291 srcChar >>= UTF8_OFFSET;
292 }
293
294 if (bytes >= UTF8_BYTES_ONE) {
295 *--dstP = (uint8_t)(srcChar | UTF8_FIRST_BYTE_MARK[bytes]);
296 }
297 }
298
CreateStringFromUtf16(const char16_t * value,size_t length)299 jerry_value_t JerryScriptNativeString::CreateStringFromUtf16(const char16_t* value, size_t length)
300 {
301 int utf8Len = Utf16ToUtf8Length(value, length);
302 const char* utf8Char = Char16ToChar8(value, length);
303 return jerry_create_string_sz_from_utf8((const unsigned char*)utf8Char, utf8Len);
304 }
305