1 /* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecma-alloc.h"
17 #include "ecma-conversion.h"
18 #include "ecma-gc.h"
19 #include "ecma-globals.h"
20 #include "ecma-helpers.h"
21 #include "jrt.h"
22 #include "jrt-libc-includes.h"
23 #include "lit-char-helpers.h"
24 #include "lit-magic-strings.h"
25
26 /** \addtogroup ecma ECMA
27 * @{
28 *
29 * \addtogroup ecmahelpers Helpers for operations with ECMA data types
30 * @{
31 */
32
33 JERRY_STATIC_ASSERT (ECMA_STRING_CONTAINER_MASK >= ECMA_STRING_CONTAINER__MAX,
34 ecma_string_container_types_must_be_lower_than_the_container_mask);
35
36 JERRY_STATIC_ASSERT ((ECMA_STRING_MAX_REF | ECMA_STRING_CONTAINER_MASK | ECMA_STATIC_STRING_FLAG) == UINT32_MAX,
37 ecma_string_ref_and_container_fields_should_fill_the_32_bit_field);
38
39 JERRY_STATIC_ASSERT (ECMA_STRING_NOT_ARRAY_INDEX == UINT32_MAX,
40 ecma_string_not_array_index_must_be_equal_to_uint32_max);
41
42 JERRY_STATIC_ASSERT ((ECMA_TYPE_DIRECT_STRING & 0x1) != 0,
43 ecma_type_direct_string_must_be_odd_number);
44
45 JERRY_STATIC_ASSERT (LIT_MAGIC_STRING__COUNT <= ECMA_DIRECT_STRING_MAX_IMM,
46 all_magic_strings_must_be_encoded_as_direct_string);
47
48 JERRY_STATIC_ASSERT ((int) ECMA_DIRECT_STRING_UINT == (int) ECMA_STRING_CONTAINER_UINT32_IN_DESC,
49 ecma_direct_and_container_types_must_match);
50
51 JERRY_STATIC_ASSERT (ECMA_PROPERTY_NAME_TYPE_SHIFT > ECMA_VALUE_SHIFT,
52 ecma_property_name_type_shift_must_be_greater_than_ecma_value_shift);
53
54 JERRY_STATIC_ASSERT (sizeof (ecma_stringbuilder_header_t) <= sizeof (ecma_ascii_string_t),
55 ecma_stringbuilder_header_must_not_be_larger_than_ecma_ascii_string);
56
57 /**
58 * Convert a string to an unsigned 32 bit value if possible
59 *
60 * @return true if the conversion is successful
61 * false otherwise
62 */
63 static bool
ecma_string_to_array_index(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size,uint32_t * result_p)64 ecma_string_to_array_index (const lit_utf8_byte_t *string_p, /**< utf-8 string */
65 lit_utf8_size_t string_size, /**< string size */
66 uint32_t *result_p) /**< [out] converted value */
67 {
68 JERRY_ASSERT (string_size > 0 && *string_p >= LIT_CHAR_0 && *string_p <= LIT_CHAR_9);
69
70 if (*string_p == LIT_CHAR_0)
71 {
72 *result_p = 0;
73 return (string_size == 1);
74 }
75
76 if (string_size > ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
77 {
78 return false;
79 }
80
81 uint32_t index = 0;
82 const lit_utf8_byte_t *string_end_p = string_p + string_size;
83
84 if (string_size == ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
85 {
86 string_end_p--;
87 }
88
89 do
90 {
91 if (*string_p > LIT_CHAR_9 || *string_p < LIT_CHAR_0)
92 {
93 return false;
94 }
95
96 index = (index * 10) + (uint32_t) (*string_p++ - LIT_CHAR_0);
97 }
98 while (string_p < string_end_p);
99
100 if (string_size < ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
101 {
102 *result_p = index;
103 return true;
104 }
105
106 /* Overflow must be checked as well when size is
107 * equal to ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32. */
108 if (*string_p > LIT_CHAR_9
109 || *string_p < LIT_CHAR_0
110 || index > (UINT32_MAX / 10)
111 || (index == (UINT32_MAX / 10) && *string_p > LIT_CHAR_5))
112 {
113 return false;
114 }
115
116 *result_p = (index * 10) + (uint32_t) (*string_p - LIT_CHAR_0);
117 return true;
118 } /* ecma_string_to_array_index */
119
120 /**
121 * Returns the characters and size of a string.
122 *
123 * Note:
124 * UINT type is not supported
125 *
126 * @return byte array start - if the byte array of a string is available
127 * NULL - otherwise
128 */
129 static const lit_utf8_byte_t *
ecma_string_get_chars_fast(const ecma_string_t * string_p,lit_utf8_size_t * size_p)130 ecma_string_get_chars_fast (const ecma_string_t *string_p, /**< ecma-string */
131 lit_utf8_size_t *size_p) /**< [out] size of the ecma string */
132 {
133 if (ECMA_IS_DIRECT_STRING (string_p))
134 {
135 if (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC)
136 {
137 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
138
139 if (id >= LIT_MAGIC_STRING__COUNT)
140 {
141 id -= LIT_MAGIC_STRING__COUNT;
142
143 *size_p = lit_get_magic_string_ex_size (id);
144 return lit_get_magic_string_ex_utf8 (id);
145 }
146
147 *size_p = lit_get_magic_string_size (id);
148 return lit_get_magic_string_utf8 (id);
149 }
150 }
151
152 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
153
154 switch (ECMA_STRING_GET_CONTAINER (string_p))
155 {
156 case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
157 {
158 *size_p = ((ecma_utf8_string_t *) string_p)->size;
159 return ECMA_UTF8_STRING_GET_BUFFER (string_p);
160 }
161 case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
162 {
163 *size_p = ((ecma_long_utf8_string_t *) string_p)->size;
164 return ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p);
165 }
166 case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
167 {
168 *size_p = ((ecma_ascii_string_t *) string_p)->size;
169 return ECMA_ASCII_STRING_GET_BUFFER (string_p);
170 }
171 default:
172 {
173 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
174
175 lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
176 *size_p = lit_get_magic_string_ex_size (id);
177 return lit_get_magic_string_ex_utf8 (id);
178 }
179 }
180 } /* ecma_string_get_chars_fast */
181
182 /**
183 * Allocate new ecma-string and fill it with reference to ECMA magic string
184 *
185 * @return pointer to ecma-string descriptor
186 */
187 static ecma_string_t *
ecma_new_ecma_string_from_magic_string_ex_id(lit_magic_string_ex_id_t id)188 ecma_new_ecma_string_from_magic_string_ex_id (lit_magic_string_ex_id_t id) /**< identifier of externl magic string */
189 {
190 JERRY_ASSERT (id < lit_get_magic_string_ex_count ());
191
192 uintptr_t string_id = (uintptr_t) (id + LIT_MAGIC_STRING__COUNT);
193
194 if (JERRY_LIKELY (string_id <= ECMA_DIRECT_STRING_MAX_IMM))
195 {
196 return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_MAGIC, string_id);
197 }
198
199 ecma_string_t *string_desc_p = ecma_alloc_string ();
200
201 string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_MAGIC_STRING_EX | ECMA_STRING_REF_ONE;
202 string_desc_p->u.magic_string_ex_id = id + LIT_MAGIC_STRING__COUNT;
203
204 return string_desc_p;
205 } /* ecma_new_ecma_string_from_magic_string_ex_id */
206
207 #if ENABLED (JERRY_ES2015)
208 /**
209 * Allocate new ecma-string and fill it with reference to the symbol descriptor
210 *
211 * @return pointer to ecma-string descriptor
212 */
213 ecma_string_t *
ecma_new_symbol_from_descriptor_string(ecma_value_t string_desc)214 ecma_new_symbol_from_descriptor_string (ecma_value_t string_desc) /**< ecma-string */
215 {
216 JERRY_ASSERT (!ecma_is_value_symbol (string_desc));
217
218 ecma_extended_string_t *symbol_p = ecma_alloc_extended_string ();
219 symbol_p->header.refs_and_container = ECMA_STRING_REF_ONE | ECMA_STRING_CONTAINER_SYMBOL;
220 symbol_p->u.symbol_descriptor = string_desc;
221 symbol_p->header.u.hash = (lit_string_hash_t) (((uintptr_t) symbol_p) >> ECMA_SYMBOL_HASH_SHIFT);
222 JERRY_ASSERT ((symbol_p->header.u.hash & ECMA_GLOBAL_SYMBOL_FLAG) == 0);
223
224 return (ecma_string_t *) symbol_p;
225 } /* ecma_new_symbol_from_descriptor_string */
226
227 /**
228 * Check whether an ecma-string contains an ecma-symbol
229 *
230 * @return true - if the ecma-string contains an ecma-symbol
231 * false - otherwise
232 */
233 bool
ecma_prop_name_is_symbol(ecma_string_t * string_p)234 ecma_prop_name_is_symbol (ecma_string_t *string_p) /**< ecma-string */
235 {
236 JERRY_ASSERT (string_p != NULL);
237
238 return (!ECMA_IS_DIRECT_STRING (string_p)
239 && ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_SYMBOL);
240 } /* ecma_prop_name_is_symbol */
241 #endif /* ENABLED (JERRY_ES2015) */
242
243 /**
244 * Allocate new UTF8 ecma-string and fill it with characters from the given utf8 buffer
245 *
246 * @return pointer to ecma-string descriptor
247 */
248 static inline ecma_string_t * JERRY_ATTR_ALWAYS_INLINE
ecma_new_ecma_string_from_utf8_buffer(lit_utf8_size_t length,lit_utf8_size_t size,lit_utf8_byte_t ** data_p)249 ecma_new_ecma_string_from_utf8_buffer (lit_utf8_size_t length, /**< length of the buffer */
250 lit_utf8_size_t size, /**< size of the buffer */
251 lit_utf8_byte_t **data_p) /**< [out] pointer to the start of the string buffer */
252 {
253 if (JERRY_LIKELY (size <= UINT16_MAX))
254 {
255 if (JERRY_LIKELY (length == size))
256 {
257 ecma_ascii_string_t *string_desc_p;
258 string_desc_p = (ecma_ascii_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_ascii_string_t));
259 string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
260 string_desc_p->size = (uint16_t) size;
261
262 *data_p = ECMA_ASCII_STRING_GET_BUFFER (string_desc_p);
263 return (ecma_string_t *) string_desc_p;
264 }
265
266 JERRY_ASSERT (length < size);
267
268 ecma_utf8_string_t *string_desc_p;
269 string_desc_p = (ecma_utf8_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_utf8_string_t));
270 string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
271 string_desc_p->size = (uint16_t) size;
272 string_desc_p->length = (uint16_t) length;
273
274 *data_p = ECMA_UTF8_STRING_GET_BUFFER (string_desc_p);
275 return (ecma_string_t *) string_desc_p;
276 }
277
278 ecma_long_utf8_string_t *string_desc_p;
279 string_desc_p = (ecma_long_utf8_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_long_utf8_string_t));
280 string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
281 string_desc_p->size = size;
282 string_desc_p->length = length;
283
284 *data_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string_desc_p);
285 return (ecma_string_t *) string_desc_p;
286 } /* ecma_new_ecma_string_from_utf8_buffer */
287
288 /**
289 * Checks whether a string has a special representation, that is, the string is either a magic string,
290 * an external magic string, or an uint32 number, and creates an ecma string using the special representation,
291 * if available.
292 *
293 * @return pointer to ecma string with the special representation
294 * NULL, if there is no special representation for the string
295 */
296 ecma_string_t *
ecma_find_special_string(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size)297 ecma_find_special_string (const lit_utf8_byte_t *string_p, /**< utf8 string */
298 lit_utf8_size_t string_size) /**< string size */
299 {
300 JERRY_ASSERT (string_p != NULL || string_size == 0);
301 lit_magic_string_id_t magic_string_id = lit_is_utf8_string_magic (string_p, string_size);
302
303 if (magic_string_id != LIT_MAGIC_STRING__COUNT)
304 {
305 return ecma_get_magic_string (magic_string_id);
306 }
307
308 JERRY_ASSERT (string_size > 0);
309
310 if (*string_p >= LIT_CHAR_0 && *string_p <= LIT_CHAR_9)
311 {
312 uint32_t array_index;
313
314 if (ecma_string_to_array_index (string_p, string_size, &array_index))
315 {
316 return ecma_new_ecma_string_from_uint32 (array_index);
317 }
318 }
319
320 if (lit_get_magic_string_ex_count () > 0)
321 {
322 lit_magic_string_ex_id_t magic_string_ex_id = lit_is_ex_utf8_string_magic (string_p, string_size);
323
324 if (magic_string_ex_id < lit_get_magic_string_ex_count ())
325 {
326 return ecma_new_ecma_string_from_magic_string_ex_id (magic_string_ex_id);
327 }
328 }
329
330 return NULL;
331 } /* ecma_find_special_string */
332
333 /**
334 * Allocate new ecma-string and fill it with characters from the utf8 string
335 *
336 * @return pointer to ecma-string descriptor
337 */
338 ecma_string_t *
ecma_new_ecma_string_from_utf8(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size)339 ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 string */
340 lit_utf8_size_t string_size) /**< string size */
341 {
342 JERRY_ASSERT (string_p != NULL || string_size == 0);
343 JERRY_ASSERT (lit_is_valid_cesu8_string (string_p, string_size));
344
345 ecma_string_t *string_desc_p = ecma_find_special_string (string_p, string_size);
346
347 if (string_desc_p != NULL)
348 {
349 return string_desc_p;
350 }
351
352 lit_utf8_byte_t *data_p;
353 string_desc_p = ecma_new_ecma_string_from_utf8_buffer (lit_utf8_string_length (string_p, string_size),
354 string_size,
355 &data_p);
356
357 string_desc_p->u.hash = lit_utf8_string_calc_hash (string_p, string_size);
358 memcpy (data_p, string_p, string_size);
359
360 return string_desc_p;
361 } /* ecma_new_ecma_string_from_utf8 */
362
363 static ecma_long_utf8_string_t g_literalStringCache;
364
ecma_new_nonref_ecma_string_from_utf8(const lit_utf8_byte_t * string_p,lit_utf8_size_t size)365 ecma_string_t * ecma_new_nonref_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t size)
366 {
367 ecma_length_t length = lit_utf8_string_length (string_p, size);
368
369 if (JERRY_LIKELY (size <= UINT16_MAX))
370 {
371 if (JERRY_LIKELY (length != size))
372 {
373 JERRY_ASSERT (length < size);
374 ecma_utf8_string_t *string_desc_p;
375 string_desc_p = (ecma_utf8_string_t *)&g_literalStringCache;
376 string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
377 string_desc_p->header.u.hash = lit_utf8_string_calc_hash (string_p, size);
378 string_desc_p->size = (uint16_t) size;
379 string_desc_p->length = (uint16_t) length;
380
381 return (ecma_string_t *) string_desc_p;
382 }
383
384 ecma_ascii_string_t *string_desc_p;
385 string_desc_p = (ecma_ascii_string_t *)&g_literalStringCache;
386 string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
387 string_desc_p->header.u.hash = lit_utf8_string_calc_hash (string_p, size);
388 string_desc_p->size = (uint16_t) size;
389
390 return (ecma_string_t *) string_desc_p;
391 }
392
393 ecma_long_utf8_string_t *string_desc_p;
394 string_desc_p = (ecma_long_utf8_string_t *)&g_literalStringCache;
395 string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
396 string_desc_p->header.u.hash = lit_utf8_string_calc_hash (string_p, size);
397 string_desc_p->size = size;
398 string_desc_p->length = length;
399
400 return (ecma_string_t *) string_desc_p;
401 }
402
403 /**
404 * Allocate a new ecma-string and initialize it from the utf8 string argument.
405 * All 4-bytes long unicode sequences are converted into two 3-bytes long sequences.
406 *
407 * @return pointer to ecma-string descriptor
408 */
409 ecma_string_t *
ecma_new_ecma_string_from_utf8_converted_to_cesu8(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size)410 ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string_p, /**< utf-8 string */
411 lit_utf8_size_t string_size) /**< utf-8 string size */
412 {
413 JERRY_ASSERT (string_p != NULL || string_size == 0);
414
415 ecma_length_t converted_string_length = 0;
416 lit_utf8_size_t converted_string_size = 0;
417 lit_utf8_size_t pos = 0;
418
419 /* Calculate the required length and size information of the converted cesu-8 encoded string */
420 while (pos < string_size)
421 {
422 if ((string_p[pos] & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
423 {
424 pos++;
425 }
426 else if ((string_p[pos] & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
427 {
428 pos += 2;
429 }
430 else if ((string_p[pos] & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
431 {
432 pos += 3;
433 }
434 else
435 {
436 JERRY_ASSERT ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER);
437 pos += 4;
438 converted_string_size += 2;
439 converted_string_length++;
440 }
441
442 converted_string_length++;
443 }
444
445 JERRY_ASSERT (pos == string_size);
446
447 if (converted_string_size == 0)
448 {
449 return ecma_new_ecma_string_from_utf8 (string_p, string_size);
450 }
451
452 converted_string_size += string_size;
453
454 JERRY_ASSERT (lit_is_valid_utf8_string (string_p, string_size));
455
456 lit_utf8_byte_t *data_p;
457 ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8_buffer (converted_string_length,
458 converted_string_size,
459 &data_p);
460
461 const lit_utf8_byte_t *const begin_data_p = data_p;
462 pos = 0;
463
464 while (pos < string_size)
465 {
466 if ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
467 {
468 /* Processing 4 byte unicode sequence. Always converted to two 3 byte long sequence. */
469 lit_four_byte_utf8_char_to_cesu8 (data_p, string_p + pos);
470 data_p += 3 * 2;
471 pos += 4;
472 }
473 else
474 {
475 *data_p++ = string_p[pos++];
476 }
477 }
478
479 JERRY_ASSERT (pos == string_size);
480
481 string_desc_p->u.hash = lit_utf8_string_calc_hash (begin_data_p, converted_string_size);
482
483 return (ecma_string_t *) string_desc_p;
484 } /* ecma_new_ecma_string_from_utf8_converted_to_cesu8 */
485
486 /**
487 * Allocate new ecma-string and fill it with cesu-8 character which represents specified code unit
488 *
489 * @return pointer to ecma-string descriptor
490 */
491 ecma_string_t *
ecma_new_ecma_string_from_code_unit(ecma_char_t code_unit)492 ecma_new_ecma_string_from_code_unit (ecma_char_t code_unit) /**< code unit */
493 {
494 lit_utf8_byte_t lit_utf8_bytes[LIT_UTF8_MAX_BYTES_IN_CODE_UNIT];
495 lit_utf8_size_t bytes_size = lit_code_unit_to_utf8 (code_unit, lit_utf8_bytes);
496
497 return ecma_new_ecma_string_from_utf8 (lit_utf8_bytes, bytes_size);
498 } /* ecma_new_ecma_string_from_code_unit */
499
500 #if ENABLED (JERRY_ES2015)
501
502 /**
503 * Allocate new ecma-string and fill it with cesu-8 character which represents specified code units
504 *
505 * @return pointer to ecma-string descriptor
506 */
507 ecma_string_t *
ecma_new_ecma_string_from_code_units(ecma_char_t first_code_unit,ecma_char_t second_code_unit)508 ecma_new_ecma_string_from_code_units (ecma_char_t first_code_unit, /**< code unit */
509 ecma_char_t second_code_unit) /**< code unit */
510 {
511 lit_utf8_byte_t lit_utf8_bytes[2 * LIT_UTF8_MAX_BYTES_IN_CODE_UNIT];
512 lit_utf8_size_t bytes_size = lit_code_unit_to_utf8 (first_code_unit, lit_utf8_bytes);
513 bytes_size += lit_code_unit_to_utf8 (second_code_unit, lit_utf8_bytes + bytes_size);
514
515 return ecma_new_ecma_string_from_utf8 (lit_utf8_bytes, bytes_size);
516 } /* ecma_new_ecma_string_from_code_units */
517
518 #endif /* ENABLED (JERRY_ES2015) */
519
520 /**
521 * Allocate new ecma-string and fill it with ecma-number
522 *
523 * Note: the number cannot be represented as direct string
524 *
525 * @return pointer to ecma-string descriptor
526 */
527 ecma_string_t *
ecma_new_non_direct_string_from_uint32(uint32_t uint32_number)528 ecma_new_non_direct_string_from_uint32 (uint32_t uint32_number) /**< uint32 value of the string */
529 {
530 JERRY_ASSERT (uint32_number > ECMA_DIRECT_STRING_MAX_IMM);
531
532 ecma_string_t *string_p = ecma_alloc_string ();
533
534 string_p->refs_and_container = ECMA_STRING_CONTAINER_UINT32_IN_DESC | ECMA_STRING_REF_ONE;
535 string_p->u.uint32_number = uint32_number;
536
537 return string_p;
538 } /* ecma_new_non_direct_string_from_uint32 */
539
540 /**
541 * Allocate new ecma-string and fill it with ecma-number
542 *
543 * @return pointer to ecma-string descriptor
544 */
545 ecma_string_t *
ecma_new_ecma_string_from_uint32(uint32_t uint32_number)546 ecma_new_ecma_string_from_uint32 (uint32_t uint32_number) /**< uint32 value of the string */
547 {
548 if (JERRY_LIKELY (uint32_number <= ECMA_DIRECT_STRING_MAX_IMM))
549 {
550 return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_UINT, (uintptr_t) uint32_number);
551 }
552
553 return ecma_new_non_direct_string_from_uint32 (uint32_number);
554 } /* ecma_new_ecma_string_from_uint32 */
555
556 /**
557 * Returns the constant assigned to the uint32 number.
558 *
559 * Note:
560 * Calling ecma_deref_ecma_string on the returned pointer is optional.
561 *
562 * @return pointer to ecma-string descriptor
563 */
564 ecma_string_t *
ecma_get_ecma_string_from_uint32(uint32_t uint32_number)565 ecma_get_ecma_string_from_uint32 (uint32_t uint32_number) /**< input number */
566 {
567 JERRY_ASSERT (uint32_number <= ECMA_DIRECT_STRING_MAX_IMM);
568
569 return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_UINT, (uintptr_t) uint32_number);
570 } /* ecma_get_ecma_string_from_uint32 */
571
572 /**
573 * Allocate new ecma-string and fill it with ecma-number
574 *
575 * @return pointer to ecma-string descriptor
576 */
577 ecma_string_t *
ecma_new_ecma_string_from_number(ecma_number_t num)578 ecma_new_ecma_string_from_number (ecma_number_t num) /**< ecma-number */
579 {
580 uint32_t uint32_num = ecma_number_to_uint32 (num);
581 if (num == ((ecma_number_t) uint32_num))
582 {
583 return ecma_new_ecma_string_from_uint32 (uint32_num);
584 }
585
586 if (ecma_number_is_nan (num))
587 {
588 return ecma_get_magic_string (LIT_MAGIC_STRING_NAN);
589 }
590
591 if (ecma_number_is_infinity (num))
592 {
593 lit_magic_string_id_t id = (ecma_number_is_negative (num) ? LIT_MAGIC_STRING_NEGATIVE_INFINITY_UL
594 : LIT_MAGIC_STRING_INFINITY_UL);
595 return ecma_get_magic_string (id);
596 }
597
598 lit_utf8_byte_t str_buf[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
599 lit_utf8_size_t str_size = ecma_number_to_utf8_string (num, str_buf, sizeof (str_buf));
600
601 JERRY_ASSERT (str_size > 0);
602 #ifndef JERRY_NDEBUG
603 JERRY_ASSERT (lit_is_utf8_string_magic (str_buf, str_size) == LIT_MAGIC_STRING__COUNT
604 && lit_is_ex_utf8_string_magic (str_buf, str_size) == lit_get_magic_string_ex_count ());
605 #endif /* !JERRY_NDEBUG */
606
607 lit_utf8_byte_t *data_p;
608 ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8_buffer (lit_utf8_string_length (str_buf, str_size),
609 str_size,
610 &data_p);
611
612 string_desc_p->u.hash = lit_utf8_string_calc_hash (str_buf, str_size);
613 memcpy (data_p, str_buf, str_size);
614
615 return string_desc_p;
616 } /* ecma_new_ecma_string_from_number */
617
618 /**
619 * Returns the constant assigned to the magic string id.
620 *
621 * Note:
622 * Calling ecma_deref_ecma_string on the returned pointer is optional.
623 *
624 * @return pointer to ecma-string descriptor
625 */
626 extern inline ecma_string_t * JERRY_ATTR_ALWAYS_INLINE
ecma_get_magic_string(lit_magic_string_id_t id)627 ecma_get_magic_string (lit_magic_string_id_t id) /**< identifier of magic string */
628 {
629 JERRY_ASSERT (id < LIT_MAGIC_STRING__COUNT);
630 return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_MAGIC, (uintptr_t) id);
631 } /* ecma_get_magic_string */
632
633 /**
634 * Append a cesu8 string after an ecma-string
635 *
636 * Note:
637 * The string1_p argument is freed. If it needs to be preserved,
638 * call ecma_ref_ecma_string with string1_p before the call.
639 *
640 * @return concatenation of an ecma-string and a cesu8 string
641 */
642 ecma_string_t *
ecma_append_chars_to_string(ecma_string_t * string1_p,const lit_utf8_byte_t * cesu8_string2_p,lit_utf8_size_t cesu8_string2_size,lit_utf8_size_t cesu8_string2_length)643 ecma_append_chars_to_string (ecma_string_t *string1_p, /**< base ecma-string */
644 const lit_utf8_byte_t *cesu8_string2_p, /**< characters to be appended */
645 lit_utf8_size_t cesu8_string2_size, /**< byte size of cesu8_string2_p */
646 lit_utf8_size_t cesu8_string2_length) /**< character length of cesu8_string2_p */
647 {
648 JERRY_ASSERT (string1_p != NULL && cesu8_string2_size > 0 && cesu8_string2_length > 0);
649
650 if (JERRY_UNLIKELY (ecma_string_is_empty (string1_p)))
651 {
652 return ecma_new_ecma_string_from_utf8 (cesu8_string2_p, cesu8_string2_size);
653 }
654
655 lit_utf8_size_t cesu8_string1_size;
656 lit_utf8_size_t cesu8_string1_length;
657 uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
658 lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
659
660 const lit_utf8_byte_t *cesu8_string1_p = ecma_string_get_chars (string1_p,
661 &cesu8_string1_size,
662 &cesu8_string1_length,
663 uint32_to_string_buffer,
664 &flags);
665
666 JERRY_ASSERT (!(flags & ECMA_STRING_FLAG_MUST_BE_FREED));
667 JERRY_ASSERT (cesu8_string1_length > 0);
668 JERRY_ASSERT (cesu8_string1_length <= cesu8_string1_size);
669
670 lit_utf8_size_t new_size = cesu8_string1_size + cesu8_string2_size;
671
672 /* Poor man's carry flag check: it is impossible to allocate this large string. */
673 if (new_size < (cesu8_string1_size | cesu8_string2_size))
674 {
675 jerry_fatal (ERR_OUT_OF_MEMORY);
676 }
677
678 lit_magic_string_id_t magic_string_id;
679 magic_string_id = lit_is_utf8_string_pair_magic (cesu8_string1_p,
680 cesu8_string1_size,
681 cesu8_string2_p,
682 cesu8_string2_size);
683
684 if (magic_string_id != LIT_MAGIC_STRING__COUNT)
685 {
686 ecma_deref_ecma_string (string1_p);
687 return ecma_get_magic_string (magic_string_id);
688 }
689
690 if ((flags & ECMA_STRING_FLAG_IS_UINT32) && new_size <= ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
691 {
692 memcpy (uint32_to_string_buffer + cesu8_string1_size, cesu8_string2_p, cesu8_string2_size);
693
694 uint32_t array_index;
695
696 if (ecma_string_to_array_index (uint32_to_string_buffer, new_size, &array_index))
697 {
698 ecma_deref_ecma_string (string1_p);
699 return ecma_new_ecma_string_from_uint32 (array_index);
700 }
701 }
702
703 if (lit_get_magic_string_ex_count () > 0)
704 {
705 lit_magic_string_ex_id_t magic_string_ex_id;
706 magic_string_ex_id = lit_is_ex_utf8_string_pair_magic (cesu8_string1_p,
707 cesu8_string1_size,
708 cesu8_string2_p,
709 cesu8_string2_size);
710
711 if (magic_string_ex_id < lit_get_magic_string_ex_count ())
712 {
713 ecma_deref_ecma_string (string1_p);
714 return ecma_new_ecma_string_from_magic_string_ex_id (magic_string_ex_id);
715 }
716 }
717
718 lit_utf8_byte_t *data_p;
719 ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8_buffer (cesu8_string1_length + cesu8_string2_length,
720 new_size,
721 &data_p);
722
723 lit_string_hash_t hash_start;
724
725 if (JERRY_UNLIKELY (flags & ECMA_STRING_FLAG_REHASH_NEEDED))
726 {
727 hash_start = lit_utf8_string_calc_hash (cesu8_string1_p, cesu8_string1_size);
728 }
729 else
730 {
731 JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (string1_p));
732 hash_start = string1_p->u.hash;
733 }
734
735 string_desc_p->u.hash = lit_utf8_string_hash_combine (hash_start, cesu8_string2_p, cesu8_string2_size);
736
737 memcpy (data_p, cesu8_string1_p, cesu8_string1_size);
738 memcpy (data_p + cesu8_string1_size, cesu8_string2_p, cesu8_string2_size);
739
740 ecma_deref_ecma_string (string1_p);
741 return (ecma_string_t *) string_desc_p;
742 } /* ecma_append_chars_to_string */
743
744 /**
745 * Concatenate ecma-strings
746 *
747 * Note:
748 * The string1_p argument is freed. If it needs to be preserved,
749 * call ecma_ref_ecma_string with string1_p before the call.
750 *
751 * @return concatenation of two ecma-strings
752 */
753 ecma_string_t *
ecma_concat_ecma_strings(ecma_string_t * string1_p,ecma_string_t * string2_p)754 ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
755 ecma_string_t *string2_p) /**< second ecma-string */
756 {
757 JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
758
759 if (JERRY_UNLIKELY (ecma_string_is_empty (string1_p)))
760 {
761 ecma_ref_ecma_string (string2_p);
762 return string2_p;
763 }
764 else if (JERRY_UNLIKELY (ecma_string_is_empty (string2_p)))
765 {
766 return string1_p;
767 }
768
769 lit_utf8_size_t cesu8_string2_size;
770 lit_utf8_size_t cesu8_string2_length;
771 lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
772 uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
773
774 const lit_utf8_byte_t *cesu8_string2_p = ecma_string_get_chars (string2_p,
775 &cesu8_string2_size,
776 &cesu8_string2_length,
777 uint32_to_string_buffer,
778 &flags);
779
780 JERRY_ASSERT (cesu8_string2_p != NULL);
781
782 ecma_string_t *result_p = ecma_append_chars_to_string (string1_p,
783 cesu8_string2_p,
784 cesu8_string2_size,
785 cesu8_string2_length);
786
787 JERRY_ASSERT (!(flags & ECMA_STRING_FLAG_MUST_BE_FREED));
788
789 return result_p;
790 } /* ecma_concat_ecma_strings */
791
792 /**
793 * Increase reference counter of ecma-string.
794 */
795 void
ecma_ref_ecma_string(ecma_string_t * string_p)796 ecma_ref_ecma_string (ecma_string_t *string_p) /**< string descriptor */
797 {
798 JERRY_ASSERT (string_p != NULL);
799
800 if (ECMA_IS_DIRECT_STRING (string_p))
801 {
802 return;
803 }
804
805 #ifdef JERRY_NDEBUG
806 if (ECMA_STRING_IS_STATIC (string_p))
807 {
808 return;
809 }
810 #endif /* JERRY_NDEBUG */
811
812 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
813
814 if (JERRY_LIKELY (string_p->refs_and_container < ECMA_STRING_MAX_REF))
815 {
816 /* Increase reference counter. */
817 string_p->refs_and_container += ECMA_STRING_REF_ONE;
818 }
819 else
820 {
821 jerry_fatal (ERR_REF_COUNT_LIMIT);
822 }
823 } /* ecma_ref_ecma_string */
824
825 /**
826 * Decrease reference counter and deallocate ecma-string
827 * if the counter becomes zero.
828 */
829 void
ecma_deref_ecma_string(ecma_string_t * string_p)830 ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */
831 {
832 JERRY_ASSERT (string_p != NULL);
833
834 if (ECMA_IS_DIRECT_STRING (string_p))
835 {
836 return;
837 }
838
839 #ifdef JERRY_NDEBUG
840 if (ECMA_STRING_IS_STATIC (string_p))
841 {
842 return;
843 }
844 #endif /* JERRY_NDEBUG */
845
846 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
847
848 /* Decrease reference counter. */
849 string_p->refs_and_container -= ECMA_STRING_REF_ONE;
850
851 if (string_p->refs_and_container >= ECMA_STRING_REF_ONE)
852 {
853 return;
854 }
855
856 ecma_destroy_ecma_string (string_p);
857 } /* ecma_deref_ecma_string */
858
859 /**
860 * Deallocate an ecma-string
861 */
862 void
ecma_destroy_ecma_string(ecma_string_t * string_p)863 ecma_destroy_ecma_string (ecma_string_t *string_p) /**< ecma-string */
864 {
865 JERRY_ASSERT (string_p != NULL);
866 JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (string_p));
867 JERRY_ASSERT ((string_p->refs_and_container < ECMA_STRING_REF_ONE) || ECMA_STRING_IS_STATIC (string_p));
868
869 switch (ECMA_STRING_GET_CONTAINER (string_p))
870 {
871 case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
872 {
873 ecma_dealloc_string_buffer (string_p, ((ecma_utf8_string_t *) string_p)->size + sizeof (ecma_utf8_string_t));
874 return;
875 }
876 case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
877 {
878 ecma_dealloc_string_buffer (string_p,
879 ((ecma_long_utf8_string_t *) string_p)->size + sizeof (ecma_long_utf8_string_t));
880 return;
881 }
882 case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
883 {
884 ecma_dealloc_string_buffer (string_p,
885 ((ecma_ascii_string_t *) string_p)->size + sizeof (ecma_ascii_string_t));
886 return;
887 }
888 #if ENABLED (JERRY_ES2015)
889 case ECMA_STRING_CONTAINER_SYMBOL:
890 {
891 ecma_extended_string_t * symbol_p = (ecma_extended_string_t *) string_p;
892 ecma_free_value (symbol_p->u.symbol_descriptor);
893 ecma_dealloc_extended_string (symbol_p);
894 return;
895 }
896 #endif /* ENABLED (JERRY_ES2015) */
897 default:
898 {
899 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC
900 || ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
901
902 /* only the string descriptor itself should be freed */
903 ecma_dealloc_string (string_p);
904 }
905 }
906 } /* ecma_destroy_ecma_string */
907
908 /**
909 * Convert ecma-string to number
910 *
911 * @return converted ecma-number
912 */
913 ecma_number_t
ecma_string_to_number(const ecma_string_t * string_p)914 ecma_string_to_number (const ecma_string_t *string_p) /**< ecma-string */
915 {
916 JERRY_ASSERT (string_p != NULL);
917
918 if (ECMA_IS_DIRECT_STRING (string_p))
919 {
920 if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_UINT))
921 {
922 return (ecma_number_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
923 }
924 }
925 else if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
926 {
927 return ((ecma_number_t) string_p->u.uint32_number);
928 }
929
930 lit_utf8_size_t size;
931 const lit_utf8_byte_t *chars_p = ecma_string_get_chars_fast (string_p, &size);
932
933 JERRY_ASSERT (chars_p != NULL);
934
935 if (size == 0)
936 {
937 return ECMA_NUMBER_ZERO;
938 }
939
940 return ecma_utf8_string_to_number (chars_p, size);
941 } /* ecma_string_to_number */
942
943 /**
944 * Check if string is array index.
945 *
946 * @return ECMA_STRING_NOT_ARRAY_INDEX if string is not array index
947 * the array index otherwise
948 */
949 inline uint32_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_get_array_index(const ecma_string_t * str_p)950 ecma_string_get_array_index (const ecma_string_t *str_p) /**< ecma-string */
951 {
952 if (ECMA_IS_DIRECT_STRING (str_p))
953 {
954 if (ECMA_IS_DIRECT_STRING_WITH_TYPE (str_p, ECMA_DIRECT_STRING_UINT))
955 {
956 /* Value cannot be equal to the maximum value of a 32 bit unsigned number. */
957 return (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (str_p);
958 }
959
960 return ECMA_STRING_NOT_ARRAY_INDEX;
961 }
962
963 if (ECMA_STRING_GET_CONTAINER (str_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
964 {
965 /* When the uint32_number is equal to the maximum value of 32 bit unsigned integer number,
966 * it is also an invalid array index. The comparison to ECMA_STRING_NOT_ARRAY_INDEX will
967 * be true in this case. */
968 return str_p->u.uint32_number;
969 }
970
971 return ECMA_STRING_NOT_ARRAY_INDEX;
972 } /* ecma_string_get_array_index */
973
974 /**
975 * Convert ecma-string's contents to a cesu-8 string and put it to the buffer.
976 * It is the caller's responsibility to make sure that the string fits in the buffer.
977 *
978 * @return number of bytes, actually copied to the buffer.
979 */
980 lit_utf8_size_t JERRY_ATTR_WARN_UNUSED_RESULT
ecma_string_copy_to_cesu8_buffer(const ecma_string_t * string_p,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)981 ecma_string_copy_to_cesu8_buffer (const ecma_string_t *string_p, /**< ecma-string descriptor */
982 lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
983 * (can be NULL if buffer_size == 0) */
984 lit_utf8_size_t buffer_size) /**< size of buffer */
985 {
986 JERRY_ASSERT (string_p != NULL);
987 JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
988 JERRY_ASSERT (ecma_string_get_size (string_p) <= buffer_size);
989
990 lit_utf8_size_t size;
991
992 if (ECMA_IS_DIRECT_STRING (string_p))
993 {
994 if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_UINT))
995 {
996 uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
997 size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
998 JERRY_ASSERT (size <= buffer_size);
999 return size;
1000 }
1001 }
1002 else
1003 {
1004 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
1005
1006 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1007 {
1008 uint32_t uint32_number = string_p->u.uint32_number;
1009 size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
1010 JERRY_ASSERT (size <= buffer_size);
1011 return size;
1012 }
1013 }
1014
1015 const lit_utf8_byte_t *chars_p = ecma_string_get_chars_fast (string_p, &size);
1016
1017 JERRY_ASSERT (chars_p != NULL);
1018 JERRY_ASSERT (size <= buffer_size);
1019
1020 memcpy (buffer_p, chars_p, size);
1021 return size;
1022 } /* ecma_string_copy_to_cesu8_buffer */
1023
1024 /**
1025 * Convert ecma-string's contents to an utf-8 string and put it to the buffer.
1026 * It is the caller's responsibility to make sure that the string fits in the buffer.
1027 *
1028 * @return number of bytes, actually copied to the buffer.
1029 */
1030 lit_utf8_size_t JERRY_ATTR_WARN_UNUSED_RESULT
ecma_string_copy_to_utf8_buffer(const ecma_string_t * string_p,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1031 ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_p, /**< ecma-string descriptor */
1032 lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1033 * (can be NULL if buffer_size == 0) */
1034 lit_utf8_size_t buffer_size) /**< size of buffer */
1035 {
1036 JERRY_ASSERT (string_p != NULL);
1037 JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
1038 JERRY_ASSERT (ecma_string_get_utf8_size (string_p) <= buffer_size);
1039
1040 lit_utf8_size_t size;
1041
1042 if (ECMA_IS_DIRECT_STRING (string_p))
1043 {
1044 if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_UINT))
1045 {
1046 uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
1047 size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
1048 JERRY_ASSERT (size <= buffer_size);
1049 return size;
1050 }
1051 }
1052 else
1053 {
1054 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
1055
1056 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1057 {
1058 uint32_t uint32_number = string_p->u.uint32_number;
1059 size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
1060 JERRY_ASSERT (size <= buffer_size);
1061 return size;
1062 }
1063 }
1064
1065 uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
1066 const lit_utf8_byte_t *chars_p = ecma_string_get_chars (string_p, &size, NULL, NULL, &flags);
1067
1068 JERRY_ASSERT (chars_p != NULL);
1069
1070 if (flags & ECMA_STRING_FLAG_IS_ASCII)
1071 {
1072 JERRY_ASSERT (size <= buffer_size);
1073 memcpy (buffer_p, chars_p, size);
1074 return size;
1075 }
1076
1077 size = lit_convert_cesu8_string_to_utf8_string (chars_p,
1078 size,
1079 buffer_p,
1080 buffer_size);
1081
1082 if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
1083 {
1084 jmem_heap_free_block ((void *) chars_p, size);
1085 }
1086
1087 JERRY_ASSERT (size <= buffer_size);
1088 return size;
1089 } /* ecma_string_copy_to_utf8_buffer */
1090
1091 /**
1092 * Convert ecma-string's contents to a cesu-8 string, extract the parts of the converted string between the specified
1093 * start position and the end position (or the end of the string, whichever comes first), and copy these characters
1094 * into the buffer.
1095 *
1096 * @return number of bytes, actually copied to the buffer.
1097 */
1098 lit_utf8_size_t
ecma_substring_copy_to_cesu8_buffer(const ecma_string_t * string_desc_p,ecma_length_t start_pos,ecma_length_t end_pos,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1099 ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
1100 ecma_length_t start_pos, /**< position of the first character */
1101 ecma_length_t end_pos, /**< position of the last character */
1102 lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1103 * (can be NULL if buffer_size == 0) */
1104 lit_utf8_size_t buffer_size) /**< size of buffer */
1105 {
1106 JERRY_ASSERT (string_desc_p != NULL);
1107 JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
1108
1109 ecma_length_t string_length = ecma_string_get_length (string_desc_p);
1110 lit_utf8_size_t size = 0;
1111
1112 if (start_pos >= string_length || start_pos >= end_pos)
1113 {
1114 return 0;
1115 }
1116
1117 if (end_pos > string_length)
1118 {
1119 end_pos = string_length;
1120 }
1121
1122 ECMA_STRING_TO_UTF8_STRING (string_desc_p, utf8_str_p, utf8_str_size);
1123
1124 const lit_utf8_byte_t *start_p = utf8_str_p;
1125
1126 if (string_length == utf8_str_size)
1127 {
1128 start_p += start_pos;
1129 size = end_pos - start_pos;
1130
1131 if (size > buffer_size)
1132 {
1133 size = buffer_size;
1134 }
1135
1136 memcpy (buffer_p, start_p, size);
1137 }
1138 else
1139 {
1140 end_pos -= start_pos;
1141 while (start_pos--)
1142 {
1143 start_p += lit_get_unicode_char_size_by_utf8_first_byte (*start_p);
1144 }
1145
1146 const lit_utf8_byte_t *end_p = start_p;
1147
1148 while (end_pos--)
1149 {
1150 lit_utf8_size_t code_unit_size = lit_get_unicode_char_size_by_utf8_first_byte (*end_p);
1151
1152 if ((size + code_unit_size) > buffer_size)
1153 {
1154 break;
1155 }
1156
1157 end_p += code_unit_size;
1158 size += code_unit_size;
1159 }
1160
1161 memcpy (buffer_p, start_p, size);
1162 }
1163
1164 ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size);
1165
1166 JERRY_ASSERT (size <= buffer_size);
1167 return size;
1168 } /* ecma_substring_copy_to_cesu8_buffer */
1169
1170 /**
1171 * Convert ecma-string's contents to an utf-8 string, extract the parts of the converted string between the specified
1172 * start position and the end position (or the end of the string, whichever comes first), and copy these characters
1173 * into the buffer.
1174 *
1175 * @return number of bytes, actually copied to the buffer.
1176 */
1177 lit_utf8_size_t
ecma_substring_copy_to_utf8_buffer(const ecma_string_t * string_desc_p,ecma_length_t start_pos,ecma_length_t end_pos,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1178 ecma_substring_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
1179 ecma_length_t start_pos, /**< position of the first character */
1180 ecma_length_t end_pos, /**< position of the last character */
1181 lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1182 * (can be NULL if buffer_size == 0) */
1183 lit_utf8_size_t buffer_size) /**< size of buffer */
1184 {
1185 JERRY_ASSERT (string_desc_p != NULL);
1186 JERRY_ASSERT (ECMA_IS_DIRECT_STRING (string_desc_p) || string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
1187 JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
1188
1189 lit_utf8_size_t size = 0;
1190
1191 ecma_length_t utf8_str_length = ecma_string_get_utf8_length (string_desc_p);
1192
1193 if (start_pos >= utf8_str_length || start_pos >= end_pos)
1194 {
1195 return 0;
1196 }
1197
1198 if (end_pos > utf8_str_length)
1199 {
1200 end_pos = utf8_str_length;
1201 }
1202
1203 ECMA_STRING_TO_UTF8_STRING (string_desc_p, cesu8_str_p, cesu8_str_size);
1204 ecma_length_t cesu8_str_length = ecma_string_get_length (string_desc_p);
1205
1206 if (cesu8_str_length == cesu8_str_size)
1207 {
1208 cesu8_str_p += start_pos;
1209 size = end_pos - start_pos;
1210
1211 if (size > buffer_size)
1212 {
1213 size = buffer_size;
1214 }
1215
1216 memcpy (buffer_p, cesu8_str_p, size);
1217 }
1218 else
1219 {
1220 const lit_utf8_byte_t *cesu8_end_pos = cesu8_str_p + cesu8_str_size;
1221 end_pos -= start_pos;
1222
1223 while (start_pos--)
1224 {
1225 ecma_char_t ch;
1226 lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_str_p, &ch);
1227
1228 cesu8_str_p += code_unit_size;
1229 if ((cesu8_str_p != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
1230 {
1231 ecma_char_t next_ch;
1232 lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_str_p, &next_ch);
1233 if (lit_is_code_point_utf16_low_surrogate (next_ch))
1234 {
1235 JERRY_ASSERT (code_unit_size == next_ch_size);
1236 cesu8_str_p += code_unit_size;
1237 }
1238 }
1239 }
1240
1241 const lit_utf8_byte_t *cesu8_pos = cesu8_str_p;
1242
1243 lit_utf8_byte_t *utf8_pos = buffer_p;
1244 lit_utf8_byte_t *utf8_end_pos = buffer_p + buffer_size;
1245
1246 while (end_pos--)
1247 {
1248 ecma_char_t ch;
1249 lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_pos, &ch);
1250
1251 if ((size + code_unit_size) > buffer_size)
1252 {
1253 break;
1254 }
1255
1256 if (((cesu8_pos + code_unit_size) != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
1257 {
1258 ecma_char_t next_ch;
1259 lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_pos + code_unit_size, &next_ch);
1260
1261 if (lit_is_code_point_utf16_low_surrogate (next_ch))
1262 {
1263 JERRY_ASSERT (code_unit_size == next_ch_size);
1264
1265 if ((size + code_unit_size + 1) > buffer_size)
1266 {
1267 break;
1268 }
1269
1270 cesu8_pos += next_ch_size;
1271
1272 lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (ch, next_ch);
1273 lit_code_point_to_utf8 (code_point, utf8_pos);
1274 size += (code_unit_size + 1);
1275 }
1276 else
1277 {
1278 memcpy (utf8_pos, cesu8_pos, code_unit_size);
1279 size += code_unit_size;
1280 }
1281 }
1282 else
1283 {
1284 memcpy (utf8_pos, cesu8_pos, code_unit_size);
1285 size += code_unit_size;
1286 }
1287
1288 utf8_pos = buffer_p + size;
1289 cesu8_pos += code_unit_size;
1290 }
1291
1292 JERRY_ASSERT (utf8_pos <= utf8_end_pos);
1293 }
1294
1295 ECMA_FINALIZE_UTF8_STRING (cesu8_str_p, cesu8_str_size);
1296 JERRY_ASSERT (size <= buffer_size);
1297
1298 return size;
1299 } /* ecma_substring_copy_to_utf8_buffer */
1300
1301 /**
1302 * Convert ecma-string's contents to a cesu-8 string and put it to the buffer.
1303 * It is the caller's responsibility to make sure that the string fits in the buffer.
1304 * Check if the size of the string is equal with the size of the buffer.
1305 */
1306 inline void JERRY_ATTR_ALWAYS_INLINE
ecma_string_to_utf8_bytes(const ecma_string_t * string_desc_p,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1307 ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
1308 lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1309 * (can be NULL if buffer_size == 0) */
1310 lit_utf8_size_t buffer_size) /**< size of buffer */
1311 {
1312 const lit_utf8_size_t size = ecma_string_copy_to_cesu8_buffer (string_desc_p, buffer_p, buffer_size);
1313 JERRY_ASSERT (size == buffer_size);
1314 } /* ecma_string_to_utf8_bytes */
1315
1316 /**
1317 * Get size of the uint32 number stored locally in the string's descriptor
1318 *
1319 * Note: the represented number size and length are equal
1320 *
1321 * @return size in bytes
1322 */
1323 static inline ecma_length_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_get_uint32_size(const uint32_t uint32_number)1324 ecma_string_get_uint32_size (const uint32_t uint32_number) /**< number in the string-descriptor */
1325 {
1326 uint32_t prev_number = 1;
1327 uint32_t next_number = 100;
1328 ecma_length_t size = 1;
1329
1330 const uint32_t max_size = 9;
1331
1332 while (size < max_size && uint32_number >= next_number)
1333 {
1334 prev_number = next_number;
1335 next_number *= 100;
1336 size += 2;
1337 }
1338
1339 if (uint32_number >= prev_number * 10)
1340 {
1341 size++;
1342 }
1343
1344 return size;
1345 } /* ecma_string_get_uint32_size */
1346
/**
 * Checks whether the given string is a sequence of ascii characters.
 *
 * The check compares the byte size with the character count returned by
 * lit_utf8_string_length. Note that the size argument is evaluated twice.
 */
#define ECMA_STRING_IS_ASCII(char_p, size) ((size) == lit_utf8_string_length ((char_p), (size)))
1351
1352 /**
1353 * Returns with the cesu8 character array of a string.
1354 *
1355 * Note:
1356 * - This function returns with a newly allocated buffer for uint32 strings,
1357 * which must be freed if the optional uint32_buff_p parameter is NULL.
1358 * - The ASCII check only happens if the flags parameter gets
1359 * 'ECMA_STRING_FLAG_IS_ASCII' as an input.
1360 *
1361 * @return start of cesu8 characters
1362 */
1363 const lit_utf8_byte_t *
ecma_string_get_chars(const ecma_string_t * string_p,lit_utf8_size_t * size_p,lit_utf8_size_t * length_p,lit_utf8_byte_t * uint32_buff_p,uint8_t * flags_p)1364 ecma_string_get_chars (const ecma_string_t *string_p, /**< ecma-string */
1365 lit_utf8_size_t *size_p, /**< [out] size of the ecma string */
1366 lit_utf8_size_t *length_p, /**< [out] optional argument. If the pointer is not NULL the pointed
1367 * memory area is filled with the length of the ecma string */
1368 lit_utf8_byte_t *uint32_buff_p, /**< [out] optional argument. If the pointer is not NULL the
1369 * pointed memory area is filled with the string converted
1370 * uint32 string descriptor */
1371 uint8_t *flags_p) /**< [in,out] any combination of ecma_string_flag_t bits */
1372 {
1373 ecma_length_t length;
1374 lit_utf8_size_t size;
1375 const lit_utf8_byte_t *result_p;
1376
1377 if (ECMA_IS_DIRECT_STRING (string_p))
1378 {
1379 *flags_p |= ECMA_STRING_FLAG_REHASH_NEEDED;
1380
1381 switch (ECMA_GET_DIRECT_STRING_TYPE (string_p))
1382 {
1383 case ECMA_DIRECT_STRING_MAGIC:
1384 {
1385 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
1386
1387 if (id >= LIT_MAGIC_STRING__COUNT)
1388 {
1389 id -= LIT_MAGIC_STRING__COUNT;
1390 size = lit_get_magic_string_ex_size (id);
1391 result_p = lit_get_magic_string_ex_utf8 (id);
1392 length = 0;
1393
1394 if (JERRY_UNLIKELY (*flags_p & ECMA_STRING_FLAG_IS_ASCII))
1395 {
1396 length = lit_utf8_string_length (result_p, size);
1397 }
1398 }
1399 else
1400 {
1401 size = lit_get_magic_string_size (id);
1402 length = size;
1403
1404 result_p = lit_get_magic_string_utf8 (id);
1405
1406 /* All magic strings must be ascii strings. */
1407 JERRY_ASSERT (ECMA_STRING_IS_ASCII (result_p, size));
1408 }
1409 break;
1410 }
1411 default:
1412 {
1413 JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_UINT);
1414 uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
1415 size = (lit_utf8_size_t) ecma_string_get_uint32_size (uint32_number);
1416
1417 if (uint32_buff_p != NULL)
1418 {
1419 result_p = uint32_buff_p;
1420 }
1421 else
1422 {
1423 result_p = (const lit_utf8_byte_t *) jmem_heap_alloc_block (size);
1424 *flags_p |= ECMA_STRING_FLAG_MUST_BE_FREED;
1425 }
1426
1427 length = ecma_uint32_to_utf8_string (uint32_number, (lit_utf8_byte_t *) result_p, size);
1428
1429 JERRY_ASSERT (length == size);
1430 *flags_p |= ECMA_STRING_FLAG_IS_UINT32;
1431 break;
1432 }
1433 }
1434 }
1435 else
1436 {
1437 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
1438
1439 switch (ECMA_STRING_GET_CONTAINER (string_p))
1440 {
1441 case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
1442 {
1443 ecma_utf8_string_t *utf8_string_desc_p = (ecma_utf8_string_t *) string_p;
1444 size = utf8_string_desc_p->size;
1445 length = utf8_string_desc_p->length;
1446 result_p = ECMA_UTF8_STRING_GET_BUFFER (utf8_string_desc_p);
1447 break;
1448 }
1449 case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
1450 {
1451 ecma_long_utf8_string_t *long_utf8_string_desc_p = (ecma_long_utf8_string_t *) string_p;
1452 size = long_utf8_string_desc_p->size;
1453 length = long_utf8_string_desc_p->length;
1454 result_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (long_utf8_string_desc_p);
1455 break;
1456 }
1457 case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
1458 {
1459 ecma_ascii_string_t *ascii_string_desc_p = (ecma_ascii_string_t *) string_p;
1460 size = ascii_string_desc_p->size;
1461 length = ascii_string_desc_p->size;
1462 result_p = ECMA_ASCII_STRING_GET_BUFFER (ascii_string_desc_p);
1463 break;
1464 }
1465 case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
1466 {
1467 size = (lit_utf8_size_t) ecma_string_get_uint32_size (string_p->u.uint32_number);
1468
1469 if (uint32_buff_p != NULL)
1470 {
1471 result_p = uint32_buff_p;
1472 }
1473 else
1474 {
1475 result_p = (const lit_utf8_byte_t *) jmem_heap_alloc_block (size);
1476 *flags_p |= ECMA_STRING_FLAG_MUST_BE_FREED;
1477 }
1478
1479 length = ecma_uint32_to_utf8_string (string_p->u.uint32_number, (lit_utf8_byte_t *) result_p, size);
1480
1481 JERRY_ASSERT (length == size);
1482 *flags_p |= ECMA_STRING_FLAG_IS_UINT32 | ECMA_STRING_FLAG_REHASH_NEEDED;
1483 break;
1484
1485 }
1486 default:
1487 {
1488 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
1489
1490 lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
1491 size = lit_get_magic_string_ex_size (id);
1492 length = 0;
1493
1494 if (JERRY_UNLIKELY (*flags_p & ECMA_STRING_FLAG_IS_ASCII))
1495 {
1496 length = lit_utf8_string_length (lit_get_magic_string_ex_utf8 (id), size);
1497 }
1498
1499 result_p = lit_get_magic_string_ex_utf8 (id);
1500 *flags_p |= ECMA_STRING_FLAG_REHASH_NEEDED;
1501 break;
1502 }
1503 }
1504 }
1505
1506 *size_p = size;
1507 if (length_p != NULL)
1508 {
1509 *length_p = length;
1510 }
1511
1512 if ((*flags_p & ECMA_STRING_FLAG_IS_ASCII)
1513 && length != size)
1514 {
1515 *flags_p = (uint8_t) (*flags_p & (uint8_t) ~ECMA_STRING_FLAG_IS_ASCII);
1516 }
1517
1518 return result_p;
1519 } /* ecma_string_get_chars */
1520
1521 /**
1522 * Checks whether the string equals to the magic string id.
1523 *
1524 * @return true - if the string equals to the magic string id
1525 * false - otherwise
1526 */
1527 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_string_to_magic_id(const ecma_string_t * string_p,lit_magic_string_id_t id)1528 ecma_compare_ecma_string_to_magic_id (const ecma_string_t *string_p, /**< property name */
1529 lit_magic_string_id_t id) /**< magic string id */
1530 {
1531 return (string_p == ecma_get_magic_string (id));
1532 } /* ecma_compare_ecma_string_to_magic_id */
1533
1534 /**
1535 * Checks whether ecma string is empty or not
1536 *
1537 * @return true - if the string is an empty string
1538 * false - otherwise
1539 */
1540 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_string_is_empty(const ecma_string_t * string_p)1541 ecma_string_is_empty (const ecma_string_t *string_p) /**< ecma-string */
1542 {
1543 return ecma_compare_ecma_string_to_magic_id (string_p, LIT_MAGIC_STRING__EMPTY);
1544 } /* ecma_string_is_empty */
1545
1546 /**
1547 * Checks whether the string equals to "length".
1548 *
1549 * @return true - if the string equals to "length"
1550 * false - otherwise
1551 */
1552 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_string_is_length(const ecma_string_t * string_p)1553 ecma_string_is_length (const ecma_string_t *string_p) /**< property name */
1554 {
1555 return ecma_compare_ecma_string_to_magic_id (string_p, LIT_MAGIC_STRING_LENGTH);
1556 } /* ecma_string_is_length */
1557
1558 /**
1559 * Converts a property name into a string
1560 *
1561 * @return pointer to the converted ecma string
1562 */
1563 static inline ecma_string_t * JERRY_ATTR_ALWAYS_INLINE
ecma_property_to_string(ecma_property_t property,jmem_cpointer_t prop_name_cp)1564 ecma_property_to_string (ecma_property_t property, /**< property name type */
1565 jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1566 {
1567 uintptr_t property_string = ((uintptr_t) (property)) & (0x3 << ECMA_PROPERTY_NAME_TYPE_SHIFT);
1568 property_string = (property_string >> ECMA_STRING_TYPE_CONVERSION_SHIFT) | ECMA_TYPE_DIRECT_STRING;
1569 return (ecma_string_t *) (property_string | (((uintptr_t) prop_name_cp) << ECMA_DIRECT_STRING_SHIFT));
1570 } /* ecma_property_to_string */
1571
1572 /**
1573 * Converts a string into a property name
1574 *
1575 * @return the compressed pointer part of the name
1576 */
1577 inline jmem_cpointer_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_to_property_name(ecma_string_t * prop_name_p,ecma_property_t * name_type_p)1578 ecma_string_to_property_name (ecma_string_t *prop_name_p, /**< property name */
1579 ecma_property_t *name_type_p) /**< [out] property name type */
1580 {
1581 if (ECMA_IS_DIRECT_STRING (prop_name_p))
1582 {
1583 *name_type_p = (ecma_property_t) ECMA_DIRECT_STRING_TYPE_TO_PROP_NAME_TYPE (prop_name_p);
1584 return (jmem_cpointer_t) ECMA_GET_DIRECT_STRING_VALUE (prop_name_p);
1585 }
1586
1587 *name_type_p = ECMA_DIRECT_STRING_PTR << ECMA_PROPERTY_NAME_TYPE_SHIFT;
1588
1589 ecma_ref_ecma_string (prop_name_p);
1590
1591 jmem_cpointer_t prop_name_cp;
1592 ECMA_SET_NON_NULL_POINTER (prop_name_cp, prop_name_p);
1593 return prop_name_cp;
1594 } /* ecma_string_to_property_name */
1595
1596 /**
1597 * Converts a property name into a string
1598 *
1599 * @return the string pointer
1600 * string must be released with ecma_deref_ecma_string
1601 */
1602 ecma_string_t *
ecma_string_from_property_name(ecma_property_t property,jmem_cpointer_t prop_name_cp)1603 ecma_string_from_property_name (ecma_property_t property, /**< property name type */
1604 jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1605 {
1606 if (ECMA_PROPERTY_GET_NAME_TYPE (property) != ECMA_DIRECT_STRING_PTR)
1607 {
1608 return ecma_property_to_string (property, prop_name_cp);
1609 }
1610
1611 ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1612 ecma_ref_ecma_string (prop_name_p);
1613 return prop_name_p;
1614 } /* ecma_string_from_property_name */
1615
1616 /**
1617 * Get hash code of property name
1618 *
1619 * @return hash code of property name
1620 */
1621 inline lit_string_hash_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_get_property_name_hash(ecma_property_t property,jmem_cpointer_t prop_name_cp)1622 ecma_string_get_property_name_hash (ecma_property_t property, /**< property name type */
1623 jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1624 {
1625 if (ECMA_PROPERTY_GET_NAME_TYPE (property) == ECMA_DIRECT_STRING_PTR)
1626 {
1627 ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1628 return prop_name_p->u.hash;
1629 }
1630
1631 return (lit_string_hash_t) prop_name_cp;
1632 } /* ecma_string_get_property_name_hash */
1633
1634 /**
1635 * Check if property name is array index.
1636 *
1637 * @return ECMA_STRING_NOT_ARRAY_INDEX if string is not array index
1638 * the array index otherwise
1639 */
1640 uint32_t
ecma_string_get_property_index(ecma_property_t property,jmem_cpointer_t prop_name_cp)1641 ecma_string_get_property_index (ecma_property_t property, /**< property name type */
1642 jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1643 {
1644 switch (ECMA_PROPERTY_GET_NAME_TYPE (property))
1645 {
1646 case ECMA_DIRECT_STRING_UINT:
1647 {
1648 return (uint32_t) prop_name_cp;
1649 }
1650 case ECMA_DIRECT_STRING_PTR:
1651 {
1652 ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1653 return ecma_string_get_array_index (prop_name_p);
1654 }
1655 default:
1656 {
1657 return ECMA_STRING_NOT_ARRAY_INDEX;
1658 }
1659 }
1660 } /* ecma_string_get_property_index */
1661
1662 /**
1663 * Compare a property name to a string
1664 *
1665 * @return true if they are equals
1666 * false otherwise
1667 */
1668 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_string_compare_to_property_name(ecma_property_t property,jmem_cpointer_t prop_name_cp,const ecma_string_t * string_p)1669 ecma_string_compare_to_property_name (ecma_property_t property, /**< property name type */
1670 jmem_cpointer_t prop_name_cp, /**< property name compressed pointer */
1671 const ecma_string_t *string_p) /**< other string */
1672 {
1673 if (ECMA_PROPERTY_GET_NAME_TYPE (property) != ECMA_DIRECT_STRING_PTR)
1674 {
1675 return ecma_property_to_string (property, prop_name_cp) == string_p;
1676 }
1677
1678 if (ECMA_IS_DIRECT_STRING (string_p))
1679 {
1680 return false;
1681 }
1682
1683 ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1684 return ecma_compare_ecma_non_direct_strings (prop_name_p, string_p);
1685 } /* ecma_string_compare_to_property_name */
1686
1687 /**
1688 * Long path part of ecma-string to ecma-string comparison routine
1689 *
1690 * See also:
1691 * ecma_compare_ecma_strings
1692 *
1693 * @return true - if strings are equal;
1694 * false - otherwise
1695 */
1696 static bool JERRY_ATTR_NOINLINE
ecma_compare_ecma_strings_longpath(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1697 ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /**< ecma-string */
1698 const ecma_string_t *string2_p) /**< ecma-string */
1699 {
1700 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_GET_CONTAINER (string2_p));
1701
1702 const lit_utf8_byte_t *utf8_string1_p, *utf8_string2_p;
1703 lit_utf8_size_t utf8_string1_size, utf8_string2_size;
1704
1705 if (JERRY_LIKELY (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING))
1706 {
1707 utf8_string1_p = ECMA_UTF8_STRING_GET_BUFFER (string1_p);
1708 utf8_string1_size = ((ecma_utf8_string_t *) string1_p)->size;
1709 utf8_string2_p = ECMA_UTF8_STRING_GET_BUFFER (string2_p);
1710 utf8_string2_size = ((ecma_utf8_string_t *) string2_p)->size;
1711
1712 }
1713 else if (ECMA_STRING_GET_CONTAINER(string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)
1714 {
1715 utf8_string1_p = ECMA_ASCII_STRING_GET_BUFFER (string1_p);
1716 utf8_string1_size = ((ecma_ascii_string_t *) string1_p)->size;
1717 utf8_string2_p = ECMA_ASCII_STRING_GET_BUFFER (string2_p);
1718 utf8_string2_size = ((ecma_ascii_string_t *) string2_p)->size;
1719 }
1720 else
1721 {
1722 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING);
1723
1724 utf8_string1_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string1_p);
1725 utf8_string1_size = ((ecma_long_utf8_string_t *) string1_p)->size;
1726 utf8_string2_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string2_p);
1727 utf8_string2_size = ((ecma_long_utf8_string_t *) string2_p)->size;
1728 }
1729
1730 if (utf8_string1_size != utf8_string2_size)
1731 {
1732 return false;
1733 }
1734
1735 return !memcmp ((char *) utf8_string1_p, (char *) utf8_string2_p, utf8_string1_size);
1736 } /* ecma_compare_ecma_strings_longpath */
1737
1738 /**
1739 * Compare two ecma-strings
1740 *
1741 * @return true - if strings are equal;
1742 * false - otherwise
1743 */
1744 extern inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_strings(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1745 ecma_compare_ecma_strings (const ecma_string_t *string1_p, /**< ecma-string */
1746 const ecma_string_t *string2_p) /**< ecma-string */
1747 {
1748 JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
1749
1750 /* Fast paths first. */
1751 if (string1_p == string2_p)
1752 {
1753 return true;
1754 }
1755
1756 /* Either string is direct, return with false. */
1757 if (ECMA_IS_DIRECT_STRING (((uintptr_t) string1_p) | ((uintptr_t) string2_p)))
1758 {
1759 return false;
1760 }
1761
1762 if (string1_p->u.hash != string2_p->u.hash)
1763 {
1764 return false;
1765 }
1766
1767 ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
1768
1769 if (string1_container != ECMA_STRING_GET_CONTAINER (string2_p))
1770 {
1771 return false;
1772 }
1773
1774 if (string1_container == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1775 {
1776 return true;
1777 }
1778
1779 #if ENABLED (JERRY_ES2015)
1780 if (string1_container == ECMA_STRING_CONTAINER_SYMBOL)
1781 {
1782 return false;
1783 }
1784 #endif /* ENABLED (JERRY_ES2015) */
1785
1786 return ecma_compare_ecma_strings_longpath (string1_p, string2_p);
1787 } /* ecma_compare_ecma_strings */
1788
1789 static bool JERRY_ATTR_NOINLINE
ecma_compare_ecma_strings_longpath_with_literal(const ecma_string_t * string1_p,const ecma_string_t * string2_p,const lit_utf8_byte_t * chars_p)1790 ecma_compare_ecma_strings_longpath_with_literal (const ecma_string_t *string1_p, /**< ecma_string */
1791 const ecma_string_t *string2_p,
1792 const lit_utf8_byte_t *chars_p) /**< ecma_string */
1793 {
1794 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_GET_CONTAINER (string2_p));
1795
1796 const lit_utf8_byte_t *utf8_string2_p;
1797 lit_utf8_size_t utf8_string1_size,utf8_string2_size;
1798
1799 if(JERRY_LIKELY (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING))
1800 {
1801 utf8_string1_size = ((ecma_utf8_string_t *) string1_p)->size;
1802 utf8_string2_p = ECMA_UTF8_STRING_GET_BUFFER (string2_p);
1803 utf8_string2_size = ((ecma_utf8_string_t *) string2_p)->size;
1804
1805 }
1806 else if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)
1807 {
1808 utf8_string1_size = ((ecma_ascii_string_t *) string1_p)->size;
1809 utf8_string2_p = ECMA_ASCII_STRING_GET_BUFFER (string2_p);
1810 utf8_string2_size = ((ecma_ascii_string_t *) string2_p)->size;
1811 }
1812 else
1813 {
1814 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING);
1815
1816 utf8_string1_size = ((ecma_long_utf8_string_t *) string1_p)->size;
1817 utf8_string2_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string2_p);
1818 utf8_string2_size = ((ecma_long_utf8_string_t *) string2_p)->size;
1819 }
1820
1821 if (utf8_string1_size != utf8_string2_size)
1822 {
1823 return false;
1824 }
1825
1826 return !memcmp ((char *) chars_p, (char *) utf8_string2_p, utf8_string1_size);
1827 } /* ecma_compare_ecma_strings_longpath */
1828
1829 extern inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_strings_with_literal(const ecma_string_t * string1_p,const ecma_string_t * string2_p,const lit_utf8_byte_t * chars_p)1830 ecma_compare_ecma_strings_with_literal (const ecma_string_t *string1_p, /**< ecma-string */
1831 const ecma_string_t *string2_p,\
1832 const lit_utf8_byte_t *chars_p) /**< ecma-string */
1833 {
1834 JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
1835
1836 /* Fast paths first. */
1837 if (string1_p == string2_p)
1838 {
1839 return true;
1840 }
1841
1842 /* Either string is direct, return with false. */
1843 if (ECMA_IS_DIRECT_STRING (((uintptr_t) string1_p) | ((uintptr_t) string2_p)))
1844 {
1845 return false;
1846 }
1847
1848 if (string1_p->u.hash != string2_p->u.hash)
1849 {
1850 return false;
1851 }
1852
1853 ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
1854
1855 if (string1_container != ECMA_STRING_GET_CONTAINER (string2_p))
1856 {
1857 return false;
1858 }
1859
1860 if (string1_container == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1861 {
1862 return true;
1863 }
1864
1865 #if ENABLED (JERRY_ES2015)
1866 if (string1_container == ECMA_STRING_CONTAINER_SYMBOL)
1867 {
1868 return false;
1869 }
1870 #endif /* ENABLED (JERRY_ES2015) */
1871
1872 return ecma_compare_ecma_strings_longpath_with_literal (string1_p, string2_p, chars_p);
1873 } /* ecma_compare_ecma_strings */
1874
1875 /**
1876 * Compare two non-direct ecma-strings
1877 *
1878 * @return true - if strings are equal;
1879 * false - otherwise
1880 */
1881 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_non_direct_strings(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1882 ecma_compare_ecma_non_direct_strings (const ecma_string_t *string1_p, /**< ecma-string */
1883 const ecma_string_t *string2_p) /**< ecma-string */
1884 {
1885 JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
1886 JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (string1_p) && !ECMA_IS_DIRECT_STRING (string2_p));
1887
1888 /* Fast paths first. */
1889 if (string1_p == string2_p)
1890 {
1891 return true;
1892 }
1893
1894 if (string1_p->u.hash != string2_p->u.hash)
1895 {
1896 return false;
1897 }
1898
1899 ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
1900
1901 if (string1_container != ECMA_STRING_GET_CONTAINER (string2_p))
1902 {
1903 return false;
1904 }
1905
1906 if (string1_container == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1907 {
1908 return true;
1909 }
1910
1911 #if ENABLED (JERRY_ES2015)
1912 if (string1_container == ECMA_STRING_CONTAINER_SYMBOL)
1913 {
1914 return false;
1915 }
1916 #endif /* ENABLED (JERRY_ES2015) */
1917
1918 return ecma_compare_ecma_strings_longpath (string1_p, string2_p);
1919 } /* ecma_compare_ecma_non_direct_strings */
1920
1921 /**
1922 * Relational compare of ecma-strings.
1923 *
1924 * First string is less than second string if:
1925 * - strings are not equal;
1926 * - first string is prefix of second or is lexicographically less than second.
1927 *
1928 * @return true - if first string is less than second string,
1929 * false - otherwise
1930 */
1931 bool
ecma_compare_ecma_strings_relational(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1932 ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-string */
1933 const ecma_string_t *string2_p) /**< ecma-string */
1934 {
1935 if (ecma_compare_ecma_strings (string1_p,
1936 string2_p))
1937 {
1938 return false;
1939 }
1940
1941 const lit_utf8_byte_t *utf8_string1_p, *utf8_string2_p;
1942 lit_utf8_size_t utf8_string1_size, utf8_string2_size;
1943
1944 lit_utf8_byte_t uint32_to_string_buffer1[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
1945 lit_utf8_byte_t uint32_to_string_buffer2[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
1946
1947 if (ECMA_IS_DIRECT_STRING (string1_p))
1948 {
1949 if (ECMA_GET_DIRECT_STRING_TYPE (string1_p) != ECMA_DIRECT_STRING_UINT)
1950 {
1951 utf8_string1_p = ecma_string_get_chars_fast (string1_p, &utf8_string1_size);
1952 }
1953 else
1954 {
1955 utf8_string1_size = ecma_uint32_to_utf8_string ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string1_p),
1956 uint32_to_string_buffer1,
1957 ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
1958 utf8_string1_p = uint32_to_string_buffer1;
1959 }
1960 }
1961 else
1962 {
1963 JERRY_ASSERT (string1_p->refs_and_container >= ECMA_STRING_REF_ONE);
1964
1965 if (ECMA_STRING_GET_CONTAINER (string1_p) != ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1966 {
1967 utf8_string1_p = ecma_string_get_chars_fast (string1_p, &utf8_string1_size);
1968 }
1969 else
1970 {
1971 utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
1972 uint32_to_string_buffer1,
1973 ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
1974 utf8_string1_p = uint32_to_string_buffer1;
1975 }
1976 }
1977
1978 if (ECMA_IS_DIRECT_STRING (string2_p))
1979 {
1980 if (ECMA_GET_DIRECT_STRING_TYPE (string2_p) != ECMA_DIRECT_STRING_UINT)
1981 {
1982 utf8_string2_p = ecma_string_get_chars_fast (string2_p, &utf8_string2_size);
1983 }
1984 else
1985 {
1986 utf8_string2_size = ecma_uint32_to_utf8_string ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string2_p),
1987 uint32_to_string_buffer2,
1988 ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
1989 utf8_string2_p = uint32_to_string_buffer2;
1990 }
1991 }
1992 else
1993 {
1994 JERRY_ASSERT (string2_p->refs_and_container >= ECMA_STRING_REF_ONE);
1995
1996 if (ECMA_STRING_GET_CONTAINER (string2_p) != ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1997 {
1998 utf8_string2_p = ecma_string_get_chars_fast (string2_p, &utf8_string2_size);
1999 }
2000 else
2001 {
2002 utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
2003 uint32_to_string_buffer2,
2004 ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
2005 utf8_string2_p = uint32_to_string_buffer2;
2006 }
2007 }
2008
2009 return lit_compare_utf8_strings_relational (utf8_string1_p,
2010 utf8_string1_size,
2011 utf8_string2_p,
2012 utf8_string2_size);
2013 } /* ecma_compare_ecma_strings_relational */
2014
/**
 * Marker returned by ecma_string_get_ascii_size when it cannot report a size
 * for the given string; the callers test for it before using their own lookup.
 */
#define ECMA_STRING_NO_ASCII_SIZE 0xffffffff
2019
2020 /**
2021 * Return the size of uint32 and magic strings.
2022 * The length of these strings are equal to their size.
2023 *
2024 * @return number of characters in the string
2025 */
2026 static ecma_length_t
ecma_string_get_ascii_size(const ecma_string_t * string_p)2027 ecma_string_get_ascii_size (const ecma_string_t *string_p) /**< ecma-string */
2028 {
2029 if (ECMA_IS_DIRECT_STRING (string_p))
2030 {
2031 switch (ECMA_GET_DIRECT_STRING_TYPE (string_p))
2032 {
2033 case ECMA_DIRECT_STRING_MAGIC:
2034 {
2035 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2036
2037 if (id >= LIT_MAGIC_STRING__COUNT)
2038 {
2039 return ECMA_STRING_NO_ASCII_SIZE;
2040 }
2041
2042 JERRY_ASSERT (ECMA_STRING_IS_ASCII (lit_get_magic_string_utf8 (id),
2043 lit_get_magic_string_size (id)));
2044
2045 return lit_get_magic_string_size (id);
2046 }
2047 default:
2048 {
2049 JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_UINT);
2050 uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2051 return ecma_string_get_uint32_size (uint32_number);
2052 }
2053 }
2054 }
2055
2056 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
2057
2058 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
2059 {
2060 return ecma_string_get_uint32_size (string_p->u.uint32_number);
2061 }
2062 else if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)
2063 {
2064 return ((ecma_ascii_string_t *) string_p)->size;
2065 }
2066
2067 return ECMA_STRING_NO_ASCII_SIZE;
2068 } /* ecma_string_get_ascii_size */
2069
2070 /**
2071 * Get length of ecma-string
2072 *
2073 * @return number of characters in the string
2074 */
2075 ecma_length_t
ecma_string_get_length(const ecma_string_t * string_p)2076 ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */
2077 {
2078 ecma_length_t length = ecma_string_get_ascii_size (string_p);
2079
2080 if (length != ECMA_STRING_NO_ASCII_SIZE)
2081 {
2082 return length;
2083 }
2084
2085 if (ECMA_IS_DIRECT_STRING (string_p))
2086 {
2087 JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2088 JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2089
2090 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT;
2091 return lit_utf8_string_length (lit_get_magic_string_ex_utf8 (id),
2092 lit_get_magic_string_ex_size (id));
2093 }
2094
2095 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2096 {
2097 return (ecma_length_t) (((ecma_utf8_string_t *) string_p)->length);
2098 }
2099
2100 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2101 {
2102 return (ecma_length_t) (((ecma_long_utf8_string_t *) string_p)->length);
2103 }
2104
2105 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2106
2107 lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
2108 return lit_utf8_string_length (lit_get_magic_string_ex_utf8 (id),
2109 lit_get_magic_string_ex_size (id));
2110 } /* ecma_string_get_length */
2111
2112 /**
2113 * Get length of UTF-8 encoded string length from ecma-string
2114 *
2115 * @return number of characters in the UTF-8 encoded string
2116 */
2117 ecma_length_t
ecma_string_get_utf8_length(const ecma_string_t * string_p)2118 ecma_string_get_utf8_length (const ecma_string_t *string_p) /**< ecma-string */
2119 {
2120 ecma_length_t length = ecma_string_get_ascii_size (string_p);
2121
2122 if (length != ECMA_STRING_NO_ASCII_SIZE)
2123 {
2124 return length;
2125 }
2126
2127 if (ECMA_IS_DIRECT_STRING (string_p))
2128 {
2129 JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2130 JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2131
2132 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT;
2133 return lit_get_utf8_length_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2134 lit_get_magic_string_ex_size (id));
2135 }
2136
2137 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2138 {
2139 ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p;
2140
2141 if (utf8_string_p->size == utf8_string_p->length)
2142 {
2143 return (ecma_length_t) (utf8_string_p->length);
2144 }
2145
2146 return lit_get_utf8_length_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), utf8_string_p->size);
2147 }
2148
2149 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2150 {
2151 ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) string_p;
2152
2153 if (long_utf8_string_p->size == long_utf8_string_p->length)
2154 {
2155 return (ecma_length_t) (long_utf8_string_p->length);
2156 }
2157
2158 return lit_get_utf8_length_of_cesu8_string (ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p),
2159 long_utf8_string_p->size);
2160 }
2161
2162 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2163
2164 lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
2165
2166 return lit_get_utf8_length_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2167 lit_get_magic_string_ex_size (id));
2168 } /* ecma_string_get_utf8_length */
2169
2170 /**
2171 * Get size of ecma-string
2172 *
2173 * @return number of bytes in the buffer needed to represent the string
2174 */
2175 lit_utf8_size_t
ecma_string_get_size(const ecma_string_t * string_p)2176 ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */
2177 {
2178 ecma_length_t length = ecma_string_get_ascii_size (string_p);
2179
2180 if (length != ECMA_STRING_NO_ASCII_SIZE)
2181 {
2182 return length;
2183 }
2184
2185 if (ECMA_IS_DIRECT_STRING (string_p))
2186 {
2187 JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2188 JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2189
2190 return lit_get_magic_string_ex_size ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT);
2191 }
2192
2193 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2194 {
2195 return (lit_utf8_size_t) (((ecma_utf8_string_t *) string_p)->size);
2196 }
2197
2198 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2199 {
2200 return (lit_utf8_size_t) (((ecma_long_utf8_string_t *) string_p)->size);
2201 }
2202
2203 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2204
2205 return lit_get_magic_string_ex_size (LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id);
2206 } /* ecma_string_get_size */
2207
2208 /**
2209 * Get the UTF-8 encoded string size from ecma-string
2210 *
2211 * @return number of bytes in the buffer needed to represent an UTF-8 encoded string
2212 */
2213 lit_utf8_size_t
ecma_string_get_utf8_size(const ecma_string_t * string_p)2214 ecma_string_get_utf8_size (const ecma_string_t *string_p) /**< ecma-string */
2215 {
2216 ecma_length_t length = ecma_string_get_ascii_size (string_p);
2217
2218 if (length != ECMA_STRING_NO_ASCII_SIZE)
2219 {
2220 return length;
2221 }
2222
2223 if (ECMA_IS_DIRECT_STRING (string_p))
2224 {
2225 JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2226 JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2227
2228 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT;
2229 return lit_get_utf8_size_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2230 lit_get_magic_string_ex_size (id));
2231 }
2232
2233 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2234 {
2235 ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p;
2236
2237 if (utf8_string_p->size == utf8_string_p->length)
2238 {
2239 return utf8_string_p->size;
2240 }
2241
2242 return lit_get_utf8_size_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), utf8_string_p->size);
2243 }
2244
2245 if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2246 {
2247 ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) string_p;
2248
2249 if (long_utf8_string_p->size == long_utf8_string_p->length)
2250 {
2251 return long_utf8_string_p->size;
2252 }
2253
2254 return lit_get_utf8_size_of_cesu8_string (ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p),
2255 long_utf8_string_p->size);
2256 }
2257
2258 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2259
2260 lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
2261 return lit_get_utf8_size_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2262 lit_get_magic_string_ex_size (id));
2263 } /* ecma_string_get_utf8_size */
2264
2265 /**
2266 * Get character from specified position in an external ecma-string.
2267 *
2268 * @return character value
2269 */
2270 static ecma_char_t JERRY_ATTR_NOINLINE
ecma_external_string_get_char_at_pos(lit_utf8_size_t id,ecma_length_t index)2271 ecma_external_string_get_char_at_pos (lit_utf8_size_t id, /**< id of the external magic string */
2272 ecma_length_t index) /**< index of character */
2273 {
2274 id -= LIT_MAGIC_STRING__COUNT;
2275 const lit_utf8_byte_t *data_p = lit_get_magic_string_ex_utf8 (id);
2276 lit_utf8_size_t size = lit_get_magic_string_ex_size (id);
2277 lit_utf8_size_t length = lit_utf8_string_length (data_p, size);
2278
2279 if (JERRY_LIKELY (size == length))
2280 {
2281 return (ecma_char_t) data_p[index];
2282 }
2283
2284 return lit_utf8_string_code_unit_at (data_p, size, index);
2285 } /* ecma_external_string_get_char_at_pos */
2286
2287 /**
2288 * Get character from specified position in the ecma-string.
2289 *
2290 * @return character value
2291 */
2292 ecma_char_t
ecma_string_get_char_at_pos(const ecma_string_t * string_p,ecma_length_t index)2293 ecma_string_get_char_at_pos (const ecma_string_t *string_p, /**< ecma-string */
2294 ecma_length_t index) /**< index of character */
2295 {
2296 JERRY_ASSERT (index < ecma_string_get_length (string_p));
2297
2298 lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
2299
2300 if (ECMA_IS_DIRECT_STRING (string_p))
2301 {
2302 switch (ECMA_GET_DIRECT_STRING_TYPE (string_p))
2303 {
2304 case ECMA_DIRECT_STRING_MAGIC:
2305 {
2306 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2307
2308 if (JERRY_LIKELY (id < LIT_MAGIC_STRING__COUNT))
2309 {
2310 /* All magic strings must be ascii strings. */
2311 const lit_utf8_byte_t *data_p = lit_get_magic_string_utf8 (id);
2312
2313 return (ecma_char_t) data_p[index];
2314 }
2315
2316 return ecma_external_string_get_char_at_pos (id, index);
2317 }
2318 default:
2319 {
2320 JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_UINT);
2321 uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2322
2323 ecma_uint32_to_utf8_string (uint32_number, uint32_to_string_buffer, ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
2324
2325 return (ecma_char_t) uint32_to_string_buffer[index];
2326 }
2327 }
2328 }
2329
2330 JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
2331
2332 switch (ECMA_STRING_GET_CONTAINER (string_p))
2333 {
2334 case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
2335 {
2336 ecma_utf8_string_t *utf8_string_desc_p = (ecma_utf8_string_t *) string_p;
2337 lit_utf8_size_t size = utf8_string_desc_p->size;
2338 const lit_utf8_byte_t *data_p = ECMA_UTF8_STRING_GET_BUFFER (string_p);
2339
2340 if (JERRY_LIKELY (size == utf8_string_desc_p->length))
2341 {
2342 return (ecma_char_t) data_p[index];
2343 }
2344
2345 return lit_utf8_string_code_unit_at (data_p, size, index);
2346 }
2347 case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
2348 {
2349 ecma_long_utf8_string_t *long_utf8_string_desc_p = (ecma_long_utf8_string_t *) string_p;
2350 lit_utf8_size_t size = long_utf8_string_desc_p->size;
2351 const lit_utf8_byte_t *data_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p);
2352
2353 if (JERRY_LIKELY (size == long_utf8_string_desc_p->length))
2354 {
2355 return (ecma_char_t) data_p[index];
2356 }
2357
2358 return lit_utf8_string_code_unit_at (data_p, size, index);
2359 }
2360 case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
2361 {
2362 const lit_utf8_byte_t *data_p = ECMA_ASCII_STRING_GET_BUFFER (string_p);
2363 return (ecma_char_t) data_p[index];
2364 }
2365 case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
2366 {
2367 ecma_uint32_to_utf8_string (string_p->u.uint32_number,
2368 uint32_to_string_buffer,
2369 ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
2370
2371 return (ecma_char_t) uint32_to_string_buffer[index];
2372 }
2373 default:
2374 {
2375 JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2376 return ecma_external_string_get_char_at_pos (string_p->u.magic_string_ex_id, index);
2377 }
2378 }
2379 } /* ecma_string_get_char_at_pos */
2380
2381 /**
2382 * Check if passed string equals to one of magic strings
2383 * and if equal magic string was found, return it's id in 'out_id_p' argument.
2384 *
2385 * @return id - if magic string equal to passed string was found,
2386 * LIT_MAGIC_STRING__COUNT - otherwise.
2387 */
2388 lit_magic_string_id_t
ecma_get_string_magic(const ecma_string_t * string_p)2389 ecma_get_string_magic (const ecma_string_t *string_p) /**< ecma-string */
2390 {
2391 if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_MAGIC))
2392 {
2393 uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2394
2395 if (id < LIT_MAGIC_STRING__COUNT)
2396 {
2397 return (lit_magic_string_id_t) id;
2398 }
2399 }
2400
2401 return LIT_MAGIC_STRING__COUNT;
2402 } /* ecma_get_string_magic */
2403
2404 /**
2405 * Try to calculate hash of the ecma-string
2406 *
2407 * @return calculated hash
2408 */
2409 inline lit_string_hash_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_hash(const ecma_string_t * string_p)2410 ecma_string_hash (const ecma_string_t *string_p) /**< ecma-string to calculate hash for */
2411 {
2412 if (ECMA_IS_DIRECT_STRING (string_p))
2413 {
2414 return (lit_string_hash_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2415 }
2416
2417 return (lit_string_hash_t) string_p->u.hash;
2418 } /* ecma_string_hash */
2419
2420 /**
2421 * Create a substring from an ecma string
2422 *
2423 * @return a newly consturcted ecma string with its value initialized to a copy of a substring of the first argument
2424 */
2425 ecma_string_t *
ecma_string_substr(const ecma_string_t * string_p,ecma_length_t start_pos,ecma_length_t end_pos)2426 ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma string */
2427 ecma_length_t start_pos, /**< start position, should be less or equal than string length */
2428 ecma_length_t end_pos) /**< end position, should be less or equal than string length */
2429 {
2430 const ecma_length_t string_length = ecma_string_get_length (string_p);
2431 JERRY_ASSERT (start_pos <= string_length);
2432 JERRY_ASSERT (end_pos <= string_length);
2433
2434 if (start_pos >= end_pos)
2435 {
2436 return ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
2437 }
2438
2439 ecma_string_t *ecma_string_p = NULL;
2440 end_pos -= start_pos;
2441
2442 ECMA_STRING_TO_UTF8_STRING (string_p, start_p, buffer_size);
2443
2444 if (string_length == buffer_size)
2445 {
2446 ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p + start_pos,
2447 (lit_utf8_size_t) end_pos);
2448 }
2449 else
2450 {
2451 while (start_pos--)
2452 {
2453 start_p += lit_get_unicode_char_size_by_utf8_first_byte (*start_p);
2454 }
2455
2456 const lit_utf8_byte_t *end_p = start_p;
2457 while (end_pos--)
2458 {
2459 end_p += lit_get_unicode_char_size_by_utf8_first_byte (*end_p);
2460 }
2461
2462 ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p, (lit_utf8_size_t) (end_p - start_p));
2463 }
2464
2465 ECMA_FINALIZE_UTF8_STRING (start_p, buffer_size);
2466
2467 return ecma_string_p;
2468 } /* ecma_string_substr */
2469
2470 /**
2471 * Helper function for trimming.
2472 *
2473 * Used by:
2474 * - ecma_string_trim
2475 * - ecma_utf8_string_to_number
2476 * - ecma_builtin_global_object_parse_int
2477 * - ecma_builtin_global_object_parse_float
2478 */
2479 void
ecma_string_trim_helper(const lit_utf8_byte_t ** utf8_str_p,lit_utf8_size_t * utf8_str_size)2480 ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] current string position */
2481 lit_utf8_size_t *utf8_str_size) /**< [in, out] size of the given string */
2482 {
2483 ecma_char_t ch;
2484 lit_utf8_size_t read_size;
2485 const lit_utf8_byte_t *nonws_start_p = *utf8_str_p + *utf8_str_size;
2486 const lit_utf8_byte_t *current_p = *utf8_str_p;
2487
2488 while (current_p < nonws_start_p)
2489 {
2490 read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
2491
2492 if (!lit_char_is_white_space (ch))
2493 {
2494 nonws_start_p = current_p;
2495 break;
2496 }
2497
2498 current_p += read_size;
2499 }
2500
2501 current_p = *utf8_str_p + *utf8_str_size;
2502
2503 while (current_p > nonws_start_p)
2504 {
2505 read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
2506
2507 if (!lit_char_is_white_space (ch))
2508 {
2509 break;
2510 }
2511
2512 current_p -= read_size;
2513 }
2514
2515 *utf8_str_p = nonws_start_p;
2516 *utf8_str_size = (lit_utf8_size_t) (current_p - nonws_start_p);
2517 } /* ecma_string_trim_helper */
2518
2519 /**
2520 * Trim leading and trailing whitespace characters from string.
2521 *
2522 * @return trimmed ecma string
2523 */
2524 ecma_string_t *
ecma_string_trim(const ecma_string_t * string_p)2525 ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string */
2526 {
2527 ecma_string_t *ret_string_p;
2528
2529 lit_utf8_size_t utf8_str_size;
2530 uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
2531 const lit_utf8_byte_t *utf8_str_p = ecma_string_get_chars (string_p, &utf8_str_size, NULL, NULL, &flags);
2532
2533 if (utf8_str_size > 0)
2534 {
2535 ecma_string_trim_helper (&utf8_str_p, &utf8_str_size);
2536 ret_string_p = ecma_new_ecma_string_from_utf8 (utf8_str_p, utf8_str_size);
2537 }
2538 else
2539 {
2540 ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
2541 }
2542
2543 if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
2544 {
2545 jmem_heap_free_block ((void *) utf8_str_p, utf8_str_size);
2546 }
2547
2548 return ret_string_p;
2549 } /* ecma_string_trim */
2550
2551 /**
2552 * Create an empty string builder
2553 *
2554 * @return new string builder
2555 */
2556 ecma_stringbuilder_t
ecma_stringbuilder_create(void)2557 ecma_stringbuilder_create (void)
2558 {
2559 const lit_utf8_size_t initial_size = sizeof (ecma_ascii_string_t);
2560 ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
2561 header_p->current_size = initial_size;
2562 #if ENABLED (JERRY_MEM_STATS)
2563 jmem_stats_allocate_string_bytes (initial_size);
2564 #endif /* ENABLED (JERRY_MEM_STATS) */
2565
2566 ecma_stringbuilder_t ret = {.header_p = header_p};
2567 return ret;
2568 } /* ecma_stringbuilder_create */
2569
2570 /**
2571 * Create a string builder from an ecma string
2572 *
2573 * @return new string builder
2574 */
2575 ecma_stringbuilder_t
ecma_stringbuilder_create_from(ecma_string_t * string_p)2576 ecma_stringbuilder_create_from (ecma_string_t *string_p) /**< ecma string */
2577 {
2578 const lit_utf8_size_t string_size = ecma_string_get_size (string_p);
2579 const lit_utf8_size_t initial_size = string_size + (lit_utf8_size_t) sizeof (ecma_ascii_string_t);
2580
2581 ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
2582 header_p->current_size = initial_size;
2583 #if ENABLED (JERRY_MEM_STATS)
2584 jmem_stats_allocate_string_bytes (initial_size);
2585 #endif /* ENABLED (JERRY_MEM_STATS) */
2586
2587 size_t copied_size = ecma_string_copy_to_cesu8_buffer (string_p,
2588 ECMA_STRINGBUILDER_STRING_PTR (header_p),
2589 string_size);
2590 JERRY_ASSERT (copied_size == string_size);
2591
2592 ecma_stringbuilder_t ret = {.header_p = header_p};
2593 return ret;
2594 } /* ecma_stringbuilder_create_from */
2595
2596 /**
2597 * Create a string builder from a raw string
2598 *
2599 * @return new string builder
2600 */
2601 ecma_stringbuilder_t
ecma_stringbuilder_create_raw(const lit_utf8_byte_t * data_p,const lit_utf8_size_t data_size)2602 ecma_stringbuilder_create_raw (const lit_utf8_byte_t *data_p, /**< pointer to data */
2603 const lit_utf8_size_t data_size) /**< size of the data */
2604 {
2605 const lit_utf8_size_t initial_size = data_size + (lit_utf8_size_t) sizeof (ecma_ascii_string_t);
2606
2607 ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
2608 header_p->current_size = initial_size;
2609 #if ENABLED (JERRY_MEM_STATS)
2610 jmem_stats_allocate_string_bytes (initial_size);
2611 #endif /* ENABLED (JERRY_MEM_STATS) */
2612
2613 memcpy (ECMA_STRINGBUILDER_STRING_PTR (header_p), data_p, data_size);
2614
2615 ecma_stringbuilder_t ret = {.header_p = header_p};
2616 return ret;
2617 } /* ecma_stringbuilder_create_raw */
2618
2619 /**
2620 * Grow the underlying buffer of a string builder
2621 *
2622 * @return pointer to the end of the data in the underlying buffer
2623 */
2624 static lit_utf8_byte_t *
ecma_stringbuilder_grow(ecma_stringbuilder_t * builder_p,lit_utf8_size_t required_size)2625 ecma_stringbuilder_grow (ecma_stringbuilder_t *builder_p, /**< string builder */
2626 lit_utf8_size_t required_size) /**< required size */
2627 {
2628 ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2629 JERRY_ASSERT (header_p != NULL);
2630
2631 const lit_utf8_size_t new_size = header_p->current_size + required_size;
2632 header_p = jmem_heap_realloc_block (header_p, header_p->current_size, new_size);
2633 header_p->current_size = new_size;
2634 builder_p->header_p = header_p;
2635
2636 #if ENABLED (JERRY_MEM_STATS)
2637 jmem_stats_allocate_string_bytes (required_size);
2638 #endif /* ENABLED (JERRY_MEM_STATS) */
2639
2640 return ((lit_utf8_byte_t *) header_p) + header_p->current_size - required_size;
2641 } /* ecma_stringbuilder_grow */
2642
2643 /**
2644 * Get the current size of the string in a string builder
2645 *
2646 * @return the size of the string data
2647 */
2648 lit_utf8_size_t
ecma_stringbuilder_get_size(ecma_stringbuilder_t * builder_p)2649 ecma_stringbuilder_get_size (ecma_stringbuilder_t *builder_p) /**< string builder */
2650 {
2651 ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2652 JERRY_ASSERT (header_p != NULL);
2653
2654 return ECMA_STRINGBUILDER_STRING_SIZE (header_p);
2655 } /* ecma_stringbuilder_get_size */
2656
2657 /**
2658 * Get pointer to the raw string data in a string builder
2659 *
2660 * @return pointer to the string data
2661 */
2662 lit_utf8_byte_t *
ecma_stringbuilder_get_data(ecma_stringbuilder_t * builder_p)2663 ecma_stringbuilder_get_data (ecma_stringbuilder_t *builder_p) /**< string builder */
2664 {
2665 ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2666 JERRY_ASSERT (header_p != NULL);
2667
2668 return ECMA_STRINGBUILDER_STRING_PTR (header_p);
2669 } /* ecma_stringbuilder_get_data */
2670
2671 /**
2672 * Revert the string builder to a smaller size
2673 */
2674 void
ecma_stringbuilder_revert(ecma_stringbuilder_t * builder_p,const lit_utf8_size_t size)2675 ecma_stringbuilder_revert (ecma_stringbuilder_t *builder_p, /**< string builder */
2676 const lit_utf8_size_t size) /**< new size */
2677 {
2678 ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2679 JERRY_ASSERT (header_p != NULL);
2680
2681 const lit_utf8_size_t new_size = size + (lit_utf8_size_t) (sizeof (ecma_ascii_string_t));
2682 JERRY_ASSERT (new_size <= header_p->current_size);
2683
2684 #if ENABLED (JERRY_MEM_STATS)
2685 jmem_stats_free_string_bytes (header_p->current_size - new_size);
2686 #endif /* ENABLED (JERRY_MEM_STATS) */
2687
2688 header_p = jmem_heap_realloc_block (header_p, header_p->current_size, new_size);
2689 header_p->current_size = new_size;
2690 builder_p->header_p = header_p;
2691 } /* ecma_stringbuilder_revert */
2692
2693 /**
2694 * Append an ecma_string_t to a string builder
2695 */
2696 void
ecma_stringbuilder_append(ecma_stringbuilder_t * builder_p,const ecma_string_t * string_p)2697 ecma_stringbuilder_append (ecma_stringbuilder_t *builder_p, /**< string builder */
2698 const ecma_string_t *string_p) /**< ecma string */
2699 {
2700 const lit_utf8_size_t string_size = ecma_string_get_size (string_p);
2701 lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, string_size);
2702
2703 size_t copied_size = ecma_string_copy_to_cesu8_buffer (string_p,
2704 dest_p,
2705 string_size);
2706 JERRY_ASSERT (copied_size == string_size);
2707 } /* ecma_stringbuilder_append */
2708
2709 /**
2710 * Append a magic string to a string builder
2711 */
2712 void
ecma_stringbuilder_append_magic(ecma_stringbuilder_t * builder_p,const lit_magic_string_id_t id)2713 ecma_stringbuilder_append_magic (ecma_stringbuilder_t *builder_p, /**< string builder */
2714 const lit_magic_string_id_t id) /**< magic string id */
2715 {
2716 const lit_utf8_size_t string_size = lit_get_magic_string_size (id);
2717 lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, string_size);
2718
2719 const lit_utf8_byte_t *string_data_p = lit_get_magic_string_utf8 (id);
2720 memcpy (dest_p, string_data_p, string_size);
2721 } /* ecma_stringbuilder_append_magic */
2722
2723 /**
2724 * Append raw string data to a string builder
2725 */
2726 void
ecma_stringbuilder_append_raw(ecma_stringbuilder_t * builder_p,const lit_utf8_byte_t * data_p,const lit_utf8_size_t data_size)2727 ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p, /**< string builder */
2728 const lit_utf8_byte_t *data_p, /**< pointer to data */
2729 const lit_utf8_size_t data_size) /**< size of the data */
2730 {
2731 lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, data_size);
2732 memcpy (dest_p, data_p, data_size);
2733 } /* ecma_stringbuilder_append_raw */
2734
2735 /**
2736 * Append an ecma_char_t to a string builder
2737 */
2738 void
ecma_stringbuilder_append_char(ecma_stringbuilder_t * builder_p,const ecma_char_t c)2739 ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
2740 const ecma_char_t c) /**< ecma char */
2741 {
2742 const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c);
2743 lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
2744
2745 lit_code_point_to_cesu8_bytes (dest_p, c);
2746 } /* ecma_stringbuilder_append_char */
2747
2748 /**
2749 * Append a single byte to a string builder
2750 */
2751 void
ecma_stringbuilder_append_byte(ecma_stringbuilder_t * builder_p,const lit_utf8_byte_t byte)2752 ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, /**< string builder */
2753 const lit_utf8_byte_t byte) /**< byte */
2754 {
2755 lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, 1);
2756 *dest_p = byte;
2757 } /* ecma_stringbuilder_append_byte */
2758
2759 /**
2760 * Finalize a string builder, returning the created string, and releasing the underlying buffer.
2761 *
2762 * Note:
2763 * The builder should no longer be used.
2764 *
2765 * @return the created string
2766 */
2767 ecma_string_t *
ecma_stringbuilder_finalize(ecma_stringbuilder_t * builder_p)2768 ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p) /**< string builder */
2769 {
2770 ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2771 JERRY_ASSERT (header_p != NULL);
2772
2773 const lit_utf8_size_t string_size = ECMA_STRINGBUILDER_STRING_SIZE (header_p);
2774 lit_utf8_byte_t *string_begin_p = ECMA_STRINGBUILDER_STRING_PTR (header_p);
2775
2776 ecma_string_t *string_p = ecma_find_special_string (string_begin_p, string_size);
2777
2778 if (JERRY_UNLIKELY (string_p != NULL))
2779 {
2780 ecma_stringbuilder_destroy (builder_p);
2781 return string_p;
2782 }
2783
2784 #ifndef JERRY_NDEBUG
2785 builder_p->header_p = NULL;
2786 #endif
2787
2788 size_t container_size = sizeof (ecma_utf8_string_t);
2789 const lit_string_hash_t hash = lit_utf8_string_calc_hash (string_begin_p, string_size);
2790 const lit_utf8_size_t length = lit_utf8_string_length (string_begin_p, string_size);
2791
2792 if (JERRY_LIKELY (string_size <= UINT16_MAX))
2793 {
2794 if (JERRY_LIKELY (length == string_size))
2795 {
2796 ecma_ascii_string_t *ascii_string_p = (ecma_ascii_string_t *) header_p;
2797 ascii_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
2798 ascii_string_p->header.u.hash = hash;
2799 ascii_string_p->size = (uint16_t) string_size;
2800
2801 return (ecma_string_t *) ascii_string_p;
2802 }
2803 }
2804 else
2805 {
2806 container_size = sizeof (ecma_long_utf8_string_t);
2807 }
2808
2809 const size_t utf8_string_size = string_size + container_size;
2810 header_p = jmem_heap_realloc_block (header_p, header_p->current_size, utf8_string_size);
2811 memmove (((lit_utf8_byte_t *) header_p + container_size),
2812 ECMA_STRINGBUILDER_STRING_PTR (header_p),
2813 string_size);
2814
2815 #if ENABLED (JERRY_MEM_STATS)
2816 jmem_stats_allocate_string_bytes (container_size - sizeof (ecma_ascii_string_t));
2817 #endif /* ENABLED (JERRY_MEM_STATS) */
2818
2819 if (JERRY_LIKELY (string_size <= UINT16_MAX))
2820 {
2821 ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) header_p;
2822
2823 utf8_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
2824 utf8_string_p->header.u.hash = hash;
2825 utf8_string_p->size = (uint16_t) string_size;
2826 utf8_string_p->length = (uint16_t) length;
2827
2828 return (ecma_string_t *) utf8_string_p;
2829 }
2830
2831 ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) header_p;
2832
2833 long_utf8_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
2834 long_utf8_string_p->header.u.hash = hash;
2835 long_utf8_string_p->size = string_size;
2836 long_utf8_string_p->length = length;
2837
2838 return (ecma_string_t *) long_utf8_string_p;
2839 } /* ecma_stringbuilder_finalize */
2840
2841 /**
2842 * Destroy a string builder that is no longer needed without creating a string from the contents.
2843 */
2844 void
ecma_stringbuilder_destroy(ecma_stringbuilder_t * builder_p)2845 ecma_stringbuilder_destroy (ecma_stringbuilder_t *builder_p) /**< string builder */
2846 {
2847 JERRY_ASSERT (builder_p->header_p != NULL);
2848 const lit_utf8_size_t size = builder_p->header_p->current_size;
2849 jmem_heap_free_block (builder_p->header_p, size);
2850
2851 #ifndef JERRY_NDEBUG
2852 builder_p->header_p = NULL;
2853 #endif
2854
2855 #if ENABLED (JERRY_MEM_STATS)
2856 jmem_stats_free_string_bytes (size);
2857 #endif /* ENABLED (JERRY_MEM_STATS) */
2858 } /* ecma_stringbuilder_destroy */
2859
2860 #if ENABLED (JERRY_ES2015)
2861 /**
2862 * AdvanceStringIndex operation
2863 *
2864 * See also:
2865 * ECMA-262 v6.0, 21.2.5.2.3
2866 *
2867 * @return uint32_t - the proper character index based on the operation
2868 */
2869 uint32_t
ecma_op_advance_string_index(ecma_string_t * str_p,uint32_t index,bool is_unicode)2870 ecma_op_advance_string_index (ecma_string_t *str_p, /**< input string */
2871 uint32_t index, /**< given character index */
2872 bool is_unicode) /**< true - if regexp object's "unicode" flag is set
2873 false - otherwise */
2874 {
2875 if (index >= UINT32_MAX - 1)
2876 {
2877 return UINT32_MAX;
2878 }
2879
2880 uint32_t next_index = index + 1;
2881
2882 if (!is_unicode)
2883 {
2884 return next_index;
2885 }
2886
2887 ecma_length_t str_len = ecma_string_get_length (str_p);
2888
2889 if (next_index >= str_len)
2890 {
2891 return next_index;
2892 }
2893
2894 ecma_char_t first = ecma_string_get_char_at_pos (str_p, index);
2895
2896 if (first < LIT_UTF16_HIGH_SURROGATE_MIN || first > LIT_UTF16_HIGH_SURROGATE_MAX)
2897 {
2898 return next_index;
2899 }
2900
2901 ecma_char_t second = ecma_string_get_char_at_pos (str_p, next_index);
2902
2903 if (second < LIT_UTF16_LOW_SURROGATE_MIN || second > LIT_UTF16_LOW_SURROGATE_MAX)
2904 {
2905 return next_index;
2906 }
2907
2908 return next_index + 1;
2909 } /* ecma_op_advance_string_index */
2910 #endif /* ENABLED (JERRY_ES2015) */
2911
2912 /**
2913 * @}
2914 * @}
2915 */
2916