• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecma-alloc.h"
17 #include "ecma-conversion.h"
18 #include "ecma-gc.h"
19 #include "ecma-globals.h"
20 #include "ecma-helpers.h"
21 #include "jrt.h"
22 #include "jrt-libc-includes.h"
23 #include "lit-char-helpers.h"
24 #include "lit-magic-strings.h"
25 
26 /** \addtogroup ecma ECMA
27  * @{
28  *
29  * \addtogroup ecmahelpers Helpers for operations with ECMA data types
30  * @{
31  */
32 
33 JERRY_STATIC_ASSERT (ECMA_STRING_CONTAINER_MASK >= ECMA_STRING_CONTAINER__MAX,
34                      ecma_string_container_types_must_be_lower_than_the_container_mask);
35 
36 JERRY_STATIC_ASSERT ((ECMA_STRING_MAX_REF | ECMA_STRING_CONTAINER_MASK | ECMA_STATIC_STRING_FLAG) == UINT32_MAX,
37                      ecma_string_ref_and_container_fields_should_fill_the_32_bit_field);
38 
39 JERRY_STATIC_ASSERT (ECMA_STRING_NOT_ARRAY_INDEX == UINT32_MAX,
40                      ecma_string_not_array_index_must_be_equal_to_uint32_max);
41 
42 JERRY_STATIC_ASSERT ((ECMA_TYPE_DIRECT_STRING & 0x1) != 0,
43                      ecma_type_direct_string_must_be_odd_number);
44 
45 JERRY_STATIC_ASSERT (LIT_MAGIC_STRING__COUNT <= ECMA_DIRECT_STRING_MAX_IMM,
46                      all_magic_strings_must_be_encoded_as_direct_string);
47 
48 JERRY_STATIC_ASSERT ((int) ECMA_DIRECT_STRING_UINT == (int) ECMA_STRING_CONTAINER_UINT32_IN_DESC,
49                      ecma_direct_and_container_types_must_match);
50 
51 JERRY_STATIC_ASSERT (ECMA_PROPERTY_NAME_TYPE_SHIFT > ECMA_VALUE_SHIFT,
52                      ecma_property_name_type_shift_must_be_greater_than_ecma_value_shift);
53 
54 JERRY_STATIC_ASSERT (sizeof (ecma_stringbuilder_header_t) <= sizeof (ecma_ascii_string_t),
55                      ecma_stringbuilder_header_must_not_be_larger_than_ecma_ascii_string);
56 
57 /**
58  * Convert a string to an unsigned 32 bit value if possible
59  *
60  * @return true if the conversion is successful
61  *         false otherwise
62  */
63 static bool
ecma_string_to_array_index(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size,uint32_t * result_p)64 ecma_string_to_array_index (const lit_utf8_byte_t *string_p, /**< utf-8 string */
65                             lit_utf8_size_t string_size, /**< string size */
66                             uint32_t *result_p) /**< [out] converted value */
67 {
68   JERRY_ASSERT (string_size > 0 && *string_p >= LIT_CHAR_0 && *string_p <= LIT_CHAR_9);
69 
70   if (*string_p == LIT_CHAR_0)
71   {
72     *result_p = 0;
73     return (string_size == 1);
74   }
75 
76   if (string_size > ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
77   {
78     return false;
79   }
80 
81   uint32_t index = 0;
82   const lit_utf8_byte_t *string_end_p = string_p + string_size;
83 
84   if (string_size == ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
85   {
86     string_end_p--;
87   }
88 
89   do
90   {
91     if (*string_p > LIT_CHAR_9 || *string_p < LIT_CHAR_0)
92     {
93       return false;
94     }
95 
96     index = (index * 10) + (uint32_t) (*string_p++ - LIT_CHAR_0);
97   }
98   while (string_p < string_end_p);
99 
100   if (string_size < ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
101   {
102     *result_p = index;
103     return true;
104   }
105 
106   /* Overflow must be checked as well when size is
107    * equal to ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32. */
108   if (*string_p > LIT_CHAR_9
109       || *string_p < LIT_CHAR_0
110       || index > (UINT32_MAX / 10)
111       || (index == (UINT32_MAX / 10) && *string_p > LIT_CHAR_5))
112   {
113     return false;
114   }
115 
116   *result_p = (index * 10) + (uint32_t) (*string_p - LIT_CHAR_0);
117   return true;
118 } /* ecma_string_to_array_index */
119 
120 /**
121  * Returns the characters and size of a string.
122  *
123  * Note:
124  *   UINT type is not supported
125  *
126  * @return byte array start - if the byte array of a string is available
127  *         NULL - otherwise
128  */
129 static const lit_utf8_byte_t *
ecma_string_get_chars_fast(const ecma_string_t * string_p,lit_utf8_size_t * size_p)130 ecma_string_get_chars_fast (const ecma_string_t *string_p, /**< ecma-string */
131                             lit_utf8_size_t *size_p) /**< [out] size of the ecma string */
132 {
133   if (ECMA_IS_DIRECT_STRING (string_p))
134   {
135     if (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC)
136     {
137       uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
138 
139       if (id >= LIT_MAGIC_STRING__COUNT)
140       {
141         id -= LIT_MAGIC_STRING__COUNT;
142 
143         *size_p = lit_get_magic_string_ex_size (id);
144         return lit_get_magic_string_ex_utf8 (id);
145       }
146 
147       *size_p = lit_get_magic_string_size (id);
148       return lit_get_magic_string_utf8 (id);
149     }
150   }
151 
152   JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
153 
154   switch (ECMA_STRING_GET_CONTAINER (string_p))
155   {
156     case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
157     {
158       *size_p = ((ecma_utf8_string_t *) string_p)->size;
159       return ECMA_UTF8_STRING_GET_BUFFER (string_p);
160     }
161     case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
162     {
163       *size_p = ((ecma_long_utf8_string_t *) string_p)->size;
164       return ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p);
165     }
166     case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
167     {
168       *size_p = ((ecma_ascii_string_t *) string_p)->size;
169       return ECMA_ASCII_STRING_GET_BUFFER (string_p);
170     }
171     default:
172     {
173       JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
174 
175       lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
176       *size_p = lit_get_magic_string_ex_size (id);
177       return lit_get_magic_string_ex_utf8 (id);
178     }
179   }
180 } /* ecma_string_get_chars_fast */
181 
182 /**
183  * Allocate new ecma-string and fill it with reference to ECMA magic string
184  *
185  * @return pointer to ecma-string descriptor
186  */
187 static ecma_string_t *
ecma_new_ecma_string_from_magic_string_ex_id(lit_magic_string_ex_id_t id)188 ecma_new_ecma_string_from_magic_string_ex_id (lit_magic_string_ex_id_t id) /**< identifier of externl magic string */
189 {
190   JERRY_ASSERT (id < lit_get_magic_string_ex_count ());
191 
192   uintptr_t string_id = (uintptr_t) (id + LIT_MAGIC_STRING__COUNT);
193 
194   if (JERRY_LIKELY (string_id <= ECMA_DIRECT_STRING_MAX_IMM))
195   {
196     return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_MAGIC, string_id);
197   }
198 
199   ecma_string_t *string_desc_p = ecma_alloc_string ();
200 
201   string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_MAGIC_STRING_EX | ECMA_STRING_REF_ONE;
202   string_desc_p->u.magic_string_ex_id = id + LIT_MAGIC_STRING__COUNT;
203 
204   return string_desc_p;
205 } /* ecma_new_ecma_string_from_magic_string_ex_id */
206 
207 #if ENABLED (JERRY_ES2015)
208 /**
209  * Allocate new ecma-string and fill it with reference to the symbol descriptor
210  *
211  * @return pointer to ecma-string descriptor
212  */
213 ecma_string_t *
ecma_new_symbol_from_descriptor_string(ecma_value_t string_desc)214 ecma_new_symbol_from_descriptor_string (ecma_value_t string_desc) /**< ecma-string */
215 {
216   JERRY_ASSERT (!ecma_is_value_symbol (string_desc));
217 
218   ecma_extended_string_t *symbol_p = ecma_alloc_extended_string ();
219   symbol_p->header.refs_and_container = ECMA_STRING_REF_ONE | ECMA_STRING_CONTAINER_SYMBOL;
220   symbol_p->u.symbol_descriptor = string_desc;
221   symbol_p->header.u.hash = (lit_string_hash_t) (((uintptr_t) symbol_p) >> ECMA_SYMBOL_HASH_SHIFT);
222   JERRY_ASSERT ((symbol_p->header.u.hash & ECMA_GLOBAL_SYMBOL_FLAG) == 0);
223 
224   return (ecma_string_t *) symbol_p;
225 } /* ecma_new_symbol_from_descriptor_string */
226 
227 /**
228  * Check whether an ecma-string contains an ecma-symbol
229  *
230  * @return true - if the ecma-string contains an ecma-symbol
231  *         false - otherwise
232  */
233 bool
ecma_prop_name_is_symbol(ecma_string_t * string_p)234 ecma_prop_name_is_symbol (ecma_string_t *string_p) /**< ecma-string */
235 {
236   JERRY_ASSERT (string_p != NULL);
237 
238   return (!ECMA_IS_DIRECT_STRING (string_p)
239           && ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_SYMBOL);
240 } /* ecma_prop_name_is_symbol */
241 #endif /* ENABLED (JERRY_ES2015) */
242 
243 /**
244  * Allocate new UTF8 ecma-string and fill it with characters from the given utf8 buffer
245  *
246  * @return pointer to ecma-string descriptor
247  */
248 static inline ecma_string_t * JERRY_ATTR_ALWAYS_INLINE
ecma_new_ecma_string_from_utf8_buffer(lit_utf8_size_t length,lit_utf8_size_t size,lit_utf8_byte_t ** data_p)249 ecma_new_ecma_string_from_utf8_buffer (lit_utf8_size_t length, /**< length of the buffer */
250                                        lit_utf8_size_t size, /**< size of the buffer */
251                                        lit_utf8_byte_t **data_p) /**< [out] pointer to the start of the string buffer */
252 {
253   if (JERRY_LIKELY (size <= UINT16_MAX))
254   {
255     if (JERRY_LIKELY (length == size))
256     {
257       ecma_ascii_string_t *string_desc_p;
258       string_desc_p = (ecma_ascii_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_ascii_string_t));
259       string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
260       string_desc_p->size = (uint16_t) size;
261 
262       *data_p = ECMA_ASCII_STRING_GET_BUFFER (string_desc_p);
263       return (ecma_string_t *) string_desc_p;
264     }
265 
266     JERRY_ASSERT (length < size);
267 
268     ecma_utf8_string_t *string_desc_p;
269     string_desc_p = (ecma_utf8_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_utf8_string_t));
270     string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
271     string_desc_p->size = (uint16_t) size;
272     string_desc_p->length = (uint16_t) length;
273 
274     *data_p = ECMA_UTF8_STRING_GET_BUFFER (string_desc_p);
275     return (ecma_string_t *) string_desc_p;
276   }
277 
278   ecma_long_utf8_string_t *string_desc_p;
279   string_desc_p = (ecma_long_utf8_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_long_utf8_string_t));
280   string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
281   string_desc_p->size = size;
282   string_desc_p->length = length;
283 
284   *data_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string_desc_p);
285   return (ecma_string_t *) string_desc_p;
286 } /* ecma_new_ecma_string_from_utf8_buffer */
287 
288 /**
289  * Checks whether a string has a special representation, that is, the string is either a magic string,
290  * an external magic string, or an uint32 number, and creates an ecma string using the special representation,
291  * if available.
292  *
293  * @return pointer to ecma string with the special representation
294  *         NULL, if there is no special representation for the string
295  */
296 ecma_string_t *
ecma_find_special_string(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size)297 ecma_find_special_string (const lit_utf8_byte_t *string_p, /**< utf8 string */
298                           lit_utf8_size_t string_size) /**< string size */
299 {
300   JERRY_ASSERT (string_p != NULL || string_size == 0);
301   lit_magic_string_id_t magic_string_id = lit_is_utf8_string_magic (string_p, string_size);
302 
303   if (magic_string_id != LIT_MAGIC_STRING__COUNT)
304   {
305     return ecma_get_magic_string (magic_string_id);
306   }
307 
308   JERRY_ASSERT (string_size > 0);
309 
310   if (*string_p >= LIT_CHAR_0 && *string_p <= LIT_CHAR_9)
311   {
312     uint32_t array_index;
313 
314     if (ecma_string_to_array_index (string_p, string_size, &array_index))
315     {
316       return ecma_new_ecma_string_from_uint32 (array_index);
317     }
318   }
319 
320   if (lit_get_magic_string_ex_count () > 0)
321   {
322     lit_magic_string_ex_id_t magic_string_ex_id = lit_is_ex_utf8_string_magic (string_p, string_size);
323 
324     if (magic_string_ex_id < lit_get_magic_string_ex_count ())
325     {
326       return ecma_new_ecma_string_from_magic_string_ex_id (magic_string_ex_id);
327     }
328   }
329 
330   return NULL;
331 } /* ecma_find_special_string */
332 
333 /**
334  * Allocate new ecma-string and fill it with characters from the utf8 string
335  *
336  * @return pointer to ecma-string descriptor
337  */
338 ecma_string_t *
ecma_new_ecma_string_from_utf8(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size)339 ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 string */
340                                 lit_utf8_size_t string_size) /**< string size */
341 {
342   JERRY_ASSERT (string_p != NULL || string_size == 0);
343   JERRY_ASSERT (lit_is_valid_cesu8_string (string_p, string_size));
344 
345   ecma_string_t *string_desc_p = ecma_find_special_string (string_p, string_size);
346 
347   if (string_desc_p != NULL)
348   {
349     return string_desc_p;
350   }
351 
352   lit_utf8_byte_t *data_p;
353   string_desc_p = ecma_new_ecma_string_from_utf8_buffer (lit_utf8_string_length (string_p, string_size),
354                                                          string_size,
355                                                          &data_p);
356 
357   string_desc_p->u.hash = lit_utf8_string_calc_hash (string_p, string_size);
358   memcpy (data_p, string_p, string_size);
359 
360   return string_desc_p;
361 } /* ecma_new_ecma_string_from_utf8 */
362 
363 static ecma_long_utf8_string_t g_literalStringCache;
364 
ecma_new_nonref_ecma_string_from_utf8(const lit_utf8_byte_t * string_p,lit_utf8_size_t size)365 ecma_string_t * ecma_new_nonref_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t size)
366 {
367   ecma_length_t length = lit_utf8_string_length (string_p, size);
368 
369   if  (JERRY_LIKELY (size <= UINT16_MAX))
370   {
371       if  (JERRY_LIKELY (length != size))
372        {
373            JERRY_ASSERT (length < size);
374            ecma_utf8_string_t *string_desc_p;
375            string_desc_p = (ecma_utf8_string_t *)&g_literalStringCache;
376            string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
377            string_desc_p->header.u.hash = lit_utf8_string_calc_hash (string_p, size);
378            string_desc_p->size = (uint16_t) size;
379            string_desc_p->length = (uint16_t) length;
380 
381            return (ecma_string_t *) string_desc_p;
382        }
383 
384        ecma_ascii_string_t *string_desc_p;
385        string_desc_p = (ecma_ascii_string_t *)&g_literalStringCache;
386        string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
387        string_desc_p->header.u.hash = lit_utf8_string_calc_hash (string_p, size);
388        string_desc_p->size = (uint16_t) size;
389 
390        return (ecma_string_t *) string_desc_p;
391   }
392 
393   ecma_long_utf8_string_t *string_desc_p;
394   string_desc_p = (ecma_long_utf8_string_t *)&g_literalStringCache;
395   string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
396   string_desc_p->header.u.hash = lit_utf8_string_calc_hash (string_p, size);
397   string_desc_p->size = size;
398   string_desc_p->length = length;
399 
400   return  (ecma_string_t *) string_desc_p;
401 }
402 
403 /**
404  * Allocate a new ecma-string and initialize it from the utf8 string argument.
405  * All 4-bytes long unicode sequences are converted into two 3-bytes long sequences.
406  *
407  * @return pointer to ecma-string descriptor
408  */
409 ecma_string_t *
ecma_new_ecma_string_from_utf8_converted_to_cesu8(const lit_utf8_byte_t * string_p,lit_utf8_size_t string_size)410 ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string_p, /**< utf-8 string */
411                                                    lit_utf8_size_t string_size) /**< utf-8 string size */
412 {
413   JERRY_ASSERT (string_p != NULL || string_size == 0);
414 
415   ecma_length_t converted_string_length = 0;
416   lit_utf8_size_t converted_string_size = 0;
417   lit_utf8_size_t pos = 0;
418 
419   /* Calculate the required length and size information of the converted cesu-8 encoded string */
420   while (pos < string_size)
421   {
422     if ((string_p[pos] & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
423     {
424       pos++;
425     }
426     else if ((string_p[pos] & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
427     {
428       pos += 2;
429     }
430     else if ((string_p[pos] & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
431     {
432       pos += 3;
433     }
434     else
435     {
436       JERRY_ASSERT ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER);
437       pos += 4;
438       converted_string_size += 2;
439       converted_string_length++;
440     }
441 
442     converted_string_length++;
443   }
444 
445   JERRY_ASSERT (pos == string_size);
446 
447   if (converted_string_size == 0)
448   {
449     return ecma_new_ecma_string_from_utf8 (string_p, string_size);
450   }
451 
452   converted_string_size += string_size;
453 
454   JERRY_ASSERT (lit_is_valid_utf8_string (string_p, string_size));
455 
456   lit_utf8_byte_t *data_p;
457   ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8_buffer (converted_string_length,
458                                                                         converted_string_size,
459                                                                         &data_p);
460 
461   const lit_utf8_byte_t *const begin_data_p = data_p;
462   pos = 0;
463 
464   while (pos < string_size)
465   {
466     if ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
467     {
468       /* Processing 4 byte unicode sequence. Always converted to two 3 byte long sequence. */
469       lit_four_byte_utf8_char_to_cesu8 (data_p, string_p + pos);
470       data_p += 3 * 2;
471       pos += 4;
472     }
473     else
474     {
475       *data_p++ = string_p[pos++];
476     }
477   }
478 
479   JERRY_ASSERT (pos == string_size);
480 
481   string_desc_p->u.hash = lit_utf8_string_calc_hash (begin_data_p, converted_string_size);
482 
483   return (ecma_string_t *) string_desc_p;
484 } /* ecma_new_ecma_string_from_utf8_converted_to_cesu8 */
485 
486 /**
487  * Allocate new ecma-string and fill it with cesu-8 character which represents specified code unit
488  *
489  * @return pointer to ecma-string descriptor
490  */
491 ecma_string_t *
ecma_new_ecma_string_from_code_unit(ecma_char_t code_unit)492 ecma_new_ecma_string_from_code_unit (ecma_char_t code_unit) /**< code unit */
493 {
494   lit_utf8_byte_t lit_utf8_bytes[LIT_UTF8_MAX_BYTES_IN_CODE_UNIT];
495   lit_utf8_size_t bytes_size = lit_code_unit_to_utf8 (code_unit, lit_utf8_bytes);
496 
497   return ecma_new_ecma_string_from_utf8 (lit_utf8_bytes, bytes_size);
498 } /* ecma_new_ecma_string_from_code_unit */
499 
500 #if ENABLED (JERRY_ES2015)
501 
502 /**
503  * Allocate new ecma-string and fill it with cesu-8 character which represents specified code units
504  *
505  * @return pointer to ecma-string descriptor
506  */
507 ecma_string_t *
ecma_new_ecma_string_from_code_units(ecma_char_t first_code_unit,ecma_char_t second_code_unit)508 ecma_new_ecma_string_from_code_units (ecma_char_t first_code_unit, /**< code unit */
509                                       ecma_char_t second_code_unit) /**< code unit */
510 {
511   lit_utf8_byte_t lit_utf8_bytes[2 * LIT_UTF8_MAX_BYTES_IN_CODE_UNIT];
512   lit_utf8_size_t bytes_size = lit_code_unit_to_utf8 (first_code_unit, lit_utf8_bytes);
513   bytes_size += lit_code_unit_to_utf8 (second_code_unit, lit_utf8_bytes + bytes_size);
514 
515   return ecma_new_ecma_string_from_utf8 (lit_utf8_bytes, bytes_size);
516 } /* ecma_new_ecma_string_from_code_units */
517 
518 #endif /* ENABLED (JERRY_ES2015) */
519 
520 /**
521  * Allocate new ecma-string and fill it with ecma-number
522  *
523  * Note: the number cannot be represented as direct string
524  *
525  * @return pointer to ecma-string descriptor
526  */
527 ecma_string_t *
ecma_new_non_direct_string_from_uint32(uint32_t uint32_number)528 ecma_new_non_direct_string_from_uint32 (uint32_t uint32_number) /**< uint32 value of the string */
529 {
530   JERRY_ASSERT (uint32_number > ECMA_DIRECT_STRING_MAX_IMM);
531 
532   ecma_string_t *string_p = ecma_alloc_string ();
533 
534   string_p->refs_and_container = ECMA_STRING_CONTAINER_UINT32_IN_DESC | ECMA_STRING_REF_ONE;
535   string_p->u.uint32_number = uint32_number;
536 
537   return string_p;
538 } /* ecma_new_non_direct_string_from_uint32 */
539 
540 /**
541  * Allocate new ecma-string and fill it with ecma-number
542  *
543  * @return pointer to ecma-string descriptor
544  */
545 ecma_string_t *
ecma_new_ecma_string_from_uint32(uint32_t uint32_number)546 ecma_new_ecma_string_from_uint32 (uint32_t uint32_number) /**< uint32 value of the string */
547 {
548   if (JERRY_LIKELY (uint32_number <= ECMA_DIRECT_STRING_MAX_IMM))
549   {
550     return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_UINT, (uintptr_t) uint32_number);
551   }
552 
553   return ecma_new_non_direct_string_from_uint32 (uint32_number);
554 } /* ecma_new_ecma_string_from_uint32 */
555 
556 /**
557  * Returns the constant assigned to the uint32 number.
558  *
559  * Note:
560  *   Calling ecma_deref_ecma_string on the returned pointer is optional.
561  *
562  * @return pointer to ecma-string descriptor
563  */
564 ecma_string_t *
ecma_get_ecma_string_from_uint32(uint32_t uint32_number)565 ecma_get_ecma_string_from_uint32 (uint32_t uint32_number) /**< input number */
566 {
567   JERRY_ASSERT (uint32_number <= ECMA_DIRECT_STRING_MAX_IMM);
568 
569   return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_UINT, (uintptr_t) uint32_number);
570 } /* ecma_get_ecma_string_from_uint32 */
571 
572 /**
573  * Allocate new ecma-string and fill it with ecma-number
574  *
575  * @return pointer to ecma-string descriptor
576  */
577 ecma_string_t *
ecma_new_ecma_string_from_number(ecma_number_t num)578 ecma_new_ecma_string_from_number (ecma_number_t num) /**< ecma-number */
579 {
580   uint32_t uint32_num = ecma_number_to_uint32 (num);
581   if (num == ((ecma_number_t) uint32_num))
582   {
583     return ecma_new_ecma_string_from_uint32 (uint32_num);
584   }
585 
586   if (ecma_number_is_nan (num))
587   {
588     return ecma_get_magic_string (LIT_MAGIC_STRING_NAN);
589   }
590 
591   if (ecma_number_is_infinity (num))
592   {
593     lit_magic_string_id_t id = (ecma_number_is_negative (num) ? LIT_MAGIC_STRING_NEGATIVE_INFINITY_UL
594                                                               : LIT_MAGIC_STRING_INFINITY_UL);
595     return ecma_get_magic_string (id);
596   }
597 
598   lit_utf8_byte_t str_buf[ECMA_MAX_CHARS_IN_STRINGIFIED_NUMBER];
599   lit_utf8_size_t str_size = ecma_number_to_utf8_string (num, str_buf, sizeof (str_buf));
600 
601   JERRY_ASSERT (str_size > 0);
602 #ifndef JERRY_NDEBUG
603   JERRY_ASSERT (lit_is_utf8_string_magic (str_buf, str_size) == LIT_MAGIC_STRING__COUNT
604                 && lit_is_ex_utf8_string_magic (str_buf, str_size) == lit_get_magic_string_ex_count ());
605 #endif /* !JERRY_NDEBUG */
606 
607   lit_utf8_byte_t *data_p;
608   ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8_buffer (lit_utf8_string_length (str_buf, str_size),
609                                                                         str_size,
610                                                                         &data_p);
611 
612   string_desc_p->u.hash = lit_utf8_string_calc_hash (str_buf, str_size);
613   memcpy (data_p, str_buf, str_size);
614 
615   return string_desc_p;
616 } /* ecma_new_ecma_string_from_number */
617 
618 /**
619  * Returns the constant assigned to the magic string id.
620  *
621  * Note:
622  *   Calling ecma_deref_ecma_string on the returned pointer is optional.
623  *
624  * @return pointer to ecma-string descriptor
625  */
626 extern inline ecma_string_t * JERRY_ATTR_ALWAYS_INLINE
ecma_get_magic_string(lit_magic_string_id_t id)627 ecma_get_magic_string (lit_magic_string_id_t id) /**< identifier of magic string */
628 {
629   JERRY_ASSERT (id < LIT_MAGIC_STRING__COUNT);
630   return (ecma_string_t *) ECMA_CREATE_DIRECT_STRING (ECMA_DIRECT_STRING_MAGIC, (uintptr_t) id);
631 } /* ecma_get_magic_string */
632 
633 /**
634  * Append a cesu8 string after an ecma-string
635  *
636  * Note:
637  *   The string1_p argument is freed. If it needs to be preserved,
638  *   call ecma_ref_ecma_string with string1_p before the call.
639  *
640  * @return concatenation of an ecma-string and a cesu8 string
641  */
642 ecma_string_t *
ecma_append_chars_to_string(ecma_string_t * string1_p,const lit_utf8_byte_t * cesu8_string2_p,lit_utf8_size_t cesu8_string2_size,lit_utf8_size_t cesu8_string2_length)643 ecma_append_chars_to_string (ecma_string_t *string1_p, /**< base ecma-string */
644                              const lit_utf8_byte_t *cesu8_string2_p, /**< characters to be appended */
645                              lit_utf8_size_t cesu8_string2_size, /**< byte size of cesu8_string2_p */
646                              lit_utf8_size_t cesu8_string2_length) /**< character length of cesu8_string2_p */
647 {
648   JERRY_ASSERT (string1_p != NULL && cesu8_string2_size > 0 && cesu8_string2_length > 0);
649 
650   if (JERRY_UNLIKELY (ecma_string_is_empty (string1_p)))
651   {
652     return ecma_new_ecma_string_from_utf8 (cesu8_string2_p, cesu8_string2_size);
653   }
654 
655   lit_utf8_size_t cesu8_string1_size;
656   lit_utf8_size_t cesu8_string1_length;
657   uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
658   lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
659 
660   const lit_utf8_byte_t *cesu8_string1_p = ecma_string_get_chars (string1_p,
661                                                                   &cesu8_string1_size,
662                                                                   &cesu8_string1_length,
663                                                                   uint32_to_string_buffer,
664                                                                   &flags);
665 
666   JERRY_ASSERT (!(flags & ECMA_STRING_FLAG_MUST_BE_FREED));
667   JERRY_ASSERT (cesu8_string1_length > 0);
668   JERRY_ASSERT (cesu8_string1_length <= cesu8_string1_size);
669 
670   lit_utf8_size_t new_size = cesu8_string1_size + cesu8_string2_size;
671 
672   /* Poor man's carry flag check: it is impossible to allocate this large string. */
673   if (new_size < (cesu8_string1_size | cesu8_string2_size))
674   {
675     jerry_fatal (ERR_OUT_OF_MEMORY);
676   }
677 
678   lit_magic_string_id_t magic_string_id;
679   magic_string_id = lit_is_utf8_string_pair_magic (cesu8_string1_p,
680                                                    cesu8_string1_size,
681                                                    cesu8_string2_p,
682                                                    cesu8_string2_size);
683 
684   if (magic_string_id != LIT_MAGIC_STRING__COUNT)
685   {
686     ecma_deref_ecma_string (string1_p);
687     return ecma_get_magic_string (magic_string_id);
688   }
689 
690   if ((flags & ECMA_STRING_FLAG_IS_UINT32) && new_size <= ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32)
691   {
692     memcpy (uint32_to_string_buffer + cesu8_string1_size, cesu8_string2_p, cesu8_string2_size);
693 
694     uint32_t array_index;
695 
696     if (ecma_string_to_array_index (uint32_to_string_buffer, new_size, &array_index))
697     {
698       ecma_deref_ecma_string (string1_p);
699       return ecma_new_ecma_string_from_uint32 (array_index);
700     }
701   }
702 
703   if (lit_get_magic_string_ex_count () > 0)
704   {
705     lit_magic_string_ex_id_t magic_string_ex_id;
706     magic_string_ex_id = lit_is_ex_utf8_string_pair_magic (cesu8_string1_p,
707                                                            cesu8_string1_size,
708                                                            cesu8_string2_p,
709                                                            cesu8_string2_size);
710 
711     if (magic_string_ex_id < lit_get_magic_string_ex_count ())
712     {
713       ecma_deref_ecma_string (string1_p);
714       return ecma_new_ecma_string_from_magic_string_ex_id (magic_string_ex_id);
715     }
716   }
717 
718   lit_utf8_byte_t *data_p;
719   ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8_buffer (cesu8_string1_length + cesu8_string2_length,
720                                                                         new_size,
721                                                                         &data_p);
722 
723   lit_string_hash_t hash_start;
724 
725   if (JERRY_UNLIKELY (flags & ECMA_STRING_FLAG_REHASH_NEEDED))
726   {
727     hash_start = lit_utf8_string_calc_hash (cesu8_string1_p, cesu8_string1_size);
728   }
729   else
730   {
731     JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (string1_p));
732     hash_start = string1_p->u.hash;
733   }
734 
735   string_desc_p->u.hash = lit_utf8_string_hash_combine (hash_start, cesu8_string2_p, cesu8_string2_size);
736 
737   memcpy (data_p, cesu8_string1_p, cesu8_string1_size);
738   memcpy (data_p + cesu8_string1_size, cesu8_string2_p, cesu8_string2_size);
739 
740   ecma_deref_ecma_string (string1_p);
741   return (ecma_string_t *) string_desc_p;
742 } /* ecma_append_chars_to_string */
743 
744 /**
745  * Concatenate ecma-strings
746  *
747  * Note:
748  *   The string1_p argument is freed. If it needs to be preserved,
749  *   call ecma_ref_ecma_string with string1_p before the call.
750  *
751  * @return concatenation of two ecma-strings
752  */
753 ecma_string_t *
ecma_concat_ecma_strings(ecma_string_t * string1_p,ecma_string_t * string2_p)754 ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
755                           ecma_string_t *string2_p) /**< second ecma-string */
756 {
757   JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
758 
759   if (JERRY_UNLIKELY (ecma_string_is_empty (string1_p)))
760   {
761     ecma_ref_ecma_string (string2_p);
762     return string2_p;
763   }
764   else if (JERRY_UNLIKELY (ecma_string_is_empty (string2_p)))
765   {
766     return string1_p;
767   }
768 
769   lit_utf8_size_t cesu8_string2_size;
770   lit_utf8_size_t cesu8_string2_length;
771   lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
772   uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
773 
774   const lit_utf8_byte_t *cesu8_string2_p = ecma_string_get_chars (string2_p,
775                                                                   &cesu8_string2_size,
776                                                                   &cesu8_string2_length,
777                                                                   uint32_to_string_buffer,
778                                                                   &flags);
779 
780   JERRY_ASSERT (cesu8_string2_p != NULL);
781 
782   ecma_string_t *result_p = ecma_append_chars_to_string (string1_p,
783                                                          cesu8_string2_p,
784                                                          cesu8_string2_size,
785                                                          cesu8_string2_length);
786 
787   JERRY_ASSERT (!(flags & ECMA_STRING_FLAG_MUST_BE_FREED));
788 
789   return result_p;
790 } /* ecma_concat_ecma_strings */
791 
792 /**
793  * Increase reference counter of ecma-string.
794  */
795 void
ecma_ref_ecma_string(ecma_string_t * string_p)796 ecma_ref_ecma_string (ecma_string_t *string_p) /**< string descriptor */
797 {
798   JERRY_ASSERT (string_p != NULL);
799 
800   if (ECMA_IS_DIRECT_STRING (string_p))
801   {
802     return;
803   }
804 
805 #ifdef JERRY_NDEBUG
806   if (ECMA_STRING_IS_STATIC (string_p))
807   {
808     return;
809   }
810 #endif /* JERRY_NDEBUG */
811 
812   JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
813 
814   if (JERRY_LIKELY (string_p->refs_and_container < ECMA_STRING_MAX_REF))
815   {
816     /* Increase reference counter. */
817     string_p->refs_and_container += ECMA_STRING_REF_ONE;
818   }
819   else
820   {
821     jerry_fatal (ERR_REF_COUNT_LIMIT);
822   }
823 } /* ecma_ref_ecma_string */
824 
825 /**
826  * Decrease reference counter and deallocate ecma-string
827  * if the counter becomes zero.
828  */
829 void
ecma_deref_ecma_string(ecma_string_t * string_p)830 ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */
831 {
832   JERRY_ASSERT (string_p != NULL);
833 
834   if (ECMA_IS_DIRECT_STRING (string_p))
835   {
836     return;
837   }
838 
839 #ifdef JERRY_NDEBUG
840   if (ECMA_STRING_IS_STATIC (string_p))
841   {
842     return;
843   }
844 #endif /* JERRY_NDEBUG */
845 
846   JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
847 
848   /* Decrease reference counter. */
849   string_p->refs_and_container -= ECMA_STRING_REF_ONE;
850 
851   if (string_p->refs_and_container >= ECMA_STRING_REF_ONE)
852   {
853     return;
854   }
855 
856   ecma_destroy_ecma_string (string_p);
857 } /* ecma_deref_ecma_string */
858 
859 /**
860  * Deallocate an ecma-string
861  */
862 void
ecma_destroy_ecma_string(ecma_string_t * string_p)863 ecma_destroy_ecma_string (ecma_string_t *string_p) /**< ecma-string */
864 {
865   JERRY_ASSERT (string_p != NULL);
866   JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (string_p));
867   JERRY_ASSERT ((string_p->refs_and_container < ECMA_STRING_REF_ONE) || ECMA_STRING_IS_STATIC (string_p));
868 
869   switch (ECMA_STRING_GET_CONTAINER (string_p))
870   {
871     case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
872     {
873       ecma_dealloc_string_buffer (string_p, ((ecma_utf8_string_t *) string_p)->size + sizeof (ecma_utf8_string_t));
874       return;
875     }
876     case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
877     {
878       ecma_dealloc_string_buffer (string_p,
879                                   ((ecma_long_utf8_string_t *) string_p)->size + sizeof (ecma_long_utf8_string_t));
880       return;
881     }
882     case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
883     {
884       ecma_dealloc_string_buffer (string_p,
885                                   ((ecma_ascii_string_t *) string_p)->size + sizeof (ecma_ascii_string_t));
886       return;
887     }
888 #if ENABLED (JERRY_ES2015)
889     case ECMA_STRING_CONTAINER_SYMBOL:
890     {
891       ecma_extended_string_t * symbol_p = (ecma_extended_string_t *) string_p;
892       ecma_free_value (symbol_p->u.symbol_descriptor);
893       ecma_dealloc_extended_string (symbol_p);
894       return;
895     }
896 #endif /* ENABLED (JERRY_ES2015) */
897     default:
898     {
899       JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC
900                     || ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
901 
902       /* only the string descriptor itself should be freed */
903       ecma_dealloc_string (string_p);
904     }
905   }
906 } /* ecma_destroy_ecma_string */
907 
908 /**
909  * Convert ecma-string to number
910  *
911  * @return converted ecma-number
912  */
913 ecma_number_t
ecma_string_to_number(const ecma_string_t * string_p)914 ecma_string_to_number (const ecma_string_t *string_p) /**< ecma-string */
915 {
916   JERRY_ASSERT (string_p != NULL);
917 
918   if (ECMA_IS_DIRECT_STRING (string_p))
919   {
920     if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_UINT))
921     {
922       return (ecma_number_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
923     }
924   }
925   else if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
926   {
927     return ((ecma_number_t) string_p->u.uint32_number);
928   }
929 
930   lit_utf8_size_t size;
931   const lit_utf8_byte_t *chars_p = ecma_string_get_chars_fast (string_p, &size);
932 
933   JERRY_ASSERT (chars_p != NULL);
934 
935   if (size == 0)
936   {
937     return ECMA_NUMBER_ZERO;
938   }
939 
940   return ecma_utf8_string_to_number (chars_p, size);
941 } /* ecma_string_to_number */
942 
943 /**
944  * Check if string is array index.
945  *
946  * @return ECMA_STRING_NOT_ARRAY_INDEX if string is not array index
947  *         the array index otherwise
948  */
949 inline uint32_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_get_array_index(const ecma_string_t * str_p)950 ecma_string_get_array_index (const ecma_string_t *str_p) /**< ecma-string */
951 {
952   if (ECMA_IS_DIRECT_STRING (str_p))
953   {
954     if (ECMA_IS_DIRECT_STRING_WITH_TYPE (str_p, ECMA_DIRECT_STRING_UINT))
955     {
956       /* Value cannot be equal to the maximum value of a 32 bit unsigned number. */
957       return (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (str_p);
958     }
959 
960     return ECMA_STRING_NOT_ARRAY_INDEX;
961   }
962 
963   if (ECMA_STRING_GET_CONTAINER (str_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
964   {
965     /* When the uint32_number is equal to the maximum value of 32 bit unsigned integer number,
966      * it is also an invalid array index. The comparison to ECMA_STRING_NOT_ARRAY_INDEX will
967      * be true in this case. */
968     return str_p->u.uint32_number;
969   }
970 
971   return ECMA_STRING_NOT_ARRAY_INDEX;
972 } /* ecma_string_get_array_index */
973 
974 /**
975  * Convert ecma-string's contents to a cesu-8 string and put it to the buffer.
976  * It is the caller's responsibility to make sure that the string fits in the buffer.
977  *
978  * @return number of bytes, actually copied to the buffer.
979  */
980 lit_utf8_size_t JERRY_ATTR_WARN_UNUSED_RESULT
ecma_string_copy_to_cesu8_buffer(const ecma_string_t * string_p,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)981 ecma_string_copy_to_cesu8_buffer (const ecma_string_t *string_p, /**< ecma-string descriptor */
982                                   lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
983                                                               * (can be NULL if buffer_size == 0) */
984                                   lit_utf8_size_t buffer_size) /**< size of buffer */
985 {
986   JERRY_ASSERT (string_p != NULL);
987   JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
988   JERRY_ASSERT (ecma_string_get_size (string_p) <= buffer_size);
989 
990   lit_utf8_size_t size;
991 
992   if (ECMA_IS_DIRECT_STRING (string_p))
993   {
994     if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_UINT))
995     {
996       uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
997       size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
998       JERRY_ASSERT (size <= buffer_size);
999       return size;
1000     }
1001   }
1002   else
1003   {
1004     JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
1005 
1006     if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1007     {
1008       uint32_t uint32_number = string_p->u.uint32_number;
1009       size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
1010       JERRY_ASSERT (size <= buffer_size);
1011       return size;
1012     }
1013   }
1014 
1015   const lit_utf8_byte_t *chars_p = ecma_string_get_chars_fast (string_p, &size);
1016 
1017   JERRY_ASSERT (chars_p != NULL);
1018   JERRY_ASSERT (size <= buffer_size);
1019 
1020   memcpy (buffer_p, chars_p, size);
1021   return size;
1022 } /* ecma_string_copy_to_cesu8_buffer */
1023 
1024 /**
1025  * Convert ecma-string's contents to an utf-8 string and put it to the buffer.
1026  * It is the caller's responsibility to make sure that the string fits in the buffer.
1027  *
1028  * @return number of bytes, actually copied to the buffer.
1029  */
1030 lit_utf8_size_t JERRY_ATTR_WARN_UNUSED_RESULT
ecma_string_copy_to_utf8_buffer(const ecma_string_t * string_p,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1031 ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_p, /**< ecma-string descriptor */
1032                                  lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1033                                                              * (can be NULL if buffer_size == 0) */
1034                                  lit_utf8_size_t buffer_size) /**< size of buffer */
1035 {
1036   JERRY_ASSERT (string_p != NULL);
1037   JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
1038   JERRY_ASSERT (ecma_string_get_utf8_size (string_p) <= buffer_size);
1039 
1040   lit_utf8_size_t size;
1041 
1042   if (ECMA_IS_DIRECT_STRING (string_p))
1043   {
1044     if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_UINT))
1045     {
1046       uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
1047       size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
1048       JERRY_ASSERT (size <= buffer_size);
1049       return size;
1050     }
1051   }
1052   else
1053   {
1054     JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
1055 
1056     if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1057     {
1058       uint32_t uint32_number = string_p->u.uint32_number;
1059       size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
1060       JERRY_ASSERT (size <= buffer_size);
1061       return size;
1062     }
1063   }
1064 
1065   uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
1066   const lit_utf8_byte_t *chars_p = ecma_string_get_chars (string_p, &size, NULL, NULL, &flags);
1067 
1068   JERRY_ASSERT (chars_p != NULL);
1069 
1070   if (flags & ECMA_STRING_FLAG_IS_ASCII)
1071   {
1072     JERRY_ASSERT (size <= buffer_size);
1073     memcpy (buffer_p, chars_p, size);
1074     return size;
1075   }
1076 
1077   size = lit_convert_cesu8_string_to_utf8_string (chars_p,
1078                                                   size,
1079                                                   buffer_p,
1080                                                   buffer_size);
1081 
1082   if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
1083   {
1084     jmem_heap_free_block ((void *) chars_p, size);
1085   }
1086 
1087   JERRY_ASSERT (size <= buffer_size);
1088   return size;
1089 } /* ecma_string_copy_to_utf8_buffer */
1090 
1091 /**
1092  * Convert ecma-string's contents to a cesu-8 string, extract the parts of the converted string between the specified
1093  * start position and the end position (or the end of the string, whichever comes first), and copy these characters
1094  * into the buffer.
1095  *
1096  * @return number of bytes, actually copied to the buffer.
1097  */
1098 lit_utf8_size_t
ecma_substring_copy_to_cesu8_buffer(const ecma_string_t * string_desc_p,ecma_length_t start_pos,ecma_length_t end_pos,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1099 ecma_substring_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
1100                                      ecma_length_t start_pos, /**< position of the first character */
1101                                      ecma_length_t end_pos, /**< position of the last character */
1102                                      lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1103                                                                  * (can be NULL if buffer_size == 0) */
1104                                      lit_utf8_size_t buffer_size) /**< size of buffer */
1105 {
1106   JERRY_ASSERT (string_desc_p != NULL);
1107   JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
1108 
1109   ecma_length_t string_length = ecma_string_get_length (string_desc_p);
1110   lit_utf8_size_t size = 0;
1111 
1112   if (start_pos >= string_length || start_pos >= end_pos)
1113   {
1114     return 0;
1115   }
1116 
1117   if (end_pos > string_length)
1118   {
1119     end_pos = string_length;
1120   }
1121 
1122   ECMA_STRING_TO_UTF8_STRING (string_desc_p, utf8_str_p, utf8_str_size);
1123 
1124   const lit_utf8_byte_t *start_p = utf8_str_p;
1125 
1126   if (string_length == utf8_str_size)
1127   {
1128     start_p += start_pos;
1129     size = end_pos - start_pos;
1130 
1131     if (size > buffer_size)
1132     {
1133       size = buffer_size;
1134     }
1135 
1136     memcpy (buffer_p, start_p, size);
1137   }
1138   else
1139   {
1140     end_pos -= start_pos;
1141     while (start_pos--)
1142     {
1143       start_p += lit_get_unicode_char_size_by_utf8_first_byte (*start_p);
1144     }
1145 
1146     const lit_utf8_byte_t *end_p = start_p;
1147 
1148     while (end_pos--)
1149     {
1150       lit_utf8_size_t code_unit_size = lit_get_unicode_char_size_by_utf8_first_byte (*end_p);
1151 
1152       if ((size + code_unit_size) > buffer_size)
1153       {
1154         break;
1155       }
1156 
1157       end_p += code_unit_size;
1158       size += code_unit_size;
1159     }
1160 
1161     memcpy (buffer_p, start_p, size);
1162   }
1163 
1164   ECMA_FINALIZE_UTF8_STRING (utf8_str_p, utf8_str_size);
1165 
1166   JERRY_ASSERT (size <= buffer_size);
1167   return size;
1168 } /* ecma_substring_copy_to_cesu8_buffer */
1169 
1170 /**
1171  * Convert ecma-string's contents to an utf-8 string, extract the parts of the converted string between the specified
1172  * start position and the end position (or the end of the string, whichever comes first), and copy these characters
1173  * into the buffer.
1174  *
1175  * @return number of bytes, actually copied to the buffer.
1176  */
1177 lit_utf8_size_t
ecma_substring_copy_to_utf8_buffer(const ecma_string_t * string_desc_p,ecma_length_t start_pos,ecma_length_t end_pos,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1178 ecma_substring_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
1179                                     ecma_length_t start_pos, /**< position of the first character */
1180                                     ecma_length_t end_pos, /**< position of the last character */
1181                                     lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1182                                                                 * (can be NULL if buffer_size == 0) */
1183                                     lit_utf8_size_t buffer_size) /**< size of buffer */
1184 {
1185   JERRY_ASSERT (string_desc_p != NULL);
1186   JERRY_ASSERT (ECMA_IS_DIRECT_STRING (string_desc_p) || string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
1187   JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
1188 
1189   lit_utf8_size_t size = 0;
1190 
1191   ecma_length_t utf8_str_length = ecma_string_get_utf8_length (string_desc_p);
1192 
1193   if (start_pos >= utf8_str_length || start_pos >= end_pos)
1194   {
1195     return 0;
1196   }
1197 
1198   if (end_pos > utf8_str_length)
1199   {
1200     end_pos = utf8_str_length;
1201   }
1202 
1203   ECMA_STRING_TO_UTF8_STRING (string_desc_p, cesu8_str_p, cesu8_str_size);
1204   ecma_length_t cesu8_str_length = ecma_string_get_length (string_desc_p);
1205 
1206   if (cesu8_str_length == cesu8_str_size)
1207   {
1208     cesu8_str_p += start_pos;
1209     size = end_pos - start_pos;
1210 
1211     if (size > buffer_size)
1212     {
1213       size = buffer_size;
1214     }
1215 
1216     memcpy (buffer_p, cesu8_str_p, size);
1217   }
1218   else
1219   {
1220     const lit_utf8_byte_t *cesu8_end_pos = cesu8_str_p + cesu8_str_size;
1221     end_pos -= start_pos;
1222 
1223     while (start_pos--)
1224     {
1225       ecma_char_t ch;
1226       lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_str_p, &ch);
1227 
1228       cesu8_str_p += code_unit_size;
1229       if ((cesu8_str_p != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
1230       {
1231         ecma_char_t next_ch;
1232         lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_str_p, &next_ch);
1233         if (lit_is_code_point_utf16_low_surrogate (next_ch))
1234         {
1235           JERRY_ASSERT (code_unit_size == next_ch_size);
1236           cesu8_str_p += code_unit_size;
1237         }
1238       }
1239     }
1240 
1241     const lit_utf8_byte_t *cesu8_pos = cesu8_str_p;
1242 
1243     lit_utf8_byte_t *utf8_pos = buffer_p;
1244     lit_utf8_byte_t *utf8_end_pos = buffer_p + buffer_size;
1245 
1246     while (end_pos--)
1247     {
1248       ecma_char_t ch;
1249       lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_pos, &ch);
1250 
1251       if ((size + code_unit_size) > buffer_size)
1252       {
1253         break;
1254       }
1255 
1256       if (((cesu8_pos + code_unit_size) != cesu8_end_pos) && lit_is_code_point_utf16_high_surrogate (ch))
1257       {
1258         ecma_char_t next_ch;
1259         lit_utf8_size_t next_ch_size = lit_read_code_unit_from_utf8 (cesu8_pos + code_unit_size, &next_ch);
1260 
1261         if (lit_is_code_point_utf16_low_surrogate (next_ch))
1262         {
1263           JERRY_ASSERT (code_unit_size == next_ch_size);
1264 
1265           if ((size + code_unit_size + 1) > buffer_size)
1266           {
1267             break;
1268           }
1269 
1270           cesu8_pos += next_ch_size;
1271 
1272           lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (ch, next_ch);
1273           lit_code_point_to_utf8 (code_point, utf8_pos);
1274           size += (code_unit_size + 1);
1275         }
1276         else
1277         {
1278           memcpy (utf8_pos, cesu8_pos, code_unit_size);
1279           size += code_unit_size;
1280         }
1281       }
1282       else
1283       {
1284         memcpy (utf8_pos, cesu8_pos, code_unit_size);
1285         size += code_unit_size;
1286       }
1287 
1288       utf8_pos = buffer_p + size;
1289       cesu8_pos += code_unit_size;
1290     }
1291 
1292     JERRY_ASSERT (utf8_pos <= utf8_end_pos);
1293   }
1294 
1295   ECMA_FINALIZE_UTF8_STRING (cesu8_str_p, cesu8_str_size);
1296   JERRY_ASSERT (size <= buffer_size);
1297 
1298   return size;
1299 } /* ecma_substring_copy_to_utf8_buffer */
1300 
1301 /**
1302  * Convert ecma-string's contents to a cesu-8 string and put it to the buffer.
1303  * It is the caller's responsibility to make sure that the string fits in the buffer.
1304  * Check if the size of the string is equal with the size of the buffer.
1305  */
1306 inline void JERRY_ATTR_ALWAYS_INLINE
ecma_string_to_utf8_bytes(const ecma_string_t * string_desc_p,lit_utf8_byte_t * buffer_p,lit_utf8_size_t buffer_size)1307 ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
1308                            lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
1309                                                        * (can be NULL if buffer_size == 0) */
1310                            lit_utf8_size_t buffer_size) /**< size of buffer */
1311 {
1312   const lit_utf8_size_t size = ecma_string_copy_to_cesu8_buffer (string_desc_p, buffer_p, buffer_size);
1313   JERRY_ASSERT (size == buffer_size);
1314 } /* ecma_string_to_utf8_bytes */
1315 
1316 /**
1317  * Get size of the uint32 number stored locally in the string's descriptor
1318  *
1319  * Note: the represented number size and length are equal
1320  *
1321  * @return size in bytes
1322  */
1323 static inline ecma_length_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_get_uint32_size(const uint32_t uint32_number)1324 ecma_string_get_uint32_size (const uint32_t uint32_number) /**< number in the string-descriptor */
1325 {
1326   uint32_t prev_number = 1;
1327   uint32_t next_number = 100;
1328   ecma_length_t size = 1;
1329 
1330   const uint32_t max_size = 9;
1331 
1332   while (size < max_size && uint32_number >= next_number)
1333   {
1334     prev_number = next_number;
1335     next_number *= 100;
1336     size += 2;
1337   }
1338 
1339   if (uint32_number >= prev_number * 10)
1340   {
1341     size++;
1342   }
1343 
1344   return size;
1345 } /* ecma_string_get_uint32_size */
1346 
/**
 * Checks whether a character sequence has exactly as many characters as bytes,
 * i.e. whether it consists of single byte (ascii) characters only.
 *
 * Note: the size argument is evaluated twice.
 */
#define ECMA_STRING_IS_ASCII(char_p, size) (lit_utf8_string_length ((char_p), (size)) == (size))
1351 
1352 /**
1353  * Returns with the cesu8 character array of a string.
1354  *
1355  * Note:
1356  *   - This function returns with a newly allocated buffer for uint32 strings,
1357  *     which must be freed if the optional uint32_buff_p parameter is NULL.
1358  *   - The ASCII check only happens if the flags parameter gets
1359  *     'ECMA_STRING_FLAG_IS_ASCII' as an input.
1360  *
1361  * @return start of cesu8 characters
1362  */
1363 const lit_utf8_byte_t *
ecma_string_get_chars(const ecma_string_t * string_p,lit_utf8_size_t * size_p,lit_utf8_size_t * length_p,lit_utf8_byte_t * uint32_buff_p,uint8_t * flags_p)1364 ecma_string_get_chars (const ecma_string_t *string_p, /**< ecma-string */
1365                        lit_utf8_size_t *size_p, /**< [out] size of the ecma string */
1366                        lit_utf8_size_t *length_p, /**< [out] optional argument. If the pointer is not NULL the pointed
1367                                                    *    memory area is filled with the length of the ecma string */
1368                        lit_utf8_byte_t *uint32_buff_p, /**< [out] optional argument. If the pointer is not NULL the
1369                                                         *    pointed memory area is filled with the string converted
1370                                                         *    uint32 string descriptor */
1371                        uint8_t *flags_p) /**< [in,out] any combination of ecma_string_flag_t bits */
1372 {
1373   ecma_length_t length;
1374   lit_utf8_size_t size;
1375   const lit_utf8_byte_t *result_p;
1376 
1377   if (ECMA_IS_DIRECT_STRING (string_p))
1378   {
1379     *flags_p |= ECMA_STRING_FLAG_REHASH_NEEDED;
1380 
1381     switch (ECMA_GET_DIRECT_STRING_TYPE (string_p))
1382     {
1383       case ECMA_DIRECT_STRING_MAGIC:
1384       {
1385         uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
1386 
1387         if (id >= LIT_MAGIC_STRING__COUNT)
1388         {
1389           id -= LIT_MAGIC_STRING__COUNT;
1390           size = lit_get_magic_string_ex_size (id);
1391           result_p = lit_get_magic_string_ex_utf8 (id);
1392           length = 0;
1393 
1394           if (JERRY_UNLIKELY (*flags_p & ECMA_STRING_FLAG_IS_ASCII))
1395           {
1396             length = lit_utf8_string_length (result_p, size);
1397           }
1398         }
1399         else
1400         {
1401           size = lit_get_magic_string_size (id);
1402           length = size;
1403 
1404           result_p = lit_get_magic_string_utf8 (id);
1405 
1406           /* All magic strings must be ascii strings. */
1407           JERRY_ASSERT (ECMA_STRING_IS_ASCII (result_p, size));
1408         }
1409         break;
1410       }
1411       default:
1412       {
1413         JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_UINT);
1414         uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
1415         size = (lit_utf8_size_t) ecma_string_get_uint32_size (uint32_number);
1416 
1417         if (uint32_buff_p != NULL)
1418         {
1419           result_p = uint32_buff_p;
1420         }
1421         else
1422         {
1423           result_p = (const lit_utf8_byte_t *) jmem_heap_alloc_block (size);
1424           *flags_p |= ECMA_STRING_FLAG_MUST_BE_FREED;
1425         }
1426 
1427         length = ecma_uint32_to_utf8_string (uint32_number, (lit_utf8_byte_t *) result_p, size);
1428 
1429         JERRY_ASSERT (length == size);
1430         *flags_p |= ECMA_STRING_FLAG_IS_UINT32;
1431         break;
1432       }
1433     }
1434   }
1435   else
1436   {
1437     JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
1438 
1439     switch (ECMA_STRING_GET_CONTAINER (string_p))
1440     {
1441       case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
1442       {
1443         ecma_utf8_string_t *utf8_string_desc_p = (ecma_utf8_string_t *) string_p;
1444         size = utf8_string_desc_p->size;
1445         length = utf8_string_desc_p->length;
1446         result_p = ECMA_UTF8_STRING_GET_BUFFER (utf8_string_desc_p);
1447         break;
1448       }
1449       case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
1450       {
1451         ecma_long_utf8_string_t *long_utf8_string_desc_p = (ecma_long_utf8_string_t *) string_p;
1452         size = long_utf8_string_desc_p->size;
1453         length = long_utf8_string_desc_p->length;
1454         result_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (long_utf8_string_desc_p);
1455         break;
1456       }
1457       case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
1458       {
1459         ecma_ascii_string_t *ascii_string_desc_p = (ecma_ascii_string_t *) string_p;
1460         size = ascii_string_desc_p->size;
1461         length = ascii_string_desc_p->size;
1462         result_p = ECMA_ASCII_STRING_GET_BUFFER (ascii_string_desc_p);
1463         break;
1464       }
1465       case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
1466       {
1467         size = (lit_utf8_size_t) ecma_string_get_uint32_size (string_p->u.uint32_number);
1468 
1469         if (uint32_buff_p != NULL)
1470         {
1471           result_p = uint32_buff_p;
1472         }
1473         else
1474         {
1475           result_p = (const lit_utf8_byte_t *) jmem_heap_alloc_block (size);
1476           *flags_p |= ECMA_STRING_FLAG_MUST_BE_FREED;
1477         }
1478 
1479         length = ecma_uint32_to_utf8_string (string_p->u.uint32_number, (lit_utf8_byte_t *) result_p, size);
1480 
1481         JERRY_ASSERT (length == size);
1482         *flags_p |= ECMA_STRING_FLAG_IS_UINT32 | ECMA_STRING_FLAG_REHASH_NEEDED;
1483         break;
1484 
1485       }
1486       default:
1487       {
1488         JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
1489 
1490         lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
1491         size = lit_get_magic_string_ex_size (id);
1492         length = 0;
1493 
1494         if (JERRY_UNLIKELY (*flags_p & ECMA_STRING_FLAG_IS_ASCII))
1495         {
1496           length = lit_utf8_string_length (lit_get_magic_string_ex_utf8 (id), size);
1497         }
1498 
1499         result_p = lit_get_magic_string_ex_utf8 (id);
1500         *flags_p |= ECMA_STRING_FLAG_REHASH_NEEDED;
1501         break;
1502       }
1503     }
1504   }
1505 
1506   *size_p = size;
1507   if (length_p != NULL)
1508   {
1509     *length_p = length;
1510   }
1511 
1512   if ((*flags_p & ECMA_STRING_FLAG_IS_ASCII)
1513       && length != size)
1514   {
1515     *flags_p = (uint8_t) (*flags_p & (uint8_t) ~ECMA_STRING_FLAG_IS_ASCII);
1516   }
1517 
1518   return result_p;
1519 } /* ecma_string_get_chars */
1520 
1521 /**
1522  * Checks whether the string equals to the magic string id.
1523  *
1524  * @return true - if the string equals to the magic string id
1525  *         false - otherwise
1526  */
1527 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_string_to_magic_id(const ecma_string_t * string_p,lit_magic_string_id_t id)1528 ecma_compare_ecma_string_to_magic_id (const ecma_string_t *string_p, /**< property name */
1529                                       lit_magic_string_id_t id) /**< magic string id */
1530 {
1531   return (string_p == ecma_get_magic_string (id));
1532 } /* ecma_compare_ecma_string_to_magic_id */
1533 
1534 /**
1535  * Checks whether ecma string is empty or not
1536  *
1537  * @return true - if the string is an empty string
1538  *         false - otherwise
1539  */
1540 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_string_is_empty(const ecma_string_t * string_p)1541 ecma_string_is_empty (const ecma_string_t *string_p) /**< ecma-string */
1542 {
1543   return ecma_compare_ecma_string_to_magic_id (string_p, LIT_MAGIC_STRING__EMPTY);
1544 } /* ecma_string_is_empty */
1545 
1546 /**
1547  * Checks whether the string equals to "length".
1548  *
1549  * @return true - if the string equals to "length"
1550  *         false - otherwise
1551  */
1552 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_string_is_length(const ecma_string_t * string_p)1553 ecma_string_is_length (const ecma_string_t *string_p) /**< property name */
1554 {
1555   return ecma_compare_ecma_string_to_magic_id (string_p, LIT_MAGIC_STRING_LENGTH);
1556 } /* ecma_string_is_length */
1557 
1558 /**
1559  * Converts a property name into a string
1560  *
1561  * @return pointer to the converted ecma string
1562  */
1563 static inline ecma_string_t * JERRY_ATTR_ALWAYS_INLINE
ecma_property_to_string(ecma_property_t property,jmem_cpointer_t prop_name_cp)1564 ecma_property_to_string (ecma_property_t property, /**< property name type */
1565                          jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1566 {
1567   uintptr_t property_string = ((uintptr_t) (property)) & (0x3 << ECMA_PROPERTY_NAME_TYPE_SHIFT);
1568   property_string = (property_string >> ECMA_STRING_TYPE_CONVERSION_SHIFT) | ECMA_TYPE_DIRECT_STRING;
1569   return (ecma_string_t *) (property_string | (((uintptr_t) prop_name_cp) << ECMA_DIRECT_STRING_SHIFT));
1570 } /* ecma_property_to_string */
1571 
1572 /**
1573  * Converts a string into a property name
1574  *
1575  * @return the compressed pointer part of the name
1576  */
1577 inline jmem_cpointer_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_to_property_name(ecma_string_t * prop_name_p,ecma_property_t * name_type_p)1578 ecma_string_to_property_name (ecma_string_t *prop_name_p, /**< property name */
1579                               ecma_property_t *name_type_p) /**< [out] property name type */
1580 {
1581   if (ECMA_IS_DIRECT_STRING (prop_name_p))
1582   {
1583     *name_type_p = (ecma_property_t) ECMA_DIRECT_STRING_TYPE_TO_PROP_NAME_TYPE (prop_name_p);
1584     return (jmem_cpointer_t) ECMA_GET_DIRECT_STRING_VALUE (prop_name_p);
1585   }
1586 
1587   *name_type_p = ECMA_DIRECT_STRING_PTR << ECMA_PROPERTY_NAME_TYPE_SHIFT;
1588 
1589   ecma_ref_ecma_string (prop_name_p);
1590 
1591   jmem_cpointer_t prop_name_cp;
1592   ECMA_SET_NON_NULL_POINTER (prop_name_cp, prop_name_p);
1593   return prop_name_cp;
1594 } /* ecma_string_to_property_name */
1595 
1596 /**
1597  * Converts a property name into a string
1598  *
1599  * @return the string pointer
1600  *         string must be released with ecma_deref_ecma_string
1601  */
1602 ecma_string_t *
ecma_string_from_property_name(ecma_property_t property,jmem_cpointer_t prop_name_cp)1603 ecma_string_from_property_name (ecma_property_t property, /**< property name type */
1604                                 jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1605 {
1606   if (ECMA_PROPERTY_GET_NAME_TYPE (property) != ECMA_DIRECT_STRING_PTR)
1607   {
1608     return ecma_property_to_string (property, prop_name_cp);
1609   }
1610 
1611   ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1612   ecma_ref_ecma_string (prop_name_p);
1613   return prop_name_p;
1614 } /* ecma_string_from_property_name */
1615 
1616 /**
1617  * Get hash code of property name
1618  *
1619  * @return hash code of property name
1620  */
1621 inline lit_string_hash_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_get_property_name_hash(ecma_property_t property,jmem_cpointer_t prop_name_cp)1622 ecma_string_get_property_name_hash (ecma_property_t property, /**< property name type */
1623                                     jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1624 {
1625   if (ECMA_PROPERTY_GET_NAME_TYPE (property) == ECMA_DIRECT_STRING_PTR)
1626   {
1627     ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1628     return prop_name_p->u.hash;
1629   }
1630 
1631   return (lit_string_hash_t) prop_name_cp;
1632 } /* ecma_string_get_property_name_hash */
1633 
1634 /**
1635  * Check if property name is array index.
1636  *
1637  * @return ECMA_STRING_NOT_ARRAY_INDEX if string is not array index
1638  *         the array index otherwise
1639  */
1640 uint32_t
ecma_string_get_property_index(ecma_property_t property,jmem_cpointer_t prop_name_cp)1641 ecma_string_get_property_index (ecma_property_t property, /**< property name type */
1642                                 jmem_cpointer_t prop_name_cp) /**< property name compressed pointer */
1643 {
1644   switch (ECMA_PROPERTY_GET_NAME_TYPE (property))
1645   {
1646     case ECMA_DIRECT_STRING_UINT:
1647     {
1648       return (uint32_t) prop_name_cp;
1649     }
1650     case ECMA_DIRECT_STRING_PTR:
1651     {
1652       ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1653       return ecma_string_get_array_index (prop_name_p);
1654     }
1655     default:
1656     {
1657       return ECMA_STRING_NOT_ARRAY_INDEX;
1658     }
1659   }
1660 } /* ecma_string_get_property_index */
1661 
1662 /**
1663  * Compare a property name to a string
1664  *
1665  * @return true if they are equals
1666  *         false otherwise
1667  */
1668 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_string_compare_to_property_name(ecma_property_t property,jmem_cpointer_t prop_name_cp,const ecma_string_t * string_p)1669 ecma_string_compare_to_property_name (ecma_property_t property, /**< property name type */
1670                                       jmem_cpointer_t prop_name_cp, /**< property name compressed pointer */
1671                                       const ecma_string_t *string_p) /**< other string */
1672 {
1673   if (ECMA_PROPERTY_GET_NAME_TYPE (property) != ECMA_DIRECT_STRING_PTR)
1674   {
1675     return ecma_property_to_string (property, prop_name_cp) == string_p;
1676   }
1677 
1678   if (ECMA_IS_DIRECT_STRING (string_p))
1679   {
1680     return false;
1681   }
1682 
1683   ecma_string_t *prop_name_p = ECMA_GET_NON_NULL_POINTER (ecma_string_t, prop_name_cp);
1684   return ecma_compare_ecma_non_direct_strings (prop_name_p, string_p);
1685 } /* ecma_string_compare_to_property_name */
1686 
1687 /**
1688  * Long path part of ecma-string to ecma-string comparison routine
1689  *
1690  * See also:
1691  *          ecma_compare_ecma_strings
1692  *
1693  * @return true - if strings are equal;
1694  *         false - otherwise
1695  */
1696 static bool JERRY_ATTR_NOINLINE
ecma_compare_ecma_strings_longpath(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1697 ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /**< ecma-string */
1698                                     const ecma_string_t *string2_p) /**< ecma-string */
1699 {
1700   JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_GET_CONTAINER (string2_p));
1701 
1702   const lit_utf8_byte_t *utf8_string1_p, *utf8_string2_p;
1703   lit_utf8_size_t utf8_string1_size, utf8_string2_size;
1704 
1705   if (JERRY_LIKELY (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING))
1706   {
1707     utf8_string1_p = ECMA_UTF8_STRING_GET_BUFFER (string1_p);
1708     utf8_string1_size = ((ecma_utf8_string_t *) string1_p)->size;
1709     utf8_string2_p = ECMA_UTF8_STRING_GET_BUFFER (string2_p);
1710     utf8_string2_size = ((ecma_utf8_string_t *) string2_p)->size;
1711 
1712   }
1713   else if (ECMA_STRING_GET_CONTAINER(string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)
1714   {
1715     utf8_string1_p = ECMA_ASCII_STRING_GET_BUFFER (string1_p);
1716     utf8_string1_size = ((ecma_ascii_string_t *) string1_p)->size;
1717     utf8_string2_p = ECMA_ASCII_STRING_GET_BUFFER (string2_p);
1718     utf8_string2_size = ((ecma_ascii_string_t *) string2_p)->size;
1719   }
1720   else
1721   {
1722     JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING);
1723 
1724     utf8_string1_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string1_p);
1725     utf8_string1_size = ((ecma_long_utf8_string_t *) string1_p)->size;
1726     utf8_string2_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string2_p);
1727     utf8_string2_size = ((ecma_long_utf8_string_t *) string2_p)->size;
1728   }
1729 
1730   if (utf8_string1_size != utf8_string2_size)
1731   {
1732     return false;
1733   }
1734 
1735   return !memcmp ((char *) utf8_string1_p, (char *) utf8_string2_p, utf8_string1_size);
1736 } /* ecma_compare_ecma_strings_longpath */
1737 
1738 /**
1739  * Compare two ecma-strings
1740  *
1741  * @return true - if strings are equal;
1742  *         false - otherwise
1743  */
1744 extern inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_strings(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1745 ecma_compare_ecma_strings (const ecma_string_t *string1_p, /**< ecma-string */
1746                            const ecma_string_t *string2_p) /**< ecma-string */
1747 {
1748   JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
1749 
1750   /* Fast paths first. */
1751   if (string1_p == string2_p)
1752   {
1753     return true;
1754   }
1755 
1756   /* Either string is direct, return with false. */
1757   if (ECMA_IS_DIRECT_STRING (((uintptr_t) string1_p) | ((uintptr_t) string2_p)))
1758   {
1759     return false;
1760   }
1761 
1762   if (string1_p->u.hash != string2_p->u.hash)
1763   {
1764     return false;
1765   }
1766 
1767   ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
1768 
1769   if (string1_container != ECMA_STRING_GET_CONTAINER (string2_p))
1770   {
1771     return false;
1772   }
1773 
1774   if (string1_container == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1775   {
1776     return true;
1777   }
1778 
1779 #if ENABLED (JERRY_ES2015)
1780   if (string1_container == ECMA_STRING_CONTAINER_SYMBOL)
1781   {
1782     return false;
1783   }
1784 #endif /* ENABLED (JERRY_ES2015) */
1785 
1786   return ecma_compare_ecma_strings_longpath (string1_p, string2_p);
1787 } /* ecma_compare_ecma_strings */
1788 
1789 static bool JERRY_ATTR_NOINLINE
ecma_compare_ecma_strings_longpath_with_literal(const ecma_string_t * string1_p,const ecma_string_t * string2_p,const lit_utf8_byte_t * chars_p)1790 ecma_compare_ecma_strings_longpath_with_literal (const ecma_string_t *string1_p, /**< ecma_string */
1791                                        const ecma_string_t *string2_p,
1792                                        const lit_utf8_byte_t *chars_p) /**< ecma_string */
1793 {
1794   JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_GET_CONTAINER (string2_p));
1795 
1796   const lit_utf8_byte_t *utf8_string2_p;
1797   lit_utf8_size_t utf8_string1_size,utf8_string2_size;
1798 
1799   if(JERRY_LIKELY (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING))
1800   {
1801      utf8_string1_size = ((ecma_utf8_string_t *) string1_p)->size;
1802      utf8_string2_p = ECMA_UTF8_STRING_GET_BUFFER (string2_p);
1803      utf8_string2_size = ((ecma_utf8_string_t *) string2_p)->size;
1804 
1805   }
1806   else if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)
1807   {
1808     utf8_string1_size = ((ecma_ascii_string_t *) string1_p)->size;
1809     utf8_string2_p = ECMA_ASCII_STRING_GET_BUFFER (string2_p);
1810     utf8_string2_size = ((ecma_ascii_string_t *) string2_p)->size;
1811   }
1812   else
1813   {
1814       JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING);
1815 
1816     utf8_string1_size = ((ecma_long_utf8_string_t *) string1_p)->size;
1817     utf8_string2_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string2_p);
1818     utf8_string2_size = ((ecma_long_utf8_string_t *) string2_p)->size;
1819   }
1820 
1821   if (utf8_string1_size != utf8_string2_size)
1822   {
1823       return false;
1824   }
1825 
1826   return !memcmp ((char *) chars_p, (char *) utf8_string2_p, utf8_string1_size);
1827 }  /* ecma_compare_ecma_strings_longpath */
1828 
1829 extern inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_strings_with_literal(const ecma_string_t * string1_p,const ecma_string_t * string2_p,const lit_utf8_byte_t * chars_p)1830 ecma_compare_ecma_strings_with_literal (const ecma_string_t *string1_p, /**< ecma-string */
1831                           const ecma_string_t *string2_p,\
1832                           const lit_utf8_byte_t *chars_p) /**< ecma-string */
1833 {
1834    JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
1835 
1836    /* Fast paths first. */
1837    if  (string1_p == string2_p)
1838    {
1839        return true;
1840    }
1841 
1842    /* Either string is direct, return with false. */
1843    if (ECMA_IS_DIRECT_STRING (((uintptr_t) string1_p) | ((uintptr_t) string2_p)))
1844    {
1845       return false;
1846    }
1847 
1848    if (string1_p->u.hash != string2_p->u.hash)
1849    {
1850       return false;
1851    }
1852 
1853    ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
1854 
1855    if (string1_container != ECMA_STRING_GET_CONTAINER (string2_p))
1856    {
1857       return false;
1858    }
1859 
1860    if (string1_container == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1861    {
1862       return true;
1863    }
1864 
1865 #if ENABLED (JERRY_ES2015)
1866   if (string1_container == ECMA_STRING_CONTAINER_SYMBOL)
1867   {
1868      return false;
1869   }
1870 #endif /* ENABLED (JERRY_ES2015)  */
1871 
1872   return ecma_compare_ecma_strings_longpath_with_literal (string1_p, string2_p, chars_p);
1873 }  /* ecma_compare_ecma_strings */
1874 
1875 /**
1876  * Compare two non-direct ecma-strings
1877  *
1878  * @return true - if strings are equal;
1879  *         false - otherwise
1880  */
1881 inline bool JERRY_ATTR_ALWAYS_INLINE
ecma_compare_ecma_non_direct_strings(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1882 ecma_compare_ecma_non_direct_strings (const ecma_string_t *string1_p, /**< ecma-string */
1883                                       const ecma_string_t *string2_p) /**< ecma-string */
1884 {
1885   JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
1886   JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (string1_p) && !ECMA_IS_DIRECT_STRING (string2_p));
1887 
1888   /* Fast paths first. */
1889   if (string1_p == string2_p)
1890   {
1891     return true;
1892   }
1893 
1894   if (string1_p->u.hash != string2_p->u.hash)
1895   {
1896     return false;
1897   }
1898 
1899   ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
1900 
1901   if (string1_container != ECMA_STRING_GET_CONTAINER (string2_p))
1902   {
1903     return false;
1904   }
1905 
1906   if (string1_container == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1907   {
1908     return true;
1909   }
1910 
1911 #if ENABLED (JERRY_ES2015)
1912   if (string1_container == ECMA_STRING_CONTAINER_SYMBOL)
1913   {
1914     return false;
1915   }
1916 #endif /* ENABLED (JERRY_ES2015) */
1917 
1918   return ecma_compare_ecma_strings_longpath (string1_p, string2_p);
1919 } /* ecma_compare_ecma_non_direct_strings */
1920 
1921 /**
1922  * Relational compare of ecma-strings.
1923  *
1924  * First string is less than second string if:
1925  *  - strings are not equal;
1926  *  - first string is prefix of second or is lexicographically less than second.
1927  *
1928  * @return true - if first string is less than second string,
1929  *         false - otherwise
1930  */
1931 bool
ecma_compare_ecma_strings_relational(const ecma_string_t * string1_p,const ecma_string_t * string2_p)1932 ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-string */
1933                                       const ecma_string_t *string2_p) /**< ecma-string */
1934 {
1935   if (ecma_compare_ecma_strings (string1_p,
1936                                  string2_p))
1937   {
1938     return false;
1939   }
1940 
1941   const lit_utf8_byte_t *utf8_string1_p, *utf8_string2_p;
1942   lit_utf8_size_t utf8_string1_size, utf8_string2_size;
1943 
1944   lit_utf8_byte_t uint32_to_string_buffer1[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
1945   lit_utf8_byte_t uint32_to_string_buffer2[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
1946 
1947   if (ECMA_IS_DIRECT_STRING (string1_p))
1948   {
1949     if (ECMA_GET_DIRECT_STRING_TYPE (string1_p) != ECMA_DIRECT_STRING_UINT)
1950     {
1951       utf8_string1_p = ecma_string_get_chars_fast (string1_p, &utf8_string1_size);
1952     }
1953     else
1954     {
1955       utf8_string1_size = ecma_uint32_to_utf8_string ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string1_p),
1956                                                       uint32_to_string_buffer1,
1957                                                       ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
1958       utf8_string1_p = uint32_to_string_buffer1;
1959     }
1960   }
1961   else
1962   {
1963     JERRY_ASSERT (string1_p->refs_and_container >= ECMA_STRING_REF_ONE);
1964 
1965     if (ECMA_STRING_GET_CONTAINER (string1_p) != ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1966     {
1967       utf8_string1_p = ecma_string_get_chars_fast (string1_p, &utf8_string1_size);
1968     }
1969     else
1970     {
1971       utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
1972                                                       uint32_to_string_buffer1,
1973                                                       ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
1974       utf8_string1_p = uint32_to_string_buffer1;
1975     }
1976   }
1977 
1978   if (ECMA_IS_DIRECT_STRING (string2_p))
1979   {
1980     if (ECMA_GET_DIRECT_STRING_TYPE (string2_p) != ECMA_DIRECT_STRING_UINT)
1981     {
1982       utf8_string2_p = ecma_string_get_chars_fast (string2_p, &utf8_string2_size);
1983     }
1984     else
1985     {
1986       utf8_string2_size = ecma_uint32_to_utf8_string ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string2_p),
1987                                                       uint32_to_string_buffer2,
1988                                                       ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
1989       utf8_string2_p = uint32_to_string_buffer2;
1990     }
1991   }
1992   else
1993   {
1994     JERRY_ASSERT (string2_p->refs_and_container >= ECMA_STRING_REF_ONE);
1995 
1996     if (ECMA_STRING_GET_CONTAINER (string2_p) != ECMA_STRING_CONTAINER_UINT32_IN_DESC)
1997     {
1998       utf8_string2_p = ecma_string_get_chars_fast (string2_p, &utf8_string2_size);
1999     }
2000     else
2001     {
2002       utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
2003                                                       uint32_to_string_buffer2,
2004                                                       ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
2005       utf8_string2_p = uint32_to_string_buffer2;
2006     }
2007   }
2008 
2009   return lit_compare_utf8_strings_relational (utf8_string1_p,
2010                                               utf8_string1_size,
2011                                               utf8_string2_p,
2012                                               utf8_string2_size);
2013 } /* ecma_compare_ecma_strings_relational */
2014 
2015 /**
2016  * Sentinel returned by ecma_string_get_ascii_size when no size is available (the string is not an uint32, built-in magic or heap ASCII string).
2017  */
2018 #define ECMA_STRING_NO_ASCII_SIZE 0xffffffff
2019 
2020 /**
2021  * Return the size of uint32 and magic strings.
2022  * The length of these strings are equal to their size.
2023  *
2024  * @return number of characters in the string
2025  */
2026 static ecma_length_t
ecma_string_get_ascii_size(const ecma_string_t * string_p)2027 ecma_string_get_ascii_size (const ecma_string_t *string_p) /**< ecma-string */
2028 {
2029   if (ECMA_IS_DIRECT_STRING (string_p))
2030   {
2031     switch (ECMA_GET_DIRECT_STRING_TYPE (string_p))
2032     {
2033       case ECMA_DIRECT_STRING_MAGIC:
2034       {
2035         uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2036 
2037         if (id >= LIT_MAGIC_STRING__COUNT)
2038         {
2039           return ECMA_STRING_NO_ASCII_SIZE;
2040         }
2041 
2042         JERRY_ASSERT (ECMA_STRING_IS_ASCII (lit_get_magic_string_utf8 (id),
2043                                             lit_get_magic_string_size (id)));
2044 
2045         return lit_get_magic_string_size (id);
2046       }
2047       default:
2048       {
2049         JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_UINT);
2050         uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2051         return ecma_string_get_uint32_size (uint32_number);
2052       }
2053     }
2054   }
2055 
2056   JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
2057 
2058   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC)
2059   {
2060     return ecma_string_get_uint32_size (string_p->u.uint32_number);
2061   }
2062   else if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)
2063   {
2064     return ((ecma_ascii_string_t *) string_p)->size;
2065   }
2066 
2067   return ECMA_STRING_NO_ASCII_SIZE;
2068 } /* ecma_string_get_ascii_size */
2069 
2070 /**
2071  * Get length of ecma-string
2072  *
2073  * @return number of characters in the string
2074  */
2075 ecma_length_t
ecma_string_get_length(const ecma_string_t * string_p)2076 ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */
2077 {
2078   ecma_length_t length = ecma_string_get_ascii_size (string_p);
2079 
2080   if (length != ECMA_STRING_NO_ASCII_SIZE)
2081   {
2082     return length;
2083   }
2084 
2085   if (ECMA_IS_DIRECT_STRING (string_p))
2086   {
2087     JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2088     JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2089 
2090     uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT;
2091     return lit_utf8_string_length (lit_get_magic_string_ex_utf8 (id),
2092                                    lit_get_magic_string_ex_size (id));
2093   }
2094 
2095   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2096   {
2097     return (ecma_length_t) (((ecma_utf8_string_t *) string_p)->length);
2098   }
2099 
2100   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2101   {
2102     return (ecma_length_t) (((ecma_long_utf8_string_t *) string_p)->length);
2103   }
2104 
2105   JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2106 
2107   lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
2108   return lit_utf8_string_length (lit_get_magic_string_ex_utf8 (id),
2109                                  lit_get_magic_string_ex_size (id));
2110 } /* ecma_string_get_length */
2111 
2112 /**
2113  * Get length of UTF-8 encoded string length from ecma-string
2114  *
2115  * @return number of characters in the UTF-8 encoded string
2116  */
2117 ecma_length_t
ecma_string_get_utf8_length(const ecma_string_t * string_p)2118 ecma_string_get_utf8_length (const ecma_string_t *string_p) /**< ecma-string */
2119 {
2120   ecma_length_t length = ecma_string_get_ascii_size (string_p);
2121 
2122   if (length != ECMA_STRING_NO_ASCII_SIZE)
2123   {
2124     return length;
2125   }
2126 
2127   if (ECMA_IS_DIRECT_STRING (string_p))
2128   {
2129     JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2130     JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2131 
2132     uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT;
2133     return lit_get_utf8_length_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2134                                                 lit_get_magic_string_ex_size (id));
2135   }
2136 
2137   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2138   {
2139     ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p;
2140 
2141     if (utf8_string_p->size == utf8_string_p->length)
2142     {
2143       return (ecma_length_t) (utf8_string_p->length);
2144     }
2145 
2146     return lit_get_utf8_length_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), utf8_string_p->size);
2147   }
2148 
2149   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2150   {
2151     ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) string_p;
2152 
2153     if (long_utf8_string_p->size == long_utf8_string_p->length)
2154     {
2155       return (ecma_length_t) (long_utf8_string_p->length);
2156     }
2157 
2158     return lit_get_utf8_length_of_cesu8_string (ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p),
2159                                                 long_utf8_string_p->size);
2160   }
2161 
2162   JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2163 
2164   lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
2165 
2166   return lit_get_utf8_length_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2167                                               lit_get_magic_string_ex_size (id));
2168 } /* ecma_string_get_utf8_length */
2169 
2170 /**
2171  * Get size of ecma-string
2172  *
2173  * @return number of bytes in the buffer needed to represent the string
2174  */
2175 lit_utf8_size_t
ecma_string_get_size(const ecma_string_t * string_p)2176 ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */
2177 {
2178   ecma_length_t length = ecma_string_get_ascii_size (string_p);
2179 
2180   if (length != ECMA_STRING_NO_ASCII_SIZE)
2181   {
2182     return length;
2183   }
2184 
2185   if (ECMA_IS_DIRECT_STRING (string_p))
2186   {
2187     JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2188     JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2189 
2190     return lit_get_magic_string_ex_size ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT);
2191   }
2192 
2193   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2194   {
2195     return (lit_utf8_size_t) (((ecma_utf8_string_t *) string_p)->size);
2196   }
2197 
2198   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2199   {
2200     return (lit_utf8_size_t) (((ecma_long_utf8_string_t *) string_p)->size);
2201   }
2202 
2203   JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2204 
2205   return lit_get_magic_string_ex_size (LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id);
2206 } /* ecma_string_get_size */
2207 
2208 /**
2209  * Get the UTF-8 encoded string size from ecma-string
2210  *
2211  * @return number of bytes in the buffer needed to represent an UTF-8 encoded string
2212  */
2213 lit_utf8_size_t
ecma_string_get_utf8_size(const ecma_string_t * string_p)2214 ecma_string_get_utf8_size (const ecma_string_t *string_p) /**< ecma-string */
2215 {
2216   ecma_length_t length = ecma_string_get_ascii_size (string_p);
2217 
2218   if (length != ECMA_STRING_NO_ASCII_SIZE)
2219   {
2220     return length;
2221   }
2222 
2223   if (ECMA_IS_DIRECT_STRING (string_p))
2224   {
2225     JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_MAGIC);
2226     JERRY_ASSERT ((uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) >= LIT_MAGIC_STRING__COUNT);
2227 
2228     uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p) - LIT_MAGIC_STRING__COUNT;
2229     return lit_get_utf8_size_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2230                                               lit_get_magic_string_ex_size (id));
2231   }
2232 
2233   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
2234   {
2235     ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p;
2236 
2237     if (utf8_string_p->size == utf8_string_p->length)
2238     {
2239       return utf8_string_p->size;
2240     }
2241 
2242     return lit_get_utf8_size_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), utf8_string_p->size);
2243   }
2244 
2245   if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
2246   {
2247     ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) string_p;
2248 
2249     if (long_utf8_string_p->size == long_utf8_string_p->length)
2250     {
2251       return long_utf8_string_p->size;
2252     }
2253 
2254     return lit_get_utf8_size_of_cesu8_string (ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p),
2255                                               long_utf8_string_p->size);
2256   }
2257 
2258   JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2259 
2260   lit_magic_string_ex_id_t id = LIT_MAGIC_STRING__COUNT - string_p->u.magic_string_ex_id;
2261   return lit_get_utf8_size_of_cesu8_string (lit_get_magic_string_ex_utf8 (id),
2262                                             lit_get_magic_string_ex_size (id));
2263 } /* ecma_string_get_utf8_size */
2264 
2265 /**
2266  * Get character from specified position in an external ecma-string.
2267  *
2268  * @return character value
2269  */
2270 static ecma_char_t JERRY_ATTR_NOINLINE
ecma_external_string_get_char_at_pos(lit_utf8_size_t id,ecma_length_t index)2271 ecma_external_string_get_char_at_pos (lit_utf8_size_t id, /**< id of the external magic string */
2272                                       ecma_length_t index) /**< index of character */
2273 {
2274   id -= LIT_MAGIC_STRING__COUNT;
2275   const lit_utf8_byte_t *data_p = lit_get_magic_string_ex_utf8 (id);
2276   lit_utf8_size_t size = lit_get_magic_string_ex_size (id);
2277   lit_utf8_size_t length = lit_utf8_string_length (data_p, size);
2278 
2279   if (JERRY_LIKELY (size == length))
2280   {
2281     return (ecma_char_t) data_p[index];
2282   }
2283 
2284   return lit_utf8_string_code_unit_at (data_p, size, index);
2285 } /* ecma_external_string_get_char_at_pos */
2286 
2287 /**
2288  * Get character from specified position in the ecma-string.
2289  *
2290  * @return character value
2291  */
2292 ecma_char_t
ecma_string_get_char_at_pos(const ecma_string_t * string_p,ecma_length_t index)2293 ecma_string_get_char_at_pos (const ecma_string_t *string_p, /**< ecma-string */
2294                              ecma_length_t index) /**< index of character */
2295 {
2296   JERRY_ASSERT (index < ecma_string_get_length (string_p));
2297 
2298   lit_utf8_byte_t uint32_to_string_buffer[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32];
2299 
2300   if (ECMA_IS_DIRECT_STRING (string_p))
2301   {
2302     switch (ECMA_GET_DIRECT_STRING_TYPE (string_p))
2303     {
2304       case ECMA_DIRECT_STRING_MAGIC:
2305       {
2306         uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2307 
2308         if (JERRY_LIKELY (id < LIT_MAGIC_STRING__COUNT))
2309         {
2310           /* All magic strings must be ascii strings. */
2311           const lit_utf8_byte_t *data_p = lit_get_magic_string_utf8 (id);
2312 
2313           return (ecma_char_t) data_p[index];
2314         }
2315 
2316         return ecma_external_string_get_char_at_pos (id, index);
2317       }
2318       default:
2319       {
2320         JERRY_ASSERT (ECMA_GET_DIRECT_STRING_TYPE (string_p) == ECMA_DIRECT_STRING_UINT);
2321         uint32_t uint32_number = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2322 
2323         ecma_uint32_to_utf8_string (uint32_number, uint32_to_string_buffer, ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
2324 
2325         return (ecma_char_t) uint32_to_string_buffer[index];
2326       }
2327     }
2328   }
2329 
2330   JERRY_ASSERT (string_p->refs_and_container >= ECMA_STRING_REF_ONE);
2331 
2332   switch (ECMA_STRING_GET_CONTAINER (string_p))
2333   {
2334     case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
2335     {
2336       ecma_utf8_string_t *utf8_string_desc_p = (ecma_utf8_string_t *) string_p;
2337       lit_utf8_size_t size = utf8_string_desc_p->size;
2338       const lit_utf8_byte_t *data_p = ECMA_UTF8_STRING_GET_BUFFER (string_p);
2339 
2340       if (JERRY_LIKELY (size == utf8_string_desc_p->length))
2341       {
2342         return (ecma_char_t) data_p[index];
2343       }
2344 
2345       return lit_utf8_string_code_unit_at (data_p, size, index);
2346     }
2347     case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
2348     {
2349       ecma_long_utf8_string_t *long_utf8_string_desc_p = (ecma_long_utf8_string_t *) string_p;
2350       lit_utf8_size_t size = long_utf8_string_desc_p->size;
2351       const lit_utf8_byte_t *data_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p);
2352 
2353       if (JERRY_LIKELY (size == long_utf8_string_desc_p->length))
2354       {
2355         return (ecma_char_t) data_p[index];
2356       }
2357 
2358       return lit_utf8_string_code_unit_at (data_p, size, index);
2359     }
2360     case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
2361     {
2362       const lit_utf8_byte_t *data_p = ECMA_ASCII_STRING_GET_BUFFER (string_p);
2363       return (ecma_char_t) data_p[index];
2364     }
2365     case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
2366     {
2367       ecma_uint32_to_utf8_string (string_p->u.uint32_number,
2368                                   uint32_to_string_buffer,
2369                                   ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32);
2370 
2371       return (ecma_char_t) uint32_to_string_buffer[index];
2372     }
2373     default:
2374     {
2375       JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
2376       return ecma_external_string_get_char_at_pos (string_p->u.magic_string_ex_id, index);
2377     }
2378   }
2379 } /* ecma_string_get_char_at_pos */
2380 
2381 /**
2382  * Check if passed string equals to one of magic strings
2383  * and if equal magic string was found, return it's id in 'out_id_p' argument.
2384  *
2385  * @return id - if magic string equal to passed string was found,
2386  *         LIT_MAGIC_STRING__COUNT - otherwise.
2387  */
2388 lit_magic_string_id_t
ecma_get_string_magic(const ecma_string_t * string_p)2389 ecma_get_string_magic (const ecma_string_t *string_p) /**< ecma-string */
2390 {
2391   if (ECMA_IS_DIRECT_STRING_WITH_TYPE (string_p, ECMA_DIRECT_STRING_MAGIC))
2392   {
2393     uint32_t id = (uint32_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2394 
2395     if (id < LIT_MAGIC_STRING__COUNT)
2396     {
2397       return (lit_magic_string_id_t) id;
2398     }
2399   }
2400 
2401   return LIT_MAGIC_STRING__COUNT;
2402 } /* ecma_get_string_magic */
2403 
2404 /**
2405  * Try to calculate hash of the ecma-string
2406  *
2407  * @return calculated hash
2408  */
2409 inline lit_string_hash_t JERRY_ATTR_ALWAYS_INLINE
ecma_string_hash(const ecma_string_t * string_p)2410 ecma_string_hash (const ecma_string_t *string_p) /**< ecma-string to calculate hash for */
2411 {
2412   if (ECMA_IS_DIRECT_STRING (string_p))
2413   {
2414     return (lit_string_hash_t) ECMA_GET_DIRECT_STRING_VALUE (string_p);
2415   }
2416 
2417   return (lit_string_hash_t) string_p->u.hash;
2418 } /* ecma_string_hash */
2419 
2420 /**
2421  * Create a substring from an ecma string
2422  *
2423  * @return a newly consturcted ecma string with its value initialized to a copy of a substring of the first argument
2424  */
2425 ecma_string_t *
ecma_string_substr(const ecma_string_t * string_p,ecma_length_t start_pos,ecma_length_t end_pos)2426 ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma string */
2427                     ecma_length_t start_pos, /**< start position, should be less or equal than string length */
2428                     ecma_length_t end_pos) /**< end position, should be less or equal than string length */
2429 {
2430   const ecma_length_t string_length = ecma_string_get_length (string_p);
2431   JERRY_ASSERT (start_pos <= string_length);
2432   JERRY_ASSERT (end_pos <= string_length);
2433 
2434   if (start_pos >= end_pos)
2435   {
2436     return ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
2437   }
2438 
2439   ecma_string_t *ecma_string_p = NULL;
2440   end_pos -= start_pos;
2441 
2442   ECMA_STRING_TO_UTF8_STRING (string_p, start_p, buffer_size);
2443 
2444   if (string_length == buffer_size)
2445   {
2446     ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p + start_pos,
2447                                                     (lit_utf8_size_t) end_pos);
2448   }
2449   else
2450   {
2451     while (start_pos--)
2452     {
2453       start_p += lit_get_unicode_char_size_by_utf8_first_byte (*start_p);
2454     }
2455 
2456     const lit_utf8_byte_t *end_p = start_p;
2457     while (end_pos--)
2458     {
2459       end_p += lit_get_unicode_char_size_by_utf8_first_byte (*end_p);
2460     }
2461 
2462     ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p, (lit_utf8_size_t) (end_p - start_p));
2463   }
2464 
2465   ECMA_FINALIZE_UTF8_STRING (start_p, buffer_size);
2466 
2467   return ecma_string_p;
2468 } /* ecma_string_substr */
2469 
2470 /**
2471  * Helper function for trimming.
2472  *
2473  * Used by:
2474  *        - ecma_string_trim
2475  *        - ecma_utf8_string_to_number
2476  *        - ecma_builtin_global_object_parse_int
2477  *        - ecma_builtin_global_object_parse_float
2478  */
2479 void
ecma_string_trim_helper(const lit_utf8_byte_t ** utf8_str_p,lit_utf8_size_t * utf8_str_size)2480 ecma_string_trim_helper (const lit_utf8_byte_t **utf8_str_p, /**< [in, out] current string position */
2481                          lit_utf8_size_t *utf8_str_size) /**< [in, out] size of the given string */
2482 {
2483   ecma_char_t ch;
2484   lit_utf8_size_t read_size;
2485   const lit_utf8_byte_t *nonws_start_p = *utf8_str_p + *utf8_str_size;
2486   const lit_utf8_byte_t *current_p = *utf8_str_p;
2487 
2488   while (current_p < nonws_start_p)
2489   {
2490     read_size = lit_read_code_unit_from_utf8 (current_p, &ch);
2491 
2492     if (!lit_char_is_white_space (ch))
2493     {
2494       nonws_start_p = current_p;
2495       break;
2496     }
2497 
2498     current_p += read_size;
2499   }
2500 
2501   current_p = *utf8_str_p + *utf8_str_size;
2502 
2503   while (current_p > nonws_start_p)
2504   {
2505     read_size = lit_read_prev_code_unit_from_utf8 (current_p, &ch);
2506 
2507     if (!lit_char_is_white_space (ch))
2508     {
2509       break;
2510     }
2511 
2512     current_p -= read_size;
2513   }
2514 
2515   *utf8_str_p = nonws_start_p;
2516   *utf8_str_size = (lit_utf8_size_t) (current_p - nonws_start_p);
2517 } /* ecma_string_trim_helper */
2518 
2519 /**
2520  * Trim leading and trailing whitespace characters from string.
2521  *
2522  * @return trimmed ecma string
2523  */
2524 ecma_string_t *
ecma_string_trim(const ecma_string_t * string_p)2525 ecma_string_trim (const ecma_string_t *string_p) /**< pointer to an ecma string */
2526 {
2527   ecma_string_t *ret_string_p;
2528 
2529   lit_utf8_size_t utf8_str_size;
2530   uint8_t flags = ECMA_STRING_FLAG_IS_ASCII;
2531   const lit_utf8_byte_t *utf8_str_p = ecma_string_get_chars (string_p, &utf8_str_size, NULL, NULL, &flags);
2532 
2533   if (utf8_str_size > 0)
2534   {
2535     ecma_string_trim_helper (&utf8_str_p, &utf8_str_size);
2536     ret_string_p = ecma_new_ecma_string_from_utf8 (utf8_str_p, utf8_str_size);
2537   }
2538   else
2539   {
2540     ret_string_p = ecma_get_magic_string (LIT_MAGIC_STRING__EMPTY);
2541   }
2542 
2543   if (flags & ECMA_STRING_FLAG_MUST_BE_FREED)
2544   {
2545     jmem_heap_free_block ((void *) utf8_str_p, utf8_str_size);
2546   }
2547 
2548   return ret_string_p;
2549 } /* ecma_string_trim */
2550 
2551 /**
2552  * Create an empty string builder
2553  *
2554  * @return new string builder
2555  */
2556 ecma_stringbuilder_t
ecma_stringbuilder_create(void)2557 ecma_stringbuilder_create (void)
2558 {
2559   const lit_utf8_size_t initial_size = sizeof (ecma_ascii_string_t);
2560   ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
2561   header_p->current_size = initial_size;
2562 #if ENABLED (JERRY_MEM_STATS)
2563   jmem_stats_allocate_string_bytes (initial_size);
2564 #endif /* ENABLED (JERRY_MEM_STATS) */
2565 
2566   ecma_stringbuilder_t ret = {.header_p = header_p};
2567   return ret;
2568 } /* ecma_stringbuilder_create */
2569 
2570 /**
2571  * Create a string builder from an ecma string
2572  *
2573  * @return new string builder
2574  */
2575 ecma_stringbuilder_t
ecma_stringbuilder_create_from(ecma_string_t * string_p)2576 ecma_stringbuilder_create_from (ecma_string_t *string_p) /**< ecma string */
2577 {
2578   const lit_utf8_size_t string_size = ecma_string_get_size (string_p);
2579   const lit_utf8_size_t initial_size = string_size + (lit_utf8_size_t) sizeof (ecma_ascii_string_t);
2580 
2581   ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
2582   header_p->current_size = initial_size;
2583 #if ENABLED (JERRY_MEM_STATS)
2584   jmem_stats_allocate_string_bytes (initial_size);
2585 #endif /* ENABLED (JERRY_MEM_STATS) */
2586 
2587   size_t copied_size = ecma_string_copy_to_cesu8_buffer (string_p,
2588                                                          ECMA_STRINGBUILDER_STRING_PTR (header_p),
2589                                                          string_size);
2590   JERRY_ASSERT (copied_size == string_size);
2591 
2592   ecma_stringbuilder_t ret = {.header_p = header_p};
2593   return ret;
2594 } /* ecma_stringbuilder_create_from */
2595 
2596 /**
2597  * Create a string builder from a raw string
2598  *
2599  * @return new string builder
2600  */
2601 ecma_stringbuilder_t
ecma_stringbuilder_create_raw(const lit_utf8_byte_t * data_p,const lit_utf8_size_t data_size)2602 ecma_stringbuilder_create_raw (const lit_utf8_byte_t *data_p, /**< pointer to data */
2603                                const lit_utf8_size_t data_size) /**< size of the data */
2604 {
2605   const lit_utf8_size_t initial_size = data_size + (lit_utf8_size_t) sizeof (ecma_ascii_string_t);
2606 
2607   ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
2608   header_p->current_size = initial_size;
2609 #if ENABLED (JERRY_MEM_STATS)
2610   jmem_stats_allocate_string_bytes (initial_size);
2611 #endif /* ENABLED (JERRY_MEM_STATS) */
2612 
2613   memcpy (ECMA_STRINGBUILDER_STRING_PTR (header_p), data_p, data_size);
2614 
2615   ecma_stringbuilder_t ret = {.header_p = header_p};
2616   return ret;
2617 } /* ecma_stringbuilder_create_raw */
2618 
2619 /**
2620  * Grow the underlying buffer of a string builder
2621  *
2622  * @return pointer to the end of the data in the underlying buffer
2623  */
2624 static lit_utf8_byte_t *
ecma_stringbuilder_grow(ecma_stringbuilder_t * builder_p,lit_utf8_size_t required_size)2625 ecma_stringbuilder_grow (ecma_stringbuilder_t *builder_p, /**< string builder */
2626                          lit_utf8_size_t required_size) /**< required size */
2627 {
2628   ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2629   JERRY_ASSERT (header_p != NULL);
2630 
2631   const lit_utf8_size_t new_size = header_p->current_size + required_size;
2632   header_p = jmem_heap_realloc_block (header_p, header_p->current_size, new_size);
2633   header_p->current_size = new_size;
2634   builder_p->header_p = header_p;
2635 
2636 #if ENABLED (JERRY_MEM_STATS)
2637   jmem_stats_allocate_string_bytes (required_size);
2638 #endif /* ENABLED (JERRY_MEM_STATS) */
2639 
2640   return ((lit_utf8_byte_t *)  header_p) + header_p->current_size - required_size;
2641 } /* ecma_stringbuilder_grow */
2642 
2643 /**
2644  * Get the current size of the string in a string builder
2645  *
2646  * @return the size of the string data
2647  */
2648 lit_utf8_size_t
ecma_stringbuilder_get_size(ecma_stringbuilder_t * builder_p)2649 ecma_stringbuilder_get_size (ecma_stringbuilder_t *builder_p) /**< string builder */
2650 {
2651   ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2652   JERRY_ASSERT (header_p != NULL);
2653 
2654   return ECMA_STRINGBUILDER_STRING_SIZE (header_p);
2655 } /* ecma_stringbuilder_get_size */
2656 
2657 /**
2658  * Get pointer to the raw string data in a string builder
2659  *
2660  * @return pointer to the string data
2661  */
2662 lit_utf8_byte_t *
ecma_stringbuilder_get_data(ecma_stringbuilder_t * builder_p)2663 ecma_stringbuilder_get_data (ecma_stringbuilder_t *builder_p) /**< string builder */
2664 {
2665   ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2666   JERRY_ASSERT (header_p != NULL);
2667 
2668   return ECMA_STRINGBUILDER_STRING_PTR (header_p);
2669 } /* ecma_stringbuilder_get_data */
2670 
2671 /**
2672  * Revert the string builder to a smaller size
2673  */
2674 void
ecma_stringbuilder_revert(ecma_stringbuilder_t * builder_p,const lit_utf8_size_t size)2675 ecma_stringbuilder_revert (ecma_stringbuilder_t *builder_p, /**< string builder */
2676                            const lit_utf8_size_t size) /**< new size */
2677 {
2678   ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2679   JERRY_ASSERT (header_p != NULL);
2680 
2681   const lit_utf8_size_t new_size = size + (lit_utf8_size_t) (sizeof (ecma_ascii_string_t));
2682   JERRY_ASSERT (new_size <= header_p->current_size);
2683 
2684 #if ENABLED (JERRY_MEM_STATS)
2685   jmem_stats_free_string_bytes (header_p->current_size - new_size);
2686 #endif /* ENABLED (JERRY_MEM_STATS) */
2687 
2688   header_p = jmem_heap_realloc_block (header_p, header_p->current_size, new_size);
2689   header_p->current_size = new_size;
2690   builder_p->header_p = header_p;
2691 } /* ecma_stringbuilder_revert */
2692 
2693 /**
2694  * Append an ecma_string_t to a string builder
2695  */
2696 void
ecma_stringbuilder_append(ecma_stringbuilder_t * builder_p,const ecma_string_t * string_p)2697 ecma_stringbuilder_append (ecma_stringbuilder_t *builder_p, /**< string builder */
2698                            const ecma_string_t *string_p) /**< ecma string */
2699 {
2700   const lit_utf8_size_t string_size = ecma_string_get_size (string_p);
2701   lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, string_size);
2702 
2703   size_t copied_size = ecma_string_copy_to_cesu8_buffer (string_p,
2704                                                          dest_p,
2705                                                          string_size);
2706   JERRY_ASSERT (copied_size == string_size);
2707 } /* ecma_stringbuilder_append */
2708 
2709 /**
2710  * Append a magic string to a string builder
2711  */
2712 void
ecma_stringbuilder_append_magic(ecma_stringbuilder_t * builder_p,const lit_magic_string_id_t id)2713 ecma_stringbuilder_append_magic (ecma_stringbuilder_t *builder_p, /**< string builder */
2714                                  const lit_magic_string_id_t id) /**< magic string id */
2715 {
2716   const lit_utf8_size_t string_size = lit_get_magic_string_size (id);
2717   lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, string_size);
2718 
2719   const lit_utf8_byte_t *string_data_p = lit_get_magic_string_utf8 (id);
2720   memcpy (dest_p, string_data_p, string_size);
2721 } /* ecma_stringbuilder_append_magic */
2722 
2723 /**
2724  * Append raw string data to a string builder
2725  */
2726 void
ecma_stringbuilder_append_raw(ecma_stringbuilder_t * builder_p,const lit_utf8_byte_t * data_p,const lit_utf8_size_t data_size)2727 ecma_stringbuilder_append_raw (ecma_stringbuilder_t *builder_p, /**< string builder */
2728                                const lit_utf8_byte_t *data_p, /**< pointer to data */
2729                                const lit_utf8_size_t data_size) /**< size of the data */
2730 {
2731   lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, data_size);
2732   memcpy (dest_p, data_p, data_size);
2733 } /* ecma_stringbuilder_append_raw */
2734 
2735 /**
2736  * Append an ecma_char_t to a string builder
2737  */
2738 void
ecma_stringbuilder_append_char(ecma_stringbuilder_t * builder_p,const ecma_char_t c)2739 ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
2740                                 const ecma_char_t c) /**< ecma char */
2741 {
2742   const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c);
2743   lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
2744 
2745   lit_code_point_to_cesu8_bytes (dest_p, c);
2746 } /* ecma_stringbuilder_append_char */
2747 
2748 /**
2749  * Append a single byte to a string builder
2750  */
2751 void
ecma_stringbuilder_append_byte(ecma_stringbuilder_t * builder_p,const lit_utf8_byte_t byte)2752 ecma_stringbuilder_append_byte (ecma_stringbuilder_t *builder_p, /**< string builder */
2753                                 const lit_utf8_byte_t byte) /**< byte */
2754 {
2755   lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, 1);
2756   *dest_p = byte;
2757 } /* ecma_stringbuilder_append_byte */
2758 
2759 /**
2760  * Finalize a string builder, returning the created string, and releasing the underlying buffer.
2761  *
2762  * Note:
2763  *      The builder should no longer be used.
2764  *
2765  * @return the created string
2766  */
2767 ecma_string_t *
ecma_stringbuilder_finalize(ecma_stringbuilder_t * builder_p)2768 ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p) /**< string builder */
2769 {
2770   ecma_stringbuilder_header_t *header_p = builder_p->header_p;
2771   JERRY_ASSERT (header_p != NULL);
2772 
2773   const lit_utf8_size_t string_size = ECMA_STRINGBUILDER_STRING_SIZE (header_p);
2774   lit_utf8_byte_t *string_begin_p = ECMA_STRINGBUILDER_STRING_PTR (header_p);
2775 
2776   ecma_string_t *string_p = ecma_find_special_string (string_begin_p, string_size);
2777 
2778   if (JERRY_UNLIKELY (string_p != NULL))
2779   {
2780     ecma_stringbuilder_destroy (builder_p);
2781     return string_p;
2782   }
2783 
2784 #ifndef JERRY_NDEBUG
2785   builder_p->header_p = NULL;
2786 #endif
2787 
2788   size_t container_size = sizeof (ecma_utf8_string_t);
2789   const lit_string_hash_t hash = lit_utf8_string_calc_hash (string_begin_p, string_size);
2790   const lit_utf8_size_t length = lit_utf8_string_length (string_begin_p, string_size);
2791 
2792   if (JERRY_LIKELY (string_size <= UINT16_MAX))
2793   {
2794     if (JERRY_LIKELY (length == string_size))
2795     {
2796       ecma_ascii_string_t *ascii_string_p = (ecma_ascii_string_t *) header_p;
2797       ascii_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
2798       ascii_string_p->header.u.hash = hash;
2799       ascii_string_p->size = (uint16_t) string_size;
2800 
2801       return (ecma_string_t *) ascii_string_p;
2802     }
2803   }
2804   else
2805   {
2806     container_size = sizeof (ecma_long_utf8_string_t);
2807   }
2808 
2809   const size_t utf8_string_size = string_size + container_size;
2810   header_p = jmem_heap_realloc_block (header_p, header_p->current_size, utf8_string_size);
2811   memmove (((lit_utf8_byte_t *) header_p + container_size),
2812            ECMA_STRINGBUILDER_STRING_PTR (header_p),
2813            string_size);
2814 
2815 #if ENABLED (JERRY_MEM_STATS)
2816   jmem_stats_allocate_string_bytes (container_size - sizeof (ecma_ascii_string_t));
2817 #endif /* ENABLED (JERRY_MEM_STATS) */
2818 
2819   if (JERRY_LIKELY (string_size <= UINT16_MAX))
2820   {
2821     ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) header_p;
2822 
2823     utf8_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
2824     utf8_string_p->header.u.hash = hash;
2825     utf8_string_p->size = (uint16_t) string_size;
2826     utf8_string_p->length = (uint16_t) length;
2827 
2828     return (ecma_string_t *) utf8_string_p;
2829   }
2830 
2831   ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) header_p;
2832 
2833   long_utf8_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
2834   long_utf8_string_p->header.u.hash = hash;
2835   long_utf8_string_p->size = string_size;
2836   long_utf8_string_p->length = length;
2837 
2838   return (ecma_string_t *) long_utf8_string_p;
2839 } /* ecma_stringbuilder_finalize */
2840 
2841 /**
2842  * Destroy a string builder that is no longer needed without creating a string from the contents.
2843  */
2844 void
ecma_stringbuilder_destroy(ecma_stringbuilder_t * builder_p)2845 ecma_stringbuilder_destroy (ecma_stringbuilder_t *builder_p) /**< string builder */
2846 {
2847   JERRY_ASSERT (builder_p->header_p != NULL);
2848   const lit_utf8_size_t size = builder_p->header_p->current_size;
2849   jmem_heap_free_block (builder_p->header_p, size);
2850 
2851 #ifndef JERRY_NDEBUG
2852   builder_p->header_p = NULL;
2853 #endif
2854 
2855 #if ENABLED (JERRY_MEM_STATS)
2856   jmem_stats_free_string_bytes (size);
2857 #endif /* ENABLED (JERRY_MEM_STATS) */
2858 } /* ecma_stringbuilder_destroy */
2859 
2860 #if ENABLED (JERRY_ES2015)
2861 /**
2862  * AdvanceStringIndex operation
2863  *
2864  * See also:
2865  *          ECMA-262 v6.0, 21.2.5.2.3
2866  *
2867  * @return uint32_t - the proper character index based on the operation
2868  */
2869 uint32_t
ecma_op_advance_string_index(ecma_string_t * str_p,uint32_t index,bool is_unicode)2870 ecma_op_advance_string_index (ecma_string_t *str_p, /**< input string */
2871                               uint32_t index, /**< given character index */
2872                               bool is_unicode) /**< true - if regexp object's "unicode" flag is set
2873                                                     false - otherwise */
2874 {
2875   if (index >= UINT32_MAX - 1)
2876   {
2877     return UINT32_MAX;
2878   }
2879 
2880   uint32_t next_index = index + 1;
2881 
2882   if (!is_unicode)
2883   {
2884     return next_index;
2885   }
2886 
2887   ecma_length_t str_len = ecma_string_get_length (str_p);
2888 
2889   if (next_index >= str_len)
2890   {
2891     return next_index;
2892   }
2893 
2894   ecma_char_t first = ecma_string_get_char_at_pos (str_p, index);
2895 
2896   if (first < LIT_UTF16_HIGH_SURROGATE_MIN || first > LIT_UTF16_HIGH_SURROGATE_MAX)
2897   {
2898     return next_index;
2899   }
2900 
2901   ecma_char_t second = ecma_string_get_char_at_pos (str_p, next_index);
2902 
2903   if (second < LIT_UTF16_LOW_SURROGATE_MIN || second > LIT_UTF16_LOW_SURROGATE_MAX)
2904   {
2905     return next_index;
2906   }
2907 
2908   return next_index + 1;
2909 } /* ecma_op_advance_string_index */
2910 #endif /* ENABLED (JERRY_ES2015) */
2911 
2912 /**
2913  * @}
2914  * @}
2915  */
2916