• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecma-helpers.h"
17 #include "lit-strings.h"
18 #include "ecma-init-finalize.h"
19 
20 #include "test-common.h"
21 
/* Number of random strings generated and verified by the test. */
#define test_iters (1024)

/* Number of random position look-ups performed on every non-empty string. */
#define test_subiters (128)

/* Capacity of the string buffer in bytes; generated sizes stay below it. */
#define max_bytes_in_string (65 * 1024)
/* Capacity of the per-code-unit bookkeeping arrays (same as the byte capacity). */
#define max_code_units_in_string (max_bytes_in_string)
31 
/**
 * Requested encoded size of a generated character.
 *
 * The numeric value of every sized enumerator equals its byte count, which
 * lets a remaining byte count be cast directly to this type.
 */
typedef enum
{
  CESU8_ANY_SIZE = 0, /**< no constraint on the encoded size */
  CESU8_ONE_BYTE = 1, /**< character encoded on one byte */
  CESU8_TWO_BYTES = 2, /**< character encoded on two bytes */
  CESU8_THREE_BYTES = 3, /**< character encoded on three bytes */
} utf8_char_size;
39 
40 static lit_utf8_size_t
generate_cesu8_char(utf8_char_size char_size,lit_utf8_byte_t * buf)41 generate_cesu8_char (utf8_char_size char_size,
42                      lit_utf8_byte_t *buf)
43 {
44   TEST_ASSERT (char_size >= 0 && char_size <= LIT_CESU8_MAX_BYTES_IN_CODE_UNIT);
45   lit_code_point_t code_point = (lit_code_point_t) rand ();
46 
47   if (char_size == 1)
48   {
49     code_point %= LIT_UTF8_1_BYTE_CODE_POINT_MAX;
50   }
51   else if (char_size == 2)
52   {
53     code_point = LIT_UTF8_2_BYTE_CODE_POINT_MIN + code_point % (LIT_UTF8_2_BYTE_CODE_POINT_MAX -
54                                                                 LIT_UTF8_2_BYTE_CODE_POINT_MIN);
55   }
56   else if (char_size == 3)
57   {
58     code_point = LIT_UTF8_3_BYTE_CODE_POINT_MIN + code_point % (LIT_UTF8_3_BYTE_CODE_POINT_MAX -
59                                                                 LIT_UTF8_3_BYTE_CODE_POINT_MIN);
60   }
61   else
62   {
63     code_point %= LIT_UTF8_3_BYTE_CODE_POINT_MAX;
64   }
65 
66   if (code_point >= LIT_UTF16_HIGH_SURROGATE_MIN
67       && code_point <= LIT_UTF16_LOW_SURROGATE_MAX)
68   {
69     code_point = LIT_UTF16_HIGH_SURROGATE_MIN - 1;
70   }
71 
72   return lit_code_unit_to_utf8 ((ecma_char_t) code_point, buf);
73 } /* generate_cesu8_char */
74 
75 static ecma_length_t
generate_cesu8_string(lit_utf8_byte_t * buf_p,lit_utf8_size_t buf_size)76 generate_cesu8_string (lit_utf8_byte_t *buf_p,
77                        lit_utf8_size_t buf_size)
78 {
79   ecma_length_t length = 0;
80 
81   lit_utf8_size_t size = 0;
82   while (size  < buf_size)
83   {
84     const utf8_char_size char_size = (((buf_size - size) > LIT_CESU8_MAX_BYTES_IN_CODE_UNIT)
85                                       ? CESU8_ANY_SIZE
86                                       : (utf8_char_size) (buf_size - size));
87 
88     lit_utf8_size_t bytes_generated = generate_cesu8_char (char_size, buf_p);
89 
90     TEST_ASSERT (lit_is_valid_cesu8_string (buf_p, bytes_generated));
91 
92     size += bytes_generated;
93     buf_p += bytes_generated;
94     length++;
95   }
96 
97   TEST_ASSERT (size == buf_size);
98 
99   return length;
100 } /* generate_cesu8_string */
101 
102 int
main(void)103 main (void)
104 {
105   TEST_INIT ();
106 
107   jmem_init ();
108   ecma_init ();
109 
110   lit_utf8_byte_t cesu8_string[max_bytes_in_string];
111   ecma_char_t code_units[max_code_units_in_string];
112   const lit_utf8_byte_t *saved_positions[max_code_units_in_string];
113 
114   for (int i = 0; i < test_iters; i++)
115   {
116     lit_utf8_size_t cesu8_string_size = (i == 0) ? 0 : (lit_utf8_size_t) (rand () % max_bytes_in_string);
117     ecma_length_t length = generate_cesu8_string (cesu8_string, cesu8_string_size);
118 
119     ecma_string_t *char_collection_string_p = ecma_new_ecma_string_from_utf8 (cesu8_string, cesu8_string_size);
120     ecma_length_t char_collection_len = ecma_string_get_length (char_collection_string_p);
121     TEST_ASSERT (char_collection_len == length);
122     ecma_deref_ecma_string (char_collection_string_p);
123 
124     TEST_ASSERT (lit_utf8_string_length (cesu8_string, cesu8_string_size) == length);
125 
126     const lit_utf8_byte_t *curr_p = cesu8_string;
127     const lit_utf8_byte_t *end_p = cesu8_string + cesu8_string_size;
128 
129     ecma_length_t calculated_length = 0;
130     ecma_length_t code_units_count = 0;
131 
132     while (curr_p < end_p)
133     {
134       code_units[code_units_count] = lit_cesu8_peek_next (curr_p);
135       saved_positions[code_units_count] = curr_p;
136       code_units_count++;
137       calculated_length++;
138 
139       lit_utf8_incr (&curr_p);
140     }
141 
142     TEST_ASSERT (length == calculated_length);
143 
144     if (code_units_count > 0)
145     {
146       for (int j = 0; j < test_subiters; j++)
147       {
148         ecma_length_t index = (ecma_length_t) rand () % code_units_count;
149         curr_p = saved_positions[index];
150         TEST_ASSERT (lit_cesu8_peek_next (curr_p) == code_units[index]);
151       }
152     }
153 
154     curr_p = (lit_utf8_byte_t *) end_p;
155     while (curr_p > cesu8_string)
156     {
157       TEST_ASSERT (code_units_count > 0);
158       calculated_length--;
159       TEST_ASSERT (code_units[calculated_length] == lit_cesu8_peek_prev (curr_p));
160       lit_utf8_decr (&curr_p);
161     }
162 
163     TEST_ASSERT (calculated_length == 0);
164 
165     while (curr_p < end_p)
166     {
167       ecma_char_t code_unit = lit_cesu8_read_next (&curr_p);
168       TEST_ASSERT (code_unit == code_units[calculated_length]);
169       calculated_length++;
170     }
171 
172     TEST_ASSERT (length == calculated_length);
173 
174     while (curr_p > cesu8_string)
175     {
176       TEST_ASSERT (code_units_count > 0);
177       calculated_length--;
178       TEST_ASSERT (code_units[calculated_length] == lit_cesu8_read_prev (&curr_p));
179     }
180 
181     TEST_ASSERT (calculated_length == 0);
182   }
183 
184   /* Overlong-encoded code point */
185   lit_utf8_byte_t invalid_cesu8_string_1[] = {0xC0, 0x82};
186   TEST_ASSERT (!lit_is_valid_cesu8_string (invalid_cesu8_string_1, sizeof (invalid_cesu8_string_1)));
187 
188   /* Overlong-encoded code point */
189   lit_utf8_byte_t invalid_cesu8_string_2[] = {0xE0, 0x80, 0x81};
190   TEST_ASSERT (!lit_is_valid_cesu8_string (invalid_cesu8_string_2, sizeof (invalid_cesu8_string_2)));
191 
192   /* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */
193   lit_utf8_byte_t invalid_cesu8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0};
194   TEST_ASSERT (lit_is_valid_cesu8_string (invalid_cesu8_string_3, sizeof (invalid_cesu8_string_3)));
195 
196   /* Isolated high surrogate 0xD901 */
197   lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81};
198   TEST_ASSERT (lit_is_valid_cesu8_string (valid_utf8_string_1, sizeof (valid_utf8_string_1)));
199 
200   lit_utf8_byte_t res_buf[3];
201   lit_utf8_size_t res_size;
202 
203   res_size = lit_code_unit_to_utf8 (0x73, res_buf);
204   TEST_ASSERT (res_size == 1);
205   TEST_ASSERT (res_buf[0] == 0x73);
206 
207   res_size = lit_code_unit_to_utf8 (0x41A, res_buf);
208   TEST_ASSERT (res_size == 2);
209   TEST_ASSERT (res_buf[0] == 0xD0);
210   TEST_ASSERT (res_buf[1] == 0x9A);
211 
212   res_size = lit_code_unit_to_utf8 (0xD7FF, res_buf);
213   TEST_ASSERT (res_size == 3);
214   TEST_ASSERT (res_buf[0] == 0xED);
215   TEST_ASSERT (res_buf[1] == 0x9F);
216   TEST_ASSERT (res_buf[2] == 0xBF);
217 
218   ecma_finalize ();
219   jmem_finalize ();
220 
221   return 0;
222 } /* main */
223