1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5 use icu_collections::codepointtrie::planes::get_planes_trie;
6 use icu_collections::codepointtrie::*;
7 use zerovec::ZeroVec;
8
/// Checks that the trie deserialized from `data/cpt/planes.toml` and the trie
/// returned by `get_planes_trie()` both agree with the ranges listed in that
/// same TOML file.
#[test]
fn planes_trie_deserialize_check_test() {
    // Expected trie, as provided by the crate itself.
    let exp_planes_trie = get_planes_trie();

    // Actual trie, rebuilt from the serialized form embedded at compile time.
    let planes_enum_prop: UnicodeEnumeratedProperty =
        ::toml::from_str(include_str!("data/cpt/planes.toml")).unwrap();
    let serialized = planes_enum_prop.code_point_trie.trie_struct;

    let trie_type = match TrieType::try_from(serialized.trie_type_enum_val) {
        Ok(parsed) => parsed,
        Err(_) => panic!(
            "Could not parse trie_type serialized enum value in test data file: {}",
            serialized.name
        ),
    };
    let trie_header = CodePointTrieHeader {
        high_start: serialized.high_start,
        shifted12_high_start: serialized.shifted12_high_start,
        index3_null_offset: serialized.index3_null_offset,
        data_null_offset: serialized.data_null_offset,
        null_value: serialized.null_value,
        trie_type,
    };

    // This test only reads the 8-bit data array; it panics if the file has none.
    let data = ZeroVec::from_slice_or_alloc(serialized.data_8.as_ref().unwrap());
    let index = ZeroVec::from_slice_or_alloc(&serialized.index);
    let act_planes_trie = CodePointTrie::try_new(trie_header, index, data).unwrap();

    // Flatten the serialized range triples into the (limit, value) sequence
    // that `check_trie` consumes. The stored end is inclusive, hence the `+ 1`.
    let serialized_ranges: Vec<(u32, u32, u32)> = planes_enum_prop.code_point_map.data.ranges;
    let mut check_ranges: Vec<u32> = Vec::with_capacity(serialized_ranges.len() * 2);
    for (_, inclusive_end, value) in serialized_ranges {
        check_ranges.extend_from_slice(&[inclusive_end + 1, value]);
    }

    check_trie(&act_planes_trie, &check_ranges);
    check_trie(&exp_planes_trie, &check_ranges);
}
59
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/free-blocks.16.toml` test data file.
#[test]
fn free_blocks_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.16.toml"));
}
64
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/free-blocks.32.toml` test data file.
#[test]
fn free_blocks_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.32.toml"));
}
69
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/free-blocks.8.toml` test data file.
#[test]
fn free_blocks_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.8.toml"));
}
74
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/free-blocks.small16.toml` test data file.
#[test]
fn free_blocks_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/free-blocks.small16.toml"));
}
79
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/grow-data.16.toml` test data file.
#[test]
fn grow_data_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.16.toml"));
}
84
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/grow-data.32.toml` test data file.
#[test]
fn grow_data_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.32.toml"));
}
89
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/grow-data.8.toml` test data file.
#[test]
fn grow_data_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.8.toml"));
}
94
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/grow-data.small16.toml` test data file.
#[test]
fn grow_data_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/grow-data.small16.toml"));
}
99
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set1.16.toml` test data file.
#[test]
fn set1_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.16.toml"));
}
104
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set1.32.toml` test data file.
#[test]
fn set1_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.32.toml"));
}
109
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set1.8.toml` test data file.
#[test]
fn set1_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.8.toml"));
}
114
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set1.small16.toml` test data file.
#[test]
fn set1_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set1.small16.toml"));
}
119
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set2-overlap.16.toml` test data file.
#[test]
fn set2_overlap_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set2-overlap.16.toml"));
}
124
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set2-overlap.32.toml` test data file.
#[test]
fn set2_overlap_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set2-overlap.32.toml"));
}
129
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set2-overlap.small16.toml` test data file.
#[test]
fn set2_overlap_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set2-overlap.small16.toml"));
}
134
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set3-initial-9.16.toml` test data file.
#[test]
fn set3_initial_9_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.16.toml"));
}
139
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set3-initial-9.32.toml` test data file.
#[test]
fn set3_initial_9_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.32.toml"));
}
144
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set3-initial-9.8.toml` test data file.
#[test]
fn set3_initial_9_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.8.toml"));
}
149
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set3-initial-9.small16.toml` test data file.
#[test]
fn set3_initial_9_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set3-initial-9.small16.toml"));
}
154
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-empty.16.toml` test data file.
#[test]
fn set_empty_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.16.toml"));
}
159
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-empty.32.toml` test data file.
#[test]
fn set_empty_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.32.toml"));
}
164
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-empty.8.toml` test data file.
#[test]
fn set_empty_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.8.toml"));
}
169
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-empty.small16.toml` test data file.
#[test]
fn set_empty_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-empty.small16.toml"));
}
174
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-single-value.16.toml` test data file.
#[test]
fn set_single_value_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.16.toml"));
}
179
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-single-value.32.toml` test data file.
#[test]
fn set_single_value_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.32.toml"));
}
184
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-single-value.8.toml` test data file.
#[test]
fn set_single_value_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.8.toml"));
}
189
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/set-single-value.small16.toml` test data file.
#[test]
fn set_single_value_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/set-single-value.small16.toml"));
}
194
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/short-all-same.16.toml` test data file.
#[test]
fn short_all_same_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/short-all-same.16.toml"));
}
199
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/short-all-same.8.toml` test data file.
#[test]
fn short_all_same_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/short-all-same.8.toml"));
}
204
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/short-all-same.small16.toml` test data file.
#[test]
fn short_all_same_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/short-all-same.small16.toml"));
}
209
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/small0-in-fast.16.toml` test data file.
#[test]
fn small0_in_fast_16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.16.toml"));
}
214
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/small0-in-fast.32.toml` test data file.
#[test]
fn small0_in_fast_32() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.32.toml"));
}
219
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/small0-in-fast.8.toml` test data file.
#[test]
fn small0_in_fast_8() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.8.toml"));
}
224
/// Runs `run_deserialize_test_from_test_data` on the embedded
/// `data/cpt/small0-in-fast.small16.toml` test data file.
#[test]
fn small0_in_fast_small16() {
    run_deserialize_test_from_test_data(include_str!("data/cpt/small0-in-fast.small16.toml"));
}
229
/// The width of the elements in the data array of a [`CodePointTrie`].
/// See [`UCPTrieValueWidth`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
///
/// The explicit discriminants are the numeric codes that the test data files
/// carry in their `valueWidth` field, so a variant can be compared against the
/// deserialized number with `as u8`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ValueWidthEnum {
    /// Data array elements are 16 bits wide.
    Bits16 = 0,
    /// Data array elements are 32 bits wide.
    Bits32 = 1,
    /// Data array elements are 8 bits wide.
    Bits8 = 2,
}
239
240 /// Test .get() on CodePointTrie by iterating through each range in
241 /// check_ranges and assert that the associated
242 /// value matches the trie value for each code point in the range.
check_trie<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32])243 pub fn check_trie<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32]) {
244 assert_eq!(
245 0,
246 check_ranges.len() % 2,
247 "check_ranges must have an even number of 32-bit values in (limit,value) pairs"
248 );
249
250 let mut i: u32 = 0;
251 let check_range_tuples = check_ranges.chunks(2);
252 // Iterate over each check range
253 for range_tuple in check_range_tuples {
254 let range_limit = range_tuple[0];
255 let range_value = range_tuple[1];
256 // Check all values in this range, one-by-one
257 while i < range_limit {
258 assert_eq!(range_value, trie.get32(i).into(), "trie_get({})", i,);
259 i += 1;
260 }
261 }
262 }
263
264 /// Test `.get_range()` / `.iter_ranges()` on CodePointTrie by calling
265 /// `.iter_ranges()` on the trie.
266 ///
267 /// `.iter_ranges()` returns an iterator that produces values
268 /// by calls to .get_range, and this checks if it matches the values in check_ranges.
test_check_ranges_get_ranges<T: TrieValue + Into<u32>>( trie: &CodePointTrie<T>, check_ranges: &[u32], )269 pub fn test_check_ranges_get_ranges<T: TrieValue + Into<u32>>(
270 trie: &CodePointTrie<T>,
271 check_ranges: &[u32],
272 ) {
273 assert_eq!(
274 0,
275 check_ranges.len() % 2,
276 "check_ranges must have an even number of 32-bit values in (limit,value) pairs"
277 );
278
279 let mut trie_ranges = trie.iter_ranges();
280
281 let mut range_start: u32 = 0;
282 let check_range_tuples = check_ranges.chunks(2);
283 // Iterate over each check range
284 for range_tuple in check_range_tuples {
285 let range_limit = range_tuple[0];
286 let range_value = range_tuple[1];
287
288 // The check ranges array seems to start with a trivial range whose
289 // limit is zero. range_start is initialized to 0, so we can skip.
290 if range_limit == 0 {
291 continue;
292 }
293
294 let cpm_range = trie_ranges.next();
295 assert!(cpm_range.is_some(), "CodePointTrie iter_ranges() produces fewer ranges than the check_ranges field in testdata has");
296 let cpm_range = cpm_range.unwrap();
297 let cpmr_start = cpm_range.range.start();
298 let cpmr_end = cpm_range.range.end();
299 let cpmr_value: u32 = cpm_range.value.into();
300
301 assert_eq!(range_start, *cpmr_start);
302 assert_eq!(range_limit, *cpmr_end + 1);
303 assert_eq!(range_value, cpmr_value);
304
305 range_start = range_limit;
306 }
307
308 assert!(trie_ranges.next().is_none(), "CodePointTrie iter_ranges() produces more ranges than the check_ranges field in testdata has");
309 }
310
/// Run above tests that verify the validity of CodePointTrie methods.
///
/// Calls `check_trie` (per-code-point lookups) and then
/// `test_check_ranges_get_ranges` (range iteration) with the same trie and
/// the same `check_ranges` slice of `(limit, value)` pairs. Panics if either
/// check fails.
pub fn run_trie_tests<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32]) {
    check_trie(trie, check_ranges);
    test_check_ranges_get_ranges(trie, check_ranges);
}
316
317 // The following structs might be useful later for de-/serialization of the
318 // main `CodePointTrie` struct in the corresponding data provider.
319
/// Top-level layout of a TOML file describing an enumerated property, as
/// deserialized by `planes_trie_deserialize_check_test` from
/// `data/cpt/planes.toml`.
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct UnicodeEnumeratedProperty {
    /// The `code_point_map` section: the property expressed as ranges.
    pub code_point_map: EnumPropCodePointMap,
    /// The `code_point_trie` section: the serialized trie.
    pub code_point_trie: EnumPropSerializedCPT,
}
325
/// The `code_point_map` section of an enumerated-property TOML file; a thin
/// wrapper around its `data` table.
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropCodePointMap {
    /// Names and range list of the property.
    pub data: EnumPropCodePointMapData,
}
330
/// Names and range list found in the `data` table of the `code_point_map`
/// section.
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropCodePointMapData {
    /// Long name stored in the file (not read by the tests in this file).
    pub long_name: String,
    /// Short name stored in the file (not read by the tests in this file).
    pub name: String,
    /// Range triples. The planes test reads element 1 as an inclusive range
    /// end (it adds 1 to obtain a limit) and element 2 as the value for that
    /// range; element 0 is presumably the range start — TODO confirm.
    pub ranges: Vec<(u32, u32, u32)>,
}
337
/// The `code_point_trie` section of an enumerated-property TOML file; a thin
/// wrapper around the table that holds the serialized trie.
#[allow(clippy::upper_case_acronyms)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropSerializedCPT {
    /// The serialized trie. The TOML key is `struct`, which is a Rust
    /// keyword, hence the rename.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "struct"))]
    pub trie_struct: EnumPropSerializedCPTStruct,
}
344
345 // These structs support the test data dumped as TOML files from ICU.
346 // Because the properties CodePointMap data will also be dumped from ICU
347 // using similar functions, some of these structs may be useful to refactor
348 // into main code at a later point.
349
/// Serialized form of a code point trie as it appears in the TOML files: the
/// header values, the index array, and one data array.
///
/// The tests in this file copy the header fields into a `CodePointTrieHeader`
/// and pass `index` plus one of the `data_*` arrays to
/// `CodePointTrie::try_new`.
#[allow(clippy::upper_case_acronyms)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropSerializedCPTStruct {
    /// Marked `serde(skip)`: never read from the file.
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub long_name: String,
    /// Name of the data set; used in the tests' log and panic messages.
    pub name: String,
    /// The trie's index array.
    pub index: Vec<u16>,
    /// Data array with 8-bit values, if the file provides one.
    pub data_8: Option<Vec<u8>>,
    /// Data array with 16-bit values, if the file provides one.
    pub data_16: Option<Vec<u16>>,
    /// Data array with 32-bit values, if the file provides one.
    pub data_32: Option<Vec<u32>>,
    /// Marked `serde(skip)`: never read from the file.
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub index_length: u32,
    /// Marked `serde(skip)`: never read from the file.
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub data_length: u32,
    /// Header value, read from the `highStart` key.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "highStart"))]
    pub high_start: u32,
    /// Header value, read from the `shifted12HighStart` key.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "shifted12HighStart"))]
    pub shifted12_high_start: u16,
    /// Numeric trie type, read from the `type` key; the tests convert it with
    /// `TrieType::try_from`.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "type"))]
    pub trie_type_enum_val: u8,
    /// Numeric value width, read from the `valueWidth` key; the tests compare
    /// it against `ValueWidthEnum` discriminants.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "valueWidth"))]
    pub value_width_enum_val: u8,
    /// Header value, read from the `index3NullOffset` key.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "index3NullOffset"))]
    pub index3_null_offset: u16,
    /// Header value, read from the `dataNullOffset` key.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "dataNullOffset"))]
    pub data_null_offset: u32,
    /// Header value, read from the `nullValue` key.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "nullValue"))]
    pub null_value: u32,
}
379
380 // Given a .toml file dumped from ICU4C test data for UCPTrie, run the test
381 // data file deserialization into the test file struct, convert and construct
382 // the `CodePointTrie`, and test the constructed struct against the test file's
383 // "check ranges" (inversion map ranges) using `check_trie` to verify the
384 // validity of the `CodePointTrie`'s behavior for all code points.
385 #[allow(dead_code)]
run_deserialize_test_from_test_data(test_file: &str)386 pub fn run_deserialize_test_from_test_data(test_file: &str) {
387 // The following structs are specific to the TOML format files for dumped ICU
388 // test data.
389
390 #[derive(serde::Deserialize)]
391 pub struct TestFile {
392 code_point_trie: TestCodePointTrie,
393 }
394
395 #[derive(serde::Deserialize)]
396 pub struct TestCodePointTrie {
397 // The trie_struct field for test data files is dumped from the same source
398 // (ICU4C) using the same function (usrc_writeUCPTrie) as property data
399 // for the provider, so we can reuse the same struct here.
400 #[serde(rename(deserialize = "struct"))]
401 trie_struct: EnumPropSerializedCPTStruct,
402 #[serde(rename(deserialize = "testdata"))]
403 test_data: TestData,
404 }
405
406 #[derive(serde::Deserialize)]
407 pub struct TestData {
408 #[serde(rename(deserialize = "checkRanges"))]
409 check_ranges: Vec<u32>,
410 }
411
412 let test_file = ::toml::from_str::<TestFile>(test_file).unwrap();
413
414 let test_struct = test_file.code_point_trie.trie_struct;
415
416 println!(
417 "Running CodePointTrie reader logic test on test data file: {}",
418 test_struct.name
419 );
420
421 let trie_type_enum = match TrieType::try_from(test_struct.trie_type_enum_val) {
422 Ok(enum_val) => enum_val,
423 _ => {
424 panic!(
425 "Could not parse trie_type serialized enum value in test data file: {}",
426 test_struct.name
427 );
428 }
429 };
430
431 let trie_header = CodePointTrieHeader {
432 high_start: test_struct.high_start,
433 shifted12_high_start: test_struct.shifted12_high_start,
434 index3_null_offset: test_struct.index3_null_offset,
435 data_null_offset: test_struct.data_null_offset,
436 null_value: test_struct.null_value,
437 trie_type: trie_type_enum,
438 };
439
440 let index = ZeroVec::from_slice_or_alloc(&test_struct.index);
441
442 match (test_struct.data_8, test_struct.data_16, test_struct.data_32) {
443 (Some(data_8), _, _) => {
444 let data = ZeroVec::from_slice_or_alloc(&data_8);
445 let trie_result = CodePointTrie::try_new(trie_header, index, data);
446 assert!(trie_result.is_ok(), "Could not construct trie");
447 assert_eq!(
448 test_struct.value_width_enum_val,
449 ValueWidthEnum::Bits8 as u8
450 );
451 run_trie_tests(
452 &trie_result.unwrap(),
453 &test_file.code_point_trie.test_data.check_ranges,
454 );
455 }
456
457 (_, Some(data_16), _) => {
458 let data = ZeroVec::from_slice_or_alloc(&data_16);
459 let trie_result = CodePointTrie::try_new(trie_header, index, data);
460 assert!(trie_result.is_ok(), "Could not construct trie");
461 assert_eq!(
462 test_struct.value_width_enum_val,
463 ValueWidthEnum::Bits16 as u8
464 );
465 run_trie_tests(
466 &trie_result.unwrap(),
467 &test_file.code_point_trie.test_data.check_ranges,
468 );
469 }
470
471 (_, _, Some(data_32)) => {
472 let data = ZeroVec::from_slice_or_alloc(&data_32);
473 let trie_result = CodePointTrie::try_new(trie_header, index, data);
474 assert!(trie_result.is_ok(), "Could not construct trie");
475 assert_eq!(
476 test_struct.value_width_enum_val,
477 ValueWidthEnum::Bits32 as u8
478 );
479 run_trie_tests(
480 &trie_result.unwrap(),
481 &test_file.code_point_trie.test_data.check_ranges,
482 );
483 }
484
485 (_, _, _) => {
486 panic!("Could not match test trie data to a known value width or trie type");
487 }
488 };
489 }
490