• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of ICU4X. For terms of use, please see the file
2 // called LICENSE at the top level of the ICU4X source tree
3 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4 
5 use icu_collections::codepointtrie::planes::get_planes_trie;
6 use icu_collections::codepointtrie::*;
7 use zerovec::ZeroVec;
8 
/// Checks that the trie rebuilt from `data/cpt/planes.toml` and the trie
/// returned by `get_planes_trie()` both agree with the ranges listed in that
/// TOML file.
#[test]
fn planes_trie_deserialize_check_test() {
    // Expected trie, as provided by the crate itself.
    let exp_planes_trie = get_planes_trie();

    // Actual trie, rebuilt from the serialized TOML test data.
    let planes_enum_prop: UnicodeEnumeratedProperty =
        ::toml::from_str(include_str!("data/cpt/planes.toml")).unwrap();
    let serialized = planes_enum_prop.code_point_trie.trie_struct;

    let trie_type = match TrieType::try_from(serialized.trie_type_enum_val) {
        Ok(parsed) => parsed,
        Err(_) => panic!(
            "Could not parse trie_type serialized enum value in test data file: {}",
            serialized.name
        ),
    };

    let trie_header = CodePointTrieHeader {
        high_start: serialized.high_start,
        shifted12_high_start: serialized.shifted12_high_start,
        index3_null_offset: serialized.index3_null_offset,
        data_null_offset: serialized.data_null_offset,
        null_value: serialized.null_value,
        trie_type,
    };

    let data = ZeroVec::from_slice_or_alloc(serialized.data_8.as_ref().unwrap());
    let index = ZeroVec::from_slice_or_alloc(&serialized.index);
    let act_planes_trie = CodePointTrie::try_new(trie_header, index, data).unwrap();

    // Flatten the serialized (start, last, value) triples into the
    // (limit, value) pair sequence that `check_trie` consumes. The limit is
    // exclusive, hence `last + 1`.
    let check_ranges: Vec<u32> = planes_enum_prop
        .code_point_map
        .data
        .ranges
        .into_iter()
        .flat_map(|(_start, last, value)| [last + 1, value])
        .collect();

    check_trie(&act_planes_trie, &check_ranges);
    check_trie(&exp_planes_trie, &check_ranges);
}
59 
/// Runs the shared deserialization checks on `data/cpt/free-blocks.16.toml`.
#[test]
fn free_blocks_16() {
    let toml_src = include_str!("data/cpt/free-blocks.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
64 
/// Runs the shared deserialization checks on `data/cpt/free-blocks.32.toml`.
#[test]
fn free_blocks_32() {
    let toml_src = include_str!("data/cpt/free-blocks.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
69 
/// Runs the shared deserialization checks on `data/cpt/free-blocks.8.toml`.
#[test]
fn free_blocks_8() {
    let toml_src = include_str!("data/cpt/free-blocks.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
74 
/// Runs the shared deserialization checks on `data/cpt/free-blocks.small16.toml`.
#[test]
fn free_blocks_small16() {
    let toml_src = include_str!("data/cpt/free-blocks.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
79 
/// Runs the shared deserialization checks on `data/cpt/grow-data.16.toml`.
#[test]
fn grow_data_16() {
    let toml_src = include_str!("data/cpt/grow-data.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
84 
/// Runs the shared deserialization checks on `data/cpt/grow-data.32.toml`.
#[test]
fn grow_data_32() {
    let toml_src = include_str!("data/cpt/grow-data.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
89 
/// Runs the shared deserialization checks on `data/cpt/grow-data.8.toml`.
#[test]
fn grow_data_8() {
    let toml_src = include_str!("data/cpt/grow-data.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
94 
/// Runs the shared deserialization checks on `data/cpt/grow-data.small16.toml`.
#[test]
fn grow_data_small16() {
    let toml_src = include_str!("data/cpt/grow-data.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
99 
/// Runs the shared deserialization checks on `data/cpt/set1.16.toml`.
#[test]
fn set1_16() {
    let toml_src = include_str!("data/cpt/set1.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
104 
/// Runs the shared deserialization checks on `data/cpt/set1.32.toml`.
#[test]
fn set1_32() {
    let toml_src = include_str!("data/cpt/set1.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
109 
/// Runs the shared deserialization checks on `data/cpt/set1.8.toml`.
#[test]
fn set1_8() {
    let toml_src = include_str!("data/cpt/set1.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
114 
/// Runs the shared deserialization checks on `data/cpt/set1.small16.toml`.
#[test]
fn set1_small16() {
    let toml_src = include_str!("data/cpt/set1.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
119 
/// Runs the shared deserialization checks on `data/cpt/set2-overlap.16.toml`.
#[test]
fn set2_overlap_16() {
    let toml_src = include_str!("data/cpt/set2-overlap.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
124 
/// Runs the shared deserialization checks on `data/cpt/set2-overlap.32.toml`.
#[test]
fn set2_overlap_32() {
    let toml_src = include_str!("data/cpt/set2-overlap.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
129 
/// Runs the shared deserialization checks on `data/cpt/set2-overlap.small16.toml`.
#[test]
fn set2_overlap_small16() {
    let toml_src = include_str!("data/cpt/set2-overlap.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
134 
/// Runs the shared deserialization checks on `data/cpt/set3-initial-9.16.toml`.
#[test]
fn set3_initial_9_16() {
    let toml_src = include_str!("data/cpt/set3-initial-9.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
139 
/// Runs the shared deserialization checks on `data/cpt/set3-initial-9.32.toml`.
#[test]
fn set3_initial_9_32() {
    let toml_src = include_str!("data/cpt/set3-initial-9.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
144 
/// Runs the shared deserialization checks on `data/cpt/set3-initial-9.8.toml`.
#[test]
fn set3_initial_9_8() {
    let toml_src = include_str!("data/cpt/set3-initial-9.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
149 
/// Runs the shared deserialization checks on `data/cpt/set3-initial-9.small16.toml`.
#[test]
fn set3_initial_9_small16() {
    let toml_src = include_str!("data/cpt/set3-initial-9.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
154 
/// Runs the shared deserialization checks on `data/cpt/set-empty.16.toml`.
#[test]
fn set_empty_16() {
    let toml_src = include_str!("data/cpt/set-empty.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
159 
/// Runs the shared deserialization checks on `data/cpt/set-empty.32.toml`.
#[test]
fn set_empty_32() {
    let toml_src = include_str!("data/cpt/set-empty.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
164 
/// Runs the shared deserialization checks on `data/cpt/set-empty.8.toml`.
#[test]
fn set_empty_8() {
    let toml_src = include_str!("data/cpt/set-empty.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
169 
/// Runs the shared deserialization checks on `data/cpt/set-empty.small16.toml`.
#[test]
fn set_empty_small16() {
    let toml_src = include_str!("data/cpt/set-empty.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
174 
/// Runs the shared deserialization checks on `data/cpt/set-single-value.16.toml`.
#[test]
fn set_single_value_16() {
    let toml_src = include_str!("data/cpt/set-single-value.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
179 
/// Runs the shared deserialization checks on `data/cpt/set-single-value.32.toml`.
#[test]
fn set_single_value_32() {
    let toml_src = include_str!("data/cpt/set-single-value.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
184 
/// Runs the shared deserialization checks on `data/cpt/set-single-value.8.toml`.
#[test]
fn set_single_value_8() {
    let toml_src = include_str!("data/cpt/set-single-value.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
189 
/// Runs the shared deserialization checks on `data/cpt/set-single-value.small16.toml`.
#[test]
fn set_single_value_small16() {
    let toml_src = include_str!("data/cpt/set-single-value.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
194 
/// Runs the shared deserialization checks on `data/cpt/short-all-same.16.toml`.
#[test]
fn short_all_same_16() {
    let toml_src = include_str!("data/cpt/short-all-same.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
199 
/// Runs the shared deserialization checks on `data/cpt/short-all-same.8.toml`.
#[test]
fn short_all_same_8() {
    let toml_src = include_str!("data/cpt/short-all-same.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
204 
/// Runs the shared deserialization checks on `data/cpt/short-all-same.small16.toml`.
#[test]
fn short_all_same_small16() {
    let toml_src = include_str!("data/cpt/short-all-same.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
209 
/// Runs the shared deserialization checks on `data/cpt/small0-in-fast.16.toml`.
#[test]
fn small0_in_fast_16() {
    let toml_src = include_str!("data/cpt/small0-in-fast.16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
214 
/// Runs the shared deserialization checks on `data/cpt/small0-in-fast.32.toml`.
#[test]
fn small0_in_fast_32() {
    let toml_src = include_str!("data/cpt/small0-in-fast.32.toml");
    run_deserialize_test_from_test_data(toml_src);
}
219 
/// Runs the shared deserialization checks on `data/cpt/small0-in-fast.8.toml`.
#[test]
fn small0_in_fast_8() {
    let toml_src = include_str!("data/cpt/small0-in-fast.8.toml");
    run_deserialize_test_from_test_data(toml_src);
}
224 
/// Runs the shared deserialization checks on `data/cpt/small0-in-fast.small16.toml`.
#[test]
fn small0_in_fast_small16() {
    let toml_src = include_str!("data/cpt/small0-in-fast.small16.toml");
    run_deserialize_test_from_test_data(toml_src);
}
229 
/// The width of the elements in the data array of a [`CodePointTrie`].
/// See [`UCPTrieValueWidth`](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/ucptrie_8h.html) in ICU4C.
///
/// The explicit discriminants are the numeric codes that the test helpers in
/// this file compare (via `as u8`) against the serialized `valueWidth` field.
///
/// `Debug` and `Eq` are derived so that values can be used directly in
/// `assert_eq!` / `assert_ne!` and printed in failure messages.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ValueWidthEnum {
    /// Data array elements are 16 bits wide.
    Bits16 = 0,
    /// Data array elements are 32 bits wide.
    Bits32 = 1,
    /// Data array elements are 8 bits wide.
    Bits8 = 2,
}
239 
240 /// Test .get() on CodePointTrie by iterating through each range in
241 /// check_ranges and assert that the associated
242 /// value matches the trie value for each code point in the range.
check_trie<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32])243 pub fn check_trie<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32]) {
244     assert_eq!(
245         0,
246         check_ranges.len() % 2,
247         "check_ranges must have an even number of 32-bit values in (limit,value) pairs"
248     );
249 
250     let mut i: u32 = 0;
251     let check_range_tuples = check_ranges.chunks(2);
252     // Iterate over each check range
253     for range_tuple in check_range_tuples {
254         let range_limit = range_tuple[0];
255         let range_value = range_tuple[1];
256         // Check all values in this range, one-by-one
257         while i < range_limit {
258             assert_eq!(range_value, trie.get32(i).into(), "trie_get({})", i,);
259             i += 1;
260         }
261     }
262 }
263 
264 /// Test `.get_range()` / `.iter_ranges()` on CodePointTrie by calling
265 /// `.iter_ranges()` on the trie.
266 ///
267 /// `.iter_ranges()` returns an iterator that produces values
268 /// by calls to .get_range, and this checks if it matches the values in check_ranges.
test_check_ranges_get_ranges<T: TrieValue + Into<u32>>( trie: &CodePointTrie<T>, check_ranges: &[u32], )269 pub fn test_check_ranges_get_ranges<T: TrieValue + Into<u32>>(
270     trie: &CodePointTrie<T>,
271     check_ranges: &[u32],
272 ) {
273     assert_eq!(
274         0,
275         check_ranges.len() % 2,
276         "check_ranges must have an even number of 32-bit values in (limit,value) pairs"
277     );
278 
279     let mut trie_ranges = trie.iter_ranges();
280 
281     let mut range_start: u32 = 0;
282     let check_range_tuples = check_ranges.chunks(2);
283     // Iterate over each check range
284     for range_tuple in check_range_tuples {
285         let range_limit = range_tuple[0];
286         let range_value = range_tuple[1];
287 
288         // The check ranges array seems to start with a trivial range whose
289         // limit is zero. range_start is initialized to 0, so we can skip.
290         if range_limit == 0 {
291             continue;
292         }
293 
294         let cpm_range = trie_ranges.next();
295         assert!(cpm_range.is_some(), "CodePointTrie iter_ranges() produces fewer ranges than the check_ranges field in testdata has");
296         let cpm_range = cpm_range.unwrap();
297         let cpmr_start = cpm_range.range.start();
298         let cpmr_end = cpm_range.range.end();
299         let cpmr_value: u32 = cpm_range.value.into();
300 
301         assert_eq!(range_start, *cpmr_start);
302         assert_eq!(range_limit, *cpmr_end + 1);
303         assert_eq!(range_value, cpmr_value);
304 
305         range_start = range_limit;
306     }
307 
308     assert!(trie_ranges.next().is_none(), "CodePointTrie iter_ranges() produces more ranges than the check_ranges field in testdata has");
309 }
310 
311 /// Run above tests that verify the validity of CodePointTrie methods
run_trie_tests<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32])312 pub fn run_trie_tests<T: TrieValue + Into<u32>>(trie: &CodePointTrie<T>, check_ranges: &[u32]) {
313     check_trie(trie, check_ranges);
314     test_check_ranges_get_ranges(trie, check_ranges);
315 }
316 
317 // The following structs might be useful later for de-/serialization of the
318 // main `CodePointTrie` struct in the corresponding data provider.
319 
/// Top-level shape of a serialized enumerated-property TOML file: a code
/// point map section plus the matching serialized code point trie section.
#[derive(Debug)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct UnicodeEnumeratedProperty {
    /// The property expressed as a list of ranges.
    pub code_point_map: EnumPropCodePointMap,
    /// The same property expressed as a serialized code point trie.
    pub code_point_trie: EnumPropSerializedCPT,
}

/// Wrapper around the `data` table of the code point map section.
#[derive(Debug)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropCodePointMap {
    pub data: EnumPropCodePointMapData,
}

/// Range-based description of a property's values.
#[derive(Debug)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropCodePointMapData {
    pub long_name: String,
    pub name: String,
    /// One triple per range. The planes test in this file reads element 1 as
    /// the inclusive last code point and element 2 as the value; element 0 is
    /// presumably the first code point of the range — TODO confirm.
    pub ranges: Vec<(u32, u32, u32)>,
}

/// Wrapper around the `struct` table of the code point trie section.
// "CPT" abbreviates "code point trie"; the lint is silenced to keep that name.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropSerializedCPT {
    #[cfg_attr(any(feature = "serde", test), serde(rename = "struct"))]
    pub trie_struct: EnumPropSerializedCPTStruct,
}

// These structs support the test data dumped as TOML files from ICU.
// Because the properties CodePointMap data will also be dumped from ICU
// using similar functions, some of these structs may be useful to refactor
// into main code at a later point.

/// Raw fields of a serialized code point trie, as dumped into TOML.
///
/// Fields marked `serde(skip)` are not read from the file and are left at
/// their `Default` value when deserializing.
// "CPT" abbreviates "code point trie"; the lint is silenced to keep that name.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug)]
#[cfg_attr(any(feature = "serde", test), derive(serde::Deserialize))]
pub struct EnumPropSerializedCPTStruct {
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub long_name: String,
    /// Name used in this file's log and panic messages to identify the data.
    pub name: String,
    /// The trie's index array.
    pub index: Vec<u16>,
    /// Data array when the values are 8 bits wide.
    pub data_8: Option<Vec<u8>>,
    /// Data array when the values are 16 bits wide.
    pub data_16: Option<Vec<u16>>,
    /// Data array when the values are 32 bits wide.
    pub data_32: Option<Vec<u32>>,
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub index_length: u32,
    #[cfg_attr(any(feature = "serde", test), serde(skip))]
    pub data_length: u32,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "highStart"))]
    pub high_start: u32,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "shifted12HighStart"))]
    pub shifted12_high_start: u16,
    /// Numeric code that the tests convert with `TrieType::try_from`.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "type"))]
    pub trie_type_enum_val: u8,
    /// Numeric code that the tests compare against `ValueWidthEnum as u8`.
    #[cfg_attr(any(feature = "serde", test), serde(rename = "valueWidth"))]
    pub value_width_enum_val: u8,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "index3NullOffset"))]
    pub index3_null_offset: u16,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "dataNullOffset"))]
    pub data_null_offset: u32,
    #[cfg_attr(any(feature = "serde", test), serde(rename = "nullValue"))]
    pub null_value: u32,
}
379 
380 // Given a .toml file dumped from ICU4C test data for UCPTrie, run the test
381 // data file deserialization into the test file struct, convert and construct
382 // the `CodePointTrie`, and test the constructed struct against the test file's
383 // "check ranges" (inversion map ranges) using `check_trie` to verify the
384 // validity of the `CodePointTrie`'s behavior for all code points.
385 #[allow(dead_code)]
run_deserialize_test_from_test_data(test_file: &str)386 pub fn run_deserialize_test_from_test_data(test_file: &str) {
387     // The following structs are specific to the TOML format files for dumped ICU
388     // test data.
389 
390     #[derive(serde::Deserialize)]
391     pub struct TestFile {
392         code_point_trie: TestCodePointTrie,
393     }
394 
395     #[derive(serde::Deserialize)]
396     pub struct TestCodePointTrie {
397         // The trie_struct field for test data files is dumped from the same source
398         // (ICU4C) using the same function (usrc_writeUCPTrie) as property data
399         // for the provider, so we can reuse the same struct here.
400         #[serde(rename(deserialize = "struct"))]
401         trie_struct: EnumPropSerializedCPTStruct,
402         #[serde(rename(deserialize = "testdata"))]
403         test_data: TestData,
404     }
405 
406     #[derive(serde::Deserialize)]
407     pub struct TestData {
408         #[serde(rename(deserialize = "checkRanges"))]
409         check_ranges: Vec<u32>,
410     }
411 
412     let test_file = ::toml::from_str::<TestFile>(test_file).unwrap();
413 
414     let test_struct = test_file.code_point_trie.trie_struct;
415 
416     println!(
417         "Running CodePointTrie reader logic test on test data file: {}",
418         test_struct.name
419     );
420 
421     let trie_type_enum = match TrieType::try_from(test_struct.trie_type_enum_val) {
422         Ok(enum_val) => enum_val,
423         _ => {
424             panic!(
425                 "Could not parse trie_type serialized enum value in test data file: {}",
426                 test_struct.name
427             );
428         }
429     };
430 
431     let trie_header = CodePointTrieHeader {
432         high_start: test_struct.high_start,
433         shifted12_high_start: test_struct.shifted12_high_start,
434         index3_null_offset: test_struct.index3_null_offset,
435         data_null_offset: test_struct.data_null_offset,
436         null_value: test_struct.null_value,
437         trie_type: trie_type_enum,
438     };
439 
440     let index = ZeroVec::from_slice_or_alloc(&test_struct.index);
441 
442     match (test_struct.data_8, test_struct.data_16, test_struct.data_32) {
443         (Some(data_8), _, _) => {
444             let data = ZeroVec::from_slice_or_alloc(&data_8);
445             let trie_result = CodePointTrie::try_new(trie_header, index, data);
446             assert!(trie_result.is_ok(), "Could not construct trie");
447             assert_eq!(
448                 test_struct.value_width_enum_val,
449                 ValueWidthEnum::Bits8 as u8
450             );
451             run_trie_tests(
452                 &trie_result.unwrap(),
453                 &test_file.code_point_trie.test_data.check_ranges,
454             );
455         }
456 
457         (_, Some(data_16), _) => {
458             let data = ZeroVec::from_slice_or_alloc(&data_16);
459             let trie_result = CodePointTrie::try_new(trie_header, index, data);
460             assert!(trie_result.is_ok(), "Could not construct trie");
461             assert_eq!(
462                 test_struct.value_width_enum_val,
463                 ValueWidthEnum::Bits16 as u8
464             );
465             run_trie_tests(
466                 &trie_result.unwrap(),
467                 &test_file.code_point_trie.test_data.check_ranges,
468             );
469         }
470 
471         (_, _, Some(data_32)) => {
472             let data = ZeroVec::from_slice_or_alloc(&data_32);
473             let trie_result = CodePointTrie::try_new(trie_header, index, data);
474             assert!(trie_result.is_ok(), "Could not construct trie");
475             assert_eq!(
476                 test_struct.value_width_enum_val,
477                 ValueWidthEnum::Bits32 as u8
478             );
479             run_trie_tests(
480                 &trie_result.unwrap(),
481                 &test_file.code_point_trie.test_data.check_ranges,
482             );
483         }
484 
485         (_, _, _) => {
486             panic!("Could not match test trie data to a known value width or trie type");
487         }
488     };
489 }
490