// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_locale_core::extensions::private::Private;
use icu_locale_core::Locale;
use litemap::LiteMap;
use std::collections::BTreeSet;
use writeable::Writeable;
use zerotrie::ZeroTriePerfectHash;
use zerotrie::ZeroTrieSimpleAscii;
use zerovec::VarZeroVec;

14 mod testdata {
15     include!("data/data.rs");
16 }
17 
18 use testdata::locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS};
19 use testdata::strings_to_litemap;
20 
/// Pins the storage footprint of several lookup structures built over the full
/// `STRINGS` fixture, where each key is the complete string.
///
/// Structures compared: a `VarZeroVec<str>`, a `ZeroTrieSimpleAscii`, a
/// `ZeroTriePerfectHash`, and the raw key bytes with explicit pointer arrays.
///
/// Pointer arrays are modeled with a fixed 8-byte pointer rather than the host's
/// `usize`, because the pinned totals were computed for that width. This keeps the
/// assertions valid on targets where `usize` is narrower.
#[test]
fn test_combined() {
    // Modeled pointer width in bytes (64-bit); see the function docs.
    const POINTER_BYTES: usize = 8;

    // One pointer per input string, and one pointer per unique blob.
    let string_pointer_bytes = STRINGS.len() * POINTER_BYTES;
    let blob_pointer_bytes = NUM_UNIQUE_BLOBS * POINTER_BYTES;

    let litemap = strings_to_litemap(STRINGS);

    // --- VarZeroVec<str> ---
    let vzv: VarZeroVec<str> = STRINGS.into();
    let vzv_bytes = vzv.as_bytes().len();

    // Lookup table size:
    assert_eq!(vzv_bytes, 10219);

    // Size including pointer array:
    assert_eq!(vzv_bytes + string_pointer_bytes, 18635);

    // --- ZeroTrieSimpleAscii ---
    let simple_ascii = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();

    // Lookup table size:
    assert_eq!(simple_ascii.byte_len(), 5104);

    // Size including pointer array:
    assert_eq!(simple_ascii.byte_len() + blob_pointer_bytes, 8392);

    // --- ZeroTriePerfectHash ---
    let perfect_hash = ZeroTriePerfectHash::try_from(&litemap).unwrap();

    // Lookup table size:
    assert_eq!(perfect_hash.byte_len(), 5157);

    // Size including pointer array:
    assert_eq!(perfect_hash.byte_len() + blob_pointer_bytes, 8445);

    // --- Raw key bytes with explicit pointer arrays ---
    let total_str_len = litemap.keys().map(|k| k.len()).sum::<usize>();
    assert_eq!(total_str_len, 8115);

    // Lookup table size (key bytes plus one pointer per string):
    assert_eq!(total_str_len + string_pointer_bytes, 16531);

    // Size including pointer array: (2x for the lookup array and value array)
    assert_eq!(total_str_len + 2 * string_pointer_bytes, 24947);

    // Size including u16 pointer array:
    assert_eq!(
        total_str_len
            + string_pointer_bytes
            + STRINGS.len() * core::mem::size_of::<u16>()
            + blob_pointer_bytes,
        21923
    );
}

/// Pins the storage footprint when the fixture is split by private-use extension:
/// one lookup structure per distinct `extensions.private` value, keyed by the
/// locale string with that extension removed.
///
/// Per-group sizes of `ZeroTrieSimpleAscii`, `ZeroTriePerfectHash` and
/// `VarZeroVec<str>` are summed across groups and compared to pinned totals.
///
/// Pointer arrays are modeled with a fixed 8-byte pointer rather than the host's
/// `usize`, because the pinned totals were computed for that width. This keeps the
/// assertions valid on targets where `usize` is narrower.
#[test]
fn test_aux_split() {
    // Modeled pointer width in bytes (64-bit); see the function docs.
    const POINTER_BYTES: usize = 8;

    // One pointer per input string, and one pointer per unique blob.
    let string_pointer_bytes = STRINGS.len() * POINTER_BYTES;
    let blob_pointer_bytes = NUM_UNIQUE_BLOBS * POINTER_BYTES;

    let locales: Vec<Locale> = STRINGS.iter().map(|s| s.parse().unwrap()).collect();

    // Distinct private-use extensions; each one defines a group below.
    let aux_keys: BTreeSet<&Private> = locales.iter().map(|l| &l.extensions.private).collect();
    assert_eq!(aux_keys.len(), 6);

    let mut cumulative_index: usize = 0;
    let mut total_simpleascii_len = 0;
    let mut total_perfecthash_len = 0;
    let mut total_vzv_len = 0;
    let mut unique_locales = BTreeSet::new();
    for private in aux_keys.iter() {
        // Serialize every locale of this group once, with the private-use
        // extension cleared so only the base locale remains in the key.
        let strs: Vec<String> = locales
            .iter()
            .filter(|l| l.extensions.private == **private)
            .map(|l| {
                let mut l = l.clone();
                l.extensions.private = Private::default();
                l.write_to_string().into_owned()
            })
            .collect();

        // Values are running indices that continue across groups, one per
        // input locale (consumed even if a key repeats within the group).
        let litemap: LiteMap<Vec<u8>, usize> = strs
            .iter()
            .map(|s| s.clone().into_bytes())
            .zip(cumulative_index..)
            .collect();
        cumulative_index += strs.len();

        let simple_ascii = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
        total_simpleascii_len += simple_ascii.byte_len();

        let perfect_hash = ZeroTriePerfectHash::try_from(&litemap).unwrap();
        total_perfecthash_len += perfect_hash.byte_len();

        unique_locales.extend(litemap.keys().cloned());

        let vzv: VarZeroVec<str> = strs.as_slice().into();
        total_vzv_len += vzv.as_bytes().len();
    }
    // Every input locale must have been assigned to exactly one group.
    assert_eq!(cumulative_index, locales.len());

    assert_eq!(total_simpleascii_len, 5098);
    assert_eq!(total_perfecthash_len, 5302);
    assert_eq!(total_vzv_len, 5486);

    // Bytes of the base-locale strings, counted once across all groups.
    let total_unique_locale_str_len = unique_locales.iter().map(|v| v.len()).sum::<usize>();
    assert_eq!(total_unique_locale_str_len, 945);

    // Size including pointer array:
    assert_eq!(total_simpleascii_len + blob_pointer_bytes, 8386);
    assert_eq!(total_perfecthash_len + blob_pointer_bytes, 8590);
    assert_eq!(total_vzv_len + string_pointer_bytes, 13902);
    // 2x for the lookup arrays and value arrays
    assert_eq!(
        total_unique_locale_str_len + 2 * string_pointer_bytes,
        17777
    );

    // Size including u16 pointer array:
    assert_eq!(
        total_unique_locale_str_len
            + string_pointer_bytes
            + STRINGS.len() * core::mem::size_of::<u16>()
            + blob_pointer_bytes,
        14753
    );
}
