1 //! Partitions a list of files into disjoint subsets. 2 //! 3 //! Files which do not belong to any explicitly configured `FileSet` belong to 4 //! the default `FileSet`. 5 use std::fmt; 6 7 use fst::{IntoStreamer, Streamer}; 8 use nohash_hasher::IntMap; 9 use rustc_hash::FxHashMap; 10 11 use crate::{AnchoredPath, FileId, Vfs, VfsPath}; 12 13 /// A set of [`VfsPath`]s identified by [`FileId`]s. 14 #[derive(Default, Clone, Eq, PartialEq)] 15 pub struct FileSet { 16 files: FxHashMap<VfsPath, FileId>, 17 paths: IntMap<FileId, VfsPath>, 18 } 19 20 impl FileSet { 21 /// Returns the number of stored paths. len(&self) -> usize22 pub fn len(&self) -> usize { 23 self.files.len() 24 } 25 26 /// Get the id of the file corresponding to `path`. 27 /// 28 /// If either `path`'s [`anchor`](AnchoredPath::anchor) or the resolved path is not in 29 /// the set, returns [`None`]. resolve_path(&self, path: AnchoredPath<'_>) -> Option<FileId>30 pub fn resolve_path(&self, path: AnchoredPath<'_>) -> Option<FileId> { 31 let mut base = self.paths[&path.anchor].clone(); 32 base.pop(); 33 let path = base.join(path.path)?; 34 self.files.get(&path).copied() 35 } 36 37 /// Get the id corresponding to `path` if it exists in the set. file_for_path(&self, path: &VfsPath) -> Option<&FileId>38 pub fn file_for_path(&self, path: &VfsPath) -> Option<&FileId> { 39 self.files.get(path) 40 } 41 42 /// Get the path corresponding to `file` if it exists in the set. path_for_file(&self, file: &FileId) -> Option<&VfsPath>43 pub fn path_for_file(&self, file: &FileId) -> Option<&VfsPath> { 44 self.paths.get(file) 45 } 46 47 /// Insert the `file_id, path` pair into the set. 48 /// 49 /// # Note 50 /// Multiple [`FileId`] can be mapped to the same [`VfsPath`], and vice-versa. insert(&mut self, file_id: FileId, path: VfsPath)51 pub fn insert(&mut self, file_id: FileId, path: VfsPath) { 52 self.files.insert(path.clone(), file_id); 53 self.paths.insert(file_id, path); 54 } 55 56 /// Iterate over this set's ids. iter(&self) -> impl Iterator<Item = FileId> + '_57 pub fn iter(&self) -> impl Iterator<Item = FileId> + '_ { 58 self.paths.keys().copied() 59 } 60 } 61 62 impl fmt::Debug for FileSet { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result63 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 64 f.debug_struct("FileSet").field("n_files", &self.files.len()).finish() 65 } 66 } 67 68 /// This contains path prefixes to partition a [`Vfs`] into [`FileSet`]s. 69 /// 70 /// # Example 71 /// ```rust 72 /// # use vfs::{file_set::FileSetConfigBuilder, VfsPath, Vfs}; 73 /// let mut builder = FileSetConfigBuilder::default(); 74 /// builder.add_file_set(vec![VfsPath::new_virtual_path("/src".to_string())]); 75 /// let config = builder.build(); 76 /// let mut file_system = Vfs::default(); 77 /// file_system.set_file_contents(VfsPath::new_virtual_path("/src/main.rs".to_string()), Some(vec![])); 78 /// file_system.set_file_contents(VfsPath::new_virtual_path("/src/lib.rs".to_string()), Some(vec![])); 79 /// file_system.set_file_contents(VfsPath::new_virtual_path("/build.rs".to_string()), Some(vec![])); 80 /// // contains the sets : 81 /// // { "/src/main.rs", "/src/lib.rs" } 82 /// // { "build.rs" } 83 /// let sets = config.partition(&file_system); 84 /// ``` 85 #[derive(Debug)] 86 pub struct FileSetConfig { 87 /// Number of sets that `self` can partition a [`Vfs`] into. 88 /// 89 /// This should be the number of sets in `self.map` + 1 for files that don't fit in any 90 /// defined set. 91 n_file_sets: usize, 92 /// Map from encoded paths to the set they belong to. 93 map: fst::Map<Vec<u8>>, 94 } 95 96 impl Default for FileSetConfig { default() -> Self97 fn default() -> Self { 98 FileSetConfig::builder().build() 99 } 100 } 101 102 impl FileSetConfig { 103 /// Returns a builder for `FileSetConfig`. builder() -> FileSetConfigBuilder104 pub fn builder() -> FileSetConfigBuilder { 105 FileSetConfigBuilder::default() 106 } 107 108 /// Partition `vfs` into `FileSet`s. 109 /// 110 /// Creates a new [`FileSet`] for every set of prefixes in `self`. partition(&self, vfs: &Vfs) -> Vec<FileSet>111 pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> { 112 let mut scratch_space = Vec::new(); 113 let mut res = vec![FileSet::default(); self.len()]; 114 for (file_id, path) in vfs.iter() { 115 let root = self.classify(path, &mut scratch_space); 116 res[root].insert(file_id, path.clone()); 117 } 118 res 119 } 120 121 /// Number of sets that `self` can partition a [`Vfs`] into. len(&self) -> usize122 fn len(&self) -> usize { 123 self.n_file_sets 124 } 125 126 /// Returns the set index for the given `path`. 127 /// 128 /// `scratch_space` is used as a buffer and will be entirely replaced. classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize129 fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize { 130 scratch_space.clear(); 131 path.encode(scratch_space); 132 let automaton = PrefixOf::new(scratch_space.as_slice()); 133 let mut longest_prefix = self.len() - 1; 134 let mut stream = self.map.search(automaton).into_stream(); 135 while let Some((_, v)) = stream.next() { 136 longest_prefix = v as usize; 137 } 138 longest_prefix 139 } 140 } 141 142 /// Builder for [`FileSetConfig`]. 143 #[derive(Default)] 144 pub struct FileSetConfigBuilder { 145 roots: Vec<Vec<VfsPath>>, 146 } 147 148 impl FileSetConfigBuilder { 149 /// Returns the number of sets currently held. len(&self) -> usize150 pub fn len(&self) -> usize { 151 self.roots.len() 152 } 153 154 /// Add a new set of paths prefixes. add_file_set(&mut self, roots: Vec<VfsPath>)155 pub fn add_file_set(&mut self, roots: Vec<VfsPath>) { 156 self.roots.push(roots); 157 } 158 159 /// Build the `FileSetConfig`. build(self) -> FileSetConfig160 pub fn build(self) -> FileSetConfig { 161 let n_file_sets = self.roots.len() + 1; 162 let map = { 163 let mut entries = Vec::new(); 164 for (i, paths) in self.roots.into_iter().enumerate() { 165 for p in paths { 166 let mut buf = Vec::new(); 167 p.encode(&mut buf); 168 entries.push((buf, i as u64)); 169 } 170 } 171 entries.sort(); 172 entries.dedup_by(|(a, _), (b, _)| a == b); 173 fst::Map::from_iter(entries).unwrap() 174 }; 175 FileSetConfig { n_file_sets, map } 176 } 177 } 178 179 /// Implements [`fst::Automaton`] 180 /// 181 /// It will match if `prefix_of` is a prefix of the given data. 182 struct PrefixOf<'a> { 183 prefix_of: &'a [u8], 184 } 185 186 impl<'a> PrefixOf<'a> { 187 /// Creates a new `PrefixOf` from the given slice. new(prefix_of: &'a [u8]) -> Self188 fn new(prefix_of: &'a [u8]) -> Self { 189 Self { prefix_of } 190 } 191 } 192 193 impl fst::Automaton for PrefixOf<'_> { 194 type State = usize; start(&self) -> usize195 fn start(&self) -> usize { 196 0 197 } is_match(&self, &state: &usize) -> bool198 fn is_match(&self, &state: &usize) -> bool { 199 state != !0 200 } can_match(&self, &state: &usize) -> bool201 fn can_match(&self, &state: &usize) -> bool { 202 state != !0 203 } accept(&self, &state: &usize, byte: u8) -> usize204 fn accept(&self, &state: &usize, byte: u8) -> usize { 205 if self.prefix_of.get(state) == Some(&byte) { 206 state + 1 207 } else { 208 !0 209 } 210 } 211 } 212 213 #[cfg(test)] 214 mod tests; 215