• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Partitions a list of files into disjoint subsets.
2 //!
3 //! Files which do not belong to any explicitly configured `FileSet` belong to
4 //! the default `FileSet`.
5 use std::fmt;
6 
7 use fst::{IntoStreamer, Streamer};
8 use nohash_hasher::IntMap;
9 use rustc_hash::FxHashMap;
10 
11 use crate::{AnchoredPath, FileId, Vfs, VfsPath};
12 
13 /// A set of [`VfsPath`]s identified by [`FileId`]s.
14 #[derive(Default, Clone, Eq, PartialEq)]
15 pub struct FileSet {
16     files: FxHashMap<VfsPath, FileId>,
17     paths: IntMap<FileId, VfsPath>,
18 }
19 
20 impl FileSet {
21     /// Returns the number of stored paths.
len(&self) -> usize22     pub fn len(&self) -> usize {
23         self.files.len()
24     }
25 
26     /// Get the id of the file corresponding to `path`.
27     ///
28     /// If either `path`'s [`anchor`](AnchoredPath::anchor) or the resolved path is not in
29     /// the set, returns [`None`].
resolve_path(&self, path: AnchoredPath<'_>) -> Option<FileId>30     pub fn resolve_path(&self, path: AnchoredPath<'_>) -> Option<FileId> {
31         let mut base = self.paths[&path.anchor].clone();
32         base.pop();
33         let path = base.join(path.path)?;
34         self.files.get(&path).copied()
35     }
36 
37     /// Get the id corresponding to `path` if it exists in the set.
file_for_path(&self, path: &VfsPath) -> Option<&FileId>38     pub fn file_for_path(&self, path: &VfsPath) -> Option<&FileId> {
39         self.files.get(path)
40     }
41 
42     /// Get the path corresponding to `file` if it exists in the set.
path_for_file(&self, file: &FileId) -> Option<&VfsPath>43     pub fn path_for_file(&self, file: &FileId) -> Option<&VfsPath> {
44         self.paths.get(file)
45     }
46 
47     /// Insert the `file_id, path` pair into the set.
48     ///
49     /// # Note
50     /// Multiple [`FileId`] can be mapped to the same [`VfsPath`], and vice-versa.
insert(&mut self, file_id: FileId, path: VfsPath)51     pub fn insert(&mut self, file_id: FileId, path: VfsPath) {
52         self.files.insert(path.clone(), file_id);
53         self.paths.insert(file_id, path);
54     }
55 
56     /// Iterate over this set's ids.
iter(&self) -> impl Iterator<Item = FileId> + '_57     pub fn iter(&self) -> impl Iterator<Item = FileId> + '_ {
58         self.paths.keys().copied()
59     }
60 }
61 
62 impl fmt::Debug for FileSet {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result63     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64         f.debug_struct("FileSet").field("n_files", &self.files.len()).finish()
65     }
66 }
67 
68 /// This contains path prefixes to partition a [`Vfs`] into [`FileSet`]s.
69 ///
70 /// # Example
71 /// ```rust
72 /// # use vfs::{file_set::FileSetConfigBuilder, VfsPath, Vfs};
73 /// let mut builder = FileSetConfigBuilder::default();
74 /// builder.add_file_set(vec![VfsPath::new_virtual_path("/src".to_string())]);
75 /// let config = builder.build();
76 /// let mut file_system = Vfs::default();
77 /// file_system.set_file_contents(VfsPath::new_virtual_path("/src/main.rs".to_string()), Some(vec![]));
78 /// file_system.set_file_contents(VfsPath::new_virtual_path("/src/lib.rs".to_string()), Some(vec![]));
79 /// file_system.set_file_contents(VfsPath::new_virtual_path("/build.rs".to_string()), Some(vec![]));
80 /// // contains the sets :
81 /// // { "/src/main.rs", "/src/lib.rs" }
82 /// // { "build.rs" }
83 /// let sets = config.partition(&file_system);
84 /// ```
85 #[derive(Debug)]
86 pub struct FileSetConfig {
87     /// Number of sets that `self` can partition a [`Vfs`] into.
88     ///
89     /// This should be the number of sets in `self.map` + 1 for files that don't fit in any
90     /// defined set.
91     n_file_sets: usize,
92     /// Map from encoded paths to the set they belong to.
93     map: fst::Map<Vec<u8>>,
94 }
95 
96 impl Default for FileSetConfig {
default() -> Self97     fn default() -> Self {
98         FileSetConfig::builder().build()
99     }
100 }
101 
102 impl FileSetConfig {
103     /// Returns a builder for `FileSetConfig`.
builder() -> FileSetConfigBuilder104     pub fn builder() -> FileSetConfigBuilder {
105         FileSetConfigBuilder::default()
106     }
107 
108     /// Partition `vfs` into `FileSet`s.
109     ///
110     /// Creates a new [`FileSet`] for every set of prefixes in `self`.
partition(&self, vfs: &Vfs) -> Vec<FileSet>111     pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> {
112         let mut scratch_space = Vec::new();
113         let mut res = vec![FileSet::default(); self.len()];
114         for (file_id, path) in vfs.iter() {
115             let root = self.classify(path, &mut scratch_space);
116             res[root].insert(file_id, path.clone());
117         }
118         res
119     }
120 
121     /// Number of sets that `self` can partition a [`Vfs`] into.
len(&self) -> usize122     fn len(&self) -> usize {
123         self.n_file_sets
124     }
125 
126     /// Returns the set index for the given `path`.
127     ///
128     /// `scratch_space` is used as a buffer and will be entirely replaced.
classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize129     fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize {
130         scratch_space.clear();
131         path.encode(scratch_space);
132         let automaton = PrefixOf::new(scratch_space.as_slice());
133         let mut longest_prefix = self.len() - 1;
134         let mut stream = self.map.search(automaton).into_stream();
135         while let Some((_, v)) = stream.next() {
136             longest_prefix = v as usize;
137         }
138         longest_prefix
139     }
140 }
141 
142 /// Builder for [`FileSetConfig`].
143 #[derive(Default)]
144 pub struct FileSetConfigBuilder {
145     roots: Vec<Vec<VfsPath>>,
146 }
147 
148 impl FileSetConfigBuilder {
149     /// Returns the number of sets currently held.
len(&self) -> usize150     pub fn len(&self) -> usize {
151         self.roots.len()
152     }
153 
154     /// Add a new set of paths prefixes.
add_file_set(&mut self, roots: Vec<VfsPath>)155     pub fn add_file_set(&mut self, roots: Vec<VfsPath>) {
156         self.roots.push(roots);
157     }
158 
159     /// Build the `FileSetConfig`.
build(self) -> FileSetConfig160     pub fn build(self) -> FileSetConfig {
161         let n_file_sets = self.roots.len() + 1;
162         let map = {
163             let mut entries = Vec::new();
164             for (i, paths) in self.roots.into_iter().enumerate() {
165                 for p in paths {
166                     let mut buf = Vec::new();
167                     p.encode(&mut buf);
168                     entries.push((buf, i as u64));
169                 }
170             }
171             entries.sort();
172             entries.dedup_by(|(a, _), (b, _)| a == b);
173             fst::Map::from_iter(entries).unwrap()
174         };
175         FileSetConfig { n_file_sets, map }
176     }
177 }
178 
179 /// Implements [`fst::Automaton`]
180 ///
181 /// It will match if `prefix_of` is a prefix of the given data.
182 struct PrefixOf<'a> {
183     prefix_of: &'a [u8],
184 }
185 
186 impl<'a> PrefixOf<'a> {
187     /// Creates a new `PrefixOf` from the given slice.
new(prefix_of: &'a [u8]) -> Self188     fn new(prefix_of: &'a [u8]) -> Self {
189         Self { prefix_of }
190     }
191 }
192 
193 impl fst::Automaton for PrefixOf<'_> {
194     type State = usize;
start(&self) -> usize195     fn start(&self) -> usize {
196         0
197     }
is_match(&self, &state: &usize) -> bool198     fn is_match(&self, &state: &usize) -> bool {
199         state != !0
200     }
can_match(&self, &state: &usize) -> bool201     fn can_match(&self, &state: &usize) -> bool {
202         state != !0
203     }
accept(&self, &state: &usize, byte: u8) -> usize204     fn accept(&self, &state: &usize, byte: u8) -> usize {
205         if self.prefix_of.get(state) == Some(&byte) {
206             state + 1
207         } else {
208             !0
209         }
210     }
211 }
212 
213 #[cfg(test)]
214 mod tests;
215