• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Recursively hash the contents of a directory
2 use anyhow::Result;
3 use hex::encode;
4 use rayon::iter::IntoParallelIterator;
5 use rayon::iter::ParallelIterator;
6 use ring::digest::{self, SHA256};
7 use serde::{Deserialize, Serialize};
8 use std::collections::HashMap;
9 use std::fs;
10 use std::io::{self, Read, Write};
11 use std::os::unix::fs::FileTypeExt;
12 use std::os::unix::fs::MetadataExt;
13 use std::os::unix::fs::PermissionsExt;
14 use std::path::{Path, PathBuf};
15 use walkdir::WalkDir;
16 
17 #[allow(missing_docs)]
18 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
19 pub enum FileType {
20     #[default]
21     File,
22     Symlink,
23     Directory,
24 }
25 
/// Selects whether permission bits take part in a metadata comparison.
// NOTE(review): `dead_code` is presumably allowed because not every target that
// compiles this module constructs both variants -- confirm before removing.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[allow(dead_code)]
pub enum DiffMode {
    /// Treat the permission bits of both sides as zero when comparing.
    IgnorePermissions,
    /// Compare the permission bits along with every other compared field.
    UsePermissions,
}
32 
/// Represents a file, directory, or symlink.
/// We need enough information to be able to tell if:
///   1) A regular file changes to a directory or symlink.
///   2) A symlink's target file path changes.
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
pub struct FileMetadata {
    /// Is this a file, dir or symlink?
    pub file_type: FileType,

    /// Target path this entry links to, or "" when it is not a symlink.
    /// Omitted from the serialized form when empty.
    #[serde(skip_serializing_if = "String::is_empty", default)]
    pub symlink: String,

    /// Sha256 of contents for regular files, as a lowercase hex string.
    /// Left empty for directories and symlinks and omitted from the serialized form when empty.
    #[serde(skip_serializing_if = "String::is_empty", default)]
    pub digest: String,

    /// Permission bits as reported by `PermissionsExt::mode()`.
    /// Left at 0 for directories; omitted from the serialized form when 0.
    #[serde(default, skip_serializing_if = "is_default")]
    pub permission_bits: u32,

    /// A unique string used only to determine if the digest should be recomputed
    /// or can be reused from the cache.
    /// The key includes: path, size, mtime, and ctime.
    ///
    /// The cache_key will be different for the host and the device and is not used
    /// to decide whether two files are different. It is never serialized.
    #[serde(skip)]
    pub cache_key: String,
}
63 
/// Serde `skip_serializing_if` predicate: true when `t` equals the default value of its type.
fn is_default<T: Default + PartialEq>(t: &T) -> bool {
    let baseline = T::default();
    *t == baseline
}
67 
68 impl FileMetadata {
from_path(file_path: &Path, cache: &Cache) -> Result<Self>69     pub fn from_path(file_path: &Path, cache: &Cache) -> Result<Self> {
70         let metadata = fs::symlink_metadata(file_path)?;
71 
72         if metadata.is_dir() {
73             Ok(FileMetadata::from_dir())
74         } else if metadata.is_symlink() {
75             FileMetadata::from_symlink(file_path, &metadata)
76         } else {
77             Ok(FileMetadata::from_file(file_path, &metadata, cache)?)
78         }
79     }
80 
from_dir() -> Self81     pub fn from_dir() -> Self {
82         FileMetadata { file_type: FileType::Directory, ..Default::default() }
83     }
from_symlink(file_path: &Path, metadata: &fs::Metadata) -> Result<Self>84     pub fn from_symlink(file_path: &Path, metadata: &fs::Metadata) -> Result<Self> {
85         let link = fs::read_link(file_path)?;
86         let target_path_string =
87             link.into_os_string().into_string().expect("Expected valid file name");
88         let mut perms = 0;
89 
90         // Getting the permissions doesn't work on windows, so don't try and don't compare them.
91         if !cfg!(windows) {
92             perms = metadata.permissions().mode();
93         }
94         Ok(FileMetadata {
95             file_type: FileType::Symlink,
96             symlink: target_path_string,
97             permission_bits: perms,
98             ..Default::default()
99         })
100     }
from_file(file_path: &Path, metadata: &fs::Metadata, cache: &Cache) -> Result<Self>101     pub fn from_file(file_path: &Path, metadata: &fs::Metadata, cache: &Cache) -> Result<Self> {
102         // Getting the permissions doesn't work on windows, so don't try and don't compare them.
103         let mut perms = 0;
104         if !cfg!(windows) {
105             perms = metadata.permissions().mode();
106         }
107 
108         let (digest, cache_key) = get_or_compute_digest(file_path, metadata, cache)?;
109         Ok(FileMetadata {
110             file_type: FileType::File,
111             digest,
112             cache_key,
113             permission_bits: perms,
114             ..Default::default()
115         })
116     }
117 }
118 
/// A description of the differences on the filesystems between the host
/// and device. Each file that is different will be a key in one of
/// three maps with the value indicating the difference.
#[derive(Debug, Default, PartialEq)]
pub struct Diffs {
    /// Files on host, but not on device. `diff` stores the host metadata as the value.
    pub device_needs: HashMap<PathBuf, FileMetadata>,
    /// Files on device, but not host. `diff` stores the device metadata as the value.
    pub device_extra: HashMap<PathBuf, FileMetadata>,
    /// Files that are different between host and device. `diff` stores the host metadata
    /// as the value.
    pub device_diffs: HashMap<PathBuf, FileMetadata>,
}
131 
132 /// Compute the files that need to be added, removed or updated on the device.
133 /// Each file should land in of the three categories (i.e. updated, not
134 /// removed and added);
135 /// TODO(rbraunstein): Fix allow(unused) by breaking out methods not
136 /// needed by adevice_fingerprint.
137 #[allow(unused)]
diff( host_files: &HashMap<PathBuf, FileMetadata>, device_files: &HashMap<PathBuf, FileMetadata>, diff_mode: DiffMode, ) -> Diffs138 pub fn diff(
139     host_files: &HashMap<PathBuf, FileMetadata>,
140     device_files: &HashMap<PathBuf, FileMetadata>,
141     diff_mode: DiffMode,
142 ) -> Diffs {
143     let mut diffs = Diffs {
144         device_needs: HashMap::new(),
145         device_extra: HashMap::new(),
146         device_diffs: HashMap::new(),
147     };
148 
149     // Insert diffs files that are on the host, but not on the device or
150     // file on the host that are different on the device.
151     for (file_name, host_metadata) in host_files {
152         match device_files.get(file_name) {
153             // File on host and device, but the metadata is different.
154             Some(device_metadata)
155                 if is_metadata_diff(device_metadata, host_metadata, diff_mode) =>
156             {
157                 diffs.device_diffs.insert(file_name.clone(), host_metadata.clone())
158             }
159             // If the device metadata == host metadata there is nothing to do.
160             Some(_) => None,
161             // Not on the device yet, insert it
162             None => diffs.device_needs.insert(file_name.clone(), host_metadata.clone()),
163         };
164     }
165 
166     // Files on the device, but not one the host.
167     for (file_name, metadata) in device_files {
168         if host_files.get(file_name).is_none() {
169             diffs.device_extra.insert(file_name.clone(), metadata.clone());
170         }
171     }
172     diffs
173 }
174 
175 /// Return true if left != right ignoring cachekey since that include last_modifed
176 /// When useing DiffMode::IgnorePermissions, clear the permission bits before doing the comparison
is_metadata_diff(left: &FileMetadata, right: &FileMetadata, diff_mode: DiffMode) -> bool177 pub fn is_metadata_diff(left: &FileMetadata, right: &FileMetadata, diff_mode: DiffMode) -> bool {
178     let mut cleared_left = left.clone();
179     let mut cleared_right = right.clone();
180     cleared_left.cache_key = "".to_string();
181     cleared_right.cache_key = "".to_string();
182 
183     if diff_mode == DiffMode::UsePermissions {
184         return cleared_left != cleared_right;
185     }
186     cleared_left.permission_bits = 0;
187     cleared_right.permission_bits = 0;
188     cleared_left != cleared_right
189 }
190 
191 /// Given a `partition_root`, traverse all files under the named |partitions|
192 /// at the root.  Typically, ["system", "apex"] are partition_names.
193 /// The keys will be rooted at the `partition root`, ie. if system contains
194 /// a file named FILE and system is the `partition_root`, the key wil be
195 /// system/FILE.
196 /// Cache is used only to speed up computing digests; if the cache_key is the same
197 /// as an earlier fingerprint; then we reuse it rather than recomputing.
fingerprint_partitions( partition_root: &Path, partition_names: &[PathBuf], ) -> Result<HashMap<PathBuf, FileMetadata>>198 pub fn fingerprint_partitions(
199     partition_root: &Path,
200     partition_names: &[PathBuf],
201 ) -> Result<HashMap<PathBuf, FileMetadata>> {
202     let cache = Cache::read().unwrap_or_default();
203     let filenames: Vec<PathBuf> = partition_names
204         .iter()
205         .flat_map(|p| WalkDir::new(partition_root.join(p)).follow_links(false))
206         .map(|result| result.expect("Walking directory").path().to_path_buf())
207         .collect();
208 
209     // Compute digest for each file.
210     let results = filenames
211         .into_par_iter()
212         // Walking the /data partition quickly leads to sockets, filter those out.
213         .filter(|file_path| !is_special_file(file_path))
214         .map(|file_path| {
215             (
216                 file_path.strip_prefix(partition_root).unwrap().to_owned(),
217                 FileMetadata::from_path(&file_path, &cache).unwrap(),
218             )
219         })
220         .collect();
221     cache.write(&results)?;
222     Ok(results)
223 }
224 
/// Return true for special files (block devices, character devices, fifos and sockets)
/// that would be incorrect to digest and that we can skip when comparing the device
/// to the build tree.
///
/// Panics if `file_path` cannot be stat'ed.
fn is_special_file(file_path: &Path) -> bool {
    // `symlink_metadata` doesn't follow links, which is what we want: a symlink is never
    // "special" itself. The stat is cheap compared to the digest work that follows.
    let kind = fs::symlink_metadata(file_path).expect("no metadata").file_type();
    let is_device = kind.is_block_device() || kind.is_char_device();
    let is_pipe_like = kind.is_fifo() || kind.is_socket();
    is_device || is_pipe_like
}
237 
238 /// Compute the sha256 and return it as a lowercase hex string.
compute_digest(file_path: &Path) -> Result<String>239 fn compute_digest(file_path: &Path) -> Result<String> {
240     let input = fs::File::open(file_path)?;
241     let mut reader = io::BufReader::new(input);
242     let mut context = digest::Context::new(&SHA256);
243     let mut buffer = [0; 4096];
244 
245     loop {
246         let num_bytes_read = reader.read(&mut buffer)?;
247         if num_bytes_read == 0 {
248             break;
249         }
250         context.update(&buffer[..num_bytes_read]);
251     }
252 
253     Ok(encode(context.finish().as_ref()))
254 }
255 
256 /// Get digest from cache or compute digest and return a cache_key
get_or_compute_digest( file_path: &Path, metadata: &fs::Metadata, cache: &Cache, ) -> Result<(String, String)>257 fn get_or_compute_digest(
258     file_path: &Path,
259     metadata: &fs::Metadata,
260     cache: &Cache,
261 ) -> Result<(String, String)> {
262     let cache_key = cache.cache_key(file_path, metadata)?;
263     let digest;
264     if let Some(cached_digest) = cache.get(&cache_key) {
265         digest = cached_digest.to_string();
266     } else {
267         digest = compute_digest(file_path)?;
268     }
269     Ok((digest, cache_key))
270 }
271 
/// The cache is intended to be used to skip computing digests - it relies on the assumption
/// that unchanged file stats imply unchanged content (and therefore, unchanged digest).
///
/// The host stores the cache file in $OUT/ by default.  Acloud/physical devices don't have
/// $OUT so will attempt to store the cache in /cache/. A change to a file's path, size, mtime
/// or ctime changes its cache key, so its digest is recomputed and the cache file is rewritten.
/// The cache should persist across reboots but may be deleted between flashes.
#[derive(Default)]
pub struct Cache {
    /// Maps a cache key to the digest that was computed for it.
    pub data: HashMap<String, String>,
}
282 impl Cache {
get(&self, key: &str) -> Option<&String>283     pub fn get(&self, key: &str) -> Option<&String> {
284         self.data.get(key)
285     }
286 
287     // Generate cache key from file metadata
cache_key(&self, file_path: &Path, metadata: &fs::Metadata) -> Result<String>288     pub fn cache_key(&self, file_path: &Path, metadata: &fs::Metadata) -> Result<String> {
289         Ok(format!(
290             "{}#{}#{}.{}#{}.{}",
291             file_path.display(),
292             metadata.len(),
293             metadata.mtime(),
294             metadata.mtime_nsec(),
295             metadata.ctime(),
296             metadata.ctime_nsec()
297         ))
298     }
299 
300     /// Reads cache from a file
read_from_file(file_path: &Path) -> Result<Self>301     pub fn read_from_file(file_path: &Path) -> Result<Self> {
302         let mut file = fs::File::open(file_path)?;
303         let mut contents = String::new();
304         file.read_to_string(&mut contents)?;
305         match serde_json::from_str(&contents) {
306             Ok(data) => Ok(Cache { data }),
307             Err(_error) => Err(_error.into()),
308         }
309     }
310 
read() -> Result<Self>311     pub fn read() -> Result<Self> {
312         let cache_file_path = Cache::default_cache_path();
313         Cache::read_from_file(&cache_file_path)
314     }
315 
316     /// Writes cache to a file
write_to_file( self, results: &HashMap<PathBuf, FileMetadata>, file_path: &Path, ) -> Result<()>317     pub fn write_to_file(
318         self,
319         results: &HashMap<PathBuf, FileMetadata>,
320         file_path: &Path,
321     ) -> Result<()> {
322         let mut new_cache: HashMap<String, String> = HashMap::new();
323         for meta in results.values() {
324             if !meta.cache_key.is_empty() {
325                 new_cache.insert(meta.cache_key.clone(), meta.digest.clone());
326             }
327         }
328         if new_cache == self.data {
329             // cache did not change - skip write.
330             return Ok(());
331         }
332         let cache_str = serde_json::to_string(&new_cache)?;
333         let mut file = fs::File::create(file_path)?;
334         file.write_all(cache_str.as_bytes())?;
335         Ok(())
336     }
337 
write(self, results: &HashMap<PathBuf, FileMetadata>) -> Result<()>338     pub fn write(self, results: &HashMap<PathBuf, FileMetadata>) -> Result<()> {
339         let cache_file_path = Cache::default_cache_path();
340         self.write_to_file(results, &cache_file_path)
341     }
342 
default_cache_path() -> PathBuf343     pub fn default_cache_path() -> PathBuf {
344         // Attempt to use $OUT, then /cache and finally fall back to /tmp
345         // /tmp is deleted on reboot on acloud devices
346         let mut cache_dir = std::env::var("OUT").unwrap_or_else(|_| "/cache".to_string());
347         if !Path::new(&cache_dir).is_dir() {
348             cache_dir = "/tmp".to_string();
349         }
350 
351         PathBuf::from(cache_dir).join("adevice_digest_cache.json")
352     }
353 }
354 
355 #[cfg(test)]
356 mod tests {
357     use super::*;
358     use crate::fingerprint::DiffMode::UsePermissions;
359     use std::collections::BTreeSet;
360     use std::path::PathBuf;
361     use tempfile::TempDir;
362 
363     #[test]
empty_inputs()364     fn empty_inputs() {
365         assert_eq!(diff(&HashMap::new(), &HashMap::new(), UsePermissions), Diffs::default());
366     }
367 
368     #[test]
same_inputs()369     fn same_inputs() {
370         let file_entry = HashMap::from([(
371             PathBuf::from("a/b/foo.so"),
372             FileMetadata {
373                 file_type: FileType::File,
374                 digest: "deadbeef".to_string(),
375                 ..Default::default()
376             },
377         )]);
378         assert_eq!(diff(&file_entry, &file_entry.clone(), UsePermissions), Diffs::default());
379     }
380 
381     #[test]
same_inputs_with_permissions()382     fn same_inputs_with_permissions() {
383         let file_entry = HashMap::from([(
384             PathBuf::from("a/b/foo.so"),
385             FileMetadata {
386                 file_type: FileType::File,
387                 digest: "deadbeef".to_string(),
388                 permission_bits: 0o644,
389                 ..Default::default()
390             },
391         )]);
392         assert_eq!(diff(&file_entry, &file_entry.clone(), UsePermissions), Diffs::default());
393     }
394 
395     #[test]
same_inputs_with_different_permissions_are_not_equal()396     fn same_inputs_with_different_permissions_are_not_equal() {
397         let orig = HashMap::from([(
398             PathBuf::from("a/b/foo.so"),
399             FileMetadata {
400                 file_type: FileType::File,
401                 digest: "deadbeef".to_string(),
402                 permission_bits: 0o644,
403                 ..Default::default()
404             },
405         )]);
406         let mut copy = orig.clone();
407         copy.entry(PathBuf::from("a/b/foo.so")).and_modify(|v| v.permission_bits = 0);
408 
409         // Not equal
410         assert_ne!(diff(&orig, &copy, UsePermissions), Diffs::default());
411     }
412 
413     #[test]
same_inputs_ignoring_permissions()414     fn same_inputs_ignoring_permissions() {
415         let orig = HashMap::from([(
416             PathBuf::from("a/b/foo.so"),
417             FileMetadata {
418                 file_type: FileType::File,
419                 digest: "deadbeef".to_string(),
420                 permission_bits: 0o644,
421                 ..Default::default()
422             },
423         )]);
424         let mut copy = orig.clone();
425         copy.entry(PathBuf::from("a/b/foo.so")).and_modify(|v| v.permission_bits = 0);
426 
427         // Equal when we ignore the different permission bits.
428         assert_eq!(diff(&orig, &copy, DiffMode::IgnorePermissions), Diffs::default());
429     }
430 
431     #[test]
different_file_type()432     fn different_file_type() {
433         let host_map_with_filename_as_file = HashMap::from([(
434             PathBuf::from("a/b/foo.so"),
435             FileMetadata {
436                 file_type: FileType::File,
437                 digest: "deadbeef".to_string(),
438                 ..Default::default()
439             },
440         )]);
441 
442         let device_map_with_filename_as_dir = HashMap::from([(
443             PathBuf::from("a/b/foo.so"),
444             FileMetadata { file_type: FileType::Directory, ..Default::default() },
445         )]);
446 
447         let diffs =
448             diff(&host_map_with_filename_as_file, &device_map_with_filename_as_dir, UsePermissions);
449         assert_eq!(
450             diffs.device_diffs.get(&PathBuf::from("a/b/foo.so")).expect("Missing file"),
451             // `diff` returns FileMetadata for host, but we really only care that the
452             // file name was found.
453             &FileMetadata {
454                 file_type: FileType::File,
455                 digest: "deadbeef".to_string(),
456                 ..Default::default()
457             },
458         );
459     }
460 
461     #[test]
diff_simple_trees()462     fn diff_simple_trees() {
463         let host_map = HashMap::from([
464             (PathBuf::from("matching_file"), file_metadata("digest_matching_file")),
465             (PathBuf::from("path/to/diff_file"), file_metadata("digest_file2")),
466             (PathBuf::from("path/to/new_file"), file_metadata("digest_new_file")),
467             (PathBuf::from("same_link"), link_metadata("matching_file")),
468             (PathBuf::from("diff_link"), link_metadata("targetxx")),
469             (PathBuf::from("new_link"), link_metadata("new_target")),
470             (PathBuf::from("matching dir"), dir_metadata()),
471             (PathBuf::from("new_dir"), dir_metadata()),
472         ]);
473 
474         let device_map = HashMap::from([
475             (PathBuf::from("matching_file"), file_metadata("digest_matching_file")),
476             (PathBuf::from("path/to/diff_file"), file_metadata("digest_file2_DIFF")),
477             (PathBuf::from("path/to/deleted_file"), file_metadata("digest_deleted_file")),
478             (PathBuf::from("same_link"), link_metadata("matching_file")),
479             (PathBuf::from("diff_link"), link_metadata("targetxx_DIFF")),
480             (PathBuf::from("deleted_link"), link_metadata("new_target")),
481             (PathBuf::from("matching dir"), dir_metadata()),
482             (PathBuf::from("deleted_dir"), dir_metadata()),
483         ]);
484 
485         let diffs = diff(&host_map, &device_map, UsePermissions);
486         assert_eq!(
487             BTreeSet::from_iter(diffs.device_diffs.keys()),
488             BTreeSet::from([&PathBuf::from("diff_link"), &PathBuf::from("path/to/diff_file")])
489         );
490         assert_eq!(
491             BTreeSet::from_iter(diffs.device_needs.keys()),
492             BTreeSet::from([
493                 &PathBuf::from("path/to/new_file"),
494                 &PathBuf::from("new_link"),
495                 &PathBuf::from("new_dir")
496             ])
497         );
498         assert_eq!(
499             BTreeSet::from_iter(diffs.device_extra.keys()),
500             BTreeSet::from([
501                 &PathBuf::from("path/to/deleted_file"),
502                 &PathBuf::from("deleted_link"),
503                 &PathBuf::from("deleted_dir")
504             ])
505         );
506     }
507 
508     #[test]
compute_digest_empty_file()509     fn compute_digest_empty_file() {
510         let tmpdir = TempDir::new().unwrap();
511         let file_path = tmpdir.path().join("empty_file");
512         fs::write(&file_path, "").unwrap();
513         assert_eq!(
514             "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string(),
515             compute_digest(&file_path).unwrap()
516         );
517     }
518 
519     #[test]
compute_digest_small_file()520     fn compute_digest_small_file() {
521         let tmpdir = TempDir::new().unwrap();
522         let file_path = tmpdir.path().join("small_file");
523         fs::write(&file_path, "This is a test\nof a small file.\n").unwrap();
524         assert_eq!(
525             "a519d054afdf2abfbdd90a738d248f606685d6c187e96390bde22e958240449e".to_string(),
526             compute_digest(&file_path).unwrap()
527         );
528     }
529 
530     #[test]
get_or_compute_digest_small_file()531     fn get_or_compute_digest_small_file() {
532         let tmpdir = TempDir::new().unwrap();
533         let file_path = tmpdir.path().join("small_file");
534         fs::write(&file_path, "This is a test\nof a small file.\n").unwrap();
535 
536         let metadata = fs::metadata(&file_path).expect("file metadata");
537         let mut cache = Cache::default();
538         let (digest, cache_key) = get_or_compute_digest(&file_path, &metadata, &cache).unwrap();
539         assert_eq!(
540             "a519d054afdf2abfbdd90a738d248f606685d6c187e96390bde22e958240449e".to_string(),
541             digest
542         );
543         assert_eq!(
544             cache_key,
545             format!(
546                 "{}#{}#{}.{}#{}.{}",
547                 file_path.display(),
548                 metadata.len(),
549                 metadata.mtime(),
550                 metadata.mtime_nsec(),
551                 metadata.ctime(),
552                 metadata.ctime_nsec()
553             )
554         );
555 
556         // if cache entry exists; than that is used instead of recomputing
557         cache.data.insert(cache_key, "test-saved-cache-digest".to_string());
558         let (digest, _) = get_or_compute_digest(&file_path, &metadata, &cache).unwrap();
559         assert_eq!("test-saved-cache-digest".to_string(), digest);
560     }
561 
562     // Generate some files near the buffer size to check for off-by-one errors
563     // and compute the digest and store here.
564     // We can't check these into testdata and read from testdata unless we serialize
565     // all the tests. Some tests to `cd` to create relative symlinks and that affects
566     // any tests that want to read from testdata.
567     #[test]
verify_edge_case_digests()568     fn verify_edge_case_digests() {
569         let tmpdir = TempDir::new().unwrap();
570         // We could use a RNG with a seed, but lets just create simple files of bytes.
571         let raw_bytes: &[u8; 10] = &[0, 1, 17, 200, 11, 8, 0, 32, 9, 10];
572         let mut boring_buff = Vec::new();
573         for _ in 1..1000 {
574             boring_buff.extend_from_slice(raw_bytes);
575         }
576 
577         for (num_bytes, digest) in
578             &[(4095, "a0e88b2743"), (4096, "b2e324aac3"), (4097, "70fcbe6a8d")]
579         {
580             let file_path = tmpdir.path().join(num_bytes.to_string());
581             fs::write(&file_path, &boring_buff[0..*num_bytes]).unwrap();
582             assert!(
583                 compute_digest(&file_path).unwrap().starts_with(digest),
584                 "Expected file {:?} to have a digest starting with {:?}",
585                 file_path,
586                 digest
587             );
588         }
589     }
590 
    #[test]
    fn fingerprint_file_for_file() {
        let partition_root = TempDir::new().unwrap();
        let file_path = partition_root.path().join("small_file");
        fs::write(&file_path, "This is a test\nof a small file.\n").unwrap();

        // NOTE: files are 0o644 on the host tests and 0o655 on device tests
        // for me and CI, so the mode is forced to 0o655 for the duration of the test.
        let mut perms = fs::metadata(&file_path).expect("Getting permissions").permissions();
        perms.set_mode(0o100655);
        assert!(fs::set_permissions(&file_path, perms).is_ok());
        let cache = Cache::default();
        let entry = FileMetadata::from_path(&file_path, &cache).unwrap();
        assert_eq!(
            FileMetadata {
                file_type: FileType::File,
                digest: "a519d054afdf2abfbdd90a738d248f606685d6c187e96390bde22e958240449e"
                    .to_string(),
                permission_bits: 0o100655,
                // The cache key depends on the temp path and timestamps, so it is copied
                // from the result rather than predicted.
                cache_key: entry.cache_key.clone(),
                ..Default::default()
            },
            entry
        )
    }
616 
617     #[test]
fingerprint_file_for_relative_symlink()618     fn fingerprint_file_for_relative_symlink() {
619         let partition_root = TempDir::new().unwrap();
620         let file_path = partition_root.path().join("small_file");
621         fs::write(file_path, "This is a test\nof a small file.\n").unwrap();
622 
623         let link = create_symlink(
624             &PathBuf::from("small_file"),
625             "link_to_small_file",
626             partition_root.path(),
627         );
628         let cache = Cache::default();
629         let entry = FileMetadata::from_path(&link, &cache).unwrap();
630         assert_eq!(
631             FileMetadata {
632                 file_type: FileType::Symlink,
633                 symlink: "small_file".to_string(),
634                 permission_bits: 0o120777,
635                 ..Default::default()
636             },
637             entry
638         )
639     }
640 
641     #[test]
fingerprint_file_for_absolute_symlink()642     fn fingerprint_file_for_absolute_symlink() {
643         let partition_root = TempDir::new().unwrap();
644         let link = create_symlink(&PathBuf::from("/tmp"), "link_to_tmp", partition_root.path());
645         let cache = Cache::default();
646         let entry = FileMetadata::from_path(&link, &cache).unwrap();
647         assert_eq!(
648             FileMetadata {
649                 file_type: FileType::Symlink,
650                 symlink: "/tmp".to_string(),
651                 permission_bits: 0o120777,
652                 ..Default::default()
653             },
654             entry
655         )
656     }
657 
658     #[test]
fingerprint_file_for_directory()659     fn fingerprint_file_for_directory() {
660         let partition_root = TempDir::new().unwrap();
661         let newdir_path = partition_root.path().join("some_dir");
662         fs::create_dir(&newdir_path).expect("Should have create 'some_dir' in temp dir");
663         let cache = Cache::default();
664         let entry = FileMetadata::from_path(&newdir_path, &cache).unwrap();
665         assert_eq!(FileMetadata { file_type: FileType::Directory, ..Default::default() }, entry)
666     }
667 
668     #[test]
fingerprint_file_on_bad_path_reports_err()669     fn fingerprint_file_on_bad_path_reports_err() {
670         let cache = Cache::default();
671         if FileMetadata::from_path(Path::new("testdata/not_exist"), &cache).is_ok() {
672             panic!("Should have failed on invalid path")
673         }
674     }
675 
676     /// /tmp/.tmpxO0pRC/system
677     /// % tree
678     /// .
679     /// ├── cycle1 -> cycle2
680     /// ├── cycle2 -> cycle1
681     /// ├── danglers
682     /// │   ├── d1 -> nowhere
683     /// │   └── d2 -> /not/existing
684     /// ├── dir1
685     /// │   ├── dir2
686     /// │   │   ├── nested
687     /// │   │   └── nested2
688     /// │   ├── dir4
689     /// │   └── f1.txt
690     /// ├── dir3
691     /// │   ├── to_tmp -> /tmp
692     /// │   └── to_tmp2 -> /system/cycle1
693     /// ├── file1.so
694     /// ├── file2.so
695     /// ├── link1 -> file1.so
696     /// └── link2 -> link1
697     #[test]
698 
fingerprint_simple_partition()699     fn fingerprint_simple_partition() {
700         let tmp_root = TempDir::new().unwrap();
701         // TODO(rbraunstein): Change make_partition to look more like `expected` variable below.
702         // i.e. use file_type rather than pass files, dirs, and symlinks in different arrays.
703         // Or use a struct with named fields as the args.
704         make_partition(
705             tmp_root.path(),
706             "system",
707             &[
708                 ("file1.so", "some text"),
709                 ("file2.so", "more text"),
710                 ("dir1/f1.txt", ""),
711                 ("dir1/dir2/nested", "some more text"),
712                 ("dir1/dir2/nested2", "some more text"),
713             ],
714             // Empty directories/
715             &["dir3", "dir1/dir4", "danglers"],
716             // Symlinks:
717             //   Linkname, target.
718             &[
719                 ("link1", "file1.so"),
720                 ("link2", "link1"),
721                 ("cycle1", "cycle2"),
722                 ("cycle2", "cycle1"),
723                 ("dir3/to_tmp", "/tmp"),
724                 ("dir3/to_tmp2", "/system/cycle1"),
725                 ("danglers/d1", "nowhere"),
726                 ("danglers/d2", "/not/existing"),
727             ],
728         );
729         let result = fingerprint_partitions(tmp_root.path(), &[PathBuf::from("system")]).unwrap();
730         println!("RESULTS\n");
731         for x in &result {
732             println!("{:?}", x);
733         }
734         let expected = &[
735             ("system/file1.so", FileType::File, "b94f"),
736             ("system/file2.so", FileType::File, "c0dc"),
737             ("system/dir1/f1.txt", FileType::File, "e3b0c"),
738             ("system/dir1/dir2/nested", FileType::File, "bde27b"),
739             ("system/dir1/dir2/nested2", FileType::File, "bde27b"),
740             ("system/dir3", FileType::Directory, ""),
741             ("system/danglers", FileType::Directory, ""),
742             ("system/dir1", FileType::Directory, ""),
743             ("system/dir1/dir2", FileType::Directory, ""),
744             ("system/dir1/dir4", FileType::Directory, ""),
745             ("system/link1", FileType::Symlink, "file1.so"),
746             ("system/link2", FileType::Symlink, "link1"),
747             ("system/cycle1", FileType::Symlink, "cycle2"),
748             ("system/cycle2", FileType::Symlink, "cycle1"),
749             ("system/dir3/to_tmp", FileType::Symlink, "/tmp"),
750             ("system/dir3/to_tmp2", FileType::Symlink, "/system/cycle1"),
751             ("system/danglers/d1", FileType::Symlink, "nowhere"),
752             ("system/danglers/d2", FileType::Symlink, "/not/existing"),
753             ("system", FileType::Directory, ""),
754         ];
755 
756         assert_eq!(
757             expected.len(),
758             result.len(),
759             "expected: {}, result {}",
760             expected.len(),
761             result.len()
762         );
763 
764         for (file_name, file_type, data) in expected {
765             match file_type {
766                 FileType::File => assert!(
767                     matching_file_fingerprint(file_name, data, &result),
768                     "mismatch on {:?} {:?}",
769                     file_name,
770                     data
771                 ),
772                 FileType::Directory => assert!(result
773                     .get(&PathBuf::from(file_name))
774                     .is_some_and(|d| d.file_type == FileType::Directory)),
775                 FileType::Symlink => assert!(result
776                     .get(&PathBuf::from(file_name))
777                     .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
778             };
779         }
780     }
781 
782     #[test]
fingerprint_multiple_partitions()783     fn fingerprint_multiple_partitions() {
784         let tmp_root = TempDir::new().unwrap();
785         // Use same file name, with and without same contents in two different partitions.
786         make_partition(
787             tmp_root.path(),
788             "system",
789             &[("file1.so", "some text"), ("file2", "system part")],
790             // Empty directories/
791             &[],
792             // Symlinks
793             &[],
794         );
795         make_partition(
796             tmp_root.path(),
797             "data",
798             &[("file1.so", "some text"), ("file2", "data part")],
799             // Empty directories/
800             &[],
801             // Symlinks
802             &[],
803         );
804 
805         let result = fingerprint_partitions(
806             tmp_root.path(),
807             &[PathBuf::from("system"), PathBuf::from("data")],
808         )
809         .unwrap();
810         println!("RESULTS\n");
811         for x in &result {
812             println!("{:?}", x);
813         }
814         let expected = &[
815             ("system/file1.so", FileType::File, "b94f"),
816             ("data/file1.so", FileType::File, "b94f"),
817             ("system/file2", FileType::File, "ae7c6c"),
818             ("data/file2", FileType::File, "4ae46d"),
819             ("data", FileType::Directory, ""),
820             ("system", FileType::Directory, ""),
821         ];
822 
823         assert_eq!(
824             expected.len(),
825             result.len(),
826             "expected: {}, result {}",
827             expected.len(),
828             result.len()
829         );
830 
831         for (file_name, file_type, data) in expected {
832             match file_type {
833                 FileType::File => assert!(
834                     matching_file_fingerprint(file_name, data, &result),
835                     "mismatch on {:?} {:?}",
836                     file_name,
837                     data
838                 ),
839                 FileType::Directory => assert!(result
840                     .get(&PathBuf::from(file_name))
841                     .is_some_and(|d| d.file_type == FileType::Directory)),
842                 _ => (),
843             };
844         }
845     }
846 
847     #[test]
fingerprint_partition_with_interesting_file_names()848     fn fingerprint_partition_with_interesting_file_names() {
849         let tmp_dir = TempDir::new().unwrap();
850         let tmp_root = tmp_dir.path().to_owned();
851         println!("DEBUG: {tmp_root:?}");
852         make_partition(
853             &tmp_root,
854             "funky",
855             &[("안녕하세요", "hello\n")],
856             // Empty directories/
857             &[
858                 // TODO(rbraunstein): This invalid file name (embedded newlind and Nil) breaks tests.
859                 // Need to fix the code to remove `unwraps` and propagate errors.
860                 // "d\ni\0r3"
861                 ],
862             // symlinks
863             // linkname, target
864             &[("שלום", "안녕하세요")],
865         );
866         let result = fingerprint_partitions(&tmp_root, &[PathBuf::from("funky")]).unwrap();
867         println!("RESULTS\n");
868         for x in &result {
869             println!("{:?}", x);
870         }
871         let expected = &[
872             ("funky/안녕하세요", FileType::File, "5891b"),
873             // ("funky/d\ni\0r3", FileType::Directory, ""),
874             ("funky/שלום", FileType::Symlink, "안녕하세요"),
875             ("funky", FileType::Directory, ""),
876         ];
877 
878         assert_eq!(
879             expected.len(),
880             result.len(),
881             "expected: {}, result {}",
882             expected.len(),
883             result.len()
884         );
885 
886         for (file_name, file_type, data) in expected {
887             match file_type {
888                 FileType::File => assert!(
889                     matching_file_fingerprint(file_name, data, &result),
890                     "mismatch on {:?} {:?}",
891                     file_name,
892                     data
893                 ),
894                 FileType::Directory => assert!(result
895                     .get(&PathBuf::from(file_name))
896                     .is_some_and(|d| d.file_type == FileType::Directory)),
897                 FileType::Symlink => assert!(result
898                     .get(&PathBuf::from(file_name))
899                     .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
900             };
901         }
902     }
903 
904     #[test]
fingerprint_partition_cache_mismatch_test()905     fn fingerprint_partition_cache_mismatch_test() {
906         // test to assure that when a file is modified; the cache doesn't return the same digest
907         let tmp_root = TempDir::new().unwrap();
908         make_partition(
909             tmp_root.path(),
910             "system",
911             &[("file1.so", "some text"), ("file2.so", "more text")],
912             // Empty directories/
913             &[],
914             // Symlinks
915             &[("link1.so", "file1.so")],
916         );
917         let result = fingerprint_partitions(tmp_root.path(), &[PathBuf::from("system")]).unwrap();
918         let expected = &[
919             ("system/file1.so", FileType::File, "b94f"),
920             ("system/file2.so", FileType::File, "c0dc"),
921             ("system/link1.so", FileType::Symlink, "file1.so"),
922         ];
923         for (file_name, file_type, data) in expected {
924             match file_type {
925                 FileType::File => assert!(
926                     matching_file_fingerprint(file_name, data, &result),
927                     "mismatch on {:?} {:?}",
928                     file_name,
929                     data
930                 ),
931                 FileType::Directory => assert!(result
932                     .get(&PathBuf::from(file_name))
933                     .is_some_and(|d| d.file_type == FileType::Directory)),
934                 FileType::Symlink => assert!(result
935                     .get(&PathBuf::from(file_name))
936                     .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
937             }
938         }
939 
940         // modify file
941         let file_path = tmp_root.path().join("system/file1.so");
942         fs::write(file_path, "modified file.").unwrap();
943         let result2 = fingerprint_partitions(tmp_root.path(), &[PathBuf::from("system")]).unwrap();
944         let expected2 = &[
945             ("system/file1.so", FileType::File, "047c5"),
946             ("system/file2.so", FileType::File, "c0dc"),
947         ];
948         for (file_name, file_type, data) in expected2 {
949             match file_type {
950                 FileType::File => assert!(
951                     matching_file_fingerprint(file_name, data, &result2),
952                     "mismatch on {:?} {:?}",
953                     file_name,
954                     data
955                 ),
956                 FileType::Directory => assert!(result2
957                     .get(&PathBuf::from(file_name))
958                     .is_some_and(|d| d.file_type == FileType::Directory)),
959                 FileType::Symlink => assert!(result
960                     .get(&PathBuf::from(file_name))
961                     .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
962             }
963         }
964     }
965 
966     #[test]
test_write_and_read_cache_file()967     fn test_write_and_read_cache_file() {
968         let root = TempDir::new().unwrap();
969         let file_path = root.path().join("cache.json");
970         let results = HashMap::from([
971             (
972                 PathBuf::from("path1"),
973                 FileMetadata {
974                     cache_key: "key1".to_string(),
975                     digest: "value1".to_string(),
976                     ..Default::default()
977                 },
978             ),
979             (
980                 PathBuf::from("path2"),
981                 FileMetadata {
982                     cache_key: "key2".to_string(),
983                     digest: "value2".to_string(),
984                     ..Default::default()
985                 },
986             ),
987         ]);
988 
989         let cache = Cache::default();
990         let write_result = cache.write_to_file(&results, &file_path);
991         assert!(write_result.is_ok());
992 
993         let cache = Cache::read_from_file(&file_path).unwrap();
994         assert_eq!(cache.get("key1"), Some(&"value1".to_string()));
995         assert_eq!(cache.get("key2"), Some(&"value2".to_string()));
996     }
997 
998     #[test]
test_read_cache_file_no_file()999     fn test_read_cache_file_no_file() {
1000         let bad_path = Path::new("/tmp/fake/non/existing/path");
1001         let cache = Cache::read_from_file(bad_path).unwrap_or_default();
1002         assert!(cache.data.is_empty());
1003     }
1004 
1005     #[test]
test_read_cache_file_invalid_file()1006     fn test_read_cache_file_invalid_file() {
1007         let root = TempDir::new().unwrap();
1008         let file_path = root.path().join("cache.json");
1009         fs::write(file_path.clone(), "invalid cache data").unwrap();
1010 
1011         let cache = Cache::read_from_file(&file_path).unwrap_or_default();
1012         assert!(cache.data.is_empty());
1013     }
1014 
1015     // Ensure the FileMetadata for the given file matches the prefix of the digest.
1016     // We don't require whole digests as that just muddys up the test code and
1017     // other methods tests full digests.
matching_file_fingerprint( file_name: &str, digest_prefix: &str, fingerprints: &HashMap<PathBuf, FileMetadata>, ) -> bool1018     fn matching_file_fingerprint(
1019         file_name: &str,
1020         digest_prefix: &str,
1021         fingerprints: &HashMap<PathBuf, FileMetadata>,
1022     ) -> bool {
1023         match fingerprints.get(&PathBuf::from(file_name)) {
1024             None => false,
1025             Some(metadata) => {
1026                 metadata.file_type == FileType::File
1027                     && metadata.symlink.is_empty()
1028                     && metadata.digest.starts_with(digest_prefix)
1029             }
1030         }
1031     }
1032 
1033     // Create a temporary folder and create files, directories and symlinks under it.
make_partition( tmp_root: &Path, partition_name: &str, files: &[(&str, &str)], directories: &[&str], symlinks: &[(&str, &str)], )1034     fn make_partition(
1035         tmp_root: &Path,
1036         partition_name: &str,
1037         files: &[(&str, &str)],
1038         directories: &[&str],
1039         symlinks: &[(&str, &str)],
1040     ) {
1041         let partition_dir = tmp_root.join(partition_name);
1042         fs::create_dir(&partition_dir).expect("should have created directory partition_dir");
1043         // First create all empty directories.
1044         for dir in directories {
1045             fs::create_dir_all(partition_dir.join(dir))
1046                 .unwrap_or_else(|_| panic!("Should have created {dir} in {tmp_root:?}"));
1047         }
1048         for (file_name, file_content) in files {
1049             // Create parent dirs, in case they are needed.
1050             fs::create_dir_all(partition_dir.join(file_name).parent().unwrap()).unwrap();
1051             fs::write(partition_dir.join(file_name), file_content).expect("Trouble writing file");
1052         }
1053         for (symlink_name, target) in symlinks {
1054             fs::create_dir_all(partition_dir.join(symlink_name).parent().unwrap()).unwrap();
1055             create_symlink(&PathBuf::from(target), symlink_name, &partition_dir);
1056         }
1057     }
1058 
1059     // Create a symlink in `directory` named `link_name` that points to `target`.
1060     // Returns the absolute path to the created symlink.
create_symlink(target: &Path, link_name: &str, directory: &Path) -> PathBuf1061     fn create_symlink(target: &Path, link_name: &str, directory: &Path) -> PathBuf {
1062         fs::soft_link(target, directory.join(link_name))
1063             .unwrap_or_else(|e| println!("Could not symlink to {:?} {:?}", directory, e));
1064 
1065         directory.join(Path::new(link_name))
1066     }
1067 
file_metadata(digest: &str) -> FileMetadata1068     fn file_metadata(digest: &str) -> FileMetadata {
1069         FileMetadata { file_type: FileType::File, digest: digest.to_string(), ..Default::default() }
1070     }
1071 
link_metadata(target: &str) -> FileMetadata1072     fn link_metadata(target: &str) -> FileMetadata {
1073         FileMetadata {
1074             file_type: FileType::Symlink,
1075             digest: target.to_string(),
1076             ..Default::default()
1077         }
1078     }
1079 
dir_metadata() -> FileMetadata1080     fn dir_metadata() -> FileMetadata {
1081         FileMetadata { file_type: FileType::Directory, ..Default::default() }
1082     }
1083 
1084     // TODO(rbraunstein): a bunch more tests:
1085 }
1086