1 //! Recursively hash the contents of a directory
2 use anyhow::Result;
3 use hex::encode;
4 use rayon::iter::IntoParallelIterator;
5 use rayon::iter::ParallelIterator;
6 use ring::digest::{self, SHA256};
7 use serde::{Deserialize, Serialize};
8 use std::collections::HashMap;
9 use std::fs;
10 use std::io::{self, Read, Write};
11 use std::os::unix::fs::FileTypeExt;
12 use std::os::unix::fs::MetadataExt;
13 use std::os::unix::fs::PermissionsExt;
14 use std::path::{Path, PathBuf};
15 use walkdir::WalkDir;
16
/// The kind of filesystem entry a `FileMetadata` describes.
#[allow(missing_docs)]
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub enum FileType {
    // `File` is the variant produced by `FileType::default()`, and therefore
    // by any `FileMetadata { .., ..Default::default() }` that omits `file_type`.
    #[default]
    File,
    Symlink,
    Directory,
}
25
/// Selects whether permission bits take part in a metadata comparison
/// (see `is_metadata_diff`).
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)]
pub enum DiffMode {
    /// Permission bits are zeroed on both sides before comparing.
    IgnorePermissions,
    /// Permission bits are compared along with the other fields.
    UsePermissions,
}
32
/// Represents a file, directory, or symlink.
/// We need enough information to be able to tell if:
///   1) A regular file changes to a directory or symlink.
///   2) A symlink's target file path changes.
///
/// Comparisons for diffing go through `is_metadata_diff`, which blanks
/// `cache_key` (and, depending on the `DiffMode`, `permission_bits`) first.
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
pub struct FileMetadata {
    /// Is this a file, dir or symlink?
    pub file_type: FileType,

    /// Path that this symlinks to, or "" when this entry is not a symlink.
    /// Omitted from the serialized form when empty.
    #[serde(skip_serializing_if = "String::is_empty", default)]
    pub symlink: String,

    /// Sha256 of contents for regular files (lowercase hex), "" otherwise.
    /// Omitted from the serialized form when empty.
    #[serde(skip_serializing_if = "String::is_empty", default)]
    pub digest: String,

    /// Permission bits, taken from `PermissionsExt::mode()` for regular files
    /// and symlinks. Left at 0 for directories and when built for Windows.
    /// Omitted from the serialized form when 0.
    #[serde(default, skip_serializing_if = "is_default")]
    pub permission_bits: u32,

    /// A unique string used only to determine if digest should be recomputed
    /// or can be used from cache.
    /// The key includes: path, size, mtime, and ctime
    ///
    /// The cache_key will be different for the host/device and is not used to compare
    /// if the files are different. It is never (de)serialized (`#[serde(skip)]`).
    #[serde(skip)]
    pub cache_key: String,
}
63
/// Serde `skip_serializing_if` predicate: true when `t` equals the default
/// value of its type.
fn is_default<T: Default + PartialEq>(t: &T) -> bool {
    let default_value = T::default();
    *t == default_value
}
67
68 impl FileMetadata {
from_path(file_path: &Path, cache: &Cache) -> Result<Self>69 pub fn from_path(file_path: &Path, cache: &Cache) -> Result<Self> {
70 let metadata = fs::symlink_metadata(file_path)?;
71
72 if metadata.is_dir() {
73 Ok(FileMetadata::from_dir())
74 } else if metadata.is_symlink() {
75 FileMetadata::from_symlink(file_path, &metadata)
76 } else {
77 Ok(FileMetadata::from_file(file_path, &metadata, cache)?)
78 }
79 }
80
from_dir() -> Self81 pub fn from_dir() -> Self {
82 FileMetadata { file_type: FileType::Directory, ..Default::default() }
83 }
from_symlink(file_path: &Path, metadata: &fs::Metadata) -> Result<Self>84 pub fn from_symlink(file_path: &Path, metadata: &fs::Metadata) -> Result<Self> {
85 let link = fs::read_link(file_path)?;
86 let target_path_string =
87 link.into_os_string().into_string().expect("Expected valid file name");
88 let mut perms = 0;
89
90 // Getting the permissions doesn't work on windows, so don't try and don't compare them.
91 if !cfg!(windows) {
92 perms = metadata.permissions().mode();
93 }
94 Ok(FileMetadata {
95 file_type: FileType::Symlink,
96 symlink: target_path_string,
97 permission_bits: perms,
98 ..Default::default()
99 })
100 }
from_file(file_path: &Path, metadata: &fs::Metadata, cache: &Cache) -> Result<Self>101 pub fn from_file(file_path: &Path, metadata: &fs::Metadata, cache: &Cache) -> Result<Self> {
102 // Getting the permissions doesn't work on windows, so don't try and don't compare them.
103 let mut perms = 0;
104 if !cfg!(windows) {
105 perms = metadata.permissions().mode();
106 }
107
108 let (digest, cache_key) = get_or_compute_digest(file_path, metadata, cache)?;
109 Ok(FileMetadata {
110 file_type: FileType::File,
111 digest,
112 cache_key,
113 permission_bits: perms,
114 ..Default::default()
115 })
116 }
117 }
118
/// A description of the differences on the filesystems between the host
/// and device. Each file that is different will be a key in one of
/// three maps with the value indicating the difference.
///
/// As filled in by `diff`: `device_needs` and `device_diffs` carry the host's
/// metadata for the path, `device_extra` carries the device's metadata.
#[derive(Debug, Default, PartialEq)]
pub struct Diffs {
    /// Files on host, but not on device
    pub device_needs: HashMap<PathBuf, FileMetadata>,
    /// Files on device, but not host.
    pub device_extra: HashMap<PathBuf, FileMetadata>,
    /// Files that are different between host and device.
    pub device_diffs: HashMap<PathBuf, FileMetadata>,
}
131
132 /// Compute the files that need to be added, removed or updated on the device.
133 /// Each file should land in of the three categories (i.e. updated, not
134 /// removed and added);
135 /// TODO(rbraunstein): Fix allow(unused) by breaking out methods not
136 /// needed by adevice_fingerprint.
137 #[allow(unused)]
diff( host_files: &HashMap<PathBuf, FileMetadata>, device_files: &HashMap<PathBuf, FileMetadata>, diff_mode: DiffMode, ) -> Diffs138 pub fn diff(
139 host_files: &HashMap<PathBuf, FileMetadata>,
140 device_files: &HashMap<PathBuf, FileMetadata>,
141 diff_mode: DiffMode,
142 ) -> Diffs {
143 let mut diffs = Diffs {
144 device_needs: HashMap::new(),
145 device_extra: HashMap::new(),
146 device_diffs: HashMap::new(),
147 };
148
149 // Insert diffs files that are on the host, but not on the device or
150 // file on the host that are different on the device.
151 for (file_name, host_metadata) in host_files {
152 match device_files.get(file_name) {
153 // File on host and device, but the metadata is different.
154 Some(device_metadata)
155 if is_metadata_diff(device_metadata, host_metadata, diff_mode) =>
156 {
157 diffs.device_diffs.insert(file_name.clone(), host_metadata.clone())
158 }
159 // If the device metadata == host metadata there is nothing to do.
160 Some(_) => None,
161 // Not on the device yet, insert it
162 None => diffs.device_needs.insert(file_name.clone(), host_metadata.clone()),
163 };
164 }
165
166 // Files on the device, but not one the host.
167 for (file_name, metadata) in device_files {
168 if host_files.get(file_name).is_none() {
169 diffs.device_extra.insert(file_name.clone(), metadata.clone());
170 }
171 }
172 diffs
173 }
174
175 /// Return true if left != right ignoring cachekey since that include last_modifed
176 /// When useing DiffMode::IgnorePermissions, clear the permission bits before doing the comparison
is_metadata_diff(left: &FileMetadata, right: &FileMetadata, diff_mode: DiffMode) -> bool177 pub fn is_metadata_diff(left: &FileMetadata, right: &FileMetadata, diff_mode: DiffMode) -> bool {
178 let mut cleared_left = left.clone();
179 let mut cleared_right = right.clone();
180 cleared_left.cache_key = "".to_string();
181 cleared_right.cache_key = "".to_string();
182
183 if diff_mode == DiffMode::UsePermissions {
184 return cleared_left != cleared_right;
185 }
186 cleared_left.permission_bits = 0;
187 cleared_right.permission_bits = 0;
188 cleared_left != cleared_right
189 }
190
191 /// Given a `partition_root`, traverse all files under the named |partitions|
192 /// at the root. Typically, ["system", "apex"] are partition_names.
193 /// The keys will be rooted at the `partition root`, ie. if system contains
194 /// a file named FILE and system is the `partition_root`, the key wil be
195 /// system/FILE.
196 /// Cache is used only to speed up computing digests; if the cache_key is the same
197 /// as an earlier fingerprint; then we reuse it rather than recomputing.
fingerprint_partitions( partition_root: &Path, partition_names: &[PathBuf], ) -> Result<HashMap<PathBuf, FileMetadata>>198 pub fn fingerprint_partitions(
199 partition_root: &Path,
200 partition_names: &[PathBuf],
201 ) -> Result<HashMap<PathBuf, FileMetadata>> {
202 let cache = Cache::read().unwrap_or_default();
203 let filenames: Vec<PathBuf> = partition_names
204 .iter()
205 .flat_map(|p| WalkDir::new(partition_root.join(p)).follow_links(false))
206 .map(|result| result.expect("Walking directory").path().to_path_buf())
207 .collect();
208
209 // Compute digest for each file.
210 let results = filenames
211 .into_par_iter()
212 // Walking the /data partition quickly leads to sockets, filter those out.
213 .filter(|file_path| !is_special_file(file_path))
214 .map(|file_path| {
215 (
216 file_path.strip_prefix(partition_root).unwrap().to_owned(),
217 FileMetadata::from_path(&file_path, &cache).unwrap(),
218 )
219 })
220 .collect();
221 cache.write(&results)?;
222 Ok(results)
223 }
224
/// Return true for special files (block devices, char devices, fifos and
/// sockets) that would be incorrect to digest and that we can skip when
/// comparing the device to the build tree.
///
/// An entry that cannot be stat'ed (for example, one removed since the
/// directory walk) is reported as not special rather than panicking, so that
/// the code that goes on to fingerprint it reports the underlying I/O error.
fn is_special_file(file_path: &Path) -> bool {
    // `symlink_metadata` doesn't follow links. We don't want to follow symlinks here.
    // The stat costs much less than the digest operations we are about to perform.
    match fs::symlink_metadata(file_path) {
        Ok(file_metadata) => {
            let file_type = file_metadata.file_type();
            file_type.is_block_device()
                || file_type.is_char_device()
                || file_type.is_fifo()
                || file_type.is_socket()
        }
        Err(_) => false,
    }
}
237
238 /// Compute the sha256 and return it as a lowercase hex string.
compute_digest(file_path: &Path) -> Result<String>239 fn compute_digest(file_path: &Path) -> Result<String> {
240 let input = fs::File::open(file_path)?;
241 let mut reader = io::BufReader::new(input);
242 let mut context = digest::Context::new(&SHA256);
243 let mut buffer = [0; 4096];
244
245 loop {
246 let num_bytes_read = reader.read(&mut buffer)?;
247 if num_bytes_read == 0 {
248 break;
249 }
250 context.update(&buffer[..num_bytes_read]);
251 }
252
253 Ok(encode(context.finish().as_ref()))
254 }
255
256 /// Get digest from cache or compute digest and return a cache_key
get_or_compute_digest( file_path: &Path, metadata: &fs::Metadata, cache: &Cache, ) -> Result<(String, String)>257 fn get_or_compute_digest(
258 file_path: &Path,
259 metadata: &fs::Metadata,
260 cache: &Cache,
261 ) -> Result<(String, String)> {
262 let cache_key = cache.cache_key(file_path, metadata)?;
263 let digest;
264 if let Some(cached_digest) = cache.get(&cache_key) {
265 digest = cached_digest.to_string();
266 } else {
267 digest = compute_digest(file_path)?;
268 }
269 Ok((digest, cache_key))
270 }
271
/// The cache is intended to be used to skip computing digests - it relies on the assumption that unchanged
/// file stats imply unchanged content (and therefore, unchanged digest).
///
/// The host stores the cache file in $OUT/ by default. Acloud/physical devices don't have $OUT so will attempt to store
/// the cache in /cache/. Any file modification (add/delete/change) triggers a cache key recomputation for that specific file
/// and the cache file will be updated. The cache should persist across reboots but may be deleted between flashes.
#[derive(Default)]
pub struct Cache {
    /// Maps a cache key (see `Cache::cache_key`) to the digest stored for it.
    pub data: HashMap<String, String>,
}
282 impl Cache {
get(&self, key: &str) -> Option<&String>283 pub fn get(&self, key: &str) -> Option<&String> {
284 self.data.get(key)
285 }
286
287 // Generate cache key from file metadata
cache_key(&self, file_path: &Path, metadata: &fs::Metadata) -> Result<String>288 pub fn cache_key(&self, file_path: &Path, metadata: &fs::Metadata) -> Result<String> {
289 Ok(format!(
290 "{}#{}#{}.{}#{}.{}",
291 file_path.display(),
292 metadata.len(),
293 metadata.mtime(),
294 metadata.mtime_nsec(),
295 metadata.ctime(),
296 metadata.ctime_nsec()
297 ))
298 }
299
300 /// Reads cache from a file
read_from_file(file_path: &Path) -> Result<Self>301 pub fn read_from_file(file_path: &Path) -> Result<Self> {
302 let mut file = fs::File::open(file_path)?;
303 let mut contents = String::new();
304 file.read_to_string(&mut contents)?;
305 match serde_json::from_str(&contents) {
306 Ok(data) => Ok(Cache { data }),
307 Err(_error) => Err(_error.into()),
308 }
309 }
310
read() -> Result<Self>311 pub fn read() -> Result<Self> {
312 let cache_file_path = Cache::default_cache_path();
313 Cache::read_from_file(&cache_file_path)
314 }
315
316 /// Writes cache to a file
write_to_file( self, results: &HashMap<PathBuf, FileMetadata>, file_path: &Path, ) -> Result<()>317 pub fn write_to_file(
318 self,
319 results: &HashMap<PathBuf, FileMetadata>,
320 file_path: &Path,
321 ) -> Result<()> {
322 let mut new_cache: HashMap<String, String> = HashMap::new();
323 for meta in results.values() {
324 if !meta.cache_key.is_empty() {
325 new_cache.insert(meta.cache_key.clone(), meta.digest.clone());
326 }
327 }
328 if new_cache == self.data {
329 // cache did not change - skip write.
330 return Ok(());
331 }
332 let cache_str = serde_json::to_string(&new_cache)?;
333 let mut file = fs::File::create(file_path)?;
334 file.write_all(cache_str.as_bytes())?;
335 Ok(())
336 }
337
write(self, results: &HashMap<PathBuf, FileMetadata>) -> Result<()>338 pub fn write(self, results: &HashMap<PathBuf, FileMetadata>) -> Result<()> {
339 let cache_file_path = Cache::default_cache_path();
340 self.write_to_file(results, &cache_file_path)
341 }
342
default_cache_path() -> PathBuf343 pub fn default_cache_path() -> PathBuf {
344 // Attempt to use $OUT, then /cache and finally fall back to /tmp
345 // /tmp is deleted on reboot on acloud devices
346 let mut cache_dir = std::env::var("OUT").unwrap_or_else(|_| "/cache".to_string());
347 if !Path::new(&cache_dir).is_dir() {
348 cache_dir = "/tmp".to_string();
349 }
350
351 PathBuf::from(cache_dir).join("adevice_digest_cache.json")
352 }
353 }
354
355 #[cfg(test)]
356 mod tests {
357 use super::*;
358 use crate::fingerprint::DiffMode::UsePermissions;
359 use std::collections::BTreeSet;
360 use std::path::PathBuf;
361 use tempfile::TempDir;
362
/// Diffing two empty trees reports nothing.
#[test]
fn empty_inputs() {
    let no_files = HashMap::new();
    let actual = diff(&no_files, &no_files, UsePermissions);
    assert_eq!(actual, Diffs::default());
}
367
/// A tree diffed against an identical copy of itself reports nothing.
#[test]
fn same_inputs() {
    let metadata = FileMetadata {
        file_type: FileType::File,
        digest: "deadbeef".to_string(),
        ..Default::default()
    };
    let host = HashMap::from([(PathBuf::from("a/b/foo.so"), metadata)]);
    let device = host.clone();

    assert_eq!(diff(&host, &device, UsePermissions), Diffs::default());
}
380
/// Identical entries, permission bits included, report nothing when
/// permissions take part in the comparison.
#[test]
fn same_inputs_with_permissions() {
    let metadata = FileMetadata {
        file_type: FileType::File,
        digest: "deadbeef".to_string(),
        permission_bits: 0o644,
        ..Default::default()
    };
    let host = HashMap::from([(PathBuf::from("a/b/foo.so"), metadata)]);
    let device = host.clone();

    assert_eq!(diff(&host, &device, UsePermissions), Diffs::default());
}
394
/// Entries that differ only in their permission bits are reported as
/// different when permissions take part in the comparison.
#[test]
fn same_inputs_with_different_permissions_are_not_equal() {
    let orig = HashMap::from([(
        PathBuf::from("a/b/foo.so"),
        FileMetadata {
            file_type: FileType::File,
            digest: "deadbeef".to_string(),
            permission_bits: 0o644,
            ..Default::default()
        },
    )]);
    // Same entry, but with the permission bits cleared.
    let mut copy = orig.clone();
    copy.entry(PathBuf::from("a/b/foo.so")).and_modify(|v| v.permission_bits = 0);

    // Not equal
    assert_ne!(diff(&orig, &copy, UsePermissions), Diffs::default());
}
412
/// Entries that differ only in their permission bits are reported as
/// identical when permissions are ignored.
#[test]
fn same_inputs_ignoring_permissions() {
    let orig = HashMap::from([(
        PathBuf::from("a/b/foo.so"),
        FileMetadata {
            file_type: FileType::File,
            digest: "deadbeef".to_string(),
            permission_bits: 0o644,
            ..Default::default()
        },
    )]);
    // Same entry, but with the permission bits cleared.
    let mut copy = orig.clone();
    copy.entry(PathBuf::from("a/b/foo.so")).and_modify(|v| v.permission_bits = 0);

    // Equal when we ignore the different permission bits.
    assert_eq!(diff(&orig, &copy, DiffMode::IgnorePermissions), Diffs::default());
}
430
/// The same path being a regular file on the host and a directory on the
/// device is reported in `device_diffs`.
#[test]
fn different_file_type() {
    let path = PathBuf::from("a/b/foo.so");
    let host_file = FileMetadata {
        file_type: FileType::File,
        digest: "deadbeef".to_string(),
        ..Default::default()
    };
    let device_dir = FileMetadata { file_type: FileType::Directory, ..Default::default() };

    let host_map_with_filename_as_file = HashMap::from([(path.clone(), host_file.clone())]);
    let device_map_with_filename_as_dir = HashMap::from([(path.clone(), device_dir)]);

    let diffs =
        diff(&host_map_with_filename_as_file, &device_map_with_filename_as_dir, UsePermissions);

    // `diff` returns FileMetadata for host, but we really only care that the
    // file name was found.
    let reported = diffs.device_diffs.get(&path).expect("Missing file");
    assert_eq!(reported, &host_file);
}
460
/// Diffs two small trees that mix matching, changed, host-only and
/// device-only files, symlinks and directories, and checks which paths land
/// in each of the three `Diffs` maps.
#[test]
fn diff_simple_trees() {
    /// The keys of `entries` as an ordered set.
    fn key_set(entries: &HashMap<PathBuf, FileMetadata>) -> BTreeSet<PathBuf> {
        entries.keys().cloned().collect()
    }

    /// `names` converted to an ordered set of paths.
    fn path_set(names: &[&str]) -> BTreeSet<PathBuf> {
        names.iter().map(|name| PathBuf::from(*name)).collect()
    }

    let host_map = HashMap::from([
        (PathBuf::from("matching_file"), file_metadata("digest_matching_file")),
        (PathBuf::from("path/to/diff_file"), file_metadata("digest_file2")),
        (PathBuf::from("path/to/new_file"), file_metadata("digest_new_file")),
        (PathBuf::from("same_link"), link_metadata("matching_file")),
        (PathBuf::from("diff_link"), link_metadata("targetxx")),
        (PathBuf::from("new_link"), link_metadata("new_target")),
        (PathBuf::from("matching dir"), dir_metadata()),
        (PathBuf::from("new_dir"), dir_metadata()),
    ]);

    let device_map = HashMap::from([
        (PathBuf::from("matching_file"), file_metadata("digest_matching_file")),
        (PathBuf::from("path/to/diff_file"), file_metadata("digest_file2_DIFF")),
        (PathBuf::from("path/to/deleted_file"), file_metadata("digest_deleted_file")),
        (PathBuf::from("same_link"), link_metadata("matching_file")),
        (PathBuf::from("diff_link"), link_metadata("targetxx_DIFF")),
        (PathBuf::from("deleted_link"), link_metadata("new_target")),
        (PathBuf::from("matching dir"), dir_metadata()),
        (PathBuf::from("deleted_dir"), dir_metadata()),
    ]);

    let diffs = diff(&host_map, &device_map, UsePermissions);

    // Present on both sides with different contents / link targets.
    assert_eq!(key_set(&diffs.device_diffs), path_set(&["diff_link", "path/to/diff_file"]));
    // Only on the host.
    assert_eq!(
        key_set(&diffs.device_needs),
        path_set(&["path/to/new_file", "new_link", "new_dir"])
    );
    // Only on the device.
    assert_eq!(
        key_set(&diffs.device_extra),
        path_set(&["path/to/deleted_file", "deleted_link", "deleted_dir"])
    );
}
507
/// The digest of an empty file is the well-known SHA-256 of zero bytes.
#[test]
fn compute_digest_empty_file() {
    let tmpdir = TempDir::new().unwrap();
    let empty_path = tmpdir.path().join("empty_file");
    fs::write(&empty_path, "").unwrap();

    let actual = compute_digest(&empty_path).unwrap();
    assert_eq!(
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string(),
        actual
    );
}
518
/// The digest of a small two-line file matches its known SHA-256.
#[test]
fn compute_digest_small_file() {
    let tmpdir = TempDir::new().unwrap();
    let small_path = tmpdir.path().join("small_file");
    fs::write(&small_path, "This is a test\nof a small file.\n").unwrap();

    let actual = compute_digest(&small_path).unwrap();
    assert_eq!(
        "a519d054afdf2abfbdd90a738d248f606685d6c187e96390bde22e958240449e".to_string(),
        actual
    );
}
529
/// With an empty cache the digest is computed and the cache key is built from
/// the file's path and stats; once the key is in the cache, the cached value
/// is returned instead of recomputing.
#[test]
fn get_or_compute_digest_small_file() {
    let tmpdir = TempDir::new().unwrap();
    let file_path = tmpdir.path().join("small_file");
    fs::write(&file_path, "This is a test\nof a small file.\n").unwrap();
    let metadata = fs::metadata(&file_path).expect("file metadata");

    // Empty cache: the digest comes from the file contents.
    let mut cache = Cache::default();
    let (computed, cache_key) = get_or_compute_digest(&file_path, &metadata, &cache).unwrap();
    assert_eq!(
        "a519d054afdf2abfbdd90a738d248f606685d6c187e96390bde22e958240449e".to_string(),
        computed
    );

    let expected_key = format!(
        "{}#{}#{}.{}#{}.{}",
        file_path.display(),
        metadata.len(),
        metadata.mtime(),
        metadata.mtime_nsec(),
        metadata.ctime(),
        metadata.ctime_nsec()
    );
    assert_eq!(cache_key, expected_key);

    // If a cache entry exists, then that is used instead of recomputing.
    cache.data.insert(cache_key, "test-saved-cache-digest".to_string());
    let (cached, _) = get_or_compute_digest(&file_path, &metadata, &cache).unwrap();
    assert_eq!("test-saved-cache-digest".to_string(), cached);
}
561
// Generate some files near the buffer size to check for off-by-one errors,
// compute their digests and compare against the prefixes stored here.
// We can't check these into testdata and read from testdata unless we serialize
// all the tests. Some tests `cd` to create relative symlinks and that affects
// any tests that want to read from testdata.
#[test]
fn verify_edge_case_digests() {
    let tmpdir = TempDir::new().unwrap();
    // We could use a RNG with a seed, but lets just create simple files of bytes.
    let raw_bytes: &[u8; 10] = &[0, 1, 17, 200, 11, 8, 0, 32, 9, 10];
    // 999 copies of the 10-byte pattern: comfortably more than the 4097 bytes needed.
    let boring_buff: Vec<u8> = raw_bytes.repeat(999);

    let cases: [(usize, &str); 3] =
        [(4095, "a0e88b2743"), (4096, "b2e324aac3"), (4097, "70fcbe6a8d")];
    for (num_bytes, digest) in cases {
        let file_path = tmpdir.path().join(num_bytes.to_string());
        fs::write(&file_path, &boring_buff[..num_bytes]).unwrap();

        let actual = compute_digest(&file_path).unwrap();
        assert!(
            actual.starts_with(digest),
            "Expected file {:?} to have a digest starting with {:?}",
            file_path,
            digest
        );
    }
}
590
/// Fingerprinting a regular file records its type, digest, mode and cache key.
#[test]
fn fingerprint_file_for_file() {
    let partition_root = TempDir::new().unwrap();
    let file_path = partition_root.path().join("small_file");
    fs::write(&file_path, "This is a test\nof a small file.\n").unwrap();

    // NOTE: files are 0o644 on the host tests and 0o655 on device tests
    // for me and CI, so always set the file to 0o655 during the test.
    let mut perms = fs::metadata(&file_path).expect("Getting permissions").permissions();
    perms.set_mode(0o100655);
    assert!(fs::set_permissions(&file_path, perms).is_ok());

    let cache = Cache::default();
    let entry = FileMetadata::from_path(&file_path, &cache).unwrap();

    // The cache key embeds timestamps, so it is copied from the result rather
    // than predicted.
    let expected = FileMetadata {
        file_type: FileType::File,
        digest: "a519d054afdf2abfbdd90a738d248f606685d6c187e96390bde22e958240449e".to_string(),
        permission_bits: 0o100655,
        cache_key: entry.cache_key.clone(),
        ..Default::default()
    };
    assert_eq!(expected, entry)
}
616
/// Fingerprinting a symlink with a relative target records the target as
/// written, not the file it resolves to.
#[test]
fn fingerprint_file_for_relative_symlink() {
    let partition_root = TempDir::new().unwrap();
    let target_path = partition_root.path().join("small_file");
    fs::write(target_path, "This is a test\nof a small file.\n").unwrap();

    let link = create_symlink(
        &PathBuf::from("small_file"),
        "link_to_small_file",
        partition_root.path(),
    );

    let cache = Cache::default();
    let entry = FileMetadata::from_path(&link, &cache).unwrap();

    let expected = FileMetadata {
        file_type: FileType::Symlink,
        symlink: "small_file".to_string(),
        permission_bits: 0o120777,
        ..Default::default()
    };
    assert_eq!(expected, entry)
}
640
/// Fingerprinting a symlink with an absolute target records that absolute path.
#[test]
fn fingerprint_file_for_absolute_symlink() {
    let partition_root = TempDir::new().unwrap();
    let link = create_symlink(&PathBuf::from("/tmp"), "link_to_tmp", partition_root.path());

    let cache = Cache::default();
    let entry = FileMetadata::from_path(&link, &cache).unwrap();

    let expected = FileMetadata {
        file_type: FileType::Symlink,
        symlink: "/tmp".to_string(),
        permission_bits: 0o120777,
        ..Default::default()
    };
    assert_eq!(expected, entry)
}
657
/// Fingerprinting a directory records nothing but its file type.
#[test]
fn fingerprint_file_for_directory() {
    let partition_root = TempDir::new().unwrap();
    let newdir_path = partition_root.path().join("some_dir");
    fs::create_dir(&newdir_path).expect("Should have create 'some_dir' in temp dir");

    let cache = Cache::default();
    let entry = FileMetadata::from_path(&newdir_path, &cache).unwrap();

    let expected = FileMetadata { file_type: FileType::Directory, ..Default::default() };
    assert_eq!(expected, entry)
}
667
/// Fingerprinting a path that does not exist yields an error.
#[test]
fn fingerprint_file_on_bad_path_reports_err() {
    let cache = Cache::default();
    let outcome = FileMetadata::from_path(Path::new("testdata/not_exist"), &cache);
    assert!(outcome.is_err(), "Should have failed on invalid path");
}
675
/// Fingerprints a single `system` partition holding regular files, nested and
/// empty directories, and symlinks (chained, cyclic, absolute and dangling),
/// then checks the type of every entry plus its digest or link target.
///
/// Layout, e.g. under /tmp/.tmpxO0pRC/system:
/// ```text
/// % tree
/// .
/// ├── cycle1 -> cycle2
/// ├── cycle2 -> cycle1
/// ├── danglers
/// │   ├── d1 -> nowhere
/// │   └── d2 -> /not/existing
/// ├── dir1
/// │   ├── dir2
/// │   │   ├── nested
/// │   │   └── nested2
/// │   ├── dir4
/// │   └── f1.txt
/// ├── dir3
/// │   ├── to_tmp -> /tmp
/// │   └── to_tmp2 -> /system/cycle1
/// ├── file1.so
/// ├── file2.so
/// ├── link1 -> file1.so
/// └── link2 -> link1
/// ```
#[test]

fn fingerprint_simple_partition() {
    let tmp_root = TempDir::new().unwrap();
    // TODO(rbraunstein): Change make_partition to look more like `expected` variable below.
    // i.e. use file_type rather than pass files, dirs, and symlinks in different arrays.
    // Or use a struct with named fields as the args.
    make_partition(
        tmp_root.path(),
        "system",
        // Regular files:
        // File name, contents.
        &[
            ("file1.so", "some text"),
            ("file2.so", "more text"),
            ("dir1/f1.txt", ""),
            ("dir1/dir2/nested", "some more text"),
            ("dir1/dir2/nested2", "some more text"),
        ],
        // Empty directories/
        &["dir3", "dir1/dir4", "danglers"],
        // Symlinks:
        // Linkname, target.
        &[
            ("link1", "file1.so"),
            ("link2", "link1"),
            ("cycle1", "cycle2"),
            ("cycle2", "cycle1"),
            ("dir3/to_tmp", "/tmp"),
            ("dir3/to_tmp2", "/system/cycle1"),
            ("danglers/d1", "nowhere"),
            ("danglers/d2", "/not/existing"),
        ],
    );
    let result = fingerprint_partitions(tmp_root.path(), &[PathBuf::from("system")]).unwrap();
    println!("RESULTS\n");
    for x in &result {
        println!("{:?}", x);
    }
    // (key relative to the temp root, expected type, data), where data is the
    // link target for symlinks, unused for directories and, for files, the
    // value handed to `matching_file_fingerprint` (presumably a digest prefix;
    // confirm against that helper).
    // The partition root itself (`system`) is listed too.
    let expected = &[
        ("system/file1.so", FileType::File, "b94f"),
        ("system/file2.so", FileType::File, "c0dc"),
        ("system/dir1/f1.txt", FileType::File, "e3b0c"),
        ("system/dir1/dir2/nested", FileType::File, "bde27b"),
        ("system/dir1/dir2/nested2", FileType::File, "bde27b"),
        ("system/dir3", FileType::Directory, ""),
        ("system/danglers", FileType::Directory, ""),
        ("system/dir1", FileType::Directory, ""),
        ("system/dir1/dir2", FileType::Directory, ""),
        ("system/dir1/dir4", FileType::Directory, ""),
        ("system/link1", FileType::Symlink, "file1.so"),
        ("system/link2", FileType::Symlink, "link1"),
        ("system/cycle1", FileType::Symlink, "cycle2"),
        ("system/cycle2", FileType::Symlink, "cycle1"),
        ("system/dir3/to_tmp", FileType::Symlink, "/tmp"),
        ("system/dir3/to_tmp2", FileType::Symlink, "/system/cycle1"),
        ("system/danglers/d1", FileType::Symlink, "nowhere"),
        ("system/danglers/d2", FileType::Symlink, "/not/existing"),
        ("system", FileType::Directory, ""),
    ];

    // Equal sizes plus the per-entry checks below mean there are no unexpected entries.
    assert_eq!(
        expected.len(),
        result.len(),
        "expected: {}, result {}",
        expected.len(),
        result.len()
    );

    for (file_name, file_type, data) in expected {
        match file_type {
            FileType::File => assert!(
                matching_file_fingerprint(file_name, data, &result),
                "mismatch on {:?} {:?}",
                file_name,
                data
            ),
            FileType::Directory => assert!(result
                .get(&PathBuf::from(file_name))
                .is_some_and(|d| d.file_type == FileType::Directory)),
            FileType::Symlink => assert!(result
                .get(&PathBuf::from(file_name))
                .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
        };
    }
}
781
/// Fingerprints two partitions (`system` and `data`) in one call and checks
/// that every key is prefixed by its partition name, so the same file name in
/// both partitions yields two distinct entries.
#[test]
fn fingerprint_multiple_partitions() {
    let tmp_root = TempDir::new().unwrap();
    // Use same file name, with and without same contents in two different partitions.
    make_partition(
        tmp_root.path(),
        "system",
        &[("file1.so", "some text"), ("file2", "system part")],
        // Empty directories/
        &[],
        // Symlinks
        &[],
    );
    make_partition(
        tmp_root.path(),
        "data",
        &[("file1.so", "some text"), ("file2", "data part")],
        // Empty directories/
        &[],
        // Symlinks
        &[],
    );

    let result = fingerprint_partitions(
        tmp_root.path(),
        &[PathBuf::from("system"), PathBuf::from("data")],
    )
    .unwrap();
    println!("RESULTS\n");
    for x in &result {
        println!("{:?}", x);
    }
    // `file1.so` has the same contents in both partitions and so the same
    // expected data; `file2` differs between them.
    // Both partition roots are listed as directories.
    let expected = &[
        ("system/file1.so", FileType::File, "b94f"),
        ("data/file1.so", FileType::File, "b94f"),
        ("system/file2", FileType::File, "ae7c6c"),
        ("data/file2", FileType::File, "4ae46d"),
        ("data", FileType::Directory, ""),
        ("system", FileType::Directory, ""),
    ];

    assert_eq!(
        expected.len(),
        result.len(),
        "expected: {}, result {}",
        expected.len(),
        result.len()
    );

    for (file_name, file_type, data) in expected {
        match file_type {
            FileType::File => assert!(
                matching_file_fingerprint(file_name, data, &result),
                "mismatch on {:?} {:?}",
                file_name,
                data
            ),
            FileType::Directory => assert!(result
                .get(&PathBuf::from(file_name))
                .is_some_and(|d| d.file_type == FileType::Directory)),
            // No symlinks are created in this test.
            _ => (),
        };
    }
}
846
/// Fingerprints a partition whose file and symlink names are non-ASCII
/// (Korean and Hebrew) and checks that keys and link targets come back intact.
#[test]
fn fingerprint_partition_with_interesting_file_names() {
    let tmp_dir = TempDir::new().unwrap();
    let tmp_root = tmp_dir.path().to_owned();
    println!("DEBUG: {tmp_root:?}");
    make_partition(
        &tmp_root,
        "funky",
        &[("안녕하세요", "hello\n")],
        // Empty directories/
        &[
            // TODO(rbraunstein): This invalid file name (embedded newline and Nil) breaks tests.
            // Need to fix the code to remove `unwraps` and propagate errors.
            // "d\ni\0r3"
        ],
        // symlinks
        // linkname, target
        &[("שלום", "안녕하세요")],
    );
    let result = fingerprint_partitions(&tmp_root, &[PathBuf::from("funky")]).unwrap();
    println!("RESULTS\n");
    for x in &result {
        println!("{:?}", x);
    }
    // The partition root itself (`funky`) is listed too.
    let expected = &[
        ("funky/안녕하세요", FileType::File, "5891b"),
        // ("funky/d\ni\0r3", FileType::Directory, ""),
        ("funky/שלום", FileType::Symlink, "안녕하세요"),
        ("funky", FileType::Directory, ""),
    ];

    assert_eq!(
        expected.len(),
        result.len(),
        "expected: {}, result {}",
        expected.len(),
        result.len()
    );

    for (file_name, file_type, data) in expected {
        match file_type {
            FileType::File => assert!(
                matching_file_fingerprint(file_name, data, &result),
                "mismatch on {:?} {:?}",
                file_name,
                data
            ),
            FileType::Directory => assert!(result
                .get(&PathBuf::from(file_name))
                .is_some_and(|d| d.file_type == FileType::Directory)),
            FileType::Symlink => assert!(result
                .get(&PathBuf::from(file_name))
                .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
        };
    }
}
903
/// Fingerprints a partition, modifies one file, fingerprints again and checks
/// that the second run reports the new digest for the modified file: a stale
/// cached digest must not be reused once the file has changed.
#[test]
fn fingerprint_partition_cache_mismatch_test() {
    // test to assure that when a file is modified; the cache doesn't return the same digest
    let tmp_root = TempDir::new().unwrap();
    make_partition(
        tmp_root.path(),
        "system",
        &[("file1.so", "some text"), ("file2.so", "more text")],
        // Empty directories/
        &[],
        // Symlinks
        &[("link1.so", "file1.so")],
    );

    // First run: establishes the baseline digests.
    let result = fingerprint_partitions(tmp_root.path(), &[PathBuf::from("system")]).unwrap();
    let expected = &[
        ("system/file1.so", FileType::File, "b94f"),
        ("system/file2.so", FileType::File, "c0dc"),
        ("system/link1.so", FileType::Symlink, "file1.so"),
    ];
    for (file_name, file_type, data) in expected {
        match file_type {
            FileType::File => assert!(
                matching_file_fingerprint(file_name, data, &result),
                "mismatch on {:?} {:?}",
                file_name,
                data
            ),
            FileType::Directory => assert!(result
                .get(&PathBuf::from(file_name))
                .is_some_and(|d| d.file_type == FileType::Directory)),
            FileType::Symlink => assert!(result
                .get(&PathBuf::from(file_name))
                .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
        }
    }

    // modify file
    let file_path = tmp_root.path().join("system/file1.so");
    fs::write(file_path, "modified file.").unwrap();

    // Second run: file1.so must get a fresh digest, file2.so keeps its old one.
    let result2 = fingerprint_partitions(tmp_root.path(), &[PathBuf::from("system")]).unwrap();
    let expected2 = &[
        ("system/file1.so", FileType::File, "047c5"),
        ("system/file2.so", FileType::File, "c0dc"),
    ];
    for (file_name, file_type, data) in expected2 {
        match file_type {
            FileType::File => assert!(
                matching_file_fingerprint(file_name, data, &result2),
                "mismatch on {:?} {:?}",
                file_name,
                data
            ),
            FileType::Directory => assert!(result2
                .get(&PathBuf::from(file_name))
                .is_some_and(|d| d.file_type == FileType::Directory)),
            // Every arm of this loop checks the second run's results.
            FileType::Symlink => assert!(result2
                .get(&PathBuf::from(file_name))
                .is_some_and(|s| s.file_type == FileType::Symlink && &s.symlink == data)),
        }
    }
}
965
#[test]
fn test_write_and_read_cache_file() {
    // Round trip: write fingerprints out through a `Cache`, read the file back, and look
    // up each digest by its cache key.
    let tmp_dir = TempDir::new().unwrap();
    let cache_path = tmp_dir.path().join("cache.json");

    // (path, cache key, digest) for every entry written to the cache file.
    let entries = [("path1", "key1", "value1"), ("path2", "key2", "value2")];
    let fingerprints: HashMap<PathBuf, FileMetadata> = entries
        .iter()
        .map(|&(path, key, digest)| {
            let metadata = FileMetadata {
                cache_key: key.to_string(),
                digest: digest.to_string(),
                ..Default::default()
            };
            (PathBuf::from(path), metadata)
        })
        .collect();

    let writer = Cache::default();
    assert!(writer.write_to_file(&fingerprints, &cache_path).is_ok());

    let reloaded = Cache::read_from_file(&cache_path).unwrap();
    assert_eq!(reloaded.get("key1"), Some(&"value1".to_string()));
    assert_eq!(reloaded.get("key2"), Some(&"value2".to_string()));
}
997
#[test]
fn test_read_cache_file_no_file() {
    // Reading from a path that does not exist, combined with `unwrap_or_default`, must
    // leave us with a cache that holds no data.
    let loaded = Cache::read_from_file(Path::new("/tmp/fake/non/existing/path"))
        .unwrap_or_default();
    assert!(loaded.data.is_empty());
}
1004
#[test]
fn test_read_cache_file_invalid_file() {
    // A cache file whose contents are not valid cache data, combined with
    // `unwrap_or_default`, must leave us with a cache that holds no data.
    let tmp_dir = TempDir::new().unwrap();
    let garbage_path = tmp_dir.path().join("cache.json");
    fs::write(&garbage_path, "invalid cache data").unwrap();

    let loaded = Cache::read_from_file(&garbage_path).unwrap_or_default();
    assert!(loaded.data.is_empty());
}
1014
1015 // Ensure the FileMetadata for the given file matches the prefix of the digest.
1016 // We don't require whole digests as that just muddys up the test code and
1017 // other methods tests full digests.
matching_file_fingerprint( file_name: &str, digest_prefix: &str, fingerprints: &HashMap<PathBuf, FileMetadata>, ) -> bool1018 fn matching_file_fingerprint(
1019 file_name: &str,
1020 digest_prefix: &str,
1021 fingerprints: &HashMap<PathBuf, FileMetadata>,
1022 ) -> bool {
1023 match fingerprints.get(&PathBuf::from(file_name)) {
1024 None => false,
1025 Some(metadata) => {
1026 metadata.file_type == FileType::File
1027 && metadata.symlink.is_empty()
1028 && metadata.digest.starts_with(digest_prefix)
1029 }
1030 }
1031 }
1032
1033 // Create a temporary folder and create files, directories and symlinks under it.
make_partition( tmp_root: &Path, partition_name: &str, files: &[(&str, &str)], directories: &[&str], symlinks: &[(&str, &str)], )1034 fn make_partition(
1035 tmp_root: &Path,
1036 partition_name: &str,
1037 files: &[(&str, &str)],
1038 directories: &[&str],
1039 symlinks: &[(&str, &str)],
1040 ) {
1041 let partition_dir = tmp_root.join(partition_name);
1042 fs::create_dir(&partition_dir).expect("should have created directory partition_dir");
1043 // First create all empty directories.
1044 for dir in directories {
1045 fs::create_dir_all(partition_dir.join(dir))
1046 .unwrap_or_else(|_| panic!("Should have created {dir} in {tmp_root:?}"));
1047 }
1048 for (file_name, file_content) in files {
1049 // Create parent dirs, in case they are needed.
1050 fs::create_dir_all(partition_dir.join(file_name).parent().unwrap()).unwrap();
1051 fs::write(partition_dir.join(file_name), file_content).expect("Trouble writing file");
1052 }
1053 for (symlink_name, target) in symlinks {
1054 fs::create_dir_all(partition_dir.join(symlink_name).parent().unwrap()).unwrap();
1055 create_symlink(&PathBuf::from(target), symlink_name, &partition_dir);
1056 }
1057 }
1058
// Create a symlink in `directory` named `link_name` that points to `target`.
// `target` is stored in the link verbatim; it is not resolved or checked for existence.
// Returns `directory.join(link_name)`, the path of the link (absolute only if
// `directory` is absolute).
// Failure to create the link is reported on stdout rather than panicking, so the
// returned path may not exist on disk.
fn create_symlink(target: &Path, link_name: &str, directory: &Path) -> PathBuf {
    let link_path = directory.join(link_name);
    // `fs::soft_link` is deprecated; this file is already Unix-only, so use the
    // platform-specific replacement directly.
    if let Err(e) = std::os::unix::fs::symlink(target, &link_path) {
        println!("Could not symlink to {:?} {:?}", directory, e);
    }
    link_path
}
1067
file_metadata(digest: &str) -> FileMetadata1068 fn file_metadata(digest: &str) -> FileMetadata {
1069 FileMetadata { file_type: FileType::File, digest: digest.to_string(), ..Default::default() }
1070 }
1071
link_metadata(target: &str) -> FileMetadata1072 fn link_metadata(target: &str) -> FileMetadata {
1073 FileMetadata {
1074 file_type: FileType::Symlink,
1075 digest: target.to_string(),
1076 ..Default::default()
1077 }
1078 }
1079
dir_metadata() -> FileMetadata1080 fn dir_metadata() -> FileMetadata {
1081 FileMetadata { file_type: FileType::Directory, ..Default::default() }
1082 }
1083
1084 // TODO(rbraunstein): a bunch more tests:
1085 }
1086